1
import * as fs from 'fs';
2
import * as path from 'path';
3
import * as os from 'os';
4
import { createHash } from 'crypto';
6
import $RefParser from '@apidevtools/json-schema-ref-parser';
7
import invariant from 'tiny-invariant';
8
import nunjucks from 'nunjucks';
9
import yaml from 'js-yaml';
10
import { stringify } from 'csv-stringify/sync';
11
import { globSync } from 'glob';
12
import { desc, eq } from 'drizzle-orm';
14
import cliState from './cliState';
15
import logger from './logger';
16
import { getDirectory, importModule } from './esm';
17
import { readTests } from './testCases';
27
import { runDbMigrations } from './migrate';
28
import { runPython } from './python/wrapper';
39
TestCasesWithMetadata,
40
TestCasesWithMetadataPrompt,
48
/** In-memory copy of the global config; `null` means "not loaded yet". */
let globalConfigCache: any = null;

/**
 * Forgets the cached global config so that the next `readGlobalConfig()`
 * call goes back to disk.
 */
export function resetGlobalConfig(): void {
  globalConfigCache = null;
}
54
/**
 * Returns the global config, reading `promptfoo.yaml` from the config
 * directory on first use and serving the cached object afterwards.
 *
 * When the file does not exist, the config directory is created if needed and
 * a fresh `{ hasRun: false }` config is written to disk.
 *
 * @returns The cached global config object.
 */
export function readGlobalConfig(): any {
  if (!globalConfigCache) {
    const configDir = getConfigDirectoryPath();
    const configFilePath = path.join(configDir, 'promptfoo.yaml');

    if (fs.existsSync(configFilePath)) {
      // yaml.load yields undefined/null for an empty (or `null`) document.
      // Without a fallback the cache would stay unset and callers would get a
      // nullish config back, so use the same default as the "no file" branch.
      const loaded = yaml.load(fs.readFileSync(configFilePath, 'utf-8'));
      globalConfigCache = loaded ?? { hasRun: false };
    } else {
      if (!fs.existsSync(configDir)) {
        fs.mkdirSync(configDir, { recursive: true });
      }
      globalConfigCache = { hasRun: false };
      fs.writeFileSync(configFilePath, yaml.dump(globalConfigCache));
    }
  }

  return globalConfigCache;
}
73
export function maybeRecordFirstRun(): boolean {
74
// Return true if first run
76
const config = readGlobalConfig();
79
fs.writeFileSync(path.join(getConfigDirectoryPath(), 'promptfoo.yaml'), yaml.dump(config));
88
/**
 * Reads the config at `configPath` if that file exists.
 *
 * @param configPath - Path of the configuration file to try.
 * @returns The result of `readConfig`, or `undefined` when no file is present.
 */
export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
  const configExists = fs.existsSync(configPath);
  if (configExists) {
    return readConfig(configPath);
  }
  return undefined;
}
95
export async function dereferenceConfig(rawConfig: UnifiedConfig): Promise<UnifiedConfig> {
96
if (process.env.PROMPTFOO_DISABLE_REF_PARSER) {
100
// Track and delete tools[i].function for each tool, preserving the rest of the properties
101
// https://github.com/promptfoo/promptfoo/issues/364
103
// Remove parameters from functions and tools to prevent dereferencing
104
const extractFunctionParameters = (functions: { parameters?: object }[]) => {
105
return functions.map((func) => {
106
const { parameters } = func;
107
delete func.parameters;
108
return { parameters };
112
const extractToolParameters = (tools: { function?: { parameters?: object } }[]) => {
113
return tools.map((tool) => {
114
const { parameters } = tool.function || {};
115
if (tool.function?.parameters) {
116
delete tool.function.parameters;
118
return { parameters };
122
// Restore parameters to functions and tools after dereferencing
123
const restoreFunctionParameters = (
124
functions: { parameters?: object }[],
125
parametersList: { parameters?: object }[],
127
functions.forEach((func, index) => {
128
if (parametersList[index]?.parameters) {
129
func.parameters = parametersList[index].parameters;
134
const restoreToolParameters = (
135
tools: { function?: { parameters?: object } }[],
136
parametersList: { parameters?: object }[],
138
tools.forEach((tool, index) => {
139
if (parametersList[index]?.parameters) {
140
tool.function = tool.function || {};
141
tool.function.parameters = parametersList[index].parameters;
146
let functionsParametersList: { parameters?: object }[][] = [];
147
let toolsParametersList: { parameters?: object }[][] = [];
149
if (Array.isArray(rawConfig.providers)) {
150
rawConfig.providers.forEach((provider, providerIndex) => {
151
if (typeof provider === 'string') return;
152
if (!provider.config) {
153
// Handle when provider is a map
154
provider = Object.values(provider)[0] as ProviderOptions;
157
if (provider.config?.functions) {
158
functionsParametersList[providerIndex] = extractFunctionParameters(
159
provider.config.functions,
163
if (provider.config?.tools) {
164
toolsParametersList[providerIndex] = extractToolParameters(provider.config.tools);
170
const config = (await $RefParser.dereference(rawConfig)) as unknown as UnifiedConfig;
172
// Restore functions and tools parameters
173
if (Array.isArray(config.providers)) {
174
config.providers.forEach((provider, index) => {
175
if (typeof provider === 'string') return;
176
if (!provider.config) {
177
// Handle when provider is a map
178
provider = Object.values(provider)[0] as ProviderOptions;
181
if (functionsParametersList[index]) {
182
provider.config.functions = provider.config.functions || [];
183
restoreFunctionParameters(provider.config.functions, functionsParametersList[index]);
186
if (toolsParametersList[index]) {
187
provider.config.tools = provider.config.tools || [];
188
restoreToolParameters(provider.config.tools, toolsParametersList[index]);
195
export async function readConfig(configPath: string): Promise<UnifiedConfig> {
196
const ext = path.parse(configPath).ext;
201
let rawConfig = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
202
return dereferenceConfig(rawConfig);
206
return (await importModule(configPath)) as UnifiedConfig;
208
throw new Error(`Unsupported configuration file format: ${ext}`);
213
* Reads multiple configuration files and combines them into a single UnifiedConfig.
215
* @param {string[]} configPaths - An array of paths to configuration files. Supports glob patterns.
216
* @returns {Promise<UnifiedConfig>} A promise that resolves to a unified configuration object.
218
export async function readConfigs(configPaths: string[]): Promise<UnifiedConfig> {
219
const configs: UnifiedConfig[] = [];
220
for (const configPath of configPaths) {
221
const globPaths = globSync(configPath, {
222
windowsPathsNoEscape: true,
224
if (globPaths.length === 0) {
225
throw new Error(`No configuration file found at ${configPath}`);
227
for (const globPath of globPaths) {
228
const config = await readConfig(globPath);
229
configs.push(config);
233
const providers: UnifiedConfig['providers'] = [];
234
const seenProviders = new Set<string>();
235
configs.forEach((config) => {
237
typeof config.providers !== 'function',
238
'Providers cannot be a function for multiple configs',
240
if (typeof config.providers === 'string') {
241
if (!seenProviders.has(config.providers)) {
242
providers.push(config.providers);
243
seenProviders.add(config.providers);
245
} else if (Array.isArray(config.providers)) {
246
config.providers.forEach((provider) => {
247
if (!seenProviders.has(JSON.stringify(provider))) {
248
providers.push(provider);
249
seenProviders.add(JSON.stringify(provider));
255
const tests: UnifiedConfig['tests'] = [];
256
configs.forEach(async (config) => {
257
if (typeof config.tests === 'string') {
258
const newTests = await readTests(config.tests, path.dirname(configPaths[0]));
259
tests.push(...newTests);
260
} else if (Array.isArray(config.tests)) {
261
tests.push(...config.tests);
265
const configsAreStringOrArray = configs.every(
266
(config) => typeof config.prompts === 'string' || Array.isArray(config.prompts),
268
const configsAreObjects = configs.every((config) => typeof config.prompts === 'object');
269
let prompts: UnifiedConfig['prompts'] = configsAreStringOrArray ? [] : {};
271
const makeAbsolute = (configPath: string, relativePath: string) => {
272
if (relativePath.startsWith('file://')) {
274
'file://' + path.resolve(path.dirname(configPath), relativePath.slice('file://'.length));
279
const seenPrompts = new Set<string>();
280
configs.forEach((config, idx) => {
281
if (typeof config.prompts === 'string') {
282
invariant(Array.isArray(prompts), 'Cannot mix string and map-type prompts');
283
const absolutePrompt = makeAbsolute(configPaths[idx], config.prompts);
284
seenPrompts.add(absolutePrompt);
285
} else if (Array.isArray(config.prompts)) {
286
invariant(Array.isArray(prompts), 'Cannot mix configs with map and array-type prompts');
288
.map((prompt) => makeAbsolute(configPaths[idx], prompt))
289
.forEach((prompt) => seenPrompts.add(prompt));
291
// Object format such as { 'prompts/prompt1.txt': 'foo', 'prompts/prompt2.txt': 'bar' }
292
invariant(typeof prompts === 'object', 'Cannot mix configs with map and array-type prompts');
293
prompts = { ...prompts, ...config.prompts };
296
if (Array.isArray(prompts)) {
297
prompts.push(...Array.from(seenPrompts));
300
// Combine all configs into a single UnifiedConfig
301
const combinedConfig: UnifiedConfig = {
302
description: configs.map((config) => config.description).join(', '),
306
scenarios: configs.flatMap((config) => config.scenarios || []),
307
defaultTest: configs.reduce((prev: Partial<TestCase> | undefined, curr) => {
311
vars: { ...prev?.vars, ...curr.defaultTest?.vars },
312
assert: [...(prev?.assert || []), ...(curr.defaultTest?.assert || [])],
313
options: { ...prev?.options, ...curr.defaultTest?.options },
316
nunjucksFilters: configs.reduce((prev, curr) => ({ ...prev, ...curr.nunjucksFilters }), {}),
317
env: configs.reduce((prev, curr) => ({ ...prev, ...curr.env }), {}),
318
evaluateOptions: configs.reduce((prev, curr) => ({ ...prev, ...curr.evaluateOptions }), {}),
319
commandLineOptions: configs.reduce(
320
(prev, curr) => ({ ...prev, ...curr.commandLineOptions }),
323
sharing: !configs.some((config) => config.sharing === false),
326
return combinedConfig;
329
/**
 * Writes the same evaluation output to several destinations by calling
 * `writeOutput` once per path, in the order given.
 *
 * @param outputPaths - Destination file paths.
 * @param results - Evaluation summary to write.
 * @param config - Configuration passed through to `writeOutput`.
 * @param shareableUrl - Shareable URL passed through to `writeOutput`, or `null`.
 */
export function writeMultipleOutputs(
  outputPaths: string[],
  results: EvaluateSummary,
  config: Partial<UnifiedConfig>,
  shareableUrl: string | null,
): void {
  outputPaths.forEach((destination) => {
    writeOutput(destination, results, config, shareableUrl);
  });
}
340
export function writeOutput(
342
results: EvaluateSummary,
343
config: Partial<UnifiedConfig>,
344
shareableUrl: string | null,
346
const outputExtension = outputPath.split('.').pop()?.toLowerCase();
348
const outputToSimpleString = (output: EvaluateTableOutput) => {
349
const passFailText = output.pass ? '[PASS]' : '[FAIL]';
350
const namedScoresText = Object.entries(output.namedScores)
351
.map(([name, value]) => `${name}: ${value.toFixed(2)}`)
354
namedScoresText.length > 0
355
? `(${output.score.toFixed(2)}, ${namedScoresText})`
356
: `(${output.score.toFixed(2)})`;
357
const gradingResultText = output.gradingResult
358
? `${output.pass ? 'Pass' : 'Fail'} Reason: ${output.gradingResult.reason}`
360
return `${passFailText} ${scoreText}
364
${gradingResultText}`.trim();
367
// Ensure the directory exists
368
const outputDir = path.dirname(outputPath);
369
if (!fs.existsSync(outputDir)) {
370
fs.mkdirSync(outputDir, { recursive: true });
373
if (outputExtension === 'csv') {
374
const csvOutput = stringify([
376
...results.table.head.vars,
377
...results.table.head.prompts.map((prompt) => JSON.stringify(prompt)),
379
...results.table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]),
381
fs.writeFileSync(outputPath, csvOutput);
382
} else if (outputExtension === 'json') {
385
JSON.stringify({ results, config, shareableUrl } as OutputFile, null, 2),
387
} else if (outputExtension === 'yaml' || outputExtension === 'yml' || outputExtension === 'txt') {
388
fs.writeFileSync(outputPath, yaml.dump({ results, config, shareableUrl } as OutputFile));
389
} else if (outputExtension === 'html') {
390
const template = fs.readFileSync(`${getDirectory()}/tableOutput.html`, 'utf-8');
392
[...results.table.head.vars, ...results.table.head.prompts.map((prompt) => prompt.display)],
393
...results.table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]),
395
const htmlOutput = getNunjucksEngine().renderString(template, {
398
results: results.results,
400
fs.writeFileSync(outputPath, htmlOutput);
403
`Unsupported output file format ${outputExtension}, please use csv, txt, json, yaml, yml, html.`,
408
/** Config directory override, seeded from the `PROMPTFOO_CONFIG_DIR` environment variable. */
let configDirectoryPath: string | undefined = process.env.PROMPTFOO_CONFIG_DIR;

/**
 * Returns the directory used for promptfoo's config and data files.
 *
 * @returns The override when one is set (and non-empty), otherwise
 *   `.promptfoo` inside the current user's home directory.
 */
export function getConfigDirectoryPath(): string {
  if (configDirectoryPath) {
    return configDirectoryPath;
  }
  return path.join(os.homedir(), '.promptfoo');
}

/**
 * Overrides the config directory for the rest of the process.
 *
 * @param newPath - Directory to return from `getConfigDirectoryPath()`.
 */
export function setConfigDirectoryPath(newPath: string): void {
  configDirectoryPath = newPath;
}
419
/**
 * Returns the path of `latest.json` inside the `output` folder of the config
 * directory.
 *
 * TODO(ian): Remove this
 * @deprecated Use readLatestResults directly instead.
 */
export function getLatestResultsPath(): string {
  const outputDirectory = path.join(getConfigDirectoryPath(), 'output');
  return path.join(outputDirectory, 'latest.json');
}
426
export async function writeResultsToDatabase(
427
results: EvaluateSummary,
428
config: Partial<UnifiedConfig>,
431
createdAt = createdAt || new Date();
432
const evalId = `eval-${createdAt.toISOString().slice(0, 19)}`;
441
createdAt: createdAt.getTime(),
442
description: config.description,
446
.onConflictDoNothing()
450
logger.debug(`Inserting eval ${evalId}`);
452
// Record prompt relation
453
for (const prompt of results.table.head.prompts) {
454
const promptId = sha256(prompt.display);
461
prompt: prompt.display,
463
.onConflictDoNothing()
469
.insert(evalsToPrompts)
474
.onConflictDoNothing()
478
logger.debug(`Inserting prompt ${promptId}`);
481
// Record dataset relation
482
const datasetId = sha256(JSON.stringify(config.tests || []));
490
.onConflictDoNothing()
496
.insert(evalsToDatasets)
501
.onConflictDoNothing()
505
logger.debug(`Inserting dataset ${datasetId}`);
507
logger.debug(`Awaiting ${promises.length} promises to database...`);
508
await Promise.all(promises);
510
// "touch" db signal path
511
const filePath = getDbSignalPath();
513
const now = new Date();
514
fs.utimesSync(filePath, now, now);
516
fs.closeSync(fs.openSync(filePath, 'w'));
524
* @returns Last 100 evals in descending order.
526
export function listPreviousResults(): { evalId: string; description?: string | null }[] {
531
description: evals.description,
534
.orderBy(desc(evals.createdAt))
538
return results.map((result) => ({
540
description: result.description,
545
/**
 * Lists the `eval-*.json` files in the `output` folder of the config
 * directory, ordered by file birth time, oldest first.
 *
 * @returns The matching file names (not full paths); an empty array when the
 *   output folder does not exist.
 * @deprecated Used only for migration to sqlite
 */
export function listPreviousResultFilenames_fileSystem(): string[] {
  const directory = path.join(getConfigDirectoryPath(), 'output');
  if (!fs.existsSync(directory)) {
    return [];
  }
  const files = fs.readdirSync(directory);
  const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));

  // Stat every file exactly once. Doing it inside the comparator costs two
  // filesystem calls per comparison, i.e. O(n log n) stats for n files.
  const birthTimes = new Map<string, number>();
  for (const file of resultsFiles) {
    birthTimes.set(file, fs.statSync(path.join(directory, file)).birthtime.getTime());
  }

  // sort in ascending order
  return resultsFiles.sort((a, b) => (birthTimes.get(a) ?? 0) - (birthTimes.get(b) ?? 0));
}
561
const resultsCache: { [fileName: string]: ResultsFile | undefined } = {};
564
* @deprecated Used only for migration to sqlite
566
export function listPreviousResults_fileSystem(): { fileName: string; description?: string }[] {
567
const directory = path.join(getConfigDirectoryPath(), 'output');
568
if (!fs.existsSync(directory)) {
571
const sortedFiles = listPreviousResultFilenames_fileSystem();
572
return sortedFiles.map((fileName) => {
573
if (!resultsCache[fileName]) {
575
const fileContents = fs.readFileSync(path.join(directory, fileName), 'utf8');
576
const data = yaml.load(fileContents) as ResultsFile;
577
resultsCache[fileName] = data;
579
logger.warn(`Failed to read results from ${fileName}:\n${error}`);
584
description: resultsCache[fileName]?.config.description,
589
let attemptedMigration = false;
590
export async function migrateResultsFromFileSystemToDatabase() {
591
if (attemptedMigration) {
592
// TODO(ian): Record this bit in the database.
596
// First run db migrations
597
logger.debug('Running db migrations...');
598
await runDbMigrations();
600
const fileNames = listPreviousResultFilenames_fileSystem();
601
if (fileNames.length === 0) {
605
logger.info(`🔁 Migrating ${fileNames.length} flat files to local database.`);
606
logger.info('This is a one-time operation and may take a minute...');
607
attemptedMigration = true;
609
const outputDir = path.join(getConfigDirectoryPath(), 'output');
610
const backupDir = `${outputDir}-backup-${new Date()
613
.replace(/-/g, '')}`;
615
fs.cpSync(outputDir, backupDir, { recursive: true });
616
logger.info(`Backup of output directory created at ${backupDir}`);
617
} catch (backupError) {
618
logger.error(`Failed to create backup of output directory: ${backupError}`);
622
logger.info('Moving files into database...');
623
const migrationPromises = fileNames.map(async (fileName) => {
624
const fileData = readResult_fileSystem(fileName);
626
await writeResultsToDatabase(
627
fileData.result.results,
628
fileData.result.config,
629
filenameToDate(fileName),
631
logger.debug(`Migrated ${fileName} to database.`);
633
fs.unlinkSync(path.join(outputDir, fileName));
635
logger.warn(`Failed to delete ${fileName} after migration: ${err}`);
638
logger.warn(`Failed to migrate result ${fileName} due to read error.`);
641
await Promise.all(migrationPromises);
643
fs.unlinkSync(getLatestResultsPath());
645
logger.warn(`Failed to delete latest.json: ${err}`);
647
logger.info('Migration complete. Please restart your web server if it is running.');
650
/**
 * Default number of result files to keep: the `RESULT_HISTORY_LENGTH`
 * environment variable when it parses to a non-zero integer, else 100.
 */
const RESULT_HISTORY_LENGTH =
  Number.parseInt(process.env.RESULT_HISTORY_LENGTH || '', 10) || 100;

/**
 * Deletes result files from the front of the list returned by
 * `listPreviousResultFilenames_fileSystem()` until at most `remaining` are left.
 *
 * @param remaining - How many files to keep; defaults to `RESULT_HISTORY_LENGTH`.
 */
export function cleanupOldFileResults(remaining = RESULT_HISTORY_LENGTH) {
  const sortedFilenames = listPreviousResultFilenames_fileSystem();
  const surplus = sortedFilenames.length - remaining;
  let index = 0;
  while (index < surplus) {
    const stalePath = path.join(getConfigDirectoryPath(), 'output', sortedFilenames[index]);
    fs.unlinkSync(stalePath);
    index += 1;
  }
}
659
export function filenameToDate(filename: string) {
660
const dateString = filename.slice('eval-'.length, filename.length - '.json'.length);
662
// Replace hyphens with colons where necessary (Windows compatibility).
663
const dateParts = dateString.split('T');
664
const timePart = dateParts[1].replace(/-/g, ':');
665
const formattedDateString = `${dateParts[0]}T${timePart}`;
667
const date = new Date(formattedDateString);
670
return date.toLocaleDateString('en-US', {
677
timeZoneName: 'short',
682
/**
 * Builds the result file name for a timestamp: `eval-<ISO string>.json`, with
 * every `:` in the ISO string replaced by `-`.
 *
 * @param date - Timestamp to encode in the file name.
 * @returns The file name, e.g. `eval-2024-01-02T03-04-05.678Z.json`.
 */
export function dateToFilename(date: Date) {
  const isoStamp = date.toISOString();
  const colonFreeStamp = isoStamp.split(':').join('-');
  return 'eval-' + colonFreeStamp + '.json';
}
686
export async function readResult(
688
): Promise<{ id: string; result: ResultsFile; createdAt: Date } | undefined> {
691
const evalResult = await db
694
createdAt: evals.createdAt,
695
results: evals.results,
696
config: evals.config,
699
.where(eq(evals.id, id))
702
if (evalResult.length === 0) {
706
const { id: resultId, createdAt, results, config } = evalResult[0];
707
const result: ResultsFile = {
709
createdAt: new Date(createdAt).toISOString().slice(0, 10),
716
createdAt: new Date(createdAt),
719
logger.error(`Failed to read result with ID ${id} from database:\n${err}`);
724
* @deprecated Used only for migration to sqlite
726
export function readResult_fileSystem(
728
): { id: string; result: ResultsFile; createdAt: Date } | undefined {
729
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
730
const resultsPath = path.join(resultsDirectory, name);
732
const result = JSON.parse(
733
fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'),
735
const createdAt = filenameToDate(name);
737
id: sha256(JSON.stringify(result.config)),
742
logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
746
export async function updateResult(
748
newConfig?: Partial<UnifiedConfig>,
749
newTable?: EvaluateTable,
753
// Fetch the existing eval data from the database
754
const existingEval = await db
756
config: evals.config,
757
results: evals.results,
760
.where(eq(evals.id, id))
764
if (existingEval.length === 0) {
765
logger.error(`Eval with ID ${id} not found.`);
769
const evalData = existingEval[0];
771
evalData.config = newConfig;
774
evalData.results.table = newTable;
780
description: evalData.config.description,
781
config: evalData.config,
782
results: evalData.results,
784
.where(eq(evals.id, id))
787
logger.info(`Updated eval with ID ${id}`);
789
logger.error(`Failed to update eval with ID ${id}:\n${err}`);
793
export async function readLatestResults(): Promise<ResultsFile | undefined> {
795
const latestResults = await db
798
createdAt: evals.createdAt,
799
description: evals.description,
800
results: evals.results,
801
config: evals.config,
804
.orderBy(desc(evals.createdAt))
807
if (!latestResults || latestResults.length === 0) {
811
const latestResult = latestResults[0];
814
createdAt: new Date(latestResult.createdAt).toISOString(),
815
results: latestResult.results,
816
config: latestResult.config,
820
/**
 * Looks up prompts by test cases: hashes the JSON serialization of
 * `testCases` with SHA-256 and delegates to `getPromptsForTestCasesHash`.
 *
 * @param testCases - Test cases to match against.
 * @returns Whatever `getPromptsForTestCasesHash` returns for that hash.
 */
export function getPromptsForTestCases(testCases: TestCase[]) {
  return getPromptsForTestCasesHash(sha256(JSON.stringify(testCases)));
}
826
/**
 * Returns the prompts from evals whose test cases hash to `testCasesSha256`.
 *
 * @param testCasesSha256 - SHA-256 hex digest of the JSON-serialized test cases.
 * @returns The result of `getPromptsWithPredicate` restricted to matching evals.
 */
export function getPromptsForTestCasesHash(testCasesSha256: string) {
  return getPromptsWithPredicate((result) => {
    // `config.tests` is optional. JSON.stringify(undefined) returns undefined,
    // and hashing that throws, so an eval without tests would abort the whole
    // lookup. Fall back to [] the same way the dataset id is computed when
    // results are written to the database.
    const testsJson = JSON.stringify(result.config.tests || []);
    return sha256(testsJson) === testCasesSha256;
  });
}
834
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param str - Text to hash.
 * @returns The digest as a lowercase hex string.
 */
export function sha256(str: string) {
  const hasher = createHash('sha256');
  hasher.update(str);
  return hasher.digest('hex');
}
838
/**
 * Returns the prompts from every eval by calling `getPromptsWithPredicate`
 * with a predicate that accepts everything.
 */
export function getPrompts() {
  const acceptEveryResult = () => true;
  return getPromptsWithPredicate(acceptEveryResult);
}
842
export async function getPromptsWithPredicate(
843
predicate: (result: ResultsFile) => boolean,
844
): Promise<PromptWithMetadata[]> {
845
// TODO(ian): Make this use a proper database query
847
const evals_ = await db
850
createdAt: evals.createdAt,
851
results: evals.results,
852
config: evals.config,
858
const groupedPrompts: { [hash: string]: PromptWithMetadata } = {};
860
for (const eval_ of evals_) {
861
const createdAt = new Date(eval_.createdAt).toISOString();
862
const resultWrapper: ResultsFile = {
865
results: eval_.results,
866
config: eval_.config,
868
if (predicate(resultWrapper)) {
869
for (const prompt of resultWrapper.results.table.head.prompts) {
870
const promptId = sha256(prompt.raw);
871
const datasetId = resultWrapper.config.tests
872
? sha256(JSON.stringify(resultWrapper.config.tests))
874
if (promptId in groupedPrompts) {
875
groupedPrompts[promptId].recentEvalDate = new Date(
877
groupedPrompts[promptId].recentEvalDate.getTime(),
878
new Date(createdAt).getTime(),
881
groupedPrompts[promptId].count += 1;
882
groupedPrompts[promptId].evals.push({
885
metrics: prompt.metrics,
888
groupedPrompts[promptId] = {
892
recentEvalDate: new Date(createdAt),
893
recentEvalId: eval_.id,
898
metrics: prompt.metrics,
907
return Object.values(groupedPrompts);
910
/**
 * Returns the test-case groups from every eval by calling
 * `getTestCasesWithPredicate` with a predicate that accepts everything.
 */
export async function getTestCases() {
  const acceptEveryResult = () => true;
  return getTestCasesWithPredicate(acceptEveryResult);
}
914
export async function getTestCasesWithPredicate(
915
predicate: (result: ResultsFile) => boolean,
916
): Promise<TestCasesWithMetadata[]> {
918
const evals_ = await db
921
createdAt: evals.createdAt,
922
results: evals.results,
923
config: evals.config,
929
const groupedTestCases: { [hash: string]: TestCasesWithMetadata } = {};
931
for (const eval_ of evals_) {
932
const createdAt = new Date(eval_.createdAt).toISOString();
933
const resultWrapper: ResultsFile = {
936
results: eval_.results,
937
config: eval_.config,
939
const testCases = resultWrapper.config.tests;
940
if (testCases && predicate(resultWrapper)) {
941
const evalId = eval_.id;
942
const datasetId = sha256(JSON.stringify(testCases));
943
if (datasetId in groupedTestCases) {
944
groupedTestCases[datasetId].recentEvalDate = new Date(
945
Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), eval_.createdAt),
947
groupedTestCases[datasetId].count += 1;
948
const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
949
id: sha256(prompt.raw),
953
const promptsById: Record<string, TestCasesWithMetadataPrompt> = {};
954
for (const prompt of groupedTestCases[datasetId].prompts.concat(newPrompts)) {
955
if (!(prompt.id in promptsById)) {
956
promptsById[prompt.id] = prompt;
959
groupedTestCases[datasetId].prompts = Object.values(promptsById);
961
const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
962
id: sha256(prompt.raw),
966
const promptsById: Record<string, TestCasesWithMetadataPrompt> = {};
967
for (const prompt of newPrompts) {
968
if (!(prompt.id in promptsById)) {
969
promptsById[prompt.id] = prompt;
972
groupedTestCases[datasetId] = {
976
recentEvalDate: new Date(createdAt),
977
recentEvalId: evalId,
978
prompts: Object.values(promptsById),
984
return Object.values(groupedTestCases);
987
export async function getPromptFromHash(hash: string) {
988
const prompts = await getPrompts();
989
for (const prompt of prompts) {
990
if (prompt.id.startsWith(hash)) {
997
export async function getDatasetFromHash(hash: string) {
998
const datasets = await getTestCases();
999
for (const dataset of datasets) {
1000
if (dataset.id.startsWith(hash)) {
1007
/**
 * Returns every eval by calling `getEvalsWithPredicate` with a predicate that
 * accepts everything.
 */
export async function getEvals() {
  const acceptEveryResult = () => true;
  return getEvalsWithPredicate(acceptEveryResult);
}
1011
export async function getEvalFromHash(hash: string) {
1012
const evals_ = await getEvals();
1013
for (const eval_ of evals_) {
1014
if (eval_.id.startsWith(hash)) {
1021
export async function getEvalsWithPredicate(
1022
predicate: (result: ResultsFile) => boolean,
1023
): Promise<EvalWithMetadata[]> {
1025
const evals_ = await db
1028
createdAt: evals.createdAt,
1029
results: evals.results,
1030
config: evals.config,
1036
const ret: EvalWithMetadata[] = [];
1038
for (const eval_ of evals_) {
1039
const createdAt = new Date(eval_.createdAt).toISOString();
1040
const resultWrapper: ResultsFile = {
1042
createdAt: createdAt,
1043
results: eval_.results,
1044
config: eval_.config,
1046
if (predicate(resultWrapper)) {
1047
const evalId = eval_.id;
1050
date: new Date(eval_.createdAt),
1051
config: eval_.config,
1052
results: eval_.results,
1060
export async function readFilters(
1061
filters: Record<string, string>,
1062
): Promise<NunjucksFilterMap> {
1063
const ret: NunjucksFilterMap = {};
1064
const basePath = cliState.basePath || '';
1065
for (const [name, filterPath] of Object.entries(filters)) {
1066
const globPath = path.join(basePath, filterPath);
1067
const filePaths = globSync(globPath, {
1068
windowsPathsNoEscape: true,
1070
for (const filePath of filePaths) {
1071
const finalPath = path.resolve(filePath);
1072
ret[name] = await importModule(finalPath);
1078
export function getNunjucksEngine(filters?: NunjucksFilterMap) {
1079
if (process.env.PROMPTFOO_DISABLE_TEMPLATING) {
1081
renderString: (template: string) => template,
1085
const env = nunjucks.configure({
1090
for (const [name, filter] of Object.entries(filters)) {
1091
env.addFilter(name, filter);
1097
/**
 * Logs a horizontal rule of `=` characters at info level.
 *
 * The rule is 10 characters narrower than the terminal; 80 columns is assumed
 * when `process.stdout.columns` is unavailable.
 */
export function printBorder() {
  // Clamp at zero: String.prototype.repeat throws a RangeError for a negative
  // count, which would happen on terminals reporting fewer than 10 columns.
  const width = Math.max(0, (process.stdout.columns || 80) - 10);
  const border = '='.repeat(width);
  logger.info(border);
}
1102
export async function transformOutput(
1103
codeOrFilepath: string,
1104
output: string | object | undefined,
1105
context: { vars?: Record<string, string | object | undefined>; prompt: Partial<Prompt> },
1108
if (codeOrFilepath.startsWith('file://')) {
1109
const filePath = codeOrFilepath.slice('file://'.length);
1111
codeOrFilepath.endsWith('.js') ||
1112
codeOrFilepath.endsWith('.cjs') ||
1113
codeOrFilepath.endsWith('.mjs')
1115
const requiredModule = await importModule(filePath);
1116
if (typeof requiredModule === 'function') {
1117
postprocessFn = requiredModule;
1118
} else if (requiredModule.default && typeof requiredModule.default === 'function') {
1119
postprocessFn = requiredModule.default;
1122
`Transform ${filePath} must export a function or have a default export as a function`,
1125
} else if (codeOrFilepath.endsWith('.py')) {
1126
postprocessFn = async (
1128
context: { vars: Record<string, string | object> },
1130
return runPython(filePath, 'get_transform', [output, context]);
1133
throw new Error(`Unsupported transform file format: ${codeOrFilepath}`);
1136
postprocessFn = new Function(
1139
codeOrFilepath.includes('\n') ? codeOrFilepath : `return ${codeOrFilepath}`,
1142
const ret = await Promise.resolve(postprocessFn(output, context));
1144
throw new Error(`Transform function did not return a value\n\n${codeOrFilepath}`);
1149
export type StandaloneEval = CompletedPrompt & {
1151
datasetId: string | null;
1152
promptId: string | null;
1154
export function getStandaloneEvals(): StandaloneEval[] {
1159
description: evals.description,
1160
config: evals.config,
1161
results: evals.results,
1162
promptId: evalsToPrompts.promptId,
1163
datasetId: evalsToDatasets.datasetId,
1166
.leftJoin(evalsToPrompts, eq(evals.id, evalsToPrompts.evalId))
1167
.leftJoin(evalsToDatasets, eq(evals.id, evalsToDatasets.evalId))
1168
.orderBy(desc(evals.createdAt))
1172
const flatResults: StandaloneEval[] = [];
1173
results.forEach((result) => {
1174
const table = result.results.table;
1175
table.head.prompts.forEach((col) => {
1177
evalId: result.evalId,
1178
promptId: result.promptId,
1179
datasetId: result.datasetId,