promptfoo

Форк
0
/
util.ts 
1185 строк · 33.9 Кб
1
import * as fs from 'fs';
2
import * as path from 'path';
3
import * as os from 'os';
4
import { createHash } from 'crypto';
5

6
import $RefParser from '@apidevtools/json-schema-ref-parser';
7
import invariant from 'tiny-invariant';
8
import nunjucks from 'nunjucks';
9
import yaml from 'js-yaml';
10
import { stringify } from 'csv-stringify/sync';
11
import { globSync } from 'glob';
12
import { desc, eq } from 'drizzle-orm';
13

14
import cliState from './cliState';
15
import logger from './logger';
16
import { getDirectory, importModule } from './esm';
17
import { readTests } from './testCases';
18
import {
19
  datasets,
20
  getDb,
21
  evals,
22
  evalsToDatasets,
23
  evalsToPrompts,
24
  prompts,
25
  getDbSignalPath,
26
} from './database';
27
import { runDbMigrations } from './migrate';
28
import { runPython } from './python/wrapper';
29

30
import type {
31
  EvalWithMetadata,
32
  EvaluateSummary,
33
  EvaluateTable,
34
  EvaluateTableOutput,
35
  NunjucksFilterMap,
36
  PromptWithMetadata,
37
  ResultsFile,
38
  TestCase,
39
  TestCasesWithMetadata,
40
  TestCasesWithMetadataPrompt,
41
  UnifiedConfig,
42
  OutputFile,
43
  ProviderOptions,
44
  Prompt,
45
  CompletedPrompt,
46
} from './types';
47

48
// In-memory copy of the parsed global config; `null` means it has not been loaded yet.
let globalConfigCache: any = null;

/**
 * Clears the in-memory global config so that the next `readGlobalConfig()` call
 * loads it again.
 */
export function resetGlobalConfig(): void {
  globalConfigCache = null;
}
53

54
/**
 * Returns the global promptfoo config, loading it on first use.
 *
 * The config is read from `promptfoo.yaml` inside the config directory. When that
 * file does not exist, the directory is created if necessary and a default
 * `{ hasRun: false }` config is written to it. The loaded value is kept in
 * `globalConfigCache` and returned on later calls.
 *
 * @returns The cached or freshly loaded global config object.
 */
export function readGlobalConfig(): any {
  if (globalConfigCache) {
    return globalConfigCache;
  }

  const configDir = getConfigDirectoryPath();
  const configFilePath = path.join(configDir, 'promptfoo.yaml');

  if (fs.existsSync(configFilePath)) {
    // An empty (or comment-only) YAML document parses to undefined. Fall back to an
    // empty object so callers can always read properties off the result.
    globalConfigCache = yaml.load(fs.readFileSync(configFilePath, 'utf-8')) || {};
  } else {
    if (!fs.existsSync(configDir)) {
      fs.mkdirSync(configDir, { recursive: true });
    }
    globalConfigCache = { hasRun: false };
    fs.writeFileSync(configFilePath, yaml.dump(globalConfigCache));
  }

  return globalConfigCache;
}
72

73
/**
 * Records that promptfoo has run, the first time it is called.
 *
 * @returns `true` when this call set `hasRun` and wrote the config back to
 *   `promptfoo.yaml`; `false` when `hasRun` was already set or when reading or
 *   writing the config threw.
 */
export function maybeRecordFirstRun(): boolean {
  try {
    const globalConfig = readGlobalConfig();
    if (globalConfig.hasRun) {
      return false;
    }

    globalConfig.hasRun = true;
    const configFilePath = path.join(getConfigDirectoryPath(), 'promptfoo.yaml');
    fs.writeFileSync(configFilePath, yaml.dump(globalConfig));
    return true;
  } catch (err) {
    // Best effort: any failure is reported as "not the first run".
    return false;
  }
}
87

88
/**
 * Reads the config file at `configPath` if it exists.
 *
 * @param configPath - Path to a config file.
 * @returns The config produced by `readConfig`, or `undefined` when the file does not exist.
 */
export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
  return fs.existsSync(configPath) ? readConfig(configPath) : undefined;
}
94

95
/**
 * Dereferences a config with `$RefParser.dereference`.
 *
 * Before dereferencing, the `parameters` of every provider's `config.functions[]`
 * and `config.tools[].function` are removed so that they are not dereferenced;
 * they are put back unchanged afterwards
 * (https://github.com/promptfoo/promptfoo/issues/364).
 *
 * When the `PROMPTFOO_DISABLE_REF_PARSER` environment variable is set, the input
 * is returned as-is.
 *
 * @param rawConfig - Config as loaded from disk. Its providers are modified in place
 *   while parameters are detached.
 * @returns The dereferenced config.
 */
export async function dereferenceConfig(rawConfig: UnifiedConfig): Promise<UnifiedConfig> {
  if (process.env.PROMPTFOO_DISABLE_REF_PARSER) {
    return rawConfig;
  }

  // Detach `parameters` from each function definition and return them in order.
  const detachFunctionParameters = (functions: { parameters?: object }[]) => {
    return functions.map((fn) => {
      const saved = { parameters: fn.parameters };
      delete fn.parameters;
      return saved;
    });
  };

  // Detach `function.parameters` from each tool definition and return them in order.
  const detachToolParameters = (tools: { function?: { parameters?: object } }[]) => {
    return tools.map((tool) => {
      const parameters = tool.function?.parameters;
      if (tool.function?.parameters) {
        delete tool.function.parameters;
      }
      return { parameters };
    });
  };

  // Put previously detached `parameters` back onto the function at the same index.
  const reattachFunctionParameters = (
    functions: { parameters?: object }[],
    saved: { parameters?: object }[],
  ) => {
    functions.forEach((fn, position) => {
      const parameters = saved[position]?.parameters;
      if (parameters) {
        fn.parameters = parameters;
      }
    });
  };

  // Put previously detached `parameters` back onto the tool at the same index.
  const reattachToolParameters = (
    tools: { function?: { parameters?: object } }[],
    saved: { parameters?: object }[],
  ) => {
    tools.forEach((tool, position) => {
      const parameters = saved[position]?.parameters;
      if (parameters) {
        tool.function = tool.function || {};
        tool.function.parameters = parameters;
      }
    });
  };

  // Detached parameters, indexed by the provider's position in `providers`.
  const savedFunctionParameters: { parameters?: object }[][] = [];
  const savedToolParameters: { parameters?: object }[][] = [];

  if (Array.isArray(rawConfig.providers)) {
    rawConfig.providers.forEach((entry, providerIndex) => {
      if (typeof entry === 'string') {
        return;
      }
      let provider = entry;
      if (!provider.config) {
        // Handle when provider is a map
        provider = Object.values(provider)[0] as ProviderOptions;
      }

      if (provider.config?.functions) {
        savedFunctionParameters[providerIndex] = detachFunctionParameters(
          provider.config.functions,
        );
      }
      if (provider.config?.tools) {
        savedToolParameters[providerIndex] = detachToolParameters(provider.config.tools);
      }
    });
  }

  // Dereference JSON
  const config = (await $RefParser.dereference(rawConfig)) as unknown as UnifiedConfig;

  // Restore functions and tools parameters
  if (Array.isArray(config.providers)) {
    config.providers.forEach((entry, providerIndex) => {
      if (typeof entry === 'string') {
        return;
      }
      let provider = entry;
      if (!provider.config) {
        // Handle when provider is a map
        provider = Object.values(provider)[0] as ProviderOptions;
      }

      const functionParameters = savedFunctionParameters[providerIndex];
      if (functionParameters) {
        provider.config.functions = provider.config.functions || [];
        reattachFunctionParameters(provider.config.functions, functionParameters);
      }

      const toolParameters = savedToolParameters[providerIndex];
      if (toolParameters) {
        provider.config.tools = provider.config.tools || [];
        reattachToolParameters(provider.config.tools, toolParameters);
      }
    });
  }

  return config;
}
194

195
/**
 * Loads a single config file, choosing the loader from the file extension.
 *
 * - `.json`, `.yaml`, `.yml`: parsed with `yaml.load`, then passed through `dereferenceConfig`.
 * - `.js`, `.cjs`, `.mjs`: loaded with `importModule`.
 *
 * @param configPath - Path to the config file.
 * @returns The loaded config.
 * @throws Error when the extension is none of the above.
 */
export async function readConfig(configPath: string): Promise<UnifiedConfig> {
  const { ext } = path.parse(configPath);

  if (ext === '.json' || ext === '.yaml' || ext === '.yml') {
    const fileContents = fs.readFileSync(configPath, 'utf-8');
    return dereferenceConfig(yaml.load(fileContents) as UnifiedConfig);
  }

  if (ext === '.js' || ext === '.cjs' || ext === '.mjs') {
    return (await importModule(configPath)) as UnifiedConfig;
  }

  throw new Error(`Unsupported configuration file format: ${ext}`);
}
211

212
/**
 * Reads multiple configuration files and combines them into a single UnifiedConfig.
 *
 * Merge rules:
 * - `providers`: concatenated, de-duplicated (arrays by their JSON serialization).
 * - `tests`: concatenated; a string value is loaded with `readTests`.
 * - `prompts`: either all string/array (de-duplicated, `file://` paths made absolute
 *   relative to the config they came from) or all maps (shallow-merged).
 * - `defaultTest`, `nunjucksFilters`, `env`, `evaluateOptions`, `commandLineOptions`:
 *   shallow-merged, later configs winning; `defaultTest.assert` is concatenated.
 * - `sharing`: `false` if any config sets it to `false`.
 *
 * @param configPaths - An array of paths to configuration files. Supports glob patterns.
 * @returns A promise that resolves to a unified configuration object.
 * @throws Error when a path or pattern matches no file, or (via `invariant`) when
 *   providers is a function or prompt formats are mixed.
 */
export async function readConfigs(configPaths: string[]): Promise<UnifiedConfig> {
  const configs: UnifiedConfig[] = [];
  // Path each config was actually loaded from. One glob pattern can expand to several
  // files, so `configPaths` cannot be indexed by a config's position in `configs`.
  const loadedFrom: string[] = [];
  for (const configPath of configPaths) {
    const globPaths = globSync(configPath, {
      windowsPathsNoEscape: true,
    });
    if (globPaths.length === 0) {
      throw new Error(`No configuration file found at ${configPath}`);
    }
    for (const globPath of globPaths) {
      const config = await readConfig(globPath);
      configs.push(config);
      loadedFrom.push(globPath);
    }
  }

  const providers: UnifiedConfig['providers'] = [];
  const seenProviders = new Set<string>();
  configs.forEach((config) => {
    invariant(
      typeof config.providers !== 'function',
      'Providers cannot be a function for multiple configs',
    );
    if (typeof config.providers === 'string') {
      if (!seenProviders.has(config.providers)) {
        providers.push(config.providers);
        seenProviders.add(config.providers);
      }
    } else if (Array.isArray(config.providers)) {
      config.providers.forEach((provider) => {
        const providerKey = JSON.stringify(provider);
        if (!seenProviders.has(providerKey)) {
          providers.push(provider);
          seenProviders.add(providerKey);
        }
      });
    }
  });

  const tests: UnifiedConfig['tests'] = [];
  // A plain loop (not forEach with an async callback) so that every `readTests` call
  // has completed, in config order, before the combined config is built.
  for (const config of configs) {
    if (typeof config.tests === 'string') {
      // NOTE(review): test files are resolved relative to the first entry of
      // `configPaths`, not the config that references them. Kept as-is.
      const newTests = await readTests(config.tests, path.dirname(configPaths[0]));
      tests.push(...newTests);
    } else if (Array.isArray(config.tests)) {
      tests.push(...config.tests);
    }
  }

  const configsAreStringOrArray = configs.every(
    (config) => typeof config.prompts === 'string' || Array.isArray(config.prompts),
  );
  let prompts: UnifiedConfig['prompts'] = configsAreStringOrArray ? [] : {};

  // Rewrites a `file://` reference so it is absolute, relative to the directory of the
  // config file that contained it. Other values are returned unchanged.
  const makeAbsolute = (configPath: string, relativePath: string) => {
    if (relativePath.startsWith('file://')) {
      relativePath =
        'file://' + path.resolve(path.dirname(configPath), relativePath.slice('file://'.length));
    }
    return relativePath;
  };

  const seenPrompts = new Set<string>();
  configs.forEach((config, idx) => {
    if (typeof config.prompts === 'string') {
      invariant(Array.isArray(prompts), 'Cannot mix string and map-type prompts');
      const absolutePrompt = makeAbsolute(loadedFrom[idx], config.prompts);
      seenPrompts.add(absolutePrompt);
    } else if (Array.isArray(config.prompts)) {
      invariant(Array.isArray(prompts), 'Cannot mix configs with map and array-type prompts');
      config.prompts
        .map((prompt) => makeAbsolute(loadedFrom[idx], prompt))
        .forEach((prompt) => seenPrompts.add(prompt));
    } else {
      // Object format such as { 'prompts/prompt1.txt': 'foo', 'prompts/prompt2.txt': 'bar' }
      invariant(typeof prompts === 'object', 'Cannot mix configs with map and array-type prompts');
      prompts = { ...prompts, ...config.prompts };
    }
  });
  if (Array.isArray(prompts)) {
    prompts.push(...Array.from(seenPrompts));
  }

  // Combine all configs into a single UnifiedConfig
  const combinedConfig: UnifiedConfig = {
    description: configs.map((config) => config.description).join(', '),
    providers,
    prompts,
    tests,
    scenarios: configs.flatMap((config) => config.scenarios || []),
    defaultTest: configs.reduce((prev: Partial<TestCase> | undefined, curr) => {
      return {
        ...prev,
        ...curr.defaultTest,
        vars: { ...prev?.vars, ...curr.defaultTest?.vars },
        assert: [...(prev?.assert || []), ...(curr.defaultTest?.assert || [])],
        options: { ...prev?.options, ...curr.defaultTest?.options },
      };
    }, {}),
    nunjucksFilters: configs.reduce((prev, curr) => ({ ...prev, ...curr.nunjucksFilters }), {}),
    env: configs.reduce((prev, curr) => ({ ...prev, ...curr.env }), {}),
    evaluateOptions: configs.reduce((prev, curr) => ({ ...prev, ...curr.evaluateOptions }), {}),
    commandLineOptions: configs.reduce(
      (prev, curr) => ({ ...prev, ...curr.commandLineOptions }),
      {},
    ),
    sharing: !configs.some((config) => config.sharing === false),
  };

  return combinedConfig;
}
328

329
/**
 * Writes the same evaluation results to every path in `outputPaths` by calling
 * `writeOutput` once per path, in order.
 */
export function writeMultipleOutputs(
  outputPaths: string[],
  results: EvaluateSummary,
  config: Partial<UnifiedConfig>,
  shareableUrl: string | null,
): void {
  outputPaths.forEach((outputPath) => {
    writeOutput(outputPath, results, config, shareableUrl);
  });
}
339

340
/**
 * Writes evaluation results to `outputPath` in a format chosen from the file
 * extension (text after the last `.`, lower-cased).
 *
 * - `csv`: a table of vars and per-prompt outputs, one row per test.
 * - `json`: `{ results, config, shareableUrl }` pretty-printed with 2 spaces.
 * - `yaml`, `yml`, `txt`: the same object dumped as YAML.
 * - `html`: the `tableOutput.html` template rendered with nunjucks.
 *
 * The parent directory is created first if it does not exist.
 *
 * @throws Error when the extension is not one of the supported formats.
 */
export function writeOutput(
  outputPath: string,
  results: EvaluateSummary,
  config: Partial<UnifiedConfig>,
  shareableUrl: string | null,
): void {
  const outputExtension = outputPath.split('.').pop()?.toLowerCase();

  // Renders one output cell as "[PASS|FAIL] (score[, named scores])", the output text,
  // and the grading reason (when present), separated by blank lines.
  const outputToSimpleString = (output: EvaluateTableOutput) => {
    const verdict = output.pass ? '[PASS]' : '[FAIL]';
    const namedScoresText = Object.entries(output.namedScores)
      .map(([name, value]) => `${name}: ${value.toFixed(2)}`)
      .join(', ');
    const scoreText =
      namedScoresText.length > 0
        ? `(${output.score.toFixed(2)}, ${namedScoresText})`
        : `(${output.score.toFixed(2)})`;
    let gradingResultText = '';
    if (output.gradingResult) {
      gradingResultText = `${output.pass ? 'Pass' : 'Fail'} Reason: ${output.gradingResult.reason}`;
    }
    return `${verdict} ${scoreText}\n\n${output.text}\n\n${gradingResultText}`.trim();
  };

  // Body rows shared by the CSV and HTML formats.
  const buildBodyRows = () =>
    results.table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]);

  // Ensure the directory exists
  const outputDir = path.dirname(outputPath);
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  switch (outputExtension) {
    case 'csv': {
      const headerRow = [
        ...results.table.head.vars,
        ...results.table.head.prompts.map((prompt) => JSON.stringify(prompt)),
      ];
      const csvOutput = stringify([headerRow, ...buildBodyRows()]);
      fs.writeFileSync(outputPath, csvOutput);
      break;
    }
    case 'json': {
      const payload = { results, config, shareableUrl } as OutputFile;
      fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
      break;
    }
    case 'yaml':
    case 'yml':
    case 'txt': {
      const payload = { results, config, shareableUrl } as OutputFile;
      fs.writeFileSync(outputPath, yaml.dump(payload));
      break;
    }
    case 'html': {
      const template = fs.readFileSync(`${getDirectory()}/tableOutput.html`, 'utf-8');
      const headerRow = [
        ...results.table.head.vars,
        ...results.table.head.prompts.map((prompt) => prompt.display),
      ];
      const table = [headerRow, ...buildBodyRows()];
      const htmlOutput = getNunjucksEngine().renderString(template, {
        config,
        table,
        results: results.results,
      });
      fs.writeFileSync(outputPath, htmlOutput);
      break;
    }
    default:
      throw new Error(
        `Unsupported output file format ${outputExtension}, please use csv, txt, json, yaml, yml, html.`,
      );
  }
}
407

408
// Config directory override. Initialised from the PROMPTFOO_CONFIG_DIR environment
// variable when this module is loaded; may be replaced via setConfigDirectoryPath.
let configDirectoryPath: string | undefined = process.env.PROMPTFOO_CONFIG_DIR;

/**
 * Returns the directory that holds promptfoo's config and output.
 *
 * @returns The override when one is set to a non-empty value, otherwise
 *   `.promptfoo` inside the user's home directory.
 */
export function getConfigDirectoryPath(): string {
  if (configDirectoryPath) {
    return configDirectoryPath;
  }
  return path.join(os.homedir(), '.promptfoo');
}

/**
 * Overrides the config directory returned by `getConfigDirectoryPath`.
 *
 * @param newPath - Directory to use from now on.
 */
export function setConfigDirectoryPath(newPath: string): void {
  configDirectoryPath = newPath;
}
417

418
/**
 * Returns the path of `output/latest.json` inside the config directory.
 *
 * TODO(ian): Remove this
 * @deprecated Use readLatestResults directly instead.
 */
export function getLatestResultsPath(): string {
  const outputDirectory = path.join(getConfigDirectoryPath(), 'output');
  return path.join(outputDirectory, 'latest.json');
}
425

426
/**
 * Stores one evaluation and its prompt and dataset relations in the database.
 *
 * The eval id is `eval-` followed by the creation time as an ISO string cut to
 * seconds. Prompt ids are the SHA-256 of each prompt's `display`, and the dataset
 * id is the SHA-256 of the JSON-serialized `config.tests` (or `[]`). Every insert
 * uses `onConflictDoNothing()`. After the writes finish, the db signal file's
 * timestamps are updated (the file is created when that fails).
 *
 * @param results - Evaluation summary to store.
 * @param config - Config the evaluation ran with.
 * @param createdAt - Creation time; defaults to now.
 * @returns The id of the eval row.
 */
export async function writeResultsToDatabase(
  results: EvaluateSummary,
  config: Partial<UnifiedConfig>,
  createdAt?: Date,
): Promise<string> {
  createdAt = createdAt || new Date();
  const timestamp = createdAt.toISOString().slice(0, 19);
  const evalId = `eval-${timestamp}`;
  const db = getDb();

  const pendingWrites = [];

  const evalInsert = db
    .insert(evals)
    .values({
      id: evalId,
      createdAt: createdAt.getTime(),
      description: config.description,
      config,
      results,
    })
    .onConflictDoNothing()
    .run();
  pendingWrites.push(evalInsert);

  logger.debug(`Inserting eval ${evalId}`);

  // Record prompt relation
  for (const prompt of results.table.head.prompts) {
    const promptId = sha256(prompt.display);

    const promptInsert = db
      .insert(prompts)
      .values({
        id: promptId,
        prompt: prompt.display,
      })
      .onConflictDoNothing()
      .run();
    pendingWrites.push(promptInsert);

    const promptLinkInsert = db
      .insert(evalsToPrompts)
      .values({
        evalId,
        promptId,
      })
      .onConflictDoNothing()
      .run();
    pendingWrites.push(promptLinkInsert);

    logger.debug(`Inserting prompt ${promptId}`);
  }

  // Record dataset relation
  const datasetId = sha256(JSON.stringify(config.tests || []));

  const datasetInsert = db
    .insert(datasets)
    .values({
      id: datasetId,
      tests: config.tests,
    })
    .onConflictDoNothing()
    .run();
  pendingWrites.push(datasetInsert);

  const datasetLinkInsert = db
    .insert(evalsToDatasets)
    .values({
      evalId,
      datasetId,
    })
    .onConflictDoNothing()
    .run();
  pendingWrites.push(datasetLinkInsert);

  logger.debug(`Inserting dataset ${datasetId}`);

  logger.debug(`Awaiting ${pendingWrites.length} promises to database...`);
  await Promise.all(pendingWrites);

  // "touch" db signal path
  const signalPath = getDbSignalPath();
  try {
    const touchedAt = new Date();
    fs.utimesSync(signalPath, touchedAt, touchedAt);
  } catch (err) {
    // Updating timestamps failed; open the file for writing so that it exists.
    fs.closeSync(fs.openSync(signalPath, 'w'));
  }

  return evalId;
}
521

522
/**
 * Lists stored evals, newest first.
 *
 * @returns Last 100 evals in descending order of creation time, each with its id
 *   and description.
 */
export function listPreviousResults(): { evalId: string; description?: string | null }[] {
  const rows = getDb()
    .select({
      name: evals.id,
      description: evals.description,
    })
    .from(evals)
    .orderBy(desc(evals.createdAt))
    .limit(100)
    .all();

  return rows.map(({ name, description }) => ({ evalId: name, description }));
}
543

544
/**
 * Lists the `eval-*.json` result files in the config directory's `output` folder,
 * oldest first by file birth time.
 *
 * Each file is stat'ed exactly once before sorting, rather than on every comparison.
 *
 * @returns File names (not full paths), or an empty array when the folder does not exist.
 * @deprecated Used only for migration to sqlite
 */
export function listPreviousResultFilenames_fileSystem(): string[] {
  const directory = path.join(getConfigDirectoryPath(), 'output');
  if (!fs.existsSync(directory)) {
    return [];
  }

  return fs
    .readdirSync(directory)
    .filter((file) => file.startsWith('eval-') && file.endsWith('.json'))
    .map((file) => ({
      file,
      bornAt: fs.statSync(path.join(directory, file)).birthtime.getTime(),
    }))
    .sort((a, b) => a.bornAt - b.bornAt) // sort in ascending order
    .map(({ file }) => file);
}
560

561
// Parsed result files keyed by file name, so each file is read at most once per process
// (a file that failed to parse is retried on the next call).
const resultsCache: { [fileName: string]: ResultsFile | undefined } = {};

/**
 * Lists result files in the `output` folder together with the description stored
 * in each file's config.
 *
 * A file that cannot be read or parsed is logged with a warning and listed with
 * an undefined description.
 *
 * @deprecated Used only for migration to sqlite
 */
export function listPreviousResults_fileSystem(): { fileName: string; description?: string }[] {
  const directory = path.join(getConfigDirectoryPath(), 'output');
  if (!fs.existsSync(directory)) {
    return [];
  }

  const listing: { fileName: string; description?: string }[] = [];
  for (const fileName of listPreviousResultFilenames_fileSystem()) {
    if (!resultsCache[fileName]) {
      try {
        const fileContents = fs.readFileSync(path.join(directory, fileName), 'utf8');
        resultsCache[fileName] = yaml.load(fileContents) as ResultsFile;
      } catch (error) {
        logger.warn(`Failed to read results from ${fileName}:\n${error}`);
      }
    }
    listing.push({
      fileName,
      description: resultsCache[fileName]?.config.description,
    });
  }
  return listing;
}
588

589
// Set once a file migration has been started in this process, so it is not repeated.
let attemptedMigration = false;

/**
 * Runs database migrations, then moves legacy `eval-*.json` result files from the
 * `output` folder into the database.
 *
 * Steps: run db migrations; return if there are no result files; copy the output
 * folder to `<output>-backup-<YYYYMMDD>` (abort if the copy fails); write each
 * readable file to the database and delete it; finally delete `latest.json`.
 * Failures to read or delete individual files are logged and do not stop the run.
 */
export async function migrateResultsFromFileSystemToDatabase() {
  if (attemptedMigration) {
    // TODO(ian): Record this bit in the database.
    return;
  }

  // First run db migrations
  logger.debug('Running db migrations...');
  await runDbMigrations();

  const fileNames = listPreviousResultFilenames_fileSystem();
  if (fileNames.length === 0) {
    return;
  }

  logger.info(`🔁 Migrating ${fileNames.length} flat files to local database.`);
  logger.info('This is a one-time operation and may take a minute...');
  attemptedMigration = true;

  const outputDir = path.join(getConfigDirectoryPath(), 'output');
  const dateStamp = new Date().toISOString().slice(0, 10).replace(/-/g, '');
  const backupDir = `${outputDir}-backup-${dateStamp}`;
  try {
    fs.cpSync(outputDir, backupDir, { recursive: true });
    logger.info(`Backup of output directory created at ${backupDir}`);
  } catch (backupError) {
    logger.error(`Failed to create backup of output directory: ${backupError}`);
    return;
  }

  logger.info('Moving files into database...');

  // Writes one result file to the database and removes it from disk.
  const migrateFile = async (fileName: string) => {
    const fileData = readResult_fileSystem(fileName);
    if (!fileData) {
      logger.warn(`Failed to migrate result ${fileName} due to read error.`);
      return;
    }

    await writeResultsToDatabase(
      fileData.result.results,
      fileData.result.config,
      filenameToDate(fileName),
    );
    logger.debug(`Migrated ${fileName} to database.`);
    try {
      fs.unlinkSync(path.join(outputDir, fileName));
    } catch (err) {
      logger.warn(`Failed to delete ${fileName} after migration: ${err}`);
    }
  };

  await Promise.all(fileNames.map((fileName) => migrateFile(fileName)));

  try {
    fs.unlinkSync(getLatestResultsPath());
  } catch (err) {
    logger.warn(`Failed to delete latest.json: ${err}`);
  }
  logger.info('Migration complete. Please restart your web server if it is running.');
}
649

650
// Number of result files to keep; taken from the RESULT_HISTORY_LENGTH environment
// variable, falling back to 100 when it is unset, not a number, or 0.
const RESULT_HISTORY_LENGTH = parseInt(process.env.RESULT_HISTORY_LENGTH || '', 10) || 100;

/**
 * Deletes the oldest result files from the `output` folder so that at most
 * `remaining` files are left.
 *
 * @param remaining - How many of the newest files to keep.
 */
export function cleanupOldFileResults(remaining = RESULT_HISTORY_LENGTH) {
  const sortedFilenames = listPreviousResultFilenames_fileSystem();
  const outputDirectory = path.join(getConfigDirectoryPath(), 'output');
  const deleteCount = sortedFilenames.length - remaining;

  // File names are sorted oldest first, so the leading entries are the ones to drop.
  let index = 0;
  while (index < deleteCount) {
    fs.unlinkSync(path.join(outputDirectory, sortedFilenames[index]));
    index += 1;
  }
}
658

659
/**
 * Parses the timestamp embedded in a result file name of the form
 * `eval-<date>T<time>.json`.
 *
 * @param filename - File name starting with `eval-` and ending with `.json`.
 * @returns The `Date` built from the embedded timestamp.
 */
export function filenameToDate(filename: string) {
  const prefixLength = 'eval-'.length;
  const suffixLength = '.json'.length;
  const stamp = filename.slice(prefixLength, filename.length - suffixLength);

  // Replace hyphens with colons where necessary (Windows compatibility).
  const [datePart, rawTimePart] = stamp.split('T');
  const timePart = rawTimePart.replace(/-/g, ':');

  return new Date(`${datePart}T${timePart}`);
}
681

682
/**
 * Builds the result file name for a date: `eval-<ISO timestamp>.json`, with the
 * colons of the ISO timestamp replaced by hyphens.
 */
export function dateToFilename(date: Date) {
  const stamp = date.toISOString().split(':').join('-');
  return `eval-${stamp}.json`;
}
685

686
/**
 * Loads one eval from the database by id.
 *
 * @param id - Eval id to look up.
 * @returns The eval id, a version-3 results file (its `createdAt` is the date part
 *   of the ISO timestamp only) and the creation time as a `Date`; `undefined` when
 *   no row matches or when the query throws (the error is logged).
 */
export async function readResult(
  id: string,
): Promise<{ id: string; result: ResultsFile; createdAt: Date } | undefined> {
  const db = getDb();
  try {
    const rows = await db
      .select({
        id: evals.id,
        createdAt: evals.createdAt,
        results: evals.results,
        config: evals.config,
      })
      .from(evals)
      .where(eq(evals.id, id))
      .execute();

    if (rows.length === 0) {
      return undefined;
    }

    const row = rows[0];
    const result: ResultsFile = {
      version: 3,
      createdAt: new Date(row.createdAt).toISOString().slice(0, 10),
      results: row.results,
      config: row.config,
    };
    return { id: row.id, result, createdAt: new Date(row.createdAt) };
  } catch (err) {
    logger.error(`Failed to read result with ID ${id} from database:\n${err}`);
  }
}
722

723
/**
 * Reads one legacy result file from the config directory's `output` folder.
 *
 * @param name - File name inside the `output` folder; also parsed for the creation time.
 * @returns The parsed file, an id (SHA-256 of the JSON-serialized config) and the
 *   creation time taken from the file name; `undefined` when anything throws
 *   (the error is logged).
 * @deprecated Used only for migration to sqlite
 */
export function readResult_fileSystem(
  name: string,
): { id: string; result: ResultsFile; createdAt: Date } | undefined {
  const resultsPath = path.join(getConfigDirectoryPath(), 'output', name);
  try {
    const fileContents = fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8');
    const result = JSON.parse(fileContents) as ResultsFile;
    const createdAt = filenameToDate(name);
    const id = sha256(JSON.stringify(result.config));
    return { id, result, createdAt };
  } catch (err) {
    logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
  }
}
745

746
/**
 * Replaces the config and/or results table of a stored eval.
 *
 * The row's `description` is rewritten from the config that ends up stored.
 * A missing eval or a database error is logged; nothing is thrown.
 *
 * @param id - Eval id to update.
 * @param newConfig - Replacement config; the stored one is kept when omitted.
 * @param newTable - Replacement results table; the stored one is kept when omitted.
 */
export async function updateResult(
  id: string,
  newConfig?: Partial<UnifiedConfig>,
  newTable?: EvaluateTable,
): Promise<void> {
  const db = getDb();
  try {
    // Fetch the existing eval data from the database
    const matches = await db
      .select({
        config: evals.config,
        results: evals.results,
      })
      .from(evals)
      .where(eq(evals.id, id))
      .limit(1)
      .all();

    if (matches.length === 0) {
      logger.error(`Eval with ID ${id} not found.`);
      return;
    }

    const [stored] = matches;
    if (newConfig) {
      stored.config = newConfig;
    }
    if (newTable) {
      stored.results.table = newTable;
    }

    const { config, results } = stored;
    await db
      .update(evals)
      .set({ description: config.description, config, results })
      .where(eq(evals.id, id))
      .run();

    logger.info(`Updated eval with ID ${id}`);
  } catch (err) {
    logger.error(`Failed to update eval with ID ${id}:\n${err}`);
  }
}
792

793
/**
 * Loads the most recently created eval from the database.
 *
 * @returns A version-3 results file for the newest eval (by `createdAt`), or
 *   `undefined` when there are no evals.
 */
export async function readLatestResults(): Promise<ResultsFile | undefined> {
  const db = getDb();
  const rows = await db
    .select({
      id: evals.id,
      createdAt: evals.createdAt,
      description: evals.description,
      results: evals.results,
      config: evals.config,
    })
    .from(evals)
    .orderBy(desc(evals.createdAt))
    .limit(1);

  if (!rows || rows.length === 0) {
    return undefined;
  }

  const { createdAt, results, config } = rows[0];
  return {
    version: 3,
    createdAt: new Date(createdAt).toISOString(),
    results,
    config,
  };
}
819

820
/**
 * Finds the prompts of evals whose tests hash to the same value as `testCases`
 * (SHA-256 of the JSON serialization).
 */
export function getPromptsForTestCases(testCases: TestCase[]) {
  return getPromptsForTestCasesHash(sha256(JSON.stringify(testCases)));
}
825

826
/**
 * Finds the prompts of evals whose `config.tests`, JSON-serialized and hashed
 * with SHA-256, equals `testCasesSha256`.
 */
export function getPromptsForTestCasesHash(testCasesSha256: string) {
  const hasMatchingTests = (result: ResultsFile) =>
    sha256(JSON.stringify(result.config.tests)) === testCasesSha256;
  return getPromptsWithPredicate(hasMatchingTests);
}
833

834
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param str - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export function sha256(str: string) {
  const hasher = createHash('sha256');
  hasher.update(str);
  return hasher.digest('hex');
}
837

838
/** Returns prompt metadata gathered from every eval that `getPromptsWithPredicate` loads. */
export function getPrompts() {
  const acceptAll = () => true;
  return getPromptsWithPredicate(acceptAll);
}
841

842
/**
 * Collects the prompts used by stored evals, grouped by prompt.
 *
 * Up to 100 evals are loaded. For each eval accepted by `predicate`, every prompt
 * in its results table is grouped under the SHA-256 of the prompt's `raw` text.
 * Each group records how many evals used the prompt, the per-eval metrics, and the
 * date and id of the most recent eval that used it.
 *
 * `recentEvalId` is updated together with `recentEvalDate`, so both always refer to
 * the same eval regardless of the order rows come back in.
 *
 * @param predicate - Decides whether an eval contributes to the result.
 * @returns One entry per distinct prompt.
 */
export async function getPromptsWithPredicate(
  predicate: (result: ResultsFile) => boolean,
): Promise<PromptWithMetadata[]> {
  // TODO(ian): Make this use a proper database query
  const db = getDb();
  const evals_ = await db
    .select({
      id: evals.id,
      createdAt: evals.createdAt,
      results: evals.results,
      config: evals.config,
    })
    .from(evals)
    .limit(100)
    .all();

  const groupedPrompts: { [hash: string]: PromptWithMetadata } = {};

  for (const eval_ of evals_) {
    const evalTime = new Date(eval_.createdAt).getTime();
    const resultWrapper: ResultsFile = {
      version: 3,
      createdAt: new Date(eval_.createdAt).toISOString(),
      results: eval_.results,
      config: eval_.config,
    };
    if (!predicate(resultWrapper)) {
      continue;
    }

    // The dataset id depends only on the eval, so compute it once per eval.
    const datasetId = resultWrapper.config.tests
      ? sha256(JSON.stringify(resultWrapper.config.tests))
      : '-';

    for (const prompt of resultWrapper.results.table.head.prompts) {
      const promptId = sha256(prompt.raw);
      const evalEntry = {
        id: eval_.id,
        datasetId,
        metrics: prompt.metrics,
      };

      if (promptId in groupedPrompts) {
        const group = groupedPrompts[promptId];
        if (evalTime > group.recentEvalDate.getTime()) {
          // Keep the date and the id of the most recent eval in step.
          group.recentEvalDate = new Date(evalTime);
          group.recentEvalId = eval_.id;
        }
        group.count += 1;
        group.evals.push(evalEntry);
      } else {
        groupedPrompts[promptId] = {
          count: 1,
          id: promptId,
          prompt,
          recentEvalDate: new Date(evalTime),
          recentEvalId: eval_.id,
          evals: [evalEntry],
        };
      }
    }
  }

  return Object.values(groupedPrompts);
}
909

910
/** Returns dataset metadata gathered from every eval that `getTestCasesWithPredicate` loads. */
export async function getTestCases() {
  const acceptAll = () => true;
  return getTestCasesWithPredicate(acceptAll);
}
913

914
/**
 * Collects the test-case sets (datasets) used by stored evals, grouped by dataset.
 *
 * Up to 100 evals are loaded. Evals without `config.tests`, or rejected by
 * `predicate`, are skipped. The rest are grouped under the SHA-256 of their
 * JSON-serialized tests. Each group records how many evals used the dataset, the
 * distinct prompts evaluated against it (first occurrence of each prompt id wins),
 * and the date and id of the most recent eval.
 *
 * `recentEvalId` is updated together with `recentEvalDate`, so both always refer to
 * the same eval regardless of the order rows come back in.
 *
 * @param predicate - Decides whether an eval contributes to the result.
 * @returns One entry per distinct dataset.
 */
export async function getTestCasesWithPredicate(
  predicate: (result: ResultsFile) => boolean,
): Promise<TestCasesWithMetadata[]> {
  const db = getDb();
  const evals_ = await db
    .select({
      id: evals.id,
      createdAt: evals.createdAt,
      results: evals.results,
      config: evals.config,
    })
    .from(evals)
    .limit(100)
    .all();

  const groupedTestCases: { [hash: string]: TestCasesWithMetadata } = {};

  for (const eval_ of evals_) {
    const evalTime = new Date(eval_.createdAt).getTime();
    const resultWrapper: ResultsFile = {
      version: 3,
      createdAt: new Date(eval_.createdAt).toISOString(),
      results: eval_.results,
      config: eval_.config,
    };
    const testCases = resultWrapper.config.tests;
    if (!testCases || !predicate(resultWrapper)) {
      continue;
    }

    const evalId = eval_.id;
    const datasetId = sha256(JSON.stringify(testCases));
    const group = datasetId in groupedTestCases ? groupedTestCases[datasetId] : undefined;

    const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
      id: sha256(prompt.raw),
      prompt,
      evalId,
    }));

    // De-duplicate by prompt id. Prompts already recorded for the dataset come
    // first, so they take precedence over this eval's copies.
    const promptsById: Record<string, TestCasesWithMetadataPrompt> = {};
    for (const prompt of (group ? group.prompts : []).concat(newPrompts)) {
      if (!(prompt.id in promptsById)) {
        promptsById[prompt.id] = prompt;
      }
    }
    const mergedPrompts = Object.values(promptsById);

    if (group) {
      if (evalTime > group.recentEvalDate.getTime()) {
        // Keep the date and the id of the most recent eval in step.
        group.recentEvalDate = new Date(evalTime);
        group.recentEvalId = evalId;
      }
      group.count += 1;
      group.prompts = mergedPrompts;
    } else {
      groupedTestCases[datasetId] = {
        id: datasetId,
        count: 1,
        testCases,
        recentEvalDate: new Date(evalTime),
        recentEvalId: evalId,
        prompts: mergedPrompts,
      };
    }
  }

  return Object.values(groupedTestCases);
}
986

987
/**
 * Finds a stored prompt by its id or by a leading portion of its id.
 *
 * @param hash - Full id or id prefix to match against.
 * @returns The first prompt returned by `getPrompts()` whose id starts with
 *   `hash`, or `undefined` when nothing matches.
 */
export async function getPromptFromHash(hash: string) {
  const candidates = await getPrompts();
  return candidates.find((candidate) => candidate.id.startsWith(hash));
}
996

997
/**
 * Finds a stored dataset by its id or by a leading portion of its id.
 *
 * @param hash - Full id or id prefix to match against.
 * @returns The first dataset returned by `getTestCases()` whose id starts with
 *   `hash`, or `undefined` when nothing matches.
 */
export async function getDatasetFromHash(hash: string) {
  const candidates = await getTestCases();
  return candidates.find((candidate) => candidate.id.startsWith(hash));
}
1006

1007
/**
 * Returns stored evals without any filtering.
 *
 * Delegates to `getEvalsWithPredicate` with a predicate that accepts every
 * result, so any limits applied there also apply here.
 */
export async function getEvals() {
  const acceptEverything = () => true;
  return getEvalsWithPredicate(acceptEverything);
}
1010

1011
/**
 * Finds a stored eval by its id or by a leading portion of its id.
 *
 * @param hash - Full id or id prefix to match against.
 * @returns The first eval returned by `getEvals()` whose id starts with
 *   `hash`, or `undefined` when nothing matches.
 */
export async function getEvalFromHash(hash: string) {
  const candidates = await getEvals();
  return candidates.find((candidate) => candidate.id.startsWith(hash));
}
1020

1021
/**
 * Loads stored evals from the database and keeps those accepted by `predicate`.
 *
 * At most 100 rows are read. Rows are sorted newest-first before the limit is
 * applied, so the cap discards the oldest evals rather than an unspecified
 * subset. The predicate runs after the limit, so fewer than 100 evals may be
 * returned.
 *
 * @param predicate - Called with each eval wrapped as a version 3
 *   `ResultsFile`; return `true` to include the eval in the result.
 * @returns The accepted evals, ordered from most to least recently created.
 */
export async function getEvalsWithPredicate(
  predicate: (result: ResultsFile) => boolean,
): Promise<EvalWithMetadata[]> {
  const db = getDb();
  const evals_ = await db
    .select({
      id: evals.id,
      createdAt: evals.createdAt,
      results: evals.results,
      config: evals.config,
    })
    .from(evals)
    // Without an explicit ordering, LIMIT returns an unspecified set of rows.
    .orderBy(desc(evals.createdAt))
    .limit(100)
    .all();

  const ret: EvalWithMetadata[] = [];

  for (const eval_ of evals_) {
    // The predicate operates on the on-disk results-file shape, so wrap the row.
    const resultWrapper: ResultsFile = {
      version: 3,
      createdAt: new Date(eval_.createdAt).toISOString(),
      results: eval_.results,
      config: eval_.config,
    };
    if (predicate(resultWrapper)) {
      ret.push({
        id: eval_.id,
        date: new Date(eval_.createdAt),
        config: eval_.config,
        results: eval_.results,
      });
    }
  }

  return ret;
}
1059

1060
/**
 * Imports the modules that implement custom Nunjucks filters.
 *
 * Each configured path is joined onto `cliState.basePath` (empty string when
 * unset) and expanded as a glob. Every matching file is imported in turn and
 * assigned under the filter's name, so when a pattern matches several files
 * the last one imported is the one kept.
 *
 * @param filters - Map from filter name to a file path or glob pattern.
 * @returns Map from filter name to the imported module.
 */
export async function readFilters(
  filters: Record<string, string>,
): Promise<NunjucksFilterMap> {
  const loaded: NunjucksFilterMap = {};
  const root = cliState.basePath || '';

  for (const [filterName, pattern] of Object.entries(filters)) {
    const matches = globSync(path.join(root, pattern), { windowsPathsNoEscape: true });
    for (const match of matches) {
      loaded[filterName] = await importModule(path.resolve(match));
    }
  }

  return loaded;
}
1077

1078
/**
 * Builds the template engine used to render strings.
 *
 * When the `PROMPTFOO_DISABLE_TEMPLATING` environment variable is set to a
 * non-empty value, a stand-in object is returned whose `renderString` hands
 * the template back unchanged. Otherwise a Nunjucks environment is configured
 * with autoescaping turned off and the supplied filters registered on it.
 *
 * @param filters - Optional map from filter name to filter implementation.
 * @returns An object exposing `renderString`.
 */
export function getNunjucksEngine(filters?: NunjucksFilterMap) {
  if (process.env.PROMPTFOO_DISABLE_TEMPLATING) {
    const passthrough = {
      renderString: (template: string) => template,
    };
    return passthrough;
  }

  const env = nunjucks.configure({ autoescape: false });
  if (!filters) {
    return env;
  }

  Object.entries(filters).forEach(([filterName, filterFn]) => {
    env.addFilter(filterName, filterFn);
  });
  return env;
}
1096

1097
/**
 * Logs a horizontal rule of `=` characters at info level.
 *
 * The rule is 10 characters narrower than the terminal; 80 columns are assumed
 * when stdout does not report a width. The width is clamped at zero because
 * `String.prototype.repeat` throws a `RangeError` for a negative count, which
 * would otherwise happen on terminals narrower than 10 columns.
 */
export function printBorder() {
  const columns = process.stdout.columns || 80;
  const width = Math.max(0, columns - 10);
  logger.info('='.repeat(width));
}
1101

1102
/**
 * Applies a user-supplied transform to a provider output.
 *
 * The transform is either a `file://` reference or inline JavaScript:
 * - `file://….js`, `.cjs`, `.mjs`: the module is imported and must be a
 *   function itself or expose a function as its default export.
 * - `file://….py`: the script's `get_transform` is invoked through `runPython`
 *   with `[output, context]`.
 * - anything else: treated as the body of a function with parameters
 *   `output` and `context`. A value without a newline is wrapped in
 *   `return …`; a multi-line value must contain its own `return`.
 *
 * @param codeOrFilepath - Inline code or a `file://` path to a transform.
 * @param output - The value to transform.
 * @param context - Variables and prompt passed as the transform's second argument.
 * @returns The value produced by the transform (awaited when it is a promise).
 * @throws Error when the file extension is unsupported, when a JavaScript
 *   module exports no function, or when the transform yields `null` or
 *   `undefined`.
 */
export async function transformOutput(
  codeOrFilepath: string,
  output: string | object | undefined,
  context: { vars?: Record<string, string | object | undefined>; prompt: Partial<Prompt> },
) {
  const filePrefix = 'file://';
  let transform;

  if (!codeOrFilepath.startsWith(filePrefix)) {
    // Inline code: a single expression gets an implicit `return`.
    const body = codeOrFilepath.includes('\n') ? codeOrFilepath : `return ${codeOrFilepath}`;
    transform = new Function('output', 'context', body);
  } else {
    const filePath = codeOrFilepath.slice(filePrefix.length);
    const isJavaScript = ['.js', '.cjs', '.mjs'].some((ext) => codeOrFilepath.endsWith(ext));

    if (isJavaScript) {
      const loaded = await importModule(filePath);
      if (typeof loaded === 'function') {
        transform = loaded;
      } else if (loaded.default && typeof loaded.default === 'function') {
        transform = loaded.default;
      } else {
        throw new Error(
          `Transform ${filePath} must export a function or have a default export as a function`,
        );
      }
    } else if (codeOrFilepath.endsWith('.py')) {
      transform = async (
        output: string,
        context: { vars: Record<string, string | object> },
      ) => {
        return runPython(filePath, 'get_transform', [output, context]);
      };
    } else {
      throw new Error(`Unsupported transform file format: ${codeOrFilepath}`);
    }
  }

  const transformed = await Promise.resolve(transform(output, context));
  if (transformed == null) {
    throw new Error(`Transform function did not return a value\n\n${codeOrFilepath}`);
  }
  return transformed;
}
1148

1149
/**
 * One prompt column from an eval's results table, tagged with the ids of the
 * eval it came from and of the prompt and dataset rows joined to that eval.
 *
 * `promptId` and `datasetId` are nullable because they are read through left
 * joins and may have no matching row.
 */
export type StandaloneEval = CompletedPrompt & {
  evalId: string;
  promptId: string | null;
  datasetId: string | null;
};
1154
/**
 * Flattens recent evals into one entry per prompt column.
 *
 * Reads up to 100 rows of evals left-joined with their prompt and dataset
 * link tables, newest first. Each joined row is then expanded into one entry
 * for every prompt column in that row's results table, carrying the row's
 * eval, prompt and dataset ids alongside the column's own fields.
 *
 * @returns The flattened entries, in query order and then column order.
 */
export function getStandaloneEvals(): StandaloneEval[] {
  const rows = getDb()
    .select({
      evalId: evals.id,
      description: evals.description,
      config: evals.config,
      results: evals.results,
      promptId: evalsToPrompts.promptId,
      datasetId: evalsToDatasets.datasetId,
    })
    .from(evals)
    .leftJoin(evalsToPrompts, eq(evals.id, evalsToPrompts.evalId))
    .leftJoin(evalsToDatasets, eq(evals.id, evalsToDatasets.evalId))
    .orderBy(desc(evals.createdAt))
    .limit(100)
    .all();

  const standalone: StandaloneEval[] = [];
  for (const row of rows) {
    for (const column of row.results.table.head.prompts) {
      standalone.push({
        evalId: row.evalId,
        promptId: row.promptId,
        datasetId: row.datasetId,
        ...column,
      });
    }
  }
  return standalone;
}
1186

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.