1
import * as path from 'path';
2
import * as fs from 'fs';
4
import yaml from 'js-yaml';
5
import { parse as parsePath } from 'path';
6
import { parse as parseCsv } from 'csv-parse/sync';
7
import { globSync } from 'glob';
9
import logger from './logger';
10
import { fetchCsvFromGoogleSheet } from './fetch';
11
import { OpenAiChatCompletionProvider } from './providers/openai';
12
import { testCaseFromCsvRow } from './csv';
// Default OpenAI chat model used for dataset synthesis (persona and test case generation).
const SYNTHESIZE_DEFAULT_PROVIDER = 'gpt-4-0125-preview';
function parseJson(json: string): any | undefined {
28
return JSON.parse(json);
34
export async function readVarsFiles(
35
pathOrGlobs: string | string[],
36
basePath: string = '',
37
): Promise<Record<string, string | string[] | object>> {
38
if (typeof pathOrGlobs === 'string') {
39
pathOrGlobs = [pathOrGlobs];
42
const ret: Record<string, string | string[] | object> = {};
43
for (const pathOrGlob of pathOrGlobs) {
44
const resolvedPath = path.resolve(basePath, pathOrGlob);
45
const paths = globSync(resolvedPath, {
46
windowsPathsNoEscape: true,
49
for (const p of paths) {
50
const yamlData = yaml.load(fs.readFileSync(p, 'utf-8'));
51
Object.assign(ret, yamlData);
58
export async function readStandaloneTestsFile(
60
basePath: string = '',
61
): Promise<TestCase[]> {
64
const resolvedVarsPath = path.resolve(basePath, varsPath);
65
const fileExtension = parsePath(varsPath).ext.slice(1);
66
let rows: CsvRow[] = [];
68
if (varsPath.startsWith('https://docs.google.com/spreadsheets/')) {
69
const csvData = await fetchCsvFromGoogleSheet(varsPath);
70
rows = parseCsv(csvData, { columns: true });
71
} else if (fileExtension === 'csv') {
72
rows = parseCsv(fs.readFileSync(resolvedVarsPath, 'utf-8'), { columns: true });
73
} else if (fileExtension === 'json') {
74
rows = parseJson(fs.readFileSync(resolvedVarsPath, 'utf-8'));
75
} else if (fileExtension === 'yaml' || fileExtension === 'yml') {
76
rows = yaml.load(fs.readFileSync(resolvedVarsPath, 'utf-8')) as unknown as any;
79
return rows.map((row, idx) => {
80
const test = testCaseFromCsvRow(row);
81
test.description = `Row #${idx + 1}`;
86
type TestCaseWithVarsFile = TestCase<
87
Record<string, string | string[] | object> | string | string[]
89
export async function readTest(
90
test: string | TestCaseWithVarsFile,
91
basePath: string = '',
93
const loadTestWithVars = async (
94
testCase: TestCaseWithVarsFile,
96
): Promise<TestCase> => {
97
const ret: TestCase = { ...testCase, vars: undefined };
98
if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
99
ret.vars = await readVarsFiles(testCase.vars, testBasePath);
101
ret.vars = testCase.vars;
123
let testCase: TestCase;
125
if (typeof test === 'string') {
126
const testFilePath = path.resolve(basePath, test);
127
const testBasePath = path.dirname(testFilePath);
128
const rawTestCase = yaml.load(fs.readFileSync(testFilePath, 'utf-8')) as TestCaseWithVarsFile;
129
testCase = await loadTestWithVars(rawTestCase, testBasePath);
131
testCase = await loadTestWithVars(test, basePath);
135
if (!testCase.assert && !testCase.vars && !testCase.options) {
137
`Test case must have either assert, vars, or options property. Instead got ${JSON.stringify(
148
export async function readTests(
149
tests: TestSuiteConfig['tests'],
150
basePath: string = '',
151
): Promise<TestCase[]> {
152
const ret: TestCase[] = [];
154
const loadTestsFromGlob = async (loadTestsGlob: string) => {
155
const resolvedPath = path.resolve(basePath, loadTestsGlob);
156
const testFiles = globSync(resolvedPath, {
157
windowsPathsNoEscape: true,
160
for (const testFile of testFiles) {
161
let testCases: TestCase[] | undefined;
162
if (testFile.endsWith('.csv')) {
163
testCases = await readStandaloneTestsFile(testFile, basePath);
164
} else if (testFile.endsWith('.yaml') || testFile.endsWith('.yml')) {
165
testCases = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
166
} else if (testFile.endsWith('.json')) {
167
testCases = require(testFile);
169
throw new Error(`Unsupported file type for test file: ${testFile}`);
172
for (const testCase of testCases) {
173
ret.push(await readTest(testCase, path.dirname(testFile)));
180
if (typeof tests === 'string') {
181
if (tests.endsWith('yaml') || tests.endsWith('yml')) {
183
return loadTestsFromGlob(tests);
186
return readStandaloneTestsFile(tests, basePath);
188
} else if (Array.isArray(tests)) {
189
for (const globOrTest of tests) {
190
if (typeof globOrTest === 'string') {
192
ret.push(...(await loadTestsFromGlob(globOrTest)));
195
ret.push(await readTest(globOrTest, basePath));
203
interface SynthesizeOptions {
205
instructions?: string;
207
numPersonas?: number;
208
numTestCasesPerPersona?: number;
211
export async function synthesizeFromTestSuite(
212
testSuite: TestSuite,
213
options: Partial<SynthesizeOptions>,
217
prompts: testSuite.prompts.map((prompt) => prompt.raw),
218
tests: testSuite.tests || [],
222
export async function synthesize({
227
numTestCasesPerPersona,
228
}: SynthesizeOptions) {
229
if (prompts.length < 1) {
230
throw new Error('Dataset synthesis requires at least one prompt.');
233
numPersonas = numPersonas || 5;
234
numTestCasesPerPersona = numTestCasesPerPersona || 3;
237
if (process.env.LOG_LEVEL !== 'debug') {
238
const cliProgress = await import('cli-progress');
239
progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
240
const totalProgressSteps = 1 + numPersonas * numTestCasesPerPersona;
241
progressBar.start(totalProgressSteps, 0);
245
`Starting dataset synthesis. We'll begin by generating up to ${numPersonas} personas. Each persona will be used to generate ${numTestCasesPerPersona} test cases.`,
249
logger.debug(`\nGenerating user personas from ${prompts.length} prompts...`);
250
const provider = new OpenAiChatCompletionProvider(SYNTHESIZE_DEFAULT_PROVIDER, {
258
const promptsString = `<Prompts>
259
${prompts.map((prompt) => `<Prompt>\n${prompt}\n</Prompt>`).join('\n')}
261
const resp = await provider.callApi(
262
`Consider the following prompt${prompts.length > 1 ? 's' : ''} for an LLM application:
265
List up to ${numPersonas} user personas that would send ${
266
prompts.length > 1 ? 'these prompts' : 'this prompt'
267
}. Your response should be JSON of the form {personas: string[]}`,
270
const personas = (JSON.parse(resp.output as string) as { personas: string[] }).personas;
272
`\nGenerated ${personas.length} personas:\n${personas.map((p) => ` - ${p}`).join('\n')}`,
276
progressBar.increment();
280
const variableRegex = /{{\s*(\w+)\s*}}/g;
281
const variables = new Set();
282
for (const prompt of prompts) {
284
while ((match = variableRegex.exec(prompt)) !== null) {
285
variables.add(match[1]);
289
`\nExtracted ${variables.size} variables from prompts:\n${Array.from(variables)
290
.map((v) => ` - ${v}`)
294
const existingTests =
295
`Here are some existing tests:` +
302
${JSON.stringify(test.vars, null, 2)}
311
const testCaseVars: VarMapping[] = [];
312
for (let i = 0; i < personas.length; i++) {
313
const persona = personas[i];
314
logger.debug(`\nGenerating test cases for persona ${i + 1}...`);
316
const personaPrompt = `Consider ${
317
prompts.length > 1 ? 'these prompts' : 'this prompt'
318
}, which contains some {{variables}}:
328
Fully embody this persona and determine a value for each variable, such that the prompt would be sent by this persona.
330
You are a tester, so try to think of ${numTestCasesPerPersona} sets of values that would be interesting or unusual to test. ${
334
Your response should contain a JSON map of variable names to values, of the form {vars: {${Array.from(
337
.map((varName) => `${varName}: string`)
340
const personaResponse = await provider.callApi(personaPrompt);
341
const parsed = JSON.parse(personaResponse.output as string) as {
344
for (const vars of parsed.vars) {
345
logger.debug(`${JSON.stringify(vars, null, 2)}`);
346
testCaseVars.push(vars);
348
progressBar.increment();
358
const uniqueTestCaseStrings = new Set(testCaseVars.map((testCase) => JSON.stringify(testCase)));
359
const dedupedTestCaseVars: VarMapping[] = Array.from(uniqueTestCaseStrings).map((testCase) =>
360
JSON.parse(testCase),
362
return dedupedTestCaseVars;