promptfoo

show.ts
175 строк · 5.1 Кб
Перенос по словам
1
import chalk from 'chalk';
2
import { Command } from 'commander';
3

4
import { getEvalFromHash, getPromptFromHash, getDatasetFromHash, printBorder } from '../util';
5
import { generateTable, wrapTable } from '../table';
6
import logger from '../logger';
7
import telemetry from '../telemetry';
8

9
/**
 * Registers the `show` command, plus its `eval`, `prompt` and `dataset`
 * subcommands, on the given commander program.
 *
 * A bare `show <id>` looks the ID up as an eval first, then as a prompt, then
 * as a dataset; the first lookup that returns a truthy value decides which
 * handler runs. When none of them match, an error is logged.
 *
 * @param program - The commander program the commands are attached to.
 */
export async function showCommand(program: Command) {
  const show = program.command('show <id>').description('Show details of a specific resource');

  // Generic entry point: the resource type is inferred by probing each lookup in turn.
  show.action(async (id: string) => {
    if (await getEvalFromHash(id)) {
      return handleEval(id);
    }

    if (await getPromptFromHash(id)) {
      return handlePrompt(id);
    }

    if (await getDatasetFromHash(id)) {
      return handleDataset(id);
    }

    logger.error(`No resource found with ID ${id}`);
  });

  // Explicit entry points: the caller names the resource type up front.
  show.command('eval <id>').description('Show details of a specific evaluation').action(handleEval);

  show.command('prompt <id>').description('Show details of a specific prompt').action(handlePrompt);

  show
    .command('dataset <id>')
    .description('Show details of a specific dataset')
    .action(handleDataset);
}
47

48
/**
 * Prints the results table and a short summary for one evaluation.
 *
 * Records a `show eval` telemetry event, then looks the evaluation up by ID.
 * If the lookup returns nothing, an error is logged and the function returns.
 *
 * @param id - ID passed to `getEvalFromHash` to find the evaluation.
 */
async function handleEval(id: string) {
  telemetry.maybeShowNotice();
  telemetry.record('command_used', { name: 'show eval' });
  await telemetry.send();

  const evl = await getEvalFromHash(id);
  if (!evl) {
    logger.error(`No evaluation found with ID ${id}`);
    return;
  }

  const rowLimit = 25;
  const varPreviewLimit = 5;

  const { prompts, vars } = evl.results.table.head;
  logger.info(generateTable(evl.results, 100, rowLimit));

  // Tell the user how many rows exceed the row limit passed to generateTable.
  const totalRows = evl.results.table.body.length;
  if (totalRows > rowLimit) {
    const hiddenRows = totalRows - rowLimit;
    const plural = hiddenRows === 1 ? '' : 's';
    logger.info(`... ${hiddenRows} more row${plural} not shown ...\n`);
  }

  printBorder();
  logger.info(chalk.cyan(`Eval ${id}`));
  printBorder();
  // TODO(ian): List prompt ids
  logger.info(`${prompts.length} prompts`);

  // Only the first few variable names are listed; the rest are summarized by count.
  const listedVars = vars.slice(0, varPreviewLimit).join(', ');
  const overflowNote =
    vars.length > varPreviewLimit ? ` (and ${vars.length - varPreviewLimit} more...)` : '';
  logger.info(`${vars.length} variables: ${listedVars}${overflowNote}`);
}
79

80
/**
 * Prints one prompt's raw text and a table of up to ten evals that use it.
 *
 * Records a `show prompt` telemetry event, then looks the prompt up by ID.
 * If the lookup returns nothing, an error is logged and the function returns.
 *
 * Evals are listed in descending order of eval ID. For each one, a missing
 * metrics object is rendered as `-`; a real count of zero is rendered as `0`.
 *
 * @param id - ID passed to `getPromptFromHash` to find the prompt.
 */
async function handlePrompt(id: string) {
  telemetry.maybeShowNotice();
  telemetry.record('command_used', {
    name: 'show prompt',
  });
  await telemetry.send();

  const prompt = await getPromptFromHash(id);
  if (!prompt) {
    logger.error(`Prompt with ID ${id} not found.`);
    return;
  }

  printBorder();
  logger.info(chalk.cyan(prompt.prompt.raw));
  printBorder();
  logger.info(chalk.bold(`Prompt ${id}`));
  printBorder();

  logger.info(`This prompt is used in the following evals:`);

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the array owned by the fetched prompt record.
  const recentEvals = [...prompt.evals].sort((a, b) => b.id.localeCompare(a.id)).slice(0, 10);

  const table = recentEvals.map((evl) => {
    const metrics = evl.metrics;
    const totalTests = metrics ? metrics.testPassCount + metrics.testFailCount : 0;
    return {
      'Eval ID': evl.id.slice(0, 6),
      'Dataset ID': evl.datasetId.slice(0, 6),
      'Raw score': metrics?.score.toFixed(2) ?? '-',
      // Guard against division by zero when no tests were recorded.
      'Pass rate':
        metrics && totalTests > 0
          ? `${((metrics.testPassCount / totalTests) * 100).toFixed(2)}%`
          : '-',
      // `??` rather than `||` so that a genuine count of 0 is shown as 0.
      'Pass count': metrics?.testPassCount ?? '-',
      'Fail count': metrics?.testFailCount ?? '-',
    };
  });

  logger.info(wrapTable(table));
  printBorder();
  logger.info(
    `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
  );
  logger.info(
    `Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`,
  );
}
127

128
/**
 * Prints a table of up to ten eval/prompt pairs that use one dataset.
 *
 * Records a `show dataset` telemetry event, then looks the dataset up by ID.
 * If the lookup returns nothing, an error is logged and the function returns.
 *
 * Rows are listed in descending order of eval ID. For each one, a missing
 * metrics object is rendered as `-`; a real count of zero is rendered as `0`.
 *
 * @param id - ID passed to `getDatasetFromHash` to find the dataset.
 */
async function handleDataset(id: string) {
  telemetry.maybeShowNotice();
  telemetry.record('command_used', {
    name: 'show dataset',
  });
  await telemetry.send();

  const dataset = await getDatasetFromHash(id);
  if (!dataset) {
    logger.error(`Dataset with ID ${id} not found.`);
    return;
  }

  printBorder();
  logger.info(chalk.bold(`Dataset ${id}`));
  printBorder();

  logger.info(`This dataset is used in the following evals:`);

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the array owned by the fetched dataset record.
  const recentPrompts = [...dataset.prompts]
    .sort((a, b) => b.evalId.localeCompare(a.evalId))
    .slice(0, 10);

  const table = recentPrompts.map((entry) => {
    const metrics = entry.prompt.metrics;
    const totalTests = metrics ? metrics.testPassCount + metrics.testFailCount : 0;
    return {
      'Eval ID': entry.evalId.slice(0, 6),
      'Prompt ID': entry.id.slice(0, 6),
      'Raw score': metrics?.score.toFixed(2) ?? '-',
      // Guard against division by zero when no tests were recorded.
      'Pass rate':
        metrics && totalTests > 0
          ? `${((metrics.testPassCount / totalTests) * 100).toFixed(2)}%`
          : '-',
      // `??` rather than `||` so that a genuine count of 0 is shown as 0.
      'Pass count': metrics?.testPassCount ?? '-',
      'Fail count': metrics?.testFailCount ?? '-',
    };
  });

  logger.info(wrapTable(table));
  printBorder();
  logger.info(
    `Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`,
  );
  logger.info(
    `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
  );
}
176
promptfoo

Использование cookies