1
import chalk from 'chalk';
2
import { Command } from 'commander';
4
import { getEvalFromHash, getPromptFromHash, getDatasetFromHash, printBorder } from '../util';
5
import { generateTable, wrapTable } from '../table';
6
import logger from '../logger';
7
import telemetry from '../telemetry';
9
/**
 * Registers the show command family on the given commander program.
 *
 * The visible code attaches a generic action that resolves an ID against
 * evals, prompts and datasets (in that order), plus dedicated prompt and
 * dataset subcommands wired to handlePrompt and handleDataset.
 *
 * NOTE(review): this excerpt is incomplete. The command registration for the
 * generic lookup and for the eval subcommand, the conditions guarding each
 * early return, and the closing braces are not visible here. The bare numeric
 * lines look like leftover line numbers from an extraction step - confirm
 * against the real file before relying on this text.
 *
 * @param program - The commander Command the show command is attached to.
 */
export async function showCommand(program: Command) {
10
const showCommand = program
12
.description('Show details of a specific resource')
13
// Generic entry point: the ID may belong to an eval, a prompt or a dataset.
.action(async (id: string) => {
14
// Lookup order is eval, then prompt, then dataset.
// NOTE(review): presumably each return below is guarded by a check that the
// lookup produced a value - the guards are not visible in this excerpt.
const evl = await getEvalFromHash(id);
16
return handleEval(id);
19
const prompt = await getPromptFromHash(id);
21
return handlePrompt(id);
24
const dataset = await getDatasetFromHash(id);
26
return handleDataset(id);
29
// Reached after all three lookups; reports that the ID matched nothing.
logger.error(`No resource found with ID ${id}`);
34
// NOTE(review): the command call this description belongs to is not visible;
// presumably it is the eval subcommand wired to handleEval - confirm.
.description('Show details of a specific evaluation')
38
.command('prompt <id>')
39
.description('Show details of a specific prompt')
40
.action(handlePrompt);
43
.command('dataset <id>')
44
.description('Show details of a specific dataset')
45
.action(handleDataset);
48
/**
 * Prints the details of a single evaluation identified by a hash ID.
 *
 * Visible steps: show the telemetry notice, record a command_used event and
 * send it, load the eval with getEvalFromHash, log the results table, then log
 * a summary with the eval ID, the prompt count and up to five variable names.
 *
 * NOTE(review): this excerpt is incomplete. The telemetry payload, the
 * condition guarding the not-found error, and the closing braces are not
 * visible. Bare numeric lines look like extraction residue - confirm.
 *
 * @param id - Hash ID of the evaluation to display.
 */
async function handleEval(id: string) {
49
telemetry.maybeShowNotice();
50
telemetry.record('command_used', {
53
await telemetry.send();
55
const evl = await getEvalFromHash(id);
57
// NOTE(review): presumably only logged when the lookup returned nothing,
// followed by an early return - the guard is not visible here.
logger.error(`No evaluation found with ID ${id}`);
61
const { prompts, vars } = evl.results.table.head;
62
// NOTE(review): the third argument (25) matches the row threshold checked
// just below, so it presumably caps the rendered rows; the meaning of 100 is
// not shown here - verify against generateTable.
logger.info(generateTable(evl.results, 100, 25));
63
// When the table body has more than 25 rows, report how many were left out.
if (evl.results.table.body.length > 25) {
64
const rowsLeft = evl.results.table.body.length - 25;
65
// Pluralizes the word row unless exactly one row remains.
logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
69
logger.info(chalk.cyan(`Eval ${id}`));
71
// TODO(ian): List prompt ids
72
logger.info(`${prompts.length} prompts`);
// Variable summary: total count, the first five names, and a count of the
// remainder when there are more than five.
// NOTE(review): the call wrapping this template literal is not visible.
74
`${vars.length} variables: ${vars.slice(0, 5).join(', ')}${
75
vars.length > 5 ? ` (and ${vars.length - 5} more...)` : ''
80
/**
 * Prints the details of a single prompt identified by a hash ID, followed by
 * a table of evals that use it.
 *
 * Visible steps: show the telemetry notice, record a command_used event and
 * send it, load the prompt with getPromptFromHash, log its raw text and ID,
 * then iterate over at most ten of its evals to build table rows and log the
 * wrapped table plus usage hints.
 *
 * NOTE(review): this excerpt is incomplete. The telemetry payload, the
 * not-found guard, the declaration of the table variable, the statement that
 * adds each row, and the closing braces are not visible. Bare numeric lines
 * look like extraction residue - confirm.
 *
 * @param id - Hash ID of the prompt to display.
 */
async function handlePrompt(id: string) {
81
telemetry.maybeShowNotice();
82
telemetry.record('command_used', {
85
await telemetry.send();
87
const prompt = await getPromptFromHash(id);
89
// NOTE(review): presumably only logged when the lookup returned nothing,
// followed by an early return - the guard is not visible here.
logger.error(`Prompt with ID ${id} not found.`);
94
logger.info(chalk.cyan(prompt.prompt.raw));
96
logger.info(chalk.bold(`Prompt ${id}`));
99
logger.info(`This prompt is used in the following evals:`);
101
// Evals are ordered by ID descending (b compared against a) and only the
// first ten are shown. Array sort works in place, so prompt.evals itself is
// reordered by this call.
for (const evl of prompt.evals.sort((a, b) => b.id.localeCompare(a.id)).slice(0, 10)) {
103
// IDs are shortened to their first six characters for display.
'Eval ID': evl.id.slice(0, 6),
104
'Dataset ID': evl.datasetId.slice(0, 6),
105
// Score is formatted to two decimals; a dash is shown when metrics are absent.
'Raw score': evl.metrics?.score.toFixed(2) || '-',
107
// Pass-rate input: only computed when metrics exist and at least one test
// ran, which avoids dividing by zero.
// NOTE(review): the column key, the scaling factor and the fallback branch
// of this expression are not visible in this excerpt.
evl.metrics && evl.metrics.testPassCount + evl.metrics.testFailCount > 0
109
(evl.metrics.testPassCount /
110
(evl.metrics.testPassCount + evl.metrics.testFailCount)) *
114
// NOTE(review): because of the logical OR, a count of 0 is falsy and is
// rendered as a dash rather than 0 - confirm this is intended.
'Pass count': evl.metrics?.testPassCount || '-',
115
'Fail count': evl.metrics?.testFailCount || '-',
118
logger.info(wrapTable(table));
// Usage hints pointing at the sibling subcommands.
// NOTE(review): the calls wrapping these template literals are not visible.
121
`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
124
`Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`,
128
/**
 * Prints the details of a single dataset identified by a hash ID, followed by
 * a table of the evals and prompts that use it.
 *
 * Visible steps: show the telemetry notice, record a command_used event named
 * show dataset and send it, load the dataset with getDatasetFromHash, log its
 * ID, then iterate over its prompts to build table rows and log the wrapped
 * table plus usage hints.
 *
 * NOTE(review): this excerpt is incomplete. The not-found guard, the rest of
 * the loop header, the declaration of the table variable, the statement that
 * adds each row, and the closing braces are not visible. Bare numeric lines
 * look like extraction residue - confirm.
 *
 * @param id - Hash ID of the dataset to display.
 */
async function handleDataset(id: string) {
129
telemetry.maybeShowNotice();
130
telemetry.record('command_used', {
131
name: 'show dataset',
133
await telemetry.send();
135
const dataset = await getDatasetFromHash(id);
137
// NOTE(review): presumably only logged when the lookup returned nothing,
// followed by an early return - the guard is not visible here.
logger.error(`Dataset with ID ${id} not found.`);
142
logger.info(chalk.bold(`Dataset ${id}`));
145
logger.info(`This dataset is used in the following evals:`);
// Entries are ordered by eval ID descending (b compared against a). Array
// sort works in place, so dataset.prompts itself is reordered by this call.
// NOTE(review): any limit applied after the sort is not visible here.
147
for (const prompt of dataset.prompts
148
.sort((a, b) => b.evalId.localeCompare(a.evalId))
151
// IDs are shortened to their first six characters for display.
'Eval ID': prompt.evalId.slice(0, 6),
152
'Prompt ID': prompt.id.slice(0, 6),
153
// Score is formatted to two decimals; a dash is shown when metrics are absent.
'Raw score': prompt.prompt.metrics?.score.toFixed(2) || '-',
155
// Pass-rate input: only computed when metrics exist and at least one test
// ran, which avoids dividing by zero.
// NOTE(review): the column key, the scaling factor and the fallback branch
// of this expression are not visible in this excerpt.
prompt.prompt.metrics &&
156
prompt.prompt.metrics.testPassCount + prompt.prompt.metrics.testFailCount > 0
158
(prompt.prompt.metrics.testPassCount /
159
(prompt.prompt.metrics.testPassCount + prompt.prompt.metrics.testFailCount)) *
163
// NOTE(review): because of the logical OR, a count of 0 is falsy and is
// rendered as a dash rather than 0 - confirm this is intended.
'Pass count': prompt.prompt.metrics?.testPassCount || '-',
164
'Fail count': prompt.prompt.metrics?.testFailCount || '-',
167
logger.info(wrapTable(table));
// Usage hints pointing at the sibling subcommands.
// NOTE(review): the calls wrapping these template literals are not visible.
170
`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`,
173
`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,