Skip to content

Commit 2b53cc0

Browse files
authored
feat(ai-builder): CSV input for evals (no-changelog) (#21150)
1 parent 85fb6e4 commit 2b53cc0

File tree

5 files changed

+163
-9
lines changed

5 files changed

+163
-9
lines changed

packages/@n8n/ai-workflow-builder.ee/evaluations/cli/runner.ts

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,19 @@ import {
2121
import { formatHeader, saveEvaluationResults } from '../utils/evaluation-helpers.js';
2222
import { generateMarkdownReport } from '../utils/evaluation-reporter.js';
2323

24+
/**
 * Options accepted by the CLI evaluation runner. Every field is optional.
 */
type CliEvaluationOptions = {
	/** Optional test case ID to run only a specific test. */
	testCaseFilter?: string;

	/** Optional array of test cases to run (if not provided, uses defaults and generation). */
	testCases?: TestCase[];

	/** Number of times to run each test (e.g. for cache warming analysis). */
	repetitions?: number;
};
29+
2430
/**
2531
* Main CLI evaluation runner that executes all test cases in parallel
2632
* Supports concurrency control via EVALUATION_CONCURRENCY environment variable
27-
* @param testCaseFilter - Optional test case ID to run only a specific test
28-
* @param repetitions - Number of times to run each test (for cache warming analysis)
2933
*/
30-
export async function runCliEvaluation(
31-
testCaseFilter?: string,
32-
repetitions: number = 1,
33-
): Promise<void> {
34+
export async function runCliEvaluation(options: CliEvaluationOptions = {}): Promise<void> {
35+
const { repetitions = 1, testCaseFilter } = options;
36+
3437
console.log(formatHeader('AI Workflow Builder Full Evaluation', 70));
3538
if (repetitions > 1) {
3639
console.log(pc.yellow(`➔ Each test will be run ${repetitions} times for cache analysis`));
@@ -41,7 +44,14 @@ export async function runCliEvaluation(
4144
const { parsedNodeTypes, llm, tracer } = await setupTestEnvironment();
4245

4346
// Determine test cases to run
44-
let testCases: TestCase[] = basicTestCases;
47+
const providedTestCases =
48+
options.testCases && options.testCases.length > 0 ? options.testCases : undefined;
49+
50+
let testCases: TestCase[] = providedTestCases ?? basicTestCases;
51+
52+
if (providedTestCases) {
53+
console.log(pc.blue(`➔ Loaded ${providedTestCases.length} test cases from CSV`));
54+
}
4555

4656
// Filter to single test case if specified
4757
if (testCaseFilter) {
@@ -56,7 +66,7 @@ export async function runCliEvaluation(
5666
}
5767
} else {
5868
// Optionally generate additional test cases
59-
if (shouldGenerateTestCases()) {
69+
if (!providedTestCases && shouldGenerateTestCases()) {
6070
console.log(pc.blue('➔ Generating additional test cases...'));
6171
const generatedCases = await generateTestCases(llm, howManyTestCasesToGenerate());
6272
testCases = [...testCases, ...generatedCases];

packages/@n8n/ai-workflow-builder.ee/evaluations/index.ts

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { runCliEvaluation } from './cli/runner.js';
22
import { runLangsmithEvaluation } from './langsmith/runner.js';
3+
import { loadTestCasesFromCsv } from './utils/csv-prompt-loader.js';
34

45
// Re-export for external use if needed
56
export { runCliEvaluation } from './cli/runner.js';
@@ -19,6 +20,13 @@ async function main(): Promise<void> {
1920
? process.argv[process.argv.indexOf('--test-case') + 1]
2021
: undefined;
2122

23+
// Parse command line argument for CSV prompts file path
24+
const promptsCsvPath = getFlagValue('--prompts-csv') ?? process.env.PROMPTS_CSV_FILE;
25+
26+
if (promptsCsvPath && useLangsmith) {
27+
console.warn('CSV-driven evaluations are only supported in CLI mode. Ignoring --prompts-csv.');
28+
}
29+
2230
// Parse command line arguments for a number of repetitions (applies to both modes)
2331
const repetitionsArg = process.argv.includes('--repetitions')
2432
? parseInt(process.argv[process.argv.indexOf('--repetitions') + 1], 10)
@@ -28,10 +36,33 @@ async function main(): Promise<void> {
2836
if (useLangsmith) {
2937
await runLangsmithEvaluation(repetitions);
3038
} else {
31-
await runCliEvaluation(testCaseId, repetitions);
39+
const csvTestCases = promptsCsvPath ? loadTestCasesFromCsv(promptsCsvPath) : undefined;
40+
await runCliEvaluation({ testCases: csvTestCases, testCaseFilter: testCaseId, repetitions });
3241
}
3342
}
3443

44+
/**
 * Looks up the value supplied for a command-line flag.
 *
 * Two spellings are recognised: `--flag value` (the value is the next
 * argument) and `--flag=value`. When both appear, the space-separated form
 * is checked first and wins.
 *
 * @param flag - The flag to look for, including its leading dashes (e.g. `--prompts-csv`).
 * @param argv - Argument list to search; defaults to `process.argv` so existing
 *   single-argument callers behave exactly as before.
 * @returns The flag's value, or `undefined` when the flag is not present in either form.
 * @throws Error when the flag is present but its value is missing, empty, or
 *   (for the space-separated form) starts with `--`, i.e. looks like another flag.
 */
function getFlagValue(flag: string, argv: readonly string[] = process.argv): string | undefined {
	const missingValue = () => new Error(`Flag ${flag} requires a value`);

	// Form 1: `--flag value`
	const exactMatchIndex = argv.indexOf(flag);
	if (exactMatchIndex !== -1) {
		const value = argv[exactMatchIndex + 1];
		// A following token that starts with `--` is treated as the next flag, not as a value.
		if (!value || value.startsWith('--')) {
			throw missingValue();
		}
		return value;
	}

	// Form 2: `--flag=value`
	const prefix = `${flag}=`;
	const withValue = argv.find((arg) => arg.startsWith(prefix));
	if (withValue !== undefined) {
		const value = withValue.slice(prefix.length);
		if (!value) {
			throw missingValue();
		}
		return value;
	}

	return undefined;
}
65+
3566
// Run if called directly
3667
if (require.main === module) {
3768
main().catch(console.error);
packages/@n8n/ai-workflow-builder.ee/evaluations/utils/csv-prompt-loader.ts

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import { parse } from 'csv-parse/sync';
2+
import { existsSync, readFileSync } from 'node:fs';
3+
import path from 'node:path';
4+
5+
import type { TestCase } from '../types/evaluation.js';
6+
7+
type ParsedCsvRow = string[];
8+
9+
/**
 * Decides whether a parsed CSV row is a header row.
 *
 * A row counts as a header when at least one of its cells, after trimming
 * and lower-casing, is exactly `prompt`.
 */
function isHeaderRow(row: ParsedCsvRow) {
	for (const cell of row) {
		const label = cell.trim().toLowerCase();
		if (label === 'prompt') {
			return true;
		}
	}

	return false;
}
12+
13+
/**
 * Finds the position of a named column in a header row.
 *
 * Header cells are trimmed and lower-cased before comparison; `name` is
 * lower-cased but not trimmed.
 *
 * @returns The zero-based index of the first matching cell, or `undefined`
 *   when no cell matches.
 */
function detectColumnIndex(header: ParsedCsvRow, name: string) {
	const wanted = name.toLowerCase();

	for (const [column, cell] of header.entries()) {
		if (cell.trim().toLowerCase() === wanted) {
			return column;
		}
	}

	return undefined;
}
18+
19+
/**
 * Normalises an optional cell value: a missing value becomes the empty
 * string, anything else is returned with surrounding whitespace removed.
 */
function sanitizeValue(value: string | undefined) {
	if (value == null) {
		return '';
	}

	return value.trim();
}
22+
23+
/**
 * Derives a short display name for a test case from its prompt text.
 *
 * Runs of whitespace are collapsed to single spaces and the result is
 * trimmed. Names longer than 60 UTF-16 code units are cut and suffixed with
 * `...` so the total stays within 60 units.
 *
 * @param prompt - The prompt text to summarise.
 * @param index - Zero-based row position, used only for the fallback name.
 * @returns The normalised (possibly truncated) prompt, or `CSV Prompt <index + 1>`
 *   when the prompt contains nothing but whitespace.
 */
function generateNameFromPrompt(prompt: string, index: number) {
	const normalized = prompt.replace(/\s+/g, ' ').trim();
	if (!normalized) {
		return `CSV Prompt ${index + 1}`;
	}

	const maxLength = 60;
	if (normalized.length <= maxLength) {
		return normalized;
	}

	const ellipsis = '...';
	let cut = maxLength - ellipsis.length;

	// `slice` works on UTF-16 code units. If the last unit we would keep is a
	// high surrogate, its low surrogate falls on the other side of the cut and
	// the name would end in a broken character — drop the dangling half too.
	const lastKeptUnit = normalized.charCodeAt(cut - 1);
	if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
		cut -= 1;
	}

	return `${normalized.slice(0, cut)}${ellipsis}`;
}
36+
37+
/**
 * Parses raw CSV text into an array of rows, each row an array of cell strings.
 *
 * A leading byte-order mark is removed before parsing. The parser is asked
 * for positional rows (no column mapping), to skip empty lines, to trim
 * cells, and to tolerate rows with differing column counts. Nullish cells
 * are replaced with the empty string.
 *
 * @throws Error prefixed with `Failed to parse CSV file:` when parsing fails.
 */
function parseCsv(content: string): ParsedCsvRow[] {
	try {
		const withoutBom = content.replace(/^\ufeff/, '');
		const parserOptions = {
			columns: false,
			skip_empty_lines: true,
			trim: true,
			relax_column_count: true,
		} as const;

		const parsedRows = parse(withoutBom, parserOptions) as ParsedCsvRow[];

		const normalizedRows: ParsedCsvRow[] = [];
		for (const parsedRow of parsedRows) {
			normalizedRows.push(parsedRow.map((cell) => cell ?? ''));
		}
		return normalizedRows;
	} catch (error) {
		let message = 'Unknown parsing error';
		if (error instanceof Error) {
			message = error.message;
		}
		throw new Error(`Failed to parse CSV file: ${message}`);
	}
}
52+
53+
/**
 * Reads a CSV file and converts its rows into evaluation test cases.
 *
 * The first row is treated as a header when it contains a `prompt` cell; in
 * that case the `prompt`, `id` and `name` (or `title`) columns are located
 * by header label. Without a header, the first column is the prompt and no
 * id/name columns are read. Rows whose prompt cell is empty are skipped.
 * Missing ids fall back to `csv-case-<row number>` and missing names are
 * derived from the prompt text.
 *
 * @param csvPath - Absolute path, or a path relative to the current working directory.
 * @returns The test cases built from the file, in row order.
 * @throws Error when the file does not exist, is empty, has a header but no
 *   data rows, or contains no row with a non-empty prompt.
 */
export function loadTestCasesFromCsv(csvPath: string): TestCase[] {
	let resolvedPath = csvPath;
	if (!path.isAbsolute(csvPath)) {
		resolvedPath = path.resolve(process.cwd(), csvPath);
	}

	if (!existsSync(resolvedPath)) {
		throw new Error(`CSV file not found at ${resolvedPath}`);
	}

	const rows = parseCsv(readFileSync(resolvedPath, 'utf8'));
	if (rows.length === 0) {
		throw new Error('The provided CSV file is empty');
	}

	// Split an optional header row off from the data rows.
	const hasHeader = isHeaderRow(rows[0]);
	const header = hasHeader ? rows[0] : undefined;
	const dataRows = hasHeader ? rows.slice(1) : rows;

	if (dataRows.length === 0) {
		throw new Error('No prompt rows found in the provided CSV file');
	}

	// Column positions: only the prompt column has a positional default.
	let promptIndex = 0;
	let idIndex: number | undefined;
	let nameIndex: number | undefined;
	if (header) {
		promptIndex = detectColumnIndex(header, 'prompt') ?? 0;
		idIndex = detectColumnIndex(header, 'id');
		nameIndex = detectColumnIndex(header, 'name') ?? detectColumnIndex(header, 'title');
	}

	// Reads a cell by optional column position; an absent column yields ''.
	const cellAt = (row: string[], column: number | undefined) =>
		sanitizeValue(column === undefined ? undefined : row[column]);

	const testCases: TestCase[] = [];
	for (const [rowNumber, row] of dataRows.entries()) {
		const prompt = sanitizeValue(row[promptIndex]);
		if (!prompt) {
			continue;
		}

		const idSource = cellAt(row, idIndex);
		const nameSource = cellAt(row, nameIndex);

		testCases.push({
			id: idSource || `csv-case-${rowNumber + 1}`,
			name: nameSource || generateNameFromPrompt(prompt, rowNumber),
			prompt,
		});
	}

	if (testCases.length === 0) {
		throw new Error('No valid prompts found in the provided CSV file');
	}

	return testCases;
}

packages/@n8n/ai-workflow-builder.ee/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"deps:orphans": "madge src/index.ts --orphans",
2222
"deps:all": "pnpm run deps:graph && pnpm run deps:graph:service && pnpm run deps:graph:tools && pnpm run deps:circular && pnpm run deps:report",
2323
"eval": "tsx evaluations",
24+
"eval:csv": "tsx evaluations --prompts-csv",
2425
"eval:langsmith": "USE_LANGSMITH_EVAL=true tsx evaluations",
2526
"eval:generate": "GENERATE_TEST_CASES=true tsx evaluations"
2627
},
@@ -46,6 +47,7 @@
4647
"@n8n/config": "workspace:*",
4748
"@n8n/di": "workspace:*",
4849
"@n8n_io/ai-assistant-sdk": "catalog:",
50+
"csv-parse": "5.5.0",
4951
"langsmith": "^0.3.45",
5052
"lodash": "catalog:",
5153
"n8n-workflow": "workspace:*",

pnpm-lock.yaml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)