From b2b164bc7d7fbc10a873b7f670f53c863dcfd3d6 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 26 Jun 2026 01:42:38 -0400 Subject: [PATCH 1/2] feat(schema): add optional crossImplementation.suiteScenarioIds corpus field Corpus entry ids are file/line-derived and unusable as cross-repo join keys, so a tests renderer cannot place per-test "Other implementations" rows next to a safe-docx scenario. Add an optional, additive crossImplementation.suiteScenarioIds field to the corpus contract, authored via a new @suiteScenarioIds JSDoc tag (a comma/whitespace list of join keys, parsed statically), and emitted only when present. The keys are not prose, so they live outside the word-count tagDefinitions and outside the entry narrative object. Ref: #391 --- .../proposal.md | 30 ++++++++++++ .../specs/test-corpus-narrative/spec.md | 46 +++++++++++++++++++ .../tasks.md | 13 ++++++ .../test-narrative/src/astExtractor.test.ts | 46 +++++++++++++++++++ packages/test-narrative/src/astExtractor.ts | 40 +++++++++++++++- packages/test-narrative/src/index.ts | 3 ++ packages/test-narrative/src/tagSchema.test.ts | 28 +++++++++++ packages/test-narrative/src/tagSchema.ts | 16 +++++++ scripts/build_tests_corpus.mjs | 20 ++++++++ scripts/generate_tests_corpus_schema.mjs | 18 ++++++++ tests-corpus.schema.json | 22 +++++++++ 11 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 openspec/changes/add-corpus-cross-implementation-join/proposal.md create mode 100644 openspec/changes/add-corpus-cross-implementation-join/specs/test-corpus-narrative/spec.md create mode 100644 openspec/changes/add-corpus-cross-implementation-join/tasks.md diff --git a/openspec/changes/add-corpus-cross-implementation-join/proposal.md b/openspec/changes/add-corpus-cross-implementation-join/proposal.md new file mode 100644 index 00000000..256d66fa --- /dev/null +++ b/openspec/changes/add-corpus-cross-implementation-join/proposal.md @@ -0,0 +1,30 @@ +# Change: Corpus cross-implementation suite join 
keys + +## Why + +The tests-renderer matrix page consumes the cross-impl suite repo's published +results JSON keyed by suite scenario id. Corpus entry ids are file/line-derived +(`scripts/build_tests_corpus.mjs`) and are unusable as cross-repo join keys, so +a renderer cannot place per-test-page "Other implementations" rows next to a +safe-docx scenario. The corpus contract needs an explicit, optional place to +carry the suite scenario ids a given test corresponds to. (Ref: #391, #283.) + +## What Changes + +- Add an optional `crossImplementation: { suiteScenarioIds: string[] }` field to + each corpus entry in `tests-corpus.schema.json`. Additive and optional — entries + without the field stay valid. +- Add a `@suiteScenarioIds` narrative JSDoc tag (a comma- or whitespace-separated list of + suite scenario ids), parsed statically by the AST extractor. It is a list of + join keys, not prose, so it lives outside the word-count `tagDefinitions` and + outside the entry `narrative` object. +- Populate the entry's `crossImplementation` from the parsed tag in + `scripts/build_tests_corpus.mjs`, emitting the field only when the tag is present. 
+ +## Impact + +- Affected specs: test-corpus-narrative +- Affected code: `packages/test-narrative/src/tagSchema.ts`, + `packages/test-narrative/src/astExtractor.ts`, + `scripts/generate_tests_corpus_schema.mjs`, + `scripts/build_tests_corpus.mjs`, `tests-corpus.schema.json` diff --git a/openspec/changes/add-corpus-cross-implementation-join/specs/test-corpus-narrative/spec.md b/openspec/changes/add-corpus-cross-implementation-join/specs/test-corpus-narrative/spec.md new file mode 100644 index 00000000..8c1d2b8e --- /dev/null +++ b/openspec/changes/add-corpus-cross-implementation-join/specs/test-corpus-narrative/spec.md @@ -0,0 +1,46 @@ +# test-corpus-narrative Specification (delta) + +## ADDED Requirements + +### Requirement: Corpus entries carry cross-implementation suite join keys + +The corpus schema SHALL permit each corpus entry to carry an optional +`crossImplementation` object whose `suiteScenarioIds` array lists the +cross-implementation suite scenario ids the test corresponds to. The field is +the renderer-facing join key between a +safe-docx corpus entry and the cross-impl suite repo's published results JSON, +because corpus entry ids are file/line-derived and unusable as cross-repo keys. +The field is optional and additive: entries without it remain valid against +`tests-corpus.schema.json`, and when present the array MUST contain at least one +non-empty id with no duplicates. + +The ids SHALL be authored as a `@suiteScenarioIds` JSDoc tag above the +`test.openspec(...)(...)` call, holding a comma- or whitespace-separated list of +ids. The AST extractor SHALL parse the tag statically into a string array. The +join keys are not prose, so they MUST NOT be subject to the narrative word-count +tag rules and MUST NOT appear inside the entry's `narrative` object. +`scripts/build_tests_corpus.mjs` SHALL emit `crossImplementation` only when the +tag is present. 
+ +#### Scenario: suite scenario ids are extracted from the tag + +- **GIVEN** a test with a `@suiteScenarioIds docx/track-changes/a, docx/track-changes/b` JSDoc tag +- **WHEN** the AST extractor processes the test +- **THEN** the scenario evidence SHALL include a `suiteScenarioIds` array equal to + `["docx/track-changes/a", "docx/track-changes/b"]` +- **AND** the parsed `narrative` object SHALL NOT contain a `suiteScenarioIds` key + +#### Scenario: corpus omits the field when the tag is absent + +- **GIVEN** a test with no `@suiteScenarioIds` tag +- **WHEN** `scripts/build_tests_corpus.mjs` emits the corpus entry +- **THEN** the entry SHALL NOT include a `crossImplementation` field +- **AND** the entry SHALL remain valid against `tests-corpus.schema.json` + +#### Scenario: schema accepts the optional field + +- **GIVEN** a corpus entry that includes `crossImplementation` with a non-empty + `suiteScenarioIds` array +- **WHEN** the entry is validated against the generated `tests-corpus.schema.json` +- **THEN** validation SHALL pass +- **AND** an entry that omits `crossImplementation` SHALL also pass diff --git a/openspec/changes/add-corpus-cross-implementation-join/tasks.md b/openspec/changes/add-corpus-cross-implementation-join/tasks.md new file mode 100644 index 00000000..a0447e89 --- /dev/null +++ b/openspec/changes/add-corpus-cross-implementation-join/tasks.md @@ -0,0 +1,13 @@ +## 1. Implementation + +- [x] 1.1 Add `@suiteScenarioIds` tag name + Zod validator to `tagSchema.ts` +- [x] 1.2 Parse `@suiteScenarioIds` into `ScenarioEvidence.suiteScenarioIds` in `astExtractor.ts` +- [x] 1.3 Add optional `crossImplementation.suiteScenarioIds` to the generated schema +- [x] 1.4 Populate `crossImplementation` in `build_tests_corpus.mjs` when present +- [x] 1.5 Regenerate and commit `tests-corpus.schema.json` + +## 2. 
Tests + +- [x] 2.1 AST extractor test: `@suiteScenarioIds` parses to a string array +- [x] 2.2 tagSchema test: suite-scenario-ids validator accepts/rejects ids +- [x] 2.3 Generator test: emitted schema carries optional `crossImplementation` diff --git a/packages/test-narrative/src/astExtractor.test.ts b/packages/test-narrative/src/astExtractor.test.ts index 2ef75cdd..9bc34abb 100644 --- a/packages/test-narrative/src/astExtractor.test.ts +++ b/packages/test-narrative/src/astExtractor.test.ts @@ -307,6 +307,52 @@ describe("extractScenarios", () => { expect(Object.keys(scenario!.narrative).sort()).toEqual(["motivatingProblem"]); }); + it("extracts @suiteScenarioIds as a string array outside the narrative object", () => { + const filePath = writeFixture(` + const test = testAllure.epic("DOCX Primitives").withLabels({ feature: "Track Changes", visibility: "public" }); + + /** + * @suiteScenarioIds docx/track-changes/a, docx/track-changes/b + * @motivatingProblem ${words(60)} + */ + test.openspec("join keys")("Scenario: cross-impl join", async () => {}); + `); + + const [scenario] = extractScenarios(filePath); + + expect(scenario?.suiteScenarioIds).toEqual(["docx/track-changes/a", "docx/track-changes/b"]); + expect(scenario?.narrative as Record).not.toHaveProperty("suiteScenarioIds"); + expect(scenario?.narrative).toEqual({ motivatingProblem: words(60) }); + }); + + it("splits @suiteScenarioIds on commas and whitespace across multiple lines", () => { + const filePath = writeFixture(` + const test = testAllure.epic("DOCX Primitives").withLabels({ feature: "Track Changes" }); + + /** + * @suiteScenarioIds docx/one + * docx/two,docx/three + */ + test.openspec("multiline")("Scenario: multiline ids", async () => {}); + `); + + const [scenario] = extractScenarios(filePath); + + expect(scenario?.suiteScenarioIds).toEqual(["docx/one", "docx/two", "docx/three"]); + }); + + it("omits suiteScenarioIds when no @suiteScenarioIds tag is present", () => { + const filePath = 
writeFixture(` + const test = testAllure.epic("DOCX Primitives").withLabels({ feature: "Track Changes" }); + + test.openspec("none")("Scenario: no join keys", async () => {}); + `); + + const [scenario] = extractScenarios(filePath); + + expect(scenario?.suiteScenarioIds).toBeUndefined(); + }); + it("preserves rejected aliases in the narrative so the validator can report them explicitly", () => { // The extractor distinguishes "unknown JSDoc tag" (drop) from // "rejected alias the schema knows about" (keep, so the validator can diff --git a/packages/test-narrative/src/astExtractor.ts b/packages/test-narrative/src/astExtractor.ts index 006fb9b1..769bf6ec 100644 --- a/packages/test-narrative/src/astExtractor.ts +++ b/packages/test-narrative/src/astExtractor.ts @@ -3,7 +3,13 @@ import fs from "node:fs"; import { parse } from "@typescript-eslint/parser"; import type { TSESTree } from "@typescript-eslint/types"; -import { rejectedAliases, tagDefinitions, type NarrativeVisibility, type TagName } from "./tagSchema.js"; +import { + rejectedAliases, + SUITE_SCENARIO_IDS_TAG, + tagDefinitions, + type NarrativeVisibility, + type TagName +} from "./tagSchema.js"; const KNOWN_NARRATIVE_KEYS = new Set([ ...Object.keys(tagDefinitions), @@ -51,6 +57,7 @@ export type ScenarioEvidence = { sourceRef: SourceRef; visibility?: NarrativeVisibility; narrative: Partial>; + suiteScenarioIds?: string[]; bddSteps: BddStepEvidence[]; fixtures: FixtureEvidence[]; expectArgs: ExpectArgEvidence[]; @@ -238,6 +245,35 @@ function extractNarrative(commentValue: string | undefined): Partial id.trim()) + .filter(Boolean); + return ids.length > 0 ? 
ids : undefined; +} + function findLeadingJsDoc( ast: TSESTree.Program, source: string, @@ -426,11 +462,13 @@ export function extractScenarios(filePath: string): ScenarioEvidence[] { const comment = findLeadingJsDoc(ast, source, node); const body = collectScenarioBody(node); const evidence = extractBodyEvidence(body, filePath, source); + const suiteScenarioIds = extractSuiteScenarioIds(comment?.value); scenarios.push({ scenarioName: extractScenarioName(node, source), sourceRef: sourceRefFor(filePath, node), visibility: visibilityForScenarioCall(node, openspecCall, fileBindings), narrative: extractNarrative(comment?.value), + ...(suiteScenarioIds ? { suiteScenarioIds } : {}), ...evidence }); }); diff --git a/packages/test-narrative/src/index.ts b/packages/test-narrative/src/index.ts index 3004ee31..cafe9da8 100644 --- a/packages/test-narrative/src/index.ts +++ b/packages/test-narrative/src/index.ts @@ -1,9 +1,12 @@ export { CANONICAL_SECTION_ORDER, rejectedAliases, + SUITE_SCENARIO_IDS_TAG, + suiteScenarioIdsSchema, tagDefinitions, tagSchema, validateTags, + type SuiteScenarioIds, type TagName, type NarrativeTags, type NarrativeVisibility, diff --git a/packages/test-narrative/src/tagSchema.test.ts b/packages/test-narrative/src/tagSchema.test.ts index d670542b..78c443e3 100644 --- a/packages/test-narrative/src/tagSchema.test.ts +++ b/packages/test-narrative/src/tagSchema.test.ts @@ -3,6 +3,8 @@ import { describe, expect } from "vitest"; import { CANONICAL_SECTION_ORDER, rejectedAliases, + SUITE_SCENARIO_IDS_TAG, + suiteScenarioIdsSchema, tagDefinitions, tagSchema, validateTags, @@ -143,4 +145,30 @@ describe("tagSchema", () => { expect(CANONICAL_SECTION_ORDER).toContain(tagName); } }); + + it("keeps the suite-scenario-id tag outside the prose tag definitions", () => { + expect(SUITE_SCENARIO_IDS_TAG).toBe("suiteScenarioIds"); + expect(Object.keys(tagDefinitions)).not.toContain(SUITE_SCENARIO_IDS_TAG); + }); + + it("accepts a non-empty list of unique suite scenario 
ids", () => { + expect(suiteScenarioIdsSchema.safeParse(["docx/a", "docx/b"]).success).toBe(true); + }); + + it("rejects an empty suite-scenario-id list", () => { + expect(suiteScenarioIdsSchema.safeParse([]).success).toBe(false); + }); + + it("rejects blank suite scenario ids", () => { + expect(suiteScenarioIdsSchema.safeParse(["docx/a", " "]).success).toBe(false); + }); + + it("rejects duplicate suite scenario ids", () => { + const result = suiteScenarioIdsSchema.safeParse(["docx/a", "docx/a"]); + + expect(result.success).toBe(false); + if (!result.success) { + expect(JSON.stringify(result.error.issues)).toContain("duplicates"); + } + }); }); diff --git a/packages/test-narrative/src/tagSchema.ts b/packages/test-narrative/src/tagSchema.ts index 0d42069d..75b03825 100644 --- a/packages/test-narrative/src/tagSchema.ts +++ b/packages/test-narrative/src/tagSchema.ts @@ -62,6 +62,22 @@ export const tagDefinitions = { export type TagName = keyof typeof tagDefinitions; +// Cross-implementation suite join keys. Authored as a `@suiteScenarioIds` +// JSDoc tag (comma/whitespace-separated list), these are renderer-facing join +// keys between a corpus entry and the cross-impl suite repo's results JSON. +// They are NOT prose, so they live outside `tagDefinitions` (no word counts) +// and outside the entry `narrative` object. 
+export const SUITE_SCENARIO_IDS_TAG = "suiteScenarioIds"; + +export const suiteScenarioIdsSchema = z + .array(z.string().trim().min(1).max(200)) + .min(1) + .refine((ids) => new Set(ids).size === ids.length, { + message: "suiteScenarioIds must not contain duplicates" + }); + +export type SuiteScenarioIds = z.infer; + export const rejectedAliases = [ "limitation", "aiContext", diff --git a/scripts/build_tests_corpus.mjs b/scripts/build_tests_corpus.mjs index eebe9531..ba39b397 100644 --- a/scripts/build_tests_corpus.mjs +++ b/scripts/build_tests_corpus.mjs @@ -15,6 +15,7 @@ import Ajv from 'ajv'; import { CANONICAL_SECTION_ORDER, extractScenarios, + suiteScenarioIdsSchema, validateTags, } from '../packages/test-narrative/dist/index.js'; import { loadRegistry } from './lib/conformance-registry.mjs'; @@ -241,6 +242,23 @@ function sectionsForEntry(scenario, result, conformanceClaims) { return CANONICAL_SECTION_ORDER.filter((section) => present.has(section)); } +function buildCrossImplementation(fileRel, scenario) { + // Renderer-facing join keys between this corpus entry and the cross-impl + // suite repo. Authored as a `@suiteScenarioIds` JSDoc tag; emitted only when + // present so entries without it stay clean and schema-valid. 
+ if (!scenario.suiteScenarioIds) return undefined; + const parsed = suiteScenarioIdsSchema.safeParse(scenario.suiteScenarioIds); + if (!parsed.success) { + const issues = parsed.error.issues + .map((issue) => `${issue.path.join('.') || ''}: ${issue.message}`) + .join('; '); + throw new Error( + `${fileRel}:${scenario.sourceRef.line}: invalid @suiteScenarioIds tag: ${issues}`, + ); + } + return { suiteScenarioIds: [...parsed.data] }; +} + function buildCorpusEntries() { const registry = loadRegistry(); if (registry.errors.length > 0) { @@ -306,12 +324,14 @@ function buildCorpusEntries() { const conformanceClaims = resolveConformanceClaims(matchedResult.result, registry); const results = serializeResult(matchedResult.result); + const crossImplementation = buildCrossImplementation(file.rel, scenario); entries.push({ id: stableEntryId(packageName, scenario), package: packageName, scenarioName: normalizeScenarioName(scenario.scenarioName), sourceRef: serializeSourceRef(scenario.sourceRef), sections: sectionsForEntry(scenario, results, conformanceClaims), + ...(crossImplementation ? 
{ crossImplementation } : {}), narrative: { ...scenario.narrative }, scenario: { bddSteps: scenario.bddSteps.map(serializeBddStep), diff --git a/scripts/generate_tests_corpus_schema.mjs b/scripts/generate_tests_corpus_schema.mjs index 0666fef3..774dc073 100644 --- a/scripts/generate_tests_corpus_schema.mjs +++ b/scripts/generate_tests_corpus_schema.mjs @@ -96,6 +96,7 @@ export function buildTestsCorpusSchema() { items: { $ref: '#/$defs/SectionIdentifier' }, uniqueItems: true, }, + crossImplementation: { $ref: '#/$defs/CrossImplementation' }, narrative: { type: 'object', additionalProperties: false, @@ -130,6 +131,23 @@ export function buildTestsCorpusSchema() { }, }, }, + CrossImplementation: { + type: 'object', + additionalProperties: false, + required: ['suiteScenarioIds'], + properties: { + suiteScenarioIds: { + type: 'array', + items: { + type: 'string', + minLength: 1, + maxLength: 200, + }, + minItems: 1, + uniqueItems: true, + }, + }, + }, SourceRef: { type: 'object', additionalProperties: false, diff --git a/tests-corpus.schema.json b/tests-corpus.schema.json index 29c3dcc2..a4b50ada 100644 --- a/tests-corpus.schema.json +++ b/tests-corpus.schema.json @@ -72,6 +72,9 @@ }, "uniqueItems": true }, + "crossImplementation": { + "$ref": "#/$defs/CrossImplementation" + }, "narrative": { "type": "object", "additionalProperties": false, @@ -161,6 +164,25 @@ } } }, + "CrossImplementation": { + "type": "object", + "additionalProperties": false, + "required": [ + "suiteScenarioIds" + ], + "properties": { + "suiteScenarioIds": { + "type": "array", + "items": { + "type": "string", + "minLength": 1, + "maxLength": 200 + }, + "minItems": 1, + "uniqueItems": true + } + } + }, "SourceRef": { "type": "object", "additionalProperties": false, From 0af3e557cb678dde848f83b363f9ef862bc080d1 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 26 Jun 2026 01:54:23 -0400 Subject: [PATCH 2/2] refactor(test-narrative): share JSDoc tag parsing across extractors extractNarrative 
and extractSuiteScenarioIds each re-implemented the same JSDoc comment loop (split lines, strip the `* ` gutter, match @tag/continuation lines). The sibling derivation could silently drift from the canonical narrative parse. Consolidate both onto a single parseJsDocTags helper so tag recognition has one source of truth, with extractNarrative last-winning per tag and extractSuiteScenarioIds accumulating across repeats. Behavior and schema are unchanged; existing tests pass. Addresses the LLM-gate "re-derived facts vs canonical sources" finding. Ref: #391 --- packages/test-narrative/src/astExtractor.ts | 76 +++++++++++---------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/packages/test-narrative/src/astExtractor.ts b/packages/test-narrative/src/astExtractor.ts index 769bf6ec..452a9c1b 100644 --- a/packages/test-narrative/src/astExtractor.ts +++ b/packages/test-narrative/src/astExtractor.ts @@ -211,58 +211,60 @@ function evidenceForExpression( }; } +type JsDocTag = { tag: string; lines: string[] }; + +/** + * Canonical JSDoc-tag parser shared by every narrative derivation. + * + * Splits a block comment into an ordered list of `{ tag, lines }` entries: + * the opening line's post-tag text plus any continuation lines up to the next + * tag. Repeated tags yield repeated entries (callers decide whether to + * accumulate or last-win). Lines before the first tag are ignored. Keeping + * this loop in one place means `extractNarrative` and `extractSuiteScenarioIds` + * cannot drift in how they recognize tags or strip the leading `* ` gutter. + */ +function parseJsDocTags(commentValue: string | undefined): JsDocTag[] { + const tags: JsDocTag[] = []; + if (!commentValue) return tags; + + let current: JsDocTag | undefined; + for (const rawLine of commentValue.split("\n")) { + const line = rawLine.replace(/^\s*\* ?/, "").trimEnd(); + const tagMatch = line.match(/^@([A-Za-z][\w-]*)\s*(.*)$/); + if (tagMatch) { + current = { tag: tagMatch[1] ?? 
"", lines: [tagMatch[2] ?? ""] }; + tags.push(current); + continue; + } + if (current) current.lines.push(line.trim()); + } + + return tags; +} + function extractNarrative(commentValue: string | undefined): Partial> { const narrative: Record = {}; - if (!commentValue) return narrative; - - let currentTag: string | undefined; - let currentLines: string[] = []; - const flush = () => { - if (!currentTag) return; + for (const { tag, lines } of parseJsDocTags(commentValue)) { // Only emit tags that the schema cares about. Unknown JSDoc tags // (@see, @example, @deprecated, etc.) are part of normal TS convention // and must not poison validation. Known-but-rejected aliases stay so the // downstream validator can produce an explicit "this alias is forbidden" - // error rather than silently dropping it. - if (KNOWN_NARRATIVE_KEYS.has(currentTag)) { - narrative[currentTag] = currentLines.join(" ").replace(/\s+/g, " ").trim(); + // error rather than silently dropping it. A repeated tag last-wins. + if (KNOWN_NARRATIVE_KEYS.has(tag)) { + narrative[tag] = lines.join(" ").replace(/\s+/g, " ").trim(); } - }; - - for (const rawLine of commentValue.split("\n")) { - const line = rawLine.replace(/^\s*\* ?/, "").trimEnd(); - const tagMatch = line.match(/^@([A-Za-z][\w-]*)\s*(.*)$/); - if (tagMatch) { - flush(); - currentTag = tagMatch[1]; - currentLines = [tagMatch[2] ?? ""]; - continue; - } - if (currentTag) currentLines.push(line.trim()); } - flush(); return narrative; } function extractSuiteScenarioIds(commentValue: string | undefined): string[] | undefined { - if (!commentValue) return undefined; - let seen = false; - let collecting = false; const parts: string[] = []; - for (const rawLine of commentValue.split("\n")) { - const line = rawLine.replace(/^\s*\* ?/, "").trimEnd(); - const tagMatch = line.match(/^@([A-Za-z][\w-]*)\s*(.*)$/); - if (tagMatch) { - collecting = tagMatch[1] === SUITE_SCENARIO_IDS_TAG; - if (collecting) { - seen = true; - parts.push(tagMatch[2] ?? 
""); - } - continue; - } - if (collecting) parts.push(line.trim()); + for (const { tag, lines } of parseJsDocTags(commentValue)) { + if (tag !== SUITE_SCENARIO_IDS_TAG) continue; + seen = true; + parts.push(...lines); } if (!seen) return undefined;