Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions src/api/parse.integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,71 @@ describe('POST /parse integration', () => {
});
});

/**
 * Integration coverage for #53: the API parse path must persist
 * `spec_references` rows, and re-uploading the same section must reuse the
 * spec row while replacing its paragraphs and references.
 */
describe('POST /parse — refs persistence + replace-on-reparse (#53)', () => {
  /** Deletes any spec row left over from an earlier run that used the same fixture. */
  async function deleteFixtureSpec(): Promise<void> {
    await pool.query(`DELETE FROM specs WHERE section = '01 11 00' AND source = 'ufgs'`);
  }

  /**
   * Uploads the SEC fixture through POST /parse, waits for the job to finish,
   * and returns the spec id reported in the job result.
   *
   * @throws Error when the completed job carries no specId.
   */
  async function postSecFixture(): Promise<string> {
    const secContent = readFileSync(
      resolve('docs/references/UFGS/DIVISION_01/01_11_00.SEC'),
      'utf-8'
    );
    const form = new FormData();
    form.append('file', new Blob([secContent], { type: 'text/plain' }), '01_11_00.sec');
    const postRes = await fetch(`${baseUrl}/parse`, { method: 'POST', body: form });
    expect(postRes.status).toBe(202);
    const postBody = (await postRes.json()) as { success: boolean; data: { jobId: string } };
    const job = await waitForJob(postBody.data.jobId);
    expect(job.status).toBe('complete');
    const specId = job.result?.specId;
    if (!specId) throw new Error('expected specId in completed job result');
    return specId;
  }

  /** Number of spec_references rows whose source is the given spec. */
  async function countRefs(specId: string): Promise<number> {
    const result = await pool.query<{ count: string }>(
      'SELECT COUNT(*) AS count FROM spec_references WHERE source_spec_id = $1',
      [specId]
    );
    // The count column is typed as a string here, so parse it before comparing.
    return parseInt(result.rows[0]?.count ?? '0', 10);
  }

  /** Ids of all paragraphs currently stored for the given spec, ordered by id. */
  async function paragraphIds(specId: string): Promise<string[]> {
    const result = await pool.query<{ id: string }>(
      'SELECT id FROM paragraphs WHERE spec_id = $1 ORDER BY id',
      [specId]
    );
    return result.rows.map((row) => row.id);
  }

  it('persists spec_references rows on API path (SEC fixture with SRF tags)', async () => {
    // Ensure clean slate — delete any spec from a previous test run that used the same fixture.
    await deleteFixtureSpec();
    const specId = await postSecFixture();
    cleanupIds.push(specId);

    expect(await countRefs(specId)).toBeGreaterThan(0);
  }, 30_000);

  it('re-POST same fixture returns same specId (upsert) and replaces paragraphs + refs', async () => {
    await deleteFixtureSpec();
    const firstSpecId = await postSecFixture();
    cleanupIds.push(firstSpecId);

    const firstIds = await paragraphIds(firstSpecId);
    expect(firstIds.length).toBeGreaterThan(0);

    const secondSpecId = await postSecFixture();
    // If the upsert regressed and created a second row, register it so it still gets cleaned up.
    if (secondSpecId !== firstSpecId) cleanupIds.push(secondSpecId);
    // Upsert: same row, same id.
    expect(secondSpecId).toBe(firstSpecId);

    const secondIds = await paragraphIds(secondSpecId);
    // Guard against a vacuous pass: "no overlap" is trivially true if the
    // re-parse deleted the old paragraphs but inserted none.
    expect(secondIds.length).toBeGreaterThan(0);

    // Replace-on-reparse: old paragraph ids gone, new ids present.
    const firstSet = new Set(firstIds);
    const overlap = secondIds.filter((id) => firstSet.has(id));
    expect(overlap).toEqual([]);

    // Refs still present after re-upload (deleted + reinserted).
    expect(await countRefs(secondSpecId)).toBeGreaterThan(0);
  }, 60_000);
});

describe('POST /parse — .txt upload', () => {
it('accepts .txt file and returns 202 with jobId', async () => {
const fixture = readFileSync(resolve('tests/fixtures/text/numbered-prefixes.txt'));
Expand Down
126 changes: 122 additions & 4 deletions src/api/parse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ vi.mock('../parser/index.js', () => ({
}));
// Replace the parse-pool module with a stub whose `parsePool.run` is a vi.fn()
// resolving to a minimal worker result (empty-parts tree).
vi.mock('../lib/parse-pool.js', () => ({
parsePool: {
run: vi.fn().mockResolvedValue({ tree: { id: '', section: 'test', title: 'T', parts: [] } }),
run: vi
.fn()
.mockResolvedValue({ tree: { id: '', section: 'test', title: 'T', parts: [] }, refs: [] }),
},
}));
vi.mock('../lib/jobs.js', () => ({
Expand All @@ -18,9 +20,7 @@ vi.mock('../lib/jobs.js', () => ({
getJob: vi.fn(),
}));
// Replace the db module's exports with vi.fn() stubs for these unit tests.
vi.mock('../db/index.js', () => ({
pool: { connect: vi.fn(), query: vi.fn() },
createSpec: vi.fn(),
insertTree: vi.fn(),
persistParsedSpec: vi.fn().mockResolvedValue('persisted-spec-id'),
}));
vi.mock('../lib/logger.js', () => ({
logger: { info: vi.fn(), error: vi.fn(), debug: vi.fn(), warn: vi.fn() },
Expand All @@ -35,6 +35,7 @@ function makeRes(): Response {

// Runs before every test: reset the module registry, then clear all mocks.
beforeEach(function resetTestState() {
  vi.resetModules();
  vi.clearAllMocks();
});

describe('parseHandler', () => {
Expand Down Expand Up @@ -163,3 +164,120 @@ describe('parseJobHandler', () => {
expect(res.status).toHaveBeenCalledWith(200);
});
});

/**
 * Unit coverage for #53: whatever `refs` the worker returns (populated,
 * omitted, or empty) must reach persistParsedSpec in the expected form.
 */
describe('processParseJob refs persistence (#53)', () => {
  /** Minimal tree returned by the mocked worker; only the id differs between tests. */
  function makeTree(id: string) {
    return { id, section: '01 11 00', title: 'T', parts: [] };
  }

  /**
   * Sends a `.sec` upload through parseHandler, waits until the background
   * job has called persistParsedSpec, and returns the argument of that call.
   *
   * The caller must queue the worker result on `parsePool.run` beforehand.
   *
   * @param persistedId value the mocked persistParsedSpec resolves to.
   */
  async function persistArgAfterSecUpload(persistedId: string) {
    const { persistParsedSpec } = await import('../db/index.js');
    const { updateJob } = await import('../lib/jobs.js');
    vi.mocked(persistParsedSpec).mockResolvedValueOnce(persistedId);
    vi.mocked(updateJob).mockImplementation(() => {});

    const { parseHandler } = await import('./parse.js');
    const req = {
      file: {
        originalname: 'spec.sec',
        mimetype: 'text/xml',
        buffer: Buffer.from('<?xml?>', 'utf-8'),
      },
      body: {},
    } as unknown as Request;
    await parseHandler(req, makeRes());

    // processParseJob runs in the background after the handler responds. Poll
    // for its effect instead of assuming a fixed number of event-loop ticks,
    // which breaks as soon as the job gains or loses an `await`.
    await vi.waitFor(
      () => {
        expect(persistParsedSpec).toHaveBeenCalledTimes(1);
      },
      { timeout: 1_000, interval: 5 }
    );
    return vi.mocked(persistParsedSpec).mock.calls[0]?.[0];
  }

  it('forwards populated refs from worker output to persistParsedSpec', async () => {
    const { parsePool } = await import('../lib/parse-pool.js');
    const refs = [
      {
        sourceNodeId: 'a1b2c3d4-e5f6-4789-8abc-def012345678',
        targetType: 'section' as const,
        targetSpecSection: '01 33 00',
        referenceText: 'Section 01 33 00 SUBMITTAL PROCEDURES',
      },
    ];
    vi.mocked(parsePool.run).mockResolvedValueOnce({
      tree: makeTree('00000000-0000-0000-0000-000000000001'),
      refs,
    });

    const callArg = await persistArgAfterSecUpload('persisted-spec-id');

    expect(callArg?.tree.section).toBe('01 11 00');
    expect(callArg?.refs).toEqual(refs);
  });

  it('defaults refs to [] when worker omits the field (legacy worker output)', async () => {
    const { parsePool } = await import('../lib/parse-pool.js');
    // Worker omits refs entirely — schema .default([]) must fill it in.
    vi.mocked(parsePool.run).mockResolvedValueOnce({
      tree: makeTree('00000000-0000-0000-0000-000000000002'),
    });

    const callArg = await persistArgAfterSecUpload('persisted-spec-id-2');

    expect(callArg?.refs).toEqual([]);
  });

  it('forwards empty refs array unchanged when worker emits refs: []', async () => {
    const { parsePool } = await import('../lib/parse-pool.js');
    vi.mocked(parsePool.run).mockResolvedValueOnce({
      tree: makeTree('00000000-0000-0000-0000-000000000003'),
      refs: [],
    });

    const callArg = await persistArgAfterSecUpload('persisted-spec-id-3');

    expect(callArg?.refs).toEqual([]);
  });
});
27 changes: 5 additions & 22 deletions src/api/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ import { assertDocxSafe, assertSecSafe } from '../parser/index.js';
import { createJob, updateJob, getJob, type ParseStage } from '../lib/jobs.js';
import { parsePool } from '../lib/parse-pool.js';
import type { WorkerOutput } from '../lib/parse-worker.js';
import { pool, createSpec, insertTree } from '../db/index.js';
import { persistParsedSpec } from '../db/index.js';
import { logger } from '../lib/logger.js';
import type { SpecNode, SpecTree } from '../ast/types.js';
import { ParseWarningSchema } from '../ast/schemas.js';
import { ParseWarningSchema, SecRefSchema } from '../ast/schemas.js';

interface ParseBody {
readonly section?: string;
Expand Down Expand Up @@ -88,24 +88,6 @@ function countNodes(nodes: readonly SpecNode[]): number {
return nodes.reduce((sum, n) => sum + 1 + countNodes(n.children), 0);
}

/**
 * Persists a parsed tree inside a single transaction on one pooled client.
 *
 * Creates the spec row (source taken from the first part's meta, or
 * 'unknown' when there are no parts), inserts the tree under the new spec id,
 * and commits.
 *
 * @param tree parsed specification tree to store.
 * @returns the id returned by createSpec.
 * @throws rethrows whatever error caused the transaction to fail.
 */
async function persistTree(tree: SpecTree): Promise<string> {
  const client = await pool.connect();
  // Set when ROLLBACK itself fails, so the client is discarded rather than reused.
  let rollbackFailed = false;
  try {
    await client.query('BEGIN');
    const source = tree.parts[0]?.meta.source ?? 'unknown';
    const specId = await createSpec({ section: tree.section, title: tree.title, source }, client);
    const treeWithId: SpecTree = { ...tree, id: specId };
    await insertTree(treeWithId, specId, client);
    await client.query('COMMIT');
    return specId;
  } catch (err) {
    try {
      await client.query('ROLLBACK');
    } catch {
      // A failing ROLLBACK must not replace the error that caused it; the
      // original failure is what the caller needs to see.
      rollbackFailed = true;
    }
    throw err;
  } finally {
    // A truthy argument tells the pool to destroy the client instead of
    // returning it in an unknown transaction state.
    client.release(rollbackFailed);
  }
}

const workerOutputSchema = z.object({
tree: z.object({
id: z.string(),
Expand All @@ -114,6 +96,7 @@ const workerOutputSchema = z.object({
parts: z.array(z.unknown()),
warnings: z.array(ParseWarningSchema).optional(),
}),
refs: z.array(SecRefSchema).default([]),
capabilities: z.array(z.string()).optional(),
});

Expand All @@ -131,7 +114,7 @@ async function processParseJob(
onProgress('extracting', 10);
// Buffer from multer may reference a shared pool — structured clone (no transferList) is safe.
const workerRaw: unknown = await parsePool.run({ buffer, ext });
const { tree, capabilities } = workerOutputSchema.parse(workerRaw) as WorkerOutput;
const { tree, refs, capabilities } = workerOutputSchema.parse(workerRaw) as WorkerOutput;
onProgress('classifying', 75);

const finalTree: SpecTree = {
Expand All @@ -141,7 +124,7 @@ async function processParseJob(
};

updateJob(jobId, { stage: 'persisting', pct: 90, status: 'running' });
const specId = await persistTree(finalTree);
const specId = await persistParsedSpec({ tree: finalTree, refs });
const nodeCount = countNodes(finalTree.parts);

updateJob(jobId, {
Expand Down
1 change: 1 addition & 0 deletions src/ast/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export {
SpecNodeMetaSchema,
SpecNodeSchema,
SpecTreeSchema,
SecRefSchema,
PatchSpecBodySchema,
CreateProjectBodySchema,
AddSpecToProjectBodySchema,
Expand Down
17 changes: 17 additions & 0 deletions src/ast/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,23 @@ export const SpecNodeSchema: z.ZodType<SpecNode> = z.lazy(() =>
})
);

/** Non-empty string validator shared by the target field of each reference variant. */
const secRefNonEmptyText = z.string().check(z.minLength(1));

/** Variant tagged `targetType: 'section'`: requires `targetSpecSection`, forbids `standardCode`. */
const sectionSecRefSchema = z.object({
  sourceNodeId: z.uuid(),
  targetType: z.literal('section'),
  targetSpecSection: secRefNonEmptyText,
  standardCode: z.never().optional(),
  referenceText: z.string(),
});

/** Variant tagged `targetType: 'standard'`: requires `standardCode`, forbids `targetSpecSection`. */
const standardSecRefSchema = z.object({
  sourceNodeId: z.uuid(),
  targetType: z.literal('standard'),
  standardCode: secRefNonEmptyText,
  targetSpecSection: z.never().optional(),
  referenceText: z.string(),
});

/**
 * Schema for a reference record, discriminated on `targetType` so that each
 * variant carries exactly one of `targetSpecSection` / `standardCode`.
 */
export const SecRefSchema = z.discriminatedUnion('targetType', [
  sectionSecRefSchema,
  standardSecRefSchema,
]);

export const ParseWarningTypeSchema = z.enum([
'root-continuation',
'empty-part',
Expand Down
10 changes: 2 additions & 8 deletions src/ast/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { z } from 'zod';
import { NodeTypeSchema } from './schemas.js';
import { NodeTypeSchema, SecRefSchema } from './schemas.js';

/** Node type, inferred from NodeTypeSchema so the type and the runtime validator stay in sync. */
export type NodeType = z.infer<typeof NodeTypeSchema>;

Expand Down Expand Up @@ -34,10 +34,4 @@ export interface SpecTree {
readonly warnings?: readonly ParseWarning[];
}

/**
 * Hand-written shape of a reference record.
 * Both target fields are optional here; the interface does not tie either one
 * to a particular `targetType`.
 */
export interface SecRef {
// Id of the node the reference originates from.
readonly sourceNodeId: string;
// Kind of target being referenced.
readonly targetType: 'section' | 'standard';
// Presumably set when targetType is 'section' — not enforced by this interface.
readonly targetSpecSection?: string;
// Presumably set when targetType is 'standard' — not enforced by this interface.
readonly standardCode?: string;
// Text of the reference.
readonly referenceText: string;
}
/** Reference record type, inferred from SecRefSchema. */
export type SecRef = z.infer<typeof SecRefSchema>;
13 changes: 8 additions & 5 deletions src/lib/parse-worker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { parseSec, parseDocx, parseText, assertSecSafe } from '../parser/index.js';
import { extractRefsFromTree } from '../parser/index.js';
import { decodeTextBuffer } from './decode-text.js';
import type { SpecTree } from '../ast/types.js';
import type { SpecTree, SecRef } from '../ast/types.js';

export interface WorkerInput {
readonly buffer: Buffer;
Expand All @@ -9,23 +10,25 @@ export interface WorkerInput {

/** Value the parse worker resolves with for one parsed file. */
export interface WorkerOutput {
// Parsed specification tree.
readonly tree: SpecTree;
// References produced alongside the tree.
readonly refs: readonly SecRef[];
// Optional list of capability strings.
readonly capabilities?: readonly string[];
}

/**
 * Worker entry point: picks a parser based on `ext` ('.sec', '.txt', '.docx')
 * and returns the parsed tree, plus refs/capabilities where the branch
 * provides them.
 *
 * @param buffer raw bytes of the uploaded file.
 * @param ext file extension used to select the parser.
 * @throws Error when `ext` is not one of the handled extensions.
 */
export default async function parseWorker({ buffer, ext }: WorkerInput): Promise<WorkerOutput> {
if (ext === '.sec') {
const tree = parseSec(assertSecSafe(buffer)).tree;
return { tree };
const { tree, refs } = parseSec(assertSecSafe(buffer));
return { tree, refs };
}
if (ext === '.txt') {
// Decode the buffer to text before handing it to the text parser.
const rawText = decodeTextBuffer(buffer);
const parsed = parseText(rawText);
return { tree: parsed.tree, capabilities: parsed.capabilities };
return { tree: parsed.tree, refs: parsed.refs, capabilities: parsed.capabilities };
}
if (ext === '.docx') {
// validation already performed in main thread
const tree = await parseDocx(buffer);
return { tree };
const refs = extractRefsFromTree(tree);
return { tree, refs };
}
throw new Error(`unsupported extension in parse worker: ${ext}`);
}