diff --git a/src/server/infra/dream/dream-response-schemas.ts b/src/server/infra/dream/dream-response-schemas.ts index a3ee5ec4e..adfb2028a 100644 --- a/src/server/infra/dream/dream-response-schemas.ts +++ b/src/server/infra/dream/dream-response-schemas.ts @@ -21,6 +21,10 @@ export type ConsolidateResponse = z.infer // ── Synthesize ─────────────────────────────────────────────────────────────── +// Bounds are slightly above the prompt's soft targets (200 chars / 3-5 tags / +// 5-10 keywords) so a model that goes a little over still produces a usable +// synthesis instead of being rejected outright; the caps still prevent a +// runaway model from landing oversized text directly in card-mode YAML. export const SynthesisCandidateSchema = z.object({ claim: z.string(), confidence: z.number().min(0).max(1), @@ -28,7 +32,10 @@ export const SynthesisCandidateSchema = z.object({ domain: z.string(), fact: z.string(), })), + keywords: z.array(z.string()).max(15), placement: z.string(), + summary: z.string().max(500), + tags: z.array(z.string()).max(8), title: z.string(), }) diff --git a/src/server/infra/dream/operations/consolidate.ts b/src/server/infra/dream/operations/consolidate.ts index 39f5eeaeb..6e1f6ff01 100644 --- a/src/server/infra/dream/operations/consolidate.ts +++ b/src/server/infra/dream/operations/consolidate.ts @@ -295,7 +295,7 @@ function addFrontmatterFields(content: string, fields: Record): if (parsed && typeof parsed === 'object') { // Spread preserves existing key order; new fields are appended at end. const merged = {...parsed, ...fields} - const newYaml = yamlDump(merged, {flowLevel: 2, lineWidth: -1, sortKeys: false}).trimEnd() + const newYaml = yamlDump(merged, {flowLevel: 1, lineWidth: -1, sortKeys: false}).trimEnd() return `---\n${newYaml}\n---\n${body}` } } catch { @@ -305,7 +305,7 @@ function addFrontmatterFields(content: string, fields: Record): } // No valid frontmatter — prepend - const yaml = yamlDump(fields, {flowLevel: 2, lineWidth: -1, sortKeys: false}).trimEnd() + const yaml = yamlDump(fields, {flowLevel: 1, lineWidth: -1, sortKeys: false}).trimEnd() return `---\n${yaml}\n---\n${content}` } diff --git a/src/server/infra/dream/operations/synthesize.ts b/src/server/infra/dream/operations/synthesize.ts index 4bd359b6a..a72050e05 100644 --- a/src/server/infra/dream/operations/synthesize.ts +++ b/src/server/infra/dream/operations/synthesize.ts @@ -257,15 +257,36 @@ async function writeSynthesisFile( } const sources = candidate.evidence.map((e) => `${e.domain}/_index.md`) + // Normalize tags to lowercase kebab-case so card chips and BM25 search see + // a consistent label regardless of whether the model honored the prompt's + // formatting rule. Empty entries (post-trim) are dropped. + const normalizedTags = candidate.tags + .map((t) => t.toLowerCase().trim().replaceAll(/\s+/g, '-')) + .filter((t) => t.length > 0) + const now = new Date().toISOString() + // Field order is enforced by insertion order (yamlDump uses sortKeys:false). + // Synthesis markers (confidence, sources, synthesized_at, type) come first + // in the order pre-existing synthesized files use on disk, so re-generating + // an old file does not produce a mechanical reorder diff. The seven + // semantic fields below mirror the order in markdown-writer.ts's + // generateFrontmatter so the on-disk shape matches regular `brv save` + // files; cogit then exposes them in DtoV3MemoryCardResource for card-mode + // display in the web UI. /* eslint-disable camelcase */ - const frontmatter = { - confidence: candidate.confidence, - sources, - synthesized_at: new Date().toISOString(), - type: 'synthesis', - } + const frontmatter: Record = {} + frontmatter.confidence = candidate.confidence + frontmatter.sources = sources + frontmatter.synthesized_at = now + frontmatter.type = 'synthesis' + frontmatter.title = candidate.title + frontmatter.summary = candidate.summary + frontmatter.tags = normalizedTags + frontmatter.related = [] + frontmatter.keywords = candidate.keywords + frontmatter.createdAt = now + frontmatter.updatedAt = now /* eslint-enable camelcase */ - const yaml = yamlDump(frontmatter, {lineWidth: -1, sortKeys: false}).trimEnd() + const yaml = yamlDump(frontmatter, {flowLevel: 1, lineWidth: -1, sortKeys: false}).trimEnd() const body = [ `# ${candidate.title}`, '', @@ -344,11 +365,17 @@ function buildPrompt(domains: DomainSummary[], existingSyntheses: string[]): str '- Do NOT report trivial or obvious connections (e.g., "both domains use TypeScript").', '- Each synthesis must reference at least 2 domains with specific evidence.', '- For "placement", choose the domain where this insight is MOST actionable.', + '- "summary" is one sentence (≤ 200 chars) describing the insight; this is what the UI shows as a card preview.', + '- "tags" are 3-5 short topical labels drawn from the source domains (e.g., "auth", "caching"). Lowercase, kebab-case.', + '- "keywords" are 5-10 single words a developer would search for to surface this synthesis.', '- If nothing meaningful is found, return an empty array. That is fine — but missing a clear cross-domain pattern is a failure.', '', + // Keep the JSON shape below in sync with SynthesisCandidateSchema in + // dream-response-schemas.ts; the schema rejects responses that omit any + // listed field, so adding a field there requires updating this example. 'Respond with JSON:', '```', - '{ "syntheses": [{ "title": "...", "claim": "...", "evidence": [{"domain": "...", "fact": "..."}], "confidence": 0.0-1.0, "placement": "..." }] }', + '{ "syntheses": [{ "title": "...", "summary": "...", "claim": "...", "evidence": [{"domain": "...", "fact": "..."}], "tags": ["..."], "keywords": ["..."], "confidence": 0.0-1.0, "placement": "..." }] }', '```', ].join('\n') } diff --git a/test/unit/infra/dream/dream-response-schemas.test.ts b/test/unit/infra/dream/dream-response-schemas.test.ts index 76e21cdc3..3bb6226e7 100644 --- a/test/unit/infra/dream/dream-response-schemas.test.ts +++ b/test/unit/infra/dream/dream-response-schemas.test.ts @@ -87,7 +87,10 @@ describe('dream-response-schemas', () => { {domain: 'auth', fact: 'uses JWT for session management'}, {domain: 'api', fact: 'validates JWT in middleware'}, ], + keywords: ['jwt', 'auth'], placement: 'api', + summary: 'Shared JWT validation across auth and api.', + tags: ['auth', 'api'], title: 'Shared auth pattern', }], } @@ -107,7 +110,10 @@ describe('dream-response-schemas', () => { claim: 'test', confidence: -0.1, evidence: [{domain: 'a', fact: 'f'}], + keywords: [], placement: 'a', + summary: '', + tags: [], title: 'test', }], } @@ -120,7 +126,10 @@ describe('dream-response-schemas', () => { claim: 'test', confidence: 1.1, evidence: [{domain: 'a', fact: 'f'}], + keywords: [], placement: 'a', + summary: '', + tags: [], title: 'test', }], } @@ -133,7 +142,10 @@ describe('dream-response-schemas', () => { claim: 'test', confidence: 0, evidence: [{domain: 'a', fact: 'f'}], + keywords: [], placement: 'a', + summary: '', + tags: [], title: 'test', }], } @@ -146,12 +158,63 @@ describe('dream-response-schemas', () => { claim: 'test', confidence: 1, evidence: [{domain: 'a', fact: 'f'}], + keywords: [], placement: 'a', + summary: '', + tags: [], title: 'test', }], } expect(() => SynthesizeResponseSchema.parse(input)).to.not.throw() }) + + it('should reject summary longer than 500 characters', () => { + const input = { + syntheses: [{ + claim: 'test', + confidence: 0.5, + evidence: [{domain: 'a', fact: 'f'}], + keywords: [], + placement: 'a', + summary: 'x'.repeat(501), + tags: [], + title: 'test', + }], + } + expect(() => SynthesizeResponseSchema.parse(input)).to.throw() + }) + + it('should reject tags array longer than 8 entries', () => { + const input = { + syntheses: [{ + claim: 'test', + confidence: 0.5, + evidence: [{domain: 'a', fact: 'f'}], + keywords: [], + placement: 'a', + summary: '', + tags: Array.from({length: 9}, (_, i) => `tag-${i}`), + title: 'test', + }], + } + expect(() => SynthesizeResponseSchema.parse(input)).to.throw() + }) + + it('should reject keywords array longer than 15 entries', () => { + const input = { + syntheses: [{ + claim: 'test', + confidence: 0.5, + evidence: [{domain: 'a', fact: 'f'}], + keywords: Array.from({length: 16}, (_, i) => `kw-${i}`), + placement: 'a', + summary: '', + tags: [], + title: 'test', + }], + } + expect(() => SynthesizeResponseSchema.parse(input)).to.throw() + }) }) describe('PruneResponseSchema', () => { diff --git a/test/unit/infra/dream/operations/consolidate.test.ts b/test/unit/infra/dream/operations/consolidate.test.ts index e73fb5b42..3e81f9c6e 100644 --- a/test/unit/infra/dream/operations/consolidate.test.ts +++ b/test/unit/infra/dream/operations/consolidate.test.ts @@ -569,6 +569,46 @@ describe('consolidate', () => { expect(titleIdx, 'title should appear before createdAt (canonical order)').to.be.lessThan(createdAtIdx) }) + it('TEMPORAL_UPDATE preserves flow-style arrays (no block-style reflow)', async () => { + await createCanonicalFile(ctxDir, 'auth/session.md', '# Old session info') + + // Input frontmatter uses flow-style arrays (the canonical CLI format + // emitted by markdown-writer with flowLevel: 1). After consolidate + // appends consolidated_at, the rewritten file must keep the SAME + // flow style — block-style reflow (`- a\n - b`) silently diverges + // from regular brv curate output and recreates the synthesis-vs-regular + // inconsistency this work eliminates. + const updatedWithFm = [ + '---', + 'title: Auth Session', + "summary: Updated session handling", + 'tags: [auth, session, security]', + 'related: []', + 'keywords: [session, cookie, jwt]', + "createdAt: '2026-04-01T00:00:00.000Z'", + "updatedAt: '2026-04-10T00:00:00.000Z'", + '---', + '# Updated session info', + ].join('\n') + + agent.executeOnSession.resolves(llmResponse([{ + files: ['auth/session.md'], + reason: 'Outdated info', + type: 'TEMPORAL_UPDATE', + updatedContent: updatedWithFm, + }])) + + await consolidate(['auth/session.md'], deps) + + const updated = await readFile(join(ctxDir, 'auth/session.md'), 'utf8') + expect(updated).to.include('tags: [auth, session, security]') + expect(updated).to.include('keywords: [session, cookie, jwt]') + expect(updated).to.include('related: []') + // Reject block-style reflow + expect(updated).to.not.match(/^tags:\s*\n\s+- /m) + expect(updated).to.not.match(/^keywords:\s*\n\s+- /m) + }) + it('CROSS_REFERENCE preserves existing frontmatter field order', async () => { await createCanonicalFile(ctxDir, 'auth/session.md', '# Session') await createCanonicalFile(ctxDir, 'auth/tokens.md', '# Tokens') diff --git a/test/unit/infra/dream/operations/synthesize.test.ts b/test/unit/infra/dream/operations/synthesize.test.ts index 11c9cb373..f597df482 100644 --- a/test/unit/infra/dream/operations/synthesize.test.ts +++ b/test/unit/infra/dream/operations/synthesize.test.ts @@ -25,9 +25,28 @@ async function createMdFile(dir: string, relativePath: string, body: string, fro await writeFile(fullPath, content, 'utf8') } -/** Build a canned LLM response */ -function llmResponse(syntheses: Array<{claim: string; confidence?: number; evidence: Array<{domain: string; fact: string}>; placement: string; title: string}>): string { - return '```json\n' + JSON.stringify({syntheses}) + '\n```' +/** + * Build a canned LLM response. Tests only need to specify what they're + * exercising — summary/tags/keywords default to placeholders so the zod + * schema parses without forcing every test to repeat them. + */ +function llmResponse(syntheses: Array<{ + claim: string; + confidence?: number; + evidence: Array<{domain: string; fact: string}>; + keywords?: string[]; + placement: string; + summary?: string; + tags?: string[]; + title: string; +}>): string { + const withDefaults = syntheses.map((s) => ({ + keywords: ['test-keyword'], + summary: 'Test summary.', + tags: ['test-tag'], + ...s, + })) + return '```json\n' + JSON.stringify({syntheses: withDefaults}) + '\n```' } /** Narrow DreamOperation to SYNTHESIZE variant */ @@ -111,6 +130,12 @@ describe('synthesize', () => { expect(prompt).to.include('# Auth Summary') expect(prompt).to.include('DOMAIN: api') expect(prompt).to.include('# API Summary') + // The prompt must instruct the model to produce the semantic fields the + // web UI's card-mode display needs (summary/tags/keywords); without + // them, synthesized files render with empty preview slots. + expect(prompt).to.match(/"summary"/) + expect(prompt).to.match(/"tags"/) + expect(prompt).to.match(/"keywords"/) expect(agent.deleteTaskSession.calledOnce).to.be.true }) @@ -166,7 +191,7 @@ describe('synthesize', () => { expect(content).to.include('Both auth and API share token validation logic.') }) - it('writes correct frontmatter fields', async () => { + it('writes the 7 semantic frontmatter fields plus synthesis markers', async () => { await createMdFile(ctxDir, 'auth/_index.md', '# Auth', {type: 'summary'}) await createMdFile(ctxDir, 'api/_index.md', '# API', {type: 'summary'}) @@ -174,7 +199,10 @@ describe('synthesize', () => { claim: 'Test claim.', confidence: 0.7, evidence: [{domain: 'auth', fact: 'Fact A'}, {domain: 'api', fact: 'Fact B'}], + keywords: ['authentication', 'tokens'], placement: 'api', + summary: 'Both auth and API share token validation logic.', + tags: ['security', 'cross-cutting'], title: 'Test Synthesis', }])) @@ -182,11 +210,100 @@ describe('synthesize', () => { expect(results).to.have.lengthOf(1) const content = await readFile(join(ctxDir, 'api/test-synthesis.md'), 'utf8') + + // Semantic fields — required by the web UI's card-mode display + expect(content).to.include('title: Test Synthesis') + expect(content).to.include('summary: Both auth and API share token validation logic.') + // Arrays MUST render in flow style ([a, b, c]) so on-disk output matches + // markdown-writer.ts; reverting flowLevel to 2 would fail this assertion. + expect(content).to.match(/^tags: \[/m) + expect(content).to.match(/^keywords: \[/m) + expect(content).to.include('security') + expect(content).to.include('cross-cutting') + expect(content).to.include('authentication') + expect(content).to.include('tokens') + expect(content).to.include('related:') + expect(content).to.include('createdAt:') + expect(content).to.include('updatedAt:') + + // Synthesis markers — kept for traceability and review gating expect(content).to.include('confidence:') expect(content).to.include('sources:') expect(content).to.include('synthesized_at:') + expect(content).to.include('type: synthesis') expect(content).to.include('auth/_index.md') expect(content).to.include('api/_index.md') + + // Sidecar fields must not bleed into markdown frontmatter + expect(content).to.not.include('maturity:') + expect(content).to.not.include('importance:') + }) + + it('normalizes tags to lowercase kebab-case', async () => { + await createMdFile(ctxDir, 'auth/_index.md', '# Auth', {type: 'summary'}) + await createMdFile(ctxDir, 'api/_index.md', '# API', {type: 'summary'}) + + agent.executeOnSession.resolves(llmResponse([{ + claim: 'Test.', + confidence: 0.9, + evidence: [{domain: 'auth', fact: 'A'}, {domain: 'api', fact: 'B'}], + keywords: ['x'], + placement: 'auth', + summary: 'A summary.', + // Mixed-case + multi-word tags — should be normalized at write time so + // card chips and BM25 search see consistent labels regardless of + // whether the model followed the prompt's "lowercase, kebab-case" rule. + tags: ['Auth Service', 'JWT-Validation', ' cross-cutting '], + title: 'Tag Normalization Test', + }])) + + const results = await synthesize(deps) + expect(results).to.have.lengthOf(1) + + const content = await readFile(join(ctxDir, 'auth/tag-normalization-test.md'), 'utf8') + expect(content).to.include('auth-service') + expect(content).to.include('jwt-validation') + expect(content).to.include('cross-cutting') + expect(content).to.not.include('Auth Service') + expect(content).to.not.include('JWT-Validation') + }) + + it('emits frontmatter parseable as the regular semantic shape', async () => { + const {load: yamlLoad} = await import('js-yaml') + + await createMdFile(ctxDir, 'auth/_index.md', '# Auth', {type: 'summary'}) + await createMdFile(ctxDir, 'api/_index.md', '# API', {type: 'summary'}) + + agent.executeOnSession.resolves(llmResponse([{ + claim: 'Test.', + confidence: 0.9, + evidence: [{domain: 'auth', fact: 'A'}, {domain: 'api', fact: 'B'}], + keywords: ['x', 'y'], + placement: 'auth', + summary: 'A summary.', + tags: ['z'], + title: 'Strict Test', + }])) + + const results = await synthesize(deps) + expect(results).to.have.lengthOf(1) + + const content = await readFile(join(ctxDir, 'auth/strict-test.md'), 'utf8') + const yamlBlock = content.match(/^---\n([\S\s]+?)\n---/)?.[1] + expect(yamlBlock).to.be.a('string') + const parsed = yamlLoad(yamlBlock ?? '') + expect(parsed).to.be.an('object').and.not.null + + // Cogit's Go parser populates DtoV3MemoryCardResource fields from these + // YAML keys (summary→short_description, related→relateds, + // updatedAt→last_updated_at). All seven must be present and well-typed. + expect(parsed).to.have.property('title').that.is.a('string') + expect(parsed).to.have.property('summary').that.is.a('string') + expect(parsed).to.have.property('tags').that.is.an('array') + expect(parsed).to.have.property('keywords').that.is.an('array') + expect(parsed).to.have.property('related').that.is.an('array') + expect(parsed).to.have.property('createdAt').that.is.a('string') + expect(parsed).to.have.property('updatedAt').that.is.a('string') }) it('writes evidence section in body', async () => {