From 8dde75f3bb122edb46c404bc54a5a5e46e83100f Mon Sep 17 00:00:00 2001 From: Luke Date: Wed, 24 Jun 2026 23:26:20 +0200 Subject: [PATCH] feat: add provider-agnostic schema reviewers --- README.md | 22 +++ skills/schemator/SKILL.md | 89 ++++++---- src/cli.ts | 117 ++++++++---- src/codex-review.ts | 91 ++++++++-- src/convergence.ts | 17 +- src/extract/index.ts | 8 + src/extract/sql.ts | 362 ++++++++++++++++++++++++++++++++++++++ src/jobs.ts | 4 + test/schemator.test.ts | 222 ++++++++++++++++++++++- 9 files changed, 844 insertions(+), 88 deletions(-) create mode 100644 src/extract/sql.ts diff --git a/README.md b/README.md index 8bbb0f6..134571c 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,28 @@ naming constraints before applying schema changes. per field, validates each JSON result, aggregates the decisions, and applies safe changes. +Schemator can also use another agent runtime: + +```bash +schemator run --strategy pi --reviewer-model claude-bridge/claude-sonnet-4-6 --source schema.md --context project-context.md --out .schemator +schemator run --strategy pi --reviewer-model openai/gpt-5.1 --reviewer-arg=--thinking --reviewer-arg off --source schema.md --out .schemator +``` + +Use `command` for any CLI that reads the field-review prompt from stdin and +prints one field-review JSON object to stdout: + +```bash +schemator review --strategy command --reviewer-command ./review-field --graph .schemator/graph.iteration-1.json --out .schemator/reviews.iteration-1 +``` + +Shared reviewer options: + +- `--reviewer-command `: executable for `codex`, `pi`, or `command`. +- `--reviewer-model `: provider/model passed to the reviewer when supported. +- `--reviewer-timeout-ms `: per-field timeout. +- `--reviewer-concurrency `: maximum concurrent external reviewers. +- `--reviewer-arg `: extra reviewer argument; repeat for multiple args. Use `--reviewer-arg=--flag` when the value itself starts with `--`. + Use local mode only for smoke tests: ```bash diff --git a/skills/schemator/SKILL.md b/skills/schemator/SKILL.md index e8c62d8..e1402de 100644 --- a/skills/schemator/SKILL.md +++ b/skills/schemator/SKILL.md @@ -1,71 +1,92 @@ --- name: schemator -description: Use when running Schemator to extract, review, simplify, converge, diff, or report on data models, JSON schemas, API payloads, SQL tables, migrations, TypeScript interfaces, YAML resources, or other field-based schemas. +description: "Use when running Schemator to extract, review, simplify, converge, diff, or report on database schemas, ORM models, migrations, API payloads, JSON Schema, TypeScript interfaces, YAML resources, or other field-based data models." --- # Schemator -Use this skill when a task asks to run or interpret Schemator, improve a schema -with Schemator, inspect Schemator artifacts, or prepare a Schemator-backed -schema report. +Use Schemator as a schema-design reviewer: it extracts fields from an existing +schema/proposal, asks reviewers to challenge each field, applies safe reductions, +and reports the resulting graph. It is a design aid, not product truth. ## Workflow -1. Start from a real draft schema or proposal. Schemator reviews existing model - shapes; it does not invent the first draft. -2. Write or locate project/task context before review. Context should explain - the product goal, naming conventions, borrowed vocabulary, user-facing - constraints, and what should remain stable. -3. Run real Codex review for semantic decisions. Use the local strategy only - for smoke tests and plumbing checks. -4. Inspect generated prompts under `jobs.iteration-N/` when decisions look - wrong. Verify the expected context is actually injected. -5. Read reduction artifacts, not just aggregate totals. Applied changes, - skipped proposals, manual proposals, and consistency warnings have different - meanings. -6. Treat a converged run as a candidate, not automatic product truth. Do a - manual naming and product-semantics pass before accepting the final schema. -7. For published or handoff reports, use the `final-report` skill too. +1. Start from a real draft schema, model, migration, payload, or proposal. + Schemator reviews existing shapes; it does not invent the first draft. +2. Locate the source shape and context: + - DB / ORM: migrations, SQL DDL, Prisma/Drizzle/TypeORM/SQLAlchemy/Rails/Laravel models. + - API / contract: OpenAPI snippets, JSON Schema, GraphQL types, protobuf, API payload examples. + - App model: TypeScript interfaces/types, Zod schemas, YAML/JSON resources, Markdown proposals. +3. Write or locate project/task context before review. Good context explains the + product goal, naming conventions, borrowed vocabulary, user-facing fields, + compatibility constraints, and what must remain stable. +4. Run a real reviewer for semantic decisions. Prefer `--strategy pi` in Luke's + Pi runtime, or `--strategy codex` when you specifically want Codex. Use + `--strategy local` only for smoke tests and plumbing checks. +5. Inspect generated prompts under `jobs.iteration-N/` when decisions look wrong. + Verify the expected schema source and context are injected. +6. Read reduction artifacts, not just aggregate totals. Applied changes, skipped + proposals, manual proposals, and consistency warnings mean different things. +7. Treat convergence as a candidate schema. Do a manual naming, product-semantics, + and backwards-compatibility pass before accepting changes. +8. For published or handoff reports, use the `schemator-final-report` skill too if installed. ## Commands -End-to-end review: +Pi runtime / provider-model selected from CLI: ```bash -npm run dev -- run --source schema.md --context project-context.md --out .schemator +schemator run --strategy pi --reviewer-model claude-bridge/claude-sonnet-4-6 --source schema.md --context project-context.md --out .schemator +schemator run --strategy pi --reviewer-model openai/gpt-5.1 --reviewer-arg=--thinking --reviewer-arg off --source schema.md --out .schemator ``` -Write prompts without reviewing: +Default Codex strategy: ```bash -npm run dev -- create-jobs --graph .schemator/graph.iteration-1.json --context project-context.md --out .schemator/jobs.iteration-1 +schemator run --source schema.md --context project-context.md --out .schemator +``` + +Any command that reads the prompt from stdin and prints one field-review JSON object: + +```bash +schemator review --strategy command --reviewer-command ./review-field --graph .schemator/graph.iteration-1.json --out .schemator/reviews.iteration-1 ``` Generate report and graph diff: ```bash -npm run dev -- report --run .schemator --out .schemator/final-report.md -npm run dev -- diff --run .schemator --out .schemator/graph-diff.md +schemator report --run .schemator --out .schemator/final-report.md +schemator diff --run .schemator --out .schemator/graph-diff.md ``` Smoke test only: ```bash -npm run dev -- run --strategy local --source schema.md --out .schemator-smoke +schemator run --strategy local --source schema.md --out .schemator-smoke ``` +Reviewer knobs: + +- `--reviewer-command ` sets the executable for `codex`, `pi`, or `command`. +- `--reviewer-model ` passes a provider/model to the reviewer when supported. +- `--reviewer-timeout-ms ` sets the per-field timeout. +- `--reviewer-concurrency ` sets max concurrent external reviewers. +- `--reviewer-arg ` adds extra reviewer args; use `--reviewer-arg=--flag` for flag-looking values. + ## Review Rules - Do not add local field-specific keep, rename, remove, merge, derive, or move rules unless the user explicitly asks for that exact field outcome. -- Use project context to explain naming intent instead of hardcoding outcomes. -- Preserve intentional declarative/configuration vocabulary when the context - says it is borrowed or meaningful. -- Prefer short clear names. Do not accept longer explicit names unless they - prevent a real ambiguity. +- Use context to explain naming/product intent instead of hardcoding outcomes. +- Preserve intentional domain vocabulary when context says it is borrowed or stable. +- Prefer short clear names. Accept longer explicit names only when they prevent + a real ambiguity. - Watch for renamed fields that became more verbose without improving the model. +- For DB/ORM sources, flag compatibility risks separately: migrations, data + backfills, API clients, analytics, imports/exports, and generated code may all + depend on field names. - Check whether removals are missing because the run was partial, because the - field had a current use case, or because the reviewer lacked enough context. + field has a current use case, or because reviewers lacked enough context. ## Report Checklist @@ -76,5 +97,5 @@ Before calling a Schemator result final: - Include the project context and command lines used. - Include applied, skipped, and manual decisions separately. - Include the initial-vs-final graph diff. -- State manual corrections or naming overrides clearly. -- Link the pushed artifact or PR. +- State manual corrections, naming overrides, and compatibility concerns clearly. +- Link the pushed artifact or PR when relevant. diff --git a/src/cli.ts b/src/cli.ts index 9016bb1..0bd938d 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -4,7 +4,7 @@ import { join } from "node:path"; import { Command } from "commander"; import { findSkillsRoot, maybeHandleSkillflag } from "skillflag"; import { renderPatchPlan } from "./apply.js"; -import { writeCodexReviews } from "./codex-review.js"; +import { writeReviewerReviews, type ReviewerStrategy } from "./codex-review.js"; import { aggregateFromFiles, combineAggregates, deriveFinalGraph, runConvergence } from "./convergence.js"; import { diffGraphs, renderGraphDiff } from "./diff.js"; import { extractGraph } from "./extract/index.js"; @@ -61,11 +61,16 @@ program .requiredOption("--out ", "review output directory") .option("--context ", "project/task context Markdown") .option("--jobs ", "also write independent field-review prompts") - .option("--strategy ", "review strategy", "codex") - .option("--codex-command ", "Codex executable for --strategy codex", "codex") - .option("--codex-model ", "Codex model for --strategy codex") - .option("--codex-timeout-ms ", "per-field Codex timeout in milliseconds", "120000") - .option("--codex-concurrency ", "maximum concurrent Codex reviewers", "4") + .option("--strategy ", "review strategy: codex, pi, command, or local", "codex") + .option("--reviewer-command ", "reviewer executable for codex, pi, or command strategy") + .option("--reviewer-model ", "provider/model or model name passed to the reviewer") + .option("--reviewer-timeout-ms ", "per-field reviewer timeout in milliseconds", "120000") + .option("--reviewer-concurrency ", "maximum concurrent external reviewers", "4") + .option("--reviewer-arg ", "extra reviewer argument; repeat for multiple args", collectOption, []) + .option("--codex-command ", "deprecated alias for --reviewer-command with --strategy codex") + .option("--codex-model ", "deprecated alias for --reviewer-model with --strategy codex") + .option("--codex-timeout-ms ", "deprecated alias for --reviewer-timeout-ms") + .option("--codex-concurrency ", "deprecated alias for --reviewer-concurrency") .action(async (options: ReviewCommandOptions) => { await runCommand(async () => { const graph = assertModelGraph(await readJson(resolvePath(options.graph))); @@ -73,14 +78,15 @@ program if (options.jobs) { await writeReviewJobs(graph, resolvePath(options.jobs), reviewContextOptions(projectContext)); } - const reviews = options.strategy === "codex" - ? await writeCodexReviews(graph, resolvePath(options.out), { - ...codexOptions(options), + const reviews = options.strategy === "local" + ? await writeDeterministicReviews(graph, resolvePath(options.out), { + strategy: "local", ...reviewContextOptions(projectContext), }) - : options.strategy === "local" - ? await writeDeterministicReviews(graph, resolvePath(options.out), { - strategy: "local", + : isReviewerStrategy(options.strategy) + ? await writeReviewerReviews(graph, resolvePath(options.out), { + ...reviewerOptions(options), + strategy: options.strategy, ...reviewContextOptions(projectContext), }) : unsupportedStrategy(options.strategy); @@ -202,11 +208,16 @@ program .requiredOption("--out ", "run output directory") .option("--context ", "project/task context Markdown") .option("--max-iterations ", "maximum simplification iterations", "4") - .option("--strategy ", "review strategy", "codex") - .option("--codex-command ", "Codex executable for --strategy codex", "codex") - .option("--codex-model ", "Codex model for --strategy codex") - .option("--codex-timeout-ms ", "per-field Codex timeout in milliseconds", "120000") - .option("--codex-concurrency ", "maximum concurrent Codex reviewers", "4") + .option("--strategy ", "review strategy: codex, pi, command, or local", "codex") + .option("--reviewer-command ", "reviewer executable for codex, pi, or command strategy") + .option("--reviewer-model ", "provider/model or model name passed to the reviewer") + .option("--reviewer-timeout-ms ", "per-field reviewer timeout in milliseconds", "120000") + .option("--reviewer-concurrency ", "maximum concurrent external reviewers", "4") + .option("--reviewer-arg ", "extra reviewer argument; repeat for multiple args", collectOption, []) + .option("--codex-command ", "deprecated alias for --reviewer-command with --strategy codex") + .option("--codex-model ", "deprecated alias for --reviewer-model with --strategy codex") + .option("--codex-timeout-ms ", "deprecated alias for --reviewer-timeout-ms") + .option("--codex-concurrency ", "deprecated alias for --reviewer-concurrency") .action(async (options: RunCommandOptions) => { await runCommand(async () => { const source = resolvePath(options.source); @@ -216,7 +227,7 @@ program throw new Error("--max-iterations must be a positive integer"); } const projectContext = await readProjectContext(options.context); - if (options.strategy !== "codex" && options.strategy !== "local") { + if (options.strategy !== "local" && !isReviewerStrategy(options.strategy)) { unsupportedStrategy(options.strategy); } await runConvergence({ @@ -225,7 +236,7 @@ program maxIterations, strategy: options.strategy, ...(projectContext === undefined ? {} : { projectContext }), - codex: codexOptions(options), + reviewer: reviewerOptions(options), }); }); }); @@ -238,10 +249,15 @@ type ReviewCommandOptions = { context?: string; jobs?: string; strategy: string; - codexCommand: string; + reviewerCommand?: string; + reviewerModel?: string; + reviewerTimeoutMs?: string; + reviewerConcurrency?: string; + reviewerArg?: string[]; + codexCommand?: string; codexModel?: string; - codexTimeoutMs: string; - codexConcurrency: string; + codexTimeoutMs?: string; + codexConcurrency?: string; }; type RunCommandOptions = { @@ -250,36 +266,67 @@ type RunCommandOptions = { context?: string; maxIterations: string; strategy: string; - codexCommand: string; + reviewerCommand?: string; + reviewerModel?: string; + reviewerTimeoutMs?: string; + reviewerConcurrency?: string; + reviewerArg?: string[]; + codexCommand?: string; codexModel?: string; - codexTimeoutMs: string; - codexConcurrency: string; + codexTimeoutMs?: string; + codexConcurrency?: string; }; -function codexOptions( - options: Pick, -): { - command: string; +type ReviewerCommandOptions = Pick< + ReviewCommandOptions, + | "strategy" + | "reviewerCommand" + | "reviewerModel" + | "reviewerTimeoutMs" + | "reviewerConcurrency" + | "reviewerArg" + | "codexCommand" + | "codexModel" + | "codexTimeoutMs" + | "codexConcurrency" +>; + +function reviewerOptions(options: ReviewerCommandOptions): { + command?: string; + args?: string[]; model?: string; timeoutMs: number; concurrency: number; } { - const timeoutMs = Number.parseInt(options.codexTimeoutMs, 10); + const timeoutRaw = options.reviewerTimeoutMs ?? options.codexTimeoutMs ?? "120000"; + const timeoutMs = Number.parseInt(timeoutRaw, 10); if (!Number.isInteger(timeoutMs) || timeoutMs < 1) { - throw new Error("--codex-timeout-ms must be a positive integer"); + throw new Error("--reviewer-timeout-ms must be a positive integer"); } - const concurrency = Number.parseInt(options.codexConcurrency, 10); + const concurrencyRaw = options.reviewerConcurrency ?? options.codexConcurrency ?? "4"; + const concurrency = Number.parseInt(concurrencyRaw, 10); if (!Number.isInteger(concurrency) || concurrency < 1) { - throw new Error("--codex-concurrency must be a positive integer"); + throw new Error("--reviewer-concurrency must be a positive integer"); } + const command = options.reviewerCommand ?? options.codexCommand; + const model = options.reviewerModel ?? options.codexModel; return { - command: options.codexCommand, - ...(options.codexModel ? { model: options.codexModel } : {}), + ...(command ? { command } : {}), + ...(options.reviewerArg && options.reviewerArg.length > 0 ? { args: options.reviewerArg } : {}), + ...(model ? { model } : {}), timeoutMs, concurrency, }; } +function collectOption(value: string, previous: string[]): string[] { + return [...previous, value]; +} + +function isReviewerStrategy(strategy: string): strategy is ReviewerStrategy { + return strategy === "codex" || strategy === "pi" || strategy === "command"; +} + function unsupportedStrategy(strategy: string): never { throw new Error(`unsupported review strategy: ${strategy}`); } diff --git a/src/codex-review.ts b/src/codex-review.ts index 071ae15..29bccec 100644 --- a/src/codex-review.ts +++ b/src/codex-review.ts @@ -6,8 +6,12 @@ import { renderFieldPrompt, type RunHistoryEntry } from "./jobs.js"; import type { FieldReview, ModelGraph } from "./types.js"; import { validateFieldReview } from "./validate.js"; -export type CodexReviewOptions = { +export type ReviewerStrategy = "codex" | "pi" | "command"; + +export type ReviewerOptions = { + strategy?: ReviewerStrategy; command?: string; + args?: string[]; model?: string; cwd?: string; timeoutMs?: number; @@ -16,12 +20,23 @@ export type CodexReviewOptions = { runHistory?: RunHistoryEntry[]; }; +export type CodexReviewOptions = Omit; + export async function writeCodexReviews( graph: ModelGraph, outputDir: string, options: CodexReviewOptions = {}, +): Promise { + return writeReviewerReviews(graph, outputDir, { ...options, strategy: "codex" }); +} + +export async function writeReviewerReviews( + graph: ModelGraph, + outputDir: string, + options: ReviewerOptions = {}, ): Promise { await prepareGeneratedOutputDir(outputDir, ".review.json"); + const strategy = options.strategy ?? "codex"; const jobs = graph.models.flatMap((model) => model.fields.map((field) => ({ model, field }))); const reviews = new Array(jobs.length); let cursor = 0; @@ -48,14 +63,14 @@ export async function writeCodexReviews( }, ); const review = bindReviewIdentity( - await runCodexFieldReview(prompt, options, abortController.signal), + await runFieldReview(prompt, { ...options, strategy }, abortController.signal), model.id, field.path, ); const validation = validateFieldReview(review); if (!validation.ok) { throw new Error( - `Codex review for ${model.id}.${field.path} is invalid:\n${validation.errors.join("\n")}`, + `${strategy} review for ${model.id}.${field.path} is invalid:\n${validation.errors.join("\n")}`, ); } reviews[index] = review; @@ -75,7 +90,7 @@ export async function writeCodexReviews( } for (const review of reviews) { if (!review) { - throw new Error("Codex review worker finished without writing every review."); + throw new Error(`${strategy} review worker finished without writing every review.`); } } return reviews; @@ -89,11 +104,24 @@ function bindReviewIdentity(review: FieldReview, model: string, fieldPath: strin }; } -async function runCodexFieldReview( +async function runFieldReview( prompt: string, - options: CodexReviewOptions, + options: ReviewerOptions & { strategy: ReviewerStrategy }, signal?: AbortSignal, ): Promise { + const output = options.strategy === "codex" + ? await runCodexFieldReview(prompt, options, signal) + : options.strategy === "pi" + ? await runPiFieldReview(prompt, options, signal) + : await runCommandFieldReview(prompt, options, signal); + return parseFieldReviewOutput(output, options.strategy); +} + +async function runCodexFieldReview( + prompt: string, + options: ReviewerOptions, + signal?: AbortSignal, +): Promise { const command = options.command ?? "codex"; const args = [ "--ask-for-approval", @@ -107,14 +135,57 @@ async function runCodexFieldReview( "--color", "never", ...(options.model ? ["--model", options.model] : []), + ...(options.args ?? []), "-", ]; - const output = await execWithInput(command, args, prompt, { + return execWithInput(command, args, prompt, { + cwd: options.cwd ?? process.cwd(), + timeoutMs: options.timeoutMs ?? 120_000, + ...(signal === undefined ? {} : { signal }), + }); +} + +async function runPiFieldReview( + prompt: string, + options: ReviewerOptions, + signal?: AbortSignal, +): Promise { + const command = options.command ?? "pi"; + const args = [ + "--print", + "--no-session", + "--source", + "child-agent", + "--no-tools", + "--no-context-files", + "--no-skills", + "--mode", + "text", + ...(options.model ? ["--model", options.model] : []), + ...(options.args ?? []), + prompt, + ]; + return execWithInput(command, args, "", { + cwd: options.cwd ?? process.cwd(), + timeoutMs: options.timeoutMs ?? 120_000, + ...(signal === undefined ? {} : { signal }), + }); +} + +async function runCommandFieldReview( + prompt: string, + options: ReviewerOptions, + signal?: AbortSignal, +): Promise { + const command = options.command; + if (!command) { + throw new Error("--reviewer-command is required for --strategy command"); + } + return execWithInput(command, options.args ?? [], prompt, { cwd: options.cwd ?? process.cwd(), timeoutMs: options.timeoutMs ?? 120_000, ...(signal === undefined ? {} : { signal }), }); - return parseFieldReviewOutput(output); } function fieldReviewSchemaPath(): string { @@ -191,7 +262,7 @@ function execWithInput( }); } -function parseFieldReviewOutput(output: string): FieldReview { +function parseFieldReviewOutput(output: string, strategy: string): FieldReview { const trimmed = output.trim(); const direct = tryParseJson(trimmed); if (direct) { @@ -210,7 +281,7 @@ function parseFieldReviewOutput(output: string): FieldReview { return normalizeFieldReview(objectJson); } - throw new Error("Codex review did not return a JSON object"); + throw new Error(`${strategy} review did not return a JSON object`); } function normalizeFieldReview(value: unknown): FieldReview { diff --git a/src/convergence.ts b/src/convergence.ts index e7ff2da..8d294a5 100644 --- a/src/convergence.ts +++ b/src/convergence.ts @@ -3,7 +3,7 @@ import { mkdir } from "node:fs/promises"; import { join } from "node:path"; import { aggregateReviews, readReviews } from "./aggregate.js"; import { renderPatchPlan } from "./apply.js"; -import { writeCodexReviews, type CodexReviewOptions } from "./codex-review.js"; +import { writeReviewerReviews, type ReviewerOptions, type ReviewerStrategy } from "./codex-review.js"; import { extractGraph } from "./extract/index.js"; import { readJson, writeJson, writeText } from "./files.js"; import { @@ -19,7 +19,7 @@ import { writeDeterministicReviews } from "./review.js"; import type { AggregateReview, ModelGraph } from "./types.js"; import { validateAggregateReview, validateFieldReview, validateModelGraph } from "./validate.js"; -export type ReviewStrategy = "codex" | "local"; +export type ReviewStrategy = ReviewerStrategy | "local"; export type RunConvergenceOptions = { source: string; @@ -27,7 +27,7 @@ export type RunConvergenceOptions = { maxIterations: number; strategy: ReviewStrategy; projectContext?: string; - codex?: Pick; + reviewer?: Pick; }; export type RunSummary = { @@ -69,14 +69,15 @@ export async function runConvergence(options: RunConvergenceOptions): Promise model.fields.length > 0); diff --git a/src/extract/sql.ts b/src/extract/sql.ts new file mode 100644 index 0000000..cf0c2f6 --- /dev/null +++ b/src/extract/sql.ts @@ -0,0 +1,362 @@ +import type { FieldNode, ModelNode, SourceSpan } from "../types.js"; + +type Statement = { + text: string; + startLine: number; + endLine: number; +}; + +type ColumnDefinition = { + text: string; + startLine: number; + endLine: number; +}; + +const tableConstraintKeywords = new Set([ + "constraint", + "primary", + "foreign", + "unique", + "check", + "exclude", + "key", +]); + +const columnConstraintKeywords = new Set([ + "not", + "null", + "default", + "primary", + "unique", + "references", + "check", + "constraint", + "collate", + "generated", + "identity", + "comment", + "encode", + "compress", +]); + +export function extractSqlModels(sql: string, sourcePath: string, startLine = 1): ModelNode[] { + return splitSqlStatements(sql, startLine).flatMap((statement) => extractCreateTable(statement, sourcePath)); +} + +function extractCreateTable(statement: Statement, sourcePath: string): ModelNode[] { + const match = /create\s+(?:temporary\s+|temp\s+|unlogged\s+)?table\s+(?:if\s+not\s+exists\s+)?([^\s(]+)\s*\(/i.exec( + statement.text, + ); + if (!match || match.index === undefined) { + return []; + } + + const tableName = normalizeIdentifier(match[1] ?? "Table"); + const tableStartLine = statement.startLine + countNewlines(statement.text.slice(0, match.index)); + const openParen = statement.text.indexOf("(", match.index); + const closeParen = findMatchingParen(statement.text, openParen); + if (openParen < 0 || closeParen < 0) { + return []; + } + + const body = statement.text.slice(openParen + 1, closeParen); + const bodyStartLine = statement.startLine + countNewlines(statement.text.slice(0, openParen + 1)); + const fields = splitColumnDefinitions(body, bodyStartLine) + .map((definition) => columnToField(definition, tableName, sourcePath)) + .filter((field): field is FieldNode => field !== null); + + return fields.length > 0 + ? [ + { + id: tableName, + kind: "table", + source: { + path: sourcePath, + span: { startLine: tableStartLine, endLine: statement.endLine }, + }, + fields, + }, + ] + : []; +} + +function columnToField(definition: ColumnDefinition, tableName: string, sourcePath: string): FieldNode | null { + const stripped = stripLeadingSqlComments(definition.text); + const trimmed = stripped.text.trim(); + if (!trimmed) { + return null; + } + const [rawName, rest] = readIdentifier(trimmed); + if (!rawName || tableConstraintKeywords.has(rawName.toLowerCase())) { + return null; + } + const name = normalizeIdentifier(rawName); + const type = readColumnType(rest.trim()); + const lower = trimmed.toLowerCase(); + const required = /\bnot\s+null\b/.test(lower) || /\bprimary\s+key\b/.test(lower); + return { + path: name, + name, + type, + required, + nullable: !required, + parent: tableName, + objectLike: false, + source: { + path: sourcePath, + span: { startLine: definition.startLine + stripped.lineDelta, endLine: definition.endLine }, + }, + }; +} + +function stripLeadingSqlComments(text: string): { text: string; lineDelta: number } { + let rest = text; + let lineDelta = 0; + const trimLeading = (): void => { + const trimmed = rest.trimStart(); + lineDelta += countNewlines(rest.slice(0, rest.length - trimmed.length)); + rest = trimmed; + }; + trimLeading(); + let changed = true; + while (changed) { + changed = false; + if (rest.startsWith("--")) { + const newline = rest.indexOf("\n"); + const removed = newline === -1 ? rest : rest.slice(0, newline + 1); + rest = newline === -1 ? "" : rest.slice(newline + 1); + lineDelta += countNewlines(removed); + trimLeading(); + changed = true; + } + if (rest.startsWith("/*")) { + const end = rest.indexOf("*/"); + const removed = end === -1 ? rest : rest.slice(0, end + 2); + rest = end === -1 ? "" : rest.slice(end + 2); + lineDelta += countNewlines(removed); + trimLeading(); + changed = true; + } + } + return { text: rest, lineDelta }; +} + +function readColumnType(rest: string): string { + const tokens = rest.match(/"[^"]+"|'[^']+'|`[^`]+`|\[[^\]]+\]|\S+/g) ?? []; + const typeTokens: string[] = []; + let parenDepth = 0; + for (const token of tokens) { + const bare = token.replace(/[(),]/g, "").toLowerCase(); + if (parenDepth === 0 && columnConstraintKeywords.has(bare)) { + break; + } + typeTokens.push(token); + parenDepth += countChar(token, "(") - countChar(token, ")"); + } + return typeTokens.join(" ").trim() || "unknown"; +} + +function splitSqlStatements(sql: string, startLine: number): Statement[] { + const statements: Statement[] = []; + let start = 0; + let line = startLine; + let statementStartLine = startLine; + let singleQuote = false; + let doubleQuote = false; + let lineComment = false; + let blockComment = false; + + for (let index = 0; index < sql.length; index += 1) { + const char = sql[index]; + const next = sql[index + 1]; + if (char === "\n") { + line += 1; + lineComment = false; + } + if (lineComment) { + continue; + } + if (blockComment) { + if (char === "*" && next === "/") { + blockComment = false; + index += 1; + } + continue; + } + if (!singleQuote && !doubleQuote && char === "-" && next === "-") { + lineComment = true; + index += 1; + continue; + } + if (!singleQuote && !doubleQuote && char === "/" && next === "*") { + blockComment = true; + index += 1; + continue; + } + if (!doubleQuote && char === "'" && sql[index - 1] !== "\\") { + singleQuote = !singleQuote; + } else if (!singleQuote && char === '"') { + doubleQuote = !doubleQuote; + } + if (!singleQuote && !doubleQuote && char === ";") { + const text = sql.slice(start, index + 1).trim(); + if (text) { + statements.push({ text, startLine: firstContentLine(sql.slice(start, index + 1), 0, statementStartLine), endLine: line }); + } + start = index + 1; + statementStartLine = line; + } + } + + const text = sql.slice(start).trim(); + if (text) { + statements.push({ text, startLine: firstContentLine(sql.slice(start), 0, statementStartLine), endLine: line }); + } + return statements; +} + +function splitColumnDefinitions(body: string, startLine: number): ColumnDefinition[] { + const columns: ColumnDefinition[] = []; + let start = 0; + let line = startLine; + let parenDepth = 0; + let singleQuote = false; + let doubleQuote = false; + let lineComment = false; + let blockComment = false; + + for (let index = 0; index < body.length; index += 1) { + const char = body[index]; + const next = body[index + 1]; + if (char === "\n") { + line += 1; + lineComment = false; + } + if (lineComment) { + continue; + } + if (blockComment) { + if (char === "*" && next === "/") { + blockComment = false; + index += 1; + } + continue; + } + if (!singleQuote && !doubleQuote && char === "-" && next === "-") { + lineComment = true; + index += 1; + continue; + } + if (!singleQuote && !doubleQuote && char === "/" && next === "*") { + blockComment = true; + index += 1; + continue; + } + if (!doubleQuote && char === "'" && body[index - 1] !== "\\") { + singleQuote = !singleQuote; + } else if (!singleQuote && char === '"') { + doubleQuote = !doubleQuote; + } + if (singleQuote || doubleQuote) { + continue; + } + if (char === "(") { + parenDepth += 1; + } else if (char === ")") { + parenDepth = Math.max(0, parenDepth - 1); + } else if (char === "," && parenDepth === 0) { + const text = body.slice(start, index).trim(); + if (text) { + columns.push({ text, startLine: firstContentLine(body, start, startLine), endLine: line }); + } + start = index + 1; + } + } + const text = body.slice(start).trim(); + if (text) { + columns.push({ text, startLine: firstContentLine(body, start, startLine), endLine: line }); + } + return columns; +} + +function findMatchingParen(text: string, openParen: number): number { + let depth = 0; + let singleQuote = false; + let doubleQuote = false; + let lineComment = false; + let blockComment = false; + for (let index = openParen; index < text.length; index += 1) { + const char = text[index]; + const next = text[index + 1]; + if (char === "\n") { + lineComment = false; + } + if (lineComment) { + continue; + } + if (blockComment) { + if (char === "*" && next === "/") { + blockComment = false; + index += 1; + } + continue; + } + if (!singleQuote && !doubleQuote && char === "-" && next === "-") { + lineComment = true; + index += 1; + continue; + } + if (!singleQuote && !doubleQuote && char === "/" && next === "*") { + blockComment = true; + index += 1; + continue; + } + if (!doubleQuote && char === "'" && text[index - 1] !== "\\") { + singleQuote = !singleQuote; + } else if (!singleQuote && char === '"') { + doubleQuote = !doubleQuote; + } + if (singleQuote || doubleQuote) { + continue; + } + if (char === "(") { + depth += 1; + } else if (char === ")") { + depth -= 1; + if (depth === 0) { + return index; + } + } + } + return -1; +} + +function readIdentifier(text: string): [string | null, string] { + const trimmed = text.trimStart(); + const quoted = /^("[^"]+"|`[^`]+`|\[[^\]]+\])\s*(.*)$/s.exec(trimmed); + if (quoted) { + return [quoted[1] ?? null, quoted[2] ?? ""]; + } + const bare = /^([^\s]+)\s*(.*)$/s.exec(trimmed); + return [bare?.[1] ?? null, bare?.[2] ?? ""]; +} + +function normalizeIdentifier(identifier: string): string { + const trimmed = identifier.trim().replace(/^["`\[]|["`\]]$/g, ""); + const parts = trimmed.split("."); + return parts[parts.length - 1] ?? trimmed; +} + +function firstContentLine(text: string, startOffset: number, baseLine: number): number { + const rest = text.slice(startOffset); + const leadingWhitespace = /^\s*/.exec(rest)?.[0] ?? ""; + return baseLine + countNewlines(text.slice(0, startOffset)) + countNewlines(leadingWhitespace); +} + +function countNewlines(text: string): number { + return (text.match(/\n/g) ?? []).length; +} + +function countChar(text: string, char: string): number { + return [...text].filter((candidate) => candidate === char).length; +} diff --git a/src/jobs.ts b/src/jobs.ts index 94a86a1..1f32f4b 100644 --- a/src/jobs.ts +++ b/src/jobs.ts @@ -41,6 +41,10 @@ export function renderFieldPrompt( "You are reviewing exactly one data-model field. Be skeptical. Prefer the smallest Lindy schema: boring names, durable concepts, no metaphors, no generic bags, and no fields without a current use case. Aim for a data model that can remain the same for the next ten or a hundred years.", "", "Return only valid JSON matching `schemas/field-review.schema.json`.", + "The JSON object must include: `schemaVersion: 1`, `model`, `fieldPath`, `decision`, `finalName`, `finalType`, `required`, `rationale`, `alternatives`, `simplestChoice`, `confidence`, and `questions`.", + "Allowed `decision` values: `keep`, `rename`, `merge`, `derive`, `move`, `defer`, `remove`, `opaque`.", + "Allowed `confidence` values: `low`, `medium`, `high`.", + "Do not include Markdown, prose, or code fences outside the JSON object.", "", ...projectContextSection(options.projectContext), ...runHistorySection(options.runHistory), diff --git a/test/schemator.test.ts b/test/schemator.test.ts index f9e1700..42b3656 100644 --- a/test/schemator.test.ts +++ b/test/schemator.test.ts @@ -7,7 +7,7 @@ import { promisify } from "node:util"; import { describe, expect, test } from "vitest"; import { aggregateReviews } from "../src/aggregate.js"; import { renderPatchPlan } from "../src/apply.js"; -import { writeCodexReviews } from "../src/codex-review.js"; +import { writeCodexReviews, writeReviewerReviews } from "../src/codex-review.js"; import { extractGraph } from "../src/extract/index.js"; import { pathToFileNamePart } from "../src/files.js"; import { applyAggregateToGraph, graphDecisionKey, hasGraphChange, hasSimplification, reduceAggregateGraph } from "../src/graph.js"; @@ -65,6 +65,44 @@ describe("schemator", () => { } }); + test("extracts SQL create table columns", async () => { + const dir = await mkdtemp(join(tmpdir(), "schemator-")); + try { + const source = join(dir, "schema.sql"); + await writeFile( + source, + [ + "CREATE TABLE public.customer_accounts (", + " id uuid PRIMARY KEY,", + " display_name text NOT NULL,", + " billing_email text,", + " marketing_email_opt_in boolean DEFAULT false,", + " created_at timestamptz NOT NULL,", + " CONSTRAINT customer_accounts_email_check CHECK (billing_email <> '')", + ");", + ].join("\n"), + ); + + const graph = await extractGraph(source); + + expect(graph.models.map((model) => model.id)).toEqual(["customer_accounts"]); + expect(graph.models[0]?.kind).toBe("table"); + expect(graph.models[0]?.fields.map((field) => field.path)).toEqual([ + "id", + "display_name", + "billing_email", + "marketing_email_opt_in", + "created_at", + ]); + expect(graph.models[0]?.fields.find((field) => field.path === "id")?.required).toBe(true); + expect(graph.models[0]?.fields.find((field) => field.path === "id")?.source.span.startLine).toBe(2); + expect(graph.models[0]?.fields.find((field) => field.path === "billing_email")?.required).toBe(false); + expect(graph.models[0]?.fields.find((field) => field.path === "billing_email")?.source.span.startLine).toBe(4); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + test("merges duplicate TypeScript model declarations inside Markdown", async () => { const dir = await mkdtemp(join(tmpdir(), "schemator-")); try { @@ -146,6 +184,61 @@ describe("schemator", () => { } }); + test("extracts SQL create table columns around comments", async () => { + const dir = await mkdtemp(join(tmpdir(), "schemator-")); + try { + const source = join(dir, "schema.sql"); + await writeFile( + source, + [ + "-- leading comment with ;", + "CREATE TABLE account_events (", + " id uuid PRIMARY KEY, -- comma, paren ) in comment", + " account_id uuid NOT NULL,", + " /* comma, and paren ) in block comment */", + " event_type text NOT NULL", + ");", + ].join("\n"), + ); + + const graph = await extractGraph(source); + + expect(graph.models.map((model) => model.id)).toEqual(["account_events"]); + expect(graph.models[0]?.source.span.startLine).toBe(2); + expect(graph.models[0]?.fields.map((field) => field.path)).toEqual(["id", "account_id", "event_type"]); + expect(graph.models[0]?.fields.find((field) => field.path === "id")?.source.span.startLine).toBe(3); + expect(graph.models[0]?.fields.find((field) => field.path === "event_type")?.source.span.startLine).toBe(6); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + + test("extracts SQL fences from Markdown proposals", async () => { + const dir = await mkdtemp(join(tmpdir(), "schemator-")); + try { + const source = join(dir, "proposal.md"); + await writeFile( + source, + [ + "```sql", + "CREATE TABLE account_events (", + " id uuid PRIMARY KEY,", + " account_id uuid NOT NULL,", + " event_type text NOT NULL", + ");", + "```", + ].join("\n"), + ); + + const graph = await extractGraph(source); + + expect(graph.models.map((model) => model.id)).toEqual(["account_events"]); + expect(graph.models[0]?.fields.map((field) => field.path)).toEqual(["id", "account_id", "event_type"]); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + test("local fallback does not make field-specific semantic renames", async () => { const dir = await mkdtemp(join(tmpdir(), "schemator-")); try { @@ -234,6 +327,133 @@ describe("schemator", () => { } }); + test("runs command review strategy through a stdin reviewer", async () => { + const dir = await mkdtemp(join(tmpdir(), "schemator-")); + try { + const fakeReviewer = join(dir, "fake-reviewer.js"); + const promptPath = join(dir, "prompt.txt"); + const reviewsDir = join(dir, "reviews"); + await writeFile( + fakeReviewer, + [ + "#!/usr/bin/env node", + "const fs = require('fs');", + "let prompt = '';", + "process.stdin.setEncoding('utf8');", + "process.stdin.on('data', (chunk) => { prompt += chunk; });", + "process.stdin.on('end', () => {", + " fs.writeFileSync(process.env.SCHEMATOR_PROMPT_PATH, prompt);", + " console.log(JSON.stringify({", + " schemaVersion: 1,", + " model: 'Policy',", + " fieldPath: 'id',", + " decision: 'keep',", + " finalName: 'id',", + " finalType: 'string',", + " required: true,", + " rationale: 'test',", + " alternatives: ['id'],", + " simplestChoice: 'id',", + " confidence: 'high',", + " questions: []", + " }));", + "});", + ].join("\n"), + ); + await chmod(fakeReviewer, 0o755); + const originalPromptPath = process.env["SCHEMATOR_PROMPT_PATH"]; + process.env["SCHEMATOR_PROMPT_PATH"] = promptPath; + try { + const reviews = await writeReviewerReviews(graphWithOneField("Policy", "id", "id"), reviewsDir, { + strategy: "command", + command: fakeReviewer, + timeoutMs: 5_000, + }); + const prompt = await readFile(promptPath, "utf8"); + + expect(reviews[0]?.decision).toBe("keep"); + expect(prompt).toContain("# Schemator Field Review"); + expect(prompt).toContain("Allowed `decision` values"); + } finally { + if (originalPromptPath === undefined) { + delete process.env["SCHEMATOR_PROMPT_PATH"]; + } else { + process.env["SCHEMATOR_PROMPT_PATH"] = originalPromptPath; + } + } + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + + test("runs pi review strategy with model and extra args", async () => { + const dir = await mkdtemp(join(tmpdir(), "schemator-")); + try { + const fakePi = join(dir, "fake-pi.js"); + const argsPath = join(dir, "args.json"); + const reviewsDir = join(dir, "reviews"); + await writeFile( + fakePi, + [ + "#!/usr/bin/env node", + "const fs = require('fs');", + "fs.writeFileSync(process.env.SCHEMATOR_ARGS_PATH, JSON.stringify(process.argv.slice(2)));", + "console.log(JSON.stringify({", + " schemaVersion: 1,", + " model: 'Policy',", + " fieldPath: 'id',", + " decision: 'keep',", + " finalName: 'id',", + " finalType: 'string',", + " required: true,", + " rationale: 'test',", + " alternatives: ['id'],", + " simplestChoice: 'id',", + " confidence: 'high',", + " questions: []", + "}));", + ].join("\n"), + ); + await chmod(fakePi, 0o755); + const originalArgsPath = process.env["SCHEMATOR_ARGS_PATH"]; + process.env["SCHEMATOR_ARGS_PATH"] = argsPath; + try { + await writeReviewerReviews(graphWithOneField("Policy", "id", "id"), reviewsDir, { + strategy: "pi", + command: fakePi, + model: "claude-bridge/claude-sonnet-4-6", + args: ["--thinking", "off"], + timeoutMs: 5_000, + }); + const args = JSON.parse(await readFile(argsPath, "utf8")) as string[]; + + expect(args.slice(0, 9)).toEqual([ + "--print", + "--no-session", + "--source", + "child-agent", + "--no-tools", + "--no-context-files", + "--no-skills", + "--mode", + "text", + ]); + expect(args).toContain("--model"); + expect(args).toContain("claude-bridge/claude-sonnet-4-6"); + expect(args).toContain("--thinking"); + expect(args.at(-1)).toContain("# Schemator Field Review"); + } finally { + if (originalArgsPath === undefined) { + delete process.env["SCHEMATOR_ARGS_PATH"]; + } else { + process.env["SCHEMATOR_ARGS_PATH"] = originalArgsPath; + } + } + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + test("binds Codex review identity to the reviewed field", async () => { const dir = await mkdtemp(join(tmpdir(), "schemator-")); try {