diff --git a/src/claude-engine.ts b/src/claude-engine.ts new file mode 100644 index 0000000000..f28bcecef2 --- /dev/null +++ b/src/claude-engine.ts @@ -0,0 +1,173 @@ +// Pure, testable helpers for the Claude CLI (`-p`) review engine: building the +// subprocess argv (read-only, no permission bypass) and recognising the +// terminal result record in the CLI's output. Kept separate from clawsweeper.ts +// so the security-relevant command construction and the output parsing are +// unit-tested in isolation. + +// Read-only inspection tools the reviewer is permitted to use. This is an +// explicit allow-list — NOT a permission bypass — so the spawned binary can +// never invoke Edit/Write/Bash/etc. against the target checkout. A local review +// has the full diff and context in the prompt, so read-only file inspection is +// all the engine needs. +export const CLAUDE_REVIEW_READONLY_TOOLS = ["Read", "Grep", "Glob"] as const; + +// Credentials the configurable Claude binary must never receive: GitHub tokens, +// the ClawSweeper GitHub App identity/key, and OpenAI/Codex keys. The Claude CLI +// authenticates from its own local credentials, so none of these are needed. +export const SCRUBBED_CREDENTIAL_ENV_KEYS = [ + "GH_TOKEN", + "GITHUB_TOKEN", + "COMMIT_SWEEPER_TARGET_GH_TOKEN", + "CLAWSWEEPER_PROOF_INSPECTION_TOKEN", + "CLAWSWEEPER_APP_ID", + "CLAWSWEEPER_APP_PRIVATE_KEY", + "OPENAI_API_KEY", + "CODEX_API_KEY", +] as const; + +// Build the environment for the Claude review subprocess, scrubbing the +// credentials above. The binary is operator-configurable via --claude-bin, so +// this boundary matters even though Codex's path scrubs the same set. +export function claudeReviewEnv(baseEnv: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { ...baseEnv }; + for (const key of SCRUBBED_CREDENTIAL_ENV_KEYS) { + delete env[key]; + } + env.GIT_OPTIONAL_LOCKS = "0"; + return env; +} + +export type ClaudeReviewArgsOptions = { + proofScratchDir: string; + model?: string; +}; + +// Build the argv for a headless, read-only `claude -p` review. Deliberately +// uses an `--allowedTools` allow-list and NO `--permission-mode bypassPermissions`, +// so the engine cannot mutate the checkout via tools. +export function buildClaudeReviewArgs(options: ClaudeReviewArgsOptions): string[] { + const args = [ + "-p", + "--output-format", + "json", + "--allowedTools", + ...CLAUDE_REVIEW_READONLY_TOOLS, + "--add-dir", + options.proofScratchDir, + ]; + if (options.model) { + args.push("--model", options.model); + } + return args; +} + +export type ClaudeResultRecord = { + isError: boolean; + structuredOutput: unknown; + resultText: string; + errorText: string; +}; + +// Recognise the terminal result record in Claude CLI `-p` output. claude-code +// emits the conversation as JSONL records (an `init`, any number of +// `stream_event`s, then exactly one `{ "type": "result", ... }`). With +// `--output-format json` those records arrive as a single JSON array; with +// `stream-json` they arrive one-per-line. This mirrors how OpenClaw's own +// claude-stream-json parser locates the terminal record, in either shape. +export function parseClaudeResultRecord(raw: string): ClaudeResultRecord | null { + const trimmed = raw.trim(); + if (!trimmed) return null; + const records: Record[] = []; + const pushRecord = (value: unknown): void => { + if (value && typeof value === "object" && !Array.isArray(value)) { + records.push(value as Record); + } + }; + try { + const parsed: unknown = JSON.parse(trimmed); + if (Array.isArray(parsed)) parsed.forEach(pushRecord); + else pushRecord(parsed); + } catch { + for (const line of trimmed.split(/\r?\n/)) { + const text = line.trim(); + if (!text) continue; + try { + pushRecord(JSON.parse(text)); + } catch { + // Skip non-JSON lines (banners, partial chunks). + } + } + } + for (let i = records.length - 1; i >= 0; i -= 1) { + const record = records[i]; + if (record && record.type === "result") { + return { + isError: record.is_error === true, + structuredOutput: record.structured_output, + resultText: typeof record.result === "string" ? record.result : "", + errorText: typeof record.error === "string" ? record.error : "", + }; + } + } + return null; +} + +// Pull a bare JSON object out of the reviewer's final text (handles an optional +// ```json fence or surrounding prose). +export function extractJsonObject(text: string): string | null { + const trimmed = text.trim(); + if (!trimmed) return null; + const fence = trimmed.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/i); + const candidate = (fence ? (fence[1]?.trim() ?? "") : trimmed).trim(); + if (candidate.startsWith("{")) return candidate; + const first = candidate.indexOf("{"); + const last = candidate.lastIndexOf("}"); + if (first >= 0 && last > first) return candidate.slice(first, last + 1); + return null; +} + +export type PruneToSchemaResult = { value: unknown; droppedPaths: string[] }; + +// Drop object keys a JSON Schema does not define, at every nesting level, +// returning the pruned value and the dropped key paths. The Codex engine +// constrains generation to the decision schema (via `--output-schema`), so its +// output never carries stray keys; the Claude engine only gets the schema as +// prompt guidance, so the model occasionally adds a plausible-but-unspecified +// key (e.g. `kind` on a review finding) that strict decision validation would +// reject outright. Pruning to the schema before validation makes the Claude +// path tolerant of that harmless improvisation while still surfacing genuine +// problems (missing required fields, bad enum values) through the validator. +// The decision schema is flat (no `$ref`/`oneOf`/`anyOf`/`allOf`), so a direct +// `properties`/`items` walk is sufficient; nodes the schema does not describe +// as object/array are passed through untouched. +export function pruneToSchema(value: unknown, schema: unknown): PruneToSchemaResult { + const droppedPaths: string[] = []; + const walk = (node: unknown, schemaNode: unknown, path: string): unknown => { + if (!schemaNode || typeof schemaNode !== "object") return node; + const s = schemaNode as Record; + // Recognise object/array nodes by the presence of `properties`/`items` + // rather than an exact `type === "object"` string, so nodes typed as a + // union (e.g. `["object", "null"]`) or with `type` omitted are still + // pruned. The node's own runtime shape is re-checked before descending. + if (s.properties && typeof s.properties === "object") { + if (!node || typeof node !== "object" || Array.isArray(node)) return node; + const properties = s.properties as Record; + const out: Record = {}; + for (const [key, child] of Object.entries(node as Record)) { + const childPath = path ? `${path}.${key}` : key; + if (Object.prototype.hasOwnProperty.call(properties, key)) { + out[key] = walk(child, properties[key], childPath); + } else { + droppedPaths.push(childPath); + } + } + return out; + } + if (s.items) { + if (!Array.isArray(node)) return node; + return node.map((element, index) => walk(element, s.items, `${path}[${index}]`)); + } + return node; + }; + return { value: walk(value, schema, ""), droppedPaths }; +} diff --git a/src/clawsweeper.ts b/src/clawsweeper.ts index 64a0ef2cb6..190a0a5965 100644 --- a/src/clawsweeper.ts +++ b/src/clawsweeper.ts @@ -24,7 +24,14 @@ import { repositoryProfileForSlug, type RepositoryProfile, } from "./repository-profiles.js"; -import { codexEnv } from "./codex-env.js"; +import { codexEnv, codexLoginMethodConfig } from "./codex-env.js"; +import { + buildClaudeReviewArgs, + claudeReviewEnv, + extractJsonObject, + parseClaudeResultRecord, + pruneToSchema, +} from "./claude-engine.js"; import { ghRetryKind, ghRetryWaitMs, @@ -66,7 +73,7 @@ import { } from "./clawsweeper-args.js"; import { escapeRegExp, safeOutputTail, trimMiddle, truncateText } from "./clawsweeper-text.js"; -export { codexEnv } from "./codex-env.js"; +export { codexEnv, resolveCodexLoginMethod, codexLoginMethodConfig } from "./codex-env.js"; export { parseGhJson, parseGhJsonLines } from "./github-json.js"; export { itemNumbersArg } from "./clawsweeper-args.js"; export { safeOutputTail } from "./clawsweeper-text.js"; @@ -5651,6 +5658,16 @@ export function reviewDecisionSchemaText(): string { return reviewDecisionSchemaCache; } +let reviewDecisionSchemaObjectCache: unknown; + +// Parsed form of the decision schema, memoized for the process lifetime (the +// schema is immutable). Used by the Claude engine to prune model output to the +// schema before strict validation. +function reviewDecisionSchemaObject(): unknown { + reviewDecisionSchemaObjectCache ??= JSON.parse(reviewDecisionSchemaText()); + return reviewDecisionSchemaObjectCache; +} + function contextJsonForPrompt(context: ItemContext): string { return JSON.stringify(context, null, 2); } @@ -6174,12 +6191,14 @@ function runCodex(options: { } const codexConfig = [ `model_reasoning_effort="${options.reasoningEffort}"`, - 'forced_login_method="api"', + codexLoginMethodConfig(), 'approval_policy="never"', ]; if (options.serviceTier) codexConfig.splice(1, 0, `service_tier="${options.serviceTier}"`); + const codexBin = process.env.CODEX_BIN ?? "codex"; + const useShell = process.platform === "win32"; const result = spawnSync( - "codex", + codexBin, [ "exec", "-m", @@ -6207,6 +6226,7 @@ function runCodex(options: { input: prompt, maxBuffer: 128 * 1024 * 1024, timeout: options.timeoutMs, + ...(useShell ? { shell: true } : {}), }, ); const dirtyAfter = openclawDirtyStatus(options.openclawDir); @@ -6281,6 +6301,223 @@ function runCodex(options: { } } +// Review a single item with the Claude CLI in headless `-p` mode, as an +// alternative to the Codex engine. The binary is configurable (CLAUDE_BIN / +// --claude-bin), so any `claude -p`-compatible CLI can be dropped in. Reuses +// the same review prompt and the same Decision schema validation as Codex; the +// schema travels in the prompt (the CLI's inline-only `--json-schema` can't fit +// the full schema within the platform argv limit). Read-only is enforced by a +// read-only tool allow-list (no permission bypass), a scrubbed environment that +// withholds GitHub/OpenAI/app credentials from the binary, the prompt contract, +// and a dirty-checkout guard. See src/claude-engine.ts for the (unit-tested) +// argv/env construction and result parsing. +function runClaude(options: { + item: Item; + context: ItemContext; + git: GitInfo; + model: string; + targetDir: string; + claudeBin: string; + timeoutMs: number; + workDir: string; + additionalPrompt?: string; +}): Decision { + ensureDir(options.workDir); + const proofScratchDir = join(options.workDir, "proof-scratch", String(options.item.number)); + ensureDir(proofScratchDir); + const preparedMediaProof = prepareMediaProofArtifacts(options.context, proofScratchDir); + const promptPath = join(options.workDir, `${options.item.number}.claude.prompt.md`); + const basePrompt = buildReviewPrompt( + options.item, + options.context, + options.git, + options.additionalPrompt, + mediaProofRuntimeHints(proofScratchDir, preparedMediaProof), + ).text; + const prompt = `${basePrompt} + +## Output Contract + +Return ONLY a single JSON object that conforms exactly to the schema below. No prose, no markdown fences, and no commentary before or after the object. Every required field must be present with a valid enum value. + +\`\`\`json +${reviewDecisionSchemaText()} +\`\`\` +`; + writeFileSync(promptPath, prompt, "utf8"); + const dirtyBefore = openclawDirtyStatus(options.targetDir); + if (dirtyBefore) { + throw new Error( + `Target checkout is dirty before reviewing #${options.item.number}:\n${dirtyBefore}`, + ); + } + const useShell = process.platform === "win32"; + const claudeArgs = buildClaudeReviewArgs({ + proofScratchDir, + ...(options.model ? { model: options.model } : {}), + }); + const env = claudeReviewEnv(); + env.CLAWSWEEPER_PROOF_SCRATCH_DIR = proofScratchDir; + const decisionSchema = reviewDecisionSchemaObject(); + + // Run one Claude invocation with the given stdin prompt and reduce its output + // to a pruned decision-JSON candidate. Hard failures (spawn error, no result + // record, CLI-reported error, no JSON) throw; a parseable-but-imperfect object + // is returned for the caller's validation/retry loop. + const invokeClaude = (inputPrompt: string): { value: unknown; rawJson: string } => { + const result = spawnSync(options.claudeBin, claudeArgs, { + cwd: options.targetDir, + encoding: "utf8", + env, + input: inputPrompt, + maxBuffer: 128 * 1024 * 1024, + timeout: options.timeoutMs, + ...(useShell ? { shell: true } : {}), + }); + const dirtyAfter = openclawDirtyStatus(options.targetDir); + if (dirtyAfter) { + throw new Error( + `Claude dirtied the target checkout while reviewing #${options.item.number}:\n${dirtyAfter}`, + ); + } + if (result.error) { + throw new Error( + `Claude review failed for #${options.item.number}: ${result.error.message}\n${ + safeOutputTail(result.stderr) || safeOutputTail(result.stdout) || "No output." + }`, + ); + } + const parsed = parseClaudeResultRecord(result.stdout ?? ""); + if (!parsed) { + throw new Error( + `Claude review produced no parseable result record for #${options.item.number} (exit ${ + result.status ?? "unknown" + }).\n${safeOutputTail(result.stderr) || safeOutputTail(result.stdout) || "No output."}`, + ); + } + if (parsed.isError) { + throw new Error( + `Claude review reported an error for #${options.item.number}: ${ + parsed.errorText || parsed.resultText || "unknown error" + }`, + ); + } + let decisionJson: unknown; + if (parsed.structuredOutput && typeof parsed.structuredOutput === "object") { + decisionJson = parsed.structuredOutput; + } else { + const jsonText = extractJsonObject(parsed.resultText); + if (!jsonText) { + throw new Error( + `Claude review did not return a JSON decision for #${options.item.number}.\n${safeOutputTail( + parsed.resultText, + )}`, + ); + } + try { + decisionJson = JSON.parse(jsonText); + } catch (error) { + throw new Error( + `Claude review returned invalid JSON for #${options.item.number}: ${ + error instanceof Error ? error.message : String(error) + }\n${safeOutputTail(jsonText)}`, + ); + } + } + const pruned = pruneToSchema(decisionJson, decisionSchema); + if (pruned.droppedPaths.length) { + console.error( + `[local-review] dropped ${pruned.droppedPaths.length} non-schema key(s) from Claude output for #${options.item.number}: ${pruned.droppedPaths.join(", ")}`, + ); + } + const normalized = normalizeClaudeDecisionShape(pruned.value); + return { value: normalized, rawJson: JSON.stringify(normalized) }; + }; + + // Codex constrains generation to the schema, so its first output always + // validates. Claude only gets the schema as prompt guidance and its + // `--json-schema` is validate-or-drop (not constrained decoding), so a large + // schema like this one occasionally comes back with a missing required field + // or wrong-typed value. Rather than blind-default critical fields (e.g. the + // close/keep decision), feed the exact validation error back and let Claude + // self-correct, up to a small bound. Pruning above already absorbs the common + // extra-key case without a round-trip. + let candidate = invokeClaude(prompt); + let lastError: unknown; + for (let attempt = 0; attempt <= CLAUDE_REVIEW_MAX_CORRECTIONS; attempt += 1) { + try { + return parseDecision(candidate.value, options.item); + } catch (error) { + lastError = error; + const message = error instanceof Error ? error.message : String(error); + if (attempt === CLAUDE_REVIEW_MAX_CORRECTIONS) break; + console.error( + `[local-review] Claude decision for #${options.item.number} failed validation (attempt ${ + attempt + 1 + }/${CLAUDE_REVIEW_MAX_CORRECTIONS + 1}), asking Claude to correct it: ${message}`, + ); + candidate = invokeClaude(claudeCorrectionPrompt(candidate.rawJson, message)); + } + } + throw new Error( + `Claude review output failed schema validation for #${options.item.number} after ${ + CLAUDE_REVIEW_MAX_CORRECTIONS + 1 + } attempt(s): ${lastError instanceof Error ? lastError.message : String(lastError)}`, + ); +} + +// How many times the Claude review may be asked to correct a decision object +// that parses as JSON but fails strict schema validation. One initial attempt +// plus this many corrections. +const CLAUDE_REVIEW_MAX_CORRECTIONS = 2; + +// Prompt that hands Claude its previous (pruned) decision JSON and the exact +// validation error, asking for a corrected, complete object. Deliberately +// self-contained: it does not require re-reading the diff, only fixing the +// reported problem in the JSON it already produced. +function claudeCorrectionPrompt(previousJson: string, validationError: string): string { + return `Your previous review decision JSON failed strict schema validation with this error: + +${validationError} + +Here is the JSON you returned: + +\`\`\`json +${previousJson} +\`\`\` + +Fix ONLY the reported problem and return the corrected, COMPLETE decision object. Every required field must be present with a valid value of the correct type and a valid enum value where applicable. Return ONLY the single JSON object — no prose, no markdown fences, no commentary. + +\`\`\`json +${reviewDecisionSchemaText()} +\`\`\` +`; +} + +// Claude-only normalization applied to a pruned decision object before strict +// `parseDecision`. The shared Decision parser stays strict for every engine; +// this only smooths over the one omission Claude makes systematically. +// +// A `mantisRecommendation.maintainerComment` is only meaningful when mantis IS +// recommended. When it is not (the common case for a code review), models +// reliably omit the field, so default it to "" — but ONLY in the +// not-recommended case. A `status: "recommended"` result missing the comment is +// a genuinely malformed, non-dispatchable recommendation: it is left untouched +// so strict validation rejects it and the bounded correction retry has Claude +// supply the command. This preserves the "recommended ⇒ needs a command" +// invariant that downstream dispatch relies on. +export function normalizeClaudeDecisionShape(value: unknown): unknown { + if (!value || typeof value !== "object" || Array.isArray(value)) return value; + const decision = value as Record; + const mantis = decision.mantisRecommendation; + if (!mantis || typeof mantis !== "object" || Array.isArray(mantis)) return value; + const m = mantis as Record; + if (m.status !== "recommended" && typeof m.maintainerComment !== "string") { + return { ...decision, mantisRecommendation: { ...m, maintainerComment: "" } }; + } + return value; +} + function stripTextFence(markdown: string): string { const trimmed = markdown.trim(); const match = trimmed.match(/^```(?:markdown|md|text)?\s*\n([\s\S]*?)\n```\s*$/i); @@ -6434,11 +6671,13 @@ function runCodexAssist(options: { writeFileSync(promptPath, prompt, "utf8"); const codexConfig = [ `model_reasoning_effort="${options.reasoningEffort}"`, - 'forced_login_method="api"', + codexLoginMethodConfig(), 'approval_policy="never"', ]; + const codexBin = process.env.CODEX_BIN ?? "codex"; + const useShell = process.platform === "win32"; const result = spawnSync( - "codex", + codexBin, [ "exec", "-m", @@ -6457,6 +6696,7 @@ function runCodexAssist(options: { input: prompt, maxBuffer: 32 * 1024 * 1024, timeout: options.timeoutMs, + ...(useShell ? { shell: true } : {}), }, ); if (result.error || result.status !== 0 || !existsSync(outputPath)) { @@ -16908,6 +17148,164 @@ function checkCommand(): void { console.log("ok"); } +function localReviewCommand(args: Args): void { + const targetDir = resolve(stringArg(args.target_dir, ".")); + const targetRepoArg = stringArg(args.repo, ""); + const baseBranch = stringArg(args.base, "main"); + const model = stringArg(args.codex_model, DEFAULT_CODEX_MODEL); + const reasoningEffort = stringArg(args.codex_reasoning_effort, DEFAULT_REASONING_EFFORT); + const sandboxMode = stringArg(args.codex_sandbox, "read-only"); + const serviceTier = stringArg(args.codex_service_tier, DEFAULT_SERVICE_TIER); + const timeoutMs = numberArg(args.codex_timeout_ms, 600_000); + const engine = stringArg(args.engine, "codex").toLowerCase(); + if (engine !== "codex" && engine !== "claude") { + throw new Error(`--engine must be "codex" or "claude", got "${engine}"`); + } + // The Claude binary is configurable so any `claude -p`-compatible CLI can be + // dropped in; flag takes precedence over CLAUDE_BIN, default "claude". + // + // TRUST BOUNDARY (operators): --claude-bin / CLAUDE_BIN names an executable + // that this command spawns with the FULL review prompt (the target diff and + // repository context) on stdin, and which authenticates from the host's local + // Claude credentials. The subprocess env is scrubbed of GitHub/OpenAI/Codex + // and ClawSweeper App credentials (see claudeReviewEnv) and the binary is + // given only read-only Read/Grep/Glob tools, but it still receives review + // content and uses local Claude auth. Only point it at a binary you trust. + const claudeBin = stringArg(args.claude_bin, process.env.CLAUDE_BIN ?? "claude"); + const claudeModel = stringArg(args.claude_model, ""); + const outputDir = resolve(stringArg(args.output_dir, join(homedir(), ".clawsweeper-local-reviews"))); + ensureDir(outputDir); + + const repoName = + targetRepoArg || + run("git", ["remote", "get-url", "origin"], { cwd: targetDir }) + .replace(/.*github\.com[:/]/, "") + .replace(/\.git\s*$/, "") + .trim(); + + const diff = run("git", ["diff", `${baseBranch}...HEAD`], { cwd: targetDir }); + if (!diff.trim()) { + console.error("[local-review] no diff against " + baseBranch); + process.exit(1); + } + + const commitTitle = run("git", ["log", "--format=%s", "-1"], { cwd: targetDir }).trim(); + const commitBody = run("git", ["log", "--format=%b", "-1"], { cwd: targetDir }).trim(); + const author = run("git", ["log", "--format=%an", "-1"], { cwd: targetDir }).trim(); + const headSha = run("git", ["rev-parse", "HEAD"], { cwd: targetDir }).trim(); + const baseSha = run("git", ["rev-parse", baseBranch], { cwd: targetDir }).trim(); + const changedFiles = run("git", ["diff", "--name-only", `${baseBranch}...HEAD`], { cwd: targetDir }) + .trim() + .split("\n") + .filter(Boolean); + + const prBodyFile = stringArg(args.pr_body_file, ""); + let prBodyOverride = ""; + if (prBodyFile) { + const resolvedBodyFile = resolve(prBodyFile); + if (!existsSync(resolvedBodyFile)) { + throw new Error(`--pr-body-file not found: ${resolvedBodyFile}`); + } + prBodyOverride = readFileSync(resolvedBodyFile, "utf8").trim(); + } + const prBody = prBodyOverride || commitBody || commitTitle; + + const now = new Date().toISOString(); + const item: Item = { + repo: repoName, + number: 0, + kind: "pull_request", + title: commitTitle, + url: `local://${repoName}@${headSha.slice(0, 8)}`, + createdAt: now, + updatedAt: now, + closedAt: null, + author, + authorAssociation: "CONTRIBUTOR", + labels: [], + locked: false, + activeLockReason: null, + }; + + const context: ItemContext = { + issue: { title: commitTitle, body: prBody, user: { login: author }, state: "open" }, + comments: [], + timeline: [], + pullRequest: { + title: commitTitle, + body: prBody, + head: { sha: headSha, ref: "HEAD" }, + base: { sha: baseSha, ref: baseBranch }, + diff, + changed_files: changedFiles.length, + additions: diff.split("\n").filter((l: string) => l.startsWith("+") && !l.startsWith("+++")).length, + deletions: diff.split("\n").filter((l: string) => l.startsWith("-") && !l.startsWith("---")).length, + mergeable: true, + user: { login: author }, + }, + pullFiles: changedFiles.map((f: string) => ({ filename: f, status: "modified", patch: "" })), + pullCommits: [{ sha: headSha, commit: { message: commitTitle + "\n\n" + commitBody } }], + pullReviewComments: [], + counts: { comments: 0, timeline: 0 }, + }; + + const git: GitInfo = { mainSha: baseSha, latestRelease: null }; + + try { + setTargetRepo(repoName); + } catch { + setTargetRepo("openclaw/openclaw"); + } + + console.error( + `[local-review] repo=${repoName} base=${baseBranch} files=${changedFiles.length} diff=${diff.length} chars`, + ); + console.error(`[local-review] title: ${commitTitle}`); + console.error( + engine === "claude" + ? `[local-review] engine=claude bin=${claudeBin}${claudeModel ? ` model=${claudeModel}` : ""}` + : `[local-review] engine=codex model=${model} reasoning=${reasoningEffort}`, + ); + + const staleOutputPath = join(outputDir, `${item.number}.json`); + if (existsSync(staleOutputPath)) { + unlinkSync(staleOutputPath); + } + + const additionalPrompt = `This is a LOCAL review of staged changes, not a GitHub PR. The diff is from git diff ${baseBranch}...HEAD. Focus on code correctness, not PR metadata.`; + const decision = + engine === "claude" + ? runClaude({ + item, + context, + git, + model: claudeModel, + targetDir, + claudeBin, + timeoutMs, + workDir: outputDir, + additionalPrompt, + }) + : runCodex({ + item, + context, + git, + model, + openclawDir: targetDir, + reasoningEffort, + sandboxMode, + serviceTier, + timeoutMs, + workDir: outputDir, + additionalPrompt, + }); + + const outputPath = join(outputDir, "local-review.json"); + writeFileSync(outputPath, JSON.stringify(decision, null, 2), "utf8"); + console.error(`[local-review] decision written to ${outputPath}`); + console.log(JSON.stringify(decision, null, 2)); +} + export async function main(argv = process.argv.slice(2)): Promise { const args = parseArgs(argv); const command = args._[0] ?? "review"; @@ -16927,6 +17325,7 @@ export async function main(argv = process.argv.slice(2)): Promise { } else if (command === "status") statusCommand(args); else if (command === "assist") assistCommand(args); else if (command === "check") checkCommand(); + else if (command === "local-review") localReviewCommand(args); else { console.error(`Unknown command: ${command}`); process.exit(1); diff --git a/src/codex-env.ts b/src/codex-env.ts index adb62cf131..5026e41a70 100644 --- a/src/codex-env.ts +++ b/src/codex-env.ts @@ -1,3 +1,17 @@ +const VALID_LOGIN_METHODS = new Set(["api", "chatgpt"]); + +export function resolveCodexLoginMethod(): string { + const value = process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD?.trim().toLowerCase(); + if (value && VALID_LOGIN_METHODS.has(value)) { + return value; + } + return "api"; +} + +export function codexLoginMethodConfig(): string { + return `forced_login_method="${resolveCodexLoginMethod()}"`; +} + export type CodexEnvOptions = { ghToken?: string | undefined; }; diff --git a/src/commit-sweeper.ts b/src/commit-sweeper.ts index 28a91f29be..3eee537ecf 100644 --- a/src/commit-sweeper.ts +++ b/src/commit-sweeper.ts @@ -11,7 +11,7 @@ import { import { publishCheckFromReport, splitFrontMatter } from "./commit-checks.js"; import { argBool, argNumber, argString, parseArgs, type Args } from "./clawsweeper-args.js"; import { safeOutputTail } from "./clawsweeper-text.js"; -import { codexEnv } from "./codex-env.js"; +import { codexEnv, codexLoginMethodConfig } from "./codex-env.js"; import { runText } from "./command.js"; import { ghRetryKind, ghRetryWaitMs } from "./github-retry.js"; import { DEFAULT_TARGET_REPO, repositoryProfileFor } from "./repository-profiles.js"; @@ -296,12 +296,14 @@ function runCodex(options: { ); const codexConfig = [ `model_reasoning_effort="${options.reasoningEffort}"`, - 'forced_login_method="api"', + codexLoginMethodConfig(), 'approval_policy="never"', ]; if (options.serviceTier) codexConfig.splice(1, 0, `service_tier="${options.serviceTier}"`); + const codexBin = process.env.CODEX_BIN ?? "codex"; + const useShell = process.platform === "win32"; const result = spawnSync( - "codex", + codexBin, [ "exec", "-m", @@ -322,6 +324,7 @@ function runCodex(options: { input: readFileSync(promptPath, "utf8"), maxBuffer: 128 * 1024 * 1024, timeout: options.timeoutMs, + ...(useShell ? { shell: true } : {}), }, ); if (result.error || result.status !== 0 || !existsSync(outputPath)) { diff --git a/src/pr-close-coverage-proof.ts b/src/pr-close-coverage-proof.ts index 0b6f08685a..4944588730 100644 --- a/src/pr-close-coverage-proof.ts +++ b/src/pr-close-coverage-proof.ts @@ -1,7 +1,7 @@ import { spawnSync } from "node:child_process"; import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; import { join } from "node:path"; -import { codexEnv } from "./codex-env.js"; +import { codexEnv, codexLoginMethodConfig } from "./codex-env.js"; import { safeOutputTail, truncateText } from "./clawsweeper-text.js"; export type PrCloseCoverageProofModelDecision = "covered" | "keep_open"; @@ -252,14 +252,16 @@ export function runPrCloseCoverageProofModel(options: { if (existsSync(outputPath)) unlinkSync(outputPath); const codexConfig = [ `model_reasoning_effort="${options.runtime.reasoningEffort}"`, - 'forced_login_method="api"', + codexLoginMethodConfig(), 'approval_policy="never"', ]; if (options.runtime.serviceTier) { codexConfig.splice(1, 0, `service_tier="${options.runtime.serviceTier}"`); } + const codexBin = process.env.CODEX_BIN ?? "codex"; + const useShell = process.platform === "win32"; const result = spawnSync( - "codex", + codexBin, [ "exec", "-m", @@ -282,6 +284,7 @@ export function runPrCloseCoverageProofModel(options: { input: prompt, maxBuffer: 64 * 1024 * 1024, timeout: options.runtime.timeoutMs, + ...(useShell ? { shell: true } : {}), }, ); if (result.error) { diff --git a/test/clawsweeper.test.ts b/test/clawsweeper.test.ts index bc2a79b84a..cd8fe9109c 100644 --- a/test/clawsweeper.test.ts +++ b/test/clawsweeper.test.ts @@ -34,6 +34,8 @@ import { referencingMergedPullRequestsForIssueForTest, configSurfaceChangeFromPullFilesForTest, codexEnv, + resolveCodexLoginMethod, + codexLoginMethodConfig, dashboardClosedAt, extractLatestClawSweeperReviewForTest, filterReviewContextCommentsForTest, @@ -62,6 +64,7 @@ import { parseGhJson, parseGhJsonLines, parseDecision, + normalizeClaudeDecisionShape, mergeRiskLabelsForTest, mergeRiskLabelSchemeForTest, prRatingLabelsForTest, @@ -13873,6 +13876,152 @@ process.exit(1); } }); +test("runCodex accepts synthetic local-review Item and ItemContext shapes", () => { + const root = mkdtempSync(tmpPrefix); + const openclawDir = join(root, "openclaw"); + const workDir = join(root, "codex-work"); + const binDir = join(root, "bin"); + mkdirSync(openclawDir, { recursive: true }); + mkdirSync(binDir, { recursive: true }); + execFileSync("git", ["init"], { cwd: openclawDir, stdio: "ignore" }); + const scriptBody = `const fs = require("node:fs"); +const outputIndex = process.argv.indexOf("--output-last-message"); +fs.writeFileSync(process.argv[outputIndex + 1], process.env.CODEX_DECISION_JSON); +`; + if (process.platform === "win32") { + const scriptPath = join(binDir, "codex-fake.js"); + writeFileSync(scriptPath, scriptBody); + writeFileSync(join(binDir, "codex.cmd"), `@echo off\nnode "${scriptPath}" %*\n`); + } else { + const codexPath = join(binDir, "codex"); + writeFileSync(codexPath, `#!/usr/bin/env node\n${scriptBody}`, { mode: 0o755 }); + } + const originalPath = process.env.PATH; + const originalDecision = process.env.CODEX_DECISION_JSON; + process.env.PATH = `${binDir}${delimiter}${process.env.PATH ?? ""}`; + process.env.CODEX_DECISION_JSON = JSON.stringify( + closeDecision({ + decision: "keep_open", + closeReason: "none", + confidence: "medium", + summary: "Local review test.", + bestSolution: "Verify synthetic shapes.", + closeComment: "", + workReason: "Test.", + }), + ); + try { + const decision = runCodexForTest({ + item: { + repo: "test/repo", + number: 0, + kind: "pull_request", + title: "local change", + url: "local://test/repo@abc12345", + createdAt: "2026-06-04T00:00:00Z", + updatedAt: "2026-06-04T00:00:00Z", + closedAt: null, + author: "localuser", + authorAssociation: "CONTRIBUTOR", + labels: [], + locked: false, + activeLockReason: null, + }, + context: { + issue: { title: "local change", body: "test body", user: { login: "localuser" }, state: "open" }, + comments: [], + timeline: [], + pullRequest: { + title: "local change", + body: "test body", + head: { sha: "abc1234567890", ref: "HEAD" }, + base: { sha: "def0987654321", ref: "main" }, + diff: "+added line\n-removed line", + changed_files: 1, + additions: 1, + deletions: 1, + mergeable: true, + user: { login: "localuser" }, + }, + pullFiles: [{ filename: "test.ts", status: "modified", patch: "" }], + pullCommits: [{ sha: "abc1234567890", commit: { message: "local change" } }], + pullReviewComments: [], + counts: { comments: 0, timeline: 0 }, + }, + git: { mainSha: "def0987654321", latestRelease: null }, + model: "gpt-test", + openclawDir, + reasoningEffort: "high", + sandboxMode: "read-only", + serviceTier: "", + timeoutMs: 10_000, + workDir, + prompt: "Review this local change.", + }); + + assert.equal(decision.decision, "keep_open"); + assert.equal(decision.summary, "Local review test."); + } finally { + if (originalPath === undefined) delete process.env.PATH; + else process.env.PATH = originalPath; + if (originalDecision === undefined) delete process.env.CODEX_DECISION_JSON; + else process.env.CODEX_DECISION_JSON = originalDecision; + rmSync(root, { recursive: true, force: true }); + } +}); + +test("decision parser keeps mantisRecommendation.maintainerComment strictly required (all engines)", () => { + // The shared parser must reject an omitted maintainerComment regardless of + // status — defaulting it in the common parser would let a `recommended` + // result with no command pass validation yet be non-dispatchable downstream. + for (const status of ["not_recommended", "recommended"]) { + assert.throws( + () => + parseDecision( + closeDecision({ + mantisRecommendation: { + status, + scenario: status === "recommended" ? "telegram_live" : "none", + reason: "r", + }, + }), + ), + /maintainerComment must be a string/, + `omitted maintainerComment must be rejected for status=${status}`, + ); + } +}); + +test("normalizeClaudeDecisionShape defaults a missing maintainerComment ONLY when not recommended", () => { + // not-recommended + omitted → defaulted to "" (the common Claude case). + const notRec = normalizeClaudeDecisionShape({ + decision: "keep_open", + mantisRecommendation: { status: "not_recommended", scenario: "none", reason: "r" }, + }) as Record; + assert.equal(notRec.mantisRecommendation.maintainerComment, ""); + // recommended + omitted → left untouched, so strict parseDecision still + // rejects it (then the bounded retry has Claude supply the command). + const rec = normalizeClaudeDecisionShape({ + decision: "keep_open", + mantisRecommendation: { status: "recommended", scenario: "telegram_live", reason: "r" }, + }) as Record; + assert.ok(!("maintainerComment" in rec.mantisRecommendation)); + // A provided comment is preserved and the object is returned untouched. + const provided = { + decision: "keep_open", + mantisRecommendation: { + status: "not_recommended", + scenario: "none", + reason: "r", + maintainerComment: "keep me", + }, + }; + assert.equal( + (normalizeClaudeDecisionShape(provided) as any).mantisRecommendation.maintainerComment, + "keep me", + ); +}); + test("decision parser enforces required schema-shaped evidence", () => { assert.equal(parseDecision(closeDecision()).decision, "close"); assert.equal(parseDecision(closeDecision({ itemCategory: "skill" })).itemCategory, "skill"); @@ -16727,6 +16876,54 @@ test("codex subprocess env can expose an explicit read-only GitHub token", () => } }); +test("resolveCodexLoginMethod defaults to api", () => { + const original = process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + try { + delete process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + assert.equal(resolveCodexLoginMethod(), "api"); + } finally { + if (original === undefined) delete process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + else process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = original; + } +}); + +test("resolveCodexLoginMethod accepts chatgpt", () => { + const original = process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + try { + process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = "chatgpt"; + assert.equal(resolveCodexLoginMethod(), "chatgpt"); + } finally { + if (original === undefined) delete process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + else process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = original; + } +}); + +test("resolveCodexLoginMethod rejects invalid values", () => { + const original = process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + try { + process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = "oauth-browser"; + assert.equal(resolveCodexLoginMethod(), "api"); + process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = ""; + assert.equal(resolveCodexLoginMethod(), "api"); + } finally { + if (original === undefined) delete process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + else process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = original; + } +}); + +test("codexLoginMethodConfig produces valid Codex config string", () => { + const original = process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + try { + delete process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + assert.equal(codexLoginMethodConfig(), 'forced_login_method="api"'); + process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = "chatgpt"; + assert.equal(codexLoginMethodConfig(), 'forced_login_method="chatgpt"'); + } finally { + if (original === undefined) delete process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD; + else process.env.CLAWSWEEPER_CODEX_LOGIN_METHOD = original; + } +}); + test("related title search terms keep issue-specific words", () => { assert.deepEqual( relatedTitleSearchTerms( diff --git a/test/local-review-claude.test.ts b/test/local-review-claude.test.ts new file mode 100644 index 0000000000..21fdc9b108 --- /dev/null +++ b/test/local-review-claude.test.ts @@ -0,0 +1,209 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { + buildClaudeReviewArgs, + claudeReviewEnv, + extractJsonObject, + parseClaudeResultRecord, + pruneToSchema, + CLAUDE_REVIEW_READONLY_TOOLS, + SCRUBBED_CREDENTIAL_ENV_KEYS, +} from "../dist/claude-engine.js"; + +const REVIEW_FINDING_SCHEMA = { + type: "object", + additionalProperties: false, + properties: { + decision: { type: "string" }, + reviewFindings: { + type: "array", + items: { + type: "object", + additionalProperties: false, + properties: { + title: { type: "string" }, + file: { type: "string" }, + lineStart: { type: "number" }, + }, + }, + }, + reviewMetrics: { + type: "array", + items: { + type: "object", + additionalProperties: false, + properties: { label: { type: "string" }, value: { type: "string" } }, + }, + }, + }, +}; + +test("buildClaudeReviewArgs uses a read-only tool allow-list and no permission bypass", () => { + const args = buildClaudeReviewArgs({ proofScratchDir: "/tmp/scratch" }); + // Headless JSON output. + assert.deepEqual(args.slice(0, 3), ["-p", "--output-format", "json"]); + // Explicit read-only allow-list, never a bypass. + assert.ok(args.includes("--allowedTools")); + for (const tool of CLAUDE_REVIEW_READONLY_TOOLS) assert.ok(args.includes(tool)); + assert.ok(!args.includes("bypassPermissions")); + assert.ok(!args.includes("--permission-mode")); + // No mutating tools are ever permitted. + for (const tool of ["Edit", "Write", "NotebookEdit", "Bash"]) { + assert.ok(!args.includes(tool), `must not allow ${tool}`); + } + assert.deepEqual( + args.slice(args.indexOf("--add-dir")), + ["--add-dir", "/tmp/scratch"], + "scratch dir is the last arg group when no model is set", + ); +}); + +test("buildClaudeReviewArgs appends --model only when provided", () => { + assert.ok(!buildClaudeReviewArgs({ proofScratchDir: "/s" }).includes("--model")); + const withModel = buildClaudeReviewArgs({ proofScratchDir: "/s", model: "claude-x" }); + assert.deepEqual(withModel.slice(-2), ["--model", "claude-x"]); +}); + +test("claudeReviewEnv scrubs credentials the configurable binary must not receive", () => { + const base: NodeJS.ProcessEnv = { + GH_TOKEN: "gh", + GITHUB_TOKEN: "ghub", + COMMIT_SWEEPER_TARGET_GH_TOKEN: "t", + CLAWSWEEPER_PROOF_INSPECTION_TOKEN: "pi", + CLAWSWEEPER_APP_ID: "id", + CLAWSWEEPER_APP_PRIVATE_KEY: "key", + OPENAI_API_KEY: "oai", + CODEX_API_KEY: "cdx", + ANTHROPIC_API_KEY: "anthropic-kept", + PATH: "/usr/bin", + }; + const env = claudeReviewEnv(base); + for (const key of SCRUBBED_CREDENTIAL_ENV_KEYS) { + assert.equal(env[key], undefined, `${key} must be scrubbed`); + } + // Claude's own auth and benign vars are preserved; the source env is untouched. + assert.equal(env.ANTHROPIC_API_KEY, "anthropic-kept"); + assert.equal(env.PATH, "/usr/bin"); + assert.equal(env.GIT_OPTIONAL_LOCKS, "0"); + assert.equal(base.GH_TOKEN, "gh", "input env must not be mutated"); +}); + +test("parseClaudeResultRecord finds the result in the --output-format json array", () => { + const raw = JSON.stringify([ + { type: "system", subtype: "init" }, + { type: "stream_event", event: {} }, + { type: "result", subtype: "success", is_error: false, result: "{\"ok\":true}", structured_output: { ok: true } }, + ]); + const parsed = parseClaudeResultRecord(raw); + assert.ok(parsed); + assert.equal(parsed.isError, false); + assert.equal(parsed.resultText, '{"ok":true}'); + assert.deepEqual(parsed.structuredOutput, { ok: true }); +}); + +test("parseClaudeResultRecord finds the result in stream-json (NDJSON) lines", () => { + const raw = [ + JSON.stringify({ type: "system", subtype: "init" }), + JSON.stringify({ type: "result", is_error: false, result: "done" }), + ].join("\n"); + const parsed = parseClaudeResultRecord(raw); + assert.ok(parsed); + assert.equal(parsed.resultText, "done"); +}); + +test("parseClaudeResultRecord surfaces errors and returns null when no result record exists", () => { + const err = parseClaudeResultRecord( + JSON.stringify([{ type: "result", is_error: true, error: "boom", result: "" }]), + ); + assert.ok(err); + assert.equal(err.isError, true); + assert.equal(err.errorText, "boom"); + assert.equal(parseClaudeResultRecord(JSON.stringify([{ type: "system" }])), null); + assert.equal(parseClaudeResultRecord(""), null); + assert.equal(parseClaudeResultRecord("not json at all"), null); +}); + +test("extractJsonObject handles bare objects, fenced blocks, and surrounding prose", () => { + assert.equal(extractJsonObject('{"a":1}'), '{"a":1}'); + assert.equal(extractJsonObject('```json\n{"a":1}\n```'), '{"a":1}'); + assert.equal(extractJsonObject('Here is the decision:\n{"a":1}\nThanks.'), '{"a":1}'); + assert.equal(extractJsonObject("no object here"), null); + assert.equal(extractJsonObject(""), null); +}); + +test("pruneToSchema drops the stray keys Claude improvises and keeps real data", () => { + // The exact shapes the Claude review engine emitted in live runs: an extra + // `kind` on a review finding and a `label_note` on a review metric. + const { value, droppedPaths } = pruneToSchema( + { + decision: "keep_open", + reviewFindings: [{ title: "t", file: "a.ts", lineStart: 1, kind: "bug" }], + reviewMetrics: [{ label: "scope", value: "small", label_note: "extra" }], + bogusTopLevelKey: 123, + }, + REVIEW_FINDING_SCHEMA, + ); + assert.deepEqual(droppedPaths.sort(), [ + "bogusTopLevelKey", + "reviewFindings[0].kind", + "reviewMetrics[0].label_note", + ]); + const pruned = value as Record; + assert.equal(pruned.decision, "keep_open"); + assert.equal(pruned.reviewFindings[0].title, "t"); + assert.equal(pruned.reviewFindings[0].lineStart, 1); + assert.equal(pruned.reviewMetrics[0].label, "scope"); + assert.ok(!("kind" in pruned.reviewFindings[0])); + assert.ok(!("label_note" in pruned.reviewMetrics[0])); + assert.ok(!("bogusTopLevelKey" in pruned)); +}); + +test("pruneToSchema prunes nodes typed as a union (e.g. [\"object\",\"null\"]) or with type omitted", () => { + const schema = { + type: "object", + properties: { + // nullable object (union type) — must still be pruned + nested: { + type: ["object", "null"], + properties: { keep: { type: "string" } }, + }, + // object node with no explicit `type`, only `properties` + bag: { properties: { keep: { type: "string" } } }, + }, + }; + const { value, droppedPaths } = pruneToSchema( + { nested: { keep: "a", stray: 1 }, bag: { keep: "b", stray2: 2 } }, + schema, + ); + assert.deepEqual(droppedPaths.sort(), ["bag.stray2", "nested.stray"]); + const pruned = value as Record; + assert.equal(pruned.nested.keep, "a"); + assert.ok(!("stray" in pruned.nested)); + assert.equal(pruned.bag.keep, "b"); + assert.ok(!("stray2" in pruned.bag)); +}); + +test("pruneToSchema reports nothing to drop for a clean object and ignores unschema'd nodes", () => { + assert.deepEqual( + pruneToSchema({ decision: "close", reviewFindings: [] }, REVIEW_FINDING_SCHEMA).droppedPaths, + [], + ); + // A node the schema does not describe as object/array passes through untouched. + assert.equal(pruneToSchema("scalar", REVIEW_FINDING_SCHEMA).value, "scalar"); + assert.deepEqual(pruneToSchema({ any: 1 }, undefined).value, { any: 1 }); +}); + +test("pruneToSchema passes values through untouched when the schema node is malformed or mismatched", () => { + // null / non-object schema node → value returned as-is (top-of-walk guard). + assert.deepEqual(pruneToSchema({ a: 1 }, null).value, { a: 1 }); + // `properties` present but not an object → not treated as an object node. + const notObj = pruneToSchema({ a: 1, b: 2 }, { properties: "nope" }); + assert.deepEqual(notObj.value, { a: 1, b: 2 }); + assert.deepEqual(notObj.droppedPaths, []); + // Array schema but the value is not an array → returned unchanged, never mapped. + assert.deepEqual( + pruneToSchema({ a: 1 }, { type: "array", items: { type: "object", properties: {} } }).value, + { a: 1 }, + ); +});