diff --git a/docs/commit-sweeper.md b/docs/commit-sweeper.md index 58b8fd61bb..95a8ff3d1e 100644 --- a/docs/commit-sweeper.md +++ b/docs/commit-sweeper.md @@ -251,8 +251,27 @@ Write/check credentials are created only after Codex exits. The Codex environment strips GitHub and app secrets before subprocess launch. -Commit Sweeper is main-only. PR or branch review is deliberately out of scope -for this lane. +The scheduled/hosted Commit Sweeper lane is main-only — automated PR or branch +review on the server is deliberately out of scope. + +## Local branch review (`local-review`) + +For a manual, offline pre-PR self-review, the `local-review` subcommand reuses the +same Commit Sweeper engine against the current branch's committed range: + +```text +pnpm run build +pnpm local-review -- --base main +# reviews merge-base(, HEAD)..HEAD as one unit +# writes ~/.clawsweeper-local-reviews/run---/local-review.md +``` + +It is offline by contract and never contacts GitHub: it requires a clean checkout, +uses a unique per-run output directory, withholds all GitHub token env vars, skips +the `gh`-api commit-metadata hydration, points `GH_CONFIG_DIR` at an empty directory, +disables Codex web search, and explicitly forbids network lookups. Repositories +without a configured profile are rejected (no foreign-profile fallback). Unlike the +hosted lane it never writes to GitHub — the local Markdown report is the only output. ## Enable / Disable diff --git a/package.json b/package.json index 08c6537c8f..fca803f9fd 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "status": "node dist/clawsweeper.js status", "commit-review": "node dist/commit-sweeper.js", "commit-reports": "node dist/commit-sweeper.js reports", + "local-review": "node dist/commit-sweeper.js local-review", "repair:validate": "node dist/repair/validate-all.js", "repair:validate-job": "node dist/repair/validate-job.js", "repair:render": "node dist/repair/render-prompt.js", diff --git a/src/commit-sweeper.ts b/src/commit-sweeper.ts index b35bedf3eb..495a6b30e8 100644 --- a/src/commit-sweeper.ts +++ b/src/commit-sweeper.ts @@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process"; import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"; import { dirname, join, relative, resolve } from "node:path"; +import { homedir } from "node:os"; import { fileURLToPath } from "node:url"; import { changedFilesForCommit, @@ -15,7 +16,11 @@ import { codexEnv, codexLoginConfig, codexModelArgs, PUBLIC_CODEX_MODEL } from " import { codexProcessErrorCode, runCodexProcess } from "./codex-process.js"; import { runText } from "./command.js"; import { ghRetryKind, ghRetryWaitMs } from "./github-retry.js"; -import { DEFAULT_TARGET_REPO, repositoryProfileFor } from "./repository-profiles.js"; +import { + configuredRepositoryProfileFor, + DEFAULT_TARGET_REPO, + repositoryProfileFor, +} from "./repository-profiles.js"; export { isReviewableCommitPath } from "./commit-classifier.js"; @@ -99,7 +104,12 @@ function optionalGhJson(path: string, jq: string): string { } } -function commitMetadata(targetDir: string, targetRepo: string, sha: string): CommitMetadata { +export function commitMetadata( + targetDir: string, + targetRepo: string, + sha: string, + offline = false, +): CommitMetadata { const separator = "\x1f"; const raw = run( "git", @@ -113,14 +123,16 @@ function commitMetadata(targetDir: string, targetRepo: string, sha: string): Com ); const parts = raw.split(separator); const body = parts.slice(9).join(separator); - const githubAuthor = optionalGhJson( - `repos/${targetRepo}/commits/${sha}`, - ".author.login // empty", - ); - const githubCommitter = optionalGhJson( - `repos/${targetRepo}/commits/${sha}`, - ".committer.login // empty", - ); + // Offline mode (e.g. local-review) must not contact GitHub: skip the gh-api + // author/committer hydration. `gh` uses its own configured auth, so removing + // token env vars is not enough — the only way to honor the "no GitHub access" + // contract is to not run `gh` at all. + const githubAuthor = offline + ? "" + : optionalGhJson(`repos/${targetRepo}/commits/${sha}`, ".author.login // empty"); + const githubCommitter = offline + ? "" + : optionalGhJson(`repos/${targetRepo}/commits/${sha}`, ".committer.login // empty"); return { sha: assertSha(parts[0] ?? sha), parents: (parts[1] ?? "") @@ -279,6 +291,7 @@ function runCodex(options: { timeoutMs: number; workDir: string; additionalPrompt: string; + extraCodexConfig?: readonly string[]; }): string { ensureDir(options.workDir); const promptPath = join(options.workDir, `${options.sha}.prompt.md`); @@ -299,6 +312,7 @@ function runCodex(options: { `model_reasoning_effort="${options.reasoningEffort}"`, codexLoginConfig(), 'approval_policy="never"', + ...(options.extraCodexConfig ?? []), ]; if (options.serviceTier) codexConfig.splice(1, 0, `service_tier="${options.serviceTier}"`); const result = runCodexProcess({ @@ -379,6 +393,121 @@ function reviewCommand(args: Args): void { console.log(outputPath); } +// GitHub credential env vars scrubbed before the offline local-review engine runs. +// Covers both gh enterprise aliases (GH_ENTERPRISE_TOKEN and GITHUB_ENTERPRISE_TOKEN), +// since gh honors either; this is belt-and-suspenders with the empty GH_CONFIG_DIR set +// per run. +export const LOCAL_REVIEW_SCRUBBED_TOKEN_ENV: readonly string[] = [ + "GH_TOKEN", + "GITHUB_TOKEN", + "GH_ENTERPRISE_TOKEN", + "GITHUB_ENTERPRISE_TOKEN", + "COMMIT_SWEEPER_TARGET_GH_TOKEN", + "CLAWSWEEPER_PROOF_INSPECTION_TOKEN", +]; +export const LOCAL_REVIEW_WEB_SEARCH_CONFIG = 'web_search="disabled"'; + +export function localReviewAdditionalPrompt( + baseSha: string, + headSha: string, + baseBranch: string, +): string { + return `This is a LOCAL pre-PR review of the COMMITTED range ${baseSha.slice(0, 8)}..${headSha.slice(0, 8)} (your branch vs ${baseBranch}) on a clean checkout — no staged or untracked changes. Review code correctness, bugs, and security; ignore PR metadata. This review is offline: do not run gh, use web search, access URLs, or make any network request. Use only the local checkout and git history.`; +} + +// Local, offline pre-PR review of a whole branch: reviews the committed range +// merge-base(base, HEAD)..HEAD as a single unit, reusing the Commit Sweeper engine. +// Conforms to the #253 replacement spec: clean checkout, unique run dir, no GitHub +// token, and reject unsupported repos (never fall back to a foreign profile). +function localReviewCommand(args: Args): void { + const targetDir = resolve(argString(args, "target_dir", ".")); + const baseBranch = argString(args, "base", "main"); + const reportDir = resolve( + argString(args, "report_dir", join(homedir(), ".clawsweeper-local-reviews")), + ); + + // Spec: genuinely offline — withhold every GitHub credential from the review engine. + for (const tokenVar of LOCAL_REVIEW_SCRUBBED_TOKEN_ENV) { + delete process.env[tokenVar]; + } + + // Spec: committed-range review requires a clean checkout (no hidden staged/untracked work). + const dirtyTree = run("git", ["status", "--porcelain"], { cwd: targetDir }).trim(); + if (dirtyTree) { + console.error(`[local-review] working tree not clean — commit or stash first:\n${dirtyTree}`); + process.exit(1); + } + + const targetRepo = + argString(args, "target_repo", "") || + run("git", ["remote", "get-url", "origin"], { cwd: targetDir }) + .replace(/.*github\.com[:/]/, "") + .replace(/\.git\s*$/, "") + .trim(); + + // Spec: reject unsupported repos — never silently fall back to a foreign profile. + const profile = configuredRepositoryProfileFor(targetRepo); + if (!profile) { + console.error( + `[local-review] no review profile for '${targetRepo}'. Add a repository profile, or pass --target-repo .`, + ); + process.exit(1); + } + const profileSlug = profile.slug; + + // Range = merge-base(base, HEAD)..HEAD — the whole branch, reviewed as one unit. + const headSha = run("git", ["rev-parse", "HEAD"], { cwd: targetDir }).trim(); + const baseSha = run("git", ["merge-base", baseBranch, "HEAD"], { cwd: targetDir }).trim(); + if (!baseSha || baseSha === headSha) { + console.error(`[local-review] no commits on HEAD beyond ${baseBranch} — nothing to review.`); + process.exit(1); + } + + const metadata = commitMetadata(targetDir, targetRepo, headSha, true); + + // Spec: unique per-run dir so concurrent runs never collide on result paths. + const runDir = join(reportDir, `run-${headSha.slice(0, 8)}-${Date.now()}-${process.pid}`); + ensureDir(runDir); + + // Spec: hard-enforce no GitHub access. The review prompt suggests `gh` for issue + // refs, and `gh` uses its own configured auth (token-env deletion can't stop it), + // so point it at an empty config dir — any `gh` the spawned reviewer runs finds + // no cached credentials. Belt-and-suspenders with Codex's read-only sandbox. + const ghEmptyConfig = join(runDir, ".gh-empty"); + ensureDir(ghEmptyConfig); + process.env.GH_CONFIG_DIR = ghEmptyConfig; + + const additionalPrompt = localReviewAdditionalPrompt(baseSha, headSha, baseBranch); + + console.error( + `[local-review] repo=${targetRepo} profile=${profileSlug} base=${baseBranch} range=${baseSha.slice(0, 8)}..${headSha.slice(0, 8)}`, + ); + + const markdown = ensureCommitReportTimestamps( + runCodex({ + targetDir, + targetRepo, + sha: headSha, + baseSha, + metadata, + model: argString(args, "codex_model", DEFAULT_CODEX_MODEL), + reasoningEffort: argString(args, "codex_reasoning_effort", DEFAULT_REASONING_EFFORT), + sandboxMode: argString(args, "codex_sandbox", "read-only"), + serviceTier: argString(args, "codex_service_tier", DEFAULT_SERVICE_TIER), + timeoutMs: argNumber(args, "codex_timeout_ms", 1_800_000), + workDir: runDir, + additionalPrompt, + extraCodexConfig: [LOCAL_REVIEW_WEB_SEARCH_CONFIG], + }), + metadata, + ); + + const outputPath = join(runDir, "local-review.md"); + writeFileSync(outputPath, markdown.endsWith("\n") ? markdown : `${markdown}\n`, "utf8"); + console.error(`[local-review] report written to ${outputPath}`); + console.log(outputPath); +} + function commitShasArg(value: string): string[] { return value .split(/[\s,]+/) @@ -794,6 +923,7 @@ export function main(argv = process.argv.slice(2)): void { else if (command === "reports") reportsCommand(args); else if (command === "copy-artifacts") copyArtifactsCommand(args); else if (command === "dispatch-findings") dispatchFindingsCommand(args); + else if (command === "local-review") localReviewCommand(args); else { console.error(`Unknown command: ${command}`); process.exit(1); diff --git a/src/repository-profiles.ts b/src/repository-profiles.ts index 89dca5838f..7a4694df4e 100644 --- a/src/repository-profiles.ts +++ b/src/repository-profiles.ts @@ -92,9 +92,7 @@ export const REPOSITORY_PROFILES: RepositoryProfile[] = [ export function repositoryProfileFor(targetRepo: string): RepositoryProfile { const normalized = normalizeRepo(targetRepo); - const profile = REPOSITORY_PROFILES.find( - (candidate) => normalizeRepo(candidate.targetRepo) === normalized, - ); + const profile = configuredRepositoryProfileFor(normalized); if (profile) return profile; const fallback = fallbackRepositoryProfile(normalized); @@ -105,6 +103,13 @@ export function repositoryProfileFor(targetRepo: string): RepositoryProfile { ); } +export function configuredRepositoryProfileFor(targetRepo: string): RepositoryProfile | undefined { + const normalized = normalizeRepo(targetRepo); + return REPOSITORY_PROFILES.find( + (candidate) => normalizeRepo(candidate.targetRepo) === normalized, + ); +} + export function repositoryProfileForSlug(slug: string): RepositoryProfile | undefined { return REPOSITORY_PROFILES.find((candidate) => candidate.slug === slug); } diff --git a/test/local-review.test.ts b/test/local-review.test.ts new file mode 100644 index 0000000000..647b729da5 --- /dev/null +++ b/test/local-review.test.ts @@ -0,0 +1,153 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import { execFileSync, spawnSync } from "node:child_process"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { + commitMetadata, + localReviewAdditionalPrompt, + LOCAL_REVIEW_SCRUBBED_TOKEN_ENV, + LOCAL_REVIEW_WEB_SEARCH_CONFIG, +} from "../dist/commit-sweeper.js"; + +const GIT = process.env.GIT_BIN ?? "git"; +const CLI = fileURLToPath(new URL("../dist/commit-sweeper.js", import.meta.url)); + +function git(cwd: string, ...args: string[]): string { + return execFileSync(GIT, args, { cwd, encoding: "utf8" }).trim(); +} + +function initRepo(): string { + const dir = mkdtempSync(join(tmpdir(), "lr-")); + git(dir, "init", "-q"); + git(dir, "config", "user.name", "Test Author"); + git(dir, "config", "user.email", "test@example.com"); + git(dir, "config", "commit.gpgsign", "false"); + writeFileSync(join(dir, "a.txt"), "1\n"); + git(dir, "add", "a.txt"); + git(dir, "commit", "-q", "-m", "init"); + return dir; +} + +function runLocalReview(dir: string, args: string[]): { status: number | null; out: string } { + const result = spawnSync(process.execPath, [CLI, "local-review", "--target-dir", dir, ...args], { + cwd: dir, + encoding: "utf8", + env: { ...process.env }, + }); + return { status: result.status, out: `${result.stderr ?? ""}${result.stdout ?? ""}` }; +} + +// The local-review offline contract: commitMetadata(..., offline=true) must read +// only local git and never shell out to `gh`. Using an UNSUPPORTED repo slug proves +// it: a real gh api call against "example/unsupported-repo" would fail, so a passing +// run with populated local fields confirms gh was never invoked. +test("commitMetadata offline mode uses only local git and never contacts GitHub", () => { + const dir = initRepo(); + try { + const sha = git(dir, "rev-parse", "HEAD"); + const meta = commitMetadata(dir, "example/unsupported-repo", sha, true); + + assert.equal(meta.githubAuthor, ""); + assert.equal(meta.githubCommitter, ""); + assert.equal(meta.sha, sha); + assert.equal(meta.authorName, "Test Author"); + assert.equal(meta.authorEmail, "test@example.com"); + assert.equal(meta.subject, "init"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("local-review refuses a dirty working tree", () => { + const dir = initRepo(); + try { + writeFileSync(join(dir, "dirty.txt"), "x\n"); // untracked -> dirty + const { status, out } = runLocalReview(dir, [ + "--target-repo", + "openclaw/clawsweeper", + "--base", + "HEAD", + ]); + assert.equal(status, 1); + assert.match(out, /working tree not clean/); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("local-review rejects an unsupported repository instead of a foreign-profile fallback", () => { + const dir = initRepo(); + try { + const { status, out } = runLocalReview(dir, [ + "--target-repo", + "nobody/not-a-real-profile", + "--base", + "HEAD", + ]); + assert.equal(status, 1); + assert.match(out, /no review profile/); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("local-review rejects repositories covered only by a generic owner fallback", () => { + const dir = initRepo(); + try { + const { status, out } = runLocalReview(dir, [ + "--target-repo", + "openclaw/example-tool", + "--base", + "HEAD", + ]); + assert.equal(status, 1); + assert.match(out, /no review profile/); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("local-review reports nothing to review when HEAD has no commits beyond base", () => { + const dir = initRepo(); + try { + const { status, out } = runLocalReview(dir, [ + "--target-repo", + "openclaw/clawsweeper", + "--base", + "HEAD", + ]); + assert.equal(status, 1); + assert.match(out, /no commits on HEAD beyond/); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test("local-review scrubs both GitHub and GitHub Enterprise token aliases", () => { + for (const v of [ + "GH_TOKEN", + "GITHUB_TOKEN", + "GH_ENTERPRISE_TOKEN", + "GITHUB_ENTERPRISE_TOKEN", + "COMMIT_SWEEPER_TARGET_GH_TOKEN", + "CLAWSWEEPER_PROOF_INSPECTION_TOKEN", + ]) { + assert.ok( + LOCAL_REVIEW_SCRUBBED_TOKEN_ENV.includes(v), + `${v} must be in the offline scrub list`, + ); + } +}); + +test("local-review disables web search and forbids network lookups in its prompt", () => { + assert.equal(LOCAL_REVIEW_WEB_SEARCH_CONFIG, 'web_search="disabled"'); + const prompt = localReviewAdditionalPrompt("a".repeat(40), "b".repeat(40), "main"); + assert.match(prompt, /do not run gh/i); + assert.match(prompt, /do not .*web search/i); + assert.match(prompt, /do not .*network request/i); + assert.match(prompt, /only the local checkout and git history/i); +});