promptdriven · Serhan-Asad · May 15, 2026
diff --git a/README.md b/README.md
@@ -2746,7 +2746,7 @@ Options:
 - `--reviewers ROLES`: Legacy comma-separated review-loop role order, interpreted as `reviewer,fixer` (default: `codex,claude`).
 - `--reviewer-fallback ROLE`: Optional secondary reviewer role to invoke once if the primary reviewer cannot complete (for example, because of auth, network, sandbox, or CLI failures). The fallback must resolve to a role different from the reviewer and fixer; if it succeeds, it becomes the active reviewer for the remaining loop and the superseded primary's row in the final report is annotated `(optional, superseded by <fallback>)` so downstream verdict adapters drop the failed primary from the required-reviewer set and resolve to `ship_degraded` instead of `unknown`.
 - `--max-review-rounds INT`: Maximum primary-reviewer/fixer rounds (default: 5).
-- `--max-review-cost FLOAT`: Maximum review-loop LLM cost in USD (default: 10.0).
+- `--max-review-cost FLOAT`: Deprecated compatibility flag. Review-loop cost is reported as telemetry and does not stop the codex-review / claude-fix / codex-verify loop (default: 0.0).
 - `--max-review-minutes FLOAT`: Maximum review-loop wall-clock minutes (default: 90.0).
 - `--blocking-severities LIST`: Comma-separated severity names used for review-loop reporting and prompt guidance (default: `blocker,critical,medium`). The fixer still receives every valid reviewer finding.
 - `--continue-on-reviewer-limit`: Report provider, rate, context-window, timeout, auth, network, sandbox, permission, and non-zero-exit reviewer failures as `degraded` instead of `failed`. Degraded reviewers are still not clean unless a configured fallback reviewer completes successfully and takes over as the active reviewer.

diff --git a/examples/checkup_review_loop_example.py b/examples/checkup_review_loop_example.py
@@ -49,7 +49,6 @@ def main() -> None:
     config = ReviewLoopConfig(
         reviewers=parse_reviewers("codex,claude"),
         max_rounds=1,
-        max_cost=1.0,
         max_minutes=5.0,
     )
 
@@ -94,4 +93,4 @@ def main() -> None:
     console.print(f"Model: {model}")
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/pdd/agentic_checkup.py b/pdd/agentic_checkup.py
@@ -322,7 +322,7 @@ def run_agentic_checkup(
     fixer: Optional[str] = None,
     reviewer_fallback: Optional[str] = None,
     max_review_rounds: int = 5,
-    max_review_cost: float = 10.0,
+    max_review_cost: float = 0.0,
     max_review_minutes: float = 90.0,
     require_all_reviewers_clean: bool = True,
     continue_on_reviewer_limit: bool = False,

diff --git a/pdd/agentic_split_orchestrator.py b/pdd/agentic_split_orchestrator.py
@@ -1404,9 +1404,8 @@ def _check_budget(at_step: str) -> Optional[Tuple[bool, str, float, str, List[st
         """Budget guard — return abort tuple if --max-cost crossed, else None.
 
         Persists state so the next run without --max-cost (or with a higher
-        one) resumes from ``at_step``. Mirrors the pattern in
-        ``pdd/checkup_review_loop.py:1849-1862`` (max_cost_reached flag +
-        stop_reason).
+        one) resumes from ``at_step`` with the max_cost_reached flag and
+        stop_reason recorded.
 
         MUST be called both at the top of each step iteration AND after every
         ``total_cost += ...`` accumulation that precedes a possible early

diff --git a/pdd/checkup_review_loop.py b/pdd/checkup_review_loop.py
@@ -582,7 +582,9 @@ class ReviewLoopConfig:
     reviewer_fallback: Optional[str] = None
     review_only: bool = False
     max_rounds: int = 5
-    max_cost: float = 10.0
+    # Deprecated compatibility field. Cost is reported but does not stop the
+    # implement/review loop; max_rounds and max_minutes bound normal execution.
+    max_cost: float = 0.0
     max_minutes: float = 90.0
     # Kept for CLI/API compatibility. The loop has one authoritative reviewer,
     # so this is no longer used as a ship gate.
@@ -2998,21 +3000,18 @@ def _fix_dispute_note(fix: FixResult, finding: ReviewFinding) -> str:
 
 
 def _budget_exhausted(
-    config: ReviewLoopConfig,
-    state: ReviewLoopState,
+    _config: ReviewLoopConfig,
+    _state: ReviewLoopState,
     deadline: float,
 ) -> bool:
-    return state.total_cost >= config.max_cost or time.monotonic() >= deadline
+    return time.monotonic() >= deadline
 
 
 def _mark_budget_exhausted(
     config: ReviewLoopConfig,
     state: ReviewLoopState,
     deadline: float,
 ) -> None:
-    if state.total_cost >= config.max_cost:
-        state.max_cost_reached = True
-        state.stop_reason = f"Max review cost reached: ${config.max_cost:.2f}."
     if time.monotonic() >= deadline:
         state.max_duration_reached = True
         state.stop_reason = f"Max review duration reached: {config.max_minutes:g} minutes."

diff --git a/pdd/commands/checkup.py b/pdd/commands/checkup.py
@@ -129,9 +129,12 @@
 @click.option(
     "--max-review-cost",
     type=float,
-    default=10.0,
+    default=0.0,
     show_default=True,
-    help="Maximum review-loop LLM cost in USD.",
+    help=(
+        "Deprecated compatibility flag. Review-loop cost is reported but does "
+        "not stop the implement/review loop."
+    ),
 )
 @click.option(
     "--max-review-minutes",
@@ -279,9 +282,9 @@ def checkup(
             "--max-review-rounds must be >= 1.",
             param_hint="'--max-review-rounds'",
         )
-    if review_loop and max_review_cost <= 0:
+    if review_loop and max_review_cost < 0:
         raise click.BadParameter(
-            "--max-review-cost must be > 0.",
+            "--max-review-cost must be >= 0.",
             param_hint="'--max-review-cost'",
         )
     if review_loop and max_review_minutes <= 0:

diff --git a/pdd/pdd_completion.fish b/pdd/pdd_completion.fish
@@ -149,7 +149,7 @@ complete -c pdd -n "__fish_seen_subcommand_from checkup" -l reviewer -x -d "Prim
 complete -c pdd -n "__fish_seen_subcommand_from checkup" -l fixer -x -d "Fixer role"
 complete -c pdd -n "__fish_seen_subcommand_from checkup" -l reviewer-fallback -x -d "Fallback reviewer when primary fails"
 complete -c pdd -n "__fish_seen_subcommand_from checkup" -l max-review-rounds -x -d "Max review rounds"
-complete -c pdd -n "__fish_seen_subcommand_from checkup" -l max-review-cost -x -d "Max review cost (USD)"
+complete -c pdd -n "__fish_seen_subcommand_from checkup" -l max-review-cost -x -d "Deprecated; review cost is report-only"
 complete -c pdd -n "__fish_seen_subcommand_from checkup" -l max-review-minutes -x -d "Max review wall-clock minutes"
 complete -c pdd -n "__fish_seen_subcommand_from checkup" -l require-all-reviewers-clean -d "Require all reviewers clean"
 complete -c pdd -n "__fish_seen_subcommand_from checkup" -l no-require-all-reviewers-clean -d "Do not require all reviewers clean"

diff --git a/pdd/pdd_completion.zsh b/pdd/pdd_completion.zsh
@@ -466,7 +466,7 @@ _pdd_checkup() {
     '--fixer[Fixer role]:fixer:' \
     '--reviewer-fallback[Fallback reviewer when primary fails]:reviewer:' \
     '--max-review-rounds[Max review rounds]:rounds:' \
-    '--max-review-cost[Max review cost (USD)]:cost:' \
+    '--max-review-cost[Deprecated; review cost is report-only]:cost:' \
     '--max-review-minutes[Max review wall-clock minutes]:minutes:' \
     '--require-all-reviewers-clean[Require all reviewers clean]' \
     '--no-require-all-reviewers-clean[Do not require all reviewers clean]' \

diff --git a/pdd/prompts/agentic_checkup_python.prompt b/pdd/prompts/agentic_checkup_python.prompt
@@ -9,7 +9,7 @@ Write the `pdd/agentic_checkup.py` module.
 Entry point for the agentic checkup workflow. Accepts a GitHub issue URL, fetches issue content and comments, loads project context (architecture.json, .pddrc), then dispatches to the multi-step orchestrator that explores the project, identifies problems, and optionally fixes them.
 
 % Requirements
-1. Function: `run_agentic_checkup(issue_url: str, *, verbose: bool = False, quiet: bool = False, no_fix: bool = False, timeout_adder: float = 0.0, use_github_state: bool = True, reasoning_time: Optional[float] = None, pr_url: Optional[str] = None, review_loop: bool = False, review_only: bool = False, reviewers: str = "codex,claude", reviewer: Optional[str] = None, fixer: Optional[str] = None, reviewer_fallback: Optional[str] = None, max_review_rounds: int = 5, max_review_cost: float = 10.0, max_review_minutes: float = 90.0, require_all_reviewers_clean: bool = True, continue_on_reviewer_limit: bool = False, require_final_fresh_review: bool = True, blocking_severities: Optional[str] = None, clean_reviewer_states: Optional[str] = None, fallback_reviewer_on_failure: bool = False) -> Tuple[bool, str, float, str]`
+1. Function: `run_agentic_checkup(issue_url: str, *, verbose: bool = False, quiet: bool = False, no_fix: bool = False, timeout_adder: float = 0.0, use_github_state: bool = True, reasoning_time: Optional[float] = None, pr_url: Optional[str] = None, review_loop: bool = False, review_only: bool = False, reviewers: str = "codex,claude", reviewer: Optional[str] = None, fixer: Optional[str] = None, reviewer_fallback: Optional[str] = None, max_review_rounds: int = 5, max_review_cost: float = 0.0, max_review_minutes: float = 90.0, require_all_reviewers_clean: bool = True, continue_on_reviewer_limit: bool = False, require_final_fresh_review: bool = True, blocking_severities: Optional[str] = None, clean_reviewer_states: Optional[str] = None, fallback_reviewer_on_failure: bool = False) -> Tuple[bool, str, float, str]`
 2. Return 4-tuple: (success, message, total_cost, model_used)
 3. Check `gh` CLI availability via `_check_gh_cli()` (reused from `agentic_change.py`)
 4. Parse GitHub issue URL via `_parse_issue_url()` (reused from `agentic_change.py`)

diff --git a/pdd/prompts/agentic_split_orchestrator_python.prompt b/pdd/prompts/agentic_split_orchestrator_python.prompt
@@ -36,7 +36,7 @@ Orchestrator for the agentic split workflow. Diagnoses whether a PDD dev unit ha
    - `delete_dead`: opt-in dead symbol deletion in step 6 (last child only)
    - `intent`: user hint (CLI flag); if set, step 0 is skipped
    - `no_phase_extraction`: skip step 6a (phase decomposition)
-   - `max_cost` (Issue #1384): when not None, the orchestrator checks `state.total_cost >= max_cost` BOTH at the top of every iteration of the step loop AND immediately after every `total_cost += ...` accumulation that precedes a possible early return (diagnose_only, propose_only, LEAVE_ALONE, hard-stop) AND after each per-child fan-out cost in steps 6a/6/8 (post per-child state persistence). Without the post-accumulation check, an LLM step that adds cost and then immediately exits via early return — or a multi-child fan-out — can bypass the cap by one full step's cost (or N × per-child cost). Implement as a closure `_check_budget(at_step) -> Optional[abort_tuple]` that returns the standard 5-tuple when crossed, else `None`. On hit: sets `state["max_cost_reached"] = True`, persists state via `save_workflow_state`, restores signal handlers, calls `clear_agentic_progress()`, and returns the standard 5-tuple with `success=False` and a stop message of the form `"Aborted at step <step>: --max-cost ${cap:.2f} reached (spent ${cost:.2f}). Re-run without --max-cost or with a higher cap to resume."`. The next invocation without `--max-cost` (or with a higher value) resumes from the same state. State persistence ordering matters: budget check fires AFTER `state["step_outputs"][step] = output` AND `state["last_completed_step"] = step` so resume picks up the next step rather than re-running the just-completed one. For `9_refine_check`, also parse the paid `RefineCheck` output and persist `_pending_refine` BEFORE that post-step budget check; otherwise a max-cost abort after step 9 can save `last_completed_step="9_refine_check"` but lose the focused-refinement request. Mirrors the `max_cost_reached` pattern in `pdd/checkup_review_loop.py:1849-1862`.
+   - `max_cost` (Issue #1384): when not None, the orchestrator checks `state.total_cost >= max_cost` BOTH at the top of every iteration of the step loop AND immediately after every `total_cost += ...` accumulation that precedes a possible early return (diagnose_only, propose_only, LEAVE_ALONE, hard-stop) AND after each per-child fan-out cost in steps 6a/6/8 (post per-child state persistence). Without the post-accumulation check, an LLM step that adds cost and then immediately exits via early return — or a multi-child fan-out — can bypass the cap by one full step's cost (or N × per-child cost). Implement as a closure `_check_budget(at_step) -> Optional[abort_tuple]` that returns the standard 5-tuple when crossed, else `None`. On hit: sets `state["max_cost_reached"] = True`, persists state via `save_workflow_state`, restores signal handlers, calls `clear_agentic_progress()`, and returns the standard 5-tuple with `success=False` and a stop message of the form `"Aborted at step <step>: --max-cost ${cap:.2f} reached (spent ${cost:.2f}). Re-run without --max-cost or with a higher cap to resume."`. The next invocation without `--max-cost` (or with a higher value) resumes from the same state. State persistence ordering matters: budget check fires AFTER `state["step_outputs"][step] = output` AND `state["last_completed_step"] = step` so resume picks up the next step rather than re-running the just-completed one. For `9_refine_check`, also parse the paid `RefineCheck` output and persist `_pending_refine` BEFORE that post-step budget check; otherwise a max-cost abort after step 9 can save `last_completed_step="9_refine_check"` but lose the focused-refinement request. This split-specific cap is independent from `pdd checkup --review-loop`, where review cost is report-only telemetry.
 
 5. **Path sanitization helpers** (Issue #1384): expose two module-level helpers that the orchestrator MUST call between the Step 4 LLM output and any filesystem operation:
    - `_sanitize_path_string(value)`: when `value` is a `str`, strip whitespace, remove leading markdown backticks, then strip prose tails by truncating at the first whitespace / em-dash / hash / remaining backtick character. This preserves common markdown-wrapped paths like `` `pdd/foo.py` `` as `pdd/foo.py` while still dropping prose tails. Non-string inputs pass through unchanged. Empty strings remain empty.

diff --git a/pdd/prompts/checkup_review_loop_python.prompt b/pdd/prompts/checkup_review_loop_python.prompt
@@ -104,12 +104,10 @@ Add a PR-mode primary-reviewer/fixer review loop for `pdd checkup`.
 17b. The fixer prompt MUST ask the fixer to address every valid, in-scope finding when practical, while prioritizing the configured blocking severities because those determine whether the loop continues. It must explain that "focused fixes" means avoiding unrelated refactors and broad churn, not leaving real issues unfixed. If the fixer leaves any valid finding unfixed, it must return `partially_fixed` or `blocked` with a concrete rationale.
 18. The verifier pass is also the next full review pass: it must verify prior fixes and then re-review the whole PR. If it returns actionable findings, those findings become the next round's fixer input. Do not run an extra reviewer at the start of the next round unless there is no pending verifier finding set. This models the manual Codex-review / Claude-fix / Codex-review workflow and avoids two independent reviewers arguing.
 19. A reviewer that hits a provider/rate/quota/context-window limit, times out, or returns unparsable output produces `status` in `{"failed", "degraded", "missing"}` (use `_failure_status` to classify based on output keywords and `continue_on_reviewer_limit`). With the default `continue_on_reviewer_limit=False`, provider/rate/quota/context-window/timeout failures are `failed`; with the flag enabled they are `degraded`. Such statuses are NOT clean even if `clean_reviewer_states` is widened — preserve safety. Do not classify normal finding prose as degraded merely because it contains the word "context"; require context-window/length/limit wording.
-20. Budget caps:
+20. Loop bounds:
     - `max_rounds` (int, ≥ 1)
-    - `max_cost` (float USD, > 0)
     - `max_minutes` (float wall clock, > 0)
-    Crossing any cap stops the loop, sets the relevant `*_reached` flag on state, and renders a final report whose reviewer-status reflects the unfinished round.
-20a. If a cap is crossed during a successful fixer call, the loop MUST still commit and push the completed fixer changes before stopping. Push/commit is local git/GitHub plumbing, not another LLM spend. Stop before verifier/fresh-review when the cap is reached after push, so the final report correctly marks the fix as unverified but does not strand completed changes only in the local worktree.
+    Cost is reported but MUST NOT stop the normal implement/review loop. The loop continues until the reviewer reports merge-ready/no actionable findings, `max_rounds` is reached, a hard reviewer/fixer/push failure occurs, or the wall-clock duration is reached. `max_cost` is a deprecated compatibility field and does not set `max_cost_reached`.
 
 % Persisted artifacts
 21. Artifacts live under `<git_root>/.pdd/checkup-review-loop/issue-{issue_number}-pr-{pr_number}/`. The directory is created before the first reviewer runs.

diff --git a/pdd/prompts/commands/checkup_python.prompt b/pdd/prompts/commands/checkup_python.prompt
@@ -21,7 +21,7 @@
   - `--fixer` (explicit fixer role; overrides the second `--reviewers` role)
   - `--reviewer-fallback` (optional secondary reviewer role invoked once if the primary reviewer cannot complete; must differ from the resolved reviewer and fixer)
   - `--max-review-rounds` (int, default `5`)
-  - `--max-review-cost` (float, default `10.0`)
+  - `--max-review-cost` (float, default `0.0`; deprecated compatibility flag, report-only)
   - `--max-review-minutes` (float, default `90.0`)
   - `--require-all-reviewers-clean/--no-require-all-reviewers-clean` (default true)
   - `--continue-on-reviewer-limit` (flag; reports provider/rate/context-limit/auth/network/sandbox reviewer failures as `degraded` instead of `failed`, but never marks the active reviewer clean or continues mutation without a completed review)
@@ -33,7 +33,7 @@
 - Mode validation:
   - If `--validate-arch-includes` is set: `target`, `--pr`, `--issue` must all be `None`; dispatch to `run_validate_arch_includes_cli(...)` and return.
   - If `--review-only` is set without `--review-loop`: reject it.
-  - If `--review-loop` is set: require PR mode (`--pr` and `--issue`); reject `--no-fix` unless `--review-only` is also set; validate max-review values are positive (`--max-review-rounds >= 1`, cost/minutes > 0).
+  - If `--review-loop` is set: require PR mode (`--pr` and `--issue`); reject `--no-fix` unless `--review-only` is also set; validate max-review values (`--max-review-rounds >= 1`, `--max-review-cost >= 0`, minutes > 0).
   - If `--pr` or `--issue` is set: BOTH must be present; `target` must be `None`; validate `--pr` via `_parse_pr_url(pr_url)` (mention "GitHub pull-request URL" in error); validate `--issue` via `_is_github_issue_url(issue_url_opt)` (mention "GitHub issue URL" in error); use `issue_url_opt` as the effective issue URL.
   - Otherwise (default issue mode): require `target`; validate via `_is_github_issue_url(target)` (BadParameter with `param_hint="'TARGET'"` and message must mention "GitHub issue URL").
 - Force `--no-fix` when `--pr` is set and `--review-loop` is NOT set (legacy PR verification still cannot push fixes). Print a warning to stderr explaining why and how to re-invoke if fixes were intended.

diff --git a/pdd/prompts/pdd_completion_fish.prompt b/pdd/prompts/pdd_completion_fish.prompt
@@ -17,6 +17,6 @@
 % The generated pdd_completion.fish script must provide comprehensive auto-completion support for:
 %   - All commands of the PDD CLI (as per the README).
 %   - All command-specific options (as per the README).
-%   - The `checkup` command and its PR/review-loop options: `--pr`, `--issue`, `--review-loop`, `--review-only`, `--reviewers`, `--reviewer`, `--fixer`, `--reviewer-fallback`, review round/cost/minute limits, reviewer-status compatibility flags (`--continue-on-reviewer-limit`, `--fallback-reviewer-on-failure`), `--blocking-severities`, and `--clean-reviewer-states`. Emit only one `# checkup command` completion block — do not append a second checkup section for additional options, fish does not merge them and duplicates produce stale suggestions.
+%   - The `checkup` command and its PR/review-loop options: `--pr`, `--issue`, `--review-loop`, `--review-only`, `--reviewers`, `--reviewer`, `--fixer`, `--reviewer-fallback`, review round/minute limits, deprecated report-only cost telemetry (`--max-review-cost`), reviewer-status compatibility flags (`--continue-on-reviewer-limit`, `--fallback-reviewer-on-failure`), `--blocking-severities`, and `--clean-reviewer-states`. Emit only one `# checkup command` completion block — do not append a second checkup section for additional options, fish does not merge them and duplicates produce stale suggestions.
 %   - All global options, including those found in the README and the newly specified `--time` option.
 %   - Filenames, where appropriate for command arguments.