Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2d59813
feat: track attempted models in cost CSV (#1086)
pdd-bot May 19, 2026
9b49954
chore: PDD sync changes for #1086
pdd-bot May 19, 2026
85c9880
fix: address codex review-loop findings
pdd-bot May 19, 2026
253f74a
fix: address codex review-loop findings
pdd-bot May 19, 2026
c7f7af4
fix: address checkup attempted models findings
Serhan-Asad May 19, 2026
a93c4bb
fix: address codex review-loop findings (round 1)
Serhan-Asad May 19, 2026
d6b83ed
fix: address codex review-loop findings (round 2)
Serhan-Asad May 19, 2026
9df4957
test: harden attempted-model cost tracking coverage
Serhan-Asad May 19, 2026
d46638d
test: isolate fallback cwd unit test from cloud auth
Serhan-Asad May 19, 2026
464b7ce
fix: header-only peek on track_cost append (codex P2)
Serhan-Asad May 19, 2026
c9b71bd
fix: propagate cloud attempts through pre-loop setup errors (codex P2)
Serhan-Asad May 19, 2026
16bcf64
fix: dedup ctx attempted_models when chain extends existing (codex P2)
Serhan-Asad May 19, 2026
fff661c
fix: simplify attempted_models propagation contract (codex round 4)
Serhan-Asad May 19, 2026
02fe9ad
fix: pure concat for cross-invocation chain (codex P2)
Serhan-Asad May 19, 2026
6748e31
fix: don't mutate PYTEST_CURRENT_TEST at import (codex P2)
Serhan-Asad May 19, 2026
b4d4795
fix: prompt contradiction + arch nit + atomic migration
Serhan-Asad May 19, 2026
3eeadc2
fix: thread attempted_models through summarize_directory and auto_deps
Serhan-Asad May 20, 2026
5037d84
fix: surface cloud-fix attempted_models in track_cost
Serhan-Asad May 20, 2026
dbadf62
fix: address codex post-human-review findings
Serhan-Asad May 20, 2026
84a500e
fix: preserve failed cloud_fix_errors attempts (codex P2)
Serhan-Asad May 20, 2026
773fe3e
fix: align prompt / architecture / meta with code (review-loop)
Serhan-Asad May 20, 2026
b2a6894
fix: meta hashes + portable lock + scalar guard (3rd-pass review)
Serhan-Asad May 20, 2026
afa434e
fix: nonce + PID-alive lock semantics (4th-pass review)
Serhan-Asad May 20, 2026
f8299aa
fix: migration-only lock + nonce-write retry + malformed cleanup (5th…
Serhan-Asad May 20, 2026
b3194c0
fix: peek exception breadth + unsupported-FS unlocked migration (6th-…
Serhan-Asad May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .pdd/meta/llm_invoke_python.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"pdd_version": "0.0.241",
"timestamp": "2026-05-17T18:33:34.290918+00:00",
"command": "example",
"prompt_hash": "08f20f2e33886688d14103186bc55760a745b23e9bbe2626a0a7a6c4e1c1528c",
"code_hash": "42370b9d59b0caca264991021fb612f02bdae906c8043451288a64697af894ae",
"pdd_version": "0.0.244.dev7",
"timestamp": "2026-05-19T22:00:00.000000+00:00",
"command": "fix",
"prompt_hash": "8769b1adcb99a783edbfc75a0db123357c6a9959af78bba15ae2c80de00d802b",
"code_hash": "4c853854bca72a48109d0e6c6ac71c13869e37ff1bf2e5e17ad8b776500690ef",
"example_hash": "48c5aece0ddd153f95ec8a53802d2173d4da12a920b398c5feebb307b9958417",
"test_hash": "630ae15410752b1bda2608d72ab104add66b1f9cbbd134c003183339b46353f9",
"test_hash": "b612ca94d95d2902db9219088726a76eee2bf636ead5a1495b2087f4c6cddf6b",
"test_files": {
"test_llm_invoke.py": "630ae15410752b1bda2608d72ab104add66b1f9cbbd134c003183339b46353f9",
"test_llm_invoke.py": "b612ca94d95d2902db9219088726a76eee2bf636ead5a1495b2087f4c6cddf6b",
"test_llm_invoke_csv_model_registration.py": "1583b5e076fe5227a11f3d1079034ab4a21bc6131a3433f37bd68e35a99ba49e",
"test_llm_invoke_integration.py": "2eb4bd2565761a4148762c6fb73c887cb6e12ff962742e05f5c2d4d62b47aaf9",
"test_llm_invoke_nested_schema.py": "c983a19874abacc3e0ea9d6ca2ec87495960d2dd96d4e93be4039ed1bc995b9b",
Expand All @@ -21,4 +21,4 @@
"pdd/core/cloud.py": "0487c0b989996af144df3af1c818a6eeafe52fd209710e5a54eb43990c89ae97",
"pdd/server/token_counter.py": "3391a7c708713e3d370bf9e981a837f1f7ade08af75d69f402301dbf65a79c9a"
}
}
}
20 changes: 10 additions & 10 deletions .pdd/meta/summarize_directory_python.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
{
"pdd_version": "0.0.228",
"timestamp": "2026-05-06T03:28:21.684198+00:00",
"command": "regenerate-public",
"prompt_hash": "ea40b7699d4cf1ab98a7eed076aea7117a1c175ae16956afcfc7f06856837b17",
"code_hash": "a96e3b5ddb13717395fbc046cf0279486e1225d63deb51657f1ddb4eaf80c0af",
"pdd_version": "0.0.244.dev7",
"timestamp": "2026-05-19T22:00:00.000000+00:00",
"command": "fix",
"prompt_hash": "8aaf8f0055d1d10efb75b4219753f7a03c9a0094d730939b85e1952f7b8e004f",
"code_hash": "89663dc352f5a79a0bac618840d05bafbb900d47425d31e12b6229aac9797fa7",
"example_hash": null,
"test_hash": "6198ba0d33bcb2d06b6a90106dd2ae7adfc112a1ca8de62c58b23b8f9c2502a9",
"test_hash": "16f0ccf2eaf2c2a1498528da7913ac22aca8cffcc26941bb34a961c1ced032fb",
"test_files": {
"test_summarize_directory.py": "6198ba0d33bcb2d06b6a90106dd2ae7adfc112a1ca8de62c58b23b8f9c2502a9"
"test_summarize_directory.py": "16f0ccf2eaf2c2a1498528da7913ac22aca8cffcc26941bb34a961c1ced032fb"
},
"include_deps": {
"context/llm_invoke_example.py": "d749aa00a35c88c254c4aaa9e1280d85342d71214710e2f3f7a2bd9388a92168",
"context/llm_invoke_example.py": "48c5aece0ddd153f95ec8a53802d2173d4da12a920b398c5feebb307b9958417",
"context/load_prompt_template_example.py": "a1cd6619182c6c951f5856dda4070e202875a5884bbfab9cc191d24de2f4951f",
"context/python_preamble.prompt": "57a3e51f529024ec0cb9658cd6ac61a7c8051ba0c8e887b31cf00b2e78a07d83"
"context/python_preamble.prompt": "0388ed131bf986f8752e1bc4c81e4da0460cfe2908ec8c60b1314edbab768254"
}
}
}
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -804,8 +804,9 @@ The generated CSV file includes the following columns:
- cost: The estimated cost of the operation in USD (e.g., 0.05 for 5 cents). This will be zero for local models or operations that do not use an LLM.
- input_files: A list of input files involved in the operation
- output_files: A list of output files generated or modified by the operation
- attempted_models: A semicolon-delimited, chronological list of every model PDD attempted for this command, including the final successful model as well as any earlier models that failed and were abandoned (e.g., `vertex_ai/gemini-2.5-pro;deepseek/deepseek-chat`). For a normal single-model success, this contains a single entry; it is empty only when no LLM attempt was recorded for the command.

This comprehensive output allows for detailed tracking of not only the cost and type of operations but also the specific files involved in each PDD command execution.
This comprehensive output allows for detailed tracking of the cost and type of each operation, the specific files involved in each PDD command execution, and the full fallback chain when PDD switched models mid-run.

### Environment Variable

Expand Down
60 changes: 51 additions & 9 deletions architecture.json
Original file line number Diff line number Diff line change
Expand Up @@ -1138,8 +1138,8 @@
}
},
{
"reason": "Tracks LLM usage costs across operations.",
"description": "Records token usage and calculates costs. Aggregates costs across workflow steps.",
"reason": "Tracks LLM usage costs across operations and records the attempted-model fallback chain.",
"description": "Click decorator that records timestamp, model, command, cost, input/output files, and the full attempted-model chain (semicolon-delimited) to the cost CSV. Accepts both legacy tuple and enriched-dict result shapes from decorated commands.",
"dependencies": [],
"priority": 33,
"filename": "track_cost_python.prompt",
Expand All @@ -1155,15 +1155,15 @@
{
"name": "track_cost",
"signature": "(func)",
"returns": "None",
"returns": "Callable",
"sideEffects": [
"None"
]
},
{
"name": "extract_cost_and_model",
"signature": "(result: Any) -> Tuple[Any, str]",
"returns": "Tuple[Any, str]",
"signature": "(result: Any) -> Tuple[Any, str, List[str]]",
"returns": "Tuple[Any, str, List[str]]",
"sideEffects": [
"None"
]
Expand Down Expand Up @@ -2416,8 +2416,8 @@
},
{
"name": "cloud_fix_errors",
"signature": "(unit_test: str, code: str, prompt: str, error: str, error_file: str, strength: float, temperature: float, verbose: bool = False, time: float = DEFAULT_TIME, code_file_ext: str = '.py') -> Tuple[bool, bool, str, str, str, float, str]",
"returns": "Tuple[bool, bool, str, str, str, float, str]",
"signature": "(unit_test: str, code: str, prompt: str, error: str, error_file: str, strength: float, temperature: float, verbose: bool = False, time: float = DEFAULT_TIME, code_file_ext: str = '.py', protect_tests: bool = False, failure_classification: str | None = None) -> Tuple[bool, bool, str, str, str, float, str, List[str]]",
"returns": "Tuple[bool, bool, str, str, str, float, str, List[str]]",
"sideEffects": [
"None"
]
Expand All @@ -2440,8 +2440,8 @@
},
{
"name": "fix_error_loop",
"signature": "(unit_test_file: str, code_file: str, prompt_file: str, prompt: str, verification_program: str, strength: float, temperature: float, max_attempts: int, budget: float, error_log_file: str = 'error_log.txt', verbose: bool = False, time: float = DEFAULT_TIME, agentic_fallback: bool = True, use_cloud: bool = False)",
"returns": "None",
"signature": "(unit_test_file: str, code_file: str, prompt_file: str, prompt: str, verification_program: str, strength: float, temperature: float, max_attempts: int, budget: float, error_log_file: str = 'error_log.txt', verbose: bool = False, time: float = DEFAULT_TIME, agentic_fallback: bool = True, protect_tests: bool = False, use_cloud: bool = False, test_files: list[str] | None = None, failure_aware_retries: bool = True) -> Tuple[bool, str, str, int, float, str]",
"returns": "Tuple[bool, str, str, int, float, str]",
"sideEffects": [
"None"
]
Expand Down Expand Up @@ -8808,5 +8808,47 @@
"x": 24600,
"y": 800
}
},
{
"reason": "Provides unified LLM invocation across all PDD operations with provider abstraction, retries, and context window validation.",
"description": "Implements the llm_invoke entry point with model selection, structured output handling, cloud/local routing, attempted-model tracking, batch mode, cost accumulation, and Pydantic schema validation.",
"dependencies": [
"path_resolution_python.prompt",
"server/token_counter_python.prompt"
],
"priority": 236,
"filename": "llm_invoke_python.prompt",
"filepath": "pdd/llm_invoke.py",
"tags": [
"module",
"python"
],
"interface": {
"type": "module",
"module": {
"functions": [
{
"name": "llm_invoke",
"signature": "(prompt, input_json, strength, temperature, verbose, output_pydantic, output_schema, time, use_batch_mode, messages, language, use_cloud)",
"returns": "Dict[str, Any]"
},
{
"name": "setup_file_logging",
"signature": "(log_file_path=None)",
"returns": "None"
},
{
"name": "set_verbose_logging",
"signature": "(verbose=False)",
"returns": "None"
},
{
"name": "set_quiet_logging",
"signature": "()",
"returns": "None"
}
]
}
}
}
]
5 changes: 4 additions & 1 deletion context/cli_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ def example_cost_tracking_setup():
- cost: Estimated cost in USD (e.g., 0.05 for 5 cents)
- input_files: List of input files involved
- output_files: List of output files generated/modified
- attempted_models: Semicolon-delimited model attempt chain. A
single-model success contains the successful model once; this field
is empty only when no LLM attempt was recorded.

Args:
None
Expand Down Expand Up @@ -355,4 +358,4 @@ def main():


if __name__ == '__main__':
main()
main()
3 changes: 3 additions & 0 deletions context/core/cli_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ def example_cost_tracking_setup():
- cost: Estimated cost in USD (e.g., 0.05 for 5 cents)
- input_files: List of input files involved
- output_files: List of output files generated/modified
- attempted_models: Semicolon-delimited model attempt chain. A
single-model success contains the successful model once; this field
is empty only when no LLM attempt was recorded.

Args:
None
Expand Down
Loading
Loading