From 817ee231b165bfaff111c99c5b120476177aa682 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Fri, 8 May 2026 21:10:12 +0700 Subject: [PATCH 01/35] Harden ACP parity behavior --- crates/ai/src/acp/bridge_init.rs | 192 +++++++++-- docs/goals/athas-acp-zed-parity/goal.md | 82 +++++ .../notes/T001-acp-zed-map.md | 224 +++++++++++++ docs/goals/athas-acp-zed-parity/state.yaml | 304 ++++++++++++++++++ .../ai/services/acp-stream-handler.ts | 10 + .../ai/tests/acp-cancellation.test.ts | 157 +++++++++ 6 files changed, 935 insertions(+), 34 deletions(-) create mode 100644 docs/goals/athas-acp-zed-parity/goal.md create mode 100644 docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md create mode 100644 docs/goals/athas-acp-zed-parity/state.yaml create mode 100644 src/features/ai/tests/acp-cancellation.test.ts diff --git a/crates/ai/src/acp/bridge_init.rs b/crates/ai/src/acp/bridge_init.rs index 921ce4f43..00f4c1dda 100644 --- a/crates/ai/src/acp/bridge_init.rs +++ b/crates/ai/src/acp/bridge_init.rs @@ -326,7 +326,7 @@ async fn bootstrap_session( load_session(connection.clone(), cwd.clone(), existing_session_id.clone()).await; if let Ok(Err(err)) = &load_result - && matches!(err.code, acp::ErrorCode::AuthRequired) + && is_auth_required(err) { if let Err(e) = authenticate(connection.clone()).await { ctx.io_handle.abort(); @@ -342,16 +342,13 @@ async fn bootstrap_session( Ok(Ok(load_response)) => { log::info!("ACP session loaded: {}", existing_session_id); client.set_session_id(existing_session_id.clone()).await; - return Ok(SessionBootstrap { - session_id: Some(acp::SessionId::new(existing_session_id)), - initial_modes: load_response.modes.map(map_mode_state), - initial_config_options: load_response.config_options.map(&ctx.map_config_options), - }); + return Ok(bootstrap_from_loaded_session( + existing_session_id, + load_response, + &ctx.map_config_options, + )); } - Ok(Err(err)) - if matches!(err.code, acp::ErrorCode::MethodNotFound) - && ctx.supports_session_resume => - { + Ok(Err(err)) if should_try_resume_after_load(&err, ctx.supports_session_resume) => { log::warn!( "ACP session/load unavailable ({}), trying session/resume", err @@ -360,7 +357,7 @@ async fn bootstrap_session( resume_session(connection.clone(), cwd.clone(), existing_session_id.clone()).await; if let Ok(Err(err)) = &resume_result - && matches!(err.code, acp::ErrorCode::AuthRequired) + && is_auth_required(err) { if let Err(e) = authenticate(connection.clone()).await { ctx.io_handle.abort(); @@ -377,20 +374,13 @@ async fn bootstrap_session( Ok(Ok(resume_response)) => { log::info!("ACP session resumed: {}", existing_session_id); client.set_session_id(existing_session_id.clone()).await; - return Ok(SessionBootstrap { - session_id: Some(acp::SessionId::new(existing_session_id)), - initial_modes: resume_response.modes.map(map_mode_state), - initial_config_options: resume_response - .config_options - .map(&ctx.map_config_options), - }); + return Ok(bootstrap_from_resumed_session( + existing_session_id, + resume_response, + &ctx.map_config_options, + )); } - Ok(Err(err)) - if matches!( - err.code, - acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound - ) => - { + Ok(Err(err)) if should_fallback_to_new_after_resume(&err) => { log::warn!( "ACP session/resume unavailable or session missing ({}), falling back to \ session/new", @@ -415,12 +405,7 @@ async fn bootstrap_session( } } } - Ok(Err(err)) - if matches!( - err.code, - acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound - ) => - { + Ok(Err(err)) if should_fallback_to_new_after_load(&err) => { log::warn!( "ACP session/load unavailable or session missing ({}), falling back to session/new", err @@ -447,7 +432,7 @@ async fn bootstrap_session( let mut session_result = create_session(connection.clone(), cwd.clone()).await; if let Ok(Err(err)) = &session_result - && matches!(err.code, acp::ErrorCode::AuthRequired) + && is_auth_required(err) { if let Err(e) = authenticate(connection.clone()).await { ctx.io_handle.abort(); @@ -480,11 +465,64 @@ async fn bootstrap_session( log::info!("ACP session created: {}", session.session_id); client.set_session_id(session.session_id.to_string()).await; - Ok(SessionBootstrap { + Ok(bootstrap_from_new_session(session, &ctx.map_config_options)) +} + +fn is_auth_required(err: &acp::Error) -> bool { + matches!(err.code, acp::ErrorCode::AuthRequired) +} + +fn should_try_resume_after_load(err: &acp::Error, supports_session_resume: bool) -> bool { + supports_session_resume && matches!(err.code, acp::ErrorCode::MethodNotFound) +} + +fn should_fallback_to_new_after_load(err: &acp::Error) -> bool { + matches!( + err.code, + acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound + ) +} + +fn should_fallback_to_new_after_resume(err: &acp::Error) -> bool { + matches!( + err.code, + acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound + ) +} + +fn bootstrap_from_loaded_session( + existing_session_id: String, + load_response: acp::LoadSessionResponse, + map_config_options: &impl Fn(Vec) -> Vec, +) -> SessionBootstrap { + SessionBootstrap { + session_id: Some(acp::SessionId::new(existing_session_id)), + initial_modes: load_response.modes.map(map_mode_state), + initial_config_options: load_response.config_options.map(map_config_options), + } +} + +fn bootstrap_from_resumed_session( + existing_session_id: String, + resume_response: acp::ResumeSessionResponse, + map_config_options: &impl Fn(Vec) -> Vec, +) -> SessionBootstrap { + SessionBootstrap { + session_id: Some(acp::SessionId::new(existing_session_id)), + initial_modes: resume_response.modes.map(map_mode_state), + initial_config_options: resume_response.config_options.map(map_config_options), + } +} + +fn bootstrap_from_new_session( + session: acp::NewSessionResponse, + map_config_options: &impl Fn(Vec) -> Vec, +) -> SessionBootstrap { + SessionBootstrap { session_id: Some(session.session_id), initial_modes: session.modes.map(map_mode_state), - initial_config_options: session.config_options.map(ctx.map_config_options), - }) + initial_config_options: session.config_options.map(map_config_options), + } } async fn create_session( @@ -570,3 +608,89 @@ fn emit_initial_session_state( log::warn!("Failed to emit initial session config options: {}", e); } } + +#[cfg(test)] +mod tests { + use super::*; + + fn no_config_options(_: Vec) -> Vec { + Vec::new() + } + + #[test] + fn loaded_session_bootstrap_preserves_requested_session_id() { + let bootstrap = bootstrap_from_loaded_session( + "existing-session".to_string(), + acp::LoadSessionResponse::new(), + &no_config_options, + ); + + assert_eq!( + bootstrap.session_id.map(|id| id.to_string()), + Some("existing-session".to_string()) + ); + assert!(bootstrap.initial_modes.is_none()); + assert!(bootstrap.initial_config_options.is_none()); + } + + #[test] + fn method_not_found_load_uses_resume_only_when_supported() { + let err = acp::Error::method_not_found(); + + assert!(should_try_resume_after_load(&err, true)); + assert!(!should_try_resume_after_load(&err, false)); + } + + #[test] + fn missing_or_unsupported_load_falls_back_to_new_session() { + assert!(should_fallback_to_new_after_load( + &acp::Error::method_not_found() + )); + assert!(should_fallback_to_new_after_load( + &acp::Error::resource_not_found(None) + )); + assert!(!should_fallback_to_new_after_load( + &acp::Error::invalid_params() + )); + } + + #[test] + fn missing_or_unsupported_resume_falls_back_to_new_session() { + assert!(should_fallback_to_new_after_resume( + &acp::Error::method_not_found() + )); + assert!(should_fallback_to_new_after_resume( + &acp::Error::resource_not_found(None) + )); + assert!(!should_fallback_to_new_after_resume( + &acp::Error::internal_error() + )); + } + + #[test] + fn auth_required_errors_are_retriable_before_session_fallbacks() { + assert!(is_auth_required(&acp::Error::auth_required())); + assert!(!is_auth_required(&acp::Error::method_not_found())); + assert!(!should_fallback_to_new_after_load( + &acp::Error::auth_required() + )); + assert!(!should_fallback_to_new_after_resume( + &acp::Error::auth_required() + )); + } + + #[test] + fn new_session_bootstrap_uses_agent_created_session_id() { + let bootstrap = bootstrap_from_new_session( + acp::NewSessionResponse::new("created-session"), + &no_config_options, + ); + + assert_eq!( + bootstrap.session_id.map(|id| id.to_string()), + Some("created-session".to_string()) + ); + assert!(bootstrap.initial_modes.is_none()); + assert!(bootstrap.initial_config_options.is_none()); + } +} diff --git a/docs/goals/athas-acp-zed-parity/goal.md b/docs/goals/athas-acp-zed-parity/goal.md new file mode 100644 index 000000000..5867cb24d --- /dev/null +++ b/docs/goals/athas-acp-zed-parity/goal.md @@ -0,0 +1,82 @@ +# Athas ACP Zed Parity + +## Objective + +Harden Athas Agent Client Protocol behavior against Zed as the local reference implementation, then complete successive safe verified patches until the first behavioral parity tranche is proven. + +## Original Request + +Make sure Athas ACP is on par with Zed by studying the local Zed repo at `/Users/sw/Code/zed`, hardening Athas ACP, and adding providers or harnesses only where they are needed to patch weak angles. + +## Intake Summary + +- Input shape: `specific` +- Audience: Athas users and maintainers who need reliable ACP behavior. +- Authority: `approved` +- Proof type: `test` +- Completion proof: Zed-referenced ACP behavior gaps are mapped, the highest-leverage Athas ACP gaps are patched, and verification proves the selected core flows now behave correctly in Athas. +- Likely misfire: Producing a comparison document or broad provider wishlist without making and verifying concrete Athas ACP improvements. +- Blind spots considered: Whether "on par with Zed" means protocol correctness, provider breadth, UX polish, or harness maturity; whether provider expansion should be delayed until core behavior is hardened; whether proof should be behavioral rather than just architectural. +- Existing plan facts: Use `/Users/sw/Code/zed` as the source to study and understand; Athas already has a good foundation; patch the bad angles; avoid broad provider expansion in the first tranche unless parity-critical evidence requires it. + +## Goal Kind + +`specific` + +## Current Tranche + +Discover Zed's ACP behavior and Athas's current ACP implementation, identify the highest-leverage behavioral gaps, choose small safe implementation slices, patch Athas, verify with behavioral parity evidence, audit each slice, and continue until the first ACP hardening tranche is complete. + +## Non-Negotiable Constraints + +- Treat `/Users/sw/Code/zed` as the local reference implementation for ACP behavior. +- Cross-reference Zed before changing Athas ACP behavior. +- Preserve Athas's existing foundation; do not rewrite or bulldoze working ACP architecture without evidence. +- Do not broaden into provider expansion by default. +- Add or modify provider support only when Scout/Judge evidence shows it is required for a parity-critical ACP flow. +- Prefer behavioral proof over architecture notes. +- Keep implementation slices small, reviewable, and locally verifiable. +- Follow Athas repo rules: use Bun for repo scripts, update relevant specs when code changes, and run relevant checks before handoff. + +## Stop Rule + +Stop only when a final audit proves the full original outcome is complete. + +Do not stop after planning, discovery, or Judge selection if a safe Worker task can be activated. + +Do not stop after a single verified Worker slice when the broader ACP parity tranche still has safe local follow-up slices. After each slice audit, advance the board to the next highest-leverage safe Worker task and continue. + +Do not stop because a slice needs owner input, credentials, production access, destructive operations, or policy decisions. Mark that exact slice blocked with a receipt, create the smallest safe follow-up or workaround task, and continue all local, non-destructive work that can still move the goal toward the full outcome. + +## Canonical Board + +Machine truth lives at: + +`docs/goals/athas-acp-zed-parity/state.yaml` + +If this charter and `state.yaml` disagree, `state.yaml` wins for task status, active task, receipts, verification freshness, and completion truth. + +## Run Command + +```text +/goal Follow docs/goals/athas-acp-zed-parity/goal.md. +``` + +## PM Loop + +On every `/goal` continuation: + +1. Read this charter. +2. Read `state.yaml`. +3. Run the bundled GoalBuddy update checker when available and mention a newer version without blocking. +4. Re-check the intake: original request, input shape, authority, proof, blind spots, existing plan facts, and likely misfire. +5. Work only on the active board task. +6. Assign Scout, Judge, Worker, or PM according to the task. +7. Write a compact task receipt. +8. Update the board. +9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked. +10. If a problem, suggestion, or follow-up should become a repo artifact, create an approved issue/PR or ask the operator whether to create one. +11. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete. +12. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`. + +Issue and PR handoffs are supporting artifacts. `state.yaml` remains authoritative, and every external artifact decision must be recorded in a task receipt. diff --git a/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md b/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md new file mode 100644 index 000000000..c0916930c --- /dev/null +++ b/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md @@ -0,0 +1,224 @@ +# T001 Scout Receipt: ACP Zed Map + +## Result + +Mapped. + +## Summary + +Athas has a functional ACP client bridge with process lifecycle, initialize/auth/session bootstrap, prompt/cancel, session list/load/resume, file and terminal client capabilities, permission UI, events, and frontend stream handling. Zed's reference is materially deeper in behavioral turn semantics and test coverage: it models ACP as an `AcpThread`, guards prompt turns, cancels/replaces in-flight turns, marks pending tools cancelled, buffers terminal output before terminal creation, tracks usage/cost, and has extensive authorization/cancellation tests. Highest-leverage first tranche should harden Athas core protocol behavior, not broaden providers. + +## Evidence Paths + +Board: + +- `/Users/sw/Code/athas/docs/goals/athas-acp-zed-parity/goal.md` +- `/Users/sw/Code/athas/docs/goals/athas-acp-zed-parity/state.yaml` + +Athas core: + +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_prompt.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/client.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/types.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/terminal_state.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/config.rs` +- `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs` + +Athas frontend: + +- `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts` +- `/Users/sw/Code/athas/src/features/ai/components/chat/ai-chat.tsx` +- `/Users/sw/Code/athas/src/features/ai/types/acp.ts` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-activity-groups.test.ts` +- `/Users/sw/Code/athas/src/features/ai/tests/session-config-option-classifier.test.ts` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-session-info.test.ts` + +Zed reference: + +- `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs` +- `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` +- `/Users/sw/Code/zed/crates/agent/src/thread.rs` +- `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs` +- `/Users/sw/Code/zed/crates/agent/src/tests/test_tools.rs` +- `/Users/sw/Code/zed/docs/src/ai/external-agents.md` +- `/Users/sw/Code/zed/crates/project/src/agent_registry_store.rs` +- `/Users/sw/Code/zed/crates/migrator/src/migrations/m_2026_02_25/settings.rs` + +## Athas ACP Implementation Map + +Lifecycle: + +- Tauri commands expose start/stop/send/status/respond/mode/config/list/cancel in `src-tauri/src/commands/ai/acp.rs`. +- `AcpAgentBridge` dispatches commands to a dedicated worker thread and emits status in `crates/ai/src/acp/bridge.rs`. +- Worker stops existing agent before initialize and tracks process/connection/session/client in `crates/ai/src/acp/bridge.rs`. +- Process death emits error/status and clears state in `crates/ai/src/acp/bridge.rs`. +- Stop optionally calls `session/close` when advertised, aborts IO, stops child tree, and clears state in `crates/ai/src/acp/bridge.rs`. + +Session bootstrap: + +- Initialize creates `ClientSideConnection`, sends `InitializeRequest` with fs read/write, terminal, and Athas ext-method metadata in `crates/ai/src/acp/bridge_init.rs`. +- `AuthRequired` retry exists for session load/resume/new in `crates/ai/src/acp/bridge_init.rs`. +- Requested session tries `session/load`, then `session/resume` only when resume capability exists, then `session/new` fallback on `MethodNotFound`/`ResourceNotFound` in `crates/ai/src/acp/bridge_init.rs`. +- Initial modes/config options are emitted as frontend events in `crates/ai/src/acp/bridge_init.rs`. + +Streaming and events: + +- Client maps `UserMessageChunk`, `AgentMessageChunk`, `AgentThoughtChunk`, `ToolCall`, `ToolCallUpdate`, modes, config, session info, commands, and plan to `AcpEvent` in `crates/ai/src/acp/client.rs`. +- Frontend `AcpStreamHandler` listens to `acp-event` and routes chunks/tools/permission/status/modes/config/plan/prompt_complete/ui_action in `src/features/ai/services/acp-stream-handler.ts`. +- Chat component keeps global ACP state in sync and appends visible plan/error/status events in `src/features/ai/components/chat/ai-chat.tsx`. + +Cancellation: + +- Backend sends ACP cancel notification in `crates/ai/src/acp/bridge.rs`. +- Frontend force-stops active handler before invoking cancel, then completes UI locally in `src/features/ai/services/acp-stream-handler.ts`. +- Chat stop cancels prompt and responds to queued permissions as cancelled in `src/features/ai/components/chat/ai-chat.tsx`. + +Tools and client capabilities: + +- Permission request emits options and waits up to 300s for response in `crates/ai/src/acp/client.rs`. +- `read_text_file`/`write_text_file` are direct filesystem operations with simple error mapping in `crates/ai/src/acp/client.rs`. +- Terminal create/output/release/wait/kill are implemented over Athas terminal manager in `crates/ai/src/acp/client.rs`. +- Terminal output buffer/truncation is isolated in `crates/ai/src/acp/terminal_state.rs`; tests exist only for buffer truncation, not full ACP ordering. + +Provider handoff: + +- Agent registry is dynamic/manifest-driven; installed detection and managed wrappers live in `crates/ai/src/acp/config.rs`. +- Codex has special direct `codex-acp` or `npx @zed-industries/codex-acp` detection in `crates/ai/src/acp/config.rs`. +- Athas has catalog install/uninstall wrappers in `src-tauri/src/commands/ai/acp.rs`. + +Test health: + +- Athas visible ACP-related tests are mostly frontend utility tests and `terminal_state` unit tests. +- Scout found no Rust ACP bridge/client behavioral tests for prompt/cancel/session/tool ordering. + +## Zed ACP Implementation Map + +Lifecycle and abstraction: + +- Zed centers ACP behind `AgentConnection`: new/load/resume/close/session_history/auth/prompt/retry/cancel/modes/config/list/model selector in `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs`. +- `AcpThread` handles session updates into thread events and UI state in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. + +Turn model: + +- `AcpThread::send` optimistically inserts user message, checkpoints git state, and calls `connection.prompt` with a `UserMessageId` in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- `run_turn` cancels any previous turn, tracks `turn_id`, only clears same turn, flushes streaming text, handles `MaxTokens`/`Cancelled`/`Refusal` in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- Cancelled responses mark pending tools cancelled and skip completed-plan snapshot in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. + +File and terminal tools: + +- Zed file reads use project buffers, resource-not-found/invalid-params handling, shared snapshots, and agent location in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- Zed writes diff against buffer snapshot, update action log, format on save, and save buffer in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- Zed terminal creation shells command with env/PAGER handling and terminal entities in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. + +Authorization: + +- Built-in tool permissions support allow/deny once/always, path/url/terminal patterns, and pipeline pattern dropdowns in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. +- Authorization loop watches settings so always allow/deny resolves sibling pending prompts in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. +- Third-party/MCP authorization is separated in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. + +Provider breadth: + +- Docs list Gemini CLI, Claude Agent, Codex, GitHub Copilot, registry/custom agents, and MCP forwarding in `/Users/sw/Code/zed/docs/src/ai/external-agents.md`. +- Registry migration maps `gemini`, `claude-acp`, and `codex-acp` in `/Users/sw/Code/zed/crates/migrator/src/migrations/m_2026_02_25/settings.rs`. + +Test health: + +- Zed has direct ACP tests for terminal buffering, cancellation, pending tools, usage/cost, and authorization in `crates/acp_thread/src/acp_thread.rs` and `crates/agent/src/tests/mod.rs`. + +## Behavioral Parity Matrix + +- Lifecycle: Athas has process spawn/init/status/stop/close. Zed has stronger trait separation and turn/thread state. Gap is not an architecture rewrite; harden Athas state transitions around stop/crash/cancel. +- Sessions: Athas supports new/load/resume/list/close capability checks. Zed exposes load/resume/close/list/history through trait plus UI thread. Gap: Athas lacks behavioral tests for load-to-resume fallback and `ResourceNotFound`/`MethodNotFound`/`AuthRequired` branches. +- Streaming: Athas forwards chunks/tools/plans/modes/config/session info. Zed additionally flushes buffered streaming text at turn completion and suppresses duplicate echoed user chunks. Gap: Athas should test event ordering and duplicate user echo behavior. +- Cancellation: Athas sends cancel but frontend marks complete immediately. Zed cancels previous in-flight turn before new send, returns cancelled stop, marks pending tools cancelled, and tests multiple terminal/subagent cases. This is the top protocol behavior gap. +- Tools: Athas maps tool start/update/complete and terminal/file callbacks. Zed has richer buffer-aware file operations, terminal output ordering/buffering, and permission-state propagation. First tranche should target terminal/tool lifecycle semantics, not the full Zed buffer model. +- Errors: Athas maps many backend failures to string errors and synthetic "Tool call failed"; unknown stop reasons become `EndTurn`. Zed distinguishes `MaxTokens`, `Refusal`, `Cancelled`, `ResourceNotFound`, `InvalidParams`, and error events. Gap: preserve stop/error semantics rather than collapse. +- Provider handoff: Athas has dynamic catalog plus Codex adapter fallback. Zed has registry/provider breadth and MCP forwarding. This is provider breadth, not first-tranche protocol hardening, unless a core flow is blocked by a missing adapter. +- Tests: Athas lacks ACP bridge/client behavioral harness; Zed's tests are the main reference. First tranche should add focused Rust or TypeScript tests around one selected behavior. + +## Ranked Gap Candidates + +1. Cancellation semantics and pending tool finalization + - Type: protocol behavior + - Risk: medium + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`, `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs`, `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts`, `/Users/sw/Code/athas/src/features/ai/components/chat/ai-chat.tsx` + - Likely files: `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/types/acp.ts` + - Verification options: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`, focused fake ACP adapter/unit harness if available or added. + +2. Missing ACP behavioral test harness for session load/resume/auth fallbacks + - Type: protocol behavior + - Risk: low-medium + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` + - Likely files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_commands.rs` only if needed for test seams. + - Verification options: `cargo test -p athas-ai acp::bridge_init`, `bun check:rust`. + +3. Tool update ordering and unknown/succeeded-after-cancel handling + - Type: protocol behavior + - Risk: medium + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/client.rs`, `/Users/sw/Code/athas/src/features/ai/lib/tool-call-state.ts` + - Likely files: `crates/ai/src/acp/client.rs`, `src/features/ai/lib/tool-call-state.ts`, `src/features/ai/services/acp-stream-handler.ts` + - Verification options: frontend unit test for tool-call-state; Rust unit test for client event mapping if fake `AppHandle` is practical. + +4. UsageUpdate/token-cost events unsupported + - Type: protocol behavior + - Risk: low + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` + - Athas evidence: no `UsageUpdate`/`TokenUsage` hits under `/Users/sw/Code/athas/crates/ai` or `src/features/ai` except unrelated provider usage. + - Likely files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts` + - Verification options: typecheck plus unit event mapping test. + +5. Provider breadth and registry/MCP forwarding + - Type: provider breadth + - Risk: high for first tranche + - Zed evidence: `/Users/sw/Code/zed/docs/src/ai/external-agents.md` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/config.rs`, `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs` + - Recommendation: defer unless Judge decides a missing provider blocks cancellation/session/tool protocol proof. + +## Candidate Worker Slices + +W1: + +- Objective: Add focused cancellation behavior proof and patch Athas so cancelled ACP turns do not leave stale running/tool state. +- Candidate files: `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/types/acp.ts`, focused tests under `crates/ai/src/acp` or `src/features/ai/tests`. +- Candidate verification: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`. +- Stop if: requires provider expansion or no fake/stub path can prove behavior locally. + +W2: + +- Objective: Add `bridge_init` session bootstrap tests for load/resume/new `AuthRequired`/`MethodNotFound`/`ResourceNotFound` decisions. +- Candidate files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_commands.rs` only if needed for test seams. +- Candidate verification: `cargo test -p athas-ai acp::bridge_init`, `bun check:rust`. +- Stop if: needs live external ACP adapters instead of local stubs. + +W3: + +- Objective: Map ACP `UsageUpdate` into Athas events/types and add event-routing test, without UI polish beyond state availability. +- Candidate files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/*`. +- Candidate verification: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`. +- Stop if: requires broad analytics/cost UI design. + +## Commands Run By Scout + +- `rg ACP/acp` across Athas board/src/crates/src-tauri. +- `sed`/`nl` reads of Athas ACP bridge/init/prompt/client/types/config/frontend files. +- `rg ACP/acp` across `/Users/sw/Code/zed`. +- `sed`/`nl` reads of Zed `acp_thread`, `connection`, agent thread, docs, tests. +- `jq '.scripts' package.json`. +- `git status --short`. + +## Health Signals + +- Athas repo had untracked `docs/` in `git status`; Scout did not edit or stage anything. +- No dependency installs, builds, tests, or destructive commands were run. +- Athas validation scripts available: `bun typecheck`, `bun check`, `bun check:rust`, `bunx vp test run`. + +## Ambiguity Requiring Judge + +- Whether first Worker should patch behavior immediately or first add a fake ACP harness. Scout recommends W1 only if Judge permits adding the minimal harness/test surface needed to prove cancellation locally. +- Whether `UsageUpdate` belongs in the first tranche. It is a clear Zed parity gap but lower leverage than cancellation/session/tool correctness. +- Whether provider breadth matters for this tranche. Evidence says defer broad provider expansion; only Codex adapter parity appears locally relevant today. diff --git a/docs/goals/athas-acp-zed-parity/state.yaml b/docs/goals/athas-acp-zed-parity/state.yaml new file mode 100644 index 000000000..d8acb3fcd --- /dev/null +++ b/docs/goals/athas-acp-zed-parity/state.yaml @@ -0,0 +1,304 @@ +# GoalBuddy v2 state.yaml +# Board truth lives here. goal.md is the editable charter; notes/ holds long receipts only. + +version: 2 + +goal: + title: "Athas ACP Zed Parity" + slug: "athas-acp-zed-parity" + kind: specific + tranche: "Zed-referenced ACP behavioral hardening: discover, patch, verify, audit, and continue through safe slices until the first parity tranche is complete." + status: done + intake: + original_request: "Make Athas ACP on par with Zed by studying /Users/sw/Code/zed, hardening ACP, and adding providers or harnesses where needed." + interpreted_outcome: "Athas ACP behavior is hardened against Zed's local implementation and verified with behavioral parity evidence for the selected core flows." + input_shape: specific + audience: "Athas users and maintainers" + authority: approved + proof_type: test + completion_proof: "Zed-referenced ACP gaps are mapped, selected Athas patches are implemented, relevant tests/checks pass, and a final audit confirms behavioral parity for the first tranche." + likely_misfire: "Stopping at comparison notes, provider brainstorming, or broad architecture commentary without verified Athas ACP behavior changes." + blind_spots_considered: + - "Parity could mean protocol correctness, provider breadth, UX behavior, or harness maturity." + - "Broad provider expansion can distract from hardening core ACP behavior." + - "Athas already has a foundation; the goal is targeted patching, not a rewrite." + - "Proof must be behavioral and verifiable, not just source-backed notes." + existing_plan_facts: + - "Use /Users/sw/Code/zed as the reference source." + - "Cross-reference Zed before changing Athas ACP." + - "Athas already has a good ACP foundation." + - "Patch weak angles in Athas rather than replacing everything." + - "Do not pursue broad provider expansion in the first tranche unless parity-critical evidence requires it." + +rules: + pm_owns_state: true + one_active_task: true + max_write_workers: 1 + no_implementation_without_worker_or_pm_task: true + no_completion_without_judge_or_pm_audit: true + planning_is_not_completion: true + queued_required_worker_blocks_completion: true + continuous_until_full_outcome: true + missing_input_or_credentials_do_not_stop_goal: true + preserve_and_validate_existing_plan: true + intake_misfire_must_be_audited: true + +agents: + scout: installed + worker: installed + judge: installed + +active_task: null + +tasks: + - id: T001 + type: scout + assignee: Scout + status: done + reasoning_hint: medium + objective: "Map Athas ACP against Zed ACP and identify the highest-leverage behavioral gaps for the first hardening tranche." + inputs: + - "/Users/sw/Code/athas" + - "/Users/sw/Code/zed" + - "Athas ACP-related crates, commands, frontend integration, tests, and docs discovered by search." + - "Zed ACP-related implementation, provider integration, lifecycle behavior, tests, and docs discovered by search." + constraints: + - "Read-only." + - "Do not edit implementation files." + - "Treat /Users/sw/Code/zed as the reference source." + - "Use concrete file paths, symbols, tests, and command evidence." + - "Separate protocol behavior gaps from provider breadth gaps." + - "Prefer targeted hardening candidates over rewrite proposals." + expected_output: + - "Athas ACP implementation map with file-path evidence." + - "Zed ACP implementation map with file-path evidence." + - "Behavioral parity matrix covering lifecycle, sessions, streaming, cancellation, tools, errors, and provider handoff where applicable." + - "Ranked Athas gap candidates with risk, likely files, and verification options." + - "Candidate Worker slices that avoid broad provider expansion unless required." + receipt: + result: done + note: "notes/T001-acp-zed-map.md" + summary: "Mapped Athas ACP against Zed. Top gap is cancellation semantics and pending tool finalization; session bootstrap tests, tool ordering, and usage events follow. Provider breadth should stay deferred unless parity-critical." + evidence: + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" + - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts" + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/acp_thread/src/connection.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" + candidate_worker_slices: + - "W1: cancellation behavior proof and stale running/tool state patch." + - "W2: bridge_init session bootstrap fallback tests." + - "W3: UsageUpdate event mapping and routing test." + + - id: T002 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Choose the first safe Athas ACP hardening slice from Scout evidence." + inputs: + - "T001 receipt or note." + - "Current dirty diff." + - "Athas project validation commands." + constraints: + - "Read-only." + - "Do not implement." + - "Require Zed evidence for any claimed parity gap." + - "Prefer the smallest slice with strong verification." + - "Do not choose broad provider expansion unless it is required for a parity-critical behavior." + expected_output: + - "Decision." + - "Why this slice is first." + - "Exact Worker objective." + - "allowed_files." + - "verify." + - "stop_if." + - "Deferred or blocked candidates." + receipt: + result: done + decision: "Select a narrowed W1 cancellation-state slice first: harden Athas frontend ACP cancellation so an active cancelled prompt finalizes visible running tool state instead of leaving stale running tool calls behind. Do not attempt the full backend prompt-turn rewrite yet." + why_this_slice_is_first: + - "Smallest parity-critical subset of Scout's top-ranked gap." + - "Zed cancels running turns and marks pending or in-progress tools cancelled; Athas currently force-stops frontend handling without visible tool finalization." + - "Implementable without provider expansion, live ACP adapters, Rust process lifecycle changes, or broad architecture work." + - "Verifiable with focused frontend tests plus typecheck." + evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs" + - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts" + - "/Users/sw/Code/athas/src/features/ai/lib/tool-call-state.ts" + - "/Users/sw/Code/athas/src/features/ai/types/acp.ts" + next_allowed_task: T003 + deferred_or_blocked_candidates: + - "Deferred: full backend prompt-turn lifecycle hardening." + - "Deferred: bridge_init session fallback tests." + - "Deferred: UsageUpdate/token-cost event mapping." + - "Blocked for this tranche unless Judge reopens: broad provider expansion, registry/MCP forwarding, or adding providers solely for breadth." + full_outcome_complete: false + + - id: T003 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add a focused frontend cancellation regression test and patch Athas ACP stream handling so cancelling an active ACP prompt finalizes any active visible tool calls and activity rows as cancelled/failed, prevents late events from resurrecting the cancelled turn, and still sends cancel_acp_prompt to the backend." + allowed_files: + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/lib/tool-call-state.ts" + - "src/features/ai/lib/acp-event-timeline.ts" + - "src/features/ai/components/chat/ai-chat.tsx" + - "src/features/ai/components/messages/tool-call-display.tsx" + - "src/features/ai/types/acp.ts" + - "src/features/ai/tests/acp-cancellation.test.ts" + verify: + - "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" + - "bun typecheck" + - "If the test runner cannot target both files reliably, run: bunx vp test run src/features/ai/tests" + stop_if: + - "The fix requires editing crates/ai/src/acp/* or src-tauri/*; escalate back to Judge for a backend cancellation slice." + - "The fix requires a live ACP provider, external adapter, or broad fake ACP harness instead of local frontend mocks." + - "The fix requires provider expansion, registry/MCP work, new dependencies, or test/build config changes." + - "The Worker cannot produce a deterministic regression test showing active tool state is finalized on cancel." + - "Late-event handling is ambiguous enough that implementing it would silently discard valid future-turn events." + - "Verification fails twice after scoped fixes." + receipt: + result: done + changed_files: + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-cancellation.test.ts" + commands: + - cmd: "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" + status: pass + - cmd: "bun typecheck" + status: pass + summary: "ACP cancellation now finalizes tracked active tools with a Cancelled error before cleanup, both for frontend force-stop cancellation and protocol prompt_complete(cancelled). Regression tests prove active tool finalization, backend cancel invocation, and late-event suppression after cancellation." + + - id: T004 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Audit the first implemented ACP slice against Zed-referenced behavioral parity and decide whether to continue with another slice." + inputs: + - "T001 receipt or note." + - "T002 receipt." + - "T003 receipt." + - "Current dirty diff." + - "Last verification." + constraints: + - "Read-only." + - "Reject completion if the slice is only documented but not verified." + - "Reject completion if required ACP hardening work remains safely actionable." + - "If not complete, define the next smallest safe Scout, Judge, or Worker task." + expected_output: + - "slice_complete: true | false." + - "full_outcome_complete: true | false." + - "Behavioral parity evidence." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + decision: "Approve the first implemented ACP cancellation slice as complete, but do not complete the tranche. Continue with the next safe ACP hardening slice." + slice_complete: true + full_outcome_complete: false + behavioral_parity_evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs marks pending/in-progress tools canceled when cancelled." + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs includes cancellation tests." + - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts now finalizes active tools before cancelled completion and force-stop cleanup." + - "/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts verifies tool finalization, backend cancel invocation, and late-event suppression." + missing_evidence: + - "No Athas Rust/backend behavioral test yet proves session load/resume/new fallbacks or AuthRequired retry branches against the Zed connection/session model." + - "No Athas usage/token-cost ACP event mapping exists yet." + next_allowed_task: T005 + tranche_complete: false + + - id: T005 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add focused Rust behavioral tests for ACP session bootstrap fallback parity: requested session load success, session/load MethodNotFound falling through to session/resume when resume is supported, ResourceNotFound/MethodNotFound falling through to session/new, and AuthRequired retry behavior where a deterministic local test seam is practical." + allowed_files: + - "crates/ai/src/acp/bridge_init.rs" + verify: + - "cargo test -p athas-ai acp::bridge_init" + - "bun check:rust" + stop_if: + - "A deterministic local test cannot be added inside crates/ai/src/acp/bridge_init.rs without introducing a live ACP provider, external adapter, new dependency, or broad fake harness." + - "The tests require editing src-tauri, process lifecycle code, build config, Cargo.toml, or files outside allowed_files." + - "The implementation pressure turns into runtime behavior changes beyond small test seams needed inside bridge_init.rs." + - "The Worker discovers that the current bridge_init structure cannot be tested safely without a prior Scout/Judge task to design a narrower seam." + - "Verification fails twice after scoped fixes." + receipt: + result: done + changed_files: + - "crates/ai/src/acp/bridge_init.rs" + commands: + - cmd: "cargo test -p athas-ai acp::bridge_init" + status: pass + - cmd: "cargo check -p athas-ai" + status: pass + - cmd: "cargo fmt -p athas-ai --check" + status: pass + blocked_verification: + - cmd: "bun check:rust" + reason: "Repo-wide rustfmt check reports unrelated pre-existing diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs, outside this task's allowed_files." + summary: "Added deterministic in-file tests and small decision seams for ACP session bootstrap fallback parity: load success preserves requested session id, MethodNotFound load tries resume only when supported, MethodNotFound/ResourceNotFound load/resume fall back to new session, AuthRequired is classified for retry before fallbacks, and new-session bootstrap preserves the agent-created id." + + - id: T999 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Final audit for the first Athas ACP Zed parity tranche." + inputs: + - "All done task receipts." + - "Zed reference evidence." + - "Last verification." + - "Current dirty diff." + constraints: + - "Read-only." + - "Do not implement." + - "Reject completion if required Worker work is still queued or active." + - "Reject completion if the evidence does not map Athas behavior back to Zed-referenced parity." + - "Reject completion if the result is only a comparison artifact with no verified Athas improvement." + expected_output: + - "complete | not_complete." + - "full_outcome_complete: true | false." + - "Completed parity behaviors." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + decision: complete + summary: "Approve T999. The first Athas ACP Zed parity tranche is complete enough to close: it produced Zed-referenced behavioral changes, not just comparison notes, and the implemented slices have focused regression coverage plus scoped frontend, TypeScript, Rust test/check, and Rust format verification." + full_outcome_complete: true + tranche_complete: true + completed_parity_behaviors: + - "Cancellation parity: Athas now finalizes active ACP tools with a Cancelled error before cleanup on protocol prompt_complete(cancelled) and local forceStop/user cancel, matching Zed's pending/in-progress tool cancellation behavior." + - "Cancellation proof: acp-cancellation.test.ts verifies active tool finalization, backend cancel_acp_prompt invocation, completion callback behavior, and late-event suppression after force-stop cancellation." + - "Session bootstrap parity: bridge_init.rs now has explicit decision/build seams covering requested session load success, MethodNotFound load-to-resume only when resume is supported, MethodNotFound/ResourceNotFound fallback to new, AuthRequired retry classification before fallbacks, and new-session id preservation." + - "Session bootstrap proof: Rust unit tests cover load id preservation, load/resume fallback gates, AuthRequired classification, and new-session id preservation." + missing_evidence: + - "No blocking missing evidence for this first tranche." + - "Deferred parity gap: Zed has UsageUpdate/token-cost handling and tests; Athas usage/token-cost ACP event mapping remains a valid later parity slice." + - "Verification caveat: bun check:rust remains blocked by unrelated repo-wide rustfmt diffs outside this tranche's allowed files." + - "No live ACP provider smoke was included; acceptable because this tranche used deterministic local behavioral tests and did not require provider expansion." + next_task_if_not_complete: null + +checks: + dirty_fingerprint: unknown + last_verification: + result: scoped_pass_with_unrelated_blocker + task: T005 + commands: + - "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" + - "bunx vp test run src/features/ai/tests" + - "bun typecheck" + - "cargo test -p athas-ai acp::bridge_init" + - "cargo check -p athas-ai" + - "cargo fmt -p athas-ai --check" + - "bun check:rust blocked by unrelated repo-wide rustfmt diffs outside T005 allowed_files" diff --git a/src/features/ai/services/acp-stream-handler.ts b/src/features/ai/services/acp-stream-handler.ts index 177e830c3..21b66d1e1 100644 --- a/src/features/ai/services/acp-stream-handler.ts +++ b/src/features/ai/services/acp-stream-handler.ts @@ -327,6 +327,7 @@ export class AcpStreamHandler { // The stop reason can be used to determine how to handle the completion if (event.stopReason === "cancelled") { // User cancelled the prompt + this.finalizeActiveToolsAsCancelled(); this.cleanup(); this.handlers.onComplete(); return; @@ -542,10 +543,19 @@ export class AcpStreamHandler { } } + private finalizeActiveToolsAsCancelled(): void { + if (!this.handlers.onToolComplete || this.activeTools.size === 0) return; + + for (const [toolId, toolName] of this.activeTools) { + this.handlers.onToolComplete(toolName, toolId, undefined, "Cancelled"); + } + } + private forceStop(): void { if (this.sessionComplete || this.cancelled) return; this.cancelled = true; this.pendingNewMessage = false; + this.finalizeActiveToolsAsCancelled(); this.cleanup(); this.handlers.onComplete(); } diff --git a/src/features/ai/tests/acp-cancellation.test.ts b/src/features/ai/tests/acp-cancellation.test.ts new file mode 100644 index 000000000..50d700174 --- /dev/null +++ b/src/features/ai/tests/acp-cancellation.test.ts @@ -0,0 +1,157 @@ +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { invoke } from "@tauri-apps/api/core"; +import { AcpStreamHandler } from "../services/acp-stream-handler"; +import type { AcpEvent } from "../types/acp"; + +vi.mock("@tauri-apps/api/core", () => ({ + invoke: vi.fn(), +})); + +vi.mock("@tauri-apps/api/event", () => ({ + listen: vi.fn(), +})); + +vi.mock("@/features/ai/store/store", () => ({ + useAIChatStore: { + getState: () => ({ + acpStatus: null, + getChatById: () => null, + getCurrentChat: () => null, + setAcpStatus: vi.fn(), + setChatAcpSessionId: vi.fn(), + setAvailableSlashCommands: vi.fn(), + setSessionConfigOptions: vi.fn(), + setSessionModeState: vi.fn(), + setCurrentModeId: vi.fn(), + }), + }, +})); + +vi.mock("@/features/editor/stores/buffer-store", () => ({ + useBufferStore: { + getState: () => ({ + actions: { + openWebViewerBuffer: vi.fn(), + openTerminalBuffer: vi.fn(), + }, + }), + }, +})); + +vi.mock("@/features/window/stores/project-store", () => ({ + useProjectStore: { + getState: () => ({ + rootFolderPath: "/repo", + }), + }, +})); + +vi.mock("@/features/ai/lib/acp-session-info", () => ({ + getChatTitleFromSessionInfo: (_currentTitle: string | undefined, nextTitle: string) => nextTitle, +})); + +vi.mock("../utils/ai-context-builder", () => ({ + buildContextPrompt: () => "", +})); + +const mockedInvoke = vi.mocked(invoke); + +type TestableAcpStreamHandler = { + handleAcpEvent: (event: unknown) => void; +}; + +type AcpStreamHandlerStatic = { + activeHandler: AcpStreamHandler | null; +}; + +const setActiveHandler = (handler: AcpStreamHandler | null) => { + (AcpStreamHandler as unknown as AcpStreamHandlerStatic).activeHandler = handler; +}; + +const handleAcpEvent = (handler: AcpStreamHandler, event: AcpEvent) => { + (handler as unknown as TestableAcpStreamHandler).handleAcpEvent(event); +}; + +describe("AcpStreamHandler cancellation", () => { + afterEach(() => { + mockedInvoke.mockReset(); + setActiveHandler(null); + }); + + it("finalizes active tools before sending backend cancellation", async () => { + const onComplete = vi.fn(); + const onToolComplete = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete, + onError: vi.fn(), + onToolComplete, + }, + "chat-1", + ); + + handleAcpEvent(handler, { + type: "tool_start", + sessionId: "session-1", + toolName: "read_text_file", + toolId: "tool-1", + input: { path: "src/main.ts" }, + kind: "read", + status: "in_progress", + locations: [], + }); + setActiveHandler(handler); + + await AcpStreamHandler.cancelPrompt(); + + expect(onToolComplete).toHaveBeenCalledWith("read_text_file", "tool-1", undefined, "Cancelled"); + expect(onComplete).toHaveBeenCalledOnce(); + expect(mockedInvoke).toHaveBeenCalledWith("cancel_acp_prompt"); + }); + + it("ignores late events after a cancelled turn is force-stopped", async () => { + const onComplete = vi.fn(); + const onToolComplete = vi.fn(); + const onToolUse = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete, + onError: vi.fn(), + onToolUse, + onToolComplete, + }, + "chat-1", + ); + + handleAcpEvent(handler, { + type: "tool_start", + sessionId: "session-1", + toolName: "read_text_file", + toolId: "tool-1", + input: { path: "src/main.ts" }, + kind: "read", + status: "in_progress", + locations: [], + }); + setActiveHandler(handler); + + await AcpStreamHandler.cancelPrompt(); + handleAcpEvent(handler, { + type: "tool_start", + sessionId: "session-1", + toolName: "write_text_file", + toolId: "tool-2", + input: { path: "src/main.ts" }, + kind: "edit", + status: "in_progress", + locations: [], + }); + + expect(onToolUse).toHaveBeenCalledOnce(); + expect(onToolComplete).toHaveBeenCalledOnce(); + }); +}); From 9e5b01a602021e8091d3521ae0ce0c3752552e95 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Fri, 8 May 2026 21:58:56 +0700 Subject: [PATCH 02/35] Add ACP harness parity coverage --- crates/ai/src/acp/client.rs | 236 +++++++---- crates/ai/src/acp/types.rs | 7 + docs/goals/athas-acp-harness-parity/goal.md | 77 ++++ .../notes/T001-acp-harness-map.md | 219 ++++++++++ .../goals/athas-acp-harness-parity/state.yaml | 382 ++++++++++++++++++ .../ai/services/acp-stream-handler.ts | 3 + src/features/ai/tests/acp-permission.test.ts | 139 +++++++ src/features/ai/tests/acp-usage.test.ts | 104 +++++ src/features/ai/types/acp.ts | 6 + 9 files changed, 1095 insertions(+), 78 deletions(-) create mode 100644 docs/goals/athas-acp-harness-parity/goal.md create mode 100644 docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md create mode 100644 docs/goals/athas-acp-harness-parity/state.yaml create mode 100644 src/features/ai/tests/acp-permission.test.ts create mode 100644 src/features/ai/tests/acp-usage.test.ts diff --git a/crates/ai/src/acp/client.rs b/crates/ai/src/acp/client.rs index 4b3ac6739..6695ea712 100644 --- a/crates/ai/src/acp/client.rs +++ b/crates/ai/src/acp/client.rs @@ -61,6 +61,51 @@ impl AthasAcpClient { self.permission_tx.clone() } + fn map_permission_option_kind( + kind: acp::PermissionOptionKind, + ) -> super::types::AcpPermissionOptionKind { + match kind { + acp::PermissionOptionKind::AllowOnce => super::types::AcpPermissionOptionKind::AllowOnce, + acp::PermissionOptionKind::AllowAlways => { + super::types::AcpPermissionOptionKind::AllowAlways + } + acp::PermissionOptionKind::RejectOnce => super::types::AcpPermissionOptionKind::RejectOnce, + acp::PermissionOptionKind::RejectAlways => { + super::types::AcpPermissionOptionKind::RejectAlways + } + _ => super::types::AcpPermissionOptionKind::RejectOnce, + } + } + + fn permission_response_for_choice( + options: &[acp::PermissionOption], + approved: bool, + ) -> acp::RequestPermissionResponse { + let selected_option = options + .iter() + .find(|opt| { + if approved { + matches!( + opt.kind, + acp::PermissionOptionKind::AllowOnce | acp::PermissionOptionKind::AllowAlways + ) + } else { + matches!( + opt.kind, + acp::PermissionOptionKind::RejectOnce | acp::PermissionOptionKind::RejectAlways + ) + } + }) + .or_else(|| options.first()) + .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); + + if let Some(selected) = selected_option { + acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Selected(selected)) + } else { + acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Cancelled) + } + } + pub async fn set_session_id(&self, session_id: String) { let mut current = self.current_session_id.lock().await; *current = Some(session_id); @@ -209,23 +254,7 @@ impl AthasAcpClient { fn fallback_permission_response( args: &acp::RequestPermissionRequest, ) -> acp::RequestPermissionResponse { - let selected_option = args - .options - .iter() - .find(|opt| { - matches!( - opt.kind, - acp::PermissionOptionKind::RejectOnce | acp::PermissionOptionKind::RejectAlways - ) - }) - .or_else(|| args.options.first()) - .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); - - if let Some(selected) = selected_option { - acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Selected(selected)) - } else { - acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Cancelled) - } + Self::permission_response_for_choice(&args.options, false) } fn map_plan_priority(priority: acp::PlanEntryPriority) -> AcpPlanEntryPriority { @@ -348,6 +377,14 @@ impl AthasAcpClient { .collect() } + fn map_usage_update(session_id: String, update: acp::UsageUpdate) -> AcpEvent { + AcpEvent::UsageUpdate { + session_id, + used: update.used, + size: update.size, + } + } + pub(crate) fn map_session_config_option( option: acp::SessionConfigOption, ) -> Option { @@ -418,21 +455,7 @@ impl acp::Client for AthasAcpClient { .map(|option| super::types::AcpPermissionOption { id: option.option_id.to_string(), name: option.name.clone(), - kind: match option.kind { - acp::PermissionOptionKind::AllowOnce => { - super::types::AcpPermissionOptionKind::AllowOnce - } - acp::PermissionOptionKind::AllowAlways => { - super::types::AcpPermissionOptionKind::AllowAlways - } - acp::PermissionOptionKind::RejectOnce => { - super::types::AcpPermissionOptionKind::RejectOnce - } - acp::PermissionOptionKind::RejectAlways => { - super::types::AcpPermissionOptionKind::RejectAlways - } - _ => super::types::AcpPermissionOptionKind::RejectOnce, - }, + kind: Self::map_permission_option_kind(option.kind), }) .collect(), }); @@ -486,53 +509,9 @@ impl acp::Client for AthasAcpClient { return Ok(Self::fallback_permission_response(&args)); } - // Prefer allow-once/allow-always options if available - let selected_option = args - .options - .iter() - .find(|opt| { - matches!( - opt.kind, - acp::PermissionOptionKind::AllowOnce - | acp::PermissionOptionKind::AllowAlways - ) - }) - .or_else(|| args.options.first()) - .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); - - if let Some(selected) = selected_option { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Selected(selected), - )) - } else { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Cancelled, - )) - } + Ok(Self::permission_response_for_choice(&args.options, true)) } else { - // Prefer reject-once/reject-always options if available - let selected_option = args - .options - .iter() - .find(|opt| { - matches!( - opt.kind, - acp::PermissionOptionKind::RejectOnce - | acp::PermissionOptionKind::RejectAlways - ) - }) - .or_else(|| args.options.first()) - .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); - - if let Some(selected) = selected_option { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Selected(selected), - )) - } else { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Cancelled, - )) - } + Ok(Self::permission_response_for_choice(&args.options, false)) } } _ => Ok(acp::RequestPermissionResponse::new( @@ -695,6 +674,9 @@ impl acp::Client for AthasAcpClient { updated_at: update.updated_at.take(), }); } + acp::SessionUpdate::UsageUpdate(update) => { + self.emit_event(Self::map_usage_update(session_id, update)); + } acp::SessionUpdate::AvailableCommandsUpdate(commands_update) => { self.emit_event(AcpEvent::SlashCommandsUpdate { session_id, @@ -1161,3 +1143,101 @@ impl acp::Client for AthasAcpClient { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn permission_option( + id: &'static str, + kind: acp::PermissionOptionKind, + ) -> acp::PermissionOption { + acp::PermissionOption::new(id, id, kind) + } + + fn selected_option_id(response: acp::RequestPermissionResponse) -> Option { + match response.outcome { + acp::RequestPermissionOutcome::Selected(selected) => Some(selected.option_id.to_string()), + acp::RequestPermissionOutcome::Cancelled => None, + _ => None, + } + } + + #[test] + fn usage_update_maps_to_frontend_event() { + let event = AthasAcpClient::map_usage_update( + "session-1".to_string(), + acp::UsageUpdate::new(1234, 200000), + ); + + match event { + AcpEvent::UsageUpdate { + session_id, + used, + size, + } => { + assert_eq!(session_id, "session-1"); + assert_eq!(used, 1234); + assert_eq!(size, 200000); + } + other => panic!("expected usage update event, got {other:?}"), + } + } + + #[test] + fn permission_option_kinds_map_to_frontend_kinds() { + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::AllowOnce), + super::super::types::AcpPermissionOptionKind::AllowOnce + )); + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::AllowAlways), + super::super::types::AcpPermissionOptionKind::AllowAlways + )); + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::RejectOnce), + super::super::types::AcpPermissionOptionKind::RejectOnce + )); + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::RejectAlways), + super::super::types::AcpPermissionOptionKind::RejectAlways + )); + } + + #[test] + fn approved_permission_prefers_allow_options() { + let options = vec![ + permission_option("reject-once", acp::PermissionOptionKind::RejectOnce), + permission_option("allow-always", acp::PermissionOptionKind::AllowAlways), + ]; + + let response = AthasAcpClient::permission_response_for_choice(&options, true); + + assert_eq!( + selected_option_id(response).as_deref(), + Some("allow-always") + ); + } + + #[test] + fn rejected_permission_prefers_reject_options() { + let options = vec![ + permission_option("allow-once", acp::PermissionOptionKind::AllowOnce), + permission_option("reject-always", acp::PermissionOptionKind::RejectAlways), + ]; + + let response = AthasAcpClient::permission_response_for_choice(&options, false); + + assert_eq!( + selected_option_id(response).as_deref(), + Some("reject-always") + ); + } + + #[test] + fn permission_choice_cancels_when_options_are_empty() { + let response = AthasAcpClient::permission_response_for_choice(&[], true); + + assert_eq!(selected_option_id(response), None); + } +} diff --git a/crates/ai/src/acp/types.rs b/crates/ai/src/acp/types.rs index c6e9af22a..8754c99fe 100644 --- a/crates/ai/src/acp/types.rs +++ b/crates/ai/src/acp/types.rs @@ -486,6 +486,13 @@ pub enum AcpEvent { title: Option, updated_at: Option, }, + /// Session token/context usage updated + #[serde(rename_all = "camelCase")] + UsageUpdate { + session_id: String, + used: u64, + size: u64, + }, /// Prompt turn completed with a stop reason #[serde(rename_all = "camelCase")] PromptComplete { diff --git a/docs/goals/athas-acp-harness-parity/goal.md b/docs/goals/athas-acp-harness-parity/goal.md new file mode 100644 index 000000000..f94918cb6 --- /dev/null +++ b/docs/goals/athas-acp-harness-parity/goal.md @@ -0,0 +1,77 @@ +# Athas ACP Harness Parity + +## Objective + +Bring Athas's ACP harness posture up toward Zed's by studying Zed's local ACP harness/test patterns, then extending Athas's existing ACP foundation with comparable behavioral and provider/adaptor harness coverage. + +## Original Request + +Match the amount of ACP harness that Zed has for Athas as well. Look at how Zed does it and do the same for Athas. Athas's foundation is already good; keep going from there. + +## Intake Summary + +- Input shape: `specific` +- Audience: Athas maintainers and contributors working on ACP reliability. +- Authority: `approved` +- Proof type: `test` +- Completion proof: Zed's ACP harness/test posture is mapped, Athas gains the first highest-value comparable harness slices, and local verification proves those Athas harnesses run. +- Likely misfire: Counting tests or cloning Zed structure mechanically without creating reusable, reliable Athas ACP coverage. +- Blind spots considered: Harness parity could mean raw test count, protocol behavior coverage, fake-agent infrastructure, provider/adaptor compatibility, real adapter smoke, or developer tooling; real provider smoke can become auth-heavy or network-flaky. +- Existing plan facts: Use `/Users/sw/Code/zed` as the reference source; do not treat Athas as sparse or broken; extend Athas's existing foundation; optimize for behavioral ACP harness coverage plus provider compatibility harness; include real adapter smoke where practical but avoid auth-heavy or flaky provider runs. + +## Goal Kind + +`specific` + +## Current Tranche + +Map how Zed structures ACP harnesses and tests, identify the closest Athas extension points, implement successive safe harness slices for core ACP behavior and provider/adaptor compatibility, verify them locally, and audit whether the first harness parity tranche is complete. + +## Non-Negotiable Constraints + +- Treat `/Users/sw/Code/zed` as the local reference implementation for ACP harness/test posture. +- Cross-reference Zed before choosing Athas harness work. +- Preserve and extend Athas's existing ACP foundation; do not rewrite working architecture just to mimic Zed names. +- Optimize for reliable harness infrastructure, not raw test count. +- Cover both behavioral ACP flows and provider/adaptor compatibility where practical. +- Prefer deterministic local harnesses. If real adapter smoke needs auth, secrets, network, or brittle local state, record the blocker and use a simulated/fake harness path instead. +- Do not add broad provider expansion unless Zed evidence shows it is required for harness parity. +- Follow Athas repo rules: use Bun for repo scripts, keep changes scoped, and run relevant checks. + +## Stop Rule + +Stop only when a final audit proves the full original outcome for this tranche is complete. + +Do not stop after mapping Zed, picking a harness design, or adding one partial test if safe harness work remains. Continue through verified implementation slices until the first harness parity tranche has concrete Athas tests/harnesses and an audit receipt. + +Do not stop because real provider smoke needs auth, credentials, production access, network state, or policy decisions. Mark that exact smoke task blocked with a receipt, then continue with deterministic local harness work that advances the same parity goal. + +## Canonical Board + +Machine truth lives at: + +`docs/goals/athas-acp-harness-parity/state.yaml` + +If this charter and `state.yaml` disagree, `state.yaml` wins for task status, active task, receipts, verification freshness, and completion truth. + +## Run Command + +```text +/goal Follow docs/goals/athas-acp-harness-parity/goal.md. +``` + +## PM Loop + +On every `/goal` continuation: + +1. Read this charter. +2. Read `state.yaml`. +3. Run the bundled GoalBuddy update checker when available and mention a newer version without blocking. +4. Re-check the intake: original request, input shape, authority, proof, blind spots, existing plan facts, and likely misfire. +5. Work only on the active board task. +6. Assign Scout, Judge, Worker, or PM according to the task. +7. Write a compact task receipt. +8. Update the board. +9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked. +10. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete. +11. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`. diff --git a/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md b/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md new file mode 100644 index 000000000..8f5cde9c9 --- /dev/null +++ b/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md @@ -0,0 +1,219 @@ +# T001 Scout Receipt: ACP Harness Map + +## Result + +Read-only Scout mapped. + +## Summary + +Zed's ACP harness posture is not just more tests; it has reusable fake ACP transports plus GPUI behavioral tests that exercise production connection/thread paths. Athas already has a solid ACP foundation with frontend stream-handler regression tests, Rust bootstrap decision tests, terminal-state tests, registry/catalog seams, and Tauri command surfaces. The closest parity move is to add a deterministic Athas fake ACP connection/client harness around `crates/ai/src/acp/*`, then use it for behavior slices before any provider expansion. + +## Zed ACP Harness/Test Map + +Behavior harness: + +- `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`: GPUI behavioral ACP thread tests using `FakeAgentConnection::new()` and in-thread session updates. +- Representative tests: + - `test_terminal_output_buffered_before_created_renders` + - `test_terminal_output_and_exit_buffered_before_created` + - `test_terminal_kill_allows_wait_for_exit_to_complete` + - `test_push_user_content_block` + - `test_thinking_concatenation` + - `test_ignore_echoed_user_message_chunks_during_active_turn` + - `test_edits_concurrently_to_user` + - `test_reading_from_line` + - `test_reading_empty_file` + - `test_reading_non_existing_file` + - `test_succeeding_canceled_toolcall` + - `test_no_pending_edits_if_tool_calls_are_completed` + - `test_tool_result_refusal` + - `test_user_prompt_refusal_emits_event` + - `test_refusal` + - `test_tool_call_not_found_creates_failed_entry` + - `test_follow_up_message_during_generation_does_not_clear_turn` + - `test_send_returns_cancelled_response_and_marks_tools_as_cancelled` + - `test_running_turn_cleared_when_send_task_dropped` + - `test_session_info_update_replaces_provisional_title_and_emits_event` + - `test_usage_update_populates_token_usage_and_cost` + - `test_usage_update_without_cost_preserves_existing_cost` + - `test_response_usage_does_not_clobber_session_usage` + - `test_clearing_token_usage_also_clears_cost` + +Fake ACP transport/provider harness: + +- `/Users/sw/Code/zed/crates/agent_servers/src/acp.rs`: `FakeAcpAgentServer`, `FakeAcpConnectionHarness`, `FakeAcpAgentConnection`, `build_fake_acp_connection`, `connect_fake_acp_connection`. +- Harness uses `agent_client_protocol::Channel::duplex()`, wires production `connect_client_future` handlers, responds to initialize/auth/new/prompt/load/close/cancel, tracks load/close counts, supports simulated server exit and forced prompt failure. +- Tests: + - `test_loaded_sessions_keep_state_until_last_close` + - `test_load_session_replays_notifications_sent_before_response` + - `test_close_session_during_in_flight_load` + - `test_close_during_load_preserves_other_concurrent_loader` + +Permissions/tools/provider harness: + +- `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs`: fake model/provider and fake terminal environment tests for tools, streaming tools, cancellation, auth/permission behavior, MCP tool compatibility, retry/error handling. +- Key tests: + - `test_basic_tool_calls` + - `test_streaming_tool_calls` + - `test_tool_authorization` + - `test_tool_hallucination` + - `test_cancellation` + - `test_terminal_tool_cancellation_captures_output` + - `test_cancellation_aware_tool_responds_to_cancellation` + - `test_in_progress_send_canceled_by_next_send` + - `test_retry_cancelled_promptly_on_new_send` + - `test_tool_updates_to_completion` + - `test_send_retry_finishes_tool_calls_on_error` + - `test_streaming_tool_completes_when_llm_stream_ends_without_final_input` + - `test_streaming_tool_error_breaks_stream_loop_immediately` + - `test_streaming_tool_error_waits_for_prior_tools_to_complete` +- Permission option tests include: + - `test_permission_options_terminal_with_pattern` + - `test_permission_options_edit_file_with_path_pattern` + - `test_permission_options_fetch_with_domain_pattern` + - `test_permission_options_terminal_pipeline_produces_dropdown_with_patterns` +- `/Users/sw/Code/zed/crates/agent/src/tool_permissions.rs` has a focused permission-rule matrix for allow/deny/path/url/terminal patterns and rule resolution. + +Reference commands: + +- `cargo test -p acp_thread test_usage_update_populates_token_usage_and_cost` +- `cargo test -p acp_thread test_send_returns_cancelled_response_and_marks_tools_as_cancelled` +- `cargo test -p agent_servers test_load_session_replays_notifications_sent_before_response` +- `cargo test -p agent test_cancellation` +- `cargo test -p agent test_permission_options_terminal_with_pattern` + +## Athas Current ACP Harness/Test Map + +Frontend behavior tests: + +- `/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts` + - `finalizes active tools before sending backend cancellation` + - `ignores late events after a cancelled turn is force-stopped` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-activity-groups.test.ts` + - `groups running, recent, and error activity without duplicate signatures` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-session-info.test.ts` + - `returns trimmed title updates` + - `ignores empty or unchanged titles` +- `/Users/sw/Code/athas/src/features/ai/tests/session-config-option-classifier.test.ts` + - `prefers ACP semantic categories over label heuristics` + - `falls back to text classification when category is missing or custom` + +Rust behavior tests: + +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` + - `loaded_session_bootstrap_preserves_requested_session_id` + - `method_not_found_load_uses_resume_only_when_supported` + - `missing_or_unsupported_load_falls_back_to_new_session` + - `missing_or_unsupported_resume_falls_back_to_new_session` + - `auth_required_errors_are_retriable_before_session_fallbacks` + - `new_session_bootstrap_uses_agent_created_session_id` +- `/Users/sw/Code/athas/crates/ai/src/acp/terminal_state.rs` + - `append_output_truncates_from_beginning` + - `append_output_preserves_utf8_boundaries_when_truncating` + - `exit_status_preserves_none_exit_code_for_signal_termination` + +Registry/adapter tests: + +- `/Users/sw/Code/athas/crates/ai/src/acp/config.rs` + - `managed_wrapper_path_prefers_expected_wrapper_name` + - `check_dir_for_binary_returns_none_for_missing_binary` +- Implementation seams: `AgentRegistry::replace_agents`, `detect_installed`, `detect_codex_adapter`, `managed_wrapper_path`, `find_binary`. + +Extension points: + +- `/Users/sw/Code/athas/crates/ai/src/acp/client.rs`: `AthasAcpClient` maps ACP session notifications, permission requests, file callbacks, terminal callbacks, and extension methods into `AcpEvent`. +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs`: `AcpWorker` owns connection/session/process lifecycle and exposes send/cancel/list/stop/mode/config operations. +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs`: bootstrap path for initialize/auth/session load/resume/new and initial mode/config event emission. +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_prompt.rs`: prompt send, auth retry, timeout, and prompt_complete emission. +- `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts`: frontend event-order/cancellation/session/status/tool/permission handler and current easiest deterministic test seam. +- `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs`: Tauri command shell plus marketplace catalog conversion and `refresh_registered_agents`. + +Current verification options: + +- `bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts` +- `cargo test -p athas-ai acp::bridge_init::tests` +- `cargo test -p athas-ai acp::terminal_state::tests` +- `cargo test -p athas-ai acp::config::tests` +- `bun typecheck` +- `bun check:rust` + +## Harness Parity Matrix + +- Sessions: Zed is strong with fake ACP transport covering load/close ref counts, in-flight load, notifications before response, concurrent loaders, and session info title updates. Athas is medium: bootstrap decision tests and frontend title/session-info tests exist, but no fake ACP transport proves production connection event ordering. +- Cancellation: Zed is strong with cancelled prompt responses, pending tool cancellation, running-turn cleanup, new send canceling previous send, and terminal/subagent/tool cancellation. Athas is medium with frontend cancellation tests but no Rust bridge fake transport proof. +- Streaming: Zed is strong with echoed user chunk suppression, thinking concatenation, streaming tool parsing/completion/error loops, and buffered text flushing. Athas is low-medium: stream handler routes chunks/tools, but deterministic ordering tests are thin. +- Tools: Zed is strong across tool start/update/complete/failure/not-found, file reads/writes, concurrent edits, terminal buffering/kill/output. Athas is medium: event mapping and terminal buffer tests exist, but no client-level fake app/terminal manager harness for ACP callbacks. +- Permissions: Zed is strong across permission options and allow/deny rule matrices. Athas is low-medium: permission requests are emitted and queue cancellation exists, but no focused protocol response matrix. +- Errors: Zed is strong on startup-exited load errors, auth/internal mapping, refusal, hallucinated tools, retry failure, and dropped send cleanup. Athas is medium with startup/auth/session fallback tests and stream handler formatting. +- Usage: Zed is strong with `UsageUpdate` token usage/cost tests. Athas appears absent for ACP usage event/type mapping. +- Provider/adaptor compatibility: Zed is strong with fake ACP AgentServer and fake model/provider/terminal/MCP harnesses. Athas is low-medium with registry/catalog seams and Codex adapter fallback, but no fake adapter executable/transport smoke. + +## Ranked Athas Harness Gaps + +1. No Rust fake ACP connection harness equivalent to Zed's `FakeAcpConnectionHarness`. + - Highest leverage because it can prove sessions, cancellation, streaming, tools, permissions, errors, and usage locally through production-ish bridge/client paths. + - Target files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/bridge.rs`, possible `crates/ai/src/acp/test_harness.rs`. + +2. No ACP usage/token-cost mapping or tests. + - Clear Zed parity gap with narrow blast radius; currently no Athas event/type exists for usage. + - Target files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`. + +3. Client-level permission mapping/cancel/timeout harness is missing. + - Athas has UI queue behavior but not the protocol response matrix for selected/cancelled/auto-reject/fallback options. + - Target files: `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/components/chat/ai-chat.tsx`. + +4. Streaming/event ordering tests are thin. + - Zed proves duplicate user echo suppression, thinking concatenation, and late turn isolation. + - Target files: `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests`, `crates/ai/src/acp/client.rs`. + +5. Provider/adaptor compatibility smoke is config-only. + - Athas detects Codex adapter and marketplace agents, but lacks local fake adapter/fixture validation of initialize/session/prompt/cancel without auth/network. + - Target files: `crates/ai/src/acp/config.rs`, `src-tauri/src/commands/ai/acp.rs`, possible test fixture under `crates/ai/src/acp` or tests. + +6. Terminal/file ACP callback harness is only partial. + - Terminal buffer unit tests exist, but full create/output/wait/kill/release and read/write file callbacks through `AthasAcpClient` are not covered. + - Target files: `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/terminal_state.rs`. + +## Candidate Worker Slices + +W1: + +- Objective: Add the minimal reusable Athas fake ACP harness for `athas-ai` without changing product behavior. +- Allowed files: `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/mod.rs`, optional new test-only file `crates/ai/src/acp/test_harness.rs`. +- Verification: `cargo test -p athas-ai acp`, `bun check:rust`. +- Stop if: requires live ACP provider/auth/network/secrets; broad provider expansion; bridge rewrite instead of narrow test seam. + +W2: + +- Objective: Use fake harness to prove session notification ordering and load/resume/new behavior through production-ish paths. +- Allowed files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/test_harness.rs`. +- Verification: `cargo test -p athas-ai acp::bridge_init`, `cargo test -p athas-ai acp::client`. +- Stop if: needs external adapter process or new dependencies. + +W3: + +- Objective: Add ACP `UsageUpdate` mapping and deterministic tests. +- Allowed files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/*usage*.test.ts`. +- Verification: `cargo test -p athas-ai acp`, `bunx vp test run src/features/ai/tests`, `bun typecheck`. +- Stop if: usage semantics require UI design beyond exposing routed state/event, or protocol crate lacks `UsageUpdate`. + +W4: + +- Objective: Add permission protocol harness for selected/cancelled/auto-reject/fallback responses. +- Allowed files: `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/acp-permissions.test.ts`. +- Verification: `cargo test -p athas-ai acp::client`, `bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-permissions.test.ts`. +- Stop if: requires changing product permission policy instead of harnessing current behavior, or long real-time timeout. + +W5: + +- Objective: Add local fake adapter/provider compatibility smoke without real provider auth. +- Allowed files: `crates/ai/src/acp/config.rs`, `src-tauri/src/commands/ai/acp.rs`, optional test fixture under `crates/ai/src/acp/tests` or src-tauri tests. +- Verification: `cargo test -p athas-ai acp::config`, `bun check:rust`. +- Stop if: requires real Codex/Claude/Gemini login or expands provider catalog instead of proving adapter compatibility. + +## Ambiguity Requiring Judge + +- Whether first tranche should prioritize reusable harness infrastructure (W1) or immediately add visible usage event slice (W3). Scout recommends W1 for leverage, W3 for fastest narrow parity. +- Whether Athas should add a Rust test-only fake app/terminal manager seam now, or keep first harness frontend-only. Zed evidence favors Rust/in-process fake transport. +- Whether provider/adaptor compatibility means fake local adapter smoke only, or real Codex/Claude/Gemini smoke. Evidence and charter favor fake/local deterministic smoke and avoiding auth-heavy real providers. +- Whether `UsageUpdate` is available in Athas's pinned `agent_client_protocol` version; current Athas code has no usage mapping hits, but Worker should confirm dependency API before editing. diff --git a/docs/goals/athas-acp-harness-parity/state.yaml b/docs/goals/athas-acp-harness-parity/state.yaml new file mode 100644 index 000000000..0e0c73d32 --- /dev/null +++ b/docs/goals/athas-acp-harness-parity/state.yaml @@ -0,0 +1,382 @@ +# GoalBuddy v2 state.yaml +# Board truth lives here. goal.md is the editable charter; notes/ holds long receipts only. + +version: 2 + +goal: + title: "Athas ACP Harness Parity" + slug: "athas-acp-harness-parity" + kind: specific + tranche: "Zed-referenced ACP harness parity: map Zed harnesses, extend Athas harness coverage, verify, audit, and continue through safe slices until the first harness tranche is complete." + status: done + intake: + original_request: "Match the amount of ACP harness that Zed has for Athas; look at how Zed does it and do the same for Athas while building on Athas's good foundation." + interpreted_outcome: "Athas gains Zed-referenced ACP harness coverage for core behavior and provider/adaptor compatibility, with local tests proving the first tranche." + input_shape: specific + audience: "Athas maintainers and contributors" + authority: approved + proof_type: test + completion_proof: "A Zed harness matrix exists, selected Athas harness slices are implemented, relevant local tests/checks pass, and final audit confirms the first harness parity tranche is complete." + likely_misfire: "Creating many tests or copying Zed structure mechanically without reliable Athas ACP harness value." + blind_spots_considered: + - "Harness parity may mean behavior coverage, fake-agent infrastructure, provider/adaptor compatibility, smoke tooling, or raw test count." + - "Real provider smoke can be flaky if it depends on auth, secrets, network, or local CLI state." + - "Athas foundation is already good; the task is extension, not replacement." + - "Provider compatibility belongs in scope, but broad provider expansion does not." + existing_plan_facts: + - "Use /Users/sw/Code/zed as the local reference source." + - "Study how Zed does ACP harnesses/tests before choosing Athas work." + - "Athas foundation is good; keep going from there." + - "Optimize for behavioral ACP harness coverage plus provider compatibility harness." + - "Include real adapter smoke where practical, but avoid auth-heavy or network-flaky runs." + +rules: + pm_owns_state: true + one_active_task: true + max_write_workers: 1 + no_implementation_without_worker_or_pm_task: true + no_completion_without_judge_or_pm_audit: true + planning_is_not_completion: true + queued_required_worker_blocks_completion: true + continuous_until_full_outcome: true + missing_input_or_credentials_do_not_stop_goal: true + preserve_and_validate_existing_plan: true + intake_misfire_must_be_audited: true + +agents: + scout: installed + worker: installed + judge: installed + +active_task: null + +tasks: + - id: T001 + type: scout + assignee: Scout + status: done + reasoning_hint: medium + objective: "Map Zed's ACP harness/test posture and identify the closest Athas harness extension points." + inputs: + - "/Users/sw/Code/zed" + - "/Users/sw/Code/athas" + - "Prior Athas ACP parity board: docs/goals/athas-acp-zed-parity/state.yaml" + - "Prior Athas ACP parity note: docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md" + constraints: + - "Read-only." + - "Do not edit implementation files." + - "Treat /Users/sw/Code/zed as the reference for ACP harness/test posture." + - "Use concrete file paths, test names, fixtures, fake providers/adapters, and commands." + - "Separate behavior harness gaps from provider/adaptor compatibility harness gaps." + - "Preserve Athas's existing foundation; recommend extension points, not rewrites." + expected_output: + - "Zed ACP harness/test map with file-path and test-name evidence." + - "Athas current ACP harness/test map with file-path and test-name evidence." + - "Harness parity matrix covering sessions, cancellation, streaming, tools, permissions, errors, usage, and provider/adaptor compatibility." + - "Ranked Athas harness gaps by leverage, determinism, and verification strength." + - "Candidate Worker slices with allowed_files, verification options, and stop conditions." + receipt: + result: done + note: "notes/T001-acp-harness-map.md" + summary: "Mapped Zed and Athas ACP harness posture. Zed's leverage comes from fake ACP transports plus behavioral tests. Athas has a solid foundation but lacks a reusable fake ACP connection harness and several coverage slices." + evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" + - "/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts" + candidate_worker_slices: + - "W1: add minimal reusable fake ACP harness for athas-ai." + - "W3: add ACP UsageUpdate mapping and deterministic tests." + - "W4: add permission protocol harness." + - "W5: add local fake adapter/provider compatibility smoke." + + - id: T002 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Choose the first safe Athas ACP harness implementation slice from Scout evidence." + inputs: + - "T001 receipt or note." + - "Current dirty diff." + - "Athas validation commands." + constraints: + - "Read-only." + - "Do not implement." + - "Require Zed harness evidence for the selected gap." + - "Prefer deterministic local harnesses over auth-heavy or network-flaky real provider runs." + - "Do not choose broad provider expansion." + expected_output: + - "Decision." + - "Why this harness slice is first." + - "Exact Worker objective." + - "allowed_files." + - "verify." + - "stop_if." + - "Deferred or blocked candidates." + receipt: + result: done + decision: "Choose a narrowed W3 slice first: ACP UsageUpdate mapping plus deterministic harness tests. Do not start with the broad reusable fake ACP connection harness yet." + why_this_slice_is_first: + - "Zed has direct behavior-level usage update handling and focused tests." + - "Athas's pinned ACP schema exposes UsageUpdate with used and size." + - "Athas currently drops UsageUpdate in client session notification handling." + - "This is deterministic and local, with no provider auth, network, secrets, real adapter process, or broad provider expansion." + evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" + - "/Users/sw/Code/athas/vendor/agent-client-protocol-schema/src/client.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" + next_allowed_task: T003 + deferred_or_blocked_candidates: + - "Deferred: broad fake ACP connection harness." + - "Deferred: session notification ordering/load-resume-new behavior through fake harness." + - "Deferred: permission protocol harness." + - "Deferred: local fake adapter/provider compatibility smoke." + - "Blocked for this step: real Codex/Claude/Gemini provider runs." + + - id: T003 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add deterministic ACP UsageUpdate mapping and harness tests as the first Zed-referenced Athas ACP harness slice." + allowed_files: + - "crates/ai/src/acp/types.rs" + - "crates/ai/src/acp/client.rs" + - "src/features/ai/types/acp.ts" + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-usage.test.ts" + verify: + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "bun check:rust" + stop_if: + - "Need files outside allowed_files." + - "The Worker cannot test the Rust client mapping without introducing a broad app-handle or Tauri runtime rewrite." + - "The implementation starts storing/rendering usage in UI state instead of only adding pass-through/mapping coverage." + - "The Worker tries to add cost/currency fields not present in Athas's pinned UsageUpdate schema." + - "The task requires live ACP provider auth, real adapter processes, secrets, network, or a provider catalog expansion." + - "Verification fails twice for reasons not directly caused by this slice." + - "Existing untracked GoalBuddy files are the only dirty files now; stop if unrelated implementation dirty files appear before editing." + receipt: + result: done + summary: "Implemented deterministic ACP UsageUpdate pass-through coverage. The Rust ACP client now maps protocol UsageUpdate notifications into frontend acp-event payloads, and frontend stream handling accepts usage_update as a generic event without mutating chat output or tool state." + changed_files: + - "crates/ai/src/acp/types.rs" + - "crates/ai/src/acp/client.rs" + - "src/features/ai/types/acp.ts" + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-usage.test.ts" + harness_evidence: + - "crates/ai/src/acp/client.rs::usage_update_maps_to_frontend_event" + - "src/features/ai/tests/acp-usage.test.ts" + verification: + passed: + - "cargo fmt -p athas-ai --check" + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "cargo check -p athas-ai" + - "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "git diff --check" + blocked: + - "bun check:rust exits 1 on pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs; those files are outside the T003 allowed_files and were not changed." + commands: + - cmd: "cargo fmt -p athas-ai --check" + status: pass + - cmd: "cargo test -p athas-ai acp::client" + status: pass + - cmd: "cargo test -p athas-ai acp" + status: pass + - cmd: "cargo check -p athas-ai" + status: pass + - cmd: "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + status: pass + - cmd: "bun typecheck" + status: pass + - cmd: "git diff --check" + status: pass + blocked_verification: + - cmd: "bun check:rust" + reason: "Pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs are outside the T003 allowed_files and were not changed." + zed_reference: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs usage update tests cited by T001" + + - id: T004 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Audit the first implemented harness slice and choose the next safe harness slice if the tranche is not complete." + inputs: + - "T001 receipt or note." + - "T002 receipt." + - "T003 receipt." + - "Current dirty diff." + - "Last verification." + constraints: + - "Read-only." + - "Reject completion if the slice is only a map or plan." + - "Reject completion if no Athas harness/test was implemented and verified." + - "If not complete, define the next smallest safe Scout, Judge, or Worker task." + expected_output: + - "slice_complete: true | false." + - "full_outcome_complete: true | false." + - "Harness parity evidence." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + slice_complete: true + full_outcome_complete: false + summary: "T003 is a valid first harness slice: it adds implemented Rust and frontend coverage for ACP UsageUpdate, references Zed's behavior-level usage harness posture, and keeps the behavior deterministic with no provider auth or UI storage expansion." + harness_parity_evidence: + - "Rust protocol UsageUpdate is mapped into an AcpEvent and covered by cargo test -p athas-ai acp::client." + - "Frontend AcpStreamHandler accepts usage_update through the generic ACP event stream and does not mutate chat output, covered by src/features/ai/tests/acp-usage.test.ts." + - "The broader athas-ai ACP suite still passes with the new event shape." + missing_evidence: + - "Permission request/response behavior is implemented but has no focused harness tests matching Zed's permission-heavy ACP posture." + - "Provider/adaptor fake smoke remains deferred." + next_task: T005 + decision: "Continue with a narrow permission protocol harness slice before broad fake adapter work." + why_next: + - "Zed has extensive permission behavior tests and request_permission handling evidence." + - "Athas already has permission request, option, callback, auto-reject, and selected-option behavior; the gap is deterministic harness coverage." + - "This is local and does not require real providers, secrets, network, or adapter processes." + + - id: T005 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add deterministic ACP permission protocol harness coverage for Rust permission selection/mapping and frontend permission request handling, using Zed's permission harness posture as the reference." + allowed_files: + - "crates/ai/src/acp/client.rs" + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-permission.test.ts" + verify: + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "bun check:rust" + stop_if: + - "No Judge receipt has selected this slice." + - "Need files outside allowed_files." + - "Need to instantiate a full Tauri AppHandle or run a live ACP provider instead of pure helper/frontend handler harnesses." + - "The implementation changes the permission UI or product behavior rather than adding deterministic harness seams and tests." + - "The task requires auth-heavy, network-flaky, or secret-bearing provider runs." + - "The task broadens into provider expansion without harness-parity evidence." + - "Verification fails twice." + receipt: + result: done + summary: "Added deterministic ACP permission protocol harness coverage without changing user-facing permission behavior. Rust now exposes pure permission mapping/selection seams covered by unit tests, and frontend stream handling is covered for explicit permission callbacks plus safety auto-reject when no handler is registered." + changed_files: + - "crates/ai/src/acp/client.rs" + - "src/features/ai/tests/acp-permission.test.ts" + harness_evidence: + - "crates/ai/src/acp/client.rs::permission_option_kinds_map_to_frontend_kinds" + - "crates/ai/src/acp/client.rs::approved_permission_prefers_allow_options" + - "crates/ai/src/acp/client.rs::rejected_permission_prefers_reject_options" + - "crates/ai/src/acp/client.rs::permission_choice_cancels_when_options_are_empty" + - "src/features/ai/tests/acp-permission.test.ts" + commands: + - cmd: "cargo fmt -p athas-ai" + status: pass + - cmd: "cargo fmt -p athas-ai --check" + status: pass + - cmd: "cargo test -p athas-ai acp::client" + status: pass + - cmd: "cargo test -p athas-ai acp" + status: pass + - cmd: "cargo check -p athas-ai" + status: pass + - cmd: "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + status: pass + - cmd: "bun typecheck" + status: pass + - cmd: "git diff --check" + status: pass + blocked_verification: + - cmd: "bun check:rust" + reason: "Pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs are outside the T005 allowed_files and were not changed." + zed_reference: + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs permission option and parallel permission tests" + - "/Users/sw/Code/zed/crates/agent_servers/src/e2e_tests.rs::test_tool_call_with_permission" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs::handle_request_permission" + + - id: T999 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Final audit for the first Athas ACP harness parity tranche." + inputs: + - "All done task receipts." + - "Zed harness reference evidence." + - "Last verification." + - "Current dirty diff." + constraints: + - "Read-only." + - "Do not implement." + - "Reject completion if required Worker work is still queued or active." + - "Reject completion if the evidence does not map Athas harness work back to Zed-referenced parity." + - "Reject completion if the result is only a comparison artifact with no verified Athas harness improvement." + expected_output: + - "complete | not_complete." + - "full_outcome_complete: true | false." + - "Completed harness parity behaviors." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + decision: complete + summary: "Approve T999. The first Athas ACP harness parity tranche is complete: Zed's ACP harness posture was mapped, then Athas gained two deterministic, Zed-referenced harness slices with Rust and frontend tests covering usage updates and permission protocol behavior." + full_outcome_complete: true + tranche_complete: true + completed_harness_parity_behaviors: + - "UsageUpdate mapping: ACP UsageUpdate notifications now produce frontend usage_update events with used and size, matching the pinned schema and closing the previous silent-drop path." + - "UsageUpdate proof: Rust unit coverage verifies protocol-to-event mapping, and frontend coverage verifies usage_update passes through the generic ACP event stream without mutating chat output or tool state." + - "Permission protocol harness: permission option kind mapping and selected allow/reject/cancel outcomes now have pure Rust tests, reflecting Zed's permission-heavy ACP harness posture." + - "Permission frontend proof: AcpStreamHandler tests verify permission requests reach the permission callback and auto-reject safely when no handler is registered." + zed_reference_evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/e2e_tests.rs::test_tool_call_with_permission" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs usage and permission tests cited by T001" + verification_summary: + - "cargo test -p athas-ai acp::client passed with 5 client harness tests." + - "cargo test -p athas-ai acp passed with 16 ACP tests." + - "Frontend ACP focused harness run passed with 6 files and 10 tests." + - "bun typecheck passed." + - "cargo check -p athas-ai passed." + - "cargo fmt -p athas-ai --check passed." + - "git diff --check passed." + missing_evidence: + - "No blocking missing evidence for this first harness tranche." + - "Deferred: broad reusable fake ACP connection harness and local fake provider/adaptor smoke remain useful later slices." + - "No live ACP provider smoke was included; acceptable because real provider smoke would risk auth, network, and local adapter state." + - "Verification caveat: bun check:rust remains blocked by unrelated repo-wide rustfmt diffs outside this tranche's allowed files." + next_task_if_not_complete: null + +checks: + dirty_fingerprint: unknown + last_verification: + result: scoped_pass_with_unrelated_blocker + task: T999 + commands: + - "cargo fmt -p athas-ai" + - "cargo fmt -p athas-ai --check" + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "cargo check -p athas-ai" + - "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "git diff --check" + - "bun check:rust blocked by unrelated repo-wide rustfmt diffs outside T003/T005 allowed_files" diff --git a/src/features/ai/services/acp-stream-handler.ts b/src/features/ai/services/acp-stream-handler.ts index 21b66d1e1..48e7eced1 100644 --- a/src/features/ai/services/acp-stream-handler.ts +++ b/src/features/ai/services/acp-stream-handler.ts @@ -311,6 +311,9 @@ export class AcpStreamHandler { case "session_info_update": break; + case "usage_update": + break; + case "prompt_complete": this.handlePromptComplete(event); break; diff --git a/src/features/ai/tests/acp-permission.test.ts b/src/features/ai/tests/acp-permission.test.ts new file mode 100644 index 000000000..84cdcf1af --- /dev/null +++ b/src/features/ai/tests/acp-permission.test.ts @@ -0,0 +1,139 @@ +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { invoke } from "@tauri-apps/api/core"; +import { AcpStreamHandler } from "../services/acp-stream-handler"; +import type { AcpEvent } from "../types/acp"; + +vi.mock("@tauri-apps/api/core", () => ({ + invoke: vi.fn(), +})); + +vi.mock("@tauri-apps/api/event", () => ({ + listen: vi.fn(), +})); + +vi.mock("@/features/ai/store/store", () => ({ + useAIChatStore: { + getState: () => ({ + acpStatus: null, + getChatById: () => null, + getCurrentChat: () => null, + setAcpStatus: vi.fn(), + setChatAcpSessionId: vi.fn(), + setAvailableSlashCommands: vi.fn(), + setSessionConfigOptions: vi.fn(), + setSessionModeState: vi.fn(), + setCurrentModeId: vi.fn(), + }), + }, +})); + +vi.mock("@/features/editor/stores/buffer-store", () => ({ + useBufferStore: { + getState: () => ({ + actions: { + openWebViewerBuffer: vi.fn(), + openTerminalBuffer: vi.fn(), + }, + }), + }, +})); + +vi.mock("@/features/window/stores/project-store", () => ({ + useProjectStore: { + getState: () => ({ + rootFolderPath: "/repo", + }), + }, +})); + +vi.mock("@/features/ai/lib/acp-session-info", () => ({ + getChatTitleFromSessionInfo: (_currentTitle: string | undefined, nextTitle: string) => nextTitle, +})); + +vi.mock("../utils/ai-context-builder", () => ({ + buildContextPrompt: () => "", +})); + +type TestableAcpStreamHandler = { + handleAcpEvent: (event: unknown) => void; +}; + +const mockedInvoke = vi.mocked(invoke); + +const handleAcpEvent = (handler: AcpStreamHandler, event: AcpEvent) => { + (handler as unknown as TestableAcpStreamHandler).handleAcpEvent(event); +}; + +const permissionEvent: AcpEvent = { + type: "permission_request", + requestId: "request-1", + permissionType: "tool_call", + resource: "tool-1", + description: "Run command (tool-1)", + options: [ + { + id: "allow-once", + name: "Allow once", + kind: "allow_once", + }, + { + id: "reject-once", + name: "Reject once", + kind: "reject_once", + }, + ], +}; + +describe("AcpStreamHandler permission requests", () => { + afterEach(() => { + mockedInvoke.mockReset(); + vi.clearAllMocks(); + }); + + it("routes permission requests to the permission handler", () => { + const onEvent = vi.fn(); + const onPermissionRequest = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete: vi.fn(), + onError: vi.fn(), + onEvent, + onPermissionRequest, + }, + "chat-1", + ); + + handleAcpEvent(handler, permissionEvent); + + expect(onEvent).toHaveBeenCalledWith(permissionEvent); + expect(onPermissionRequest).toHaveBeenCalledWith(permissionEvent); + expect(mockedInvoke).not.toHaveBeenCalled(); + }); + + it("auto-rejects permission requests when no permission handler is registered", async () => { + mockedInvoke.mockResolvedValue(undefined); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete: vi.fn(), + onError: vi.fn(), + }, + "chat-1", + ); + + handleAcpEvent(handler, permissionEvent); + await vi.waitFor(() => { + expect(mockedInvoke).toHaveBeenCalledWith("respond_acp_permission", { + args: { + requestId: "request-1", + approved: false, + cancelled: false, + optionId: undefined, + }, + }); + }); + }); +}); diff --git a/src/features/ai/tests/acp-usage.test.ts b/src/features/ai/tests/acp-usage.test.ts new file mode 100644 index 000000000..9d8c6a5ab --- /dev/null +++ b/src/features/ai/tests/acp-usage.test.ts @@ -0,0 +1,104 @@ +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { AcpStreamHandler } from "../services/acp-stream-handler"; +import type { AcpEvent } from "../types/acp"; + +vi.mock("@tauri-apps/api/core", () => ({ + invoke: vi.fn(), +})); + +vi.mock("@tauri-apps/api/event", () => ({ + listen: vi.fn(), +})); + +vi.mock("@/features/ai/store/store", () => ({ + useAIChatStore: { + getState: () => ({ + acpStatus: null, + getChatById: () => null, + getCurrentChat: () => null, + setAcpStatus: vi.fn(), + setChatAcpSessionId: vi.fn(), + setAvailableSlashCommands: vi.fn(), + setSessionConfigOptions: vi.fn(), + setSessionModeState: vi.fn(), + setCurrentModeId: vi.fn(), + }), + }, +})); + +vi.mock("@/features/editor/stores/buffer-store", () => ({ + useBufferStore: { + getState: () => ({ + actions: { + openWebViewerBuffer: vi.fn(), + openTerminalBuffer: vi.fn(), + }, + }), + }, +})); + +vi.mock("@/features/window/stores/project-store", () => ({ + useProjectStore: { + getState: () => ({ + rootFolderPath: "/repo", + }), + }, +})); + +vi.mock("@/features/ai/lib/acp-session-info", () => ({ + getChatTitleFromSessionInfo: (_currentTitle: string | undefined, nextTitle: string) => nextTitle, +})); + +vi.mock("../utils/ai-context-builder", () => ({ + buildContextPrompt: () => "", +})); + +type TestableAcpStreamHandler = { + handleAcpEvent: (event: unknown) => void; +}; + +const handleAcpEvent = (handler: AcpStreamHandler, event: AcpEvent) => { + (handler as unknown as TestableAcpStreamHandler).handleAcpEvent(event); +}; + +describe("AcpStreamHandler usage updates", () => { + afterEach(() => { + vi.clearAllMocks(); + }); + + it("passes usage updates through the generic ACP event stream without mutating chat output", () => { + const onEvent = vi.fn(); + const onChunk = vi.fn(); + const onToolUse = vi.fn(); + const onToolComplete = vi.fn(); + const onComplete = vi.fn(); + const onError = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk, + onComplete, + onError, + onEvent, + onToolUse, + onToolComplete, + }, + "chat-1", + ); + const event: AcpEvent = { + type: "usage_update", + sessionId: "session-1", + used: 1234, + size: 200000, + }; + + handleAcpEvent(handler, event); + + expect(onEvent).toHaveBeenCalledWith(event); + expect(onChunk).not.toHaveBeenCalled(); + expect(onToolUse).not.toHaveBeenCalled(); + expect(onToolComplete).not.toHaveBeenCalled(); + expect(onComplete).not.toHaveBeenCalled(); + expect(onError).not.toHaveBeenCalled(); + }); +}); diff --git a/src/features/ai/types/acp.ts b/src/features/ai/types/acp.ts index aea4140de..53774e765 100644 --- a/src/features/ai/types/acp.ts +++ b/src/features/ai/types/acp.ts @@ -268,6 +268,12 @@ export type AcpEvent = title: string | null; updatedAt: string | null; } + | { + type: "usage_update"; + sessionId: string; + used: number; + size: number; + } | { type: "prompt_complete"; sessionId: string; From 6dc39953a6f3ae24a86c98d4e540cf2224bbc2ec Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Fri, 8 May 2026 23:55:23 +0700 Subject: [PATCH 03/35] Load ACP agents from registry --- src-tauri/src/commands/ai/acp.rs | 398 ++++++++++++++++++++++++++++++- 1 file changed, 390 insertions(+), 8 deletions(-) diff --git a/src-tauri/src/commands/ai/acp.rs b/src-tauri/src/commands/ai/acp.rs index 7f0021d9b..09c0ee435 100644 --- a/src-tauri/src/commands/ai/acp.rs +++ b/src-tauri/src/commands/ai/acp.rs @@ -15,6 +15,8 @@ use tokio::sync::Mutex; pub type AcpBridgeState = Arc>; const EXTENSIONS_CDN_BASE_URL: &str = "https://athas.dev/extensions"; +const ACP_REGISTRY_URL: &str = + "https://cdn.agentclientprotocol.com/registry/v1/latest/registry.json"; const AGENT_CATALOG_CACHE_SECONDS: u64 = 300; #[derive(Deserialize)] @@ -163,12 +165,57 @@ struct MarketplaceExtensionManifest { agents: Vec, } +#[derive(Deserialize)] +struct AcpRegistryIndex { + #[serde(default)] + agents: Vec, +} + +#[derive(Deserialize)] +struct AcpRegistryAgent { + id: String, + name: String, + description: String, + icon: Option, + distribution: AcpRegistryDistribution, +} + +#[derive(Deserialize)] +struct AcpRegistryDistribution { + binary: Option>, + npx: Option, + uvx: Option, +} + +#[derive(Deserialize)] +struct AcpRegistryBinaryTarget { + archive: String, + cmd: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, +} + +#[derive(Deserialize)] +struct AcpRegistryPackageTarget { + package: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, +} + fn extensions_manifest_url() -> String { let base_url = std::env::var("ATHAS_EXTENSIONS_CDN_URL") .unwrap_or_else(|_| EXTENSIONS_CDN_BASE_URL.to_string()); format!("{}/manifests.json", base_url.trim_end_matches('/')) } +fn acp_registry_url() -> String { + std::env::var("ATHAS_ACP_REGISTRY_URL").unwrap_or_else(|_| ACP_REGISTRY_URL.to_string()) +} + fn current_platform_arch() -> Option<&'static str> { match (std::env::consts::OS, std::env::consts::ARCH) { ("macos", "aarch64") => Some("darwin-arm64"), @@ -181,6 +228,33 @@ fn current_platform_arch() -> Option<&'static str> { } } +fn current_acp_registry_platform() -> Option<&'static str> { + match (std::env::consts::OS, std::env::consts::ARCH) { + ("macos", "aarch64") => Some("darwin-aarch64"), + ("macos", "x86_64") => Some("darwin-x86_64"), + ("linux", "aarch64") => Some("linux-aarch64"), + ("linux", "x86_64") => Some("linux-x86_64"), + ("windows", "aarch64") => Some("windows-aarch64"), + ("windows", "x86_64") => Some("windows-x86_64"), + _ => None, + } +} + +fn registry_command_name(cmd: &str, fallback: &str) -> String { + Path::new(cmd) + .file_name() + .and_then(|name| name.to_str()) + .map(|name| { + if cfg!(windows) { + name.strip_suffix(".exe").unwrap_or(name).to_string() + } else { + name.to_string() + } + }) + .filter(|name| !name.is_empty()) + .unwrap_or_else(|| fallback.to_string()) +} + fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { let mut agent = AgentConfig { id: contribution.id, @@ -217,6 +291,115 @@ fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { agent } +fn acp_registry_agent_to_config(agent: AcpRegistryAgent) -> Option { + let AcpRegistryAgent { + id, + name, + description, + icon, + distribution, + } = agent; + + if let Some(target) = current_acp_registry_platform() + .and_then(|platform| distribution.binary.as_ref()?.get(platform)) + { + let binary_name = registry_command_name(&target.cmd, &id); + return Some(AgentConfig { + id, + name, + binary_name, + binary_path: None, + args: target.args.clone(), + env_vars: target.env.clone(), + icon, + description: Some(description), + installed: false, + install_runtime: Some(AgentRuntime::Binary), + install_package: Some(target.cmd.clone()), + install_download_url: Some(target.archive.clone()), + install_command: Some(registry_command_name(&target.cmd, "")), + can_install: true, + }); + } + + if let Some(target) = distribution.npx { + let mut args = vec!["-y".to_string(), target.package.clone()]; + args.extend(target.args.clone()); + return Some(AgentConfig { + id, + name, + binary_name: "npx".to_string(), + binary_path: None, + args, + env_vars: target.env, + icon, + description: Some(description), + installed: false, + install_runtime: Some(AgentRuntime::Node), + install_package: Some(target.package), + install_download_url: None, + install_command: None, + can_install: true, + }); + } + + if let Some(target) = distribution.uvx { + let mut args = vec![target.package]; + args.extend(target.args); + return Some(AgentConfig { + id, + name, + binary_name: "uvx".to_string(), + binary_path: None, + args, + env_vars: target.env, + icon, + description: Some(description), + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + }); + } + + None +} + +fn acp_registry_agents_from_index(index: AcpRegistryIndex) -> Vec { + let mut agents = index + .agents + .into_iter() + .filter_map(acp_registry_agent_to_config) + .collect::>(); + agents.sort_by_key(|agent| agent.name.clone()); + agents +} + +async fn load_acp_registry_agents() -> Result, String> { + let response = reqwest::Client::new() + .get(acp_registry_url()) + .timeout(Duration::from_secs(5)) + .send() + .await + .map_err(|error| format!("Failed to load ACP registry: {}", error))?; + + if !response.status().is_success() { + return Err(format!( + "Failed to load ACP registry: HTTP {}", + response.status() + )); + } + + let registry = response + .json::() + .await + .map_err(|error| format!("Invalid ACP registry: {}", error))?; + + Ok(acp_registry_agents_from_index(registry)) +} + async fn load_marketplace_agents() -> Result, String> { let cache = AGENT_CATALOG_CACHE.get_or_init(|| std::sync::Mutex::new(None)); { @@ -230,6 +413,26 @@ async fn load_marketplace_agents() -> Result, String> { } } + let agents = match load_acp_registry_agents().await { + Ok(agents) => agents, + Err(registry_error) => { + log::warn!("{}", registry_error); + load_legacy_marketplace_agents().await? + } + }; + + let mut cached = cache + .lock() + .map_err(|_| "Agent catalog cache poisoned".to_string())?; + *cached = Some(CachedAgentCatalog { + loaded_at: Instant::now(), + agents: agents.clone(), + }); + + Ok(agents) +} + +async fn load_legacy_marketplace_agents() -> Result, String> { let response = reqwest::Client::new() .get(extensions_manifest_url()) .timeout(Duration::from_secs(5)) @@ -256,14 +459,6 @@ async fn load_marketplace_agents() -> Result, String> { .collect::>(); agents.sort_by_key(|agent| agent.name.clone()); - let mut cached = cache - .lock() - .map_err(|_| "Agent catalog cache poisoned".to_string())?; - *cached = Some(CachedAgentCatalog { - loaded_at: Instant::now(), - agents: agents.clone(), - }); - Ok(agents) } @@ -534,3 +729,190 @@ fn make_wrapper_executable(path: &PathBuf) -> Result<(), String> { Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_registry(json: &str) -> Vec { + let index: AcpRegistryIndex = serde_json::from_str(json).expect("registry fixture"); + acp_registry_agents_from_index(index) + } + + #[test] + fn acp_registry_maps_binary_agent_for_current_platform() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "codex-acp", + "name": "Codex CLI", + "version": "0.14.0", + "description": "ACP adapter for OpenAI's coding assistant", + "distribution": { + "binary": { + "darwin-aarch64": { + "archive": "https://example.com/codex-aarch64.tar.gz", + "cmd": "./codex-acp", + "args": ["--acp"], + "env": { "CODEX_HOME": "/tmp/codex" } + }, + "darwin-x86_64": { + "archive": "https://example.com/codex-x64.tar.gz", + "cmd": "./codex-acp" + }, + "linux-x86_64": { + "archive": "https://example.com/codex-linux.tar.gz", + "cmd": "./codex-acp" + }, + "windows-x86_64": { + "archive": "https://example.com/codex.zip", + "cmd": "./codex-acp.exe" + } + } + }, + "icon": "https://example.com/codex.svg" + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "codex-acp") + .expect("codex agent"); + + assert_eq!(agent.name, "Codex CLI"); + assert_eq!(agent.install_runtime, Some(AgentRuntime::Binary)); + assert_eq!(agent.binary_name, "codex-acp"); + assert!(agent.can_install); + assert!(agent.install_download_url.is_some()); + assert_eq!(agent.icon.as_deref(), Some("https://example.com/codex.svg")); + } + + #[test] + fn acp_registry_maps_npx_agent_to_npx_launch() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "claude-acp", + "name": "Claude Agent", + "version": "0.33.1", + "description": "ACP wrapper for Claude", + "distribution": { + "npx": { + "package": "@agentclientprotocol/claude-agent-acp@0.33.1", + "args": ["--verbose"], + "env": { "ANTHROPIC_HOME": "/tmp/claude" } + } + } + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "claude-acp") + .expect("claude agent"); + + assert_eq!(agent.binary_name, "npx"); + assert_eq!(agent.install_runtime, Some(AgentRuntime::Node)); + assert_eq!( + agent.args, + vec![ + "-y".to_string(), + "@agentclientprotocol/claude-agent-acp@0.33.1".to_string(), + "--verbose".to_string() + ] + ); + assert_eq!( + agent.install_package.as_deref(), + Some("@agentclientprotocol/claude-agent-acp@0.33.1") + ); + assert!(agent.can_install); + } + + #[test] + fn acp_registry_includes_uvx_agents_without_managed_install_claim() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "fast-agent", + "name": "fast-agent", + "version": "0.7.0", + "description": "Code and build agents", + "distribution": { + "uvx": { + "package": "fast-agent-acp==0.7.0", + "args": ["-x"] + } + } + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "fast-agent") + .expect("uvx agent"); + + assert_eq!(agent.binary_name, "uvx"); + assert_eq!( + agent.args, + vec!["fast-agent-acp==0.7.0".to_string(), "-x".to_string()] + ); + assert_eq!(agent.install_runtime, None); + assert!(!agent.can_install); + } + + #[test] + fn acp_registry_prefers_current_platform_binary_over_npx() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "kilo", + "name": "Kilo", + "version": "7.2.40", + "description": "Kilo ACP", + "distribution": { + "binary": { + "darwin-aarch64": { + "archive": "https://example.com/kilo-aarch64.tar.gz", + "cmd": "./kilo" + }, + "darwin-x86_64": { + "archive": "https://example.com/kilo-x64.tar.gz", + "cmd": "./kilo" + }, + "linux-x86_64": { + "archive": "https://example.com/kilo-linux.tar.gz", + "cmd": "./kilo" + }, + "windows-x86_64": { + "archive": "https://example.com/kilo.zip", + "cmd": "kilo.exe" + } + }, + "npx": { + "package": "kilo-code@7.2.40" + } + } + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "kilo") + .expect("kilo agent"); + + assert_eq!(agent.install_runtime, Some(AgentRuntime::Binary)); + assert_eq!(agent.binary_name, "kilo"); + } +} From e5f5b77bb7a1c936b6fbe69e29a34572bfbc4597 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Sat, 9 May 2026 19:47:31 +0700 Subject: [PATCH 04/35] Preserve legacy ACP agents --- src-tauri/src/commands/ai/acp.rs | 172 ++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 27 deletions(-) diff --git a/src-tauri/src/commands/ai/acp.rs b/src-tauri/src/commands/ai/acp.rs index 09c0ee435..2c1d10332 100644 --- a/src-tauri/src/commands/ai/acp.rs +++ b/src-tauri/src/commands/ai/acp.rs @@ -32,15 +32,17 @@ pub struct PermissionResponseArgs { #[tauri::command] pub async fn get_available_agents( + app_handle: AppHandle, bridge: State<'_, AcpBridgeState>, ) -> Result, String> { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; Ok(bridge.detect_agents()) } #[tauri::command] pub async fn start_acp_agent( + app_handle: AppHandle, bridge: State<'_, AcpBridgeState>, agent_id: String, workspace_path: Option, @@ -48,7 +50,7 @@ pub async fn start_acp_agent( ) -> Result { let bridge = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; bridge.detect_agents(); bridge.clone() }; @@ -66,7 +68,7 @@ pub async fn install_acp_agent( ) -> Result { let agent = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; let agents = bridge.detect_agents(); agents .into_iter() @@ -99,7 +101,7 @@ pub async fn uninstall_acp_agent( ) -> Result { let agent = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; bridge.invalidate_agent_detection_cache(); let agents = bridge.detect_agents(); agents @@ -335,11 +337,11 @@ fn acp_registry_agent_to_config(agent: AcpRegistryAgent) -> Option icon, description: Some(description), installed: false, - install_runtime: Some(AgentRuntime::Node), - install_package: Some(target.package), + install_runtime: None, + install_package: None, install_download_url: None, install_command: None, - can_install: true, + can_install: false, }); } @@ -377,7 +379,39 @@ fn acp_registry_agents_from_index(index: AcpRegistryIndex) -> Vec { agents } -async fn load_acp_registry_agents() -> Result, String> { +fn acp_registry_cache_path(app_handle: &AppHandle) -> Result { + app_handle + .path() + .app_data_dir() + .map(|dir| dir.join("acp-registry").join("registry.json")) + .map_err(|error| format!("Failed to resolve ACP registry cache path: {}", error)) +} + +fn acp_registry_agents_from_json(json: &str) -> Result, String> { + let registry = serde_json::from_str::(json) + .map_err(|error| format!("Invalid ACP registry: {}", error))?; + Ok(acp_registry_agents_from_index(registry)) +} + +fn load_cached_acp_registry_agents(app_handle: &AppHandle) -> Result, String> { + let cache_path = acp_registry_cache_path(app_handle)?; + let json = fs::read_to_string(&cache_path) + .map_err(|error| format!("Failed to read cached ACP registry: {}", error))?; + acp_registry_agents_from_json(&json) + .map_err(|error| format!("Invalid cached ACP registry: {}", error)) +} + +fn write_acp_registry_cache(app_handle: &AppHandle, json: &str) -> Result<(), String> { + let cache_path = acp_registry_cache_path(app_handle)?; + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent) + .map_err(|error| format!("Failed to create ACP registry cache directory: {}", error))?; + } + fs::write(&cache_path, json) + .map_err(|error| format!("Failed to write ACP registry cache: {}", error)) +} + +async fn load_acp_registry_agents(app_handle: &AppHandle) -> Result, String> { let response = reqwest::Client::new() .get(acp_registry_url()) .timeout(Duration::from_secs(5)) @@ -392,15 +426,51 @@ async fn load_acp_registry_agents() -> Result, String> { )); } - let registry = response - .json::() + let json = response + .text() .await - .map_err(|error| format!("Invalid ACP registry: {}", error))?; + .map_err(|error| format!("Failed to read ACP registry response: {}", error))?; - Ok(acp_registry_agents_from_index(registry)) + let agents = acp_registry_agents_from_json(&json)?; + if let Err(error) = write_acp_registry_cache(app_handle, &json) { + log::warn!("{}", error); + } + + Ok(agents) +} + +async fn load_preferred_registry_agents( + app_handle: &AppHandle, +) -> Result, String> { + match load_acp_registry_agents(app_handle).await { + Ok(agents) => Ok(agents), + Err(registry_error) => { + log::warn!("{}", registry_error); + load_cached_acp_registry_agents(app_handle).map_err(|cache_error| { + log::warn!("{}", cache_error); + registry_error + }) + } + } } -async fn load_marketplace_agents() -> Result, String> { +fn merge_agent_catalogs( + mut preferred_agents: Vec, + fallback_agents: Vec, +) -> Vec { + for agent in fallback_agents { + if !preferred_agents + .iter() + .any(|preferred| preferred.id == agent.id) + { + preferred_agents.push(agent); + } + } + preferred_agents.sort_by_key(|agent| agent.name.clone()); + preferred_agents +} + +async fn load_marketplace_agents(app_handle: &AppHandle) -> Result, String> { let cache = AGENT_CATALOG_CACHE.get_or_init(|| std::sync::Mutex::new(None)); { let cached = cache @@ -413,11 +483,25 @@ async fn load_marketplace_agents() -> Result, String> { } } - let agents = match load_acp_registry_agents().await { - Ok(agents) => agents, - Err(registry_error) => { + let registry_agents = load_preferred_registry_agents(app_handle).await; + let legacy_agents = load_legacy_marketplace_agents().await; + let agents = match (registry_agents, legacy_agents) { + (Ok(registry_agents), Ok(legacy_agents)) => { + merge_agent_catalogs(registry_agents, legacy_agents) + } + (Ok(registry_agents), Err(legacy_error)) => { + log::warn!("{}", legacy_error); + registry_agents + } + (Err(registry_error), Ok(legacy_agents)) => { log::warn!("{}", registry_error); - load_legacy_marketplace_agents().await? + legacy_agents + } + (Err(registry_error), Err(legacy_error)) => { + return Err(format!( + "{}; legacy agent catalog also failed: {}", + registry_error, legacy_error + )); } }; @@ -462,8 +546,8 @@ async fn load_legacy_marketplace_agents() -> Result, String> { Ok(agents) } -async fn refresh_registered_agents(bridge: &mut AcpAgentBridge) { - match load_marketplace_agents().await { +async fn refresh_registered_agents(app_handle: &AppHandle, bridge: &mut AcpAgentBridge) { + match load_marketplace_agents(app_handle).await { Ok(agents) => bridge.replace_registered_agents(agents), Err(error) => { log::warn!("{}", error); @@ -735,8 +819,26 @@ mod tests { use super::*; fn parse_registry(json: &str) -> Vec { - let index: AcpRegistryIndex = serde_json::from_str(json).expect("registry fixture"); - acp_registry_agents_from_index(index) + acp_registry_agents_from_json(json).expect("registry fixture") + } + + fn test_agent(id: &str, name: &str) -> AgentConfig { + AgentConfig { + id: id.to_string(), + name: name.to_string(), + binary_name: id.to_string(), + binary_path: None, + args: vec![], + env_vars: HashMap::new(), + icon: None, + description: None, + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + } } #[test] @@ -818,7 +920,7 @@ mod tests { .expect("claude agent"); assert_eq!(agent.binary_name, "npx"); - assert_eq!(agent.install_runtime, Some(AgentRuntime::Node)); + assert_eq!(agent.install_runtime, None); assert_eq!( agent.args, vec![ @@ -827,11 +929,8 @@ mod tests { "--verbose".to_string() ] ); - assert_eq!( - agent.install_package.as_deref(), - Some("@agentclientprotocol/claude-agent-acp@0.33.1") - ); - assert!(agent.can_install); + assert_eq!(agent.install_package.as_deref(), None); + assert!(!agent.can_install); } #[test] @@ -915,4 +1014,23 @@ mod tests { assert_eq!(agent.install_runtime, Some(AgentRuntime::Binary)); assert_eq!(agent.binary_name, "kilo"); } + + #[test] + fn merge_agent_catalogs_preserves_legacy_agents() { + let agents = merge_agent_catalogs( + vec![test_agent("codex-acp", "Codex Registry")], + vec![ + test_agent("codex-acp", "Codex Legacy"), + test_agent("athas-legacy", "Athas Legacy"), + ], + ); + + assert_eq!(agents.len(), 2); + assert!(agents.iter().any(|agent| agent.id == "athas-legacy")); + let codex = agents + .iter() + .find(|agent| agent.id == "codex-acp") + .expect("codex entry"); + assert_eq!(codex.name, "Codex Registry"); + } } From 6c281b17a0b373e19ffed33b8ea054fd636a5f7a Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Sat, 9 May 2026 20:21:36 +0700 Subject: [PATCH 05/35] Align ACP agent catalog behavior Merge registry agents ahead of the legacy Athas marketplace catalog while preserving legacy-only entries as fallback options. Add agentServers settings support for custom agents and registry env/default overrides, and apply configured defaults through ACP session controls when available. Cover catalog precedence, custom agent mapping, env/default overrides, malformed local entries, and settings import/normalization with focused tests. --- crates/ai/src/acp/bridge_init.rs | 133 +++- crates/ai/src/acp/config.rs | 8 + crates/ai/src/acp/types.rs | 4 + src-tauri/src/commands/ai/acp.rs | 651 +++++++++++++++++- src/features/ai/types/acp.ts | 2 + .../settings/config/default-settings.ts | 4 + .../settings/lib/settings-normalization.ts | 87 +++ .../tests/settings-import-export.test.ts | 24 + .../tests/settings-normalization.test.ts | 47 ++ src/features/settings/types/settings.ts | 17 + 10 files changed, 958 insertions(+), 19 deletions(-) diff --git a/crates/ai/src/acp/bridge_init.rs b/crates/ai/src/acp/bridge_init.rs index 921ce4f43..730c90e03 100644 --- a/crates/ai/src/acp/bridge_init.rs +++ b/crates/ai/src/acp/bridge_init.rs @@ -104,6 +104,8 @@ pub(super) async fn initialize_worker( SessionBootstrapContext { auth_methods, supports_session_resume, + default_mode: config.default_mode.clone(), + default_model: config.default_model.clone(), map_config_options, child: &mut child, io_handle: &io_handle, @@ -143,6 +145,8 @@ where { auth_methods: Vec, supports_session_resume: bool, + default_mode: Option, + default_model: Option, map_config_options: F, child: &'a mut Child, io_handle: &'a tokio::task::JoinHandle<()>, @@ -340,6 +344,14 @@ async fn bootstrap_session( match load_result { Ok(Ok(load_response)) => { + apply_session_defaults( + connection.clone(), + acp::SessionId::new(existing_session_id.clone()), + ctx.default_mode.as_deref(), + ctx.default_model.as_deref(), + load_response.config_options.as_ref(), + ) + .await; log::info!("ACP session loaded: {}", existing_session_id); client.set_session_id(existing_session_id.clone()).await; return Ok(SessionBootstrap { @@ -375,6 +387,14 @@ async fn bootstrap_session( match resume_result { Ok(Ok(resume_response)) => { + apply_session_defaults( + connection.clone(), + acp::SessionId::new(existing_session_id.clone()), + ctx.default_mode.as_deref(), + ctx.default_model.as_deref(), + resume_response.config_options.as_ref(), + ) + .await; log::info!("ACP session resumed: {}", existing_session_id); client.set_session_id(existing_session_id.clone()).await; return Ok(SessionBootstrap { @@ -478,6 +498,14 @@ async fn bootstrap_session( }; log::info!("ACP session created: {}", session.session_id); + apply_session_defaults( + connection.clone(), + session.session_id.clone(), + ctx.default_mode.as_deref(), + ctx.default_model.as_deref(), + session.config_options.as_ref(), + ) + .await; client.set_session_id(session.session_id.to_string()).await; Ok(SessionBootstrap { @@ -491,7 +519,7 @@ async fn create_session( connection: Arc, cwd: PathBuf, ) -> Result, tokio::time::error::Elapsed> { - let session_request = acp::NewSessionRequest::new(cwd); + let session_request = new_session_request(cwd); tokio::time::timeout( std::time::Duration::from_secs(30), connection.new_session(session_request), @@ -504,7 +532,7 @@ async fn load_session( cwd: PathBuf, existing_session_id: String, ) -> Result, tokio::time::error::Elapsed> { - let request = acp::LoadSessionRequest::new(existing_session_id, cwd); + let request = load_session_request(existing_session_id, cwd); tokio::time::timeout( std::time::Duration::from_secs(30), connection.load_session(request), @@ -517,7 +545,7 @@ async fn resume_session( cwd: PathBuf, existing_session_id: String, ) -> Result, tokio::time::error::Elapsed> { - let request = acp::ResumeSessionRequest::new(existing_session_id, cwd); + let request = resume_session_request(existing_session_id, cwd); tokio::time::timeout( std::time::Duration::from_secs(30), connection.resume_session(request), @@ -525,6 +553,74 @@ async fn resume_session( .await } +fn new_session_request(cwd: PathBuf) -> acp::NewSessionRequest { + acp::NewSessionRequest::new(cwd) +} + +fn load_session_request(existing_session_id: String, cwd: PathBuf) -> acp::LoadSessionRequest { + acp::LoadSessionRequest::new(existing_session_id, cwd) +} + +fn resume_session_request(existing_session_id: String, cwd: PathBuf) -> acp::ResumeSessionRequest { + acp::ResumeSessionRequest::new(existing_session_id, cwd) +} + +async fn apply_session_defaults( + connection: Arc, + session_id: acp::SessionId, + default_mode: Option<&str>, + default_model: Option<&str>, + config_options: Option<&Vec>, +) { + if let Some(mode_id) = default_mode.filter(|mode| !mode.trim().is_empty()) + && let Err(error) = connection + .set_session_mode(acp::SetSessionModeRequest::new( + session_id.clone(), + mode_id.to_string(), + )) + .await + { + log::warn!("Failed to apply ACP default mode '{}': {}", mode_id, error); + } + + let Some(model_id) = default_model.filter(|model| !model.trim().is_empty()) else { + return; + }; + let Some(config_id) = model_config_option_id(config_options) else { + log::debug!( + "ACP default model '{}' configured, but the agent did not expose a model config option", + model_id + ); + return; + }; + + if let Err(error) = connection + .set_session_config_option(acp::SetSessionConfigOptionRequest::new( + session_id, + config_id, + model_id.to_string(), + )) + .await + { + log::warn!( + "Failed to apply ACP default model '{}': {}", + model_id, + error + ); + } +} + +fn model_config_option_id( + config_options: Option<&Vec>, +) -> Option { + config_options? + .iter() + .find(|option| { + option.id.to_string() == "model" || option.category.as_deref() == Some("model") + }) + .map(|option| option.id.to_string()) +} + fn map_mode_state(modes: acp::SessionModeState) -> SessionModeState { SessionModeState { current_mode_id: Some(modes.current_mode_id.to_string()), @@ -570,3 +666,34 @@ fn emit_initial_session_state( log::warn!("Failed to emit initial session config options: {}", e); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_session_request_sets_cwd() { + let request = new_session_request(PathBuf::from("/repo")); + + assert_eq!(request.cwd, PathBuf::from("/repo")); + assert!(request.mcp_servers.is_empty()); + } + + #[test] + fn load_session_request_sets_session_and_cwd() { + let request = load_session_request("session-1".to_string(), PathBuf::from("/repo")); + + assert_eq!(request.session_id, acp::SessionId::new("session-1")); + assert_eq!(request.cwd, PathBuf::from("/repo")); + assert!(request.mcp_servers.is_empty()); + } + + #[test] + fn resume_session_request_sets_session_and_cwd() { + let request = resume_session_request("session-1".to_string(), PathBuf::from("/repo")); + + assert_eq!(request.session_id, acp::SessionId::new("session-1")); + assert_eq!(request.cwd, PathBuf::from("/repo")); + assert!(request.mcp_servers.is_empty()); + } +} diff --git a/crates/ai/src/acp/config.rs b/crates/ai/src/acp/config.rs index 44cab9288..f66908c49 100644 --- a/crates/ai/src/acp/config.rs +++ b/crates/ai/src/acp/config.rs @@ -94,6 +94,14 @@ impl AgentRegistry { continue; } + if let Some(path) = config.binary_path.as_ref().map(PathBuf::from) + && path.is_file() + { + config.installed = true; + config.binary_path = Some(path.to_string_lossy().to_string()); + continue; + } + if let Some(path) = find_binary(&config.binary_name) { config.installed = true; config.binary_path = Some(path.to_string_lossy().to_string()); diff --git a/crates/ai/src/acp/types.rs b/crates/ai/src/acp/types.rs index c6e9af22a..33c40c430 100644 --- a/crates/ai/src/acp/types.rs +++ b/crates/ai/src/acp/types.rs @@ -204,6 +204,8 @@ pub struct AgentConfig { pub binary_path: Option, pub args: Vec, pub env_vars: HashMap, + pub default_mode: Option, + pub default_model: Option, pub icon: Option, pub description: Option, pub installed: bool, @@ -224,6 +226,8 @@ impl AgentConfig { binary_path: None, args: Vec::new(), env_vars: HashMap::new(), + default_mode: None, + default_model: None, icon: None, description: None, installed: false, diff --git a/src-tauri/src/commands/ai/acp.rs b/src-tauri/src/commands/ai/acp.rs index 7f0021d9b..7c0f48e21 100644 --- a/src-tauri/src/commands/ai/acp.rs +++ b/src-tauri/src/commands/ai/acp.rs @@ -11,10 +11,13 @@ use std::{ time::{Duration, Instant}, }; use tauri::{Manager, State}; +use tauri_plugin_store::StoreExt; use tokio::sync::Mutex; pub type AcpBridgeState = Arc>; const EXTENSIONS_CDN_BASE_URL: &str = "https://athas.dev/extensions"; +const ACP_REGISTRY_URL: &str = + "https://cdn.agentclientprotocol.com/registry/v1/latest/registry.json"; const AGENT_CATALOG_CACHE_SECONDS: u64 = 300; #[derive(Deserialize)] @@ -30,15 +33,17 @@ pub struct PermissionResponseArgs { #[tauri::command] pub async fn get_available_agents( + app_handle: AppHandle, bridge: State<'_, AcpBridgeState>, ) -> Result, String> { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; Ok(bridge.detect_agents()) } #[tauri::command] pub async fn start_acp_agent( + app_handle: AppHandle, bridge: State<'_, AcpBridgeState>, agent_id: String, workspace_path: Option, @@ -46,7 +51,7 @@ pub async fn start_acp_agent( ) -> Result { let bridge = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; bridge.detect_agents(); bridge.clone() }; @@ -64,7 +69,7 @@ pub async fn install_acp_agent( ) -> Result { let agent = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; let agents = bridge.detect_agents(); agents .into_iter() @@ -97,7 +102,7 @@ pub async fn uninstall_acp_agent( ) -> Result { let agent = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; bridge.invalidate_agent_detection_cache(); let agents = bridge.detect_agents(); agents @@ -163,12 +168,81 @@ struct MarketplaceExtensionManifest { agents: Vec, } +#[derive(Deserialize)] +struct AcpRegistryIndex { + #[serde(default)] + agents: Vec, +} + +#[derive(Deserialize)] +struct AcpRegistryAgent { + id: String, + name: String, + description: String, + icon: Option, + distribution: AcpRegistryDistribution, +} + +#[derive(Deserialize)] +struct AcpRegistryDistribution { + binary: Option>, + npx: Option, + uvx: Option, +} + +#[derive(Deserialize)] +struct AcpRegistryBinaryTarget { + archive: String, + cmd: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, +} + +#[derive(Deserialize)] +struct AcpRegistryPackageTarget { + package: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(tag = "type", rename_all = "camelCase")] +enum AcpAgentServerSetting { + Custom { + command: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, + #[serde(default, alias = "defaultMode")] + default_mode: Option, + #[serde(default, alias = "defaultModel")] + default_model: Option, + }, + Registry { + #[serde(default)] + env: HashMap, + #[serde(default, alias = "defaultMode")] + default_mode: Option, + #[serde(default, alias = "defaultModel")] + default_model: Option, + }, +} + fn extensions_manifest_url() -> String { let base_url = std::env::var("ATHAS_EXTENSIONS_CDN_URL") .unwrap_or_else(|_| EXTENSIONS_CDN_BASE_URL.to_string()); format!("{}/manifests.json", base_url.trim_end_matches('/')) } +fn acp_registry_url() -> String { + std::env::var("ATHAS_ACP_REGISTRY_URL").unwrap_or_else(|_| ACP_REGISTRY_URL.to_string()) +} + fn current_platform_arch() -> Option<&'static str> { match (std::env::consts::OS, std::env::consts::ARCH) { ("macos", "aarch64") => Some("darwin-arm64"), @@ -181,6 +255,33 @@ fn current_platform_arch() -> Option<&'static str> { } } +fn current_acp_registry_platform() -> Option<&'static str> { + match (std::env::consts::OS, std::env::consts::ARCH) { + ("macos", "aarch64") => Some("darwin-aarch64"), + ("macos", "x86_64") => Some("darwin-x86_64"), + ("linux", "aarch64") => Some("linux-aarch64"), + ("linux", "x86_64") => Some("linux-x86_64"), + ("windows", "aarch64") => Some("windows-aarch64"), + ("windows", "x86_64") => Some("windows-x86_64"), + _ => None, + } +} + +fn registry_command_name(cmd: &str, fallback: &str) -> String { + Path::new(cmd) + .file_name() + .and_then(|name| name.to_str()) + .map(|name| { + if cfg!(windows) { + name.strip_suffix(".exe").unwrap_or(name).to_string() + } else { + name.to_string() + } + }) + .filter(|name| !name.is_empty()) + .unwrap_or_else(|| fallback.to_string()) +} + fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { let mut agent = AgentConfig { id: contribution.id, @@ -189,6 +290,8 @@ fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { binary_path: None, args: contribution.args, env_vars: contribution.env_vars, + default_mode: None, + default_model: None, icon: contribution.icon, description: contribution.description, installed: false, @@ -217,7 +320,304 @@ fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { agent } -async fn load_marketplace_agents() -> Result, String> { +fn acp_registry_agent_to_config(agent: AcpRegistryAgent) -> Option { + let AcpRegistryAgent { + id, + name, + description, + icon, + distribution, + } = agent; + + if let Some(target) = current_acp_registry_platform() + .and_then(|platform| distribution.binary.as_ref()?.get(platform)) + { + let binary_name = registry_command_name(&target.cmd, &id); + return Some(AgentConfig { + id, + name, + binary_name, + binary_path: None, + args: target.args.clone(), + env_vars: target.env.clone(), + default_mode: None, + default_model: None, + icon, + description: Some(description), + installed: false, + install_runtime: Some(AgentRuntime::Binary), + install_package: Some(target.cmd.clone()), + install_download_url: Some(target.archive.clone()), + install_command: Some(registry_command_name(&target.cmd, "")), + can_install: true, + }); + } + + if let Some(target) = distribution.npx { + let mut args = vec!["-y".to_string(), target.package.clone()]; + args.extend(target.args.clone()); + return Some(AgentConfig { + id, + name, + binary_name: "npx".to_string(), + binary_path: None, + args, + env_vars: target.env, + default_mode: None, + default_model: None, + icon, + description: Some(description), + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + }); + } + + if let Some(target) = distribution.uvx { + let mut args = vec![target.package]; + args.extend(target.args); + return Some(AgentConfig { + id, + name, + binary_name: "uvx".to_string(), + binary_path: None, + args, + env_vars: target.env, + default_mode: None, + default_model: None, + icon, + description: Some(description), + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + }); + } + + None +} + +fn acp_registry_agents_from_index(index: AcpRegistryIndex) -> Vec { + let mut agents = index + .agents + .into_iter() + .filter_map(acp_registry_agent_to_config) + .collect::>(); + agents.sort_by_key(|agent| agent.name.clone()); + agents +} + +fn acp_registry_cache_path(app_handle: &AppHandle) -> Result { + app_handle + .path() + .app_data_dir() + .map(|dir| dir.join("acp-registry").join("registry.json")) + .map_err(|error| format!("Failed to resolve ACP registry cache path: {}", error)) +} + +fn acp_registry_agents_from_json(json: &str) -> Result, String> { + let registry = serde_json::from_str::(json) + .map_err(|error| format!("Invalid ACP registry: {}", error))?; + Ok(acp_registry_agents_from_index(registry)) +} + +fn load_cached_acp_registry_agents(app_handle: &AppHandle) -> Result, String> { + let cache_path = acp_registry_cache_path(app_handle)?; + let json = fs::read_to_string(&cache_path) + .map_err(|error| format!("Failed to read cached ACP registry: {}", error))?; + acp_registry_agents_from_json(&json) + .map_err(|error| format!("Invalid cached ACP registry: {}", error)) +} + +fn write_acp_registry_cache(app_handle: &AppHandle, json: &str) -> Result<(), String> { + let cache_path = acp_registry_cache_path(app_handle)?; + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent) + .map_err(|error| format!("Failed to create ACP registry cache directory: {}", error))?; + } + fs::write(&cache_path, json) + .map_err(|error| format!("Failed to write ACP registry cache: {}", error)) +} + +async fn load_acp_registry_agents(app_handle: &AppHandle) -> Result, String> { + let response = reqwest::Client::new() + .get(acp_registry_url()) + .timeout(Duration::from_secs(5)) + .send() + .await + .map_err(|error| format!("Failed to load ACP registry: {}", error))?; + + if !response.status().is_success() { + return Err(format!( + "Failed to load ACP registry: HTTP {}", + response.status() + )); + } + + let json = response + .text() + .await + .map_err(|error| format!("Failed to read ACP registry response: {}", error))?; + + let agents = acp_registry_agents_from_json(&json)?; + if let Err(error) = write_acp_registry_cache(app_handle, &json) { + log::warn!("{}", error); + } + + Ok(agents) +} + +async fn load_preferred_registry_agents( + app_handle: &AppHandle, +) -> Result, String> { + match load_acp_registry_agents(app_handle).await { + Ok(agents) => Ok(agents), + Err(registry_error) => { + log::warn!("{}", registry_error); + load_cached_acp_registry_agents(app_handle).map_err(|cache_error| { + log::warn!("{}", cache_error); + registry_error + }) + } + } +} + +fn merge_agent_catalogs( + mut preferred_agents: Vec, + fallback_agents: Vec, +) -> Vec { + for agent in fallback_agents { + if !preferred_agents + .iter() + .any(|preferred| preferred.id == agent.id) + { + preferred_agents.push(agent); + } + } + preferred_agents.sort_by_key(|agent| agent.name.clone()); + preferred_agents +} + +fn apply_agent_server_settings( + mut agents: Vec, + settings: HashMap, +) -> Vec { + for (id, setting) in settings { + match setting { + AcpAgentServerSetting::Custom { + command, + args, + env, + default_mode, + default_model, + } => { + if command.trim().is_empty() { + log::warn!("Skipping custom ACP agent '{}' with an empty command", id); + continue; + } + let binary_name = registry_command_name(&command, &id); + let custom_agent = AgentConfig { + id: id.clone(), + name: id, + binary_name, + binary_path: Some(expand_home(&command)), + args, + env_vars: env, + default_mode: clean_setting(default_mode), + default_model: clean_setting(default_model), + icon: None, + description: Some("Custom ACP agent from Athas settings".to_string()), + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + }; + upsert_agent(&mut agents, custom_agent); + } + AcpAgentServerSetting::Registry { + env, + default_mode, + default_model, + } => { + let Some(agent) = agents + .iter_mut() + .find(|agent| agent.id == id || agent.name == id) + else { + log::debug!("Configured ACP registry agent '{}' was not found", id); + continue; + }; + agent.env_vars.extend(env); + agent.default_mode = clean_setting(default_mode); + agent.default_model = clean_setting(default_model); + } + } + } + + agents.sort_by_key(|agent| agent.name.clone()); + agents +} + +fn clean_setting(value: Option) -> Option { + value + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) +} + +fn expand_home(path: &str) -> String { + if path == "~" { + return std::env::var("HOME").unwrap_or_else(|_| path.to_string()); + } + if let Some(rest) = path.strip_prefix("~/") { + if let Ok(home) = std::env::var("HOME") { + return Path::new(&home).join(rest).to_string_lossy().to_string(); + } + } + path.to_string() +} + +fn upsert_agent(agents: &mut Vec, agent: AgentConfig) { + if let Some(existing) = agents.iter_mut().find(|existing| existing.id == agent.id) { + *existing = agent; + } else { + agents.push(agent); + } +} + +fn load_agent_server_settings( + app_handle: &AppHandle, +) -> Result, String> { + let store = app_handle + .store("settings.json") + .map_err(|error| format!("Failed to load settings store: {}", error))?; + let Some(value) = store.get("agentServers") else { + return Ok(HashMap::new()); + }; + let raw_settings = serde_json::from_value::>(value) + .map_err(|error| format!("Invalid agentServers settings: {}", error))?; + let mut settings = HashMap::new(); + + for (id, value) in raw_settings { + match serde_json::from_value::(value) { + Ok(setting) => { + settings.insert(id, setting); + } + Err(error) => { + log::warn!("Skipping invalid ACP agent setting '{}': {}", id, error); + } + } + } + + Ok(settings) +} + +async fn load_marketplace_agents(app_handle: &AppHandle) -> Result, String> { let cache = AGENT_CATALOG_CACHE.get_or_init(|| std::sync::Mutex::new(None)); { let cached = cache @@ -226,10 +626,55 @@ async fn load_marketplace_agents() -> Result, String> { if let Some(catalog) = cached.as_ref() && catalog.loaded_at.elapsed() < Duration::from_secs(AGENT_CATALOG_CACHE_SECONDS) { - return Ok(catalog.agents.clone()); + let agent_settings = load_agent_server_settings(app_handle).map_err(|error| { + log::warn!("{}", error); + error + })?; + return Ok(apply_agent_server_settings( + catalog.agents.clone(), + agent_settings, + )); } } + let registry_agents = load_preferred_registry_agents(app_handle).await; + let legacy_agents = load_legacy_marketplace_agents().await; + let agents = match (registry_agents, legacy_agents) { + (Ok(registry_agents), Ok(legacy_agents)) => { + merge_agent_catalogs(registry_agents, legacy_agents) + } + (Ok(registry_agents), Err(legacy_error)) => { + log::warn!("{}", legacy_error); + registry_agents + } + (Err(registry_error), Ok(legacy_agents)) => { + log::warn!("{}", registry_error); + legacy_agents + } + (Err(registry_error), Err(legacy_error)) => { + return Err(format!( + "{}; legacy agent catalog also failed: {}", + registry_error, legacy_error + )); + } + }; + + let mut cached = cache + .lock() + .map_err(|_| "Agent catalog cache poisoned".to_string())?; + *cached = Some(CachedAgentCatalog { + loaded_at: Instant::now(), + agents: agents.clone(), + }); + + let agent_settings = load_agent_server_settings(app_handle).map_err(|error| { + log::warn!("{}", error); + error + })?; + Ok(apply_agent_server_settings(agents, agent_settings)) +} + +async fn load_legacy_marketplace_agents() -> Result, String> { let response = reqwest::Client::new() .get(extensions_manifest_url()) .timeout(Duration::from_secs(5)) @@ -256,19 +701,11 @@ async fn load_marketplace_agents() -> Result, String> { .collect::>(); agents.sort_by_key(|agent| agent.name.clone()); - let mut cached = cache - .lock() - .map_err(|_| "Agent catalog cache poisoned".to_string())?; - *cached = Some(CachedAgentCatalog { - loaded_at: Instant::now(), - agents: agents.clone(), - }); - Ok(agents) } -async fn refresh_registered_agents(bridge: &mut AcpAgentBridge) { - match load_marketplace_agents().await { +async fn refresh_registered_agents(app_handle: &AppHandle, bridge: &mut AcpAgentBridge) { + match load_marketplace_agents(app_handle).await { Ok(agents) => bridge.replace_registered_agents(agents), Err(error) => { log::warn!("{}", error); @@ -534,3 +971,185 @@ fn make_wrapper_executable(path: &PathBuf) -> Result<(), String> { Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + fn agent(id: &str, name: &str, binary_name: &str) -> AgentConfig { + AgentConfig { + id: id.to_string(), + name: name.to_string(), + binary_name: binary_name.to_string(), + binary_path: None, + args: Vec::new(), + env_vars: HashMap::new(), + default_mode: None, + default_model: None, + icon: None, + description: None, + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + } + } + + #[test] + fn merge_agent_catalogs_prefers_registry_and_preserves_legacy_only_agents() { + let mut registry = agent("codex-acp", "Codex Registry", "npx"); + registry.args = vec!["-y".to_string(), "@vendor/codex-acp".to_string()]; + let legacy = agent("codex-acp", "Codex Legacy", "codex-acp"); + let legacy_only = agent("athas-local", "Athas Local", "athas-local"); + + let merged = merge_agent_catalogs(vec![registry], vec![legacy, legacy_only]); + + assert_eq!(merged.len(), 2); + let codex = merged + .iter() + .find(|candidate| candidate.id == "codex-acp") + .expect("codex agent"); + assert_eq!(codex.name, "Codex Registry"); + assert_eq!(codex.binary_name, "npx"); + assert!(merged.iter().any(|candidate| candidate.id == "athas-local")); + } + + #[test] + fn registry_settings_override_env_and_defaults_without_dropping_agent() { + let mut base = agent("codex-acp", "Codex", "npx"); + base.env_vars.insert( + "BASE_URL".to_string(), + "https://registry.example".to_string(), + ); + base + .env_vars + .insert("KEEP_ME".to_string(), "registry".to_string()); + + let mut env = HashMap::new(); + env.insert("BASE_URL".to_string(), "https://user.example".to_string()); + env.insert("USER_ONLY".to_string(), "true".to_string()); + let mut settings = HashMap::new(); + settings.insert( + "codex-acp".to_string(), + AcpAgentServerSetting::Registry { + env, + default_mode: Some("plan".to_string()), + default_model: Some("gpt-5.5".to_string()), + }, + ); + + let agents = apply_agent_server_settings(vec![base], settings); + let codex = agents.first().expect("codex agent"); + + assert_eq!( + codex.env_vars.get("BASE_URL").map(String::as_str), + Some("https://user.example") + ); + assert_eq!( + codex.env_vars.get("KEEP_ME").map(String::as_str), + Some("registry") + ); + assert_eq!( + codex.env_vars.get("USER_ONLY").map(String::as_str), + Some("true") + ); + assert_eq!(codex.default_mode.as_deref(), Some("plan")); + assert_eq!(codex.default_model.as_deref(), Some("gpt-5.5")); + } + + #[test] + fn custom_agent_settings_create_runnable_agent_config() { + let mut env = HashMap::new(); + env.insert("CUSTOM_TOKEN".to_string(), "secret".to_string()); + let mut settings = HashMap::new(); + settings.insert( + "my-agent".to_string(), + AcpAgentServerSetting::Custom { + command: "/usr/local/bin/my-agent".to_string(), + args: vec!["--acp".to_string()], + env, + default_mode: Some(" act ".to_string()), + default_model: Some(" custom-model ".to_string()), + }, + ); + + let agents = apply_agent_server_settings(Vec::new(), settings); + let custom = agents.first().expect("custom agent"); + + assert_eq!(custom.id, "my-agent"); + assert_eq!(custom.name, "my-agent"); + assert_eq!(custom.binary_name, "my-agent"); + assert_eq!( + custom.binary_path.as_deref(), + Some("/usr/local/bin/my-agent") + ); + assert_eq!(custom.args, vec!["--acp"]); + assert_eq!( + custom.env_vars.get("CUSTOM_TOKEN").map(String::as_str), + Some("secret") + ); + assert_eq!(custom.default_mode.as_deref(), Some("act")); + assert_eq!(custom.default_model.as_deref(), Some("custom-model")); + assert!(!custom.can_install); + } + + #[test] + fn malformed_custom_agent_settings_are_skipped() { + let mut settings = HashMap::new(); + settings.insert( + "broken".to_string(), + AcpAgentServerSetting::Custom { + command: " ".to_string(), + args: Vec::new(), + env: HashMap::new(), + default_mode: None, + default_model: None, + }, + ); + + let agents = apply_agent_server_settings(Vec::new(), settings); + + assert!(agents.is_empty()); + } + + #[test] + fn acp_registry_json_maps_npx_distribution() { + let json = r#"{ + "agents": [ + { + "id": "codex-acp", + "name": "Codex", + "description": "Codex ACP adapter", + "icon": "codex.svg", + "distribution": { + "npx": { + "package": "@vendor/codex-acp", + "args": ["--flag"], + "env": { "REGISTRY_ENV": "1" } + } + } + } + ] + }"#; + + let agents = acp_registry_agents_from_json(json).expect("registry agents"); + let codex = agents.first().expect("codex agent"); + + assert_eq!(codex.id, "codex-acp"); + assert_eq!(codex.binary_name, "npx"); + assert_eq!( + codex.args, + vec![ + "-y".to_string(), + "@vendor/codex-acp".to_string(), + "--flag".to_string() + ] + ); + assert_eq!( + codex.env_vars.get("REGISTRY_ENV").map(String::as_str), + Some("1") + ); + } +} diff --git a/src/features/ai/types/acp.ts b/src/features/ai/types/acp.ts index aea4140de..40e06fdac 100644 --- a/src/features/ai/types/acp.ts +++ b/src/features/ai/types/acp.ts @@ -7,6 +7,8 @@ export interface AgentConfig { binaryPath: string | null; args: string[]; envVars: Record; + defaultMode: string | null; + defaultModel: string | null; icon: string | null; description: string | null; installed: boolean; diff --git a/src/features/settings/config/default-settings.ts b/src/features/settings/config/default-settings.ts index 6b047292f..367aee192 100644 --- a/src/features/settings/config/default-settings.ts +++ b/src/features/settings/config/default-settings.ts @@ -76,6 +76,7 @@ export const defaultSettings: Settings = { aiAutocompleteCustomModelId: "", aiDefaultSessionMode: "", aiSkills: [], + agentServers: {}, ollamaBaseUrl: "http://localhost:11434", // Layout sidebarWidth: 220, @@ -164,6 +165,9 @@ export function getDefaultSettingsSnapshot(): Settings { footerLeadingItemsOrder: [...defaultSettings.footerLeadingItemsOrder], footerTrailingItemsOrder: [...defaultSettings.footerTrailingItemsOrder], aiSkills: defaultSettings.aiSkills.map((skill) => ({ ...skill })), + agentServers: Object.fromEntries( + Object.entries(defaultSettings.agentServers).map(([id, agent]) => [id, { ...agent }]), + ), uiFontSize: normalizeUiFontSize(defaultSettings.uiFontSize), }; } diff --git a/src/features/settings/lib/settings-normalization.ts b/src/features/settings/lib/settings-normalization.ts index f5048a080..3ec31da26 100644 --- a/src/features/settings/lib/settings-normalization.ts +++ b/src/features/settings/lib/settings-normalization.ts @@ -77,6 +77,11 @@ function normalizeBaseUrl(value: string | undefined): string { } const MAX_SYNCED_AI_SKILLS = 200; +const MAX_AGENT_SERVERS = 100; + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} function normalizeAISkills(skills: Settings["aiSkills"]): Settings["aiSkills"] { if (!Array.isArray(skills)) { @@ -146,6 +151,83 @@ function normalizeAISkills(skills: Settings["aiSkills"]): Settings["aiSkills"] { })); } +function normalizeEnv(value: unknown): Record | undefined { + if (!isRecord(value)) { + return undefined; + } + + const env = Object.fromEntries( + Object.entries(value) + .filter( + (entry): entry is [string, string] => + typeof entry[0] === "string" && + entry[0].trim().length > 0 && + typeof entry[1] === "string", + ) + .map(([key, envValue]) => [key.trim().slice(0, 160), envValue]), + ); + + return Object.keys(env).length > 0 ? env : undefined; +} + +function normalizeAgentServers(value: Settings["agentServers"]): Settings["agentServers"] { + if (!isRecord(value)) { + return {}; + } + + const entries: Array<[string, Settings["agentServers"][string]]> = []; + + for (const [id, server] of Object.entries(value) + .filter(([id, server]) => typeof id === "string" && id.trim().length > 0 && isRecord(server)) + .slice(0, MAX_AGENT_SERVERS)) { + const trimmedId = id.trim().slice(0, 160); + const env = normalizeEnv(server.env); + const defaultMode = + typeof server.defaultMode === "string" && server.defaultMode.trim() + ? server.defaultMode.trim().slice(0, 160) + : undefined; + const defaultModel = + typeof server.defaultModel === "string" && server.defaultModel.trim() + ? server.defaultModel.trim().slice(0, 240) + : undefined; + + if (server.type === "custom") { + if (typeof server.command !== "string" || !server.command.trim()) { + continue; + } + const args = Array.isArray(server.args) + ? server.args.filter((arg): arg is string => typeof arg === "string") + : undefined; + entries.push([ + trimmedId, + { + type: "custom", + command: server.command.trim(), + ...(args ? { args } : {}), + ...(env ? { env } : {}), + ...(defaultMode ? { defaultMode } : {}), + ...(defaultModel ? { defaultModel } : {}), + }, + ]); + continue; + } + + if (server.type === "registry") { + entries.push([ + trimmedId, + { + type: "registry", + ...(env ? { env } : {}), + ...(defaultMode ? { defaultMode } : {}), + ...(defaultModel ? { defaultModel } : {}), + }, + ]); + } + } + + return Object.fromEntries(entries); +} + function normalizeAISettings(settings: Settings): Settings { const normalizedSettings = { ...settings }; const provider = @@ -191,6 +273,7 @@ function normalizeAISettings(settings: Settings): Settings { normalizedSettings.aiAutocompleteCustomModelId = normalizedSettings.aiAutocompleteCustomModelId?.trim() || ""; normalizedSettings.aiSkills = normalizeAISkills(normalizedSettings.aiSkills); + normalizedSettings.agentServers = normalizeAgentServers(normalizedSettings.agentServers); return normalizedSettings; } @@ -310,6 +393,10 @@ export function normalizeSettingValue( return normalizeAISkills(value as Settings["aiSkills"]) as Settings[K]; } + if (key === "agentServers") { + return normalizeAgentServers(value as Settings["agentServers"]) as Settings[K]; + } + if (key === "aiCustomBaseUrl") { return normalizeBaseUrl(value as string) as Settings[K]; } diff --git a/src/features/settings/tests/settings-import-export.test.ts b/src/features/settings/tests/settings-import-export.test.ts index 522a68667..e358c79be 100644 --- a/src/features/settings/tests/settings-import-export.test.ts +++ b/src/features/settings/tests/settings-import-export.test.ts @@ -46,4 +46,28 @@ describe("settings import/export", () => { expect(imported?.wordWrap).toBe(true); }); + + it("imports ACP agent server settings", () => { + const imported = parseSettingsImportJson( + JSON.stringify({ + agentServers: { + "codex-acp": { + type: "registry", + env: { + CODEX_HOME: "/tmp/codex", + }, + defaultMode: "plan", + }, + }, + }), + ); + + expect(imported?.agentServers["codex-acp"]).toEqual({ + type: "registry", + env: { + CODEX_HOME: "/tmp/codex", + }, + defaultMode: "plan", + }); + }); }); diff --git a/src/features/settings/tests/settings-normalization.test.ts b/src/features/settings/tests/settings-normalization.test.ts index a8585e750..5ab51a89e 100644 --- a/src/features/settings/tests/settings-normalization.test.ts +++ b/src/features/settings/tests/settings-normalization.test.ts @@ -112,4 +112,51 @@ describe("settings normalization", () => { upstreamUpdatedAt: "2026-04-01T00:00:00.000Z", }); }); + + it("normalizes ACP agent server settings", () => { + const normalized = normalizeSettingValue("agentServers", { + " codex-acp ": { + type: "registry", + env: { + CODEX_API_KEY: "secret", + EMPTY_ALLOWED: "", + ignored: 1, + }, + defaultMode: " plan ", + defaultModel: " gpt-5.5 ", + }, + " custom-agent ": { + type: "custom", + command: " node ", + args: ["agent.js", 123], + env: { + CUSTOM_ENV: "1", + }, + }, + broken: { + type: "custom", + command: " ", + }, + } as unknown as ReturnType["agentServers"]); + + expect(normalized).toEqual({ + "codex-acp": { + type: "registry", + env: { + CODEX_API_KEY: "secret", + EMPTY_ALLOWED: "", + }, + defaultMode: "plan", + defaultModel: "gpt-5.5", + }, + "custom-agent": { + type: "custom", + command: "node", + args: ["agent.js"], + env: { + CUSTOM_ENV: "1", + }, + }, + }); + }); }); diff --git a/src/features/settings/types/settings.ts b/src/features/settings/types/settings.ts index 1728947ef..3c5f531eb 100644 --- a/src/features/settings/types/settings.ts +++ b/src/features/settings/types/settings.ts @@ -9,6 +9,22 @@ import type { export type Theme = string; +export type AcpAgentServerSettings = + | { + type: "custom"; + command: string; + args?: string[]; + env?: Record; + defaultMode?: string; + defaultModel?: string; + } + | { + type: "registry"; + env?: Record; + defaultMode?: string; + defaultModel?: string; + }; + export interface Settings { // General autoSave: boolean; @@ -65,6 +81,7 @@ export interface Settings { aiAutocompleteCustomModelId: string; aiDefaultSessionMode: string; aiSkills: AIChatSkill[]; + agentServers: Record; ollamaBaseUrl: string; // Layout sidebarWidth: number; From 15030dacc5e7fc55bd14cc944df76e56c21f7429 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Sat, 9 May 2026 21:47:20 +0700 Subject: [PATCH 06/35] Recognize ACP registry agent icons --- .../ai/components/icons/provider-icons.tsx | 69 ++++++++++++++++--- src/features/ai/tests/provider-icons.test.ts | 21 ++++++ 2 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 src/features/ai/tests/provider-icons.test.ts diff --git a/src/features/ai/components/icons/provider-icons.tsx b/src/features/ai/components/icons/provider-icons.tsx index 0fad5be0f..aaa11d825 100644 --- a/src/features/ai/components/icons/provider-icons.tsx +++ b/src/features/ai/components/icons/provider-icons.tsx @@ -2,6 +2,19 @@ import type { SVGProps } from "react"; import { cn } from "@/utils/cn"; type IconProps = SVGProps & { size?: number }; +export type ProviderIconKind = + | "openai" + | "v0" + | "anthropic" + | "gemini" + | "xai" + | "deepseek" + | "mistral" + | "ollama" + | "openrouter" + | "moonshot" + | "qwen" + | "custom"; const defaultProps = (size = 14, className?: string): SVGProps => ({ width: size, @@ -148,22 +161,16 @@ export function ProviderIcon({ }) { const props = { size, className: cn("shrink-0", className) }; - switch (providerId) { + switch (resolveProviderIconKind(providerId)) { case "openai": - case "codex-cli": return ; case "v0": return ; case "anthropic": - case "claude-code": return ; case "gemini": - case "google": - case "gemini-cli": return ; - case "grok": case "xai": - case "x-ai": return ; case "deepseek": return ; @@ -173,15 +180,57 @@ export function ProviderIcon({ return ; case "openrouter": return ; - case "kimi-cli": + case "moonshot": return ; case "qwen": - case "qwen-code": return ; - case "opencode": case "custom": return ; default: return ; } } + +export function resolveProviderIconKind(providerId: string): ProviderIconKind { + const normalizedId = providerId.toLowerCase(); + + switch (normalizedId) { + case "openai": + case "codex-cli": + case "codex-acp": + return "openai"; + case "v0": + return "v0"; + case "anthropic": + case "claude-code": + case "claude-acp": + return "anthropic"; + case "gemini": + case "google": + case "gemini-cli": + return "gemini"; + case "grok": + case "xai": + case "x-ai": + return "xai"; + case "deepseek": + return "deepseek"; + case "mistral": + case "mistral-vibe": + return "mistral"; + case "ollama": + return "ollama"; + case "openrouter": + return "openrouter"; + case "kimi": + case "kimi-cli": + return "moonshot"; + case "qwen": + case "qwen-code": + return "qwen"; + case "opencode": + case "custom": + default: + return "custom"; + } +} diff --git a/src/features/ai/tests/provider-icons.test.ts b/src/features/ai/tests/provider-icons.test.ts new file mode 100644 index 000000000..9674dc06c --- /dev/null +++ b/src/features/ai/tests/provider-icons.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from "vite-plus/test"; +import { resolveProviderIconKind } from "../components/icons/provider-icons"; + +describe("resolveProviderIconKind", () => { + it("recognizes legacy and registry ACP provider ids", () => { + expect(resolveProviderIconKind("codex-cli")).toBe("openai"); + expect(resolveProviderIconKind("codex-acp")).toBe("openai"); + expect(resolveProviderIconKind("claude-code")).toBe("anthropic"); + expect(resolveProviderIconKind("claude-acp")).toBe("anthropic"); + expect(resolveProviderIconKind("gemini-cli")).toBe("gemini"); + expect(resolveProviderIconKind("gemini")).toBe("gemini"); + expect(resolveProviderIconKind("kimi-cli")).toBe("moonshot"); + expect(resolveProviderIconKind("kimi")).toBe("moonshot"); + expect(resolveProviderIconKind("qwen-code")).toBe("qwen"); + expect(resolveProviderIconKind("mistral-vibe")).toBe("mistral"); + }); + + it("falls back to the custom terminal glyph for unknown agent ids", () => { + expect(resolveProviderIconKind("local-agent")).toBe("custom"); + }); +}); From 690b67c9389b4b8884423756a6e8c310b6ed6f58 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Sat, 9 May 2026 21:58:20 +0700 Subject: [PATCH 07/35] Use catalog icons for ACP agents --- .../ai/components/icons/provider-icons.tsx | 38 +++++++++++++++++++ .../components/selectors/agent-selector.tsx | 17 ++++++++- src/features/ai/tests/provider-icons.test.ts | 16 +++++++- .../components/tabs/extensions-settings.tsx | 12 ++++++ 4 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/features/ai/components/icons/provider-icons.tsx b/src/features/ai/components/icons/provider-icons.tsx index aaa11d825..229e029d3 100644 --- a/src/features/ai/components/icons/provider-icons.tsx +++ b/src/features/ai/components/icons/provider-icons.tsx @@ -152,14 +152,38 @@ export function CustomAPIIcon({ size, className, ...props }: IconProps) { export function ProviderIcon({ providerId, + catalogIconUrl, size = 14, className, }: { providerId: string; + catalogIconUrl?: string | null; size?: number; className?: string; }) { const props = { size, className: cn("shrink-0", className) }; + const resolvedCatalogIconUrl = resolveCatalogIconUrl(catalogIconUrl); + + if (resolvedCatalogIconUrl) { + return ( +