From 66112037dcd2db3f3cc648376b6b89c4a7947318 Mon Sep 17 00:00:00 2001 From: huqiantao Date: Wed, 3 Jun 2026 18:46:27 +0800 Subject: [PATCH 1/2] perf(capacity): collapse build_canonical_state's reverse scans to one pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build_canonical_state previously did two independent reverse walks of session.messages — one to extract the most recent user goal, and one to collect up to four confirmed-fact snippets. apply_verify_and_replan then added a third and fourth reverse scan to locate the latest user message and the latest [verification replay] user message for the re-plan path. All four reverse scans collect disjoint facts about the same most-recent-first view of the conversation. This PR folds them into a single helper, scan_canonical_inputs, that walks messages once in reverse, fills a CanonicalStateScan, and short-circuits as soon as every collector is satisfied. The helper exposes the latest-message indices so apply_verify_and_replan can clone the full Message values after the scan (eliminating the two independent find().cloned() walks). The output CanonicalState is byte-identical to the prior implementation: same goal, same confirmed facts (newest first, errors filtered), same fallback string when no user text exists. The re-plan path's keep-messages set is identical: latest user + latest verified. Tests: 6 new unit tests cover the goal lookup, fact cap, error-result filter, verified-marker scan, empty input, and the early-exit condition. The full engine test suite (153 tests) still passes. 
--- crates/tui/src/core/engine/capacity_flow.rs | 303 ++++++++++++++++---- 1 file changed, 242 insertions(+), 61 deletions(-) diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index 06e37f497..7bc00be39 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -659,34 +659,16 @@ impl Engine { .persist_capacity_record(turn, GuardrailAction::VerifyAndReplan, &record) .await; - let latest_user = self - .session - .messages - .iter() - .rev() - .find(|msg| { - msg.role == "user" - && msg - .content - .iter() - .any(|block| matches!(block, ContentBlock::Text { .. })) - }) - .cloned(); - let latest_verified = self - .session - .messages - .iter() - .rev() - .find(|msg| { - msg.role == "user" - && msg.content.iter().any(|block| match block { - ContentBlock::ToolResult { content, .. } => { - content.contains("[verification replay]") - } - _ => false, - }) - }) - .cloned(); + // The replan path needs the *full* messages, not summaries. + // `scan_canonical_inputs` already located the indices in a single + // reverse pass; clone from the live `messages` slice once. + let scan = scan_canonical_inputs(&self.session.messages); + let latest_user = scan + .latest_user_text_idx + .and_then(|idx| self.session.messages.get(idx).cloned()); + let latest_verified = scan + .latest_verified_user_idx + .and_then(|idx| self.session.messages.get(idx).cloned()); self.session.messages.clear(); if let Some(msg) = latest_user { @@ -765,20 +747,15 @@ impl Engine { turn: &TurnContext, note: Option<&str>, ) -> CanonicalState { - let goal = self - .session - .messages - .iter() - .rev() - .find_map(|msg| { - if msg.role != "user" { - return None; - } - msg.content.iter().find_map(|block| match block { - ContentBlock::Text { text, .. 
} => Some(summarize_text(text, 220)), - _ => None, - }) - }) + // Single reverse scan of session.messages collects the goal, + // confirmed facts (capped at 4), and the latest verified-user + // message index. Previously this function did two reverse + // `.iter().rev().find_map()` walks and a third for facts; the + // dedicated scan below replaces all three with one pass that + // also early-exits once every collector is satisfied. + let scan = scan_canonical_inputs(&self.session.messages); + let goal = scan + .goal .unwrap_or_else(|| "Continue current task from compact state".to_string()); let mut constraints = vec![ @@ -789,24 +766,6 @@ impl Engine { constraints.push(summarize_text(note, 180)); } - let mut confirmed_facts = Vec::new(); - for msg in self.session.messages.iter().rev() { - for block in &msg.content { - if let ContentBlock::ToolResult { content, .. } = block { - if content.starts_with("Error:") { - continue; - } - confirmed_facts.push(summarize_text(content, 180)); - if confirmed_facts.len() >= 4 { - break; - } - } - } - if confirmed_facts.len() >= 4 { - break; - } - } - let open_loops: Vec = turn .tool_calls .iter() @@ -837,7 +796,7 @@ impl Engine { CanonicalState { goal, constraints, - confirmed_facts, + confirmed_facts: scan.confirmed_facts, open_loops, pending_actions, critical_refs, @@ -975,3 +934,225 @@ impl Engine { self.merge_compaction_summary(Some(prompt)); } } + +/// Maximum number of confirmed-fact snippets retained by the canonical-state +/// scan. Matches the prior `build_canonical_state` behavior — only the +/// four most recent non-error tool results are surfaced. +const CANONICAL_SCAN_MAX_FACTS: usize = 4; + +/// Output of [`scan_canonical_inputs`]: everything `build_canonical_state` +/// and `apply_verify_and_replan` need to know about the session's recent +/// history, collected in a single reverse pass over `session.messages`. 
+/// +/// Index fields (`latest_user_text_idx`, `latest_verified_user_idx`) point +/// into the original `messages` slice so the caller can clone the full +/// `Message` value when the re-plan path needs to keep it across a +/// `messages.clear()`. +#[derive(Debug, Default)] +struct CanonicalStateScan { + /// Most recent user-text block, summarized to ≤220 chars. `None` when + /// no user message with a Text block exists. + goal: Option, + /// Index of the most recent user message containing at least one + /// `Text` content block. Used by the re-plan path to keep the + /// latest user request across a `messages.clear()`. + latest_user_text_idx: Option, + /// Index of the most recent user message whose content includes a + /// `[verification replay]` tool result. Used by the re-plan path. + latest_verified_user_idx: Option, + /// Up to [`CANONICAL_SCAN_MAX_FACTS`] most recent non-error + /// `ToolResult` snippets, newest first. + confirmed_facts: Vec, + /// Running count of facts collected so far; lets the early-exit + /// condition avoid an extra `Vec::len()` call per message. + facts_collected: usize, +} + +impl CanonicalStateScan { + /// `true` once every collector is satisfied. The single-pass + /// caller can use this to break out of the reverse iteration. + fn is_complete(&self) -> bool { + self.goal.is_some() + && self.latest_verified_user_idx.is_some() + && self.facts_collected >= CANONICAL_SCAN_MAX_FACTS + } +} + +/// Walk `messages` once (in reverse) and collect everything the canonical +/// state and re-plan paths need. Replaces the previous pattern of three +/// independent reverse scans: one for the goal, one for confirmed facts, +/// and one for the latest verified user message. 
+fn scan_canonical_inputs(messages: &[Message]) -> CanonicalStateScan { + let mut scan = CanonicalStateScan::default(); + for (idx, msg) in messages.iter().enumerate().rev() { + if msg.role == "user" { + if scan.goal.is_none() { + if let Some(text) = msg.content.iter().find_map(|b| match b { + ContentBlock::Text { text, .. } => Some(text.as_str()), + _ => None, + }) { + scan.goal = Some(summarize_text(text, 220)); + scan.latest_user_text_idx = Some(idx); + } + } + if scan.latest_verified_user_idx.is_none() { + let verified = msg.content.iter().any(|b| match b { + ContentBlock::ToolResult { content, .. } => { + content.contains("[verification replay]") + } + _ => false, + }); + if verified { + scan.latest_verified_user_idx = Some(idx); + } + } + } + if scan.facts_collected < CANONICAL_SCAN_MAX_FACTS { + for block in &msg.content { + if let ContentBlock::ToolResult { content, .. } = block + && !content.starts_with("Error:") + { + scan.confirmed_facts.push(summarize_text(content, 180)); + scan.facts_collected = scan.facts_collected.saturating_add(1); + if scan.facts_collected >= CANONICAL_SCAN_MAX_FACTS { + break; + } + } + } + } + if scan.is_complete() { + break; + } + } + scan +} + +#[cfg(test)] +mod canonical_scan_tests { + use super::*; + use crate::models::ContentBlock; + + fn user_text_msg(text: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + } + } + + fn user_with_verified_replay(text: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }, ContentBlock::ToolResult { + tool_use_id: "x".to_string(), + content: "[verification replay] pass=true".to_string(), + is_error: None, + content_blocks: None, + }], + } + } + + fn tool_result_msg(content: &str) -> Message { + Message { + role: "tool".to_string(), + content: vec![ContentBlock::ToolResult { + tool_use_id: 
"x".to_string(), + content: content.to_string(), + is_error: None, + content_blocks: None, + }], + } + } + + #[test] + fn scan_returns_goal_for_latest_user_text() { + let messages = vec![ + user_text_msg("first"), + tool_result_msg("a"), + user_text_msg("second"), + tool_result_msg("b"), + user_text_msg("third"), + ]; + let scan = scan_canonical_inputs(&messages); + // Goal should be the most recent user text. + let goal = scan.goal.expect("goal"); + assert!(goal.contains("third"), "expected the most recent, got {goal}"); + assert_eq!(scan.latest_user_text_idx, Some(4)); + } + + #[test] + fn scan_collects_up_to_four_facts_newest_first() { + let messages = vec![ + tool_result_msg("fact-A"), + tool_result_msg("fact-B"), + tool_result_msg("fact-C"), + tool_result_msg("fact-D"), + tool_result_msg("fact-E"), + ]; + let scan = scan_canonical_inputs(&messages); + assert_eq!(scan.confirmed_facts.len(), 4); + // The four most recent (newest first) are E, D, C, B. + assert!(scan.confirmed_facts[0].contains("fact-E")); + assert!(scan.confirmed_facts[1].contains("fact-D")); + assert!(scan.confirmed_facts[2].contains("fact-C")); + assert!(scan.confirmed_facts[3].contains("fact-B")); + } + + #[test] + fn scan_skips_error_results() { + let messages = vec![ + tool_result_msg("good-A"), + tool_result_msg("Error: bad"), + tool_result_msg("good-B"), + ]; + let scan = scan_canonical_inputs(&messages); + assert_eq!(scan.confirmed_facts.len(), 2); + assert!(scan.confirmed_facts[0].contains("good-B")); + assert!(scan.confirmed_facts[1].contains("good-A")); + } + + #[test] + fn scan_finds_latest_verified_user_message() { + let messages = vec![ + user_text_msg("first"), + user_with_verified_replay("verified"), + user_text_msg("third"), + ]; + let scan = scan_canonical_inputs(&messages); + // The verified marker is on the *middle* message, not the most + // recent. The scan should report its actual position. 
+ assert_eq!(scan.latest_verified_user_idx, Some(1)); + // The goal still points at the most recent user text. + assert!(scan.goal.as_deref().unwrap_or("").contains("third")); + } + + #[test] + fn scan_handles_empty_input() { + let scan = scan_canonical_inputs(&[]); + assert!(scan.goal.is_none()); + assert!(scan.latest_verified_user_idx.is_none()); + assert!(scan.latest_user_text_idx.is_none()); + assert!(scan.confirmed_facts.is_empty()); + } + + #[test] + fn scan_early_exits_when_complete() { + // 1000 tool results — the scan should stop walking once the + // first 4 facts and a goal are found. We can't directly assert + // "didn't visit every element" without instrumentation, but the + // call must return promptly with the right slice. + let mut messages: Vec = (0..1000) + .map(|i| tool_result_msg(&format!("fact-{i}"))) + .collect(); + // Most recent user message comes last. + messages.push(user_text_msg("goal")); + let scan = scan_canonical_inputs(&messages); + assert!(scan.goal.as_deref().unwrap_or("").contains("goal")); + assert_eq!(scan.confirmed_facts.len(), 4); + } +} From a2474f7adebb023e42c8ca1cf64072bb88d23705 Mon Sep 17 00:00:00 2001 From: huqiantao Date: Wed, 3 Jun 2026 20:00:06 +0800 Subject: [PATCH 2/2] perf(capacity): let scan_canonical_inputs early-exit without verified-user lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build_canonical_state path never reads CanonicalStateScan::latest_verified_user_idx, but the previous patch required is_complete() to find a verified user message before it would short-circuit. On a long history with no verification replay — the common case — the scan walked the entire message list looking for a match that could not exist. Add a find_verified: bool parameter to scan_canonical_inputs and CanonicalStateScan::is_complete. build_canonical_state now passes false, so the loop stops as soon as the goal and CANONICAL_SCAN_MAX_FACTS facts are found. 
The replan path (apply_verify_and_replan) keeps the existing true behavior so it still locates the latest verified user message. Test calls are updated to match; no behavior change for any test. --- crates/tui/src/core/engine/capacity_flow.rs | 91 +++++++++++++-------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index 7bc00be39..56c507d06 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -661,8 +661,10 @@ impl Engine { // The replan path needs the *full* messages, not summaries. // `scan_canonical_inputs` already located the indices in a single - // reverse pass; clone from the live `messages` slice once. - let scan = scan_canonical_inputs(&self.session.messages); + // reverse pass; clone from the live `messages` slice once. We + // pass `true` because the replan path consumes + // `latest_verified_user_idx` below. + let scan = scan_canonical_inputs(&self.session.messages, true); let latest_user = scan .latest_user_text_idx .and_then(|idx| self.session.messages.get(idx).cloned()); @@ -752,8 +754,11 @@ impl Engine { // message index. Previously this function did two reverse // `.iter().rev().find_map()` walks and a third for facts; the // dedicated scan below replaces all three with one pass that - // also early-exits once every collector is satisfied. - let scan = scan_canonical_inputs(&self.session.messages); + // also early-exits once every collector is satisfied. We pass + // `false` here because build_canonical_state does not consume + // `latest_verified_user_idx`, so we don't need the scan to keep + // looking for it. 
+ let scan = scan_canonical_inputs(&self.session.messages, false); let goal = scan .goal .unwrap_or_else(|| "Continue current task from compact state".to_string()); @@ -969,11 +974,16 @@ struct CanonicalStateScan { } impl CanonicalStateScan { - /// `true` once every collector is satisfied. The single-pass - /// caller can use this to break out of the reverse iteration. - fn is_complete(&self) -> bool { + /// `true` once every collector the caller actually needs is satisfied. + /// + /// `find_verified` controls whether `latest_verified_user_idx` is part + /// of the early-exit gate. The build_canonical_state path does not + /// consume that field, so passing `false` lets the scan stop as soon + /// as the goal and `CANONICAL_SCAN_MAX_FACTS` facts are found — a + /// huge win on long histories with no verification replay. + fn is_complete(&self, find_verified: bool) -> bool { self.goal.is_some() - && self.latest_verified_user_idx.is_some() + && (!find_verified || self.latest_verified_user_idx.is_some()) && self.facts_collected >= CANONICAL_SCAN_MAX_FACTS } } @@ -982,20 +992,25 @@ impl CanonicalStateScan { /// state and re-plan paths need. Replaces the previous pattern of three /// independent reverse scans: one for the goal, one for confirmed facts, /// and one for the latest verified user message. -fn scan_canonical_inputs(messages: &[Message]) -> CanonicalStateScan { +/// +/// `find_verified` controls whether the scan bothers locating the +/// latest verified user message. Callers that don't need it (e.g. +/// `build_canonical_state`) should pass `false` so the early-exit +/// condition can fire as soon as the goal + facts are gathered. 
+fn scan_canonical_inputs(messages: &[Message], find_verified: bool) -> CanonicalStateScan { let mut scan = CanonicalStateScan::default(); for (idx, msg) in messages.iter().enumerate().rev() { if msg.role == "user" { - if scan.goal.is_none() { - if let Some(text) = msg.content.iter().find_map(|b| match b { + if scan.goal.is_none() + && let Some(text) = msg.content.iter().find_map(|b| match b { ContentBlock::Text { text, .. } => Some(text.as_str()), _ => None, - }) { - scan.goal = Some(summarize_text(text, 220)); - scan.latest_user_text_idx = Some(idx); - } + }) + { + scan.goal = Some(summarize_text(text, 220)); + scan.latest_user_text_idx = Some(idx); } - if scan.latest_verified_user_idx.is_none() { + if find_verified && scan.latest_verified_user_idx.is_none() { let verified = msg.content.iter().any(|b| match b { ContentBlock::ToolResult { content, .. } => { content.contains("[verification replay]") @@ -1020,7 +1035,7 @@ fn scan_canonical_inputs(messages: &[Message]) -> CanonicalStateScan { } } } - if scan.is_complete() { + if scan.is_complete(find_verified) { break; } } @@ -1045,15 +1060,18 @@ mod canonical_scan_tests { fn user_with_verified_replay(text: &str) -> Message { Message { role: "user".to_string(), - content: vec![ContentBlock::Text { - text: text.to_string(), - cache_control: None, - }, ContentBlock::ToolResult { - tool_use_id: "x".to_string(), - content: "[verification replay] pass=true".to_string(), - is_error: None, - content_blocks: None, - }], + content: vec![ + ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }, + ContentBlock::ToolResult { + tool_use_id: "x".to_string(), + content: "[verification replay] pass=true".to_string(), + is_error: None, + content_blocks: None, + }, + ], } } @@ -1078,10 +1096,13 @@ mod canonical_scan_tests { tool_result_msg("b"), user_text_msg("third"), ]; - let scan = scan_canonical_inputs(&messages); + let scan = scan_canonical_inputs(&messages, false); // Goal should be the most recent user text. 
let goal = scan.goal.expect("goal"); - assert!(goal.contains("third"), "expected the most recent, got {goal}"); + assert!( + goal.contains("third"), + "expected the most recent, got {goal}" + ); assert_eq!(scan.latest_user_text_idx, Some(4)); } @@ -1094,7 +1115,7 @@ mod canonical_scan_tests { tool_result_msg("fact-D"), tool_result_msg("fact-E"), ]; - let scan = scan_canonical_inputs(&messages); + let scan = scan_canonical_inputs(&messages, false); assert_eq!(scan.confirmed_facts.len(), 4); // The four most recent (newest first) are E, D, C, B. assert!(scan.confirmed_facts[0].contains("fact-E")); @@ -1110,7 +1131,7 @@ mod canonical_scan_tests { tool_result_msg("Error: bad"), tool_result_msg("good-B"), ]; - let scan = scan_canonical_inputs(&messages); + let scan = scan_canonical_inputs(&messages, false); assert_eq!(scan.confirmed_facts.len(), 2); assert!(scan.confirmed_facts[0].contains("good-B")); assert!(scan.confirmed_facts[1].contains("good-A")); @@ -1123,7 +1144,7 @@ mod canonical_scan_tests { user_with_verified_replay("verified"), user_text_msg("third"), ]; - let scan = scan_canonical_inputs(&messages); + let scan = scan_canonical_inputs(&messages, true); // The verified marker is on the *middle* message, not the most // recent. The scan should report its actual position. assert_eq!(scan.latest_verified_user_idx, Some(1)); @@ -1133,7 +1154,7 @@ mod canonical_scan_tests { #[test] fn scan_handles_empty_input() { - let scan = scan_canonical_inputs(&[]); + let scan = scan_canonical_inputs(&[], false); assert!(scan.goal.is_none()); assert!(scan.latest_verified_user_idx.is_none()); assert!(scan.latest_user_text_idx.is_none()); @@ -1145,13 +1166,15 @@ mod canonical_scan_tests { // 1000 tool results — the scan should stop walking once the // first 4 facts and a goal are found. We can't directly assert // "didn't visit every element" without instrumentation, but the - // call must return promptly with the right slice. + // call must return promptly with the right slice. 
We pass + // `find_verified=false` so the scan does not have to keep + // walking looking for a verified user message that isn't there. let mut messages: Vec = (0..1000) .map(|i| tool_result_msg(&format!("fact-{i}"))) .collect(); // Most recent user message comes last. messages.push(user_text_msg("goal")); - let scan = scan_canonical_inputs(&messages); + let scan = scan_canonical_inputs(&messages, false); assert!(scan.goal.as_deref().unwrap_or("").contains("goal")); assert_eq!(scan.confirmed_facts.len(), 4); }