hack-ink
diff --git a/‎apps/elf-api/src/routes.rs‎
Lines changed: 6 additions & 0 deletions b/‎apps/elf-api/src/routes.rs‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎apps/elf-mcp/src/server.rs‎
Lines changed: 25 additions & 0 deletions b/‎apps/elf-mcp/src/server.rs‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎docs/spec/system_doc_extension_v1_filters.md‎
Lines changed: 16 additions & 0 deletions b/‎docs/spec/system_doc_extension_v1_filters.md‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎docs/spec/system_doc_extension_v1_trajectory.md‎
Lines changed: 46 additions & 4 deletions b/‎docs/spec/system_doc_extension_v1_trajectory.md‎
Lines changed: 46 additions & 4 deletions
@@ -106,6 +106,9 @@ struct DocsSearchL0Body {
 	scope: Option<String>,
 	status: Option<String>,
 	doc_type: Option<String>,
+	sparse_mode: Option<String>,
+	domain: Option<String>,
+	repo: Option<String>,
 	agent_id: Option<String>,
 	thread_id: Option<String>,
 	updated_after: Option<String>,
@@ -1035,6 +1038,9 @@ async fn docs_search_l0(
 			scope: payload.scope,
 			status: payload.status,
 			doc_type: payload.doc_type,
+			sparse_mode: payload.sparse_mode,
+			domain: payload.domain,
+			repo: payload.repo,
 			agent_id: payload.agent_id,
 			thread_id: payload.thread_id,
 			updated_after: payload.updated_after,
 
@@ -882,6 +882,12 @@ fn docs_search_l0_schema() -> Arc<JsonObject> {
 			"ts_lte": { "type": ["string", "null"], "format": "date-time" },
 			"top_k": { "type": ["integer", "null"] },
 			"candidate_k": { "type": ["integer", "null"] },
+			"sparse_mode": {
+				"type": ["string", "null"],
+				"enum": ["auto", "on", "off", null]
+			},
+			"domain": { "type": ["string", "null"] },
+			"repo": { "type": ["string", "null"] },
 			"explain": { "type": ["boolean", "null"] },
 			"read_profile": { "type": ["string", "null"] }
 		}
@@ -1555,6 +1561,9 @@ mod tests {
 			"updated_before",
 			"ts_gte",
 			"ts_lte",
+			"sparse_mode",
+			"domain",
+			"repo",
 			"explain",
 		];
 
@@ -1580,6 +1589,22 @@ mod tests {
 				serde_json::Value::Null,
 			])
 		);
+		assert_eq!(
+			properties.get("sparse_mode").and_then(serde_json::Value::as_object).and_then(
+				|field| {
+					field
+						.get("enum")
+						.and_then(serde_json::Value::as_array)
+						.map(|vals| vals.to_vec())
+				}
+			),
+			Some(vec![
+				serde_json::Value::String("auto".to_string()),
+				serde_json::Value::String("on".to_string()),
+				serde_json::Value::String("off".to_string()),
+				serde_json::Value::Null,
+			])
+		);
 	}
 
 	#[test]
 
@@ -24,8 +24,12 @@ Scope
 - `status` (optional string): defaults to `active` when omitted. Current implementation matches
   this value exactly against stored doc status (`active`/`deleted` in current schema).
 - `doc_type` (optional string): exact-match filter.
+- `sparse_mode` (optional string): controls whether sparse retrieval is fused with dense
+  retrieval. One of `auto` (default), `on`, `off`.
 - `agent_id` (optional string): exact-match filter.
 - `thread_id` (optional string): exact-match filter for `thread_id` payload field.
+- `domain` (optional string): exact-match filter for `domain` payload field.
+- `repo` (optional string): exact-match filter for `repo` payload field.
 - `updated_after` (optional string): RFC3339 timestamp lower bound for `updated_at`.
 - `updated_before` (optional string): RFC3339 timestamp upper bound for `updated_at`.
 - `ts_gte` (optional string): RFC3339 timestamp lower bound for `doc_ts`.
@@ -41,8 +45,16 @@ Scope
 Filter evaluation:
 - Every supplied filter is combined with logical AND.
 - `status` defaults to `active` when omitted.
+- `sparse_mode` is validated as one of `auto|on|off` (default `auto`).
+- `domain` requires `doc_type=search` and is rejected with `400` when used with other
+  `doc_type` values or when `doc_type` is omitted.
+- `repo` requires `doc_type=dev` and is rejected with `400` when used with other
+  `doc_type` values or when `doc_type` is omitted.
 - Invalid date values or `updated_after >= updated_before` are rejected with `400`.
 - Invalid date values or `ts_gte >= ts_lte` are rejected with `400`.
+- In `auto` sparse mode, sparse retrieval is enabled only when the query is judged as
+  symbol-heavy / exact-match oriented; otherwise only dense retrieval is used.
+- `sparse_mode=on` runs both dense and sparse retrieval; `sparse_mode=off` runs dense-only.
 
 Response behavior:
 - `docs_search_l0` always returns `trace_id`.
@@ -60,6 +72,8 @@ Each point used by `docs_search_l0` MUST include payload fields:
 - `doc_type`
 - `agent_id`
 - `thread_id`
+- `domain`
+- `repo`
 - `updated_at`
 - `doc_ts`
 
@@ -75,6 +89,8 @@ Implementations MUST provision payload indexes for:
 - `doc_type` (keyword)
 - `agent_id` (keyword)
 - `thread_id` (keyword)
+- `domain` (keyword)
+- `repo` (keyword)
 - `updated_at` (datetime)
 - `doc_ts` (datetime)
 
 
@@ -47,7 +47,12 @@ Allowed/expected stage names (in order):
    Ensures returned vector size matches the configured model/vector size.
 
 4. `vector_search`  
-   Raw candidate retrieval from Qdrant.
+   Dense and optional sparse retrieval from Qdrant.
+   Dense retrieval runs first on every request; sparse retrieval is controlled by
+   `sparse_mode` (`auto`, `on`, `off`).
+   - `auto`: sparse retrieval only for symbol-heavy / exact-match style queries.
+   - `on`: always run both dense and sparse retrieval.
+   - `off`: dense-only retrieval.
 
 5. `dedupe`  
    Chunk-id deduplication between retrieval tiers.
@@ -56,7 +61,9 @@ Allowed/expected stage names (in order):
    Document/chunk metadata hydration from Postgres.
 
 7. `result_projection`  
-   Final scored item projection and output truncation.
+   Final scored item projection and output truncation.  
+   Implementations apply a recency tie-break using `updated_at` and expose the
+   policy knobs in stage stats when available (`recency_tau_days`, `tie_breaker_weight`).
 
 8. `level_selection` (excerpts only)  
    `L0|L1|L2` selection and byte budget.
@@ -89,17 +96,52 @@ and `stage_name` values should be non-empty and meaningful for downstream reader
     {
       "stage_order": 1,
       "stage_name": "vector_search",
-      "stats": { "raw_points": 12 }
+      "stats": {
+        "sparse_mode": "auto",
+        "channels": ["dense"],
+        "dense_raw_points": 24,
+        "sparse_raw_points": 0,
+        "raw_points": 24
+      }
     },
     {
       "stage_order": 2,
       "stage_name": "result_projection",
-      "stats": { "returned_items": 5, "pre_authorization_candidates": 8 }
+      "stats": {
+        "returned_items": 5,
+        "pre_authorization_candidates": 8,
+        "recency_tau_days": 60,
+        "tie_breaker_weight": 0.12
+      }
     }
   ]
 }
 ```
 
+==================================================
+5) Evaluation Scenarios
+==================================================
+
+- English natural-language query over mixed-language docs (expected: dense-only retrieval)
+  - Send a normal English query with `sparse_mode` omitted or set to `off`.
+  - Example: natural-language question with low symbol density from mixed `chat/dev` content.
+  - `trajectory.stages.vector_search` should show `channels=["dense"]` and `sparse_raw_points=0` (or absent).
+  - `trajectory.stages.result_projection` should show normal ranking output and no symbolic jump from sparse-only terms.
+
+- Exact-match cases (`auto` vs `on`)
+  - Query contains symbols / identifiers (`/`, `:`, `#`, hex, URLs, error codes like `ERR_...`, full stack traces, full identifiers).
+  - With `sparse_mode=auto`, expect `channels=["dense"]` for generic prose; `channels` may additionally include `"sparse"` when the query is symbol-heavy.
+  - With `sparse_mode=on`, expect `channels` to include both `"dense"` and `"sparse"` even if `auto` would stay dense-only.
+  - Compare `vector_search.raw_points` and `result_projection` stability across modes for the same corpus; `sparse_mode=on` should improve retrieval of exact token patterns in symbol-heavy queries.
+
+- Recency bias checks
+  - Set both `cfg.ranking.recency_tau_days` and `cfg.ranking.tie_breaker_weight` to values greater than 0.
+  - In `trajectory.stages.result_projection`, verify fields:
+    - `recency_tau_days` (current effective value),
+    - `tie_breaker_weight` (current effective weight),
+    - `pre_authorization_candidates` and `returned_items`.
+  - Expected signal: newer `updated_at` chunks should move upward when fusion scores are close and tie-break is active.
+
 ```json
 {
   "schema": "doc_retrieval_trajectory/v1",