From 2cc58ac9b6a4d69412b565eb3809352193e841ca Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 24 May 2026 11:40:53 -0600
Subject: [PATCH] =?UTF-8?q?feat(0.20.0):=20MCP=20delegation=20tools=20?=
 =?UTF-8?q?=E2=80=94=20delegate=5Fcode,=20delegate=5Fresearch,=20delegate?=
 =?UTF-8?q?=5Ffeedback?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New sub-export `@tangle-network/agent-runtime/mcp` and `agent-runtime-mcp`
bin. Five tools exposed over stdio JSON-RPC (MCP 2024-11-05):

- delegate_code        async, idempotent — runs coderProfile / multi-harness fanout
- delegate_research    async, idempotent — runs an injected researcher delegate
- delegate_feedback    sync, append-only — every rating is its own event
- delegation_status    sync poll — state machine + progress + final result
- delegation_history   sync read — newest-first, filterable, feedback inline

State lives in an in-memory DelegationTaskQueue (Phase 2 → sqlite). The
server is topology-free; consumers wire coder + researcher delegates at
construction. The bin auto-wires the default coder against the real
Sandbox client and lazy-imports a researcher delegate when
@tangle-network/agent-knowledge is installed as an optional peer.

61 new tests cover validation, idempotency, lifecycle, cancellation,
namespace isolation, feedback cross-reference, and a full JSON-RPC
end-to-end through both in-process and stdio transports.
---
 README.md                            | 110 ++++++++
 package.json                         |  16 +-
 src/mcp/bin.ts                       | 218 ++++++++++++++++
 src/mcp/delegates.ts                 | 144 +++++++++++
 src/mcp/feedback-store.ts            |  76 ++++++
 src/mcp/index.ts                     | 100 ++++++++
 src/mcp/server.ts                    | 337 +++++++++++++++++++++++++
 src/mcp/task-queue.ts                | 358 +++++++++++++++++++++++++++
 src/mcp/tools/delegate-code.ts       | 199 +++++++++++++++
 src/mcp/tools/delegate-feedback.ts   | 187 ++++++++++++++
 src/mcp/tools/delegate-research.ts   | 219 ++++++++++++++++
 src/mcp/tools/delegation-history.ts  |  98 ++++++++
 src/mcp/tools/delegation-status.ts   |  76 ++++++
 src/mcp/types.ts                     | 223 +++++++++++++++++
 tests/mcp/delegate-code.test.ts      | 120 +++++++++
 tests/mcp/delegate-feedback.test.ts  | 135 ++++++++++
 tests/mcp/delegate-research.test.ts  |  84 +++++++
 tests/mcp/delegation-history.test.ts | 128 ++++++++++
 tests/mcp/delegation-status.test.ts  |  72 ++++++
 tests/mcp/idempotency.test.ts        |  53 ++++
 tests/mcp/server-integration.test.ts | 181 ++++++++++++++
 tests/mcp/task-queue.test.ts         | 235 ++++++++++++++++++
 tsup.config.ts                       |   2 +
 23 files changed, 3370 insertions(+), 1 deletion(-)
 create mode 100644 src/mcp/bin.ts
 create mode 100644 src/mcp/delegates.ts
 create mode 100644 src/mcp/feedback-store.ts
 create mode 100644 src/mcp/index.ts
 create mode 100644 src/mcp/server.ts
 create mode 100644 src/mcp/task-queue.ts
 create mode 100644 src/mcp/tools/delegate-code.ts
 create mode 100644 src/mcp/tools/delegate-feedback.ts
 create mode 100644 src/mcp/tools/delegate-research.ts
 create mode 100644 src/mcp/tools/delegation-history.ts
 create mode 100644 src/mcp/tools/delegation-status.ts
 create mode 100644 src/mcp/types.ts
 create mode 100644 tests/mcp/delegate-code.test.ts
 create mode 100644 tests/mcp/delegate-feedback.test.ts
 create mode 100644 tests/mcp/delegate-research.test.ts
 create mode 100644 tests/mcp/delegation-history.test.ts
 create mode 100644 tests/mcp/delegation-status.test.ts
 create mode 100644 tests/mcp/idempotency.test.ts
 create mode 100644 tests/mcp/server-integration.test.ts
 create mode 100644 tests/mcp/task-queue.test.ts

diff --git a/README.md b/README.md
index 8f6bd66..6ba5ac0 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval
 | `deriveExecutionId` | Stable substrate executionId for `X-Execution-ID` cross-process reconnect |
 | `startRuntimeRun` | Canonical production-run row + cost ledger |
 | `defineAgent` | Declarative per-vertical agent manifest — surfaces, knowledge, rubric, run fn |
+| `createMcpServer` (`/mcp`) + `agent-runtime-mcp` bin | Stdio MCP server with the 5 delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) |
 | `resolveChatModel` / `validateChatModelId` / `getModels` | Router catalog fetch + fail-closed admission + precedence resolver |
 | `decideKnowledgeReadiness` | `ready` / `blocked` / `caveat` branch for routes / UI |
 | `createOpenAICompatibleBackend` | OpenAI-compatible streaming backend (TCloud / cli-bridge) |
@@ -173,6 +174,115 @@ await run.persist({ runtimeEvents: telemetry.events })
 
 Full runnable: [`examples/runtime-run/`](./examples/runtime-run/).
 
+## Delegation tools (MCP)
+
+`@tangle-network/agent-runtime/mcp` ships a stdio MCP server that exposes
+five delegation tools to a sandbox coding-harness agent (claude-code,
+codex, opencode, ...). The product agent itself runs inside a sandbox
+during a chat; when it needs a long-running coder or researcher loop, it
+calls one of these tools instead of doing the work in-line.
+
+| Tool | Kind | Use |
+|---|---|---|
+| `delegate_code` | async | Code-modification task — returns a `taskId`; poll `delegation_status` for the patch |
+| `delegate_research` | async | Source-grounded research task — returns a `taskId`; poll for items + citations |
+| `delegate_feedback` | sync | Append an agent/user/judge rating against a delegation, artifact, or outcome |
+| `delegation_status` | sync | Snapshot of a delegation's state machine (`pending` → `running` → `completed` \| `failed` \| `cancelled`) |
+| `delegation_history` | sync | Newest-first read of past delegations + attached feedback |
+
+Mount the server from a Node entry point:
+
+```ts
+import { Sandbox } from '@tangle-network/sandbox'
+import {
+  createMcpServer,
+  createDefaultCoderDelegate,
+} from '@tangle-network/agent-runtime/mcp'
+
+const sandboxClient = new Sandbox({ apiKey: process.env.SANDBOX_API_KEY! })
+const server = createMcpServer({
+  coderDelegate: createDefaultCoderDelegate({ sandboxClient }),
+  // researcherDelegate: wire your own — see below.
+})
+await server.serve() // reads JSON-RPC from stdin, writes responses to stdout
+```
+
+Or run the ready-made bin:
+
+```bash
+SANDBOX_API_KEY=sk_sandbox_... agent-runtime-mcp
+```
+
+The bin auto-wires the coder delegate and, when
+`@tangle-network/agent-knowledge` is installed as a peer, the researcher
+delegate. Environment knobs:
+
+- `SANDBOX_API_KEY` — required (unless both `MCP_DISABLE_*` are set)
+- `SANDBOX_BASE_URL` — sandbox-SDK base URL override
+- `MCP_MAX_CONCURRENT_SANDBOXES` — kernel `maxConcurrency` cap (default 4)
+- `MCP_CODER_FANOUT_HARNESSES` — comma-separated harness ids for `variants > 1`
+- `MCP_DISABLE_CODER` / `MCP_DISABLE_RESEARCHER` — omit the matching tool
+
+### Async semantics
+
+Coder + researcher delegations are **fire-and-poll**. The handler returns
+a `taskId` immediately; the agent calls `delegation_status(taskId)` until
+the state is terminal. Identical inputs return the same `taskId` —
+duplicate-call safety is built in via canonical-form hashing.
+
+```
+agent → delegate_code(goal, repoRoot)        → { taskId, estimatedDurationMs }
+agent → delegation_status(taskId)            → { status: 'running', progress: { ... } }
+... (minutes pass)
+agent → delegation_status(taskId)            → { status: 'completed', result: { profile: 'coder', output: <CoderOutput> } }
+agent → delegate_feedback(refersTo, rating)  → { recorded: true, id }
+```
+
+Task state lives in-memory inside the server process. A restart drops
+pending delegations — Phase 2 will move state into sqlite.
+
+### Wiring a researcher delegate
+
+`agent-runtime` cannot depend on `@tangle-network/agent-knowledge` (it
+would induce a dependency cycle). Wire the researcher delegate from your
+own integration code:
+
+```ts
+import { runLoop } from '@tangle-network/agent-runtime/loops'
+import { researcherProfile, multiHarnessResearcherFanout } from '@tangle-network/agent-knowledge/profiles'
+import { createMcpServer, type ResearcherDelegate } from '@tangle-network/agent-runtime/mcp'
+
+const researcherDelegate: ResearcherDelegate = async (args, ctx) => {
+  const task = {
+    question: args.question,
+    knowledgeNamespace: args.namespace,
+    scope: args.scope,
+    sources: args.sources,
+    /* ...map config.recencyWindow ISO strings to Date objects */
+  }
+  if ((args.variants ?? 1) <= 1) {
+    const preset = researcherProfile({ task })
+    const result = await runLoop({
+      driver: { /* single-shot */ async plan(t, h) { return h.length === 0 ? [t] : [] }, decide(h) { return h.length > 0 ? 'pick-winner' : 'fail' } },
+      agentRun: preset.agentRunSpec, output: preset.output, validator: preset.validator,
+      task, ctx: { sandboxClient, signal: ctx.signal }, maxIterations: 1,
+    })
+    return result.winner!.output
+  }
+  const fanout = multiHarnessResearcherFanout({ task })
+  const result = await runLoop({
+    driver: fanout.driver,
+    agentRuns: fanout.agentRuns.slice(0, args.variants),
+    output: fanout.output, validator: fanout.validator,
+    task, ctx: { sandboxClient, signal: ctx.signal },
+    maxIterations: args.variants ?? 1,
+  })
+  return result.winner!.output
+}
+
+createMcpServer({ researcherDelegate })
+```
+
 ## Error taxonomy
 
 | Error | When |
diff --git a/package.json b/package.json
index 1d44356..e70b2bf 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.19.0",
+  "version": "0.20.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {
@@ -43,8 +43,16 @@
       "types": "./dist/profiles.d.ts",
       "import": "./dist/profiles.js",
       "default": "./dist/profiles.js"
+    },
+    "./mcp": {
+      "types": "./dist/mcp/index.d.ts",
+      "import": "./dist/mcp/index.js",
+      "default": "./dist/mcp/index.js"
     }
   },
+  "bin": {
+    "agent-runtime-mcp": "./dist/mcp/bin.js"
+  },
   "files": [
     "dist",
     "README.md"
@@ -88,6 +96,12 @@
   "license": "MIT",
   "packageManager": "pnpm@10.28.0",
   "peerDependencies": {
+    "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
     "@tangle-network/sandbox": ">=0.1.2 <0.3.0"
+  },
+  "peerDependenciesMeta": {
+    "@tangle-network/agent-knowledge": {
+      "optional": true
+    }
   }
 }
diff --git a/src/mcp/bin.ts b/src/mcp/bin.ts
new file mode 100644
index 0000000..0ae8900
--- /dev/null
+++ b/src/mcp/bin.ts
@@ -0,0 +1,218 @@
+#!/usr/bin/env node
+
+/**
+ * @experimental
+ *
+ * `agent-runtime-mcp` — stdio MCP server entry point.
+ *
+ * Spins up a server with the default coder delegate (wired against the
+ * real `@tangle-network/sandbox` client) and, when the optional
+ * `@tangle-network/agent-knowledge` peer is installed, a researcher
+ * delegate against `multiHarnessResearcherFanout`.
+ *
+ * Environment variables:
+ *   SANDBOX_API_KEY                  required — passed to `new Sandbox({ apiKey })`
+ *   SANDBOX_BASE_URL                 optional — sandbox-SDK base URL override
+ *   MCP_MAX_CONCURRENT_SANDBOXES     default 4 — kernel maxConcurrency cap
+ *   MCP_CODER_FANOUT_HARNESSES       comma-separated harness ids to use for variants > 1
+ *   MCP_DISABLE_CODER                set to `1` to omit `delegate_code`
+ *   MCP_DISABLE_RESEARCHER           set to `1` to omit `delegate_research` even when peer is present
+ */
+
+import type { LoopSandboxClient } from '../loops'
+import { runLoop } from '../loops'
+import { createDefaultCoderDelegate, type ResearcherDelegate } from './delegates'
+import { createMcpServer } from './server'
+import type { ResearchOutputShape } from './types'
+
+async function main(): Promise<void> {
+  const fanoutHarnesses = parseHarnesses(process.env.MCP_CODER_FANOUT_HARNESSES)
+  const maxConcurrency = parseConcurrency(process.env.MCP_MAX_CONCURRENT_SANDBOXES)
+  const wantCoder = !process.env.MCP_DISABLE_CODER
+  const wantResearcher = !process.env.MCP_DISABLE_RESEARCHER
+
+  // Skip the sandbox client load entirely when no profile delegate needs it —
+  // the feedback + status + history tools are queue-bound and require no
+  // sandbox. Useful for tooling that mounts the MCP server purely for
+  // self-introspection.
+  const needsSandbox = wantCoder || wantResearcher
+  let sandboxClient: LoopSandboxClient | undefined
+  if (needsSandbox) {
+    const apiKey = process.env.SANDBOX_API_KEY
+    if (!apiKey && !process.env.AGENT_RUNTIME_MCP_ALLOW_NO_KEY) {
+      process.stderr.write(
+        'agent-runtime-mcp: SANDBOX_API_KEY is required (set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or set MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to run the queue-only subset)\n',
+      )
+      process.exit(2)
+    }
+    sandboxClient = await loadSandboxClient(apiKey)
+  }
+
+  const coderDelegate =
+    wantCoder && sandboxClient
+      ? createDefaultCoderDelegate({
+          sandboxClient,
+          fanoutHarnesses,
+          maxConcurrency,
+        })
+      : undefined
+
+  const researcherDelegate =
+    wantResearcher && sandboxClient
+      ? await loadResearcherDelegate(sandboxClient, maxConcurrency)
+      : undefined
+
+  const server = createMcpServer({ coderDelegate, researcherDelegate })
+
+  process.on('SIGINT', () => {
+    server.stop()
+    process.exit(0)
+  })
+  process.on('SIGTERM', () => {
+    server.stop()
+    process.exit(0)
+  })
+
+  await server.serve()
+}
+
+async function loadSandboxClient(apiKey: string | undefined): Promise<LoopSandboxClient> {
+  // Dynamic import keeps the bin importable in environments that haven't
+  // installed `@tangle-network/sandbox` yet (the runtime package lists it
+  // as a peer dep, not a hard dep).
+  const mod = await import('@tangle-network/sandbox').catch((err) => {
+    process.stderr.write(
+      `agent-runtime-mcp: failed to load @tangle-network/sandbox (${err.message}); install the peer dependency\n`,
+    )
+    process.exit(2)
+  })
+  const SandboxCtor = (mod as { Sandbox?: new (config: unknown) => LoopSandboxClient }).Sandbox
+  if (!SandboxCtor) {
+    process.stderr.write(
+      'agent-runtime-mcp: @tangle-network/sandbox does not export Sandbox; cannot construct client\n',
+    )
+    process.exit(2)
+  }
+  const baseUrl = process.env.SANDBOX_BASE_URL
+  return new SandboxCtor({
+    apiKey,
+    ...(baseUrl ? { baseUrl } : {}),
+  })
+}
+
+interface ResearcherProfilePreset {
+  agentRunSpec: Parameters<typeof runLoop>[0]['agentRun'] extends infer T ? NonNullable<T> : never
+  output: Parameters<typeof runLoop>[0]['output']
+  validator: Parameters<typeof runLoop>[0]['validator']
+}
+
+interface ResearcherFanoutPreset {
+  agentRuns: NonNullable<Parameters<typeof runLoop>[0]['agentRuns']>
+  output: Parameters<typeof runLoop>[0]['output']
+  validator: Parameters<typeof runLoop>[0]['validator']
+  driver: Parameters<typeof runLoop>[0]['driver']
+}
+
+async function loadResearcherDelegate(
+  sandboxClient: LoopSandboxClient,
+  maxConcurrency: number,
+): Promise<ResearcherDelegate | undefined> {
+  // Optional peer — when `@tangle-network/agent-knowledge` isn't installed,
+  // we silently omit the researcher tool from the advertisement. The
+  // dynamic-import path is resolved at runtime; TypeScript cannot see the
+  // peer, so we type the module structurally rather than via its own
+  // declaration file.
+  const profilesSpecifier = '@tangle-network/agent-knowledge/profiles'
+  const mod = await import(profilesSpecifier).catch(() => undefined)
+  if (!mod) return undefined
+  type SingleFactory = (opts: { task: unknown }) => ResearcherProfilePreset
+  type FanoutFactory = (opts: { task: unknown }) => ResearcherFanoutPreset
+  const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory })
+    .multiHarnessResearcherFanout
+  const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile
+  if (!fanoutFactory || !singleFactory) return undefined
+
+  return async (args, ctx) => {
+    const task = {
+      question: args.question,
+      knowledgeNamespace: args.namespace,
+      scope: args.scope,
+      sources: args.sources,
+      recencyWindow: args.config?.recencyWindow
+        ? {
+            since: args.config.recencyWindow.since
+              ? new Date(args.config.recencyWindow.since)
+              : undefined,
+            until: args.config.recencyWindow.until
+              ? new Date(args.config.recencyWindow.until)
+              : undefined,
+          }
+        : undefined,
+      maxItems: args.config?.maxItems,
+      minConfidence: args.config?.minConfidence,
+    }
+    const variants = Math.max(1, Math.trunc(args.variants ?? 1))
+    ctx.report({ iteration: 0, phase: 'starting' })
+    if (variants <= 1) {
+      const preset = singleFactory({ task })
+      const result = await runLoop({
+        driver: {
+          name: 'mcp-researcher-single',
+          async plan(t, history) {
+            return history.length === 0 ? [t] : []
+          },
+          decide(history) {
+            return history.length > 0 ? 'pick-winner' : 'fail'
+          },
+        },
+        agentRun: preset.agentRunSpec,
+        output: preset.output,
+        validator: preset.validator,
+        task,
+        ctx: { sandboxClient, signal: ctx.signal },
+        maxIterations: 1,
+        maxConcurrency,
+      })
+      const output = result.winner?.output
+      if (!output) throw new Error('researcher delegate produced no winner')
+      ctx.report({ iteration: 1, phase: 'completed' })
+      return output as ResearchOutputShape
+    }
+    const fanout = fanoutFactory({ task })
+    const result = await runLoop({
+      driver: fanout.driver,
+      agentRuns: fanout.agentRuns.slice(0, variants),
+      output: fanout.output,
+      validator: fanout.validator,
+      task,
+      ctx: { sandboxClient, signal: ctx.signal },
+      maxIterations: variants,
+      maxConcurrency: Math.min(maxConcurrency, variants),
+    })
+    const output = result.winner?.output
+    if (!output) throw new Error('researcher delegate fanout produced no winner')
+    ctx.report({ iteration: result.iterations.length, phase: 'completed' })
+    return output as ResearchOutputShape
+  }
+}
+
+function parseHarnesses(raw: string | undefined): string[] | undefined {
+  if (!raw) return undefined
+  const list = raw
+    .split(',')
+    .map((entry) => entry.trim())
+    .filter(Boolean)
+  return list.length > 0 ? list : undefined
+}
+
+function parseConcurrency(raw: string | undefined): number {
+  if (!raw) return 4
+  const n = Number(raw)
+  if (!Number.isFinite(n) || n < 1) return 4
+  return Math.min(Math.trunc(n), 32)
+}
+
+main().catch((err) => {
+  process.stderr.write(`agent-runtime-mcp: ${err instanceof Error ? err.stack : String(err)}\n`)
+  process.exit(1)
+})
diff --git a/src/mcp/delegates.ts b/src/mcp/delegates.ts
new file mode 100644
index 0000000..0e5d529
--- /dev/null
+++ b/src/mcp/delegates.ts
@@ -0,0 +1,144 @@
+/**
+ * @experimental
+ *
+ * Delegate factories — the layer between MCP tool handlers and the
+ * underlying `runLoop` runners.
+ *
+ * The MCP server is profile-agnostic: it owns the task queue + feedback
+ * store + transport. Each `*Delegate` is the closure that the queue
+ * invokes when a task runs. Consumers can override either delegate to
+ * inject custom drivers, mocks, fleet-aware dispatchers, etc.
+ *
+ * The default coder delegate is wired here because we own
+ * `coderProfile` / `multiHarnessCoderFanout`. The default researcher
+ * delegate is **not** wired in this file — `agent-knowledge` cannot be
+ * imported from `agent-runtime` without inducing a cycle. Consumers
+ * pass `researcherDelegate` explicitly when constructing the server.
+ */
+
+import type { LoopSandboxClient } from '../loops'
+import { runLoop } from '../loops'
+import { coderProfile, multiHarnessCoderFanout } from '../profiles/coder'
+import type {
+  CoderTask,
+  DelegateCodeArgs,
+  DelegateResearchArgs,
+  DelegationProgress,
+  ResearchOutputShape,
+} from './types'
+
+/** @experimental */
+export interface DelegateRunCtx {
+  signal: AbortSignal
+  report(progress: DelegationProgress): void
+}
+
+/** @experimental */
+export type CoderDelegate = (
+  args: DelegateCodeArgs,
+  ctx: DelegateRunCtx,
+) => Promise<import('../profiles/coder').CoderOutput>
+
+/** @experimental */
+export type ResearcherDelegate = (
+  args: DelegateResearchArgs,
+  ctx: DelegateRunCtx,
+) => Promise<ResearchOutputShape>
+
+/** @experimental */
+export interface CreateDefaultCoderDelegateOptions {
+  sandboxClient: LoopSandboxClient
+  /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */
+  fanoutHarnesses?: string[]
+  /** Hard cap on the kernel's per-batch concurrency. Default 4. */
+  maxConcurrency?: number
+}
+
+/**
+ * Build a coder delegate that drives `runLoop` against the project's
+ * sandbox client + coder profile. When `args.variants > 1` it switches
+ * to the multi-harness fanout topology.
+ *
+ * @experimental
+ */
+export function createDefaultCoderDelegate(
+  options: CreateDefaultCoderDelegateOptions,
+): CoderDelegate {
+  const sandboxClient = options.sandboxClient
+  const fanoutHarnesses = options.fanoutHarnesses
+  const maxConcurrency = options.maxConcurrency ?? 4
+  return async (args, ctx) => {
+    const task: CoderTask = {
+      goal: buildCoderGoal(args),
+      repoRoot: args.repoRoot,
+      testCmd: args.config?.testCmd,
+      typecheckCmd: args.config?.typecheckCmd,
+      forbiddenPaths: args.config?.forbiddenPaths,
+      maxDiffLines: args.config?.maxDiffLines,
+    }
+    const variants = Math.max(1, Math.trunc(args.variants ?? 1))
+    ctx.report({ iteration: 0, phase: 'starting' })
+    if (variants <= 1) {
+      const { agentRunSpec, output, validator } = coderProfile({ task })
+      const result = await runLoop({
+        driver: singleShotDriver,
+        agentRun: agentRunSpec,
+        output,
+        validator,
+        task,
+        ctx: { sandboxClient, signal: ctx.signal },
+        maxIterations: 1,
+        maxConcurrency,
+      })
+      const winner = result.winner
+      if (!winner) {
+        throw new Error('coder delegate produced no winner')
+      }
+      ctx.report({ iteration: 1, phase: 'completed' })
+      return winner.output
+    }
+    const fanout = multiHarnessCoderFanout(
+      fanoutHarnesses && fanoutHarnesses.length > 0
+        ? { harnesses: fanoutHarnesses.slice(0, variants) }
+        : { harnesses: undefined },
+    )
+    const agentRuns = fanout.agentRuns.slice(0, variants)
+    const result = await runLoop({
+      driver: fanout.driver,
+      agentRuns,
+      output: fanout.output,
+      validator: fanout.validator,
+      task,
+      ctx: { sandboxClient, signal: ctx.signal },
+      maxIterations: variants,
+      maxConcurrency: Math.min(maxConcurrency, variants),
+    })
+    const winner = result.winner
+    if (!winner) {
+      throw new Error('coder delegate fanout produced no winner')
+    }
+    ctx.report({ iteration: agentRuns.length, phase: 'completed' })
+    return winner.output
+  }
+}
+
+function buildCoderGoal(args: DelegateCodeArgs): string {
+  if (!args.contextHint) return args.goal
+  return [args.goal, '', '## Context', args.contextHint].join('\n')
+}
+
+/**
+ * Single-shot driver — plan one task on iteration 0, stop after one
+ * iteration. Used by the coder delegate when `variants <= 1`. Keeps the
+ * runLoop kernel-level accounting (timing, cost, trace emission) while
+ * skipping fanout/refine topology overhead.
+ */
+const singleShotDriver = {
+  name: 'mcp-single-shot',
+  async plan<Task>(task: Task, history: ReadonlyArray<unknown>): Promise<Task[]> {
+    return history.length === 0 ? [task] : []
+  },
+  decide(history: ReadonlyArray<unknown>): 'pick-winner' | 'fail' {
+    return history.length > 0 ? 'pick-winner' : 'fail'
+  },
+}
diff --git a/src/mcp/feedback-store.ts b/src/mcp/feedback-store.ts
new file mode 100644
index 0000000..4cfa2b9
--- /dev/null
+++ b/src/mcp/feedback-store.ts
@@ -0,0 +1,76 @@
+/**
+ * @experimental
+ *
+ * Feedback persistence surface for the MCP layer.
+ *
+ * The substrate cannot import `@tangle-network/agent-knowledge` (it would
+ * induce a dependency cycle), so the store is an abstract interface. The
+ * default implementation is in-memory; consumers wire their own adapter
+ * (a real KbStore-backed sink, an HTTP relay to gtm-agent's knowledge
+ * service, etc.) via `createMcpServer({ feedbackStore })`.
+ *
+ * Feedback events are append-only: every rating is a new event with a
+ * fresh id, even when the same delegation is rated multiple times. The
+ * caller decides how to roll up scores downstream.
+ */
+
+import type { DelegateFeedbackArgs, DelegationFeedbackSnapshot } from './types'
+
+/** @experimental */
+export interface FeedbackEvent {
+  id: string
+  refersTo: DelegateFeedbackArgs['refersTo']
+  rating: DelegateFeedbackArgs['rating']
+  by: DelegateFeedbackArgs['by']
+  capturedAt: string
+  namespace?: string
+}
+
+/** @experimental */
+export interface FeedbackStore {
+  /** Append a new event. Never dedupes — every rating is its own event. */
+  put(event: FeedbackEvent): Promise<void>
+  /**
+   * List events filtered by `namespace`. When `namespace` is omitted, list
+   * across all namespaces. Returns events in insertion order.
+   */
+  list(filter?: { namespace?: string; refersToRef?: string }): Promise<FeedbackEvent[]>
+}
+
+/** @experimental */
+export class InMemoryFeedbackStore implements FeedbackStore {
+  private readonly events: FeedbackEvent[] = []
+
+  async put(event: FeedbackEvent): Promise<void> {
+    this.events.push({ ...event })
+  }
+
+  async list(filter: { namespace?: string; refersToRef?: string } = {}): Promise<FeedbackEvent[]> {
+    let out = this.events
+    if (filter.namespace !== undefined) {
+      out = out.filter((event) => event.namespace === filter.namespace)
+    }
+    if (filter.refersToRef !== undefined) {
+      out = out.filter((event) => event.refersTo.ref === filter.refersToRef)
+    }
+    return out.map((event) => ({ ...event }))
+  }
+}
+
+/**
+ * Project a `FeedbackEvent` down to the snapshot shape carried on
+ * `delegation_history` entries.
+ *
+ * @experimental
+ */
+export function eventToSnapshot(event: FeedbackEvent): DelegationFeedbackSnapshot {
+  const snap: DelegationFeedbackSnapshot = {
+    id: event.id,
+    score: event.rating.score,
+    by: event.by,
+    notes: event.rating.notes,
+    capturedAt: event.capturedAt,
+  }
+  if (event.rating.label) snap.label = event.rating.label
+  return snap
+}
diff --git a/src/mcp/index.ts b/src/mcp/index.ts
new file mode 100644
index 0000000..6517e90
--- /dev/null
+++ b/src/mcp/index.ts
@@ -0,0 +1,100 @@
+/**
+ * @experimental
+ *
+ * `@tangle-network/agent-runtime/mcp` — Stdio MCP server exposing the 5
+ * delegation tools (`delegate_code`, `delegate_research`,
+ * `delegate_feedback`, `delegation_status`, `delegation_history`) to
+ * sandbox coding-harness agents.
+ *
+ * Mount the server inside a product agent's sandbox via
+ * `agent-runtime-mcp` (the bin) or wire it into a custom Node entry
+ * point with `createMcpServer({ ... })`. Pass `coderDelegate` /
+ * `researcherDelegate` factories you build from your project's
+ * sandbox client + run-loop topology.
+ */
+
+export type {
+  CoderDelegate,
+  CreateDefaultCoderDelegateOptions,
+  DelegateRunCtx,
+  ResearcherDelegate,
+} from './delegates'
+export { createDefaultCoderDelegate } from './delegates'
+export type { FeedbackEvent, FeedbackStore } from './feedback-store'
+export { eventToSnapshot, InMemoryFeedbackStore } from './feedback-store'
+export type {
+  JsonRpcMessage,
+  JsonRpcResponse,
+  McpServer,
+  McpServerOptions,
+  McpToolDescriptor,
+  McpTransport,
+} from './server'
+export { createInProcessTransport, createMcpServer } from './server'
+export type {
+  DelegationRecord,
+  DelegationTaskQueueOptions,
+  SubmitInput,
+  SubmitOutput,
+} from './task-queue'
+export { DelegationTaskQueue, hashIdempotencyInput } from './task-queue'
+export {
+  createDelegateCodeHandler,
+  DELEGATE_CODE_DESCRIPTION,
+  DELEGATE_CODE_INPUT_SCHEMA,
+  DELEGATE_CODE_TOOL_NAME,
+  validateDelegateCodeArgs,
+} from './tools/delegate-code'
+export {
+  createDelegateFeedbackHandler,
+  DELEGATE_FEEDBACK_DESCRIPTION,
+  DELEGATE_FEEDBACK_INPUT_SCHEMA,
+  DELEGATE_FEEDBACK_TOOL_NAME,
+  validateDelegateFeedbackArgs,
+} from './tools/delegate-feedback'
+export {
+  createDelegateResearchHandler,
+  DELEGATE_RESEARCH_DESCRIPTION,
+  DELEGATE_RESEARCH_INPUT_SCHEMA,
+  DELEGATE_RESEARCH_TOOL_NAME,
+  validateDelegateResearchArgs,
+} from './tools/delegate-research'
+export {
+  createDelegationHistoryHandler,
+  DELEGATION_HISTORY_DESCRIPTION,
+  DELEGATION_HISTORY_INPUT_SCHEMA,
+  DELEGATION_HISTORY_TOOL_NAME,
+  validateDelegationHistoryArgs,
+} from './tools/delegation-history'
+export {
+  createDelegationStatusHandler,
+  DELEGATION_STATUS_DESCRIPTION,
+  DELEGATION_STATUS_INPUT_SCHEMA,
+  DELEGATION_STATUS_TOOL_NAME,
+  validateDelegationStatusArgs,
+} from './tools/delegation-status'
+export type {
+  DelegateCodeArgs,
+  DelegateCodeConfig,
+  DelegateCodeResult,
+  DelegateFeedbackArgs,
+  DelegateFeedbackResult,
+  DelegateResearchArgs,
+  DelegateResearchConfig,
+  DelegateResearchResult,
+  DelegationError,
+  DelegationFeedbackSnapshot,
+  DelegationHistoryArgs,
+  DelegationHistoryEntry,
+  DelegationHistoryResult,
+  DelegationProfile,
+  DelegationProgress,
+  DelegationResultPayload,
+  DelegationStatus,
+  DelegationStatusArgs,
+  DelegationStatusResult,
+  FeedbackRating,
+  FeedbackRefersTo,
+  ResearchOutputShape,
+  ResearchSource,
+} from './types'
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
new file mode 100644
index 0000000..ca03bdf
--- /dev/null
+++ b/src/mcp/server.ts
@@ -0,0 +1,337 @@
+/**
+ * @experimental
+ *
+ * Stdio JSON-RPC MCP server exposing the 5 delegation tools to sandbox
+ * coding-harness agents (claude-code, codex, opencode, ...).
+ *
+ * The server is transport-bound but topology-free: tool execution is
+ * delegated to handler functions composed from a queue, a feedback
+ * store, and per-profile run delegates. Consumers wire those at
+ * construction time. The `agent-runtime-mcp` bin spins up a default
+ * configuration for the common case (real sandbox client + coder).
+ *
+ * Wire protocol: line-delimited JSON-RPC 2.0 over stdio. Each line is
+ * one request; each response is one line. `tools/list` and `tools/call`
+ * mirror the MCP 2024-11-05 spec; we do not pull in
+ * `@modelcontextprotocol/sdk` to keep the dependency footprint zero.
+ */
+
+import { createInterface, type Interface as ReadlineInterface } from 'node:readline'
+import { Readable, Writable } from 'node:stream'
+import type { CoderDelegate, ResearcherDelegate } from './delegates'
+import { type FeedbackStore, InMemoryFeedbackStore } from './feedback-store'
+import { DelegationTaskQueue } from './task-queue'
+import {
+  createDelegateCodeHandler,
+  DELEGATE_CODE_DESCRIPTION,
+  DELEGATE_CODE_INPUT_SCHEMA,
+  DELEGATE_CODE_TOOL_NAME,
+} from './tools/delegate-code'
+import {
+  createDelegateFeedbackHandler,
+  DELEGATE_FEEDBACK_DESCRIPTION,
+  DELEGATE_FEEDBACK_INPUT_SCHEMA,
+  DELEGATE_FEEDBACK_TOOL_NAME,
+} from './tools/delegate-feedback'
+import {
+  createDelegateResearchHandler,
+  DELEGATE_RESEARCH_DESCRIPTION,
+  DELEGATE_RESEARCH_INPUT_SCHEMA,
+  DELEGATE_RESEARCH_TOOL_NAME,
+} from './tools/delegate-research'
+import {
+  createDelegationHistoryHandler,
+  DELEGATION_HISTORY_DESCRIPTION,
+  DELEGATION_HISTORY_INPUT_SCHEMA,
+  DELEGATION_HISTORY_TOOL_NAME,
+} from './tools/delegation-history'
+import {
+  createDelegationStatusHandler,
+  DELEGATION_STATUS_DESCRIPTION,
+  DELEGATION_STATUS_INPUT_SCHEMA,
+  DELEGATION_STATUS_TOOL_NAME,
+} from './tools/delegation-status'
+
+/** @experimental */
+export interface McpServerOptions {
+  /** Required to enable delegate_code. */
+  coderDelegate?: CoderDelegate
+  /**
+   * Required to enable delegate_research. The substrate cannot ship a
+   * default — wire one that closes over your `runLoop` + a
+   * researcher profile (typically `@tangle-network/agent-knowledge`'s
+   * `researcherProfile` / `multiHarnessResearcherFanout`).
+   */
+  researcherDelegate?: ResearcherDelegate
+  /** Override the default in-memory feedback store. */
+  feedbackStore?: FeedbackStore
+  /** Override the default in-memory task queue. */
+  queue?: DelegationTaskQueue
+  /** Server display name surfaced via `initialize`. Default `'agent-runtime-mcp'`. */
+  serverName?: string
+  /** Server version surfaced via `initialize`. Default = the package version baked at build time. */
+  serverVersion?: string
+}
+
+/** @experimental */
+export interface McpToolDescriptor {
+  name: string
+  description: string
+  inputSchema: Record<string, unknown>
+  handler: (raw: unknown) => Promise<unknown>
+}
+
+/** @experimental */
+export interface McpServer {
+  /** Tools currently registered (depend on which delegates were wired). */
+  readonly tools: ReadonlyMap<string, McpToolDescriptor>
+  /** The underlying queue — exposed so tests can introspect it. */
+  readonly queue: DelegationTaskQueue
+  /** The feedback store — exposed for the same reason. */
+  readonly feedbackStore: FeedbackStore
+  /** Handle a single parsed JSON-RPC message. Returns the response object (or `null` for notifications). */
+  handle(message: JsonRpcMessage): Promise<JsonRpcResponse | null>
+  /** Drive the server on a stdio-shaped transport until `stop()` is called. */
+  serve(transport?: McpTransport): Promise<void>
+  /** Stop a `serve` call. Subsequent requests are rejected. */
+  stop(): void
+}
+
+/** @experimental */
+export interface McpTransport {
+  input: NodeJS.ReadableStream
+  output: NodeJS.WritableStream
+}
+
+/** @experimental */
+export interface JsonRpcMessage {
+  jsonrpc: '2.0'
+  id?: number | string | null
+  method: string
+  params?: unknown
+}
+
+/** @experimental */
+export interface JsonRpcResponse {
+  jsonrpc: '2.0'
+  id: number | string | null
+  result?: unknown
+  error?: { code: number; message: string; data?: unknown }
+}
+
+const PROTOCOL_VERSION = '2024-11-05'
+const DEFAULT_SERVER_NAME = 'agent-runtime-mcp'
+const DEFAULT_SERVER_VERSION = '0.20.0'
+
+/** @experimental */
+export function createMcpServer(options: McpServerOptions = {}): McpServer {
+  const queue = options.queue ?? new DelegationTaskQueue()
+  const feedbackStore = options.feedbackStore ?? new InMemoryFeedbackStore()
+  const serverName = options.serverName ?? DEFAULT_SERVER_NAME
+  const serverVersion = options.serverVersion ?? DEFAULT_SERVER_VERSION
+
+  const tools = new Map<string, McpToolDescriptor>()
+
+  if (options.coderDelegate) {
+    tools.set(DELEGATE_CODE_TOOL_NAME, {
+      name: DELEGATE_CODE_TOOL_NAME,
+      description: DELEGATE_CODE_DESCRIPTION,
+      inputSchema: DELEGATE_CODE_INPUT_SCHEMA as unknown as Record<string, unknown>,
+      handler: createDelegateCodeHandler({ queue, delegate: options.coderDelegate }),
+    })
+  }
+  if (options.researcherDelegate) {
+    tools.set(DELEGATE_RESEARCH_TOOL_NAME, {
+      name: DELEGATE_RESEARCH_TOOL_NAME,
+      description: DELEGATE_RESEARCH_DESCRIPTION,
+      inputSchema: DELEGATE_RESEARCH_INPUT_SCHEMA as unknown as Record<string, unknown>,
+      handler: createDelegateResearchHandler({ queue, delegate: options.researcherDelegate }),
+    })
+  }
+  tools.set(DELEGATE_FEEDBACK_TOOL_NAME, {
+    name: DELEGATE_FEEDBACK_TOOL_NAME,
+    description: DELEGATE_FEEDBACK_DESCRIPTION,
+    inputSchema: DELEGATE_FEEDBACK_INPUT_SCHEMA as unknown as Record<string, unknown>,
+    handler: createDelegateFeedbackHandler({ queue, store: feedbackStore }),
+  })
+  tools.set(DELEGATION_STATUS_TOOL_NAME, {
+    name: DELEGATION_STATUS_TOOL_NAME,
+    description: DELEGATION_STATUS_DESCRIPTION,
+    inputSchema: DELEGATION_STATUS_INPUT_SCHEMA as unknown as Record<string, unknown>,
+    handler: createDelegationStatusHandler({ queue }),
+  })
+  tools.set(DELEGATION_HISTORY_TOOL_NAME, {
+    name: DELEGATION_HISTORY_TOOL_NAME,
+    description: DELEGATION_HISTORY_DESCRIPTION,
+    inputSchema: DELEGATION_HISTORY_INPUT_SCHEMA as unknown as Record<string, unknown>,
+    handler: createDelegationHistoryHandler({ queue }),
+  })
+
+  let stopped = false
+  let activeReadline: ReadlineInterface | undefined
+
+  async function handle(message: JsonRpcMessage): Promise<JsonRpcResponse | null> {
+    if (stopped) {
+      return rpcError(message.id ?? null, -32099, 'server stopped')
+    }
+    if (message.method === 'initialize') {
+      return rpcResult(message.id ?? null, {
+        protocolVersion: PROTOCOL_VERSION,
+        capabilities: { tools: {} },
+        serverInfo: { name: serverName, version: serverVersion },
+      })
+    }
+    if (message.method === 'notifications/initialized') {
+      // MCP clients send this after the handshake; it has no id and expects
+      // no response.
+      return null
+    }
+    if (message.method === 'tools/list') {
+      return rpcResult(message.id ?? null, {
+        tools: [...tools.values()].map((tool) => ({
+          name: tool.name,
+          description: tool.description,
+          inputSchema: tool.inputSchema,
+        })),
+      })
+    }
+    if (message.method === 'tools/call') {
+      const params = (message.params ?? {}) as { name?: unknown; arguments?: unknown }
+      const name = typeof params.name === 'string' ? params.name : ''
+      const tool = tools.get(name)
+      if (!tool) {
+        return rpcError(message.id ?? null, -32601, `unknown tool: ${name}`)
+      }
+      try {
+        const output = await tool.handler(params.arguments ?? {})
+        return rpcResult(message.id ?? null, {
+          content: [{ type: 'text', text: JSON.stringify(output) }],
+          structuredContent: output,
+          isError: false,
+        })
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err)
+        const code = err instanceof TypeError || err instanceof RangeError ? -32602 : -32000
+        return rpcError(message.id ?? null, code, reason)
+      }
+    }
+    if (message.id === undefined || message.id === null) return null
+    return rpcError(message.id, -32601, `unknown method: ${message.method}`)
+  }
+
+  async function serve(transport?: McpTransport): Promise<void> {
+    const input = transport?.input ?? process.stdin
+    const output = transport?.output ?? process.stdout
+    const rl = createInterface({ input, crlfDelay: Number.POSITIVE_INFINITY })
+    activeReadline = rl
+    return new Promise<void>((resolve, reject) => {
+      rl.on('line', (line) => {
+        const trimmed = line.trim()
+        if (!trimmed) return
+        let parsed: JsonRpcMessage | undefined
+        try {
+          parsed = JSON.parse(trimmed) as JsonRpcMessage
+        } catch (err) {
+          writeResponse(output, rpcError(null, -32700, `parse error: ${(err as Error).message}`))
+          return
+        }
+        if (!parsed || parsed.jsonrpc !== '2.0' || typeof parsed.method !== 'string') {
+          writeResponse(output, rpcError(parsed?.id ?? null, -32600, 'invalid request'))
+          return
+        }
+        void handle(parsed).then((response) => {
+          if (response) writeResponse(output, response)
+        })
+      })
+      rl.on('close', () => resolve())
+      rl.on('error', (err) => reject(err))
+      if (stopped) {
+        rl.close()
+        resolve()
+      }
+    })
+  }
+
+  function stop(): void {
+    stopped = true
+    activeReadline?.close()
+    activeReadline = undefined
+  }
+
+  return {
+    tools,
+    queue,
+    feedbackStore,
+    handle,
+    serve,
+    stop,
+  }
+}
+
+function rpcResult(id: number | string | null, result: unknown): JsonRpcResponse {
+  return { jsonrpc: '2.0', id, result }
+}
+
+function rpcError(
+  id: number | string | null,
+  code: number,
+  message: string,
+  data?: unknown,
+): JsonRpcResponse {
+  return {
+    jsonrpc: '2.0',
+    id,
+    error: data === undefined ? { code, message } : { code, message, data },
+  }
+}
+
+function writeResponse(output: NodeJS.WritableStream, response: JsonRpcResponse): void {
+  output.write(`${JSON.stringify(response)}\n`)
+}
+
+/**
+ * In-process pair of `Readable` + `Writable` streams suitable for driving
+ * `server.serve(...)` from a test. Returns the agent-side stream (the
+ * client writes to it) and the server-side stream (the test reads from it).
+ *
+ * @experimental
+ */
+export function createInProcessTransport(): {
+  transport: McpTransport
+  clientWrite(line: string): void
+  clientClose(): void
+  readServer(): Promise<JsonRpcResponse[]>
+} {
+  const responses: JsonRpcResponse[] = []
+  const input = new Readable({ read() {} })
+  const output = new Writable({
+    write(chunk, _enc, cb) {
+      const text = chunk.toString('utf8')
+      for (const line of text.split('\n')) {
+        const trimmed = line.trim()
+        if (!trimmed) continue
+        try {
+          responses.push(JSON.parse(trimmed) as JsonRpcResponse)
+        } catch {
+          // Non-JSON output should never appear; drop it silently in the
+          // test transport rather than crashing.
+        }
+      }
+      cb()
+    },
+  })
+  return {
+    transport: { input, output },
+    clientWrite(line: string) {
+      input.push(`${line}\n`)
+    },
+    clientClose() {
+      input.push(null)
+    },
+    async readServer() {
+      // Yield to the event loop a few times so async handlers drain.
+      for (let i = 0; i < 5; i += 1) await new Promise((r) => setImmediate(r))
+      return [...responses]
+    },
+  }
+}
diff --git a/src/mcp/task-queue.ts b/src/mcp/task-queue.ts
new file mode 100644
index 0000000..94a90cc
--- /dev/null
+++ b/src/mcp/task-queue.ts
@@ -0,0 +1,358 @@
+/**
+ * @experimental
+ *
+ * In-memory state for async MCP delegations. State machine:
+ *
+ *   pending → running → completed | failed
+ *           ↘ cancelled (from any non-terminal state via cancel())
+ *
+ * Each `submit` returns a `taskId` immediately and kicks the work off in the
+ * background. The work function receives an `AbortSignal` the queue fires
+ * when `cancel(taskId)` is called. The queue does NOT supervise runtime
+ * timeouts — the underlying `runLoop` driver / sandbox imposes those.
+ *
+ * Idempotency: callers may supply an `idempotencyKey` (hash of the input).
+ * A duplicate `submit` with a known key returns the existing task instead of
+ * starting a new one. Mutated input → different key → different task.
+ *
+ * Persistent state (sqlite) is a Phase 2 follow-up. The README documents the
+ * in-memory limitation explicitly so consumers know a worker restart drops
+ * pending delegations.
+ */
+
+import type {
+  DelegateCodeArgs,
+  DelegateResearchArgs,
+  DelegationError,
+  DelegationFeedbackSnapshot,
+  DelegationHistoryArgs,
+  DelegationHistoryEntry,
+  DelegationProfile,
+  DelegationProgress,
+  DelegationResultPayload,
+  DelegationStatus,
+  DelegationStatusResult,
+} from './types'
+
+/** @experimental */
+export interface DelegationRecord {
+  taskId: string
+  profile: DelegationProfile
+  namespace?: string
+  args: DelegateCodeArgs | DelegateResearchArgs
+  status: DelegationStatus
+  progress?: DelegationProgress
+  result?: DelegationResultPayload
+  error?: DelegationError
+  costUsd?: number
+  startedAt: string
+  completedAt?: string
+  /** Sha-prefix hash of the canonical input — used for idempotency lookup. */
+  idempotencyKey?: string
+  /** Feedback events keyed by this delegation's taskId. */
+  feedback: DelegationFeedbackSnapshot[]
+}
+
+/** @experimental */
+export interface SubmitInput<Args extends DelegateCodeArgs | DelegateResearchArgs> {
+  profile: DelegationProfile
+  args: Args
+  namespace?: string
+  idempotencyKey?: string
+  /**
+   * Runs the underlying delegation. The queue passes a fresh `AbortSignal`
+   * and a `report` channel for incremental progress updates. The function
+   * MUST resolve with the typed `DelegationResultPayload['output']`; the
+   * queue wraps it with the profile tag.
+   */
+  run: (ctx: {
+    signal: AbortSignal
+    report(progress: DelegationProgress): void
+  }) => Promise<DelegationResultPayload['output']>
+}
+
+/** @experimental */
+export interface SubmitOutput {
+  taskId: string
+  /** True when a prior matching `idempotencyKey` returned an existing record. */
+  reused: boolean
+}
+
+/** @experimental */
+export interface DelegationTaskQueueOptions {
+  /** ID generator override; default `randomTaskId`. */
+  generateId?: () => string
+  /** Clock override; default `() => new Date().toISOString()`. */
+  now?: () => string
+}
+
+/** @experimental */
+export class DelegationTaskQueue {
+  private readonly records = new Map<string, DelegationRecord>()
+  private readonly controllers = new Map<string, AbortController>()
+  private readonly byIdempotencyKey = new Map<string, string>()
+  private readonly generateId: () => string
+  private readonly now: () => string
+
+  constructor(options: DelegationTaskQueueOptions = {}) {
+    this.generateId = options.generateId ?? randomTaskId
+    this.now = options.now ?? (() => new Date().toISOString())
+  }
+
+  /**
+   * Kick off a delegation in the background. Returns immediately. The
+   * `taskId` is queryable via `status` once this method returns.
+   */
+  submit<Args extends DelegateCodeArgs | DelegateResearchArgs>(
+    input: SubmitInput<Args>,
+  ): SubmitOutput {
+    if (input.idempotencyKey) {
+      const existing = this.byIdempotencyKey.get(input.idempotencyKey)
+      if (existing && this.records.has(existing)) {
+        return { taskId: existing, reused: true }
+      }
+    }
+    const taskId = this.generateId()
+    const controller = new AbortController()
+    const record: DelegationRecord = {
+      taskId,
+      profile: input.profile,
+      namespace: input.namespace,
+      args: input.args,
+      status: 'pending',
+      startedAt: this.now(),
+      feedback: [],
+      idempotencyKey: input.idempotencyKey,
+    }
+    this.records.set(taskId, record)
+    this.controllers.set(taskId, controller)
+    if (input.idempotencyKey) this.byIdempotencyKey.set(input.idempotencyKey, taskId)
+
+    // Fire-and-forget the run function. Errors flow into the record so the
+    // status poll surfaces them; the promise itself is intentionally
+    // unobserved by the queue.
+    queueMicrotask(() => {
+      this.execute(taskId, input, controller)
+    })
+
+    return { taskId, reused: false }
+  }
+
+  /**
+   * Snapshot the current state of a delegation. Returns `undefined` for
+   * unknown ids so callers can distinguish missing from terminal.
+   */
+  status(taskId: string): DelegationStatusResult | undefined {
+    const record = this.records.get(taskId)
+    if (!record) return undefined
+    return toStatusResult(record)
+  }
+
+  /**
+   * Abort an in-flight delegation. Returns `false` if the task is unknown
+   * or already terminal. The underlying `run` function MUST honor the
+   * abort signal for the cancel to take effect; the queue marks the
+   * record `cancelled` regardless so a misbehaving runner cannot pin the
+   * UI on `running` forever.
+   */
+  cancel(taskId: string): boolean {
+    const record = this.records.get(taskId)
+    if (!record) return false
+    if (isTerminal(record.status)) return false
+    const controller = this.controllers.get(taskId)
+    controller?.abort()
+    record.status = 'cancelled'
+    record.completedAt = this.now()
+    record.error = { message: 'cancelled by caller', kind: 'CancelledError' }
+    return true
+  }
+
+  /**
+   * Append a feedback event to the matching delegation. Returns `false`
+   * when `ref` does not name a known taskId — the caller should still
+   * record the feedback through a different surface (artifact/outcome
+   * kinds are not queue-bound).
+   */
+  attachFeedback(taskId: string, snapshot: DelegationFeedbackSnapshot): boolean {
+    const record = this.records.get(taskId)
+    if (!record) return false
+    record.feedback.push(snapshot)
+    return true
+  }
+
+  /**
+   * Query the recorded delegations. Returns entries newest-first (by
+   * `startedAt`), truncated to `limit`.
+   */
+  history(args: DelegationHistoryArgs = {}): DelegationHistoryEntry[] {
+    const limit = clampLimit(args.limit)
+    const since = args.since ? Date.parse(args.since) : Number.NEGATIVE_INFINITY
+    const out: DelegationHistoryEntry[] = []
+    for (const record of this.records.values()) {
+      if (args.namespace && record.namespace !== args.namespace) continue
+      if (args.profile && record.profile !== args.profile) continue
+      if (Number.isFinite(since) && Date.parse(record.startedAt) < since) continue
+      out.push(toHistoryEntry(record))
+    }
+    out.sort((a, b) => b.startedAt.localeCompare(a.startedAt))
+    return out.slice(0, limit)
+  }
+
+  /** Test-only — number of in-flight (non-terminal) records. */
+  inflightCount(): number {
+    let n = 0
+    for (const record of this.records.values()) {
+      if (!isTerminal(record.status)) n += 1
+    }
+    return n
+  }
+
+  private async execute<Args extends DelegateCodeArgs | DelegateResearchArgs>(
+    taskId: string,
+    input: SubmitInput<Args>,
+    controller: AbortController,
+  ): Promise<void> {
+    const record = this.records.get(taskId)
+    if (!record) return
+    record.status = 'running'
+    try {
+      const output = await input.run({
+        signal: controller.signal,
+        report: (progress) => {
+          if (record.status === 'running') record.progress = progress
+        },
+      })
+      // `cancel()` may have flipped the status to `cancelled` while the
+      // run promise was pending. Read the field through a widening
+      // helper so the narrowed `'running'` type from the assignment
+      // above does not exclude that case at compile time.
+      if (currentStatus(record) === 'cancelled') return
+      record.status = 'completed'
+      record.completedAt = this.now()
+      record.result = { profile: input.profile, output } as DelegationResultPayload
+    } catch (err) {
+      if (currentStatus(record) === 'cancelled') return
+      record.status = 'failed'
+      record.completedAt = this.now()
+      record.error = errorToShape(err)
+    } finally {
+      this.controllers.delete(taskId)
+    }
+  }
+}
+
+function isTerminal(status: DelegationStatus): boolean {
+  return status === 'completed' || status === 'failed' || status === 'cancelled'
+}
+
+function currentStatus(record: DelegationRecord): DelegationStatus {
+  return record.status
+}
+
+function clampLimit(raw: number | undefined): number {
+  if (!Number.isFinite(raw)) return 50
+  const n = Math.trunc(raw as number)
+  if (n <= 0) return 50
+  return Math.min(n, 500)
+}
+
+function toStatusResult(record: DelegationRecord): DelegationStatusResult {
+  const out: DelegationStatusResult = {
+    taskId: record.taskId,
+    profile: record.profile,
+    status: record.status,
+    startedAt: record.startedAt,
+  }
+  if (record.progress) out.progress = record.progress
+  if (record.result) out.result = record.result
+  if (record.error) out.error = record.error
+  if (record.costUsd !== undefined) out.costUsd = record.costUsd
+  if (record.completedAt) out.completedAt = record.completedAt
+  return out
+}
+
+function toHistoryEntry(record: DelegationRecord): DelegationHistoryEntry {
+  const entry: DelegationHistoryEntry = {
+    taskId: record.taskId,
+    profile: record.profile,
+    args: record.args,
+    status: record.status,
+    startedAt: record.startedAt,
+  }
+  if (record.namespace) entry.namespace = record.namespace
+  if (record.completedAt) entry.completedAt = record.completedAt
+  if (record.costUsd !== undefined) entry.costUsd = record.costUsd
+  if (record.feedback.length > 0) entry.feedback = [...record.feedback]
+  return entry
+}
+
+function errorToShape(err: unknown): DelegationError {
+  if (err instanceof Error) {
+    return { message: err.message, kind: err.name || 'Error' }
+  }
+  return { message: String(err), kind: 'NonError' }
+}
+
+function randomTaskId(): string {
+  // Caller-stable id: `dlg-${timestamp}-${random}`. The timestamp portion
+  // makes lexicographic sort match chronological order in history queries
+  // even when the system clock skews under the second.
+  const t = Date.now().toString(36)
+  const r = Math.random().toString(36).slice(2, 10)
+  return `dlg-${t}-${r}`
+}
+
+/**
+ * Best-effort stable hash for use as `idempotencyKey`. Not cryptographic;
+ * collisions only affect dedupe, never correctness.
+ *
+ * @experimental
+ */
+export function hashIdempotencyInput(value: unknown): string {
+  let str: string
+  try {
+    str = JSON.stringify(canonicalize(value))
+  } catch {
+    str = String(value)
+  }
+  // FNV-1a 32-bit
+  let h = 0x811c9dc5
+  for (let i = 0; i < str.length; i += 1) {
+    h ^= str.charCodeAt(i)
+    h = Math.imul(h, 0x01000193)
+  }
+  return (h >>> 0).toString(16).padStart(8, '0')
+}
+
+function canonicalize(value: unknown): unknown {
+  if (value === null || typeof value !== 'object') return value
+  if (Array.isArray(value)) return value.map(canonicalize)
+  const entries = Object.entries(value as Record<string, unknown>)
+    .filter(([, v]) => v !== undefined)
+    .sort(([a], [b]) => a.localeCompare(b))
+  const out: Record<string, unknown> = {}
+  for (const [k, v] of entries) out[k] = canonicalize(v)
+  return out
+}
+
+// Re-exports re-used by the feedback-store + handler glue. Kept local so
+// consumers of the queue don't have to import from `./types` separately.
+export type {
+  DelegateCodeArgs,
+  DelegateCodeResult,
+  DelegateFeedbackArgs,
+  DelegateFeedbackResult,
+  DelegateResearchArgs,
+  DelegateResearchResult,
+  DelegationError,
+  DelegationFeedbackSnapshot,
+  DelegationHistoryArgs,
+  DelegationHistoryEntry,
+  DelegationHistoryResult,
+  DelegationProfile,
+  DelegationProgress,
+  DelegationResultPayload,
+  DelegationStatus,
+  DelegationStatusArgs,
+  DelegationStatusResult,
+} from './types'
diff --git a/src/mcp/tools/delegate-code.ts b/src/mcp/tools/delegate-code.ts
new file mode 100644
index 0000000..4f6796f
--- /dev/null
+++ b/src/mcp/tools/delegate-code.ts
@@ -0,0 +1,199 @@
+/**
+ * @experimental
+ *
+ * `delegate_code` MCP tool — async kickoff. The handler validates the
+ * input, computes an idempotency key over the canonical fields, hands
+ * the task to the queue, and returns `{ taskId, estimatedDurationMs }`.
+ */
+
+import type { CoderDelegate } from '../delegates'
+import {
+  type DelegateCodeArgs,
+  type DelegateCodeResult,
+  type DelegationTaskQueue,
+  hashIdempotencyInput,
+} from '../task-queue'
+
+/** @experimental */
+export const DELEGATE_CODE_TOOL_NAME = 'delegate_code'
+
+/** @experimental */
+export const DELEGATE_CODE_DESCRIPTION = [
+  'Delegate a coding task to specialist coder agents that produce a validated patch.',
+  '',
+  'Use when: you need code written, fixed, refactored, or extended to satisfy a',
+  'user goal that touches a real repository. The coder runs in an isolated',
+  'sandbox, opens a fresh branch, keeps the diff minimal, runs the supplied',
+  'test + typecheck commands, and emits a unified-diff patch.',
+  '',
+  'Returns immediately with a taskId. Poll delegation_status to retrieve the',
+  'patch + validator verdict (typically minutes-to-hours, longer for large',
+  'changes). Identical inputs return the same taskId — safe to retry.',
+  '',
+  'When variants > 1, multiple coder harnesses (claude-code, codex, opencode)',
+  'attempt the task in parallel and the highest-scoring patch wins (smallest',
+  'passing diff). Use variants for high-stakes changes; single variant for',
+  'routine ones.',
+  '',
+  'Capability scope: the coder cannot modify paths outside repoRoot and cannot',
+  'touch paths in config.forbiddenPaths. The validator hard-fails on a',
+  'forbidden-path violation, diff above config.maxDiffLines, test failure, or',
+  'typecheck failure — none of those make it past the gate.',
+].join('\n')
+
+/** @experimental */
+export const DELEGATE_CODE_INPUT_SCHEMA = {
+  type: 'object',
+  properties: {
+    goal: {
+      type: 'string',
+      description: 'Natural-language description of what the coder must accomplish.',
+    },
+    repoRoot: {
+      type: 'string',
+      description: 'Absolute path inside the sandbox where the repo lives.',
+    },
+    contextHint: {
+      type: 'string',
+      description: 'Optional free-form context the coder sees in the prompt prelude.',
+    },
+    variants: {
+      type: 'integer',
+      minimum: 1,
+      maximum: 8,
+      description: 'Number of parallel coder harnesses. Default 1.',
+    },
+    config: {
+      type: 'object',
+      properties: {
+        testCmd: { type: 'string' },
+        typecheckCmd: { type: 'string' },
+        forbiddenPaths: { type: 'array', items: { type: 'string' } },
+        maxDiffLines: { type: 'integer', minimum: 1 },
+      },
+      additionalProperties: false,
+    },
+    namespace: {
+      type: 'string',
+      description: 'Multi-tenant scope (customer-id, workspace-id).',
+    },
+  },
+  required: ['goal', 'repoRoot'],
+  additionalProperties: false,
+} as const
+
+const SINGLE_VARIANT_ESTIMATE_MS = 6 * 60 * 1000 // 6 minutes — single coder
+const FANOUT_PER_VARIANT_ESTIMATE_MS = 8 * 60 * 1000 // 8 minutes — fanout
+
+/** @experimental */
+export function validateDelegateCodeArgs(raw: unknown): DelegateCodeArgs {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_code: arguments must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const goal = value.goal
+  if (typeof goal !== 'string' || goal.trim().length === 0) {
+    throw new TypeError('delegate_code: `goal` must be a non-empty string')
+  }
+  const repoRoot = value.repoRoot
+  if (typeof repoRoot !== 'string' || repoRoot.trim().length === 0) {
+    throw new TypeError('delegate_code: `repoRoot` must be a non-empty string')
+  }
+  const args: DelegateCodeArgs = { goal: goal.trim(), repoRoot: repoRoot.trim() }
+  if (typeof value.contextHint === 'string') args.contextHint = value.contextHint
+  if (value.variants !== undefined) {
+    const variants = Number(value.variants)
+    if (!Number.isFinite(variants) || variants < 1 || variants > 8) {
+      throw new RangeError('delegate_code: `variants` must be an integer in [1, 8]')
+    }
+    args.variants = Math.trunc(variants)
+  }
+  if (value.config !== undefined) {
+    args.config = validateConfig(value.config)
+  }
+  if (typeof value.namespace === 'string') args.namespace = value.namespace
+  return args
+}
+
+function validateConfig(raw: unknown): DelegateCodeArgs['config'] {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_code: `config` must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const out: NonNullable<DelegateCodeArgs['config']> = {}
+  if (value.testCmd !== undefined) {
+    if (typeof value.testCmd !== 'string') {
+      throw new TypeError('delegate_code: `config.testCmd` must be a string')
+    }
+    out.testCmd = value.testCmd
+  }
+  if (value.typecheckCmd !== undefined) {
+    if (typeof value.typecheckCmd !== 'string') {
+      throw new TypeError('delegate_code: `config.typecheckCmd` must be a string')
+    }
+    out.typecheckCmd = value.typecheckCmd
+  }
+  if (value.forbiddenPaths !== undefined) {
+    if (!Array.isArray(value.forbiddenPaths)) {
+      throw new TypeError('delegate_code: `config.forbiddenPaths` must be a string array')
+    }
+    out.forbiddenPaths = value.forbiddenPaths.map((entry, i) => {
+      if (typeof entry !== 'string') {
+        throw new TypeError(`delegate_code: forbiddenPaths[${i}] must be a string`)
+      }
+      return entry
+    })
+  }
+  if (value.maxDiffLines !== undefined) {
+    const n = Number(value.maxDiffLines)
+    if (!Number.isFinite(n) || n < 1) {
+      throw new RangeError('delegate_code: `config.maxDiffLines` must be a positive integer')
+    }
+    out.maxDiffLines = Math.trunc(n)
+  }
+  return out
+}
+
+/** @experimental */
+export interface DelegateCodeHandlerOptions {
+  queue: DelegationTaskQueue
+  delegate: CoderDelegate
+  /** Override the duration hint. */
+  estimateDurationMs?: (args: DelegateCodeArgs) => number
+}
+
+/** @experimental */
+export function createDelegateCodeHandler(
+  options: DelegateCodeHandlerOptions,
+): (raw: unknown) => Promise<DelegateCodeResult> {
+  const estimateDurationMs = options.estimateDurationMs ?? defaultEstimate
+  return async (raw) => {
+    const args = validateDelegateCodeArgs(raw)
+    const idempotencyKey = hashIdempotencyInput({
+      profile: 'coder',
+      goal: args.goal,
+      repoRoot: args.repoRoot,
+      contextHint: args.contextHint,
+      variants: args.variants ?? 1,
+      config: args.config,
+      namespace: args.namespace,
+    })
+    const submitted = options.queue.submit<DelegateCodeArgs>({
+      profile: 'coder',
+      args,
+      namespace: args.namespace,
+      idempotencyKey,
+      run: async (ctx) => options.delegate(args, ctx),
+    })
+    return {
+      taskId: submitted.taskId,
+      estimatedDurationMs: estimateDurationMs(args),
+    }
+  }
+}
+
+function defaultEstimate(args: DelegateCodeArgs): number {
+  const variants = Math.max(1, args.variants ?? 1)
+  if (variants === 1) return SINGLE_VARIANT_ESTIMATE_MS
+  return FANOUT_PER_VARIANT_ESTIMATE_MS
+}
diff --git a/src/mcp/tools/delegate-feedback.ts b/src/mcp/tools/delegate-feedback.ts
new file mode 100644
index 0000000..11d9a6e
--- /dev/null
+++ b/src/mcp/tools/delegate-feedback.ts
@@ -0,0 +1,187 @@
+/**
+ * @experimental
+ *
+ * `delegate_feedback` MCP tool — synchronous record of agent / user /
+ * downstream-judge feedback on a delegation, artifact, or outcome.
+ *
+ * The store is append-only. Every rating is its own event; no dedupe.
+ * When `refersTo.kind === 'delegation'`, the snapshot is also attached
+ * to the matching queue record so `delegation_history` surfaces it
+ * inline without a join.
+ */
+
+import type { FeedbackStore } from '../feedback-store'
+import { eventToSnapshot } from '../feedback-store'
+import type { DelegationTaskQueue } from '../task-queue'
+import type {
+  DelegateFeedbackArgs,
+  DelegateFeedbackResult,
+  FeedbackRating,
+  FeedbackRefersTo,
+} from '../types'
+
+/** @experimental */
+export const DELEGATE_FEEDBACK_TOOL_NAME = 'delegate_feedback'
+
+/** @experimental */
+export const DELEGATE_FEEDBACK_DESCRIPTION = [
+  'Record feedback on a delegation, artifact, or outcome. Synchronous — the',
+  'event is durably stored when this call returns.',
+  '',
+  'Use when: you (the agent), the user, or a downstream judge has formed an',
+  'opinion about a piece of work and want it persisted for calibration,',
+  'pricing, or future routing. Every call is a new event — multiple ratings',
+  'on the same target are expected and never deduped.',
+  '',
+  '`refersTo.kind`:',
+  '  - "delegation": ref is a taskId returned by delegate_code/delegate_research',
+  '  - "artifact":   ref is a URI/path/git-sha — anything you can dereference',
+  '  - "outcome":    ref is a free-form description of a downstream result',
+  '',
+  '`by`:',
+  '  - "agent":            the agent itself rated the work',
+  '  - "user":             the human user rated it',
+  '  - "downstream-judge": an automated evaluator emitted the rating',
+  '',
+  'When ref names a known taskId, the rating is also attached to the',
+  'delegation record so delegation_history surfaces it inline.',
+].join('\n')
+
+/** @experimental */
+export const DELEGATE_FEEDBACK_INPUT_SCHEMA = {
+  type: 'object',
+  properties: {
+    refersTo: {
+      type: 'object',
+      properties: {
+        kind: { type: 'string', enum: ['delegation', 'artifact', 'outcome'] },
+        ref: { type: 'string' },
+      },
+      required: ['kind', 'ref'],
+      additionalProperties: false,
+    },
+    rating: {
+      type: 'object',
+      properties: {
+        score: { type: 'number', minimum: 0, maximum: 1 },
+        label: { type: 'string', enum: ['good', 'bad', 'neutral', 'mixed'] },
+        notes: { type: 'string' },
+      },
+      required: ['score', 'notes'],
+      additionalProperties: false,
+    },
+    by: { type: 'string', enum: ['agent', 'user', 'downstream-judge'] },
+    capturedAt: { type: 'string' },
+    namespace: { type: 'string' },
+  },
+  required: ['refersTo', 'rating', 'by'],
+  additionalProperties: false,
+} as const
+
+/** @experimental */
+export function validateDelegateFeedbackArgs(raw: unknown): DelegateFeedbackArgs {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_feedback: arguments must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const refersTo = validateRefersTo(value.refersTo)
+  const rating = validateRating(value.rating)
+  const by = value.by
+  if (by !== 'agent' && by !== 'user' && by !== 'downstream-judge') {
+    throw new TypeError(
+      'delegate_feedback: `by` must be one of "agent" | "user" | "downstream-judge"',
+    )
+  }
+  const args: DelegateFeedbackArgs = { refersTo, rating, by }
+  if (value.capturedAt !== undefined) {
+    if (typeof value.capturedAt !== 'string' || Number.isNaN(Date.parse(value.capturedAt))) {
+      throw new TypeError('delegate_feedback: `capturedAt` must be an ISO datetime')
+    }
+    args.capturedAt = value.capturedAt
+  }
+  if (typeof value.namespace === 'string') args.namespace = value.namespace
+  return args
+}
+
+function validateRefersTo(raw: unknown): FeedbackRefersTo {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_feedback: `refersTo` must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const kind = value.kind
+  if (kind !== 'delegation' && kind !== 'artifact' && kind !== 'outcome') {
+    throw new TypeError(
+      'delegate_feedback: `refersTo.kind` must be one of "delegation" | "artifact" | "outcome"',
+    )
+  }
+  const ref = value.ref
+  if (typeof ref !== 'string' || ref.trim().length === 0) {
+    throw new TypeError('delegate_feedback: `refersTo.ref` must be a non-empty string')
+  }
+  return { kind, ref: ref.trim() }
+}
+
+function validateRating(raw: unknown): FeedbackRating {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_feedback: `rating` must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const score = Number(value.score)
+  if (!Number.isFinite(score) || score < 0 || score > 1) {
+    throw new RangeError('delegate_feedback: `rating.score` must be a number in [0, 1]')
+  }
+  const notes = value.notes
+  if (typeof notes !== 'string') {
+    throw new TypeError('delegate_feedback: `rating.notes` must be a string')
+  }
+  const rating: FeedbackRating = { score, notes }
+  const label = value.label
+  if (label !== undefined) {
+    if (label !== 'good' && label !== 'bad' && label !== 'neutral' && label !== 'mixed') {
+      throw new TypeError(
+        'delegate_feedback: `rating.label` must be one of "good" | "bad" | "neutral" | "mixed"',
+      )
+    }
+    rating.label = label
+  }
+  return rating
+}
+
+/** @experimental */
+export interface DelegateFeedbackHandlerOptions {
+  queue: DelegationTaskQueue
+  store: FeedbackStore
+  generateId?: () => string
+  now?: () => string
+}
+
+/** @experimental */
+export function createDelegateFeedbackHandler(
+  options: DelegateFeedbackHandlerOptions,
+): (raw: unknown) => Promise<DelegateFeedbackResult> {
+  const generateId = options.generateId ?? randomFeedbackId
+  const now = options.now ?? (() => new Date().toISOString())
+  return async (raw) => {
+    const args = validateDelegateFeedbackArgs(raw)
+    const id = generateId()
+    const event = {
+      id,
+      refersTo: args.refersTo,
+      rating: args.rating,
+      by: args.by,
+      capturedAt: args.capturedAt ?? now(),
+      namespace: args.namespace,
+    }
+    await options.store.put(event)
+    if (args.refersTo.kind === 'delegation') {
+      options.queue.attachFeedback(args.refersTo.ref, eventToSnapshot(event))
+    }
+    return { recorded: true, id }
+  }
+}
+
+function randomFeedbackId(): string {
+  const t = Date.now().toString(36)
+  const r = Math.random().toString(36).slice(2, 10)
+  return `fbk-${t}-${r}`
+}
diff --git a/src/mcp/tools/delegate-research.ts b/src/mcp/tools/delegate-research.ts
new file mode 100644
index 0000000..ba7d3b4
--- /dev/null
+++ b/src/mcp/tools/delegate-research.ts
@@ -0,0 +1,219 @@
+/**
+ * @experimental
+ *
+ * `delegate_research` MCP tool — async kickoff for source-grounded
+ * research tasks. Same async semantics as `delegate_code`: returns a
+ * taskId immediately, idempotent on canonical inputs.
+ *
+ * The handler does not import a researcher profile directly — consumers
+ * inject a `ResearcherDelegate` via `createMcpServer({ researcherDelegate })`.
+ * The substrate cannot depend on `@tangle-network/agent-knowledge`
+ * without inducing a dependency cycle.
+ */
+
+import type { ResearcherDelegate } from '../delegates'
+import {
+  type DelegateResearchArgs,
+  type DelegateResearchResult,
+  type DelegationTaskQueue,
+  hashIdempotencyInput,
+} from '../task-queue'
+import type { ResearchSource } from '../types'
+
+/** @experimental */
+export const DELEGATE_RESEARCH_TOOL_NAME = 'delegate_research'
+
+/** @experimental */
+export const DELEGATE_RESEARCH_DESCRIPTION = [
+  'Delegate a research question to specialist researcher agents that produce',
+  'source-grounded, evidence-bearing knowledge items.',
+  '',
+  'Use when: you need to answer a factual question with external evidence —',
+  'audience research, competitive intelligence, recency-bound web searches,',
+  'corpus / docs lookups. The researcher emits items[] with provenance, a',
+  'citations[] index, and proposedWrites[] you decide whether to persist.',
+  '',
+  'Returns immediately with a taskId. Poll delegation_status to retrieve the',
+  'items + verdict. Identical inputs return the same taskId — safe to retry.',
+  '',
+  'When variants > 1, multiple researcher harnesses run in parallel and the',
+  'highest-scoring valid output wins (citation density × source diversity ×',
+  'recency match × gap coverage). Use variants when answers might disagree.',
+  '',
+  'Multi-tenant isolation: every item carries `namespace`. The validator',
+  'hard-fails when any item is scoped outside `namespace`. Never pass another',
+  "tenant's namespace.",
+].join('\n')
+
+const VALID_SOURCES: readonly ResearchSource[] = ['web', 'corpus', 'twitter', 'github', 'docs']
+
+/** @experimental */
+export const DELEGATE_RESEARCH_INPUT_SCHEMA = {
+  type: 'object',
+  properties: {
+    question: {
+      type: 'string',
+      description: 'The research question to answer.',
+    },
+    namespace: {
+      type: 'string',
+      description: 'Multi-tenant scope (customer-id, workspace-id). REQUIRED.',
+    },
+    scope: { type: 'string', description: 'Bound, e.g. "audience for cpg-founder ICP".' },
+    sources: {
+      type: 'array',
+      items: { type: 'string', enum: [...VALID_SOURCES] },
+    },
+    variants: { type: 'integer', minimum: 1, maximum: 8 },
+    config: {
+      type: 'object',
+      properties: {
+        recencyWindow: {
+          type: 'object',
+          properties: {
+            since: { type: 'string', description: 'ISO datetime' },
+            until: { type: 'string', description: 'ISO datetime' },
+          },
+          additionalProperties: false,
+        },
+        maxItems: { type: 'integer', minimum: 1 },
+        minConfidence: { type: 'number', minimum: 0, maximum: 1 },
+      },
+      additionalProperties: false,
+    },
+  },
+  required: ['question', 'namespace'],
+  additionalProperties: false,
+} as const
+
+const SINGLE_VARIANT_ESTIMATE_MS = 4 * 60 * 1000
+const FANOUT_PER_VARIANT_ESTIMATE_MS = 6 * 60 * 1000
+
+/** @experimental */
+export function validateDelegateResearchArgs(raw: unknown): DelegateResearchArgs {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_research: arguments must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const question = value.question
+  if (typeof question !== 'string' || question.trim().length === 0) {
+    throw new TypeError('delegate_research: `question` must be a non-empty string')
+  }
+  const namespace = value.namespace
+  if (typeof namespace !== 'string' || namespace.trim().length === 0) {
+    throw new TypeError('delegate_research: `namespace` is required')
+  }
+  const args: DelegateResearchArgs = { question: question.trim(), namespace: namespace.trim() }
+  if (typeof value.scope === 'string') args.scope = value.scope
+  if (value.sources !== undefined) {
+    if (!Array.isArray(value.sources)) {
+      throw new TypeError('delegate_research: `sources` must be a string array')
+    }
+    const sources: ResearchSource[] = value.sources.map((src, i) => {
+      if (typeof src !== 'string' || !VALID_SOURCES.includes(src as ResearchSource)) {
+        throw new TypeError(
+          `delegate_research: sources[${i}] must be one of ${VALID_SOURCES.join('|')}`,
+        )
+      }
+      return src as ResearchSource
+    })
+    args.sources = sources
+  }
+  if (value.variants !== undefined) {
+    const variants = Number(value.variants)
+    if (!Number.isFinite(variants) || variants < 1 || variants > 8) {
+      throw new RangeError('delegate_research: `variants` must be an integer in [1, 8]')
+    }
+    args.variants = Math.trunc(variants)
+  }
+  if (value.config !== undefined) {
+    args.config = validateConfig(value.config)
+  }
+  return args
+}
+
+function validateConfig(raw: unknown): DelegateResearchArgs['config'] {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegate_research: `config` must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const out: NonNullable<DelegateResearchArgs['config']> = {}
+  if (value.recencyWindow !== undefined) {
+    if (value.recencyWindow === null || typeof value.recencyWindow !== 'object') {
+      throw new TypeError('delegate_research: `config.recencyWindow` must be an object')
+    }
+    const window = value.recencyWindow as Record<string, unknown>
+    const windowOut: NonNullable<NonNullable<DelegateResearchArgs['config']>['recencyWindow']> = {}
+    if (window.since !== undefined) {
+      if (typeof window.since !== 'string' || Number.isNaN(Date.parse(window.since))) {
+        throw new TypeError('delegate_research: `recencyWindow.since` must be an ISO datetime')
+      }
+      windowOut.since = window.since
+    }
+    if (window.until !== undefined) {
+      if (typeof window.until !== 'string' || Number.isNaN(Date.parse(window.until))) {
+        throw new TypeError('delegate_research: `recencyWindow.until` must be an ISO datetime')
+      }
+      windowOut.until = window.until
+    }
+    out.recencyWindow = windowOut
+  }
+  if (value.maxItems !== undefined) {
+    const n = Number(value.maxItems)
+    if (!Number.isFinite(n) || n < 1) {
+      throw new RangeError('delegate_research: `config.maxItems` must be a positive integer')
+    }
+    out.maxItems = Math.trunc(n)
+  }
+  if (value.minConfidence !== undefined) {
+    const n = Number(value.minConfidence)
+    if (!Number.isFinite(n) || n < 0 || n > 1) {
+      throw new RangeError('delegate_research: `config.minConfidence` must be in [0, 1]')
+    }
+    out.minConfidence = n
+  }
+  return out
+}
+
+/** @experimental */
+export interface DelegateResearchHandlerOptions {
+  queue: DelegationTaskQueue
+  delegate: ResearcherDelegate
+  estimateDurationMs?: (args: DelegateResearchArgs) => number
+}
+
+/** @experimental */
+export function createDelegateResearchHandler(
+  options: DelegateResearchHandlerOptions,
+): (raw: unknown) => Promise<DelegateResearchResult> {
+  const estimateDurationMs = options.estimateDurationMs ?? defaultEstimate
+  return async (raw) => {
+    const args = validateDelegateResearchArgs(raw)
+    const idempotencyKey = hashIdempotencyInput({
+      profile: 'researcher',
+      question: args.question,
+      namespace: args.namespace,
+      scope: args.scope,
+      sources: args.sources,
+      variants: args.variants ?? 1,
+      config: args.config,
+    })
+    const submitted = options.queue.submit<DelegateResearchArgs>({
+      profile: 'researcher',
+      args,
+      namespace: args.namespace,
+      idempotencyKey,
+      run: async (ctx) => options.delegate(args, ctx),
+    })
+    return {
+      taskId: submitted.taskId,
+      estimatedDurationMs: estimateDurationMs(args),
+    }
+  }
+}
+
+function defaultEstimate(args: DelegateResearchArgs): number {
+  const variants = Math.max(1, args.variants ?? 1)
+  if (variants === 1) return SINGLE_VARIANT_ESTIMATE_MS
+  return FANOUT_PER_VARIANT_ESTIMATE_MS
+}
diff --git a/src/mcp/tools/delegation-history.ts b/src/mcp/tools/delegation-history.ts
new file mode 100644
index 0000000..dae54fa
--- /dev/null
+++ b/src/mcp/tools/delegation-history.ts
@@ -0,0 +1,98 @@
+/**
+ * @experimental
+ *
+ * `delegation_history` MCP tool — synchronous read of past delegations.
+ * The agent uses this for self-introspection — "have I delegated this
+ * kind of task before? did it work?" — and calibration.
+ */
+
+import type {
+  DelegationHistoryArgs,
+  DelegationHistoryResult,
+  DelegationProfile,
+  DelegationTaskQueue,
+} from '../task-queue'
+
+/** @experimental */
+export const DELEGATION_HISTORY_TOOL_NAME = 'delegation_history'
+
+/** @experimental */
+export const DELEGATION_HISTORY_DESCRIPTION = [
+  'Read past delegations newest-first. Each entry carries the original',
+  'arguments, current status, cost, and any feedback attached via',
+  'delegate_feedback.',
+  '',
+  'Use when: you want to introspect prior decisions — "have I asked this',
+  'question before?',
+  'did the last patch land?',
+  "what's the historical",
+  'success rate of coder delegations on this repo?". Feed the results back',
+  'into your own routing and calibration.',
+  '',
+  'Filters: `namespace` (multi-tenant scope), `profile` ("coder" | "researcher"),',
+  '`since` (ISO date — only delegations started at-or-after). `limit` defaults',
+  'to 50, capped at 500.',
+].join('\n')
+
+/** @experimental */
+export const DELEGATION_HISTORY_INPUT_SCHEMA = {
+  type: 'object',
+  properties: {
+    namespace: { type: 'string' },
+    profile: { type: 'string', enum: ['coder', 'researcher'] },
+    since: { type: 'string', description: 'ISO datetime — earliest startedAt to include.' },
+    limit: { type: 'integer', minimum: 1, maximum: 500 },
+  },
+  additionalProperties: false,
+} as const
+
+/** @experimental */
+export function validateDelegationHistoryArgs(raw: unknown): DelegationHistoryArgs {
+  if (raw === undefined || raw === null) return {}
+  if (typeof raw !== 'object') {
+    throw new TypeError('delegation_history: arguments must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const out: DelegationHistoryArgs = {}
+  if (value.namespace !== undefined) {
+    if (typeof value.namespace !== 'string') {
+      throw new TypeError('delegation_history: `namespace` must be a string')
+    }
+    out.namespace = value.namespace
+  }
+  if (value.profile !== undefined) {
+    if (value.profile !== 'coder' && value.profile !== 'researcher') {
+      throw new TypeError('delegation_history: `profile` must be "coder" or "researcher"')
+    }
+    out.profile = value.profile as DelegationProfile
+  }
+  if (value.since !== undefined) {
+    if (typeof value.since !== 'string' || Number.isNaN(Date.parse(value.since))) {
+      throw new TypeError('delegation_history: `since` must be an ISO datetime')
+    }
+    out.since = value.since
+  }
+  if (value.limit !== undefined) {
+    const n = Number(value.limit)
+    if (!Number.isFinite(n) || n < 1 || n > 500) {
+      throw new RangeError('delegation_history: `limit` must be an integer in [1, 500]')
+    }
+    out.limit = Math.trunc(n)
+  }
+  return out
+}
+
+/** @experimental */
+export interface DelegationHistoryHandlerOptions {
+  queue: DelegationTaskQueue
+}
+
+/** @experimental */
+export function createDelegationHistoryHandler(
+  options: DelegationHistoryHandlerOptions,
+): (raw: unknown) => Promise<DelegationHistoryResult> {
+  return async (raw) => {
+    const args = validateDelegationHistoryArgs(raw)
+    return { delegations: options.queue.history(args) }
+  }
+}
diff --git a/src/mcp/tools/delegation-status.ts b/src/mcp/tools/delegation-status.ts
new file mode 100644
index 0000000..d2e14a5
--- /dev/null
+++ b/src/mcp/tools/delegation-status.ts
@@ -0,0 +1,76 @@
+/**
+ * @experimental
+ *
+ * `delegation_status` MCP tool — synchronous poll. Returns the current
+ * state machine + optional progress + final result (when terminal).
+ */
+
+import { NotFoundError } from '../../errors'
+import type {
+  DelegationStatusArgs,
+  DelegationStatusResult,
+  DelegationTaskQueue,
+} from '../task-queue'
+
+/** @experimental */
+export const DELEGATION_STATUS_TOOL_NAME = 'delegation_status'
+
+/** @experimental */
+export const DELEGATION_STATUS_DESCRIPTION = [
+  'Poll the status of an async delegation. Returns the current state',
+  '(pending | running | completed | failed | cancelled), optional progress,',
+  'and the final result when status === "completed".',
+  '',
+  'Use when: you previously called delegate_code or delegate_research and',
+  "need to know whether the work is done. The agent's right rhythm is to",
+  'call this every minute or two while waiting; do not busy-poll.',
+  '',
+  'For a completed coder task, `result.output` is a CoderOutput with branch,',
+  'patch, test/typecheck results, and diff stats. For a completed research',
+  'task, `result.output` is the items + citations + proposedWrites bundle.',
+  '',
+  'Throws NotFoundError when taskId is unknown — never silently returns',
+  '`pending` for a typo.',
+].join('\n')
+
+/** @experimental */
+export const DELEGATION_STATUS_INPUT_SCHEMA = {
+  type: 'object',
+  properties: {
+    taskId: { type: 'string', description: 'Returned by delegate_code / delegate_research.' },
+  },
+  required: ['taskId'],
+  additionalProperties: false,
+} as const
+
+/** @experimental */
+export function validateDelegationStatusArgs(raw: unknown): DelegationStatusArgs {
+  if (raw === null || typeof raw !== 'object') {
+    throw new TypeError('delegation_status: arguments must be an object')
+  }
+  const value = raw as Record<string, unknown>
+  const taskId = value.taskId
+  if (typeof taskId !== 'string' || taskId.trim().length === 0) {
+    throw new TypeError('delegation_status: `taskId` must be a non-empty string')
+  }
+  return { taskId: taskId.trim() }
+}
+
+/** @experimental */
+export interface DelegationStatusHandlerOptions {
+  queue: DelegationTaskQueue
+}
+
+/** @experimental */
+export function createDelegationStatusHandler(
+  options: DelegationStatusHandlerOptions,
+): (raw: unknown) => Promise<DelegationStatusResult> {
+  return async (raw) => {
+    const args = validateDelegationStatusArgs(raw)
+    const status = options.queue.status(args.taskId)
+    if (!status) {
+      throw new NotFoundError(`delegation_status: unknown taskId "${args.taskId}"`)
+    }
+    return status
+  }
+}
diff --git a/src/mcp/types.ts b/src/mcp/types.ts
new file mode 100644
index 0000000..29be887
--- /dev/null
+++ b/src/mcp/types.ts
@@ -0,0 +1,223 @@
+/**
+ * @experimental
+ *
+ * MCP delegation tool surface — the typed inputs/outputs the product agent
+ * sees over the wire. These types are the contract; the JSON schemas under
+ * `tools/*` mirror them for the MCP `tools/list` advertisement.
+ *
+ * Async semantics: `delegate_code` + `delegate_research` return a `taskId`
+ * immediately. The product agent polls `delegation_status` until the task
+ * transitions to `completed` | `failed` | `cancelled`. `delegate_feedback`
+ * + `delegation_history` are synchronous reads / writes against the local
+ * task queue + feedback store.
+ */
+
+import type { CoderOutput, CoderTask } from '../profiles/coder'
+
+/** @experimental */
+export type DelegationProfile = 'coder' | 'researcher'
+
+/** @experimental */
+export type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'
+
+/**
+ * Minimal `CoderTask` overrides exposed over the MCP wire. The full
+ * `CoderTask` carries fields the kernel synthesizes from `goal` +
+ * `repoRoot` — the agent only edits the few that materially gate
+ * validator behavior.
+ *
+ * @experimental
+ */
+export interface DelegateCodeConfig {
+  testCmd?: string
+  typecheckCmd?: string
+  forbiddenPaths?: string[]
+  maxDiffLines?: number
+}
+
+/** @experimental */
+export interface DelegateCodeArgs {
+  /** Natural-language description of what the coder must accomplish. */
+  goal: string
+  /** Absolute path inside the sandbox where the repo lives. */
+  repoRoot: string
+  /** Optional free-form context the agent surfaces in the prompt prelude. */
+  contextHint?: string
+  /**
+   * When > 1, dispatches `multiHarnessCoderFanout` across N harnesses
+   * (claude-code, codex, opencode-glm) and picks the highest-scoring
+   * passing patch. Default 1.
+   */
+  variants?: number
+  /** Validator + prompt overrides the agent knows for this repo. */
+  config?: DelegateCodeConfig
+  /** Multi-tenant scope (customer-id, workspace-id). */
+  namespace?: string
+}
+
+/** @experimental */
+export interface DelegateCodeResult {
+  taskId: string
+  /** Best-effort hint — coder loops can take minutes-to-hours. */
+  estimatedDurationMs?: number
+}
+
+/** @experimental */
+export type ResearchSource = 'web' | 'corpus' | 'twitter' | 'github' | 'docs'
+
+/** @experimental */
+export interface DelegateResearchConfig {
+  recencyWindow?: { since?: string; until?: string }
+  maxItems?: number
+  minConfidence?: number
+}
+
+/** @experimental */
+export interface DelegateResearchArgs {
+  question: string
+  namespace: string
+  scope?: string
+  sources?: ResearchSource[]
+  variants?: number
+  config?: DelegateResearchConfig
+}
+
+/** @experimental */
+export interface DelegateResearchResult {
+  taskId: string
+  estimatedDurationMs?: number
+}
+
+/** @experimental */
+export interface FeedbackRefersTo {
+  kind: 'delegation' | 'artifact' | 'outcome'
+  /** For `'delegation'`, this is the taskId. */
+  ref: string
+}
+
+/** @experimental */
+export interface FeedbackRating {
+  /** [0, 1]. */
+  score: number
+  label?: 'good' | 'bad' | 'neutral' | 'mixed'
+  notes: string
+}
+
+/** @experimental */
+export interface DelegateFeedbackArgs {
+  refersTo: FeedbackRefersTo
+  rating: FeedbackRating
+  by: 'agent' | 'user' | 'downstream-judge'
+  /** ISO timestamp; defaults to server clock when omitted. */
+  capturedAt?: string
+  namespace?: string
+}
+
+/** @experimental */
+export interface DelegateFeedbackResult {
+  recorded: true
+  id: string
+}
+
+/** @experimental */
+export interface DelegationStatusArgs {
+  taskId: string
+}
+
+/** @experimental */
+export interface DelegationProgress {
+  iteration: number
+  phase: string
+}
+
+/** @experimental */
+export interface DelegationError {
+  message: string
+  kind: string
+}
+
+/**
+ * Polymorphic `result` field: `CoderOutput` when the underlying profile
+ * is `'coder'`, a structurally-typed research output when `'researcher'`.
+ * The MCP wire carries it as JSON either way.
+ *
+ * @experimental
+ */
+export type DelegationResultPayload =
+  | { profile: 'coder'; output: CoderOutput }
+  | { profile: 'researcher'; output: ResearchOutputShape }
+
+/**
+ * Loose shape of a research output over the wire — the substrate cannot
+ * import the `ResearchOutput` type from agent-knowledge without inducing
+ * a dependency cycle, so the MCP layer treats it structurally.
+ *
+ * @experimental
+ */
+export interface ResearchOutputShape {
+  items: unknown[]
+  citations: unknown[]
+  proposedWrites: unknown[]
+  gaps?: string[]
+  notes?: string
+  [key: string]: unknown
+}
+
+/** @experimental */
+export interface DelegationStatusResult {
+  taskId: string
+  profile: DelegationProfile
+  status: DelegationStatus
+  progress?: DelegationProgress
+  result?: DelegationResultPayload
+  error?: DelegationError
+  costUsd?: number
+  startedAt: string
+  completedAt?: string
+}
+
+/** @experimental */
+export interface DelegationHistoryArgs {
+  namespace?: string
+  profile?: DelegationProfile
+  /** ISO date — only delegations started at-or-after `since` are returned. */
+  since?: string
+  /** Default 50. Hard cap 500. */
+  limit?: number
+}
+
+/** @experimental */
+export interface DelegationFeedbackSnapshot {
+  id: string
+  score: number
+  label?: FeedbackRating['label']
+  by: DelegateFeedbackArgs['by']
+  notes: string
+  capturedAt: string
+}
+
+/** @experimental */
+export interface DelegationHistoryEntry {
+  taskId: string
+  profile: DelegationProfile
+  namespace?: string
+  args: DelegateCodeArgs | DelegateResearchArgs
+  status: DelegationStatus
+  feedback?: DelegationFeedbackSnapshot[]
+  costUsd?: number
+  startedAt: string
+  completedAt?: string
+}
+
+/** @experimental */
+export interface DelegationHistoryResult {
+  delegations: DelegationHistoryEntry[]
+}
+
+/**
+ * Re-export `CoderTask` so callers wiring custom delegates don't have to
+ * import from `./profiles` themselves.
+ *
+ * @experimental
+ */
+export type { CoderOutput, CoderTask }
diff --git a/tests/mcp/delegate-code.test.ts b/tests/mcp/delegate-code.test.ts
new file mode 100644
index 0000000..ddd0b85
--- /dev/null
+++ b/tests/mcp/delegate-code.test.ts
@@ -0,0 +1,120 @@
+import { describe, expect, it } from 'vitest'
+import type { CoderDelegate } from '../../src/mcp/delegates'
+import { DelegationTaskQueue } from '../../src/mcp/task-queue'
+import {
+  createDelegateCodeHandler,
+  DELEGATE_CODE_DESCRIPTION,
+  DELEGATE_CODE_INPUT_SCHEMA,
+  DELEGATE_CODE_TOOL_NAME,
+  validateDelegateCodeArgs,
+} from '../../src/mcp/tools/delegate-code'
+
+const stubOutput = {
+  branch: 'feat/x',
+  patch: '',
+  testResult: { passed: true, output: '' },
+  typecheckResult: { passed: true, output: '' },
+  diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },
+}
+
+const stubDelegate: CoderDelegate = async () => stubOutput
+
+describe('validateDelegateCodeArgs', () => {
+  it('accepts the minimal required surface', () => {
+    const args = validateDelegateCodeArgs({ goal: 'fix', repoRoot: '/r' })
+    expect(args).toEqual({ goal: 'fix', repoRoot: '/r' })
+  })
+
+  it('rejects an empty goal', () => {
+    expect(() => validateDelegateCodeArgs({ goal: '   ', repoRoot: '/r' })).toThrow(TypeError)
+  })
+
+  it('rejects variants outside [1, 8]', () => {
+    expect(() => validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 0 })).toThrow(
+      RangeError,
+    )
+    expect(() => validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 9 })).toThrow(
+      RangeError,
+    )
+  })
+
+  it('rejects non-string forbiddenPaths entries', () => {
+    expect(() =>
+      validateDelegateCodeArgs({
+        goal: 'g',
+        repoRoot: '/r',
+        config: { forbiddenPaths: ['ok', 1] },
+      }),
+    ).toThrow(TypeError)
+  })
+
+  it('coerces variants to a positive integer', () => {
+    const args = validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 3.7 })
+    expect(args.variants).toBe(3)
+  })
+})
+
+describe('createDelegateCodeHandler', () => {
+  it('returns a taskId and estimated duration on a happy-path call', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate })
+    const result = await handler({ goal: 'fix', repoRoot: '/r' })
+    expect(result.taskId).toMatch(/^dlg-/)
+    expect(result.estimatedDurationMs).toBeGreaterThan(0)
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    expect(queue.status(result.taskId)?.status).toBe('completed')
+  })
+
+  it('is idempotent: duplicate identical input returns the same taskId', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate })
+    const first = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 })
+    const second = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 })
+    expect(second.taskId).toBe(first.taskId)
+  })
+
+  it('returns a different taskId when contextHint differs', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate })
+    const first = await handler({ goal: 'fix', repoRoot: '/r' })
+    const second = await handler({ goal: 'fix', repoRoot: '/r', contextHint: 'see issue #42' })
+    expect(second.taskId).not.toBe(first.taskId)
+  })
+
+  it('propagates validation errors out of the handler', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate })
+    await expect(handler({ goal: '', repoRoot: '/r' })).rejects.toThrow(/non-empty string/)
+  })
+
+  it('surfaces delegate exceptions via the queue status', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({
+      queue,
+      delegate: async () => {
+        throw new TypeError('bad sandbox')
+      },
+    })
+    const { taskId } = await handler({ goal: 'fix', repoRoot: '/r' })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    const status = queue.status(taskId)
+    expect(status?.status).toBe('failed')
+    expect(status?.error?.message).toBe('bad sandbox')
+    expect(status?.error?.kind).toBe('TypeError')
+  })
+})
+
+describe('tool descriptors', () => {
+  it('exposes a non-empty description that explains when to call', () => {
+    expect(DELEGATE_CODE_TOOL_NAME).toBe('delegate_code')
+    expect(DELEGATE_CODE_DESCRIPTION).toMatch(/Use when:/)
+  })
+  it('declares goal and repoRoot as required', () => {
+    expect((DELEGATE_CODE_INPUT_SCHEMA as { required: string[] }).required).toEqual([
+      'goal',
+      'repoRoot',
+    ])
+  })
+})
diff --git a/tests/mcp/delegate-feedback.test.ts b/tests/mcp/delegate-feedback.test.ts
new file mode 100644
index 0000000..953bde2
--- /dev/null
+++ b/tests/mcp/delegate-feedback.test.ts
@@ -0,0 +1,135 @@
+import { describe, expect, it } from 'vitest'
+import { InMemoryFeedbackStore } from '../../src/mcp/feedback-store'
+import { DelegationTaskQueue } from '../../src/mcp/task-queue'
+import {
+  createDelegateFeedbackHandler,
+  DELEGATE_FEEDBACK_DESCRIPTION,
+  validateDelegateFeedbackArgs,
+} from '../../src/mcp/tools/delegate-feedback'
+
+describe('validateDelegateFeedbackArgs', () => {
+  it('requires refersTo, rating, and by', () => {
+    expect(() => validateDelegateFeedbackArgs({})).toThrow()
+    expect(() =>
+      validateDelegateFeedbackArgs({
+        refersTo: { kind: 'delegation', ref: 'dlg-1' },
+        rating: { score: 1, notes: 'good' },
+      }),
+    ).toThrow(/`by`/)
+  })
+
+  it('clamps score validity to [0, 1]', () => {
+    expect(() =>
+      validateDelegateFeedbackArgs({
+        refersTo: { kind: 'delegation', ref: 'r' },
+        rating: { score: 1.1, notes: 'oops' },
+        by: 'agent',
+      }),
+    ).toThrow(RangeError)
+  })
+
+  it('rejects unknown labels', () => {
+    expect(() =>
+      validateDelegateFeedbackArgs({
+        refersTo: { kind: 'delegation', ref: 'r' },
+        rating: { score: 0.5, notes: 'n', label: 'maybe' },
+        by: 'agent',
+      }),
+    ).toThrow(TypeError)
+  })
+
+  it('accepts the three refersTo.kind values', () => {
+    for (const kind of ['delegation', 'artifact', 'outcome'] as const) {
+      const out = validateDelegateFeedbackArgs({
+        refersTo: { kind, ref: 'r' },
+        rating: { score: 0.5, notes: 'n' },
+        by: 'agent',
+      })
+      expect(out.refersTo.kind).toBe(kind)
+    }
+  })
+})
+
+describe('createDelegateFeedbackHandler', () => {
+  it('persists every event without deduping (append-only)', async () => {
+    const queue = new DelegationTaskQueue()
+    const store = new InMemoryFeedbackStore()
+    const handler = createDelegateFeedbackHandler({ queue, store })
+    const args = {
+      refersTo: { kind: 'artifact' as const, ref: 'file://x' },
+      rating: { score: 0.8, notes: 'looks ok' },
+      by: 'agent' as const,
+    }
+    const a = await handler(args)
+    const b = await handler(args)
+    expect(a.id).not.toBe(b.id)
+    expect((await store.list()).length).toBe(2)
+  })
+
+  it('isolates events by namespace', async () => {
+    const queue = new DelegationTaskQueue()
+    const store = new InMemoryFeedbackStore()
+    const handler = createDelegateFeedbackHandler({ queue, store })
+    await handler({
+      refersTo: { kind: 'outcome', ref: 'x' },
+      rating: { score: 0.5, notes: 'n' },
+      by: 'user',
+      namespace: 'a',
+    })
+    await handler({
+      refersTo: { kind: 'outcome', ref: 'x' },
+      rating: { score: 0.5, notes: 'n' },
+      by: 'user',
+      namespace: 'b',
+    })
+    expect((await store.list({ namespace: 'a' })).length).toBe(1)
+    expect((await store.list({ namespace: 'b' })).length).toBe(1)
+    expect((await store.list({ namespace: 'c' })).length).toBe(0)
+  })
+
+  it('attaches the feedback snapshot to the queue record when refersTo.kind === "delegation"', async () => {
+    const queue = new DelegationTaskQueue()
+    const store = new InMemoryFeedbackStore()
+    const submitted = queue.submit({
+      profile: 'coder',
+      args: { goal: 'g', repoRoot: '/r' },
+      run: async () => ({}) as never,
+    })
+    await new Promise((r) => setImmediate(r))
+    const handler = createDelegateFeedbackHandler({ queue, store })
+    const { id } = await handler({
+      refersTo: { kind: 'delegation', ref: submitted.taskId },
+      rating: { score: 0.9, notes: 'great patch', label: 'good' },
+      by: 'user',
+    })
+    const [entry] = queue.history()
+    expect(entry?.feedback?.length).toBe(1)
+    expect(entry?.feedback?.[0]?.id).toBe(id)
+    expect(entry?.feedback?.[0]?.label).toBe('good')
+  })
+
+  it('does not attach when refersTo.kind is artifact/outcome', async () => {
+    const queue = new DelegationTaskQueue()
+    const store = new InMemoryFeedbackStore()
+    const submitted = queue.submit({
+      profile: 'coder',
+      args: { goal: 'g', repoRoot: '/r' },
+      run: async () => ({}) as never,
+    })
+    await new Promise((r) => setImmediate(r))
+    const handler = createDelegateFeedbackHandler({ queue, store })
+    await handler({
+      refersTo: { kind: 'artifact', ref: submitted.taskId },
+      rating: { score: 0.1, notes: 'mistake — wrong kind' },
+      by: 'agent',
+    })
+    const [entry] = queue.history()
+    expect(entry?.feedback).toBeUndefined()
+  })
+
+  it('description explains kind variants', () => {
+    expect(DELEGATE_FEEDBACK_DESCRIPTION).toMatch(/delegation/)
+    expect(DELEGATE_FEEDBACK_DESCRIPTION).toMatch(/artifact/)
+    expect(DELEGATE_FEEDBACK_DESCRIPTION).toMatch(/outcome/)
+  })
+})
diff --git a/tests/mcp/delegate-research.test.ts b/tests/mcp/delegate-research.test.ts
new file mode 100644
index 0000000..56d5931
--- /dev/null
+++ b/tests/mcp/delegate-research.test.ts
@@ -0,0 +1,84 @@
+import { describe, expect, it } from 'vitest'
+import type { ResearcherDelegate } from '../../src/mcp/delegates'
+import { DelegationTaskQueue } from '../../src/mcp/task-queue'
+import {
+  createDelegateResearchHandler,
+  DELEGATE_RESEARCH_DESCRIPTION,
+  DELEGATE_RESEARCH_INPUT_SCHEMA,
+  DELEGATE_RESEARCH_TOOL_NAME,
+  validateDelegateResearchArgs,
+} from '../../src/mcp/tools/delegate-research'
+
+const stubOutput = { items: [], citations: [], proposedWrites: [] }
+const stubDelegate: ResearcherDelegate = async () => stubOutput
+
+describe('validateDelegateResearchArgs', () => {
+  it('requires question + namespace', () => {
+    expect(() => validateDelegateResearchArgs({ namespace: 'x' })).toThrow(/question/)
+    expect(() => validateDelegateResearchArgs({ question: 'q?' })).toThrow(/namespace/)
+  })
+
+  it('rejects unknown source types', () => {
+    expect(() =>
+      validateDelegateResearchArgs({ question: 'q?', namespace: 'x', sources: ['rss'] }),
+    ).toThrow(TypeError)
+  })
+
+  it('validates recencyWindow datetimes', () => {
+    expect(() =>
+      validateDelegateResearchArgs({
+        question: 'q?',
+        namespace: 'x',
+        config: { recencyWindow: { since: 'not-a-date' } },
+      }),
+    ).toThrow(TypeError)
+    const ok = validateDelegateResearchArgs({
+      question: 'q?',
+      namespace: 'x',
+      config: { recencyWindow: { since: '2026-01-01T00:00:00Z' } },
+    })
+    expect(ok.config?.recencyWindow?.since).toBe('2026-01-01T00:00:00Z')
+  })
+
+  it('rejects minConfidence outside [0, 1]', () => {
+    expect(() =>
+      validateDelegateResearchArgs({
+        question: 'q?',
+        namespace: 'x',
+        config: { minConfidence: 1.5 },
+      }),
+    ).toThrow(RangeError)
+  })
+})
+
+describe('createDelegateResearchHandler', () => {
+  it('returns a taskId with the researcher profile and isolates by namespace', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateResearchHandler({ queue, delegate: stubDelegate })
+    const { taskId } = await handler({ question: 'q?', namespace: 'tenant-a' })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    const status = queue.status(taskId)
+    expect(status?.profile).toBe('researcher')
+    expect(status?.status).toBe('completed')
+    expect(queue.history({ namespace: 'tenant-a' }).length).toBe(1)
+    expect(queue.history({ namespace: 'tenant-b' }).length).toBe(0)
+  })
+
+  it('is idempotent on identical inputs', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateResearchHandler({ queue, delegate: stubDelegate })
+    const a = await handler({ question: 'q?', namespace: 'x' })
+    const b = await handler({ question: 'q?', namespace: 'x' })
+    expect(b.taskId).toBe(a.taskId)
+  })
+
+  it('exposes a description that warns about cross-tenant namespace risk', () => {
+    expect(DELEGATE_RESEARCH_TOOL_NAME).toBe('delegate_research')
+    expect(DELEGATE_RESEARCH_DESCRIPTION).toMatch(/namespace/i)
+    expect((DELEGATE_RESEARCH_INPUT_SCHEMA as { required: string[] }).required).toEqual([
+      'question',
+      'namespace',
+    ])
+  })
+})
diff --git a/tests/mcp/delegation-history.test.ts b/tests/mcp/delegation-history.test.ts
new file mode 100644
index 0000000..a969269
--- /dev/null
+++ b/tests/mcp/delegation-history.test.ts
@@ -0,0 +1,128 @@
+import { describe, expect, it } from 'vitest'
+import { InMemoryFeedbackStore } from '../../src/mcp/feedback-store'
+import { DelegationTaskQueue } from '../../src/mcp/task-queue'
+import { createDelegateFeedbackHandler } from '../../src/mcp/tools/delegate-feedback'
+import {
+  createDelegationHistoryHandler,
+  validateDelegationHistoryArgs,
+} from '../../src/mcp/tools/delegation-history'
+
+function settleTwice(): Promise<void> {
+  return new Promise((resolve) => {
+    setImmediate(() => setImmediate(() => resolve()))
+  })
+}
+
+describe('validateDelegationHistoryArgs', () => {
+  it('accepts an empty object', () => {
+    expect(validateDelegationHistoryArgs({})).toEqual({})
+  })
+  it('rejects an out-of-range limit', () => {
+    expect(() => validateDelegationHistoryArgs({ limit: 501 })).toThrow(RangeError)
+  })
+  it('rejects an unknown profile', () => {
+    expect(() => validateDelegationHistoryArgs({ profile: 'judge' })).toThrow(TypeError)
+  })
+})
+
+describe('createDelegationHistoryHandler', () => {
+  it('filters by namespace, profile, since', async () => {
+    let ts = 1_700_000_000_000
+    const queue = new DelegationTaskQueue({
+      now: () => {
+        ts += 1000
+        return new Date(ts).toISOString()
+      },
+    })
+    queue.submit({
+      profile: 'coder',
+      args: { goal: 'g', repoRoot: '/r' },
+      namespace: 'a',
+      run: async () => ({}) as never,
+    })
+    queue.submit({
+      profile: 'researcher',
+      args: { question: 'q', namespace: 'a' },
+      namespace: 'a',
+      run: async () => ({}) as never,
+    })
+    queue.submit({
+      profile: 'coder',
+      args: { goal: 'g2', repoRoot: '/r' },
+      namespace: 'b',
+      run: async () => ({}) as never,
+    })
+    await settleTwice()
+    const handler = createDelegationHistoryHandler({ queue })
+    const all = await handler({})
+    expect(all.delegations.length).toBe(3)
+    const aOnly = await handler({ namespace: 'a' })
+    expect(aOnly.delegations.length).toBe(2)
+    expect(aOnly.delegations.every((entry) => entry.namespace === 'a')).toBe(true)
+    const coderOnly = await handler({ profile: 'coder' })
+    expect(coderOnly.delegations.every((entry) => entry.profile === 'coder')).toBe(true)
+    expect(coderOnly.delegations.length).toBe(2)
+  })
+
+  it('honors limit', async () => {
+    const queue = new DelegationTaskQueue()
+    for (let i = 0; i < 5; i += 1) {
+      queue.submit({
+        profile: 'coder',
+        args: { goal: `g${i}`, repoRoot: '/r' },
+        run: async () => ({}) as never,
+      })
+    }
+    await settleTwice()
+    const handler = createDelegationHistoryHandler({ queue })
+    const limited = await handler({ limit: 2 })
+    expect(limited.delegations.length).toBe(2)
+  })
+
+  it('surfaces feedback cross-references inline', async () => {
+    const queue = new DelegationTaskQueue()
+    const store = new InMemoryFeedbackStore()
+    const submitted = queue.submit({
+      profile: 'coder',
+      args: { goal: 'g', repoRoot: '/r' },
+      run: async () => ({}) as never,
+    })
+    await settleTwice()
+    const feedback = createDelegateFeedbackHandler({ queue, store })
+    await feedback({
+      refersTo: { kind: 'delegation', ref: submitted.taskId },
+      rating: { score: 0.7, notes: 'shipped' },
+      by: 'user',
+    })
+    const history = await createDelegationHistoryHandler({ queue })({})
+    const entry = history.delegations.find((e) => e.taskId === submitted.taskId)
+    expect(entry?.feedback?.[0]?.notes).toBe('shipped')
+  })
+
+  it('filters by `since`', async () => {
+    let ts = 1_700_000_000_000
+    const queue = new DelegationTaskQueue({
+      now: () => {
+        ts += 1000
+        return new Date(ts).toISOString()
+      },
+    })
+    queue.submit({
+      profile: 'coder',
+      args: { goal: 'g1', repoRoot: '/r' },
+      run: async () => ({}) as never,
+    })
+    await settleTwice()
+    const cutoff = new Date(ts + 500).toISOString()
+    queue.submit({
+      profile: 'coder',
+      args: { goal: 'g2', repoRoot: '/r' },
+      run: async () => ({}) as never,
+    })
+    await settleTwice()
+    const handler = createDelegationHistoryHandler({ queue })
+    const recent = await handler({ since: cutoff })
+    expect(recent.delegations.length).toBe(1)
+    expect((recent.delegations[0]?.args as { goal: string }).goal).toBe('g2')
+  })
+})
diff --git a/tests/mcp/delegation-status.test.ts b/tests/mcp/delegation-status.test.ts
new file mode 100644
index 0000000..3e83ad7
--- /dev/null
+++ b/tests/mcp/delegation-status.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from 'vitest'
+import { NotFoundError } from '../../src/errors'
+import { DelegationTaskQueue } from '../../src/mcp/task-queue'
+import {
+  createDelegationStatusHandler,
+  validateDelegationStatusArgs,
+} from '../../src/mcp/tools/delegation-status'
+
+function deferred<T>(): {
+  promise: Promise<T>
+  resolve: (value: T) => void
+} {
+  let resolve!: (value: T) => void
+  const promise = new Promise<T>((r) => {
+    resolve = r
+  })
+  return { promise, resolve }
+}
+
+describe('validateDelegationStatusArgs', () => {
+  it('requires a non-empty taskId', () => {
+    expect(() => validateDelegationStatusArgs({})).toThrow(TypeError)
+    expect(() => validateDelegationStatusArgs({ taskId: '   ' })).toThrow(TypeError)
+  })
+})
+
+describe('createDelegationStatusHandler', () => {
+  it('walks the lifecycle pending → running → completed', async () => {
+    const queue = new DelegationTaskQueue()
+    const d = deferred<{ ok: true }>()
+    const submitted = queue.submit({
+      profile: 'coder',
+      args: { goal: 'g', repoRoot: '/r' },
+      run: async () => await d.promise,
+    })
+    const handler = createDelegationStatusHandler({ queue })
+    const pending = await handler({ taskId: submitted.taskId })
+    expect(pending.status).toBe('pending')
+    await new Promise((r) => setImmediate(r))
+    const running = await handler({ taskId: submitted.taskId })
+    expect(running.status).toBe('running')
+    d.resolve({ ok: true })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    const completed = await handler({ taskId: submitted.taskId })
+    expect(completed.status).toBe('completed')
+    expect(completed.completedAt).toBeTypeOf('string')
+  })
+
+  it('throws NotFoundError for unknown taskIds', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegationStatusHandler({ queue })
+    await expect(handler({ taskId: 'dlg-missing' })).rejects.toBeInstanceOf(NotFoundError)
+  })
+
+  it('surfaces failed status with the error payload', async () => {
+    const queue = new DelegationTaskQueue()
+    const submitted = queue.submit({
+      profile: 'coder',
+      args: { goal: 'g', repoRoot: '/r' },
+      run: async () => {
+        throw new Error('boom')
+      },
+    })
+    const handler = createDelegationStatusHandler({ queue })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    const status = await handler({ taskId: submitted.taskId })
+    expect(status.status).toBe('failed')
+    expect(status.error?.message).toBe('boom')
+  })
+})
diff --git a/tests/mcp/idempotency.test.ts b/tests/mcp/idempotency.test.ts
new file mode 100644
index 0000000..2d2b5e8
--- /dev/null
+++ b/tests/mcp/idempotency.test.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it } from 'vitest'
+import type { CoderDelegate, ResearcherDelegate } from '../../src/mcp/delegates'
+import { DelegationTaskQueue } from '../../src/mcp/task-queue'
+import { createDelegateCodeHandler } from '../../src/mcp/tools/delegate-code'
+import { createDelegateResearchHandler } from '../../src/mcp/tools/delegate-research'
+
+const coderStub: CoderDelegate = async () => ({
+  branch: 'feat/x',
+  patch: '',
+  testResult: { passed: true, output: '' },
+  typecheckResult: { passed: true, output: '' },
+  diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },
+})
+
+const researcherStub: ResearcherDelegate = async () => ({
+  items: [],
+  citations: [],
+  proposedWrites: [],
+})
+
+describe('MCP idempotency — duplicate calls return the same taskId', () => {
+  it('coder: same arguments → same taskId; counted once in history', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({ queue, delegate: coderStub })
+    const args = { goal: 'fix nav', repoRoot: '/r', variants: 1, config: { maxDiffLines: 100 } }
+    const a = await handler(args)
+    const b = await handler(args)
+    const c = await handler(args)
+    expect(b.taskId).toBe(a.taskId)
+    expect(c.taskId).toBe(a.taskId)
+    expect(queue.history().length).toBe(1)
+  })
+
+  it('coder: mutated config produces a fresh taskId', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateCodeHandler({ queue, delegate: coderStub })
+    const a = await handler({ goal: 'fix', repoRoot: '/r', config: { maxDiffLines: 100 } })
+    const b = await handler({ goal: 'fix', repoRoot: '/r', config: { maxDiffLines: 200 } })
+    expect(b.taskId).not.toBe(a.taskId)
+  })
+
+  it('researcher: same arguments → same taskId', async () => {
+    const queue = new DelegationTaskQueue()
+    const handler = createDelegateResearchHandler({ queue, delegate: researcherStub })
+    const a = await handler({ question: 'who?', namespace: 'n', sources: ['web', 'twitter'] })
+    const b = await handler({ question: 'who?', namespace: 'n', sources: ['twitter', 'web'] })
+    // The hash canonicalizes by key sort, but `sources` is an array — order matters.
+    // This ensures order-sensitivity for arrays (the agent should pass a canonical order).
+    expect(b.taskId).not.toBe(a.taskId)
+    const c = await handler({ question: 'who?', namespace: 'n', sources: ['web', 'twitter'] })
+    expect(c.taskId).toBe(a.taskId)
+  })
+})
diff --git a/tests/mcp/server-integration.test.ts b/tests/mcp/server-integration.test.ts
new file mode 100644
index 0000000..cbe1f45
--- /dev/null
+++ b/tests/mcp/server-integration.test.ts
@@ -0,0 +1,181 @@
+import { describe, expect, it } from 'vitest'
+import type { CoderDelegate, ResearcherDelegate } from '../../src/mcp/delegates'
+import {
+  createInProcessTransport,
+  createMcpServer,
+  type JsonRpcResponse,
+} from '../../src/mcp/server'
+
+const coderStub: CoderDelegate = async () => ({
+  branch: 'feat/y',
+  patch: '',
+  testResult: { passed: true, output: 'ok' },
+  typecheckResult: { passed: true, output: 'ok' },
+  diffStats: { filesChanged: 1, insertions: 1, deletions: 0 },
+})
+
+const researcherStub: ResearcherDelegate = async () => ({
+  items: [
+    {
+      id: 'i-1',
+      namespace: 'tenant-a',
+      claim: 'cpg-founders use Twitter heavily',
+      evidence: [{ source: 'twitter', capturedAt: 0 }],
+      confidence: 0.6,
+      authoredBy: { kind: 'agent', id: 'r' },
+    },
+  ],
+  citations: [{ url: 'https://x.com', quote: 'q', confidence: 0.5 }],
+  proposedWrites: [],
+})
+
+async function rpcCall(
+  server: ReturnType<typeof createMcpServer>,
+  method: string,
+  params: Record<string, unknown> = {},
+  id: number = 1,
+): Promise<JsonRpcResponse | null> {
+  return server.handle({ jsonrpc: '2.0', id, method, params })
+}
+
+describe('createMcpServer — JSON-RPC surface', () => {
+  it('responds to initialize + tools/list with the registered tools', async () => {
+    const server = createMcpServer({
+      coderDelegate: coderStub,
+      researcherDelegate: researcherStub,
+    })
+    const init = await rpcCall(server, 'initialize', {}, 0)
+    expect(init?.result).toMatchObject({
+      protocolVersion: '2024-11-05',
+      capabilities: { tools: {} },
+      serverInfo: { name: 'agent-runtime-mcp' },
+    })
+    const listed = await rpcCall(server, 'tools/list', {}, 1)
+    const names = (listed?.result as { tools: { name: string }[] }).tools.map((t) => t.name).sort()
+    expect(names).toEqual([
+      'delegate_code',
+      'delegate_feedback',
+      'delegate_research',
+      'delegation_history',
+      'delegation_status',
+    ])
+  })
+
+  it('omits delegate_code when coderDelegate is not wired', async () => {
+    const server = createMcpServer({ researcherDelegate: researcherStub })
+    const listed = await rpcCall(server, 'tools/list', {}, 1)
+    const names = (listed?.result as { tools: { name: string }[] }).tools.map((t) => t.name)
+    expect(names).not.toContain('delegate_code')
+    expect(names).toContain('delegate_research')
+  })
+
+  it('routes tools/call through the handler and returns structuredContent', async () => {
+    const server = createMcpServer({ coderDelegate: coderStub })
+    const call = await rpcCall(server, 'tools/call', {
+      name: 'delegate_code',
+      arguments: { goal: 'fix bug', repoRoot: '/r' },
+    })
+    const result = call?.result as {
+      content: { type: string; text: string }[]
+      structuredContent: { taskId: string }
+    }
+    expect(result.content[0]?.type).toBe('text')
+    const parsed = JSON.parse(result.content[0]!.text) as { taskId: string }
+    expect(parsed.taskId).toBe(result.structuredContent.taskId)
+    expect(parsed.taskId).toMatch(/^dlg-/)
+  })
+
+  it('returns -32602 on validation failures', async () => {
+    const server = createMcpServer({ coderDelegate: coderStub })
+    const call = await rpcCall(server, 'tools/call', {
+      name: 'delegate_code',
+      arguments: { goal: '', repoRoot: '/r' },
+    })
+    expect(call?.error?.code).toBe(-32602)
+  })
+
+  it('returns -32601 for unknown tools', async () => {
+    const server = createMcpServer({ coderDelegate: coderStub })
+    const call = await rpcCall(server, 'tools/call', { name: 'delegate_evaluation' })
+    expect(call?.error?.code).toBe(-32601)
+  })
+
+  it('drives the full lifecycle end-to-end: delegate → status → feedback → history', async () => {
+    const server = createMcpServer({
+      coderDelegate: coderStub,
+      researcherDelegate: researcherStub,
+    })
+    const created = await rpcCall(server, 'tools/call', {
+      name: 'delegate_research',
+      arguments: { question: 'who engages cpg-founders?', namespace: 'tenant-a' },
+    })
+    const taskId = (created?.result as { structuredContent: { taskId: string } }).structuredContent
+      .taskId
+
+    // Wait for the async run to settle.
+    for (let i = 0; i < 5; i += 1) await new Promise((r) => setImmediate(r))
+
+    const status = await rpcCall(server, 'tools/call', {
+      name: 'delegation_status',
+      arguments: { taskId },
+    })
+    expect(
+      (status?.result as { structuredContent: { status: string } }).structuredContent.status,
+    ).toBe('completed')
+
+    const feedback = await rpcCall(server, 'tools/call', {
+      name: 'delegate_feedback',
+      arguments: {
+        refersTo: { kind: 'delegation', ref: taskId },
+        rating: { score: 0.85, label: 'good', notes: 'cited the right source' },
+        by: 'agent',
+        namespace: 'tenant-a',
+      },
+    })
+    expect(
+      (feedback?.result as { structuredContent: { recorded: true; id: string } }).structuredContent
+        .recorded,
+    ).toBe(true)
+
+    const history = await rpcCall(server, 'tools/call', {
+      name: 'delegation_history',
+      arguments: { namespace: 'tenant-a' },
+    })
+    const entries = (
+      history?.result as {
+        structuredContent: { delegations: Array<{ taskId: string; feedback?: unknown[] }> }
+      }
+    ).structuredContent.delegations
+    expect(entries.find((e) => e.taskId === taskId)?.feedback?.length).toBe(1)
+  })
+})
+
+describe('createMcpServer — stdio transport', () => {
+  it('handles a single JSON-RPC line through serve() and writes a response', async () => {
+    const server = createMcpServer({ coderDelegate: coderStub })
+    const { transport, clientWrite, clientClose, readServer } = createInProcessTransport()
+    const servePromise = server.serve(transport)
+    clientWrite(JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'initialize' }))
+    clientWrite(JSON.stringify({ jsonrpc: '2.0', id: 2, method: 'tools/list' }))
+    // Allow drain.
+    for (let i = 0; i < 6; i += 1) await new Promise((r) => setImmediate(r))
+    const responses = await readServer()
+    expect(responses.length).toBeGreaterThanOrEqual(2)
+    const list = responses.find((r) => r.id === 2)
+    expect((list?.result as { tools: unknown[] }).tools.length).toBe(4)
+    clientClose()
+    await servePromise
+  })
+
+  it('emits a parse error for malformed JSON', async () => {
+    const server = createMcpServer({ coderDelegate: coderStub })
+    const { transport, clientWrite, clientClose, readServer } = createInProcessTransport()
+    const servePromise = server.serve(transport)
+    clientWrite('{not json')
+    for (let i = 0; i < 4; i += 1) await new Promise((r) => setImmediate(r))
+    const responses = await readServer()
+    expect(responses[0]?.error?.code).toBe(-32700)
+    clientClose()
+    await servePromise
+  })
+})
diff --git a/tests/mcp/task-queue.test.ts b/tests/mcp/task-queue.test.ts
new file mode 100644
index 0000000..f0b67ac
--- /dev/null
+++ b/tests/mcp/task-queue.test.ts
@@ -0,0 +1,235 @@
+import { describe, expect, it } from 'vitest'
+import { DelegationTaskQueue, hashIdempotencyInput } from '../../src/mcp/task-queue'
+import type { DelegateCodeArgs, DelegateResearchArgs } from '../../src/mcp/types'
+
+const codeArgs: DelegateCodeArgs = { goal: 'fix bug', repoRoot: '/repo' }
+const researchArgs: DelegateResearchArgs = { question: 'q?', namespace: 'ns-1' }
+
+function deferred<T>(): {
+  promise: Promise<T>
+  resolve: (value: T) => void
+  reject: (reason: unknown) => void
+} {
+  let resolve!: (value: T) => void
+  let reject!: (reason: unknown) => void
+  const promise = new Promise<T>((res, rej) => {
+    resolve = res
+    reject = rej
+  })
+  return { promise, resolve, reject }
+}
+
+describe('DelegationTaskQueue.submit', () => {
+  it('returns a taskId immediately and tracks pending → running → completed', async () => {
+    const queue = new DelegationTaskQueue()
+    const d = deferred<{
+      output: {
+        branch: string
+        patch: string
+        testResult: { passed: boolean; output: string }
+        typecheckResult: { passed: boolean; output: string }
+        diffStats: { filesChanged: number; insertions: number; deletions: number }
+      }
+    }>()
+    const { taskId, reused } = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      run: async () => (await d.promise).output,
+    })
+    expect(reused).toBe(false)
+    expect(taskId).toMatch(/^dlg-/)
+    expect(queue.status(taskId)?.status).toBe('pending')
+    // microtask tick to flip to running
+    await new Promise((r) => setImmediate(r))
+    expect(queue.status(taskId)?.status).toBe('running')
+    d.resolve({
+      output: {
+        branch: 'b',
+        patch: '',
+        testResult: { passed: true, output: '' },
+        typecheckResult: { passed: true, output: '' },
+        diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },
+      },
+    })
+    await new Promise((r) => setImmediate(r))
+    const status = queue.status(taskId)
+    expect(status?.status).toBe('completed')
+    expect(status?.completedAt).toBeTypeOf('string')
+    expect(status?.result?.profile).toBe('coder')
+  })
+
+  it('records failure when the run function throws', async () => {
+    const queue = new DelegationTaskQueue()
+    const { taskId } = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      run: async () => {
+        throw new Error('runner exploded')
+      },
+    })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    const status = queue.status(taskId)
+    expect(status?.status).toBe('failed')
+    expect(status?.error?.message).toBe('runner exploded')
+    expect(status?.error?.kind).toBe('Error')
+  })
+
+  it('reuses a taskId when idempotencyKey matches a known record', () => {
+    const queue = new DelegationTaskQueue()
+    const key = hashIdempotencyInput({ x: 1 })
+    const a = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      idempotencyKey: key,
+      run: async () => ({}) as never,
+    })
+    const b = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      idempotencyKey: key,
+      run: async () => ({}) as never,
+    })
+    expect(b.reused).toBe(true)
+    expect(b.taskId).toBe(a.taskId)
+  })
+
+  it('reports progress updates from the run function', async () => {
+    const queue = new DelegationTaskQueue()
+    const d = deferred<void>()
+    const { taskId } = queue.submit({
+      profile: 'researcher',
+      args: researchArgs,
+      run: async (ctx) => {
+        ctx.report({ iteration: 1, phase: 'searching' })
+        await d.promise
+        ctx.report({ iteration: 2, phase: 'finalizing' })
+        return { items: [], citations: [], proposedWrites: [] }
+      },
+    })
+    await new Promise((r) => setImmediate(r))
+    expect(queue.status(taskId)?.progress).toEqual({ iteration: 1, phase: 'searching' })
+    d.resolve()
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    expect(queue.status(taskId)?.status).toBe('completed')
+  })
+})
+
+describe('DelegationTaskQueue.cancel', () => {
+  it('aborts an in-flight run and marks the record cancelled', async () => {
+    const queue = new DelegationTaskQueue()
+    let abortObserved = false
+    const { taskId } = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      run: async (ctx) =>
+        new Promise<never>((_, reject) => {
+          ctx.signal.addEventListener('abort', () => {
+            abortObserved = true
+            reject(new DOMException('aborted', 'AbortError'))
+          })
+        }),
+    })
+    await new Promise((r) => setImmediate(r))
+    expect(queue.cancel(taskId)).toBe(true)
+    await new Promise((r) => setImmediate(r))
+    const status = queue.status(taskId)
+    expect(status?.status).toBe('cancelled')
+    expect(status?.error?.kind).toBe('CancelledError')
+    expect(abortObserved).toBe(true)
+  })
+
+  it('returns false for unknown taskIds and is idempotent on terminal records', async () => {
+    const queue = new DelegationTaskQueue()
+    expect(queue.cancel('nope')).toBe(false)
+    const { taskId } = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      run: async () => {
+        throw new Error('synthetic failure')
+      },
+    })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    expect(queue.status(taskId)?.status).toBe('failed')
+    expect(queue.cancel(taskId)).toBe(false)
+  })
+})
+
+describe('DelegationTaskQueue.history + attachFeedback', () => {
+  it('filters by namespace, profile, and since; orders newest-first', async () => {
+    let n = 0
+    const queue = new DelegationTaskQueue({
+      now: () => {
+        n += 1
+        return new Date(1700000000000 + n * 1000).toISOString()
+      },
+    })
+    queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      namespace: 'a',
+      run: async () => ({}) as never,
+    })
+    queue.submit({
+      profile: 'researcher',
+      args: researchArgs,
+      namespace: 'a',
+      run: async () => ({}) as never,
+    })
+    queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      namespace: 'b',
+      run: async () => ({}) as never,
+    })
+    await new Promise((r) => setImmediate(r))
+    await new Promise((r) => setImmediate(r))
+    expect(queue.history({ namespace: 'a' }).map((e) => e.profile)).toEqual(['researcher', 'coder'])
+    expect(queue.history({ profile: 'coder' }).map((e) => e.namespace)).toEqual(['b', 'a'])
+    expect(queue.history({ limit: 1 }).length).toBe(1)
+  })
+
+  it('attaches feedback snapshots to the matching record', async () => {
+    const queue = new DelegationTaskQueue()
+    const { taskId } = queue.submit({
+      profile: 'coder',
+      args: codeArgs,
+      run: async () => ({}) as never,
+    })
+    await new Promise((r) => setImmediate(r))
+    expect(
+      queue.attachFeedback(taskId, {
+        id: 'fbk-1',
+        score: 0.9,
+        by: 'agent',
+        notes: 'looks good',
+        capturedAt: new Date().toISOString(),
+      }),
+    ).toBe(true)
+    expect(
+      queue.attachFeedback('nope', {
+        id: 'fbk-2',
+        score: 0.1,
+        by: 'agent',
+        notes: 'irrelevant',
+        capturedAt: new Date().toISOString(),
+      }),
+    ).toBe(false)
+    const [entry] = queue.history()
+    expect(entry?.feedback?.[0]?.notes).toBe('looks good')
+  })
+})
+
+describe('hashIdempotencyInput', () => {
+  it('produces stable hashes regardless of property order', () => {
+    expect(hashIdempotencyInput({ a: 1, b: 2 })).toBe(hashIdempotencyInput({ b: 2, a: 1 }))
+  })
+  it('differs when nested values differ', () => {
+    expect(hashIdempotencyInput({ a: 1 })).not.toBe(hashIdempotencyInput({ a: 2 }))
+  })
+  it('treats undefined as absent', () => {
+    expect(hashIdempotencyInput({ a: 1, b: undefined })).toBe(hashIdempotencyInput({ a: 1 }))
+  })
+})
diff --git a/tsup.config.ts b/tsup.config.ts
index 7f29af4..99e54b3 100644
--- a/tsup.config.ts
+++ b/tsup.config.ts
@@ -8,6 +8,8 @@ export default defineConfig({
     agent: 'src/agent/index.ts',
     loops: 'src/loops/index.ts',
     profiles: 'src/profiles/index.ts',
+    'mcp/index': 'src/mcp/index.ts',
+    'mcp/bin': 'src/mcp/bin.ts',
   },
   format: ['esm'],
   dts: true,