tangle-network · tangletools · May 24, 2026 · May 24, 2026
diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@ pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval
 | `deriveExecutionId` | Stable substrate executionId for `X-Execution-ID` cross-process reconnect |
 | `startRuntimeRun` | Canonical production-run row + cost ledger |
 | `defineAgent` | Declarative per-vertical agent manifest — surfaces, knowledge, rubric, run fn |
+| `createMcpServer` (`/mcp`) + `agent-runtime-mcp` bin | Stdio MCP server with the 5 delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) |
 | `resolveChatModel` / `validateChatModelId` / `getModels` | Router catalog fetch + fail-closed admission + precedence resolver |
 | `decideKnowledgeReadiness` | `ready` / `blocked` / `caveat` branch for routes / UI |
 | `createOpenAICompatibleBackend` | OpenAI-compatible streaming backend (TCloud / cli-bridge) |
@@ -173,6 +174,115 @@ await run.persist({ runtimeEvents: telemetry.events })
 
 Full runnable: [`examples/runtime-run/`](./examples/runtime-run/).
 
+## Delegation tools (MCP)
+
+`@tangle-network/agent-runtime/mcp` ships a stdio MCP server that exposes
+five delegation tools to a sandbox coding-harness agent (claude-code,
+codex, opencode, ...). The product agent itself runs inside a sandbox
+during a chat; when it needs a long-running coder or researcher loop, it
+calls one of these tools instead of doing the work in-line.
+
+| Tool | Kind | Use |
+|---|---|---|
+| `delegate_code` | async | Code-modification task — returns a `taskId`; poll `delegation_status` for the patch |
+| `delegate_research` | async | Source-grounded research task — returns a `taskId`; poll for items + citations |
+| `delegate_feedback` | sync | Append an agent/user/judge rating against a delegation, artifact, or outcome |
+| `delegation_status` | sync | Snapshot of a delegation's state machine (`pending` → `running` → `completed` \| `failed` \| `cancelled`) |
+| `delegation_history` | sync | Newest-first read of past delegations + attached feedback |
+
+Mount the server from a Node entry point:
+
+```ts
+import { Sandbox } from '@tangle-network/sandbox'
+import {
+  createMcpServer,
+  createDefaultCoderDelegate,
+} from '@tangle-network/agent-runtime/mcp'
+
+const sandboxClient = new Sandbox({ apiKey: process.env.SANDBOX_API_KEY! })
+const server = createMcpServer({
+  coderDelegate: createDefaultCoderDelegate({ sandboxClient }),
+  // researcherDelegate: wire your own — see below.
+})
+await server.serve() // reads JSON-RPC from stdin, writes responses to stdout
+```
+
+Or run the ready-made bin:
+
+```bash
+SANDBOX_API_KEY=sk_sandbox_... agent-runtime-mcp
+```
+
+The bin auto-wires the coder delegate and, when
+`@tangle-network/agent-knowledge` is installed as a peer, the researcher
+delegate. Environment knobs:
+
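+- `SANDBOX_API_KEY` — required unless both `MCP_DISABLE_CODER` and `MCP_DISABLE_RESEARCHER` are set (or `AGENT_RUNTIME_MCP_ALLOW_NO_KEY` is set, for diagnostics)
+- `SANDBOX_BASE_URL` — sandbox-SDK base URL override
+- `MCP_MAX_CONCURRENT_SANDBOXES` — kernel `maxConcurrency` cap (default 4, clamped to at most 32; invalid values fall back to 4)
+- `MCP_CODER_FANOUT_HARNESSES` — comma-separated harness ids for `variants > 1`
+- `MCP_DISABLE_CODER` / `MCP_DISABLE_RESEARCHER` — set to any non-empty value (e.g. `1`) to omit the matching tool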
+
+### Async semantics
+
+Coder + researcher delegations are **fire-and-poll**. The handler returns
+a `taskId` immediately; the agent calls `delegation_status(taskId)` until
+the state is terminal. Identical inputs return the same `taskId` —
+duplicate-call safety is built in via canonical-form hashing.
+
+```
+agent → delegate_code(goal, repoRoot)        → { taskId, estimatedDurationMs }
+agent → delegation_status(taskId)            → { status: 'running', progress: { ... } }
+... (minutes pass)
+agent → delegation_status(taskId)            → { status: 'completed', result: { profile: 'coder', output: <CoderOutput> } }
+agent → delegate_feedback(refersTo, rating)  → { recorded: true, id }
+```
+
+Task state lives in-memory inside the server process. A restart drops
+pending delegations — Phase 2 will move state into sqlite.
+
+### Wiring a researcher delegate
+
+`agent-runtime` cannot take a hard dependency on `@tangle-network/agent-knowledge`
+(it would induce a dependency cycle); the bin only loads it as an optional peer.
+When you mount the server yourself, wire the researcher delegate from your own code:
+
+```ts
+import { runLoop } from '@tangle-network/agent-runtime/loops'
+import { researcherProfile, multiHarnessResearcherFanout } from '@tangle-network/agent-knowledge/profiles'
+import { createMcpServer, type ResearcherDelegate } from '@tangle-network/agent-runtime/mcp'
+
+const researcherDelegate: ResearcherDelegate = async (args, ctx) => {
+  const task = {
+    question: args.question,
+    knowledgeNamespace: args.namespace,
+    scope: args.scope,
+    sources: args.sources,
+    /* ...map args.config.recencyWindow ISO strings (since / until) to Date objects */
+  }
+  if ((args.variants ?? 1) <= 1) {
+    const preset = researcherProfile({ task })
+    const result = await runLoop({
+      driver: { /* single-shot */ async plan(t, h) { return h.length === 0 ? [t] : [] }, decide(h) { return h.length > 0 ? 'pick-winner' : 'fail' } },
+      agentRun: preset.agentRunSpec, output: preset.output, validator: preset.validator,
+      task, ctx: { sandboxClient, signal: ctx.signal }, maxIterations: 1,
+    })
+    return result.winner!.output
+  }
+  const fanout = multiHarnessResearcherFanout({ task })
+  const result = await runLoop({
+    driver: fanout.driver,
+    agentRuns: fanout.agentRuns.slice(0, args.variants),
+    output: fanout.output, validator: fanout.validator,
+    task, ctx: { sandboxClient, signal: ctx.signal },
+    maxIterations: args.variants ?? 1,
+  })
+  return result.winner!.output
+}
+
+createMcpServer({ researcherDelegate })
+```
+
 ## Error taxonomy
 
 | Error | When |

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.19.0",
+  "version": "0.20.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {
@@ -43,8 +43,16 @@
       "types": "./dist/profiles.d.ts",
       "import": "./dist/profiles.js",
       "default": "./dist/profiles.js"
+    },
+    "./mcp": {
+      "types": "./dist/mcp/index.d.ts",
+      "import": "./dist/mcp/index.js",
+      "default": "./dist/mcp/index.js"
     }
   },
+  "bin": {
+    "agent-runtime-mcp": "./dist/mcp/bin.js"
+  },
   "files": [
     "dist",
     "README.md"
@@ -88,6 +96,12 @@
   "license": "MIT",
   "packageManager": "pnpm@10.28.0",
   "peerDependencies": {
+    "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
     "@tangle-network/sandbox": ">=0.1.2 <0.3.0"
+  },
+  "peerDependenciesMeta": {
+    "@tangle-network/agent-knowledge": {
+      "optional": true
+    }
   }
 }
diff --git a/src/mcp/bin.ts b/src/mcp/bin.ts
@@ -0,0 +1,218 @@
+#!/usr/bin/env node
+
+/**
+ * @experimental
+ *
+ * `agent-runtime-mcp` — stdio MCP server entry point.
+ *
+ * Spins up a server with the default coder delegate (wired against the
+ * real `@tangle-network/sandbox` client) and, when the optional
+ * `@tangle-network/agent-knowledge` peer is installed, a researcher
+ * delegate against `multiHarnessResearcherFanout`.
+ *
+ * Environment variables:
+ *   SANDBOX_API_KEY                  required unless both MCP_DISABLE_* are set (or AGENT_RUNTIME_MCP_ALLOW_NO_KEY is set) — passed to `new Sandbox({ apiKey })`
+ *   SANDBOX_BASE_URL                 optional — sandbox-SDK base URL override
+ *   MCP_MAX_CONCURRENT_SANDBOXES     default 4, clamped to at most 32 — kernel maxConcurrency cap
+ *   MCP_CODER_FANOUT_HARNESSES       comma-separated harness ids to use for variants > 1
+ *   MCP_DISABLE_CODER                set to any non-empty value (e.g. `1`) to omit `delegate_code`
+ *   MCP_DISABLE_RESEARCHER           set to any non-empty value (e.g. `1`) to omit `delegate_research` even when peer is present
+ */
+
+import type { LoopSandboxClient } from '../loops'
+import { runLoop } from '../loops'
+import { createDefaultCoderDelegate, type ResearcherDelegate } from './delegates'
+import { createMcpServer } from './server'
+import type { ResearchOutputShape } from './types'
+
+async function main(): Promise<void> { // Entry point: reads env config, builds the delegates, then serves.
+  const fanoutHarnesses = parseHarnesses(process.env.MCP_CODER_FANOUT_HARNESSES)
+  const maxConcurrency = parseConcurrency(process.env.MCP_MAX_CONCURRENT_SANDBOXES)
+  const wantCoder = !process.env.MCP_DISABLE_CODER // truthiness check: any non-empty value (even "0") disables
+  const wantResearcher = !process.env.MCP_DISABLE_RESEARCHER // same truthiness rule as MCP_DISABLE_CODER
+
+  // Skip the sandbox client load entirely when no profile delegate needs it —
+  // the feedback + status + history tools are queue-bound and require no
+  // sandbox. Useful for tooling that mounts the MCP server purely for
+  // self-introspection.
+  const needsSandbox = wantCoder || wantResearcher
+  let sandboxClient: LoopSandboxClient | undefined
+  if (needsSandbox) {
+    const apiKey = process.env.SANDBOX_API_KEY
+    if (!apiKey && !process.env.AGENT_RUNTIME_MCP_ALLOW_NO_KEY) { // the escape hatch is also a truthiness check
+      process.stderr.write(
+        'agent-runtime-mcp: SANDBOX_API_KEY is required (set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or set MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to run the queue-only subset)\n',
+      )
+      process.exit(2) // abort before any client is constructed
+    }
+    sandboxClient = await loadSandboxClient(apiKey) // apiKey is undefined here only when AGENT_RUNTIME_MCP_ALLOW_NO_KEY is set
+  }
+
+  const coderDelegate =
+    wantCoder && sandboxClient
+      ? createDefaultCoderDelegate({
+          sandboxClient,
+          fanoutHarnesses,
+          maxConcurrency,
+        })
+      : undefined
+
+  const researcherDelegate =
+    wantResearcher && sandboxClient
+      ? await loadResearcherDelegate(sandboxClient, maxConcurrency) // resolves to undefined when the optional peer can't be loaded
+      : undefined
+
+  const server = createMcpServer({ coderDelegate, researcherDelegate }) // either delegate may be undefined
+
+  process.on('SIGINT', () => {
+    server.stop() // NOTE(review): not awaited before exit — confirm stop() is synchronous
+    process.exit(0)
+  })
+  process.on('SIGTERM', () => {
+    server.stop() // NOTE(review): same as SIGINT — confirm stop() is synchronous
+    process.exit(0)
+  })
+
+  await server.serve()
+}
+
+async function loadSandboxClient(apiKey: string | undefined): Promise<LoopSandboxClient> {
+  // Builds the sandbox client from the `@tangle-network/sandbox` peer dep. The
+  // import is dynamic so the bin stays importable when the peer is absent; a
+  // failed load or a missing `Sandbox` export exits the process with code 2.
+  const mod = await import('@tangle-network/sandbox').catch((err: unknown) => {
+    process.stderr.write(
+      `agent-runtime-mcp: failed to load @tangle-network/sandbox (${err instanceof Error ? err.message : String(err)}); install the peer dependency\n`,
+    )
+    process.exit(2) // never returns, so `mod` is always the loaded module below
+  })
+  const SandboxCtor = (mod as { Sandbox?: new (config: unknown) => LoopSandboxClient }).Sandbox
+  if (!SandboxCtor) {
+    process.stderr.write(
+      'agent-runtime-mcp: @tangle-network/sandbox does not export Sandbox; cannot construct client\n',
+    )
+    process.exit(2)
+  }
+  const baseUrl = process.env.SANDBOX_BASE_URL
+  return new SandboxCtor({
+    apiKey, // may be undefined; the caller decides whether that is allowed
+    ...(baseUrl ? { baseUrl } : {}), // only pass baseUrl when SANDBOX_BASE_URL is non-empty
+  })
+}
+
+interface ResearcherProfilePreset { // structural return shape expected from the peer's `researcherProfile` factory
+  agentRunSpec: Parameters<typeof runLoop>[0]['agentRun'] extends infer T ? NonNullable<T> : never // runLoop's `agentRun` option, minus null/undefined
+  output: Parameters<typeof runLoop>[0]['output'] // forwarded as runLoop's `output` option
+  validator: Parameters<typeof runLoop>[0]['validator'] // forwarded as runLoop's `validator` option
+}
+
+interface ResearcherFanoutPreset { // structural return shape expected from the peer's `multiHarnessResearcherFanout` factory
+  agentRuns: NonNullable<Parameters<typeof runLoop>[0]['agentRuns']> // runLoop's `agentRuns` option, minus null/undefined
+  output: Parameters<typeof runLoop>[0]['output'] // forwarded as runLoop's `output` option
+  validator: Parameters<typeof runLoop>[0]['validator'] // forwarded as runLoop's `validator` option
+  driver: Parameters<typeof runLoop>[0]['driver'] // forwarded as runLoop's `driver` option
+}
+
+async function loadResearcherDelegate(
+  sandboxClient: LoopSandboxClient,
+  maxConcurrency: number,
+): Promise<ResearcherDelegate | undefined> {
+  // Optional peer — when `@tangle-network/agent-knowledge` can't be imported
+  // this resolves to `undefined` and the caller passes no researcher
+  // delegate to the server. The specifier is held in a variable and resolved
+  // at runtime; TypeScript cannot see the peer, so the module is typed
+  // structurally rather than via its own declaration file.
+  const profilesSpecifier = '@tangle-network/agent-knowledge/profiles'
+  const mod = await import(profilesSpecifier).catch(() => undefined) // NOTE(review): swallows every import failure, not only a missing peer
+  if (!mod) return undefined
+  type SingleFactory = (opts: { task: unknown }) => ResearcherProfilePreset
+  type FanoutFactory = (opts: { task: unknown }) => ResearcherFanoutPreset
+  const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory })
+    .multiHarnessResearcherFanout
+  const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile
+  if (!fanoutFactory || !singleFactory) return undefined // peer loaded but missing either factory → no delegate
+
+  return async (args, ctx) => {
+    const task = { // maps the tool arguments onto the task object handed to the peer's factories
+      question: args.question,
+      knowledgeNamespace: args.namespace,
+      scope: args.scope,
+      sources: args.sources,
+      recencyWindow: args.config?.recencyWindow
+        ? {
+            since: args.config.recencyWindow.since
+              ? new Date(args.config.recencyWindow.since) // NOTE(review): an unparseable string yields an Invalid Date, not an error
+              : undefined,
+            until: args.config.recencyWindow.until
+              ? new Date(args.config.recencyWindow.until) // NOTE(review): same Invalid Date caveat as `since`
+              : undefined,
+          }
+        : undefined,
+      maxItems: args.config?.maxItems,
+      minConfidence: args.config?.minConfidence,
+    }
+    const variants = Math.max(1, Math.trunc(args.variants ?? 1)) // drops fractions, floors at 1; NOTE(review): NaN would pass through as NaN
+    ctx.report({ iteration: 0, phase: 'starting' })
+    if (variants <= 1) { // single path: one run of the `researcherProfile` preset
+      const preset = singleFactory({ task })
+      const result = await runLoop({
+        driver: {
+          name: 'mcp-researcher-single',
+          async plan(t, history) {
+            return history.length === 0 ? [t] : [] // plan the task once, then nothing further
+          },
+          decide(history) {
+            return history.length > 0 ? 'pick-winner' : 'fail'
+          },
+        },
+        agentRun: preset.agentRunSpec,
+        output: preset.output,
+        validator: preset.validator,
+        task,
+        ctx: { sandboxClient, signal: ctx.signal },
+        maxIterations: 1,
+        maxConcurrency,
+      })
+      const output = result.winner?.output
+      if (!output) throw new Error('researcher delegate produced no winner')
+      ctx.report({ iteration: 1, phase: 'completed' })
+      return output as ResearchOutputShape // cast: the peer's output type is not visible to this file
+    }
+    const fanout = fanoutFactory({ task }) // fanout path: variants > 1
+    const result = await runLoop({
+      driver: fanout.driver,
+      agentRuns: fanout.agentRuns.slice(0, variants), // at most `variants` of the preset's runs
+      output: fanout.output,
+      validator: fanout.validator,
+      task,
+      ctx: { sandboxClient, signal: ctx.signal },
+      maxIterations: variants,
+      maxConcurrency: Math.min(maxConcurrency, variants), // never more parallel runs than requested variants
+    })
+    const output = result.winner?.output
+    if (!output) throw new Error('researcher delegate fanout produced no winner')
+    ctx.report({ iteration: result.iterations.length, phase: 'completed' })
+    return output as ResearchOutputShape // cast: the peer's output type is not visible to this file
+  }
+}
+
+function parseHarnesses(raw: string | undefined): string[] | undefined { // comma-separated env value → list of harness ids
+  if (!raw) return undefined // unset or empty string
+  const list = raw
+    .split(',')
+    .map((entry) => entry.trim())
+    .filter(Boolean) // drop entries that are empty after trimming
+  return list.length > 0 ? list : undefined // e.g. "," yields no entries → undefined rather than []
+}
+
+function parseConcurrency(raw: string | undefined): number { // env value → integer concurrency cap in [1, 32]
+  if (!raw) return 4 // unset or empty → default of 4
+  const n = Number(raw)
+  if (!Number.isFinite(n) || n < 1) return 4 // non-numeric, infinite, or below 1 → default of 4
+  return Math.min(Math.trunc(n), 32) // drop any fraction, then clamp to at most 32
+}
+
+main().catch((err) => { // last-resort handler for anything main() rejects with
+  process.stderr.write(`agent-runtime-mcp: ${err instanceof Error ? err.stack : String(err)}\n`) // stderr only; README says stdout carries the JSON-RPC responses
+  process.exit(1)
+})