From 2cc58ac9b6a4d69412b565eb3809352193e841ca Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Sun, 24 May 2026 11:40:53 -0600 Subject: [PATCH] =?UTF-8?q?feat(0.20.0):=20MCP=20delegation=20tools=20?= =?UTF-8?q?=E2=80=94=20delegate=5Fcode,=20delegate=5Fresearch,=20delegate?= =?UTF-8?q?=5Ffeedback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New sub-export `@tangle-network/agent-runtime/mcp` and `agent-runtime-mcp` bin. Five tools exposed over stdio JSON-RPC (MCP 2024-11-05): - delegate_code async, idempotent — runs coderProfile / multi-harness fanout - delegate_research async, idempotent — runs an injected researcher delegate - delegate_feedback sync, append-only — every rating is its own event - delegation_status sync poll — state machine + progress + final result - delegation_history sync read — newest-first, filterable, feedback inline State lives in an in-memory DelegationTaskQueue (Phase 2 → sqlite). The server is topology-free; consumers wire coder + researcher delegates at construction. The bin auto-wires the default coder against the real Sandbox client and lazy-imports a researcher delegate when @tangle-network/agent-knowledge is installed as an optional peer. 61 new tests cover validation, idempotency, lifecycle, cancellation, namespace isolation, feedback cross-reference, and a full JSON-RPC end-to-end through both in-process and stdio transports. --- README.md | 110 ++++++++ package.json | 16 +- src/mcp/bin.ts | 218 ++++++++++++++++ src/mcp/delegates.ts | 144 +++++++++++ src/mcp/feedback-store.ts | 76 ++++++ src/mcp/index.ts | 100 ++++++++ src/mcp/server.ts | 337 +++++++++++++++++++++++++ src/mcp/task-queue.ts | 358 +++++++++++++++++++++++++++ src/mcp/tools/delegate-code.ts | 199 +++++++++++++++ src/mcp/tools/delegate-feedback.ts | 187 ++++++++++++++ src/mcp/tools/delegate-research.ts | 219 ++++++++++++++++ src/mcp/tools/delegation-history.ts | 98 ++++++++ src/mcp/tools/delegation-status.ts | 76 ++++++ src/mcp/types.ts | 223 +++++++++++++++++ tests/mcp/delegate-code.test.ts | 120 +++++++++ tests/mcp/delegate-feedback.test.ts | 135 ++++++++++ tests/mcp/delegate-research.test.ts | 84 +++++++ tests/mcp/delegation-history.test.ts | 128 ++++++++++ tests/mcp/delegation-status.test.ts | 72 ++++++ tests/mcp/idempotency.test.ts | 53 ++++ tests/mcp/server-integration.test.ts | 181 ++++++++++++++ tests/mcp/task-queue.test.ts | 235 ++++++++++++++++++ tsup.config.ts | 2 + 23 files changed, 3370 insertions(+), 1 deletion(-) create mode 100644 src/mcp/bin.ts create mode 100644 src/mcp/delegates.ts create mode 100644 src/mcp/feedback-store.ts create mode 100644 src/mcp/index.ts create mode 100644 src/mcp/server.ts create mode 100644 src/mcp/task-queue.ts create mode 100644 src/mcp/tools/delegate-code.ts create mode 100644 src/mcp/tools/delegate-feedback.ts create mode 100644 src/mcp/tools/delegate-research.ts create mode 100644 src/mcp/tools/delegation-history.ts create mode 100644 src/mcp/tools/delegation-status.ts create mode 100644 src/mcp/types.ts create mode 100644 tests/mcp/delegate-code.test.ts create mode 100644 tests/mcp/delegate-feedback.test.ts create mode 100644 tests/mcp/delegate-research.test.ts create mode 100644 tests/mcp/delegation-history.test.ts create mode 100644 tests/mcp/delegation-status.test.ts create mode 100644 tests/mcp/idempotency.test.ts create mode 100644 tests/mcp/server-integration.test.ts create mode 100644 tests/mcp/task-queue.test.ts diff --git a/README.md b/README.md index 8f6bd66..6ba5ac0 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval | `deriveExecutionId` | Stable substrate executionId for `X-Execution-ID` cross-process reconnect | | `startRuntimeRun` | Canonical production-run row + cost ledger | | `defineAgent` | Declarative per-vertical agent manifest — surfaces, knowledge, rubric, run fn | +| `createMcpServer` (`/mcp`) + `agent-runtime-mcp` bin | Stdio MCP server with the 5 delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) | | `resolveChatModel` / `validateChatModelId` / `getModels` | Router catalog fetch + fail-closed admission + precedence resolver | | `decideKnowledgeReadiness` | `ready` / `blocked` / `caveat` branch for routes / UI | | `createOpenAICompatibleBackend` | OpenAI-compatible streaming backend (TCloud / cli-bridge) | @@ -173,6 +174,115 @@ await run.persist({ runtimeEvents: telemetry.events }) Full runnable: [`examples/runtime-run/`](./examples/runtime-run/). +## Delegation tools (MCP) + +`@tangle-network/agent-runtime/mcp` ships a stdio MCP server that exposes +five delegation tools to a sandbox coding-harness agent (claude-code, +codex, opencode, ...). The product agent itself runs inside a sandbox +during a chat; when it needs a long-running coder or researcher loop, it +calls one of these tools instead of doing the work in-line. + +| Tool | Kind | Use | +|---|---|---| +| `delegate_code` | async | Code-modification task — returns a `taskId`; poll `delegation_status` for the patch | +| `delegate_research` | async | Source-grounded research task — returns a `taskId`; poll for items + citations | +| `delegate_feedback` | sync | Append an agent/user/judge rating against a delegation, artifact, or outcome | +| `delegation_status` | sync | Snapshot of a delegation's state machine (`pending` → `running` → `completed` \| `failed` \| `cancelled`) | +| `delegation_history` | sync | Newest-first read of past delegations + attached feedback | + +Mount the server from a Node entry point: + +```ts +import { Sandbox } from '@tangle-network/sandbox' +import { + createMcpServer, + createDefaultCoderDelegate, +} from '@tangle-network/agent-runtime/mcp' + +const sandboxClient = new Sandbox({ apiKey: process.env.SANDBOX_API_KEY! }) +const server = createMcpServer({ + coderDelegate: createDefaultCoderDelegate({ sandboxClient }), + // researcherDelegate: wire your own — see below. +}) +await server.serve() // reads JSON-RPC from stdin, writes responses to stdout +``` + +Or run the ready-made bin: + +```bash +SANDBOX_API_KEY=sk_sandbox_... agent-runtime-mcp +``` + +The bin auto-wires the coder delegate and, when +`@tangle-network/agent-knowledge` is installed as a peer, the researcher +delegate. Environment knobs: + +- `SANDBOX_API_KEY` — required (unless both `MCP_DISABLE_*` are set) +- `SANDBOX_BASE_URL` — sandbox-SDK base URL override +- `MCP_MAX_CONCURRENT_SANDBOXES` — kernel `maxConcurrency` cap (default 4) +- `MCP_CODER_FANOUT_HARNESSES` — comma-separated harness ids for `variants > 1` +- `MCP_DISABLE_CODER` / `MCP_DISABLE_RESEARCHER` — omit the matching tool + +### Async semantics + +Coder + researcher delegations are **fire-and-poll**. The handler returns +a `taskId` immediately; the agent calls `delegation_status(taskId)` until +the state is terminal. Identical inputs return the same `taskId` — +duplicate-call safety is built in via canonical-form hashing. + +``` +agent → delegate_code(goal, repoRoot) → { taskId, estimatedDurationMs } +agent → delegation_status(taskId) → { status: 'running', progress: { ... } } +... (minutes pass) +agent → delegation_status(taskId) → { status: 'completed', result: { profile: 'coder', output: } } +agent → delegate_feedback(refersTo, rating) → { recorded: true, id } +``` + +Task state lives in-memory inside the server process. A restart drops +pending delegations — Phase 2 will move state into sqlite. + +### Wiring a researcher delegate + +`agent-runtime` cannot depend on `@tangle-network/agent-knowledge` (it +would induce a dependency cycle). Wire the researcher delegate from your +own integration code: + +```ts +import { runLoop } from '@tangle-network/agent-runtime/loops' +import { researcherProfile, multiHarnessResearcherFanout } from '@tangle-network/agent-knowledge/profiles' +import { createMcpServer, type ResearcherDelegate } from '@tangle-network/agent-runtime/mcp' + +const researcherDelegate: ResearcherDelegate = async (args, ctx) => { + const task = { + question: args.question, + knowledgeNamespace: args.namespace, + scope: args.scope, + sources: args.sources, + /* ...map config.recencyWindow ISO strings to Date objects */ + } + if ((args.variants ?? 1) <= 1) { + const preset = researcherProfile({ task }) + const result = await runLoop({ + driver: { /* single-shot */ async plan(t, h) { return h.length === 0 ? [t] : [] }, decide(h) { return h.length > 0 ? 'pick-winner' : 'fail' } }, + agentRun: preset.agentRunSpec, output: preset.output, validator: preset.validator, + task, ctx: { sandboxClient, signal: ctx.signal }, maxIterations: 1, + }) + return result.winner!.output + } + const fanout = multiHarnessResearcherFanout({ task }) + const result = await runLoop({ + driver: fanout.driver, + agentRuns: fanout.agentRuns.slice(0, args.variants), + output: fanout.output, validator: fanout.validator, + task, ctx: { sandboxClient, signal: ctx.signal }, + maxIterations: args.variants ?? 1, + }) + return result.winner!.output +} + +createMcpServer({ researcherDelegate }) +``` + ## Error taxonomy | Error | When | diff --git a/package.json b/package.json index 1d44356..e70b2bf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/agent-runtime", - "version": "0.19.0", + "version": "0.20.0", "description": "Reusable runtime lifecycle for domain-specific agents.", "homepage": "https://github.com/tangle-network/agent-runtime#readme", "repository": { @@ -43,8 +43,16 @@ "types": "./dist/profiles.d.ts", "import": "./dist/profiles.js", "default": "./dist/profiles.js" + }, + "./mcp": { + "types": "./dist/mcp/index.d.ts", + "import": "./dist/mcp/index.js", + "default": "./dist/mcp/index.js" } }, + "bin": { + "agent-runtime-mcp": "./dist/mcp/bin.js" + }, "files": [ "dist", "README.md" @@ -88,6 +96,12 @@ "license": "MIT", "packageManager": "pnpm@10.28.0", "peerDependencies": { + "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0", "@tangle-network/sandbox": ">=0.1.2 <0.3.0" + }, + "peerDependenciesMeta": { + "@tangle-network/agent-knowledge": { + "optional": true + } } } diff --git a/src/mcp/bin.ts b/src/mcp/bin.ts new file mode 100644 index 0000000..0ae8900 --- /dev/null +++ b/src/mcp/bin.ts @@ -0,0 +1,218 @@ +#!/usr/bin/env node + +/** + * @experimental + * + * `agent-runtime-mcp` — stdio MCP server entry point. + * + * Spins up a server with the default coder delegate (wired against the + * real `@tangle-network/sandbox` client) and, when the optional + * `@tangle-network/agent-knowledge` peer is installed, a researcher + * delegate against `multiHarnessResearcherFanout`. + * + * Environment variables: + * SANDBOX_API_KEY required — passed to `new Sandbox({ apiKey })` + * SANDBOX_BASE_URL optional — sandbox-SDK base URL override + * MCP_MAX_CONCURRENT_SANDBOXES default 4 — kernel maxConcurrency cap + * MCP_CODER_FANOUT_HARNESSES comma-separated harness ids to use for variants > 1 + * MCP_DISABLE_CODER set to `1` to omit `delegate_code` + * MCP_DISABLE_RESEARCHER set to `1` to omit `delegate_research` even when peer is present + */ + +import type { LoopSandboxClient } from '../loops' +import { runLoop } from '../loops' +import { createDefaultCoderDelegate, type ResearcherDelegate } from './delegates' +import { createMcpServer } from './server' +import type { ResearchOutputShape } from './types' + +async function main(): Promise { + const fanoutHarnesses = parseHarnesses(process.env.MCP_CODER_FANOUT_HARNESSES) + const maxConcurrency = parseConcurrency(process.env.MCP_MAX_CONCURRENT_SANDBOXES) + const wantCoder = !process.env.MCP_DISABLE_CODER + const wantResearcher = !process.env.MCP_DISABLE_RESEARCHER + + // Skip the sandbox client load entirely when no profile delegate needs it — + // the feedback + status + history tools are queue-bound and require no + // sandbox. Useful for tooling that mounts the MCP server purely for + // self-introspection. + const needsSandbox = wantCoder || wantResearcher + let sandboxClient: LoopSandboxClient | undefined + if (needsSandbox) { + const apiKey = process.env.SANDBOX_API_KEY + if (!apiKey && !process.env.AGENT_RUNTIME_MCP_ALLOW_NO_KEY) { + process.stderr.write( + 'agent-runtime-mcp: SANDBOX_API_KEY is required (set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or set MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to run the queue-only subset)\n', + ) + process.exit(2) + } + sandboxClient = await loadSandboxClient(apiKey) + } + + const coderDelegate = + wantCoder && sandboxClient + ? createDefaultCoderDelegate({ + sandboxClient, + fanoutHarnesses, + maxConcurrency, + }) + : undefined + + const researcherDelegate = + wantResearcher && sandboxClient + ? await loadResearcherDelegate(sandboxClient, maxConcurrency) + : undefined + + const server = createMcpServer({ coderDelegate, researcherDelegate }) + + process.on('SIGINT', () => { + server.stop() + process.exit(0) + }) + process.on('SIGTERM', () => { + server.stop() + process.exit(0) + }) + + await server.serve() +} + +async function loadSandboxClient(apiKey: string | undefined): Promise { + // Dynamic import keeps the bin importable in environments that haven't + // installed `@tangle-network/sandbox` yet (the runtime package lists it + // as a peer dep, not a hard dep). + const mod = await import('@tangle-network/sandbox').catch((err) => { + process.stderr.write( + `agent-runtime-mcp: failed to load @tangle-network/sandbox (${err.message}); install the peer dependency\n`, + ) + process.exit(2) + }) + const SandboxCtor = (mod as { Sandbox?: new (config: unknown) => LoopSandboxClient }).Sandbox + if (!SandboxCtor) { + process.stderr.write( + 'agent-runtime-mcp: @tangle-network/sandbox does not export Sandbox; cannot construct client\n', + ) + process.exit(2) + } + const baseUrl = process.env.SANDBOX_BASE_URL + return new SandboxCtor({ + apiKey, + ...(baseUrl ? { baseUrl } : {}), + }) +} + +interface ResearcherProfilePreset { + agentRunSpec: Parameters[0]['agentRun'] extends infer T ? NonNullable : never + output: Parameters[0]['output'] + validator: Parameters[0]['validator'] +} + +interface ResearcherFanoutPreset { + agentRuns: NonNullable[0]['agentRuns']> + output: Parameters[0]['output'] + validator: Parameters[0]['validator'] + driver: Parameters[0]['driver'] +} + +async function loadResearcherDelegate( + sandboxClient: LoopSandboxClient, + maxConcurrency: number, +): Promise { + // Optional peer — when `@tangle-network/agent-knowledge` isn't installed, + // we silently omit the researcher tool from the advertisement. The + // dynamic-import path is resolved at runtime; TypeScript cannot see the + // peer, so we type the module structurally rather than via its own + // declaration file. + const profilesSpecifier = '@tangle-network/agent-knowledge/profiles' + const mod = await import(profilesSpecifier).catch(() => undefined) + if (!mod) return undefined + type SingleFactory = (opts: { task: unknown }) => ResearcherProfilePreset + type FanoutFactory = (opts: { task: unknown }) => ResearcherFanoutPreset + const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory }) + .multiHarnessResearcherFanout + const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile + if (!fanoutFactory || !singleFactory) return undefined + + return async (args, ctx) => { + const task = { + question: args.question, + knowledgeNamespace: args.namespace, + scope: args.scope, + sources: args.sources, + recencyWindow: args.config?.recencyWindow + ? { + since: args.config.recencyWindow.since + ? new Date(args.config.recencyWindow.since) + : undefined, + until: args.config.recencyWindow.until + ? new Date(args.config.recencyWindow.until) + : undefined, + } + : undefined, + maxItems: args.config?.maxItems, + minConfidence: args.config?.minConfidence, + } + const variants = Math.max(1, Math.trunc(args.variants ?? 1)) + ctx.report({ iteration: 0, phase: 'starting' }) + if (variants <= 1) { + const preset = singleFactory({ task }) + const result = await runLoop({ + driver: { + name: 'mcp-researcher-single', + async plan(t, history) { + return history.length === 0 ? [t] : [] + }, + decide(history) { + return history.length > 0 ? 'pick-winner' : 'fail' + }, + }, + agentRun: preset.agentRunSpec, + output: preset.output, + validator: preset.validator, + task, + ctx: { sandboxClient, signal: ctx.signal }, + maxIterations: 1, + maxConcurrency, + }) + const output = result.winner?.output + if (!output) throw new Error('researcher delegate produced no winner') + ctx.report({ iteration: 1, phase: 'completed' }) + return output as ResearchOutputShape + } + const fanout = fanoutFactory({ task }) + const result = await runLoop({ + driver: fanout.driver, + agentRuns: fanout.agentRuns.slice(0, variants), + output: fanout.output, + validator: fanout.validator, + task, + ctx: { sandboxClient, signal: ctx.signal }, + maxIterations: variants, + maxConcurrency: Math.min(maxConcurrency, variants), + }) + const output = result.winner?.output + if (!output) throw new Error('researcher delegate fanout produced no winner') + ctx.report({ iteration: result.iterations.length, phase: 'completed' }) + return output as ResearchOutputShape + } +} + +function parseHarnesses(raw: string | undefined): string[] | undefined { + if (!raw) return undefined + const list = raw + .split(',') + .map((entry) => entry.trim()) + .filter(Boolean) + return list.length > 0 ? list : undefined +} + +function parseConcurrency(raw: string | undefined): number { + if (!raw) return 4 + const n = Number(raw) + if (!Number.isFinite(n) || n < 1) return 4 + return Math.min(Math.trunc(n), 32) +} + +main().catch((err) => { + process.stderr.write(`agent-runtime-mcp: ${err instanceof Error ? err.stack : String(err)}\n`) + process.exit(1) +}) diff --git a/src/mcp/delegates.ts b/src/mcp/delegates.ts new file mode 100644 index 0000000..0e5d529 --- /dev/null +++ b/src/mcp/delegates.ts @@ -0,0 +1,144 @@ +/** + * @experimental + * + * Delegate factories — the layer between MCP tool handlers and the + * underlying `runLoop` runners. + * + * The MCP server is profile-agnostic: it owns the task queue + feedback + * store + transport. Each `*Delegate` is the closure that the queue + * invokes when a task runs. Consumers can override either delegate to + * inject custom drivers, mocks, fleet-aware dispatchers, etc. + * + * The default coder delegate is wired here because we own + * `coderProfile` / `multiHarnessCoderFanout`. The default researcher + * delegate is **not** wired in this file — `agent-knowledge` cannot be + * imported from `agent-runtime` without inducing a cycle. Consumers + * pass `researcherDelegate` explicitly when constructing the server. + */ + +import type { LoopSandboxClient } from '../loops' +import { runLoop } from '../loops' +import { coderProfile, multiHarnessCoderFanout } from '../profiles/coder' +import type { + CoderTask, + DelegateCodeArgs, + DelegateResearchArgs, + DelegationProgress, + ResearchOutputShape, +} from './types' + +/** @experimental */ +export interface DelegateRunCtx { + signal: AbortSignal + report(progress: DelegationProgress): void +} + +/** @experimental */ +export type CoderDelegate = ( + args: DelegateCodeArgs, + ctx: DelegateRunCtx, +) => Promise + +/** @experimental */ +export type ResearcherDelegate = ( + args: DelegateResearchArgs, + ctx: DelegateRunCtx, +) => Promise + +/** @experimental */ +export interface CreateDefaultCoderDelegateOptions { + sandboxClient: LoopSandboxClient + /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */ + fanoutHarnesses?: string[] + /** Hard cap on the kernel's per-batch concurrency. Default 4. */ + maxConcurrency?: number +} + +/** + * Build a coder delegate that drives `runLoop` against the project's + * sandbox client + coder profile. When `args.variants > 1` it switches + * to the multi-harness fanout topology. + * + * @experimental + */ +export function createDefaultCoderDelegate( + options: CreateDefaultCoderDelegateOptions, +): CoderDelegate { + const sandboxClient = options.sandboxClient + const fanoutHarnesses = options.fanoutHarnesses + const maxConcurrency = options.maxConcurrency ?? 4 + return async (args, ctx) => { + const task: CoderTask = { + goal: buildCoderGoal(args), + repoRoot: args.repoRoot, + testCmd: args.config?.testCmd, + typecheckCmd: args.config?.typecheckCmd, + forbiddenPaths: args.config?.forbiddenPaths, + maxDiffLines: args.config?.maxDiffLines, + } + const variants = Math.max(1, Math.trunc(args.variants ?? 1)) + ctx.report({ iteration: 0, phase: 'starting' }) + if (variants <= 1) { + const { agentRunSpec, output, validator } = coderProfile({ task }) + const result = await runLoop({ + driver: singleShotDriver, + agentRun: agentRunSpec, + output, + validator, + task, + ctx: { sandboxClient, signal: ctx.signal }, + maxIterations: 1, + maxConcurrency, + }) + const winner = result.winner + if (!winner) { + throw new Error('coder delegate produced no winner') + } + ctx.report({ iteration: 1, phase: 'completed' }) + return winner.output + } + const fanout = multiHarnessCoderFanout( + fanoutHarnesses && fanoutHarnesses.length > 0 + ? { harnesses: fanoutHarnesses.slice(0, variants) } + : { harnesses: undefined }, + ) + const agentRuns = fanout.agentRuns.slice(0, variants) + const result = await runLoop({ + driver: fanout.driver, + agentRuns, + output: fanout.output, + validator: fanout.validator, + task, + ctx: { sandboxClient, signal: ctx.signal }, + maxIterations: variants, + maxConcurrency: Math.min(maxConcurrency, variants), + }) + const winner = result.winner + if (!winner) { + throw new Error('coder delegate fanout produced no winner') + } + ctx.report({ iteration: agentRuns.length, phase: 'completed' }) + return winner.output + } +} + +function buildCoderGoal(args: DelegateCodeArgs): string { + if (!args.contextHint) return args.goal + return [args.goal, '', '## Context', args.contextHint].join('\n') +} + +/** + * Single-shot driver — plan one task on iteration 0, stop after one + * iteration. Used by the coder delegate when `variants <= 1`. Keeps the + * runLoop kernel-level accounting (timing, cost, trace emission) while + * skipping fanout/refine topology overhead. + */ +const singleShotDriver = { + name: 'mcp-single-shot', + async plan(task: Task, history: ReadonlyArray): Promise { + return history.length === 0 ? [task] : [] + }, + decide(history: ReadonlyArray): 'pick-winner' | 'fail' { + return history.length > 0 ? 'pick-winner' : 'fail' + }, +} diff --git a/src/mcp/feedback-store.ts b/src/mcp/feedback-store.ts new file mode 100644 index 0000000..4cfa2b9 --- /dev/null +++ b/src/mcp/feedback-store.ts @@ -0,0 +1,76 @@ +/** + * @experimental + * + * Feedback persistence surface for the MCP layer. + * + * The substrate cannot import `@tangle-network/agent-knowledge` (it would + * induce a dependency cycle), so the store is an abstract interface. The + * default implementation is in-memory; consumers wire their own adapter + * (a real KbStore-backed sink, an HTTP relay to gtm-agent's knowledge + * service, etc.) via `createMcpServer({ feedbackStore })`. + * + * Feedback events are append-only: every rating is a new event with a + * fresh id, even when the same delegation is rated multiple times. The + * caller decides how to roll up scores downstream. + */ + +import type { DelegateFeedbackArgs, DelegationFeedbackSnapshot } from './types' + +/** @experimental */ +export interface FeedbackEvent { + id: string + refersTo: DelegateFeedbackArgs['refersTo'] + rating: DelegateFeedbackArgs['rating'] + by: DelegateFeedbackArgs['by'] + capturedAt: string + namespace?: string +} + +/** @experimental */ +export interface FeedbackStore { + /** Append a new event. Never dedupes — every rating is its own event. */ + put(event: FeedbackEvent): Promise + /** + * List events filtered by `namespace`. When `namespace` is omitted, list + * across all namespaces. Returns events in insertion order. + */ + list(filter?: { namespace?: string; refersToRef?: string }): Promise +} + +/** @experimental */ +export class InMemoryFeedbackStore implements FeedbackStore { + private readonly events: FeedbackEvent[] = [] + + async put(event: FeedbackEvent): Promise { + this.events.push({ ...event }) + } + + async list(filter: { namespace?: string; refersToRef?: string } = {}): Promise { + let out = this.events + if (filter.namespace !== undefined) { + out = out.filter((event) => event.namespace === filter.namespace) + } + if (filter.refersToRef !== undefined) { + out = out.filter((event) => event.refersTo.ref === filter.refersToRef) + } + return out.map((event) => ({ ...event })) + } +} + +/** + * Project a `FeedbackEvent` down to the snapshot shape carried on + * `delegation_history` entries. + * + * @experimental + */ +export function eventToSnapshot(event: FeedbackEvent): DelegationFeedbackSnapshot { + const snap: DelegationFeedbackSnapshot = { + id: event.id, + score: event.rating.score, + by: event.by, + notes: event.rating.notes, + capturedAt: event.capturedAt, + } + if (event.rating.label) snap.label = event.rating.label + return snap +} diff --git a/src/mcp/index.ts b/src/mcp/index.ts new file mode 100644 index 0000000..6517e90 --- /dev/null +++ b/src/mcp/index.ts @@ -0,0 +1,100 @@ +/** + * @experimental + * + * `@tangle-network/agent-runtime/mcp` — Stdio MCP server exposing the 5 + * delegation tools (`delegate_code`, `delegate_research`, + * `delegate_feedback`, `delegation_status`, `delegation_history`) to + * sandbox coding-harness agents. + * + * Mount the server inside a product agent's sandbox via + * `agent-runtime-mcp` (the bin) or wire it into a custom Node entry + * point with `createMcpServer({ ... })`. Pass `coderDelegate` / + * `researcherDelegate` factories you build from your project's + * sandbox client + run-loop topology. + */ + +export type { + CoderDelegate, + CreateDefaultCoderDelegateOptions, + DelegateRunCtx, + ResearcherDelegate, +} from './delegates' +export { createDefaultCoderDelegate } from './delegates' +export type { FeedbackEvent, FeedbackStore } from './feedback-store' +export { eventToSnapshot, InMemoryFeedbackStore } from './feedback-store' +export type { + JsonRpcMessage, + JsonRpcResponse, + McpServer, + McpServerOptions, + McpToolDescriptor, + McpTransport, +} from './server' +export { createInProcessTransport, createMcpServer } from './server' +export type { + DelegationRecord, + DelegationTaskQueueOptions, + SubmitInput, + SubmitOutput, +} from './task-queue' +export { DelegationTaskQueue, hashIdempotencyInput } from './task-queue' +export { + createDelegateCodeHandler, + DELEGATE_CODE_DESCRIPTION, + DELEGATE_CODE_INPUT_SCHEMA, + DELEGATE_CODE_TOOL_NAME, + validateDelegateCodeArgs, +} from './tools/delegate-code' +export { + createDelegateFeedbackHandler, + DELEGATE_FEEDBACK_DESCRIPTION, + DELEGATE_FEEDBACK_INPUT_SCHEMA, + DELEGATE_FEEDBACK_TOOL_NAME, + validateDelegateFeedbackArgs, +} from './tools/delegate-feedback' +export { + createDelegateResearchHandler, + DELEGATE_RESEARCH_DESCRIPTION, + DELEGATE_RESEARCH_INPUT_SCHEMA, + DELEGATE_RESEARCH_TOOL_NAME, + validateDelegateResearchArgs, +} from './tools/delegate-research' +export { + createDelegationHistoryHandler, + DELEGATION_HISTORY_DESCRIPTION, + DELEGATION_HISTORY_INPUT_SCHEMA, + DELEGATION_HISTORY_TOOL_NAME, + validateDelegationHistoryArgs, +} from './tools/delegation-history' +export { + createDelegationStatusHandler, + DELEGATION_STATUS_DESCRIPTION, + DELEGATION_STATUS_INPUT_SCHEMA, + DELEGATION_STATUS_TOOL_NAME, + validateDelegationStatusArgs, +} from './tools/delegation-status' +export type { + DelegateCodeArgs, + DelegateCodeConfig, + DelegateCodeResult, + DelegateFeedbackArgs, + DelegateFeedbackResult, + DelegateResearchArgs, + DelegateResearchConfig, + DelegateResearchResult, + DelegationError, + DelegationFeedbackSnapshot, + DelegationHistoryArgs, + DelegationHistoryEntry, + DelegationHistoryResult, + DelegationProfile, + DelegationProgress, + DelegationResultPayload, + DelegationStatus, + DelegationStatusArgs, + DelegationStatusResult, + FeedbackRating, + FeedbackRefersTo, + ResearchOutputShape, + ResearchSource, +} from './types' diff --git a/src/mcp/server.ts b/src/mcp/server.ts new file mode 100644 index 0000000..ca03bdf --- /dev/null +++ b/src/mcp/server.ts @@ -0,0 +1,337 @@ +/** + * @experimental + * + * Stdio JSON-RPC MCP server exposing the 5 delegation tools to sandbox + * coding-harness agents (claude-code, codex, opencode, ...). + * + * The server is transport-bound but topology-free: tool execution is + * delegated to handler functions composed from a queue, a feedback + * store, and per-profile run delegates. Consumers wire those at + * construction time. The `agent-runtime-mcp` bin spins up a default + * configuration for the common case (real sandbox client + coder). + * + * Wire protocol: line-delimited JSON-RPC 2.0 over stdio. Each line is + * one request; each response is one line. `tools/list` and `tools/call` + * mirror the MCP 2024-11-05 spec; we do not pull in + * `@modelcontextprotocol/sdk` to keep the dependency footprint zero. + */ + +import { createInterface, type Interface as ReadlineInterface } from 'node:readline' +import { Readable, Writable } from 'node:stream' +import type { CoderDelegate, ResearcherDelegate } from './delegates' +import { type FeedbackStore, InMemoryFeedbackStore } from './feedback-store' +import { DelegationTaskQueue } from './task-queue' +import { + createDelegateCodeHandler, + DELEGATE_CODE_DESCRIPTION, + DELEGATE_CODE_INPUT_SCHEMA, + DELEGATE_CODE_TOOL_NAME, +} from './tools/delegate-code' +import { + createDelegateFeedbackHandler, + DELEGATE_FEEDBACK_DESCRIPTION, + DELEGATE_FEEDBACK_INPUT_SCHEMA, + DELEGATE_FEEDBACK_TOOL_NAME, +} from './tools/delegate-feedback' +import { + createDelegateResearchHandler, + DELEGATE_RESEARCH_DESCRIPTION, + DELEGATE_RESEARCH_INPUT_SCHEMA, + DELEGATE_RESEARCH_TOOL_NAME, +} from './tools/delegate-research' +import { + createDelegationHistoryHandler, + DELEGATION_HISTORY_DESCRIPTION, + DELEGATION_HISTORY_INPUT_SCHEMA, + DELEGATION_HISTORY_TOOL_NAME, +} from './tools/delegation-history' +import { + createDelegationStatusHandler, + DELEGATION_STATUS_DESCRIPTION, + DELEGATION_STATUS_INPUT_SCHEMA, + DELEGATION_STATUS_TOOL_NAME, +} from './tools/delegation-status' + +/** @experimental */ +export interface McpServerOptions { + /** Required to enable delegate_code. */ + coderDelegate?: CoderDelegate + /** + * Required to enable delegate_research. The substrate cannot ship a + * default — wire one that closes over your `runLoop` + a + * researcher profile (typically `@tangle-network/agent-knowledge`'s + * `researcherProfile` / `multiHarnessResearcherFanout`). + */ + researcherDelegate?: ResearcherDelegate + /** Override the default in-memory feedback store. */ + feedbackStore?: FeedbackStore + /** Override the default in-memory task queue. */ + queue?: DelegationTaskQueue + /** Server display name surfaced via `initialize`. Default `'agent-runtime-mcp'`. */ + serverName?: string + /** Server version surfaced via `initialize`. Default = the package version baked at build time. */ + serverVersion?: string +} + +/** @experimental */ +export interface McpToolDescriptor { + name: string + description: string + inputSchema: Record + handler: (raw: unknown) => Promise +} + +/** @experimental */ +export interface McpServer { + /** Tools currently registered (depend on which delegates were wired). */ + readonly tools: ReadonlyMap + /** The underlying queue — exposed so tests can introspect it. */ + readonly queue: DelegationTaskQueue + /** The feedback store — exposed for the same reason. */ + readonly feedbackStore: FeedbackStore + /** Handle a single parsed JSON-RPC message. Returns the response object (or `null` for notifications). */ + handle(message: JsonRpcMessage): Promise + /** Drive the server on a stdio-shaped transport until `stop()` is called. */ + serve(transport?: McpTransport): Promise + /** Stop a `serve` call. Subsequent requests are rejected. */ + stop(): void +} + +/** @experimental */ +export interface McpTransport { + input: NodeJS.ReadableStream + output: NodeJS.WritableStream +} + +/** @experimental */ +export interface JsonRpcMessage { + jsonrpc: '2.0' + id?: number | string | null + method: string + params?: unknown +} + +/** @experimental */ +export interface JsonRpcResponse { + jsonrpc: '2.0' + id: number | string | null + result?: unknown + error?: { code: number; message: string; data?: unknown } +} + +const PROTOCOL_VERSION = '2024-11-05' +const DEFAULT_SERVER_NAME = 'agent-runtime-mcp' +const DEFAULT_SERVER_VERSION = '0.20.0' + +/** @experimental */ +export function createMcpServer(options: McpServerOptions = {}): McpServer { + const queue = options.queue ?? new DelegationTaskQueue() + const feedbackStore = options.feedbackStore ?? new InMemoryFeedbackStore() + const serverName = options.serverName ?? DEFAULT_SERVER_NAME + const serverVersion = options.serverVersion ?? DEFAULT_SERVER_VERSION + + const tools = new Map() + + if (options.coderDelegate) { + tools.set(DELEGATE_CODE_TOOL_NAME, { + name: DELEGATE_CODE_TOOL_NAME, + description: DELEGATE_CODE_DESCRIPTION, + inputSchema: DELEGATE_CODE_INPUT_SCHEMA as unknown as Record, + handler: createDelegateCodeHandler({ queue, delegate: options.coderDelegate }), + }) + } + if (options.researcherDelegate) { + tools.set(DELEGATE_RESEARCH_TOOL_NAME, { + name: DELEGATE_RESEARCH_TOOL_NAME, + description: DELEGATE_RESEARCH_DESCRIPTION, + inputSchema: DELEGATE_RESEARCH_INPUT_SCHEMA as unknown as Record, + handler: createDelegateResearchHandler({ queue, delegate: options.researcherDelegate }), + }) + } + tools.set(DELEGATE_FEEDBACK_TOOL_NAME, { + name: DELEGATE_FEEDBACK_TOOL_NAME, + description: DELEGATE_FEEDBACK_DESCRIPTION, + inputSchema: DELEGATE_FEEDBACK_INPUT_SCHEMA as unknown as Record, + handler: createDelegateFeedbackHandler({ queue, store: feedbackStore }), + }) + tools.set(DELEGATION_STATUS_TOOL_NAME, { + name: DELEGATION_STATUS_TOOL_NAME, + description: DELEGATION_STATUS_DESCRIPTION, + inputSchema: DELEGATION_STATUS_INPUT_SCHEMA as unknown as Record, + handler: createDelegationStatusHandler({ queue }), + }) + tools.set(DELEGATION_HISTORY_TOOL_NAME, { + name: DELEGATION_HISTORY_TOOL_NAME, + description: DELEGATION_HISTORY_DESCRIPTION, + inputSchema: DELEGATION_HISTORY_INPUT_SCHEMA as unknown as Record, + handler: createDelegationHistoryHandler({ queue }), + }) + + let stopped = false + let activeReadline: ReadlineInterface | undefined + + async function handle(message: JsonRpcMessage): Promise { + if (stopped) { + return rpcError(message.id ?? null, -32099, 'server stopped') + } + if (message.method === 'initialize') { + return rpcResult(message.id ?? null, { + protocolVersion: PROTOCOL_VERSION, + capabilities: { tools: {} }, + serverInfo: { name: serverName, version: serverVersion }, + }) + } + if (message.method === 'notifications/initialized') { + // MCP clients send this after the handshake; it has no id and expects + // no response. + return null + } + if (message.method === 'tools/list') { + return rpcResult(message.id ?? null, { + tools: [...tools.values()].map((tool) => ({ + name: tool.name, + description: tool.description, + inputSchema: tool.inputSchema, + })), + }) + } + if (message.method === 'tools/call') { + const params = (message.params ?? {}) as { name?: unknown; arguments?: unknown } + const name = typeof params.name === 'string' ? params.name : '' + const tool = tools.get(name) + if (!tool) { + return rpcError(message.id ?? null, -32601, `unknown tool: ${name}`) + } + try { + const output = await tool.handler(params.arguments ?? {}) + return rpcResult(message.id ?? null, { + content: [{ type: 'text', text: JSON.stringify(output) }], + structuredContent: output, + isError: false, + }) + } catch (err) { + const reason = err instanceof Error ? err.message : String(err) + const code = err instanceof TypeError || err instanceof RangeError ? -32602 : -32000 + return rpcError(message.id ?? null, code, reason) + } + } + if (message.id === undefined || message.id === null) return null + return rpcError(message.id, -32601, `unknown method: ${message.method}`) + } + + async function serve(transport?: McpTransport): Promise { + const input = transport?.input ?? process.stdin + const output = transport?.output ?? process.stdout + const rl = createInterface({ input, crlfDelay: Number.POSITIVE_INFINITY }) + activeReadline = rl + return new Promise((resolve, reject) => { + rl.on('line', (line) => { + const trimmed = line.trim() + if (!trimmed) return + let parsed: JsonRpcMessage | undefined + try { + parsed = JSON.parse(trimmed) as JsonRpcMessage + } catch (err) { + writeResponse(output, rpcError(null, -32700, `parse error: ${(err as Error).message}`)) + return + } + if (!parsed || parsed.jsonrpc !== '2.0' || typeof parsed.method !== 'string') { + writeResponse(output, rpcError(parsed?.id ?? null, -32600, 'invalid request')) + return + } + void handle(parsed).then((response) => { + if (response) writeResponse(output, response) + }) + }) + rl.on('close', () => resolve()) + rl.on('error', (err) => reject(err)) + if (stopped) { + rl.close() + resolve() + } + }) + } + + function stop(): void { + stopped = true + activeReadline?.close() + activeReadline = undefined + } + + return { + tools, + queue, + feedbackStore, + handle, + serve, + stop, + } +} + +function rpcResult(id: number | string | null, result: unknown): JsonRpcResponse { + return { jsonrpc: '2.0', id, result } +} + +function rpcError( + id: number | string | null, + code: number, + message: string, + data?: unknown, +): JsonRpcResponse { + return { + jsonrpc: '2.0', + id, + error: data === undefined ? { code, message } : { code, message, data }, + } +} + +function writeResponse(output: NodeJS.WritableStream, response: JsonRpcResponse): void { + output.write(`${JSON.stringify(response)}\n`) +} + +/** + * In-process pair of `Readable` + `Writable` streams suitable for driving + * `server.serve(...)` from a test. Returns the agent-side stream (the + * client writes to it) and the server-side stream (the test reads from it). + * + * @experimental + */ +export function createInProcessTransport(): { + transport: McpTransport + clientWrite(line: string): void + clientClose(): void + readServer(): Promise +} { + const responses: JsonRpcResponse[] = [] + const input = new Readable({ read() {} }) + const output = new Writable({ + write(chunk, _enc, cb) { + const text = chunk.toString('utf8') + for (const line of text.split('\n')) { + const trimmed = line.trim() + if (!trimmed) continue + try { + responses.push(JSON.parse(trimmed) as JsonRpcResponse) + } catch { + // Non-JSON output should never appear; drop it silently in the + // test transport rather than crashing. + } + } + cb() + }, + }) + return { + transport: { input, output }, + clientWrite(line: string) { + input.push(`${line}\n`) + }, + clientClose() { + input.push(null) + }, + async readServer() { + // Yield to the event loop a few times so async handlers drain. + for (let i = 0; i < 5; i += 1) await new Promise((r) => setImmediate(r)) + return [...responses] + }, + } +} diff --git a/src/mcp/task-queue.ts b/src/mcp/task-queue.ts new file mode 100644 index 0000000..94a90cc --- /dev/null +++ b/src/mcp/task-queue.ts @@ -0,0 +1,358 @@ +/** + * @experimental + * + * In-memory state for async MCP delegations. State machine: + * + * pending → running → completed | failed + * ↘ cancelled (from any non-terminal state via cancel()) + * + * Each `submit` returns a `taskId` immediately and kicks the work off in the + * background. The work function receives an `AbortSignal` the queue fires + * when `cancel(taskId)` is called. The queue does NOT supervise runtime + * timeouts — the underlying `runLoop` driver / sandbox imposes those. + * + * Idempotency: callers may supply an `idempotencyKey` (hash of the input). + * A duplicate `submit` with a known key returns the existing task instead of + * starting a new one. Mutated input → different key → different task. + * + * Persistent state (sqlite) is a Phase 2 follow-up. The README documents the + * in-memory limitation explicitly so consumers know a worker restart drops + * pending delegations. + */ + +import type { + DelegateCodeArgs, + DelegateResearchArgs, + DelegationError, + DelegationFeedbackSnapshot, + DelegationHistoryArgs, + DelegationHistoryEntry, + DelegationProfile, + DelegationProgress, + DelegationResultPayload, + DelegationStatus, + DelegationStatusResult, +} from './types' + +/** @experimental */ +export interface DelegationRecord { + taskId: string + profile: DelegationProfile + namespace?: string + args: DelegateCodeArgs | DelegateResearchArgs + status: DelegationStatus + progress?: DelegationProgress + result?: DelegationResultPayload + error?: DelegationError + costUsd?: number + startedAt: string + completedAt?: string + /** Sha-prefix hash of the canonical input — used for idempotency lookup. */ + idempotencyKey?: string + /** Feedback events keyed by this delegation's taskId. */ + feedback: DelegationFeedbackSnapshot[] +} + +/** @experimental */ +export interface SubmitInput { + profile: DelegationProfile + args: Args + namespace?: string + idempotencyKey?: string + /** + * Runs the underlying delegation. The queue passes a fresh `AbortSignal` + * and a `report` channel for incremental progress updates. The function + * MUST resolve with the typed `DelegationResultPayload['output']`; the + * queue wraps it with the profile tag. + */ + run: (ctx: { + signal: AbortSignal + report(progress: DelegationProgress): void + }) => Promise +} + +/** @experimental */ +export interface SubmitOutput { + taskId: string + /** True when a prior matching `idempotencyKey` returned an existing record. */ + reused: boolean +} + +/** @experimental */ +export interface DelegationTaskQueueOptions { + /** ID generator override; default `randomTaskId`. */ + generateId?: () => string + /** Clock override; default `() => new Date().toISOString()`. */ + now?: () => string +} + +/** @experimental */ +export class DelegationTaskQueue { + private readonly records = new Map() + private readonly controllers = new Map() + private readonly byIdempotencyKey = new Map() + private readonly generateId: () => string + private readonly now: () => string + + constructor(options: DelegationTaskQueueOptions = {}) { + this.generateId = options.generateId ?? randomTaskId + this.now = options.now ?? (() => new Date().toISOString()) + } + + /** + * Kick off a delegation in the background. Returns immediately. The + * `taskId` is queryable via `status` once this method returns. + */ + submit( + input: SubmitInput, + ): SubmitOutput { + if (input.idempotencyKey) { + const existing = this.byIdempotencyKey.get(input.idempotencyKey) + if (existing && this.records.has(existing)) { + return { taskId: existing, reused: true } + } + } + const taskId = this.generateId() + const controller = new AbortController() + const record: DelegationRecord = { + taskId, + profile: input.profile, + namespace: input.namespace, + args: input.args, + status: 'pending', + startedAt: this.now(), + feedback: [], + idempotencyKey: input.idempotencyKey, + } + this.records.set(taskId, record) + this.controllers.set(taskId, controller) + if (input.idempotencyKey) this.byIdempotencyKey.set(input.idempotencyKey, taskId) + + // Fire-and-forget the run function. Errors flow into the record so the + // status poll surfaces them; the promise itself is intentionally + // unobserved by the queue. + queueMicrotask(() => { + this.execute(taskId, input, controller) + }) + + return { taskId, reused: false } + } + + /** + * Snapshot the current state of a delegation. Returns `undefined` for + * unknown ids so callers can distinguish missing from terminal. + */ + status(taskId: string): DelegationStatusResult | undefined { + const record = this.records.get(taskId) + if (!record) return undefined + return toStatusResult(record) + } + + /** + * Abort an in-flight delegation. Returns `false` if the task is unknown + * or already terminal. The underlying `run` function MUST honor the + * abort signal for the cancel to take effect; the queue marks the + * record `cancelled` regardless so a misbehaving runner cannot pin the + * UI on `running` forever. + */ + cancel(taskId: string): boolean { + const record = this.records.get(taskId) + if (!record) return false + if (isTerminal(record.status)) return false + const controller = this.controllers.get(taskId) + controller?.abort() + record.status = 'cancelled' + record.completedAt = this.now() + record.error = { message: 'cancelled by caller', kind: 'CancelledError' } + return true + } + + /** + * Append a feedback event to the matching delegation. Returns `false` + * when `ref` does not name a known taskId — the caller should still + * record the feedback through a different surface (artifact/outcome + * kinds are not queue-bound). + */ + attachFeedback(taskId: string, snapshot: DelegationFeedbackSnapshot): boolean { + const record = this.records.get(taskId) + if (!record) return false + record.feedback.push(snapshot) + return true + } + + /** + * Query the recorded delegations. Returns entries newest-first (by + * `startedAt`), truncated to `limit`. + */ + history(args: DelegationHistoryArgs = {}): DelegationHistoryEntry[] { + const limit = clampLimit(args.limit) + const since = args.since ? Date.parse(args.since) : Number.NEGATIVE_INFINITY + const out: DelegationHistoryEntry[] = [] + for (const record of this.records.values()) { + if (args.namespace && record.namespace !== args.namespace) continue + if (args.profile && record.profile !== args.profile) continue + if (Number.isFinite(since) && Date.parse(record.startedAt) < since) continue + out.push(toHistoryEntry(record)) + } + out.sort((a, b) => b.startedAt.localeCompare(a.startedAt)) + return out.slice(0, limit) + } + + /** Test-only — number of in-flight (non-terminal) records. */ + inflightCount(): number { + let n = 0 + for (const record of this.records.values()) { + if (!isTerminal(record.status)) n += 1 + } + return n + } + + private async execute( + taskId: string, + input: SubmitInput, + controller: AbortController, + ): Promise { + const record = this.records.get(taskId) + if (!record) return + record.status = 'running' + try { + const output = await input.run({ + signal: controller.signal, + report: (progress) => { + if (record.status === 'running') record.progress = progress + }, + }) + // `cancel()` may have flipped the status to `cancelled` while the + // run promise was pending. Read the field through a widening + // helper so the narrowed `'running'` type from the assignment + // above does not exclude that case at compile time. + if (currentStatus(record) === 'cancelled') return + record.status = 'completed' + record.completedAt = this.now() + record.result = { profile: input.profile, output } as DelegationResultPayload + } catch (err) { + if (currentStatus(record) === 'cancelled') return + record.status = 'failed' + record.completedAt = this.now() + record.error = errorToShape(err) + } finally { + this.controllers.delete(taskId) + } + } +} + +function isTerminal(status: DelegationStatus): boolean { + return status === 'completed' || status === 'failed' || status === 'cancelled' +} + +function currentStatus(record: DelegationRecord): DelegationStatus { + return record.status +} + +function clampLimit(raw: number | undefined): number { + if (!Number.isFinite(raw)) return 50 + const n = Math.trunc(raw as number) + if (n <= 0) return 50 + return Math.min(n, 500) +} + +function toStatusResult(record: DelegationRecord): DelegationStatusResult { + const out: DelegationStatusResult = { + taskId: record.taskId, + profile: record.profile, + status: record.status, + startedAt: record.startedAt, + } + if (record.progress) out.progress = record.progress + if (record.result) out.result = record.result + if (record.error) out.error = record.error + if (record.costUsd !== undefined) out.costUsd = record.costUsd + if (record.completedAt) out.completedAt = record.completedAt + return out +} + +function toHistoryEntry(record: DelegationRecord): DelegationHistoryEntry { + const entry: DelegationHistoryEntry = { + taskId: record.taskId, + profile: record.profile, + args: record.args, + status: record.status, + startedAt: record.startedAt, + } + if (record.namespace) entry.namespace = record.namespace + if (record.completedAt) entry.completedAt = record.completedAt + if (record.costUsd !== undefined) entry.costUsd = record.costUsd + if (record.feedback.length > 0) entry.feedback = [...record.feedback] + return entry +} + +function errorToShape(err: unknown): DelegationError { + if (err instanceof Error) { + return { message: err.message, kind: err.name || 'Error' } + } + return { message: String(err), kind: 'NonError' } +} + +function randomTaskId(): string { + // Caller-stable id: `dlg-${timestamp}-${random}`. The timestamp portion + // makes lexicographic sort match chronological order in history queries + // even when the system clock skews under the second. + const t = Date.now().toString(36) + const r = Math.random().toString(36).slice(2, 10) + return `dlg-${t}-${r}` +} + +/** + * Best-effort stable hash for use as `idempotencyKey`. Not cryptographic; + * collisions only affect dedupe, never correctness. + * + * @experimental + */ +export function hashIdempotencyInput(value: unknown): string { + let str: string + try { + str = JSON.stringify(canonicalize(value)) + } catch { + str = String(value) + } + // FNV-1a 32-bit + let h = 0x811c9dc5 + for (let i = 0; i < str.length; i += 1) { + h ^= str.charCodeAt(i) + h = Math.imul(h, 0x01000193) + } + return (h >>> 0).toString(16).padStart(8, '0') +} + +function canonicalize(value: unknown): unknown { + if (value === null || typeof value !== 'object') return value + if (Array.isArray(value)) return value.map(canonicalize) + const entries = Object.entries(value as Record) + .filter(([, v]) => v !== undefined) + .sort(([a], [b]) => a.localeCompare(b)) + const out: Record = {} + for (const [k, v] of entries) out[k] = canonicalize(v) + return out +} + +// Re-exports re-used by the feedback-store + handler glue. Kept local so +// consumers of the queue don't have to import from `./types` separately. +export type { + DelegateCodeArgs, + DelegateCodeResult, + DelegateFeedbackArgs, + DelegateFeedbackResult, + DelegateResearchArgs, + DelegateResearchResult, + DelegationError, + DelegationFeedbackSnapshot, + DelegationHistoryArgs, + DelegationHistoryEntry, + DelegationHistoryResult, + DelegationProfile, + DelegationProgress, + DelegationResultPayload, + DelegationStatus, + DelegationStatusArgs, + DelegationStatusResult, +} from './types' diff --git a/src/mcp/tools/delegate-code.ts b/src/mcp/tools/delegate-code.ts new file mode 100644 index 0000000..4f6796f --- /dev/null +++ b/src/mcp/tools/delegate-code.ts @@ -0,0 +1,199 @@ +/** + * @experimental + * + * `delegate_code` MCP tool — async kickoff. The handler validates the + * input, computes an idempotency key over the canonical fields, hands + * the task to the queue, and returns `{ taskId, estimatedDurationMs }`. + */ + +import type { CoderDelegate } from '../delegates' +import { + type DelegateCodeArgs, + type DelegateCodeResult, + type DelegationTaskQueue, + hashIdempotencyInput, +} from '../task-queue' + +/** @experimental */ +export const DELEGATE_CODE_TOOL_NAME = 'delegate_code' + +/** @experimental */ +export const DELEGATE_CODE_DESCRIPTION = [ + 'Delegate a coding task to specialist coder agents that produce a validated patch.', + '', + 'Use when: you need code written, fixed, refactored, or extended to satisfy a', + 'user goal that touches a real repository. The coder runs in an isolated', + 'sandbox, opens a fresh branch, keeps the diff minimal, runs the supplied', + 'test + typecheck commands, and emits a unified-diff patch.', + '', + 'Returns immediately with a taskId. Poll delegation_status to retrieve the', + 'patch + validator verdict (typically minutes-to-hours, longer for large', + 'changes). Identical inputs return the same taskId — safe to retry.', + '', + 'When variants > 1, multiple coder harnesses (claude-code, codex, opencode)', + 'attempt the task in parallel and the highest-scoring patch wins (smallest', + 'passing diff). Use variants for high-stakes changes; single variant for', + 'routine ones.', + '', + 'Capability scope: the coder cannot modify paths outside repoRoot and cannot', + 'touch paths in config.forbiddenPaths. The validator hard-fails on a', + 'forbidden-path violation, diff above config.maxDiffLines, test failure, or', + 'typecheck failure — none of those make it past the gate.', +].join('\n') + +/** @experimental */ +export const DELEGATE_CODE_INPUT_SCHEMA = { + type: 'object', + properties: { + goal: { + type: 'string', + description: 'Natural-language description of what the coder must accomplish.', + }, + repoRoot: { + type: 'string', + description: 'Absolute path inside the sandbox where the repo lives.', + }, + contextHint: { + type: 'string', + description: 'Optional free-form context the coder sees in the prompt prelude.', + }, + variants: { + type: 'integer', + minimum: 1, + maximum: 8, + description: 'Number of parallel coder harnesses. Default 1.', + }, + config: { + type: 'object', + properties: { + testCmd: { type: 'string' }, + typecheckCmd: { type: 'string' }, + forbiddenPaths: { type: 'array', items: { type: 'string' } }, + maxDiffLines: { type: 'integer', minimum: 1 }, + }, + additionalProperties: false, + }, + namespace: { + type: 'string', + description: 'Multi-tenant scope (customer-id, workspace-id).', + }, + }, + required: ['goal', 'repoRoot'], + additionalProperties: false, +} as const + +const SINGLE_VARIANT_ESTIMATE_MS = 6 * 60 * 1000 // 6 minutes — single coder +const FANOUT_PER_VARIANT_ESTIMATE_MS = 8 * 60 * 1000 // 8 minutes — fanout + +/** @experimental */ +export function validateDelegateCodeArgs(raw: unknown): DelegateCodeArgs { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_code: arguments must be an object') + } + const value = raw as Record + const goal = value.goal + if (typeof goal !== 'string' || goal.trim().length === 0) { + throw new TypeError('delegate_code: `goal` must be a non-empty string') + } + const repoRoot = value.repoRoot + if (typeof repoRoot !== 'string' || repoRoot.trim().length === 0) { + throw new TypeError('delegate_code: `repoRoot` must be a non-empty string') + } + const args: DelegateCodeArgs = { goal: goal.trim(), repoRoot: repoRoot.trim() } + if (typeof value.contextHint === 'string') args.contextHint = value.contextHint + if (value.variants !== undefined) { + const variants = Number(value.variants) + if (!Number.isFinite(variants) || variants < 1 || variants > 8) { + throw new RangeError('delegate_code: `variants` must be an integer in [1, 8]') + } + args.variants = Math.trunc(variants) + } + if (value.config !== undefined) { + args.config = validateConfig(value.config) + } + if (typeof value.namespace === 'string') args.namespace = value.namespace + return args +} + +function validateConfig(raw: unknown): DelegateCodeArgs['config'] { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_code: `config` must be an object') + } + const value = raw as Record + const out: NonNullable = {} + if (value.testCmd !== undefined) { + if (typeof value.testCmd !== 'string') { + throw new TypeError('delegate_code: `config.testCmd` must be a string') + } + out.testCmd = value.testCmd + } + if (value.typecheckCmd !== undefined) { + if (typeof value.typecheckCmd !== 'string') { + throw new TypeError('delegate_code: `config.typecheckCmd` must be a string') + } + out.typecheckCmd = value.typecheckCmd + } + if (value.forbiddenPaths !== undefined) { + if (!Array.isArray(value.forbiddenPaths)) { + throw new TypeError('delegate_code: `config.forbiddenPaths` must be a string array') + } + out.forbiddenPaths = value.forbiddenPaths.map((entry, i) => { + if (typeof entry !== 'string') { + throw new TypeError(`delegate_code: forbiddenPaths[${i}] must be a string`) + } + return entry + }) + } + if (value.maxDiffLines !== undefined) { + const n = Number(value.maxDiffLines) + if (!Number.isFinite(n) || n < 1) { + throw new RangeError('delegate_code: `config.maxDiffLines` must be a positive integer') + } + out.maxDiffLines = Math.trunc(n) + } + return out +} + +/** @experimental */ +export interface DelegateCodeHandlerOptions { + queue: DelegationTaskQueue + delegate: CoderDelegate + /** Override the duration hint. */ + estimateDurationMs?: (args: DelegateCodeArgs) => number +} + +/** @experimental */ +export function createDelegateCodeHandler( + options: DelegateCodeHandlerOptions, +): (raw: unknown) => Promise { + const estimateDurationMs = options.estimateDurationMs ?? defaultEstimate + return async (raw) => { + const args = validateDelegateCodeArgs(raw) + const idempotencyKey = hashIdempotencyInput({ + profile: 'coder', + goal: args.goal, + repoRoot: args.repoRoot, + contextHint: args.contextHint, + variants: args.variants ?? 1, + config: args.config, + namespace: args.namespace, + }) + const submitted = options.queue.submit({ + profile: 'coder', + args, + namespace: args.namespace, + idempotencyKey, + run: async (ctx) => options.delegate(args, ctx), + }) + return { + taskId: submitted.taskId, + estimatedDurationMs: estimateDurationMs(args), + } + } +} + +function defaultEstimate(args: DelegateCodeArgs): number { + const variants = Math.max(1, args.variants ?? 1) + if (variants === 1) return SINGLE_VARIANT_ESTIMATE_MS + return FANOUT_PER_VARIANT_ESTIMATE_MS +} diff --git a/src/mcp/tools/delegate-feedback.ts b/src/mcp/tools/delegate-feedback.ts new file mode 100644 index 0000000..11d9a6e --- /dev/null +++ b/src/mcp/tools/delegate-feedback.ts @@ -0,0 +1,187 @@ +/** + * @experimental + * + * `delegate_feedback` MCP tool — synchronous record of agent / user / + * downstream-judge feedback on a delegation, artifact, or outcome. + * + * The store is append-only. Every rating is its own event; no dedupe. + * When `refersTo.kind === 'delegation'`, the snapshot is also attached + * to the matching queue record so `delegation_history` surfaces it + * inline without a join. + */ + +import type { FeedbackStore } from '../feedback-store' +import { eventToSnapshot } from '../feedback-store' +import type { DelegationTaskQueue } from '../task-queue' +import type { + DelegateFeedbackArgs, + DelegateFeedbackResult, + FeedbackRating, + FeedbackRefersTo, +} from '../types' + +/** @experimental */ +export const DELEGATE_FEEDBACK_TOOL_NAME = 'delegate_feedback' + +/** @experimental */ +export const DELEGATE_FEEDBACK_DESCRIPTION = [ + 'Record feedback on a delegation, artifact, or outcome. Synchronous — the', + 'event is durably stored when this call returns.', + '', + 'Use when: you (the agent), the user, or a downstream judge has formed an', + 'opinion about a piece of work and want it persisted for calibration,', + 'pricing, or future routing. Every call is a new event — multiple ratings', + 'on the same target are expected and never deduped.', + '', + '`refersTo.kind`:', + ' - "delegation": ref is a taskId returned by delegate_code/delegate_research', + ' - "artifact": ref is a URI/path/git-sha — anything you can dereference', + ' - "outcome": ref is a free-form description of a downstream result', + '', + '`by`:', + ' - "agent": the agent itself rated the work', + ' - "user": the human user rated it', + ' - "downstream-judge": an automated evaluator emitted the rating', + '', + 'When ref names a known taskId, the rating is also attached to the', + 'delegation record so delegation_history surfaces it inline.', +].join('\n') + +/** @experimental */ +export const DELEGATE_FEEDBACK_INPUT_SCHEMA = { + type: 'object', + properties: { + refersTo: { + type: 'object', + properties: { + kind: { type: 'string', enum: ['delegation', 'artifact', 'outcome'] }, + ref: { type: 'string' }, + }, + required: ['kind', 'ref'], + additionalProperties: false, + }, + rating: { + type: 'object', + properties: { + score: { type: 'number', minimum: 0, maximum: 1 }, + label: { type: 'string', enum: ['good', 'bad', 'neutral', 'mixed'] }, + notes: { type: 'string' }, + }, + required: ['score', 'notes'], + additionalProperties: false, + }, + by: { type: 'string', enum: ['agent', 'user', 'downstream-judge'] }, + capturedAt: { type: 'string' }, + namespace: { type: 'string' }, + }, + required: ['refersTo', 'rating', 'by'], + additionalProperties: false, +} as const + +/** @experimental */ +export function validateDelegateFeedbackArgs(raw: unknown): DelegateFeedbackArgs { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_feedback: arguments must be an object') + } + const value = raw as Record + const refersTo = validateRefersTo(value.refersTo) + const rating = validateRating(value.rating) + const by = value.by + if (by !== 'agent' && by !== 'user' && by !== 'downstream-judge') { + throw new TypeError( + 'delegate_feedback: `by` must be one of "agent" | "user" | "downstream-judge"', + ) + } + const args: DelegateFeedbackArgs = { refersTo, rating, by } + if (value.capturedAt !== undefined) { + if (typeof value.capturedAt !== 'string' || Number.isNaN(Date.parse(value.capturedAt))) { + throw new TypeError('delegate_feedback: `capturedAt` must be an ISO datetime') + } + args.capturedAt = value.capturedAt + } + if (typeof value.namespace === 'string') args.namespace = value.namespace + return args +} + +function validateRefersTo(raw: unknown): FeedbackRefersTo { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_feedback: `refersTo` must be an object') + } + const value = raw as Record + const kind = value.kind + if (kind !== 'delegation' && kind !== 'artifact' && kind !== 'outcome') { + throw new TypeError( + 'delegate_feedback: `refersTo.kind` must be one of "delegation" | "artifact" | "outcome"', + ) + } + const ref = value.ref + if (typeof ref !== 'string' || ref.trim().length === 0) { + throw new TypeError('delegate_feedback: `refersTo.ref` must be a non-empty string') + } + return { kind, ref: ref.trim() } +} + +function validateRating(raw: unknown): FeedbackRating { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_feedback: `rating` must be an object') + } + const value = raw as Record + const score = Number(value.score) + if (!Number.isFinite(score) || score < 0 || score > 1) { + throw new RangeError('delegate_feedback: `rating.score` must be a number in [0, 1]') + } + const notes = value.notes + if (typeof notes !== 'string') { + throw new TypeError('delegate_feedback: `rating.notes` must be a string') + } + const rating: FeedbackRating = { score, notes } + const label = value.label + if (label !== undefined) { + if (label !== 'good' && label !== 'bad' && label !== 'neutral' && label !== 'mixed') { + throw new TypeError( + 'delegate_feedback: `rating.label` must be one of "good" | "bad" | "neutral" | "mixed"', + ) + } + rating.label = label + } + return rating +} + +/** @experimental */ +export interface DelegateFeedbackHandlerOptions { + queue: DelegationTaskQueue + store: FeedbackStore + generateId?: () => string + now?: () => string +} + +/** @experimental */ +export function createDelegateFeedbackHandler( + options: DelegateFeedbackHandlerOptions, +): (raw: unknown) => Promise { + const generateId = options.generateId ?? randomFeedbackId + const now = options.now ?? (() => new Date().toISOString()) + return async (raw) => { + const args = validateDelegateFeedbackArgs(raw) + const id = generateId() + const event = { + id, + refersTo: args.refersTo, + rating: args.rating, + by: args.by, + capturedAt: args.capturedAt ?? now(), + namespace: args.namespace, + } + await options.store.put(event) + if (args.refersTo.kind === 'delegation') { + options.queue.attachFeedback(args.refersTo.ref, eventToSnapshot(event)) + } + return { recorded: true, id } + } +} + +function randomFeedbackId(): string { + const t = Date.now().toString(36) + const r = Math.random().toString(36).slice(2, 10) + return `fbk-${t}-${r}` +} diff --git a/src/mcp/tools/delegate-research.ts b/src/mcp/tools/delegate-research.ts new file mode 100644 index 0000000..ba7d3b4 --- /dev/null +++ b/src/mcp/tools/delegate-research.ts @@ -0,0 +1,219 @@ +/** + * @experimental + * + * `delegate_research` MCP tool — async kickoff for source-grounded + * research tasks. Same async semantics as `delegate_code`: returns a + * taskId immediately, idempotent on canonical inputs. + * + * The handler does not import a researcher profile directly — consumers + * inject a `ResearcherDelegate` via `createMcpServer({ researcherDelegate })`. + * The substrate cannot depend on `@tangle-network/agent-knowledge` + * without inducing a dependency cycle. + */ + +import type { ResearcherDelegate } from '../delegates' +import { + type DelegateResearchArgs, + type DelegateResearchResult, + type DelegationTaskQueue, + hashIdempotencyInput, +} from '../task-queue' +import type { ResearchSource } from '../types' + +/** @experimental */ +export const DELEGATE_RESEARCH_TOOL_NAME = 'delegate_research' + +/** @experimental */ +export const DELEGATE_RESEARCH_DESCRIPTION = [ + 'Delegate a research question to specialist researcher agents that produce', + 'source-grounded, evidence-bearing knowledge items.', + '', + 'Use when: you need to answer a factual question with external evidence —', + 'audience research, competitive intelligence, recency-bound web searches,', + 'corpus / docs lookups. The researcher emits items[] with provenance, a', + 'citations[] index, and proposedWrites[] you decide whether to persist.', + '', + 'Returns immediately with a taskId. Poll delegation_status to retrieve the', + 'items + verdict. Identical inputs return the same taskId — safe to retry.', + '', + 'When variants > 1, multiple researcher harnesses run in parallel and the', + 'highest-scoring valid output wins (citation density × source diversity ×', + 'recency match × gap coverage). Use variants when answers might disagree.', + '', + 'Multi-tenant isolation: every item carries `namespace`. The validator', + 'hard-fails when any item is scoped outside `namespace`. Never pass another', + "tenant's namespace.", +].join('\n') + +const VALID_SOURCES: readonly ResearchSource[] = ['web', 'corpus', 'twitter', 'github', 'docs'] + +/** @experimental */ +export const DELEGATE_RESEARCH_INPUT_SCHEMA = { + type: 'object', + properties: { + question: { + type: 'string', + description: 'The research question to answer.', + }, + namespace: { + type: 'string', + description: 'Multi-tenant scope (customer-id, workspace-id). REQUIRED.', + }, + scope: { type: 'string', description: 'Bound, e.g. "audience for cpg-founder ICP".' }, + sources: { + type: 'array', + items: { type: 'string', enum: [...VALID_SOURCES] }, + }, + variants: { type: 'integer', minimum: 1, maximum: 8 }, + config: { + type: 'object', + properties: { + recencyWindow: { + type: 'object', + properties: { + since: { type: 'string', description: 'ISO datetime' }, + until: { type: 'string', description: 'ISO datetime' }, + }, + additionalProperties: false, + }, + maxItems: { type: 'integer', minimum: 1 }, + minConfidence: { type: 'number', minimum: 0, maximum: 1 }, + }, + additionalProperties: false, + }, + }, + required: ['question', 'namespace'], + additionalProperties: false, +} as const + +const SINGLE_VARIANT_ESTIMATE_MS = 4 * 60 * 1000 +const FANOUT_PER_VARIANT_ESTIMATE_MS = 6 * 60 * 1000 + +/** @experimental */ +export function validateDelegateResearchArgs(raw: unknown): DelegateResearchArgs { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_research: arguments must be an object') + } + const value = raw as Record + const question = value.question + if (typeof question !== 'string' || question.trim().length === 0) { + throw new TypeError('delegate_research: `question` must be a non-empty string') + } + const namespace = value.namespace + if (typeof namespace !== 'string' || namespace.trim().length === 0) { + throw new TypeError('delegate_research: `namespace` is required') + } + const args: DelegateResearchArgs = { question: question.trim(), namespace: namespace.trim() } + if (typeof value.scope === 'string') args.scope = value.scope + if (value.sources !== undefined) { + if (!Array.isArray(value.sources)) { + throw new TypeError('delegate_research: `sources` must be a string array') + } + const sources: ResearchSource[] = value.sources.map((src, i) => { + if (typeof src !== 'string' || !VALID_SOURCES.includes(src as ResearchSource)) { + throw new TypeError( + `delegate_research: sources[${i}] must be one of ${VALID_SOURCES.join('|')}`, + ) + } + return src as ResearchSource + }) + args.sources = sources + } + if (value.variants !== undefined) { + const variants = Number(value.variants) + if (!Number.isFinite(variants) || variants < 1 || variants > 8) { + throw new RangeError('delegate_research: `variants` must be an integer in [1, 8]') + } + args.variants = Math.trunc(variants) + } + if (value.config !== undefined) { + args.config = validateConfig(value.config) + } + return args +} + +function validateConfig(raw: unknown): DelegateResearchArgs['config'] { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegate_research: `config` must be an object') + } + const value = raw as Record + const out: NonNullable = {} + if (value.recencyWindow !== undefined) { + if (value.recencyWindow === null || typeof value.recencyWindow !== 'object') { + throw new TypeError('delegate_research: `config.recencyWindow` must be an object') + } + const window = value.recencyWindow as Record + const windowOut: NonNullable['recencyWindow']> = {} + if (window.since !== undefined) { + if (typeof window.since !== 'string' || Number.isNaN(Date.parse(window.since))) { + throw new TypeError('delegate_research: `recencyWindow.since` must be an ISO datetime') + } + windowOut.since = window.since + } + if (window.until !== undefined) { + if (typeof window.until !== 'string' || Number.isNaN(Date.parse(window.until))) { + throw new TypeError('delegate_research: `recencyWindow.until` must be an ISO datetime') + } + windowOut.until = window.until + } + out.recencyWindow = windowOut + } + if (value.maxItems !== undefined) { + const n = Number(value.maxItems) + if (!Number.isFinite(n) || n < 1) { + throw new RangeError('delegate_research: `config.maxItems` must be a positive integer') + } + out.maxItems = Math.trunc(n) + } + if (value.minConfidence !== undefined) { + const n = Number(value.minConfidence) + if (!Number.isFinite(n) || n < 0 || n > 1) { + throw new RangeError('delegate_research: `config.minConfidence` must be in [0, 1]') + } + out.minConfidence = n + } + return out +} + +/** @experimental */ +export interface DelegateResearchHandlerOptions { + queue: DelegationTaskQueue + delegate: ResearcherDelegate + estimateDurationMs?: (args: DelegateResearchArgs) => number +} + +/** @experimental */ +export function createDelegateResearchHandler( + options: DelegateResearchHandlerOptions, +): (raw: unknown) => Promise { + const estimateDurationMs = options.estimateDurationMs ?? defaultEstimate + return async (raw) => { + const args = validateDelegateResearchArgs(raw) + const idempotencyKey = hashIdempotencyInput({ + profile: 'researcher', + question: args.question, + namespace: args.namespace, + scope: args.scope, + sources: args.sources, + variants: args.variants ?? 1, + config: args.config, + }) + const submitted = options.queue.submit({ + profile: 'researcher', + args, + namespace: args.namespace, + idempotencyKey, + run: async (ctx) => options.delegate(args, ctx), + }) + return { + taskId: submitted.taskId, + estimatedDurationMs: estimateDurationMs(args), + } + } +} + +function defaultEstimate(args: DelegateResearchArgs): number { + const variants = Math.max(1, args.variants ?? 1) + if (variants === 1) return SINGLE_VARIANT_ESTIMATE_MS + return FANOUT_PER_VARIANT_ESTIMATE_MS +} diff --git a/src/mcp/tools/delegation-history.ts b/src/mcp/tools/delegation-history.ts new file mode 100644 index 0000000..dae54fa --- /dev/null +++ b/src/mcp/tools/delegation-history.ts @@ -0,0 +1,98 @@ +/** + * @experimental + * + * `delegation_history` MCP tool — synchronous read of past delegations. + * The agent uses this for self-introspection — "have I delegated this + * kind of task before? did it work?" — and calibration. + */ + +import type { + DelegationHistoryArgs, + DelegationHistoryResult, + DelegationProfile, + DelegationTaskQueue, +} from '../task-queue' + +/** @experimental */ +export const DELEGATION_HISTORY_TOOL_NAME = 'delegation_history' + +/** @experimental */ +export const DELEGATION_HISTORY_DESCRIPTION = [ + 'Read past delegations newest-first. Each entry carries the original', + 'arguments, current status, cost, and any feedback attached via', + 'delegate_feedback.', + '', + 'Use when: you want to introspect prior decisions — "have I asked this', + 'question before?', + 'did the last patch land?', + "what's the historical", + 'success rate of coder delegations on this repo?". Feed the results back', + 'into your own routing and calibration.', + '', + 'Filters: `namespace` (multi-tenant scope), `profile` ("coder" | "researcher"),', + '`since` (ISO date — only delegations started at-or-after). `limit` defaults', + 'to 50, capped at 500.', +].join('\n') + +/** @experimental */ +export const DELEGATION_HISTORY_INPUT_SCHEMA = { + type: 'object', + properties: { + namespace: { type: 'string' }, + profile: { type: 'string', enum: ['coder', 'researcher'] }, + since: { type: 'string', description: 'ISO datetime — earliest startedAt to include.' }, + limit: { type: 'integer', minimum: 1, maximum: 500 }, + }, + additionalProperties: false, +} as const + +/** @experimental */ +export function validateDelegationHistoryArgs(raw: unknown): DelegationHistoryArgs { + if (raw === undefined || raw === null) return {} + if (typeof raw !== 'object') { + throw new TypeError('delegation_history: arguments must be an object') + } + const value = raw as Record + const out: DelegationHistoryArgs = {} + if (value.namespace !== undefined) { + if (typeof value.namespace !== 'string') { + throw new TypeError('delegation_history: `namespace` must be a string') + } + out.namespace = value.namespace + } + if (value.profile !== undefined) { + if (value.profile !== 'coder' && value.profile !== 'researcher') { + throw new TypeError('delegation_history: `profile` must be "coder" or "researcher"') + } + out.profile = value.profile as DelegationProfile + } + if (value.since !== undefined) { + if (typeof value.since !== 'string' || Number.isNaN(Date.parse(value.since))) { + throw new TypeError('delegation_history: `since` must be an ISO datetime') + } + out.since = value.since + } + if (value.limit !== undefined) { + const n = Number(value.limit) + if (!Number.isFinite(n) || n < 1 || n > 500) { + throw new RangeError('delegation_history: `limit` must be an integer in [1, 500]') + } + out.limit = Math.trunc(n) + } + return out +} + +/** @experimental */ +export interface DelegationHistoryHandlerOptions { + queue: DelegationTaskQueue +} + +/** @experimental */ +export function createDelegationHistoryHandler( + options: DelegationHistoryHandlerOptions, +): (raw: unknown) => Promise { + return async (raw) => { + const args = validateDelegationHistoryArgs(raw) + return { delegations: options.queue.history(args) } + } +} diff --git a/src/mcp/tools/delegation-status.ts b/src/mcp/tools/delegation-status.ts new file mode 100644 index 0000000..d2e14a5 --- /dev/null +++ b/src/mcp/tools/delegation-status.ts @@ -0,0 +1,76 @@ +/** + * @experimental + * + * `delegation_status` MCP tool — synchronous poll. Returns the current + * state machine + optional progress + final result (when terminal). + */ + +import { NotFoundError } from '../../errors' +import type { + DelegationStatusArgs, + DelegationStatusResult, + DelegationTaskQueue, +} from '../task-queue' + +/** @experimental */ +export const DELEGATION_STATUS_TOOL_NAME = 'delegation_status' + +/** @experimental */ +export const DELEGATION_STATUS_DESCRIPTION = [ + 'Poll the status of an async delegation. Returns the current state', + '(pending | running | completed | failed | cancelled), optional progress,', + 'and the final result when status === "completed".', + '', + 'Use when: you previously called delegate_code or delegate_research and', + "need to know whether the work is done. The agent's right rhythm is to", + 'call this every minute or two while waiting; do not busy-poll.', + '', + 'For a completed coder task, `result.output` is a CoderOutput with branch,', + 'patch, test/typecheck results, and diff stats. For a completed research', + 'task, `result.output` is the items + citations + proposedWrites bundle.', + '', + 'Throws NotFoundError when taskId is unknown — never silently returns', + '`pending` for a typo.', +].join('\n') + +/** @experimental */ +export const DELEGATION_STATUS_INPUT_SCHEMA = { + type: 'object', + properties: { + taskId: { type: 'string', description: 'Returned by delegate_code / delegate_research.' }, + }, + required: ['taskId'], + additionalProperties: false, +} as const + +/** @experimental */ +export function validateDelegationStatusArgs(raw: unknown): DelegationStatusArgs { + if (raw === null || typeof raw !== 'object') { + throw new TypeError('delegation_status: arguments must be an object') + } + const value = raw as Record + const taskId = value.taskId + if (typeof taskId !== 'string' || taskId.trim().length === 0) { + throw new TypeError('delegation_status: `taskId` must be a non-empty string') + } + return { taskId: taskId.trim() } +} + +/** @experimental */ +export interface DelegationStatusHandlerOptions { + queue: DelegationTaskQueue +} + +/** @experimental */ +export function createDelegationStatusHandler( + options: DelegationStatusHandlerOptions, +): (raw: unknown) => Promise { + return async (raw) => { + const args = validateDelegationStatusArgs(raw) + const status = options.queue.status(args.taskId) + if (!status) { + throw new NotFoundError(`delegation_status: unknown taskId "${args.taskId}"`) + } + return status + } +} diff --git a/src/mcp/types.ts b/src/mcp/types.ts new file mode 100644 index 0000000..29be887 --- /dev/null +++ b/src/mcp/types.ts @@ -0,0 +1,223 @@ +/** + * @experimental + * + * MCP delegation tool surface — the typed inputs/outputs the product agent + * sees over the wire. These types are the contract; the JSON schemas under + * `tools/*` mirror them for the MCP `tools/list` advertisement. + * + * Async semantics: `delegate_code` + `delegate_research` return a `taskId` + * immediately. The product agent polls `delegation_status` until the task + * transitions to `completed` | `failed` | `cancelled`. `delegate_feedback` + * + `delegation_history` are synchronous reads / writes against the local + * task queue + feedback store. + */ + +import type { CoderOutput, CoderTask } from '../profiles/coder' + +/** @experimental */ +export type DelegationProfile = 'coder' | 'researcher' + +/** @experimental */ +export type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled' + +/** + * Minimal `CoderTask` overrides exposed over the MCP wire. The full + * `CoderTask` carries fields the kernel synthesizes from `goal` + + * `repoRoot` — the agent only edits the few that materially gate + * validator behavior. + * + * @experimental + */ +export interface DelegateCodeConfig { + testCmd?: string + typecheckCmd?: string + forbiddenPaths?: string[] + maxDiffLines?: number +} + +/** @experimental */ +export interface DelegateCodeArgs { + /** Natural-language description of what the coder must accomplish. */ + goal: string + /** Absolute path inside the sandbox where the repo lives. */ + repoRoot: string + /** Optional free-form context the agent surfaces in the prompt prelude. */ + contextHint?: string + /** + * When > 1, dispatches `multiHarnessCoderFanout` across N harnesses + * (claude-code, codex, opencode-glm) and picks the highest-scoring + * passing patch. Default 1. + */ + variants?: number + /** Validator + prompt overrides the agent knows for this repo. */ + config?: DelegateCodeConfig + /** Multi-tenant scope (customer-id, workspace-id). */ + namespace?: string +} + +/** @experimental */ +export interface DelegateCodeResult { + taskId: string + /** Best-effort hint — coder loops can take minutes-to-hours. */ + estimatedDurationMs?: number +} + +/** @experimental */ +export type ResearchSource = 'web' | 'corpus' | 'twitter' | 'github' | 'docs' + +/** @experimental */ +export interface DelegateResearchConfig { + recencyWindow?: { since?: string; until?: string } + maxItems?: number + minConfidence?: number +} + +/** @experimental */ +export interface DelegateResearchArgs { + question: string + namespace: string + scope?: string + sources?: ResearchSource[] + variants?: number + config?: DelegateResearchConfig +} + +/** @experimental */ +export interface DelegateResearchResult { + taskId: string + estimatedDurationMs?: number +} + +/** @experimental */ +export interface FeedbackRefersTo { + kind: 'delegation' | 'artifact' | 'outcome' + /** For `'delegation'`, this is the taskId. */ + ref: string +} + +/** @experimental */ +export interface FeedbackRating { + /** [0, 1]. */ + score: number + label?: 'good' | 'bad' | 'neutral' | 'mixed' + notes: string +} + +/** @experimental */ +export interface DelegateFeedbackArgs { + refersTo: FeedbackRefersTo + rating: FeedbackRating + by: 'agent' | 'user' | 'downstream-judge' + /** ISO timestamp; defaults to server clock when omitted. */ + capturedAt?: string + namespace?: string +} + +/** @experimental */ +export interface DelegateFeedbackResult { + recorded: true + id: string +} + +/** @experimental */ +export interface DelegationStatusArgs { + taskId: string +} + +/** @experimental */ +export interface DelegationProgress { + iteration: number + phase: string +} + +/** @experimental */ +export interface DelegationError { + message: string + kind: string +} + +/** + * Polymorphic `result` field: `CoderOutput` when the underlying profile + * is `'coder'`, a structurally-typed research output when `'researcher'`. + * The MCP wire carries it as JSON either way. + * + * @experimental + */ +export type DelegationResultPayload = + | { profile: 'coder'; output: CoderOutput } + | { profile: 'researcher'; output: ResearchOutputShape } + +/** + * Loose shape of a research output over the wire — the substrate cannot + * import the `ResearchOutput` type from agent-knowledge without inducing + * a dependency cycle, so the MCP layer treats it structurally. + * + * @experimental + */ +export interface ResearchOutputShape { + items: unknown[] + citations: unknown[] + proposedWrites: unknown[] + gaps?: string[] + notes?: string + [key: string]: unknown +} + +/** @experimental */ +export interface DelegationStatusResult { + taskId: string + profile: DelegationProfile + status: DelegationStatus + progress?: DelegationProgress + result?: DelegationResultPayload + error?: DelegationError + costUsd?: number + startedAt: string + completedAt?: string +} + +/** @experimental */ +export interface DelegationHistoryArgs { + namespace?: string + profile?: DelegationProfile + /** ISO date — only delegations started at-or-after `since` are returned. */ + since?: string + /** Default 50. Hard cap 500. */ + limit?: number +} + +/** @experimental */ +export interface DelegationFeedbackSnapshot { + id: string + score: number + label?: FeedbackRating['label'] + by: DelegateFeedbackArgs['by'] + notes: string + capturedAt: string +} + +/** @experimental */ +export interface DelegationHistoryEntry { + taskId: string + profile: DelegationProfile + namespace?: string + args: DelegateCodeArgs | DelegateResearchArgs + status: DelegationStatus + feedback?: DelegationFeedbackSnapshot[] + costUsd?: number + startedAt: string + completedAt?: string +} + +/** @experimental */ +export interface DelegationHistoryResult { + delegations: DelegationHistoryEntry[] +} + +/** + * Re-export `CoderTask` so callers wiring custom delegates don't have to + * import from `./profiles` themselves. + * + * @experimental + */ +export type { CoderOutput, CoderTask } diff --git a/tests/mcp/delegate-code.test.ts b/tests/mcp/delegate-code.test.ts new file mode 100644 index 0000000..ddd0b85 --- /dev/null +++ b/tests/mcp/delegate-code.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from 'vitest' +import type { CoderDelegate } from '../../src/mcp/delegates' +import { DelegationTaskQueue } from '../../src/mcp/task-queue' +import { + createDelegateCodeHandler, + DELEGATE_CODE_DESCRIPTION, + DELEGATE_CODE_INPUT_SCHEMA, + DELEGATE_CODE_TOOL_NAME, + validateDelegateCodeArgs, +} from '../../src/mcp/tools/delegate-code' + +const stubOutput = { + branch: 'feat/x', + patch: '', + testResult: { passed: true, output: '' }, + typecheckResult: { passed: true, output: '' }, + diffStats: { filesChanged: 0, insertions: 0, deletions: 0 }, +} + +const stubDelegate: CoderDelegate = async () => stubOutput + +describe('validateDelegateCodeArgs', () => { + it('accepts the minimal required surface', () => { + const args = validateDelegateCodeArgs({ goal: 'fix', repoRoot: '/r' }) + expect(args).toEqual({ goal: 'fix', repoRoot: '/r' }) + }) + + it('rejects an empty goal', () => { + expect(() => validateDelegateCodeArgs({ goal: ' ', repoRoot: '/r' })).toThrow(TypeError) + }) + + it('rejects variants outside [1, 8]', () => { + expect(() => validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 0 })).toThrow( + RangeError, + ) + expect(() => validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 9 })).toThrow( + RangeError, + ) + }) + + it('rejects non-string forbiddenPaths entries', () => { + expect(() => + validateDelegateCodeArgs({ + goal: 'g', + repoRoot: '/r', + config: { forbiddenPaths: ['ok', 1] }, + }), + ).toThrow(TypeError) + }) + + it('coerces variants to a positive integer', () => { + const args = validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 3.7 }) + expect(args.variants).toBe(3) + }) +}) + +describe('createDelegateCodeHandler', () => { + it('returns a taskId and estimated duration on a happy-path call', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) + const result = await handler({ goal: 'fix', repoRoot: '/r' }) + expect(result.taskId).toMatch(/^dlg-/) + expect(result.estimatedDurationMs).toBeGreaterThan(0) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + expect(queue.status(result.taskId)?.status).toBe('completed') + }) + + it('is idempotent: duplicate identical input returns the same taskId', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) + const first = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 }) + const second = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 }) + expect(second.taskId).toBe(first.taskId) + }) + + it('returns a different taskId when contextHint differs', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) + const first = await handler({ goal: 'fix', repoRoot: '/r' }) + const second = await handler({ goal: 'fix', repoRoot: '/r', contextHint: 'see issue #42' }) + expect(second.taskId).not.toBe(first.taskId) + }) + + it('propagates validation errors out of the handler', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) + await expect(handler({ goal: '', repoRoot: '/r' })).rejects.toThrow(/non-empty string/) + }) + + it('surfaces delegate exceptions via the queue status', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ + queue, + delegate: async () => { + throw new TypeError('bad sandbox') + }, + }) + const { taskId } = await handler({ goal: 'fix', repoRoot: '/r' }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + const status = queue.status(taskId) + expect(status?.status).toBe('failed') + expect(status?.error?.message).toBe('bad sandbox') + expect(status?.error?.kind).toBe('TypeError') + }) +}) + +describe('tool descriptors', () => { + it('exposes a non-empty description that explains when to call', () => { + expect(DELEGATE_CODE_TOOL_NAME).toBe('delegate_code') + expect(DELEGATE_CODE_DESCRIPTION).toMatch(/Use when:/) + }) + it('declares goal and repoRoot as required', () => { + expect((DELEGATE_CODE_INPUT_SCHEMA as { required: string[] }).required).toEqual([ + 'goal', + 'repoRoot', + ]) + }) +}) diff --git a/tests/mcp/delegate-feedback.test.ts b/tests/mcp/delegate-feedback.test.ts new file mode 100644 index 0000000..953bde2 --- /dev/null +++ b/tests/mcp/delegate-feedback.test.ts @@ -0,0 +1,135 @@ +import { describe, expect, it } from 'vitest' +import { InMemoryFeedbackStore } from '../../src/mcp/feedback-store' +import { DelegationTaskQueue } from '../../src/mcp/task-queue' +import { + createDelegateFeedbackHandler, + DELEGATE_FEEDBACK_DESCRIPTION, + validateDelegateFeedbackArgs, +} from '../../src/mcp/tools/delegate-feedback' + +describe('validateDelegateFeedbackArgs', () => { + it('requires refersTo, rating, and by', () => { + expect(() => validateDelegateFeedbackArgs({})).toThrow() + expect(() => + validateDelegateFeedbackArgs({ + refersTo: { kind: 'delegation', ref: 'dlg-1' }, + rating: { score: 1, notes: 'good' }, + }), + ).toThrow(/`by`/) + }) + + it('clamps score validity to [0, 1]', () => { + expect(() => + validateDelegateFeedbackArgs({ + refersTo: { kind: 'delegation', ref: 'r' }, + rating: { score: 1.1, notes: 'oops' }, + by: 'agent', + }), + ).toThrow(RangeError) + }) + + it('rejects unknown labels', () => { + expect(() => + validateDelegateFeedbackArgs({ + refersTo: { kind: 'delegation', ref: 'r' }, + rating: { score: 0.5, notes: 'n', label: 'maybe' }, + by: 'agent', + }), + ).toThrow(TypeError) + }) + + it('accepts the three refersTo.kind values', () => { + for (const kind of ['delegation', 'artifact', 'outcome'] as const) { + const out = validateDelegateFeedbackArgs({ + refersTo: { kind, ref: 'r' }, + rating: { score: 0.5, notes: 'n' }, + by: 'agent', + }) + expect(out.refersTo.kind).toBe(kind) + } + }) +}) + +describe('createDelegateFeedbackHandler', () => { + it('persists every event without deduping (append-only)', async () => { + const queue = new DelegationTaskQueue() + const store = new InMemoryFeedbackStore() + const handler = createDelegateFeedbackHandler({ queue, store }) + const args = { + refersTo: { kind: 'artifact' as const, ref: 'file://x' }, + rating: { score: 0.8, notes: 'looks ok' }, + by: 'agent' as const, + } + const a = await handler(args) + const b = await handler(args) + expect(a.id).not.toBe(b.id) + expect((await store.list()).length).toBe(2) + }) + + it('isolates events by namespace', async () => { + const queue = new DelegationTaskQueue() + const store = new InMemoryFeedbackStore() + const handler = createDelegateFeedbackHandler({ queue, store }) + await handler({ + refersTo: { kind: 'outcome', ref: 'x' }, + rating: { score: 0.5, notes: 'n' }, + by: 'user', + namespace: 'a', + }) + await handler({ + refersTo: { kind: 'outcome', ref: 'x' }, + rating: { score: 0.5, notes: 'n' }, + by: 'user', + namespace: 'b', + }) + expect((await store.list({ namespace: 'a' })).length).toBe(1) + expect((await store.list({ namespace: 'b' })).length).toBe(1) + expect((await store.list({ namespace: 'c' })).length).toBe(0) + }) + + it('attaches the feedback snapshot to the queue record when refersTo.kind === "delegation"', async () => { + const queue = new DelegationTaskQueue() + const store = new InMemoryFeedbackStore() + const submitted = queue.submit({ + profile: 'coder', + args: { goal: 'g', repoRoot: '/r' }, + run: async () => ({}) as never, + }) + await new Promise((r) => setImmediate(r)) + const handler = createDelegateFeedbackHandler({ queue, store }) + const { id } = await handler({ + refersTo: { kind: 'delegation', ref: submitted.taskId }, + rating: { score: 0.9, notes: 'great patch', label: 'good' }, + by: 'user', + }) + const [entry] = queue.history() + expect(entry?.feedback?.length).toBe(1) + expect(entry?.feedback?.[0]?.id).toBe(id) + expect(entry?.feedback?.[0]?.label).toBe('good') + }) + + it('does not attach when refersTo.kind is artifact/outcome', async () => { + const queue = new DelegationTaskQueue() + const store = new InMemoryFeedbackStore() + const submitted = queue.submit({ + profile: 'coder', + args: { goal: 'g', repoRoot: '/r' }, + run: async () => ({}) as never, + }) + await new Promise((r) => setImmediate(r)) + const handler = createDelegateFeedbackHandler({ queue, store }) + await handler({ + refersTo: { kind: 'artifact', ref: submitted.taskId }, + rating: { score: 0.1, notes: 'mistake — wrong kind' }, + by: 'agent', + }) + const [entry] = queue.history() + expect(entry?.feedback).toBeUndefined() + }) + + it('description explains kind variants', () => { + expect(DELEGATE_FEEDBACK_DESCRIPTION).toMatch(/delegation/) + expect(DELEGATE_FEEDBACK_DESCRIPTION).toMatch(/artifact/) + expect(DELEGATE_FEEDBACK_DESCRIPTION).toMatch(/outcome/) + }) +}) diff --git a/tests/mcp/delegate-research.test.ts b/tests/mcp/delegate-research.test.ts new file mode 100644 index 0000000..56d5931 --- /dev/null +++ b/tests/mcp/delegate-research.test.ts @@ -0,0 +1,84 @@ +import { describe, expect, it } from 'vitest' +import type { ResearcherDelegate } from '../../src/mcp/delegates' +import { DelegationTaskQueue } from '../../src/mcp/task-queue' +import { + createDelegateResearchHandler, + DELEGATE_RESEARCH_DESCRIPTION, + DELEGATE_RESEARCH_INPUT_SCHEMA, + DELEGATE_RESEARCH_TOOL_NAME, + validateDelegateResearchArgs, +} from '../../src/mcp/tools/delegate-research' + +const stubOutput = { items: [], citations: [], proposedWrites: [] } +const stubDelegate: ResearcherDelegate = async () => stubOutput + +describe('validateDelegateResearchArgs', () => { + it('requires question + namespace', () => { + expect(() => validateDelegateResearchArgs({ namespace: 'x' })).toThrow(/question/) + expect(() => validateDelegateResearchArgs({ question: 'q?' })).toThrow(/namespace/) + }) + + it('rejects unknown source types', () => { + expect(() => + validateDelegateResearchArgs({ question: 'q?', namespace: 'x', sources: ['rss'] }), + ).toThrow(TypeError) + }) + + it('validates recencyWindow datetimes', () => { + expect(() => + validateDelegateResearchArgs({ + question: 'q?', + namespace: 'x', + config: { recencyWindow: { since: 'not-a-date' } }, + }), + ).toThrow(TypeError) + const ok = validateDelegateResearchArgs({ + question: 'q?', + namespace: 'x', + config: { recencyWindow: { since: '2026-01-01T00:00:00Z' } }, + }) + expect(ok.config?.recencyWindow?.since).toBe('2026-01-01T00:00:00Z') + }) + + it('rejects minConfidence outside [0, 1]', () => { + expect(() => + validateDelegateResearchArgs({ + question: 'q?', + namespace: 'x', + config: { minConfidence: 1.5 }, + }), + ).toThrow(RangeError) + }) +}) + +describe('createDelegateResearchHandler', () => { + it('returns a taskId with the researcher profile and isolates by namespace', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateResearchHandler({ queue, delegate: stubDelegate }) + const { taskId } = await handler({ question: 'q?', namespace: 'tenant-a' }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + const status = queue.status(taskId) + expect(status?.profile).toBe('researcher') + expect(status?.status).toBe('completed') + expect(queue.history({ namespace: 'tenant-a' }).length).toBe(1) + expect(queue.history({ namespace: 'tenant-b' }).length).toBe(0) + }) + + it('is idempotent on identical inputs', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateResearchHandler({ queue, delegate: stubDelegate }) + const a = await handler({ question: 'q?', namespace: 'x' }) + const b = await handler({ question: 'q?', namespace: 'x' }) + expect(b.taskId).toBe(a.taskId) + }) + + it('exposes a description that warns about cross-tenant namespace risk', () => { + expect(DELEGATE_RESEARCH_TOOL_NAME).toBe('delegate_research') + expect(DELEGATE_RESEARCH_DESCRIPTION).toMatch(/namespace/i) + expect((DELEGATE_RESEARCH_INPUT_SCHEMA as { required: string[] }).required).toEqual([ + 'question', + 'namespace', + ]) + }) +}) diff --git a/tests/mcp/delegation-history.test.ts b/tests/mcp/delegation-history.test.ts new file mode 100644 index 0000000..a969269 --- /dev/null +++ b/tests/mcp/delegation-history.test.ts @@ -0,0 +1,128 @@ +import { describe, expect, it } from 'vitest' +import { InMemoryFeedbackStore } from '../../src/mcp/feedback-store' +import { DelegationTaskQueue } from '../../src/mcp/task-queue' +import { createDelegateFeedbackHandler } from '../../src/mcp/tools/delegate-feedback' +import { + createDelegationHistoryHandler, + validateDelegationHistoryArgs, +} from '../../src/mcp/tools/delegation-history' + +function settleTwice(): Promise { + return new Promise((resolve) => { + setImmediate(() => setImmediate(() => resolve())) + }) +} + +describe('validateDelegationHistoryArgs', () => { + it('accepts an empty object', () => { + expect(validateDelegationHistoryArgs({})).toEqual({}) + }) + it('rejects an out-of-range limit', () => { + expect(() => validateDelegationHistoryArgs({ limit: 501 })).toThrow(RangeError) + }) + it('rejects an unknown profile', () => { + expect(() => validateDelegationHistoryArgs({ profile: 'judge' })).toThrow(TypeError) + }) +}) + +describe('createDelegationHistoryHandler', () => { + it('filters by namespace, profile, since', async () => { + let ts = 1_700_000_000_000 + const queue = new DelegationTaskQueue({ + now: () => { + ts += 1000 + return new Date(ts).toISOString() + }, + }) + queue.submit({ + profile: 'coder', + args: { goal: 'g', repoRoot: '/r' }, + namespace: 'a', + run: async () => ({}) as never, + }) + queue.submit({ + profile: 'researcher', + args: { question: 'q', namespace: 'a' }, + namespace: 'a', + run: async () => ({}) as never, + }) + queue.submit({ + profile: 'coder', + args: { goal: 'g2', repoRoot: '/r' }, + namespace: 'b', + run: async () => ({}) as never, + }) + await settleTwice() + const handler = createDelegationHistoryHandler({ queue }) + const all = await handler({}) + expect(all.delegations.length).toBe(3) + const aOnly = await handler({ namespace: 'a' }) + expect(aOnly.delegations.length).toBe(2) + expect(aOnly.delegations.every((entry) => entry.namespace === 'a')).toBe(true) + const coderOnly = await handler({ profile: 'coder' }) + expect(coderOnly.delegations.every((entry) => entry.profile === 'coder')).toBe(true) + expect(coderOnly.delegations.length).toBe(2) + }) + + it('honors limit', async () => { + const queue = new DelegationTaskQueue() + for (let i = 0; i < 5; i += 1) { + queue.submit({ + profile: 'coder', + args: { goal: `g${i}`, repoRoot: '/r' }, + run: async () => ({}) as never, + }) + } + await settleTwice() + const handler = createDelegationHistoryHandler({ queue }) + const limited = await handler({ limit: 2 }) + expect(limited.delegations.length).toBe(2) + }) + + it('surfaces feedback cross-references inline', async () => { + const queue = new DelegationTaskQueue() + const store = new InMemoryFeedbackStore() + const submitted = queue.submit({ + profile: 'coder', + args: { goal: 'g', repoRoot: '/r' }, + run: async () => ({}) as never, + }) + await settleTwice() + const feedback = createDelegateFeedbackHandler({ queue, store }) + await feedback({ + refersTo: { kind: 'delegation', ref: submitted.taskId }, + rating: { score: 0.7, notes: 'shipped' }, + by: 'user', + }) + const history = await createDelegationHistoryHandler({ queue })({}) + const entry = history.delegations.find((e) => e.taskId === submitted.taskId) + expect(entry?.feedback?.[0]?.notes).toBe('shipped') + }) + + it('filters by `since`', async () => { + let ts = 1_700_000_000_000 + const queue = new DelegationTaskQueue({ + now: () => { + ts += 1000 + return new Date(ts).toISOString() + }, + }) + queue.submit({ + profile: 'coder', + args: { goal: 'g1', repoRoot: '/r' }, + run: async () => ({}) as never, + }) + await settleTwice() + const cutoff = new Date(ts + 500).toISOString() + queue.submit({ + profile: 'coder', + args: { goal: 'g2', repoRoot: '/r' }, + run: async () => ({}) as never, + }) + await settleTwice() + const handler = createDelegationHistoryHandler({ queue }) + const recent = await handler({ since: cutoff }) + expect(recent.delegations.length).toBe(1) + expect((recent.delegations[0]?.args as { goal: string }).goal).toBe('g2') + }) +}) diff --git a/tests/mcp/delegation-status.test.ts b/tests/mcp/delegation-status.test.ts new file mode 100644 index 0000000..3e83ad7 --- /dev/null +++ b/tests/mcp/delegation-status.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it } from 'vitest' +import { NotFoundError } from '../../src/errors' +import { DelegationTaskQueue } from '../../src/mcp/task-queue' +import { + createDelegationStatusHandler, + validateDelegationStatusArgs, +} from '../../src/mcp/tools/delegation-status' + +function deferred(): { + promise: Promise + resolve: (value: T) => void +} { + let resolve!: (value: T) => void + const promise = new Promise((r) => { + resolve = r + }) + return { promise, resolve } +} + +describe('validateDelegationStatusArgs', () => { + it('requires a non-empty taskId', () => { + expect(() => validateDelegationStatusArgs({})).toThrow(TypeError) + expect(() => validateDelegationStatusArgs({ taskId: ' ' })).toThrow(TypeError) + }) +}) + +describe('createDelegationStatusHandler', () => { + it('walks the lifecycle pending → running → completed', async () => { + const queue = new DelegationTaskQueue() + const d = deferred<{ ok: true }>() + const submitted = queue.submit({ + profile: 'coder', + args: { goal: 'g', repoRoot: '/r' }, + run: async () => await d.promise, + }) + const handler = createDelegationStatusHandler({ queue }) + const pending = await handler({ taskId: submitted.taskId }) + expect(pending.status).toBe('pending') + await new Promise((r) => setImmediate(r)) + const running = await handler({ taskId: submitted.taskId }) + expect(running.status).toBe('running') + d.resolve({ ok: true }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + const completed = await handler({ taskId: submitted.taskId }) + expect(completed.status).toBe('completed') + expect(completed.completedAt).toBeTypeOf('string') + }) + + it('throws NotFoundError for unknown taskIds', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegationStatusHandler({ queue }) + await expect(handler({ taskId: 'dlg-missing' })).rejects.toBeInstanceOf(NotFoundError) + }) + + it('surfaces failed status with the error payload', async () => { + const queue = new DelegationTaskQueue() + const submitted = queue.submit({ + profile: 'coder', + args: { goal: 'g', repoRoot: '/r' }, + run: async () => { + throw new Error('boom') + }, + }) + const handler = createDelegationStatusHandler({ queue }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + const status = await handler({ taskId: submitted.taskId }) + expect(status.status).toBe('failed') + expect(status.error?.message).toBe('boom') + }) +}) diff --git a/tests/mcp/idempotency.test.ts b/tests/mcp/idempotency.test.ts new file mode 100644 index 0000000..2d2b5e8 --- /dev/null +++ b/tests/mcp/idempotency.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from 'vitest' +import type { CoderDelegate, ResearcherDelegate } from '../../src/mcp/delegates' +import { DelegationTaskQueue } from '../../src/mcp/task-queue' +import { createDelegateCodeHandler } from '../../src/mcp/tools/delegate-code' +import { createDelegateResearchHandler } from '../../src/mcp/tools/delegate-research' + +const coderStub: CoderDelegate = async () => ({ + branch: 'feat/x', + patch: '', + testResult: { passed: true, output: '' }, + typecheckResult: { passed: true, output: '' }, + diffStats: { filesChanged: 0, insertions: 0, deletions: 0 }, +}) + +const researcherStub: ResearcherDelegate = async () => ({ + items: [], + citations: [], + proposedWrites: [], +}) + +describe('MCP idempotency — duplicate calls return the same taskId', () => { + it('coder: same arguments → same taskId; counted once in history', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ queue, delegate: coderStub }) + const args = { goal: 'fix nav', repoRoot: '/r', variants: 1, config: { maxDiffLines: 100 } } + const a = await handler(args) + const b = await handler(args) + const c = await handler(args) + expect(b.taskId).toBe(a.taskId) + expect(c.taskId).toBe(a.taskId) + expect(queue.history().length).toBe(1) + }) + + it('coder: mutated config produces a fresh taskId', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateCodeHandler({ queue, delegate: coderStub }) + const a = await handler({ goal: 'fix', repoRoot: '/r', config: { maxDiffLines: 100 } }) + const b = await handler({ goal: 'fix', repoRoot: '/r', config: { maxDiffLines: 200 } }) + expect(b.taskId).not.toBe(a.taskId) + }) + + it('researcher: same arguments → same taskId', async () => { + const queue = new DelegationTaskQueue() + const handler = createDelegateResearchHandler({ queue, delegate: researcherStub }) + const a = await handler({ question: 'who?', namespace: 'n', sources: ['web', 'twitter'] }) + const b = await handler({ question: 'who?', namespace: 'n', sources: ['twitter', 'web'] }) + // The hash canonicalizes by key sort, but `sources` is an array — order matters. + // This ensures order-sensitivity for arrays (the agent should pass a canonical order). + expect(b.taskId).not.toBe(a.taskId) + const c = await handler({ question: 'who?', namespace: 'n', sources: ['web', 'twitter'] }) + expect(c.taskId).toBe(a.taskId) + }) +}) diff --git a/tests/mcp/server-integration.test.ts b/tests/mcp/server-integration.test.ts new file mode 100644 index 0000000..cbe1f45 --- /dev/null +++ b/tests/mcp/server-integration.test.ts @@ -0,0 +1,181 @@ +import { describe, expect, it } from 'vitest' +import type { CoderDelegate, ResearcherDelegate } from '../../src/mcp/delegates' +import { + createInProcessTransport, + createMcpServer, + type JsonRpcResponse, +} from '../../src/mcp/server' + +const coderStub: CoderDelegate = async () => ({ + branch: 'feat/y', + patch: '', + testResult: { passed: true, output: 'ok' }, + typecheckResult: { passed: true, output: 'ok' }, + diffStats: { filesChanged: 1, insertions: 1, deletions: 0 }, +}) + +const researcherStub: ResearcherDelegate = async () => ({ + items: [ + { + id: 'i-1', + namespace: 'tenant-a', + claim: 'cpg-founders use Twitter heavily', + evidence: [{ source: 'twitter', capturedAt: 0 }], + confidence: 0.6, + authoredBy: { kind: 'agent', id: 'r' }, + }, + ], + citations: [{ url: 'https://x.com', quote: 'q', confidence: 0.5 }], + proposedWrites: [], +}) + +async function rpcCall( + server: ReturnType, + method: string, + params: Record = {}, + id: number = 1, +): Promise { + return server.handle({ jsonrpc: '2.0', id, method, params }) +} + +describe('createMcpServer — JSON-RPC surface', () => { + it('responds to initialize + tools/list with the registered tools', async () => { + const server = createMcpServer({ + coderDelegate: coderStub, + researcherDelegate: researcherStub, + }) + const init = await rpcCall(server, 'initialize', {}, 0) + expect(init?.result).toMatchObject({ + protocolVersion: '2024-11-05', + capabilities: { tools: {} }, + serverInfo: { name: 'agent-runtime-mcp' }, + }) + const listed = await rpcCall(server, 'tools/list', {}, 1) + const names = (listed?.result as { tools: { name: string }[] }).tools.map((t) => t.name).sort() + expect(names).toEqual([ + 'delegate_code', + 'delegate_feedback', + 'delegate_research', + 'delegation_history', + 'delegation_status', + ]) + }) + + it('omits delegate_code when coderDelegate is not wired', async () => { + const server = createMcpServer({ researcherDelegate: researcherStub }) + const listed = await rpcCall(server, 'tools/list', {}, 1) + const names = (listed?.result as { tools: { name: string }[] }).tools.map((t) => t.name) + expect(names).not.toContain('delegate_code') + expect(names).toContain('delegate_research') + }) + + it('routes tools/call through the handler and returns structuredContent', async () => { + const server = createMcpServer({ coderDelegate: coderStub }) + const call = await rpcCall(server, 'tools/call', { + name: 'delegate_code', + arguments: { goal: 'fix bug', repoRoot: '/r' }, + }) + const result = call?.result as { + content: { type: string; text: string }[] + structuredContent: { taskId: string } + } + expect(result.content[0]?.type).toBe('text') + const parsed = JSON.parse(result.content[0]!.text) as { taskId: string } + expect(parsed.taskId).toBe(result.structuredContent.taskId) + expect(parsed.taskId).toMatch(/^dlg-/) + }) + + it('returns -32602 on validation failures', async () => { + const server = createMcpServer({ coderDelegate: coderStub }) + const call = await rpcCall(server, 'tools/call', { + name: 'delegate_code', + arguments: { goal: '', repoRoot: '/r' }, + }) + expect(call?.error?.code).toBe(-32602) + }) + + it('returns -32601 for unknown tools', async () => { + const server = createMcpServer({ coderDelegate: coderStub }) + const call = await rpcCall(server, 'tools/call', { name: 'delegate_evaluation' }) + expect(call?.error?.code).toBe(-32601) + }) + + it('drives the full lifecycle end-to-end: delegate → status → feedback → history', async () => { + const server = createMcpServer({ + coderDelegate: coderStub, + researcherDelegate: researcherStub, + }) + const created = await rpcCall(server, 'tools/call', { + name: 'delegate_research', + arguments: { question: 'who engages cpg-founders?', namespace: 'tenant-a' }, + }) + const taskId = (created?.result as { structuredContent: { taskId: string } }).structuredContent + .taskId + + // Wait for the async run to settle. + for (let i = 0; i < 5; i += 1) await new Promise((r) => setImmediate(r)) + + const status = await rpcCall(server, 'tools/call', { + name: 'delegation_status', + arguments: { taskId }, + }) + expect( + (status?.result as { structuredContent: { status: string } }).structuredContent.status, + ).toBe('completed') + + const feedback = await rpcCall(server, 'tools/call', { + name: 'delegate_feedback', + arguments: { + refersTo: { kind: 'delegation', ref: taskId }, + rating: { score: 0.85, label: 'good', notes: 'cited the right source' }, + by: 'agent', + namespace: 'tenant-a', + }, + }) + expect( + (feedback?.result as { structuredContent: { recorded: true; id: string } }).structuredContent + .recorded, + ).toBe(true) + + const history = await rpcCall(server, 'tools/call', { + name: 'delegation_history', + arguments: { namespace: 'tenant-a' }, + }) + const entries = ( + history?.result as { + structuredContent: { delegations: Array<{ taskId: string; feedback?: unknown[] }> } + } + ).structuredContent.delegations + expect(entries.find((e) => e.taskId === taskId)?.feedback?.length).toBe(1) + }) +}) + +describe('createMcpServer — stdio transport', () => { + it('handles a single JSON-RPC line through serve() and writes a response', async () => { + const server = createMcpServer({ coderDelegate: coderStub }) + const { transport, clientWrite, clientClose, readServer } = createInProcessTransport() + const servePromise = server.serve(transport) + clientWrite(JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'initialize' })) + clientWrite(JSON.stringify({ jsonrpc: '2.0', id: 2, method: 'tools/list' })) + // Allow drain. + for (let i = 0; i < 6; i += 1) await new Promise((r) => setImmediate(r)) + const responses = await readServer() + expect(responses.length).toBeGreaterThanOrEqual(2) + const list = responses.find((r) => r.id === 2) + expect((list?.result as { tools: unknown[] }).tools.length).toBe(4) + clientClose() + await servePromise + }) + + it('emits a parse error for malformed JSON', async () => { + const server = createMcpServer({ coderDelegate: coderStub }) + const { transport, clientWrite, clientClose, readServer } = createInProcessTransport() + const servePromise = server.serve(transport) + clientWrite('{not json') + for (let i = 0; i < 4; i += 1) await new Promise((r) => setImmediate(r)) + const responses = await readServer() + expect(responses[0]?.error?.code).toBe(-32700) + clientClose() + await servePromise + }) +}) diff --git a/tests/mcp/task-queue.test.ts b/tests/mcp/task-queue.test.ts new file mode 100644 index 0000000..f0b67ac --- /dev/null +++ b/tests/mcp/task-queue.test.ts @@ -0,0 +1,235 @@ +import { describe, expect, it } from 'vitest' +import { DelegationTaskQueue, hashIdempotencyInput } from '../../src/mcp/task-queue' +import type { DelegateCodeArgs, DelegateResearchArgs } from '../../src/mcp/types' + +const codeArgs: DelegateCodeArgs = { goal: 'fix bug', repoRoot: '/repo' } +const researchArgs: DelegateResearchArgs = { question: 'q?', namespace: 'ns-1' } + +function deferred(): { + promise: Promise + resolve: (value: T) => void + reject: (reason: unknown) => void +} { + let resolve!: (value: T) => void + let reject!: (reason: unknown) => void + const promise = new Promise((res, rej) => { + resolve = res + reject = rej + }) + return { promise, resolve, reject } +} + +describe('DelegationTaskQueue.submit', () => { + it('returns a taskId immediately and tracks pending → running → completed', async () => { + const queue = new DelegationTaskQueue() + const d = deferred<{ + output: { + branch: string + patch: string + testResult: { passed: boolean; output: string } + typecheckResult: { passed: boolean; output: string } + diffStats: { filesChanged: number; insertions: number; deletions: number } + } + }>() + const { taskId, reused } = queue.submit({ + profile: 'coder', + args: codeArgs, + run: async () => (await d.promise).output, + }) + expect(reused).toBe(false) + expect(taskId).toMatch(/^dlg-/) + expect(queue.status(taskId)?.status).toBe('pending') + // microtask tick to flip to running + await new Promise((r) => setImmediate(r)) + expect(queue.status(taskId)?.status).toBe('running') + d.resolve({ + output: { + branch: 'b', + patch: '', + testResult: { passed: true, output: '' }, + typecheckResult: { passed: true, output: '' }, + diffStats: { filesChanged: 0, insertions: 0, deletions: 0 }, + }, + }) + await new Promise((r) => setImmediate(r)) + const status = queue.status(taskId) + expect(status?.status).toBe('completed') + expect(status?.completedAt).toBeTypeOf('string') + expect(status?.result?.profile).toBe('coder') + }) + + it('records failure when the run function throws', async () => { + const queue = new DelegationTaskQueue() + const { taskId } = queue.submit({ + profile: 'coder', + args: codeArgs, + run: async () => { + throw new Error('runner exploded') + }, + }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + const status = queue.status(taskId) + expect(status?.status).toBe('failed') + expect(status?.error?.message).toBe('runner exploded') + expect(status?.error?.kind).toBe('Error') + }) + + it('reuses a taskId when idempotencyKey matches a known record', () => { + const queue = new DelegationTaskQueue() + const key = hashIdempotencyInput({ x: 1 }) + const a = queue.submit({ + profile: 'coder', + args: codeArgs, + idempotencyKey: key, + run: async () => ({}) as never, + }) + const b = queue.submit({ + profile: 'coder', + args: codeArgs, + idempotencyKey: key, + run: async () => ({}) as never, + }) + expect(b.reused).toBe(true) + expect(b.taskId).toBe(a.taskId) + }) + + it('reports progress updates from the run function', async () => { + const queue = new DelegationTaskQueue() + const d = deferred() + const { taskId } = queue.submit({ + profile: 'researcher', + args: researchArgs, + run: async (ctx) => { + ctx.report({ iteration: 1, phase: 'searching' }) + await d.promise + ctx.report({ iteration: 2, phase: 'finalizing' }) + return { items: [], citations: [], proposedWrites: [] } + }, + }) + await new Promise((r) => setImmediate(r)) + expect(queue.status(taskId)?.progress).toEqual({ iteration: 1, phase: 'searching' }) + d.resolve() + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + expect(queue.status(taskId)?.status).toBe('completed') + }) +}) + +describe('DelegationTaskQueue.cancel', () => { + it('aborts an in-flight run and marks the record cancelled', async () => { + const queue = new DelegationTaskQueue() + let abortObserved = false + const { taskId } = queue.submit({ + profile: 'coder', + args: codeArgs, + run: async (ctx) => + new Promise((_, reject) => { + ctx.signal.addEventListener('abort', () => { + abortObserved = true + reject(new DOMException('aborted', 'AbortError')) + }) + }), + }) + await new Promise((r) => setImmediate(r)) + expect(queue.cancel(taskId)).toBe(true) + await new Promise((r) => setImmediate(r)) + const status = queue.status(taskId) + expect(status?.status).toBe('cancelled') + expect(status?.error?.kind).toBe('CancelledError') + expect(abortObserved).toBe(true) + }) + + it('returns false for unknown taskIds and is idempotent on terminal records', async () => { + const queue = new DelegationTaskQueue() + expect(queue.cancel('nope')).toBe(false) + const { taskId } = queue.submit({ + profile: 'coder', + args: codeArgs, + run: async () => { + throw new Error('synthetic failure') + }, + }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + expect(queue.status(taskId)?.status).toBe('failed') + expect(queue.cancel(taskId)).toBe(false) + }) +}) + +describe('DelegationTaskQueue.history + attachFeedback', () => { + it('filters by namespace, profile, and since; orders newest-first', async () => { + let n = 0 + const queue = new DelegationTaskQueue({ + now: () => { + n += 1 + return new Date(1700000000000 + n * 1000).toISOString() + }, + }) + queue.submit({ + profile: 'coder', + args: codeArgs, + namespace: 'a', + run: async () => ({}) as never, + }) + queue.submit({ + profile: 'researcher', + args: researchArgs, + namespace: 'a', + run: async () => ({}) as never, + }) + queue.submit({ + profile: 'coder', + args: codeArgs, + namespace: 'b', + run: async () => ({}) as never, + }) + await new Promise((r) => setImmediate(r)) + await new Promise((r) => setImmediate(r)) + expect(queue.history({ namespace: 'a' }).map((e) => e.profile)).toEqual(['researcher', 'coder']) + expect(queue.history({ profile: 'coder' }).map((e) => e.namespace)).toEqual(['b', 'a']) + expect(queue.history({ limit: 1 }).length).toBe(1) + }) + + it('attaches feedback snapshots to the matching record', async () => { + const queue = new DelegationTaskQueue() + const { taskId } = queue.submit({ + profile: 'coder', + args: codeArgs, + run: async () => ({}) as never, + }) + await new Promise((r) => setImmediate(r)) + expect( + queue.attachFeedback(taskId, { + id: 'fbk-1', + score: 0.9, + by: 'agent', + notes: 'looks good', + capturedAt: new Date().toISOString(), + }), + ).toBe(true) + expect( + queue.attachFeedback('nope', { + id: 'fbk-2', + score: 0.1, + by: 'agent', + notes: 'irrelevant', + capturedAt: new Date().toISOString(), + }), + ).toBe(false) + const [entry] = queue.history() + expect(entry?.feedback?.[0]?.notes).toBe('looks good') + }) +}) + +describe('hashIdempotencyInput', () => { + it('produces stable hashes regardless of property order', () => { + expect(hashIdempotencyInput({ a: 1, b: 2 })).toBe(hashIdempotencyInput({ b: 2, a: 1 })) + }) + it('differs when nested values differ', () => { + expect(hashIdempotencyInput({ a: 1 })).not.toBe(hashIdempotencyInput({ a: 2 })) + }) + it('treats undefined as absent', () => { + expect(hashIdempotencyInput({ a: 1, b: undefined })).toBe(hashIdempotencyInput({ a: 1 })) + }) +}) diff --git a/tsup.config.ts b/tsup.config.ts index 7f29af4..99e54b3 100644 --- a/tsup.config.ts +++ b/tsup.config.ts @@ -8,6 +8,8 @@ export default defineConfig({ agent: 'src/agent/index.ts', loops: 'src/loops/index.ts', profiles: 'src/profiles/index.ts', + 'mcp/index': 'src/mcp/index.ts', + 'mcp/bin': 'src/mcp/bin.ts', }, format: ['esm'], dts: true,