From 08955705e40237c946ff70f76d3f96438ad2948a Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 24 May 2026 11:02:05 -0600
Subject: [PATCH] feat(0.19.0): driven-loop kernel (sandbox-SDK-based) +
 coderProfile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 0 of the driven-loop substrate. Ships:

- `@tangle-network/agent-runtime/loops` — `runLoop` kernel + Refine and
  FanoutVote drivers, built on the sandbox SDK's `AgentProfile` +
  `streamPrompt` contract. The kernel orchestrates around the sandbox
  SDK; it does not invent its own notion of "what an agent is".
- `@tangle-network/agent-runtime/profiles` — `coderProfile` +
  `multiHarnessCoderFanout`. Bundle an `AgentProfile`, task-to-prompt
  formatter, output adapter, and per-task validator (forbidden paths,
  diff cap, tests + typecheck) into a runLoop-ready unit.

Layering:

  sandbox SDK              AgentProfile + Sandbox + streamPrompt
  agent-runtime/loops      runLoop kernel + drivers
  agent-runtime/profiles   presets (coder; researcher in Phase 1)
  agent-runtime existing   UNTOUCHED — runAgentTask, RuntimeRunHandle etc

Kernel responsibilities: iteration accounting, parallel execution
bounded by `maxConcurrency`, abort propagation, cost aggregation from
sandbox `llm_call`-shaped events (with optional `runHandle.observe`
forwarding), and trace emission via `LoopTraceEmitter`.

Driver responsibilities: topology only. Refine returns `[task]` until
the validator passes; FanoutVote returns N copies on iteration 0 then
selects the highest-scoring valid output. Drivers receive a read-only
history and a typed decision channel; the kernel terminates on
`'stop' | 'pick-winner' | 'fail' | 'done'`.

Output adapter parses an event array → typed Output. Validator scores
the typed Output → DefaultVerdict. Both are pure functions; tests
exercise them without a real sandbox.

Heterogeneous fanout is built in: pass `agentRuns: AgentRunSpec[]` and
the kernel round-robins through them when the driver plans N tasks.
`multiHarnessCoderFanout` ships a 3-harness default (claude-code,
codex, opencode/zai-coding-plan/glm-5.1).

Tests (25 new, all 154 pass):
  - tests/loops/refine.test.ts (7) — refine-until-valid, maxIter cap,
    error capture, trace event ordering, cost aggregation
  - tests/loops/fanout-vote.test.ts (6) — winner selection, fail mode,
    `maxConcurrency` enforcement, heterogeneous agentRuns, error
    handling on missing options
  - tests/loops/composition.test.ts (2) — recursive runLoop in
    Driver.plan; static typecheck of nested kernel calls
  - tests/profiles/coder.test.ts (10) — task-bound validator
    (forbidden-path, diff cap, tests, typecheck), score math, output
    adapter (structured result + fenced-JSON fallback), multi-harness
    fanout shape

Build, typecheck, lint clean. Existing 129 tests untouched.

Smoke test (manual; requires sandbox credentials):

  cd /home/drew/code/agent-runtime && pnpm build
  TANGLE_SANDBOX_API_KEY=... TANGLE_ORCHESTRATOR_URL=... node --input-type=module -e "
    import { Sandbox } from '@tangle-network/sandbox'
    import { runLoop, createFanoutVoteDriver } from './dist/loops.js'
    import { multiHarnessCoderFanout } from './dist/profiles.js'
    const client = new Sandbox({ apiKey: process.env.TANGLE_SANDBOX_API_KEY,
                                  baseUrl: process.env.TANGLE_ORCHESTRATOR_URL })
    const { agentRuns, output, validator, driver } = multiHarnessCoderFanout()
    const result = await runLoop({
      driver, agentRuns, output, validator,
      task: { goal: 'add a hello function', repoRoot: '/work/repo' },
      ctx: { sandboxClient: client },
    })
    console.log(result.decision, result.winner?.iterationIndex, result.costUsd)
  "

Out of scope (Phase 1+): researcherProfile, sandboxedDriver helper,
MCP wrapper, Council/Decompose/Pipeline topologies, agent-eval refactor.
---
 package.json                     |  12 +-
 src/loops/drivers/fanout-vote.ts | 102 ++++++
 src/loops/drivers/refine.ts      |  79 +++++
 src/loops/index.ts               |  49 +++
 src/loops/run-loop.ts            | 534 +++++++++++++++++++++++++++++++
 src/loops/trace.ts               |  22 ++
 src/loops/types.ts               | 235 ++++++++++++++
 src/profiles/coder.ts            | 398 +++++++++++++++++++++++
 src/profiles/index.ts            |  16 +
 tests/loops/composition.test.ts  | 179 +++++++++++
 tests/loops/fanout-vote.test.ts  | 281 ++++++++++++++++
 tests/loops/refine.test.ts       | 283 ++++++++++++++++
 tests/profiles/coder.test.ts     | 186 +++++++++++
 tsup.config.ts                   |   2 +
 14 files changed, 2377 insertions(+), 1 deletion(-)
 create mode 100644 src/loops/drivers/fanout-vote.ts
 create mode 100644 src/loops/drivers/refine.ts
 create mode 100644 src/loops/index.ts
 create mode 100644 src/loops/run-loop.ts
 create mode 100644 src/loops/trace.ts
 create mode 100644 src/loops/types.ts
 create mode 100644 src/profiles/coder.ts
 create mode 100644 src/profiles/index.ts
 create mode 100644 tests/loops/composition.test.ts
 create mode 100644 tests/loops/fanout-vote.test.ts
 create mode 100644 tests/loops/refine.test.ts
 create mode 100644 tests/profiles/coder.test.ts

diff --git a/package.json b/package.json
index 28fbdde..1d44356 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.18.0",
+  "version": "0.19.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {
@@ -33,6 +33,16 @@
       "types": "./dist/agent.d.ts",
       "import": "./dist/agent.js",
       "default": "./dist/agent.js"
+    },
+    "./loops": {
+      "types": "./dist/loops.d.ts",
+      "import": "./dist/loops.js",
+      "default": "./dist/loops.js"
+    },
+    "./profiles": {
+      "types": "./dist/profiles.d.ts",
+      "import": "./dist/profiles.js",
+      "default": "./dist/profiles.js"
     }
   },
   "files": [
diff --git a/src/loops/drivers/fanout-vote.ts b/src/loops/drivers/fanout-vote.ts
new file mode 100644
index 0000000..9139499
--- /dev/null
+++ b/src/loops/drivers/fanout-vote.ts
@@ -0,0 +1,102 @@
+/**
+ * @experimental
+ *
+ * FanoutVote driver — N parallel attempts in iteration 0, pick the highest-
+ * scoring valid output. No second iteration: the topology is "spawn N, score,
+ * pick winner". The kernel handles heterogeneous fanout via the
+ * `agentRuns: AgentRunSpec[]` form on `runLoop`.
+ */
+
+import { ValidationError } from '../../errors'
+import type { DefaultVerdict, Driver, Iteration } from '../types'
+
+export type FanoutVoteDecision = 'pick-winner' | 'fail'
+
+/** @experimental */
+export interface FanoutVoteScored<Task, Output> {
+  task: Task
+  output: Output
+  verdict?: DefaultVerdict
+  iterationIndex: number
+  agentRunName: string
+}
+
+/** @experimental */
+export interface CreateFanoutVoteDriverOptions<Task, Output> {
+  /** Number of parallel attempts. Must be >= 1. */
+  n: number
+  /**
+   * Pick the winner from the scored set. Default: highest `verdict.score`
+   * among valid outputs (ties broken by smallest iteration index). When
+   * no valid outputs exist, returns `undefined` and `decide()` resolves
+   * to `'fail'`. The kernel still records winners structurally — this
+   * selector only feeds `decide()`'s pass/fail signal.
+   */
+  selector?: (
+    scored: FanoutVoteScored<Task, Output>[],
+  ) => FanoutVoteScored<Task, Output> | undefined
+  /** Stable identifier surfaced in trace events. Default `'fanout-vote'`. */
+  name?: string
+}
+
+/** @experimental */
+export function createFanoutVoteDriver<Task, Output>(
+  options: CreateFanoutVoteDriverOptions<Task, Output>,
+): Driver<Task, Output, FanoutVoteDecision> {
+  if (!Number.isFinite(options.n) || options.n < 1) {
+    throw new ValidationError(`createFanoutVoteDriver: n must be >= 1, got ${options.n}`)
+  }
+  const selector = options.selector ?? defaultSelector
+  return {
+    name: options.name ?? 'fanout-vote',
+    async plan(task, history) {
+      if (history.length === 0) return Array.from({ length: options.n }, () => task)
+      return []
+    },
+    decide(history) {
+      const scored = scoreIterations(history)
+      return selector(scored) ? 'pick-winner' : 'fail'
+    },
+  }
+}
+
+/** Default selector: best-scoring valid entry; earliest iteration wins ties. */
+function defaultSelector<Task, Output>(
+  scored: FanoutVoteScored<Task, Output>[],
+): FanoutVoteScored<Task, Output> | undefined {
+  type Entry = FanoutVoteScored<Task, Output>
+  const byScoreThenIndex = (x: Entry, y: Entry) =>
+    (y.verdict?.score ?? 0) - (x.verdict?.score ?? 0) || x.iterationIndex - y.iterationIndex
+  // `filter` already returns a fresh array, so the in-place sort cannot touch `scored`.
+  return scored.filter((entry) => entry.verdict?.valid === true).sort(byScoreThenIndex)[0]
+}
+
+/** Project iterations that produced an output without erroring into the selector's view. */
+function scoreIterations<Task, Output>(
+  iterations: ReadonlyArray<Iteration<Task, Output>>,
+): FanoutVoteScored<Task, Output>[] {
+  return iterations.flatMap((entry) => {
+    if (entry.output === undefined || entry.error) return []
+    const projected: FanoutVoteScored<Task, Output> = {
+      task: entry.task,
+      output: entry.output,
+      verdict: entry.verdict,
+      iterationIndex: entry.index,
+      agentRunName: entry.agentRunName,
+    }
+    return [projected]
+  })
+}
+
+/**
+ * Test helper: surface the per-iteration scored view a custom `selector`
+ * would receive. Exposed so consumers writing a custom selector can test it
+ * standalone without driving the full kernel.
+ *
+ * @experimental
+ */
+export function scoreFanoutVoteIterations<Task, Output>(
+  iterations: ReadonlyArray<Iteration<Task, Output>>,
+): FanoutVoteScored<Task, Output>[] {
+  return scoreIterations(iterations)
+}
diff --git a/src/loops/drivers/refine.ts b/src/loops/drivers/refine.ts
new file mode 100644
index 0000000..b069b7e
--- /dev/null
+++ b/src/loops/drivers/refine.ts
@@ -0,0 +1,79 @@
+/**
+ * @experimental
+ *
+ * Refine driver — single task per iteration, validator-gated.
+ *
+ * `plan` returns `[task]` (possibly transformed via `refineTask`) until the
+ * prior verdict is valid OR the local cap is hit, then `[]`.
+ * `decide` returns `'stop'` once the latest verdict is valid OR the cap is
+ * reached. The kernel's `maxIterations` is an orthogonal safety cap;
+ * whichever is lower wins.
+ */
+
+import { ValidationError } from '../../errors'
+import type { DefaultVerdict, Driver, Iteration } from '../types'
+
+export type RefineDecision = 'continue' | 'stop'
+
+/** @experimental */
+export interface CreateRefineDriverOptions<Task> {
+  /** Hard cap on iterations. Default 5. */
+  maxIterations?: number
+  /**
+   * Optional task transform applied each round based on the prior verdict.
+   * When omitted, the same task is replayed and the agent is expected to
+   * inspect the sandbox session state for prior attempts.
+   */
+  refineTask?: (task: Task, prior: DefaultVerdict) => Task
+  /** Stable identifier surfaced in trace events. Default `'refine'`. */
+  name?: string
+}
+
+/** @experimental */
+export function createRefineDriver<Task, Output>(
+  options: CreateRefineDriverOptions<Task> = {},
+): Driver<Task, Output, RefineDecision> {
+  const maxIterations = options.maxIterations ?? 5
+  if (!Number.isFinite(maxIterations) || maxIterations <= 0) {
+    throw new ValidationError('createRefineDriver: maxIterations must be > 0')
+  }
+  const refineTask = options.refineTask
+  return {
+    name: options.name ?? 'refine',
+    async plan(task, history) {
+      if (history.length >= maxIterations) return []
+      if (history.length === 0) return [task]
+      const prior = history.at(-1)
+      if (!prior) return [task]
+      if (prior.verdict?.valid === true) return []
+      // Worker error: replay the same task so the agent can self-correct.
+      // The driver has no signal beyond `verdict`; only the validator
+      // controls "good enough".
+      if (!refineTask || !prior.verdict) return [prior.task]
+      return [refineTask(prior.task, prior.verdict)]
+    },
+    decide(history) {
+      const last = history.at(-1)
+      if (!last) return 'continue'
+      if (last.verdict?.valid === true) return 'stop'
+      if (history.length >= maxIterations) return 'stop'
+      return 'continue'
+    },
+  }
+}
+
+/**
+ * Test helper: position of the most recent iteration whose verdict is valid,
+ * or of the last attempt if none passed (`undefined` for an empty history).
+ * Recency wins; unlike `runLoop`'s default winner selection it ignores `verdict.score`.
+ *
+ * @experimental
+ */
+export function refineWinnerIndex<Task, Output>(
+  iterations: ReadonlyArray<Iteration<Task, Output>>,
+): number | undefined {
+  for (let i = iterations.length - 1; i >= 0; i -= 1) {
+    if (iterations[i]?.verdict?.valid) return i
+  }
+  return iterations.length > 0 ? iterations.length - 1 : undefined
+}
diff --git a/src/loops/index.ts b/src/loops/index.ts
new file mode 100644
index 0000000..8ace184
--- /dev/null
+++ b/src/loops/index.ts
@@ -0,0 +1,49 @@
+/**
+ * @experimental
+ *
+ * Driven-loop substrate. `runLoop` orchestrates around the sandbox SDK; it
+ * does not invent its own notion of "what an agent is". Each iteration is
+ * a `sandboxClient.create({ backend: { profile } })` + `box.streamPrompt`
+ * call. The driver owns topology; the validator owns scoring; the output
+ * adapter owns event-stream decode; the kernel owns iteration accounting,
+ * concurrency, abort, cost aggregation, and trace emission.
+ */
+
+// One-stop import: sandbox-SDK types consumers need to spell out an
+// `AgentRunSpec` without importing `@tangle-network/sandbox` separately.
+export type {
+  AgentProfile,
+  CreateSandboxOptions,
+  SandboxEvent,
+  SandboxInstance,
+} from '@tangle-network/sandbox'
+export type {
+  CreateFanoutVoteDriverOptions,
+  FanoutVoteDecision,
+  FanoutVoteScored,
+} from './drivers/fanout-vote'
+export { createFanoutVoteDriver, scoreFanoutVoteIterations } from './drivers/fanout-vote'
+export type { CreateRefineDriverOptions, RefineDecision } from './drivers/refine'
+export { createRefineDriver, refineWinnerIndex } from './drivers/refine'
+export type { RunLoopOptions } from './run-loop'
+export { runLoop } from './run-loop'
+export type {
+  AgentRunSpec,
+  DefaultVerdict,
+  Driver,
+  ExecCtx,
+  Iteration,
+  LoopDecisionPayload,
+  LoopEndedPayload,
+  LoopIterationEndedPayload,
+  LoopIterationStartedPayload,
+  LoopResult,
+  LoopSandboxClient,
+  LoopStartedPayload,
+  LoopTraceEmitter,
+  LoopTraceEvent,
+  LoopWinner,
+  OutputAdapter,
+  ValidationCtx,
+  Validator,
+} from './types'
diff --git a/src/loops/run-loop.ts b/src/loops/run-loop.ts
new file mode 100644
index 0000000..d359942
--- /dev/null
+++ b/src/loops/run-loop.ts
@@ -0,0 +1,534 @@
+/**
+ * @experimental
+ *
+ * `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
+ *
+ * Each iteration:
+ *   1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
+ *   2. For each task (parallel, bounded by `maxConcurrency`):
+ *        a. round-robin an `AgentRunSpec` from `agentRuns`
+ *        b. `sandboxClient.create({ backend: { profile }, ...overrides })`
+ *        c. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
+ *   3. `output.parse(events)` → typed `Output`
+ *   4. `validator?.validate(output)` → `DefaultVerdict`
+ *   5. Append `Iteration` to history; emit `loop.iteration.ended`
+ *   6. `driver.decide(history)` → if terminal, return result + winner
+ *
+ * The kernel owns: iteration accounting, per-iteration timing, error
+ * capture, abort propagation, concurrency cap, cost aggregation, and trace
+ * emission. The kernel does NOT own: what the agent runs (sandbox SDK +
+ * profile), how outputs are decoded (output adapter), how outputs are
+ * scored (validator), or topology (driver).
+ */
+
+import type {
+  AgentProfile,
+  CreateSandboxOptions,
+  SandboxEvent,
+  SandboxInstance,
+} from '@tangle-network/sandbox'
+import { ValidationError } from '../errors'
+import type { RuntimeStreamEvent } from '../types'
+import type {
+  AgentRunSpec,
+  Driver,
+  ExecCtx,
+  Iteration,
+  LoopResult,
+  LoopSandboxClient,
+  LoopTraceEmitter,
+  LoopTraceEvent,
+  LoopWinner,
+  OutputAdapter,
+  Validator,
+} from './types'
+
+const DEFAULT_MAX_ITERATIONS = 10
+const DEFAULT_MAX_CONCURRENCY = 4
+
+/** @experimental */
+export interface RunLoopOptions<Task, Output, Decision> {
+  driver: Driver<Task, Output, Decision>
+  /**
+   * Single agent spec — every iteration uses this profile. Mutually
+   * exclusive with `agentRuns`.
+   */
+  agentRun?: AgentRunSpec<Task>
+  /**
+   * Multiple specs for heterogeneous fanout. The kernel round-robins
+   * through them when the driver plans N tasks. Mutually exclusive with
+   * `agentRun`.
+   */
+  agentRuns?: AgentRunSpec<Task>[]
+  output: OutputAdapter<Output>
+  validator?: Validator<Output>
+  task: Task
+  ctx: ExecCtx
+  /** Default 10. Hard cap on total iterations across all `plan()` rounds. */
+  maxIterations?: number
+  /** Default 4. In-flight worker cap within a single `plan()` batch. */
+  maxConcurrency?: number
+  /**
+   * Pre-allocated id for trace correlation. Default = `loop-${random}`.
+   * Surfaces as `runId` on every emitted `LoopTraceEvent`.
+   */
+  runId?: string
+  /**
+   * Clock override; default `Date.now`. Deterministic tests pass a
+   * monotonic counter to stabilize iteration timing fields.
+   */
+  now?: () => number
+  /**
+   * Override the default winner selector (highest-valid-score, ties broken
+   * by earliest iteration).
+   */
+  selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined
+}
+
+/**
+ * Run a driven loop: `plan()` a batch, execute it in sandboxes, then `decide()`,
+ * repeating until a terminal decision, an empty plan, or the iteration cap.
+ *
+ * @returns The final decision, full iteration history, selected winner, and totals.
+ * @throws ValidationError for invalid options; an `Error` named `AbortError` on abort.
+ *
+ * @experimental
+ */
+export async function runLoop<Task, Output, Decision>(
+  options: RunLoopOptions<Task, Output, Decision>,
+): Promise<LoopResult<Task, Output, Decision>> {
+  const specs = resolveAgentRuns(options)
+  // Floor fractional caps: `slice(0, remaining)` truncates, so a cap such as 2.5
+  // would otherwise keep planning empty batches forever once 2 iterations ran.
+  const maxIterations = Math.floor(options.maxIterations ?? DEFAULT_MAX_ITERATIONS)
+  if (!Number.isFinite(maxIterations) || maxIterations <= 0) {
+    throw new ValidationError('runLoop: maxIterations must be > 0')
+  }
+  const maxConcurrency = options.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY
+  if (!Number.isFinite(maxConcurrency) || maxConcurrency <= 0) {
+    throw new ValidationError('runLoop: maxConcurrency must be > 0')
+  }
+  if (!options.ctx?.sandboxClient || typeof options.ctx.sandboxClient.create !== 'function') {
+    throw new ValidationError('runLoop: ctx.sandboxClient.create is required')
+  }
+  const now = options.now ?? Date.now
+  const runId = options.runId ?? `loop-${randomSuffix()}`
+  const loopStart = now()
+  const driverName = options.driver.name ?? 'driver'
+  const iterations: Iteration<Task, Output>[] = []
+
+  await emitTrace(options.ctx.traceEmitter, {
+    kind: 'loop.started',
+    runId,
+    timestamp: now(),
+    payload: {
+      driver: driverName,
+      agentRunNames: specs.map((spec) => spec.name ?? spec.profile.name ?? 'agent'),
+      maxIterations,
+      maxConcurrency,
+    },
+  })
+
+  const controller = new AbortController()
+  const onOuterAbort = () => controller.abort()
+  if (options.ctx.signal) {
+    if (options.ctx.signal.aborted) controller.abort()
+    else options.ctx.signal.addEventListener('abort', onOuterAbort, { once: true })
+  }
+
+  try {
+    while (iterations.length < maxIterations) {
+      if (controller.signal.aborted) throwAbort()
+      const planned = await options.driver.plan(options.task, iterations)
+      if (planned.length === 0) break
+
+      const remaining = maxIterations - iterations.length
+      const slice = planned.slice(0, remaining)
+      const baseIndex = iterations.length
+      // Reserve slots up front so concurrent workers may mutate by index.
+      for (let i = 0; i < slice.length; i += 1) {
+        const spec = specs[(baseIndex + i) % specs.length]!
+        iterations.push({
+          index: baseIndex + i,
+          task: slice[i] as Task,
+          agentRunName: spec.name ?? spec.profile.name ?? 'agent',
+          events: [],
+          startedAt: now(),
+          endedAt: 0,
+          costUsd: 0,
+        })
+      }
+
+      await runBatch({
+        slice,
+        baseIndex,
+        iterations,
+        specs,
+        output: options.output,
+        validator: options.validator,
+        maxConcurrency,
+        signal: controller.signal,
+        ctx: options.ctx,
+        runId,
+        now,
+      })
+
+      if (controller.signal.aborted) throwAbort()
+
+      const decision = await options.driver.decide(iterations)
+      await emitTrace(options.ctx.traceEmitter, {
+        kind: 'loop.decision',
+        runId,
+        timestamp: now(),
+        payload: { decision: serializeDecision(decision), historyLength: iterations.length },
+      })
+      if (isTerminalDecision(decision)) {
+        return finalize({
+          options,
+          decision,
+          iterations,
+          startMs: loopStart,
+          now,
+          runId,
+        })
+      }
+    }
+
+    // No terminal decision inside the loop: either the `maxIterations` cap was
+    // reached or `plan()` returned `[]`. Ask the driver one more time for its
+    // final state, then close out.
+    const decision = await options.driver.decide(iterations)
+    await emitTrace(options.ctx.traceEmitter, {
+      kind: 'loop.decision',
+      runId,
+      timestamp: now(),
+      payload: { decision: serializeDecision(decision), historyLength: iterations.length },
+    })
+    return finalize({ options, decision, iterations, startMs: loopStart, now, runId })
+  } finally {
+    if (options.ctx.signal) options.ctx.signal.removeEventListener('abort', onOuterAbort)
+  }
+}
+
+interface RunBatchArgs<Task, Output> {
+  slice: Task[]
+  baseIndex: number
+  iterations: Iteration<Task, Output>[]
+  specs: AgentRunSpec<Task>[]
+  output: OutputAdapter<Output>
+  validator: Validator<Output> | undefined
+  maxConcurrency: number
+  signal: AbortSignal
+  ctx: ExecCtx
+  runId: string
+  now: () => number
+}
+
+async function runBatch<Task, Output>(args: RunBatchArgs<Task, Output>) {
+  const queue = args.slice.map((task, offset) => ({ task, index: args.baseIndex + offset }))
+  const inflight = new Set<Promise<void>>()
+  while (queue.length > 0 || inflight.size > 0) {
+    while (inflight.size < args.maxConcurrency && queue.length > 0) {
+      const item = queue.shift()!
+      const p = executeIteration({ ...args, item }).finally(() => inflight.delete(p))
+      inflight.add(p)
+    }
+    if (inflight.size === 0) break
+    await Promise.race(inflight)
+  }
+}
+
+interface ExecuteIterationArgs<Task, Output> extends RunBatchArgs<Task, Output> {
+  item: { task: Task; index: number }
+}
+
+/**
+ * Run one planned task in a fresh sandbox, recording events, cost, output, and
+ * verdict on its reserved slot; failures inside the run land on `slot.error`.
+ */
+async function executeIteration<Task, Output>(args: ExecuteIterationArgs<Task, Output>) {
+  const slot = args.iterations[args.item.index]
+  if (!slot)
+    throw new ValidationError(`runLoop: missing iteration slot at index ${args.item.index}`)
+  const spec = args.specs[args.item.index % args.specs.length]
+  if (!spec) throw new ValidationError('runLoop: no AgentRunSpec available for iteration')
+  slot.startedAt = args.now()
+  slot.agentRunName = spec.name ?? spec.profile.name ?? 'agent'
+
+  await emitTrace(args.ctx.traceEmitter, {
+    kind: 'loop.iteration.started',
+    runId: args.runId,
+    timestamp: args.now(),
+    payload: {
+      iterationIndex: args.item.index,
+      agentRunName: slot.agentRunName,
+      taskHash: hashJson(args.item.task),
+    },
+  })
+
+  try {
+    const box = await createSandboxForSpec(args.ctx.sandboxClient, spec, args.signal)
+    const message = spec.taskToPrompt(args.item.task)
+    const events: SandboxEvent[] = []
+    slot.events = events // bound before streaming so partial events survive a mid-stream error
+    for await (const event of box.streamPrompt(message, { signal: args.signal })) {
+      events.push(event)
+      const llmCall = extractLlmCallEvent(event, slot.agentRunName)
+      if (llmCall) {
+        slot.costUsd += llmCall.costUsd ?? 0
+        args.ctx.runHandle?.observe(llmCall)
+      }
+    }
+    slot.output = args.output.parse(events)
+    const validationCtx = { iteration: args.item.index, signal: args.signal }
+    if (args.validator) slot.verdict = await args.validator.validate(slot.output, validationCtx)
+  } catch (err) {
+    slot.error = err instanceof Error ? err : new Error(String(err))
+  } finally {
+    slot.endedAt = args.now()
+    await emitTrace(args.ctx.traceEmitter, {
+      kind: 'loop.iteration.ended',
+      runId: args.runId,
+      timestamp: args.now(),
+      payload: {
+        iterationIndex: args.item.index,
+        agentRunName: slot.agentRunName,
+        outputHash: slot.output !== undefined ? hashJson(slot.output) : undefined,
+        verdict: slot.verdict,
+        error: slot.error?.message,
+        costUsd: slot.costUsd,
+        durationMs: slot.endedAt - slot.startedAt,
+      },
+    })
+  }
+}
+
+async function createSandboxForSpec<Task>(
+  client: LoopSandboxClient,
+  spec: AgentRunSpec<Task>,
+  signal: AbortSignal,
+): Promise<SandboxInstance> {
+  const overrides = spec.sandboxOverrides ?? {}
+  const overrideBackend = overrides.backend
+  const opts: CreateSandboxOptions = {
+    ...overrides,
+    backend: {
+      type: overrideBackend?.type ?? inferBackendType(spec.profile),
+      profile: spec.profile satisfies AgentProfile,
+      ...(overrideBackend?.model ? { model: overrideBackend.model } : {}),
+      ...(overrideBackend?.server ? { server: overrideBackend.server } : {}),
+    },
+  }
+  // Pre-flight check only: `client.create` is not handed the signal, so an abort
+  // that fires while creation is pending is only seen later, by `streamPrompt`.
+  if (signal.aborted) throwAbort()
+  return client.create(opts)
+}
+
+function inferBackendType(
+  profile: AgentProfile,
+): CreateSandboxOptions['backend'] extends infer B
+  ? B extends { type: infer T }
+    ? T
+    : never
+  : never {
+  // Honor an explicit string `profile.metadata.backendType` hint. Without one
+  // this returns the literal 'opencode' (described by the author as the SDK's
+  // platform-side default — TODO confirm), so the backend is pinned here
+  // rather than left for the SDK to choose.
+  type BackendType = NonNullable<CreateSandboxOptions['backend']>['type']
+  const explicit = profile.metadata?.backendType
+  if (typeof explicit === 'string') return explicit as BackendType
+  return 'opencode' as BackendType
+}
+
+interface FinalizeArgs<Task, Output, Decision> {
+  options: RunLoopOptions<Task, Output, Decision>
+  decision: Decision
+  iterations: Iteration<Task, Output>[]
+  startMs: number
+  now: () => number
+  runId: string
+}
+
+/**
+ * Pick the winner, total the cost, emit `loop.ended`, and assemble the result.
+ * Resolves only after the trace emitter has accepted the final event.
+ */
+async function finalize<Task, Output, Decision>(
+  args: FinalizeArgs<Task, Output, Decision>,
+): Promise<LoopResult<Task, Output, Decision>> {
+  const winner = (args.options.selectWinner ?? defaultSelectWinner)(args.iterations)
+  const costUsd = args.iterations.reduce((sum, iter) => sum + (iter.costUsd || 0), 0)
+  const durationMs = args.now() - args.startMs
+  // Awaited like every other trace emit: a fire-and-forget `void` here let
+  // emitter failures escape as unhandled rejections after `runLoop` resolved.
+  await emitTrace(args.options.ctx.traceEmitter, {
+    kind: 'loop.ended',
+    runId: args.runId,
+    timestamp: args.now(),
+    payload: {
+      winnerIterationIndex: winner?.iterationIndex,
+      totalCostUsd: costUsd,
+      durationMs,
+      iterations: args.iterations.length,
+    },
+  })
+  return { decision: args.decision, iterations: args.iterations, winner, durationMs, costUsd }
+}
+
+function defaultSelectWinner<Task, Output>(
+  iterations: Iteration<Task, Output>[],
+): LoopWinner<Task, Output> | undefined {
+  const candidates = iterations.filter((iter) => iter.output !== undefined && !iter.error)
+  if (candidates.length === 0) return undefined
+  const valid = candidates.filter((iter) => iter.verdict?.valid === true)
+  const pool = valid.length > 0 ? valid : candidates
+  const sorted = [...pool].sort(
+    (a, b) => (b.verdict?.score ?? 0) - (a.verdict?.score ?? 0) || a.index - b.index,
+  )
+  const top = sorted[0]
+  if (!top || top.output === undefined) return undefined
+  return {
+    task: top.task,
+    output: top.output,
+    verdict: top.verdict,
+    iterationIndex: top.index,
+    agentRunName: top.agentRunName,
+  }
+}
+
+function resolveAgentRuns<Task, Output, Decision>(
+  options: RunLoopOptions<Task, Output, Decision>,
+): AgentRunSpec<Task>[] {
+  if (options.agentRun && options.agentRuns) {
+    throw new ValidationError('runLoop: pass exactly one of `agentRun` or `agentRuns`')
+  }
+  if (options.agentRun) return [options.agentRun]
+  if (options.agentRuns && options.agentRuns.length > 0) return options.agentRuns
+  throw new ValidationError('runLoop: `agentRun` or non-empty `agentRuns` is required')
+}
+
+/** True when `decision` is one of the kernel's four loop-ending string values. */
+function isTerminalDecision(decision: unknown): boolean {
+  if (typeof decision !== 'string') return false
+  return ['stop', 'pick-winner', 'fail', 'done'].includes(decision)
+}
+
+function serializeDecision(decision: unknown): string {
+  if (typeof decision === 'string') return decision
+  if (decision === null || decision === undefined) return 'null'
+  try {
+    return JSON.stringify(decision) ?? String(decision) // undefined for functions/symbols
+  } catch {
+    return String(decision)
+  }
+}
+
+async function emitTrace(
+  emitter: LoopTraceEmitter | undefined,
+  event: LoopTraceEvent,
+): Promise<void> {
+  if (!emitter) return
+  await emitter.emit(event)
+}
+
+function randomSuffix(len = 8): string {
+  return Math.random()
+    .toString(36)
+    .slice(2, 2 + len)
+}
+
+/** Raise the kernel's cancellation error: message `aborted`, name `AbortError`. */
+function throwAbort(): never {
+  const abortError = Object.assign(new Error('aborted'), { name: 'AbortError' })
+  throw abortError
+}
+
+/**
+ * Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when
+ * the event carries usage/cost data. Returns `undefined` for non-cost events
+ * so the kernel can iterate the full stream without branching.
+ *
+ * Sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`. The
+ * cost-carrying types recognized here:
+ *   - `llm_call` / `cost.usage` / `usage` — flat `data: { model, tokensIn,
+ *      tokensOut, costUsd, ... }`
+ *   - `message.completed` / `result` / `final` — nested `data: { usage: {
+ *      inputTokens, outputTokens, totalCostUsd? } }`
+ *
+ * Numeric coercion is strict: `Number.isFinite` gates every accumulator
+ * write so a sentinel `NaN` from a misbehaving backend cannot poison the
+ * ledger.
+ */
+function extractLlmCallEvent(
+  event: SandboxEvent,
+  agentRunName: string,
+): (RuntimeStreamEvent & { type: 'llm_call' }) | undefined {
+  if (!event || typeof event !== 'object') return undefined
+  const type = String(event.type ?? '')
+  const data =
+    event.data && typeof event.data === 'object'
+      ? (event.data as Record<string, unknown>)
+      : ({} as Record<string, unknown>)
+
+  if (type === 'llm_call' || type === 'cost.usage' || type === 'usage') {
+    return buildLlmCall(data, agentRunName)
+  }
+  if (type === 'message.completed' || type === 'result' || type === 'final') {
+    const usage = data.usage as Record<string, unknown> | undefined
+    if (!usage || typeof usage !== 'object') return undefined
+    return buildLlmCall({ ...usage, model: data.model ?? usage.model }, agentRunName)
+  }
+  return undefined
+}
+
+function buildLlmCall(
+  data: Record<string, unknown>,
+  agentRunName: string,
+): (RuntimeStreamEvent & { type: 'llm_call' }) | undefined {
+  const tokensIn = pickFiniteNumber(data, ['tokensIn', 'inputTokens', 'prompt_tokens'])
+  const tokensOut = pickFiniteNumber(data, ['tokensOut', 'outputTokens', 'completion_tokens'])
+  const costUsd = pickFiniteNumber(data, ['costUsd', 'totalCostUsd', 'cost_usd', 'cost'])
+  if (tokensIn === undefined && tokensOut === undefined && costUsd === undefined) {
+    return undefined
+  }
+  const model = typeof data.model === 'string' && data.model.length > 0 ? data.model : agentRunName
+  const event: RuntimeStreamEvent & { type: 'llm_call' } = {
+    type: 'llm_call',
+    model,
+  }
+  if (tokensIn !== undefined) event.tokensIn = tokensIn
+  if (tokensOut !== undefined) event.tokensOut = tokensOut
+  if (costUsd !== undefined) event.costUsd = costUsd
+  return event
+}
+
+/** First finite number found in `data` under `keys`, tried in order; else `undefined`. */
+function pickFiniteNumber(data: Record<string, unknown>, keys: string[]): number | undefined {
+  const hit = keys
+    .map((key) => data[key])
+    .find((candidate) => typeof candidate === 'number' && Number.isFinite(candidate))
+  return typeof hit === 'number' ? hit : undefined
+}
+
+/**
+ * Non-cryptographic fingerprint for trace payloads so downstream pipelines can
+ * group equal tasks / outputs. Hashes the JSON text, else `String(value)`.
+ */
+function hashJson(value: unknown): string {
+  const serialize = (): string => {
+    try {
+      return JSON.stringify(value) ?? String(value)
+    } catch {
+      return String(value)
+    }
+  }
+  // FNV-1a, 32-bit, over UTF-16 code units: xor each unit in, then multiply by the prime.
+  const text = serialize()
+  let acc = 0x811c9dc5
+  for (let pos = 0; pos < text.length; pos += 1) {
+    acc = Math.imul(acc ^ text.charCodeAt(pos), 0x01000193)
+  }
+  return (acc >>> 0).toString(16).padStart(8, '0')
+}
diff --git a/src/loops/trace.ts b/src/loops/trace.ts
new file mode 100644
index 0000000..8a537fb
--- /dev/null
+++ b/src/loops/trace.ts
@@ -0,0 +1,22 @@
+/**
+ * @experimental
+ *
+ * Loop-topology trace events. Independent from `runHandle.observe`, which
+ * tracks cost. These describe the loop's iteration tree so downstream eval
+ * pipelines can group traces by topology (refine vs fanout, which spec ran
+ * each iteration, who won).
+ *
+ * Re-exported from `./types` for back-compat with the kernel's local imports;
+ * the canonical home is `./types` so call sites that already import from
+ * `loops` don't double-import.
+ */
+
+export type {
+  LoopDecisionPayload,
+  LoopEndedPayload,
+  LoopIterationEndedPayload,
+  LoopIterationStartedPayload,
+  LoopStartedPayload,
+  LoopTraceEmitter,
+  LoopTraceEvent,
+} from './types'
diff --git a/src/loops/types.ts b/src/loops/types.ts
new file mode 100644
index 0000000..100bd34
--- /dev/null
+++ b/src/loops/types.ts
@@ -0,0 +1,235 @@
+/**
+ * @experimental
+ *
+ * Driven-loop substrate — type surface.
+ *
+ * The loop kernel orchestrates around the sandbox SDK; it does not invent
+ * its own notion of "what an agent is". Each iteration is a sandbox-SDK
+ * `streamPrompt` call against an `AgentProfile`. The kernel owns iteration
+ * accounting, concurrency, abort propagation, cost aggregation, and trace
+ * emission; the driver owns topology (plan + decide); the validator owns
+ * output scoring; the output adapter owns event-stream → typed-output decode.
+ */
+
+import type {
+  AgentProfile,
+  CreateSandboxOptions,
+  SandboxEvent,
+  SandboxInstance,
+} from '@tangle-network/sandbox'
+import type { RuntimeRunHandle } from '../runtime-run'
+
+/** @experimental */
+export interface DefaultVerdict {
+  /** Whether the output meets the validator's pass criteria. */
+  valid: boolean
+  /** Aggregate score in [0, 1]. Drivers use this for winner selection. */
+  score: number
+  /** Per-dimension scores. Free-form; weighted into `score` by the validator. */
+  scores?: Record<string, number>
+  /** Human-readable rationale; surfaces in trace + final-result `winner.verdict`. */
+  notes?: string
+}
+
+/** @experimental */
+export interface ValidationCtx {
+  /** Iteration index this output came from (0-based). */
+  iteration: number
+  /** Cooperative cancellation channel. */
+  signal: AbortSignal
+}
+
+/** @experimental */
+export interface Validator<Output, Verdict = DefaultVerdict> {
+  validate(output: Output, ctx: ValidationCtx): Promise<Verdict>
+}
+
+/**
+ * Sandbox-SDK-shaped agent specification.
+ *
+ * The kernel uses `profile` to instantiate a sandbox per iteration, formats
+ * `task` into a prompt via `taskToPrompt`, and merges `sandboxOverrides` into
+ * the `CreateSandboxOptions` it passes to `client.create`. Heterogeneous
+ * fanout supplies multiple `AgentRunSpec`s and the kernel round-robins
+ * through them when the driver plans N tasks.
+ *
+ * @experimental
+ */
+export interface AgentRunSpec<Task> {
+  /** Sandbox SDK profile — what kind of agent runs the task. */
+  profile: AgentProfile
+  /** Task → prompt formatter. Pure and deterministic. */
+  taskToPrompt: (task: Task) => string
+  /**
+   * Per-spec stable name. Surfaced in trace events and the default winner
+   * selector tiebreak. Falls back to `profile.name ?? 'agent'`.
+   */
+  name?: string
+  /**
+   * Optional sandbox-SDK `CreateSandboxOptions` overrides merged on top of
+   * the kernel's defaults. `backend.profile` is set to `profile` by the
+   * kernel and cannot be overridden here — use `profile` itself for that.
+   */
+  sandboxOverrides?: Partial<Omit<CreateSandboxOptions, 'backend'>> & {
+    backend?: Omit<NonNullable<CreateSandboxOptions['backend']>, 'profile'>
+  }
+}
+
+/**
+ * Stream of `SandboxEvent`s → typed `Output`.
+ *
+ * Adapters are pure functions over the already-collected event array; they
+ * do not receive the live AsyncIterable so they can be replayed against
+ * persisted streams during tests / replays.
+ *
+ * @experimental
+ */
+export interface OutputAdapter<Output> {
+  parse(events: SandboxEvent[]): Output
+}
+
+/** @experimental */
+export interface Iteration<Task, Output> {
+  /** 0-based iteration index assigned by the kernel. */
+  index: number
+  task: Task
+  /** Stable name of the `AgentRunSpec` that produced this iteration. */
+  agentRunName: string
+  output?: Output
+  verdict?: DefaultVerdict
+  error?: Error
+  /** Raw sandbox event stream collected for this iteration. */
+  events: SandboxEvent[]
+  startedAt: number
+  endedAt: number
+  costUsd: number
+}
+
+/** @experimental */
+export interface Driver<Task, Output, Decision> {
+  /**
+   * Stable identifier surfaced in trace events. Default `'driver'`.
+   */
+  readonly name?: string
+  /**
+   * Tasks to issue this iteration. `[task]` → refine; N copies → fanout;
+   * `[]` → no more work this round (kernel proceeds to `decide`).
+   */
+  plan(task: Task, history: ReadonlyArray<Iteration<Task, Output>>): Promise<Task[]>
+  /**
+   * Inspect history and return the next state. The kernel terminates the
+   * loop when `decide` returns a value listed in `isTerminalDecision`
+   * (`'stop' | 'pick-winner' | 'fail' | 'done'`), when `maxIterations`
+   * is hit, or when the abort signal fires.
+   */
+  decide(history: ReadonlyArray<Iteration<Task, Output>>): Decision | Promise<Decision>
+}
+
+/** @experimental */
+export interface LoopWinner<Task, Output> {
+  task: Task
+  output: Output
+  verdict?: DefaultVerdict
+  iterationIndex: number
+  agentRunName: string
+}
+
+/** @experimental */
+export interface LoopResult<Task, Output, Decision> {
+  decision: Decision
+  iterations: Iteration<Task, Output>[]
+  winner?: LoopWinner<Task, Output>
+  durationMs: number
+  /** Sum of every iteration's `costUsd`. */
+  costUsd: number
+}
+
+/**
+ * Minimal sandbox client surface the kernel calls. Satisfied structurally by
+ * `new Sandbox({ apiKey, baseUrl })` — declared as a structural type so
+ * tests can pass a stub without instantiating the SDK.
+ *
+ * @experimental
+ */
+export interface LoopSandboxClient {
+  create(options?: CreateSandboxOptions): Promise<SandboxInstance>
+}
+
+/** @experimental */
+export interface LoopTraceEmitter {
+  emit(event: LoopTraceEvent): void | Promise<void>
+}
+
+/** @experimental */
+export type LoopTraceEvent =
+  | { kind: 'loop.started'; runId: string; timestamp: number; payload: LoopStartedPayload }
+  | {
+      kind: 'loop.iteration.started'
+      runId: string
+      timestamp: number
+      payload: LoopIterationStartedPayload
+    }
+  | {
+      kind: 'loop.iteration.ended'
+      runId: string
+      timestamp: number
+      payload: LoopIterationEndedPayload
+    }
+  | { kind: 'loop.decision'; runId: string; timestamp: number; payload: LoopDecisionPayload }
+  | { kind: 'loop.ended'; runId: string; timestamp: number; payload: LoopEndedPayload }
+
+/** @experimental */
+export interface LoopStartedPayload {
+  driver: string
+  agentRunNames: string[]
+  maxIterations: number
+  maxConcurrency: number
+}
+
+/** @experimental */
+export interface LoopIterationStartedPayload {
+  iterationIndex: number
+  agentRunName: string
+  taskHash: string
+}
+
+/** @experimental */
+export interface LoopIterationEndedPayload {
+  iterationIndex: number
+  agentRunName: string
+  outputHash?: string
+  verdict?: DefaultVerdict
+  error?: string
+  costUsd: number
+  durationMs: number
+}
+
+/** @experimental */
+export interface LoopDecisionPayload {
+  decision: string
+  historyLength: number
+}
+
+/** @experimental */
+export interface LoopEndedPayload {
+  winnerIterationIndex?: number
+  totalCostUsd: number
+  durationMs: number
+  iterations: number
+}
+
+/** @experimental */
+export interface ExecCtx {
+  /** Sandbox SDK client — the kernel calls `.create()` per iteration. */
+  sandboxClient: LoopSandboxClient
+  /** Optional trace emitter. When set, the kernel emits `loop.*` events. */
+  traceEmitter?: LoopTraceEmitter
+  /**
+   * Optional production-run handle. When set, every synthesized `llm_call`
+   * the kernel infers from a sandbox event stream is forwarded via
+   * `runHandle.observe` so per-run cost aggregates pick up loop spend.
+   */
+  runHandle?: RuntimeRunHandle
+  /** Cooperative cancellation signal. */
+  signal?: AbortSignal
+}
diff --git a/src/profiles/coder.ts b/src/profiles/coder.ts
new file mode 100644
index 0000000..80149d3
--- /dev/null
+++ b/src/profiles/coder.ts
@@ -0,0 +1,398 @@
+/**
+ * @experimental
+ *
+ * `coderProfile` — opinionated preset for code-modification tasks.
+ *
+ * The agent is told to:
+ *   - work on a fresh branch inside the sandbox workspace
+ *   - keep the patch minimal (under `maxDiffLines`)
+ *   - avoid `forbiddenPaths`
+ *   - run `testCmd` and `typecheckCmd`
+ *   - emit a final JSON result the output adapter parses
+ *
+ * The profile is stateless and agent-agnostic — `harness` selects the
+ * sandbox-SDK backend (`claude-code`, `codex`, `opencode/*`). For
+ * heterogeneous fanout, use `multiHarnessCoderFanout`.
+ */
+
+import type { AgentProfile, SandboxEvent } from '@tangle-network/sandbox'
+import { createFanoutVoteDriver } from '../loops/drivers/fanout-vote'
+import type { AgentRunSpec, DefaultVerdict, Driver, OutputAdapter, Validator } from '../loops/types'
+
+const DEFAULT_MAX_DIFF_LINES = 400
+
+/** @experimental */
+export interface CoderTask {
+  /** What the agent must accomplish. Free-form prose. */
+  goal: string
+  /** Absolute path inside the sandbox where the repo lives. */
+  repoRoot: string
+  /** Default `main`. The branch the agent diffs against. */
+  baseBranch?: string
+  /** Default `pnpm test --run`. */
+  testCmd?: string
+  /** Default `pnpm typecheck`. */
+  typecheckCmd?: string
+  /** Files the agent may inspect for context. Surfaced verbatim in the prompt. */
+  contextFiles?: string[]
+  /**
+   * Paths the agent must not touch. Validator hard-fails on any match.
+   * Use glob-free literal path prefixes for unambiguous enforcement.
+   */
+  forbiddenPaths?: string[]
+  /** Default 400. Hard cap; validator hard-fails when exceeded. */
+  maxDiffLines?: number
+}
+
+/** @experimental */
+export interface CoderOutput {
+  /** Branch the agent wrote the patch on. */
+  branch: string
+  /** Unified diff (`git diff <base>..HEAD`). */
+  patch: string
+  testResult: { passed: boolean; output: string }
+  typecheckResult: { passed: boolean; output: string }
+  diffStats: { filesChanged: number; insertions: number; deletions: number }
+  /** Optional reviewer commentary surfaced by the agent. */
+  reviewerNotes?: string
+}
+
+/** @experimental */
+export interface CoderProfileOptions {
+  /** Sandbox-SDK backend.type. Default `'claude-code'`. */
+  harness?: string
+  /** Default model id passed in `AgentProfile.model.default`. */
+  model?: string
+  /** Custom system prompt replacement. Default = built-in coder preset. */
+  systemPrompt?: string
+  /** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */
+  name?: string
+}
+
+/**
+ * Assemble the coder preset: profile, prompt formatter, output adapter,
+ * validator and a ready-made `AgentRunSpec`.
+ *
+ * Pass `task` to get a validator bound to that task's `forbiddenPaths` /
+ * `maxDiffLines`. Without it the validator is built from an empty task with
+ * no forbidden paths and the default 400-line cap, so only tests, typecheck
+ * and the default diff cap are enforced.
+ *
+ * @experimental
+ */
+export function coderProfile(options: CoderProfileOptions & { task?: CoderTask } = {}): {
+  profile: AgentProfile
+  taskToPrompt: (task: CoderTask) => string
+  output: OutputAdapter<CoderOutput>
+  validator: Validator<CoderOutput>
+  agentRunSpec: AgentRunSpec<CoderTask>
+} {
+  const harness = options.harness ?? 'claude-code'
+  const name = options.name ?? `coder-${harness}`
+  // Stand-in task used when the caller supplies none.
+  const unconstrained: CoderTask = {
+    goal: '',
+    repoRoot: '',
+    forbiddenPaths: [],
+    maxDiffLines: DEFAULT_MAX_DIFF_LINES,
+  }
+  // The harness is recorded under `metadata.backendType`.
+  const profile: AgentProfile = {
+    name,
+    description: 'Code-modification agent. Minimal-diff worktree-based coder.',
+    prompt: { systemPrompt: options.systemPrompt ?? DEFAULT_CODER_SYSTEM_PROMPT },
+    model: options.model ? { default: options.model } : undefined,
+    tools: { git: true, fs: true, shell: true, test_runner: true },
+    metadata: { backendType: harness, role: 'coder' },
+  }
+  return {
+    profile,
+    taskToPrompt: formatCoderPrompt,
+    output: { parse: parseCoderEvents },
+    validator: createCoderValidator(options.task ?? unconstrained),
+    agentRunSpec: { name, profile, taskToPrompt: formatCoderPrompt },
+  }
+}
+
+/** @experimental */
+export interface MultiHarnessCoderFanoutOptions {
+  /**
+   * Sandbox-SDK backend.type identifiers, one per parallel agent. Default:
+   * `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.
+   */
+  harnesses?: string[]
+  /** Optional per-harness model override. Indexed parallel to `harnesses`. */
+  models?: (string | undefined)[]
+}
+
+/**
+ * One coder `AgentRunSpec` per harness, a fanout-vote driver sized to match,
+ * and the output adapter / validator of a task-less `coderProfile()`.
+ * @experimental
+ */
+export function multiHarnessCoderFanout(options: MultiHarnessCoderFanoutOptions = {}): {
+  agentRuns: AgentRunSpec<CoderTask>[]
+  output: OutputAdapter<CoderOutput>
+  validator: Validator<CoderOutput>
+  driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'>
+} {
+  const fallback = ['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']
+  const harnesses = options.harnesses?.length ? options.harnesses : fallback
+  const agentRuns = harnesses.map(
+    (harness, i) => coderProfile({ harness, model: options.models?.[i] }).agentRunSpec,
+  )
+  const { output, validator } = coderProfile()
+  const driver = createFanoutVoteDriver<CoderTask, CoderOutput>({ n: harnesses.length })
+  return { agentRuns, output, validator, driver }
+}
+
+const DEFAULT_CODER_SYSTEM_PROMPT = [
+  'You are a coder agent operating inside an isolated sandbox workspace.',
+  'Your job is to deliver a minimal, correct patch for the user-supplied goal.',
+  '',
+  'Hard rules:',
+  '  1. Work on a fresh branch off the supplied base. Do not mutate the base branch.',
+  '  2. Never touch a forbidden path. The user will list them explicitly.',
+  '  3. Keep the diff under the max-diff cap. Prefer the smallest change that ships.',
+  '  4. Run the supplied test and typecheck commands before declaring done.',
+  '  5. If either command fails, fix the cause — do not weaken the test or hide the error.',
+  '',
+  'When you finish, emit a single final structured message of the shape:',
+  '  ```json',
+  '  { "branch": "<branch-name>",',
+  '    "patch": "<unified-diff>",',
+  '    "testResult": { "passed": <bool>, "output": "<stdout/stderr>" },',
+  '    "typecheckResult": { "passed": <bool>, "output": "<stdout/stderr>" },',
+  '    "diffStats": { "filesChanged": <int>, "insertions": <int>, "deletions": <int> },',
+  '    "reviewerNotes": "<optional commentary>" }',
+  '  ```',
+].join('\n')
+
+/**
+ * Render a `CoderTask` as the newline-joined task prompt. Omitted optional
+ * fields fall back to their defaults; empty lists print `(none)`.
+ */
+function formatCoderPrompt(task: CoderTask): string {
+  const forbiddenPaths = task.forbiddenPaths ?? []
+  const contextFiles = task.contextFiles ?? []
+  const forbiddenList = forbiddenPaths.length > 0 ? forbiddenPaths.join(', ') : '(none)'
+  // One indented bullet per context file, or a single placeholder line.
+  const contextList =
+    contextFiles.length > 0 ? contextFiles.map((file) => `  - ${file}`).join('\n') : '  (none)'
+  const lines: string[] = []
+  lines.push(`Goal: ${task.goal}`)
+  lines.push(`Repo: ${task.repoRoot}`)
+  lines.push(`Base branch: ${task.baseBranch ?? 'main'}`)
+  lines.push(`Run tests with: ${task.testCmd ?? 'pnpm test --run'}`)
+  lines.push(`Run typecheck with: ${task.typecheckCmd ?? 'pnpm typecheck'}`)
+  lines.push(`Forbidden paths: ${forbiddenList}`)
+  lines.push(`Max diff lines: ${task.maxDiffLines ?? DEFAULT_MAX_DIFF_LINES}`)
+  lines.push('Context files:', contextList, '')
+  lines.push('Produce a minimal patch on a fresh branch. Run tests and typecheck before')
+  lines.push('returning. Emit the final JSON result block exactly as instructed.')
+  return lines.join('\n')
+}
+
+/**
+ * Decode a coder run's event stream into a `CoderOutput`.
+ *
+ * 1. The newest `result` / `final` / `coder.result` event whose `data.result`,
+ *    `data.output`, or `data` itself coerces wins.
+ * 2. Otherwise fenced blocks in `data.text` / `data.delta` are tried, newest
+ *    first: per event, then over all chunks joined in stream order, because a
+ *    streamed fence can be split across several delta events.
+ * 3. Otherwise an all-failed placeholder is returned.
+ */
+function parseCoderEvents(events: SandboxEvent[]): CoderOutput {
+  for (let i = events.length - 1; i >= 0; i -= 1) {
+    const event = events[i]
+    if (!event) continue
+    const type = String(event.type ?? '')
+    const data = isRecord(event.data) ? event.data : {}
+    if (type === 'result' || type === 'final' || type === 'coder.result') {
+      const direct = coerceCoderOutput(data.result ?? data.output ?? data)
+      if (direct) return direct
+    }
+  }
+  const chunks = events.map((event) => {
+    const data = event && isRecord(event.data) ? event.data : {}
+    return pickString(data.text) ?? pickString(data.delta) ?? ''
+  })
+  // Single events first (newest first), then the joined transcript.
+  for (const text of [...[...chunks].reverse(), chunks.join('')]) {
+    for (const fence of [...text.matchAll(/```[\s\S]*?```/g)].reverse()) {
+      const coerced = coerceCoderOutput(extractFencedJson(fence[0]))
+      if (coerced) return coerced
+    }
+  }
+  return {
+    branch: '',
+    patch: '',
+    testResult: { passed: false, output: '' },
+    typecheckResult: { passed: false, output: '' },
+    diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },
+  }
+}
+
+/**
+ * Build a validator that closes over a specific `CoderTask`'s constraints.
+ *
+ * Checks in order:
+ *   1. Forbidden-path: any path the patch touches that equals or sits under an
+ *      entry of `task.forbiddenPaths` fails hard (leading `./`, trailing `/` ignored).
+ *   2. Diff size: line count above `task.maxDiffLines` (default 400) fails
+ *      hard; below cap, the score shrinks linearly.
+ *   3. Tests: `output.testResult.passed` must be `true`.
+ *   4. Typecheck: `output.typecheckResult.passed` must be `true`.
+ *
+ * Aggregate score: `0.5 * tests + 0.3 * typecheck + 0.2 * (1 - diffLines/maxDiff)`.
+ * `valid` is the conjunction of all four.
+ *
+ * @experimental
+ */
+export function createCoderValidator(task: CoderTask): Validator<CoderOutput> {
+  const maxDiff = task.maxDiffLines ?? DEFAULT_MAX_DIFF_LINES
+  const forbidden = task.forbiddenPaths ?? []
+  // `./src/x/`, `src/x/` and `src/x` all name the same repo-relative path.
+  const norm = (p: string): string => p.trim().replace(/^(?:\.\/)+/, '').replace(/\/+$/, '')
+  return {
+    async validate(output) {
+      const scores: Record<string, number> = {}
+      const notes: string[] = []
+      let pass = true
+
+      const touched = touchedPathsFromPatch(output.patch).map(norm)
+      const touchedForbidden = forbidden.filter((path) => {
+        const exact = norm(path)
+        return exact !== '' && touched.some((p) => p === exact || p.startsWith(`${exact}/`))
+      })
+      if (touchedForbidden.length > 0) {
+        pass = false
+        scores.forbiddenPath = 0
+        notes.push(`touched forbidden paths: ${touchedForbidden.join(', ')}`)
+      } else {
+        scores.forbiddenPath = 1
+      }
+
+      const diffLines = countDiffLines(output.patch)
+      if (diffLines > maxDiff) {
+        pass = false
+        scores.diffSize = 0
+        notes.push(`diff ${diffLines} lines exceeds cap ${maxDiff}`)
+      } else {
+        scores.diffSize = maxDiff === 0 ? 0 : Math.max(0, 1 - diffLines / maxDiff)
+      }
+
+      scores.tests = output.testResult.passed ? 1 : 0
+      scores.typecheck = output.typecheckResult.passed ? 1 : 0
+      if (!output.testResult.passed) {
+        pass = false
+        notes.push('tests failed')
+      }
+      if (!output.typecheckResult.passed) {
+        pass = false
+        notes.push('typecheck failed')
+      }
+      const score = 0.5 * scores.tests + 0.3 * scores.typecheck + 0.2 * scores.diffSize
+      const verdict: DefaultVerdict = {
+        valid: pass,
+        score: Number.isFinite(score) ? score : 0,
+        scores,
+      }
+      if (notes.length > 0) verdict.notes = notes.join('; ')
+      return verdict
+    },
+  }
+}
+
+function touchedPathsFromPatch(patch: string): string[] {
+  const out = new Set<string>()
+  for (const line of patch.split(/\r?\n/)) {
+    // Hunk-less renames / copies appear only in git's extended headers; `diff -u` appends `\t<time>`.
+    const header = /^(\+\+\+|---|rename from|rename to|copy from|copy to) (.+)$/.exec(line)
+    const rest = header?.[2]?.split('\t')[0]?.trim() ?? ''
+    if (rest === '' || rest === '/dev/null') continue
+    const prefixed = header?.[1]?.length === 3 && /^[ab]\//.test(rest)
+    out.add(prefixed ? rest.slice(2) : rest)
+  }
+  return [...out]
+}
+
+function countDiffLines(patch: string): number {
+  let count = 0
+  let owed = 0 // old + new lines the current `@@` hunk still owes; inside it `+++x` / `---x` are content
+  for (const line of patch.split(/\r?\n/)) {
+    const hunk = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/.exec(line)
+    const changed = line.startsWith('+') || line.startsWith('-')
+    const inHunk = !hunk && owed > 0 && /^[-+ \\]|^$/.test(line)
+    if (changed && (inHunk || !/^(?:\+\+\+|---)/.test(line))) count += 1
+    if (hunk) owed = Number(hunk[1] ?? 1) + Number(hunk[2] ?? 1)
+    else owed = inHunk ? owed - (changed ? 1 : line.startsWith('\\') ? 0 : 2) : 0
+  }
+  return count
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === 'object' && !Array.isArray(value)
+}
+
+function pickString(value: unknown): string | undefined {
+  return typeof value === 'string' && value.length > 0 ? value : undefined
+}
+
+// JSON value of the first ``` fence in `text` (optional `json` tag); `undefined` if missing, empty or invalid.
+function extractFencedJson(text: string): unknown | undefined {
+  const fence = /```(?:json)?\s*([\s\S]*?)```/i.exec(text)
+  const payload = fence?.[1]?.trim() ?? ''
+  if (payload === '') return undefined
+  try {
+    return JSON.parse(payload)
+  } catch {
+    return undefined
+  }
+}
+
+/**
+ * Narrow an untrusted value to `CoderOutput`. Only a non-empty string
+ * `branch` is mandatory; the remaining fields are filled in by the `coerce*`
+ * helpers, and a missing `patch` becomes the empty string.
+ */
+function coerceCoderOutput(value: unknown): CoderOutput | undefined {
+  const branch = isRecord(value) ? pickString(value.branch) : undefined
+  if (!isRecord(value) || branch === undefined) return undefined
+  return {
+    branch,
+    patch: pickString(value.patch) ?? '',
+    testResult: coerceCmdResult(value.testResult),
+    typecheckResult: coerceCmdResult(value.typecheckResult),
+    diffStats: coerceDiffStats(value.diffStats),
+    reviewerNotes: pickString(value.reviewerNotes),
+  }
+}
+
+/** Normalize a command result: `passed` only for a literal `true`, `output` defaults to `''`. */
+function coerceCmdResult(value: unknown): { passed: boolean; output: string } {
+  const source: Record<string, unknown> = isRecord(value) ? value : {}
+  const passed = source.passed === true
+  const output = pickString(source.output) ?? ''
+  return { passed, output }
+}
+
+/** Normalize diff stats; a non-record input yields all-zero counts. */
+function coerceDiffStats(value: unknown): {
+  filesChanged: number
+  insertions: number
+  deletions: number
+} {
+  const source: Record<string, unknown> = isRecord(value) ? value : {}
+  const filesChanged = toFiniteInt(source.filesChanged)
+  const insertions = toFiniteInt(source.insertions)
+  const deletions = toFiniteInt(source.deletions)
+  return { filesChanged, insertions, deletions }
+}
+
+// Non-negative integer from an untrusted value: truncates toward zero, clamps
+// negatives to 0, and maps anything that is not a finite number to 0.
+function toFiniteInt(value: unknown): number {
+  return typeof value === 'number' && Number.isFinite(value) ? Math.max(0, Math.trunc(value)) : 0
+}
diff --git a/src/profiles/index.ts b/src/profiles/index.ts
new file mode 100644
index 0000000..37a5b06
--- /dev/null
+++ b/src/profiles/index.ts
@@ -0,0 +1,16 @@
+/**
+ * @experimental
+ *
+ * Pre-built `AgentRunSpec` + output adapter + validator bundles for common
+ * agent roles. Each preset bundles a sandbox-SDK `AgentProfile`, a
+ * task-to-prompt formatter, an output adapter, and a per-task validator
+ * constructor — all of the pieces `runLoop` needs to drive a topology.
+ */
+
+export type {
+  CoderOutput,
+  CoderProfileOptions,
+  CoderTask,
+  MultiHarnessCoderFanoutOptions,
+} from './coder'
+export { coderProfile, createCoderValidator, multiHarnessCoderFanout } from './coder'
diff --git a/tests/loops/composition.test.ts b/tests/loops/composition.test.ts
new file mode 100644
index 0000000..3a97ea2
--- /dev/null
+++ b/tests/loops/composition.test.ts
@@ -0,0 +1,179 @@
+import type { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import {
+  type AgentRunSpec,
+  createFanoutVoteDriver,
+  createRefineDriver,
+  type Driver,
+  type OutputAdapter,
+  runLoop,
+  type Validator,
+} from '../../src/loops'
+
+interface Task {
+  goal: string
+}
+
+interface Inner {
+  attempt: number
+}
+
+interface Outer {
+  best: number
+}
+
+const profile: AgentProfile = { name: 'compose-stub' }
+
+const innerOutput: OutputAdapter<Inner> = {
+  parse(events) {
+    const last = events.at(-1)
+    const data = last?.data as { attempt?: number } | undefined
+    return { attempt: typeof data?.attempt === 'number' ? data.attempt : 0 }
+  },
+}
+
+const innerValidator: Validator<Inner> = {
+  async validate(out) {
+    return { valid: out.attempt >= 2, score: out.attempt / 3 }
+  },
+}
+
+const outerValidator: Validator<Outer> = {
+  async validate(out) {
+    return { valid: out.best >= 2, score: out.best }
+  },
+}
+
+const innerSpec: AgentRunSpec<Task> = {
+  profile,
+  name: 'inner',
+  taskToPrompt: (t) => t.goal,
+}
+
+function counterClient() {
+  let i = 0
+  return {
+    async create() {
+      const attempt = ++i
+      return {
+        async *streamPrompt() {
+          yield { type: 'result', data: { attempt } } satisfies SandboxEvent
+        },
+      } as unknown as SandboxInstance
+    },
+  }
+}
+
+describe('runLoop composition — a Driver that nests runLoop inside plan()', () => {
+  it('wraps an inner refine loop and the outer driver gates on the inner winner', async () => {
+    const innerClient = counterClient()
+
+    // Outer driver: each iteration's plan kicks off a full inner refine loop
+    // and yields a *single* outer task whose output adapter just stamps the
+    // inner best score. The outer task never reaches a real sandbox: the
+    // kernel is handed a stub client (`outerClient`) whose stream immediately
+    // yields a synthetic result carrying the inner winner's score.
+    let innerBest = 0
+    const outerDriver: Driver<Task, Outer, 'stop' | 'continue'> = {
+      name: 'outer',
+      async plan(task, history) {
+        if (history.length >= 2) return []
+        const innerResult = await runLoop({
+          driver: createRefineDriver<Task, Inner>(),
+          agentRun: innerSpec,
+          output: innerOutput,
+          validator: innerValidator,
+          task,
+          ctx: { sandboxClient: innerClient },
+        })
+        innerBest = innerResult.winner?.verdict?.score ?? 0
+        return [task]
+      },
+      decide(history) {
+        const last = history.at(-1)
+        if (last?.verdict?.valid) return 'stop'
+        if (history.length >= 2) return 'stop'
+        return 'continue'
+      },
+    }
+
+    let outerCalls = 0
+    const outerClient = {
+      async create() {
+        outerCalls += 1
+        return {
+          async *streamPrompt() {
+            yield { type: 'result', data: { best: innerBest } } satisfies SandboxEvent
+          },
+        } as unknown as SandboxInstance
+      },
+    }
+
+    const outerOutput: OutputAdapter<Outer> = {
+      parse(events) {
+        const last = events.at(-1)
+        const data = last?.data as { best?: number } | undefined
+        return { best: typeof data?.best === 'number' ? data.best : 0 }
+      },
+    }
+
+    const result = await runLoop({
+      driver: outerDriver,
+      agentRun: {
+        profile,
+        name: 'outer-agent',
+        taskToPrompt: (t) => `outer:${t.goal}`,
+      },
+      output: outerOutput,
+      validator: outerValidator,
+      task: { goal: 'compose' },
+      ctx: { sandboxClient: outerClient },
+    })
+
+    expect(outerCalls).toBeGreaterThan(0)
+    // The inner refine produces attempt=2 on the second iteration (score=2/3),
+    // which fails outerValidator's `best >= 2` check, so the loop exhausts
+    // the outer cap and stops without winning — but the structure shows the
+    // nesting works end-to-end.
+    expect(result.iterations.length).toBeGreaterThan(0)
+    expect(result.decision).toBe('stop')
+  })
+
+  it('static type check: a driver may compose multiple runLoops sequentially', () => {
+    // Compile-time proof that nested runLoop calls return well-typed results.
+    // The body is intentionally unreachable; the assertion is the type
+    // signature itself.
+    async function _typecheckOnly() {
+      const r1 = await runLoop({
+        driver: createRefineDriver<Task, Inner>(),
+        agentRun: innerSpec,
+        output: innerOutput,
+        validator: innerValidator,
+        task: { goal: '' },
+        ctx: {
+          sandboxClient: {
+            async create() {
+              throw new Error()
+            },
+          },
+        },
+      })
+      const r2 = await runLoop({
+        driver: createFanoutVoteDriver<Task, Inner>({ n: 2 }),
+        agentRun: innerSpec,
+        output: innerOutput,
+        validator: innerValidator,
+        task: { goal: '' },
+        ctx: {
+          sandboxClient: {
+            async create() {
+              throw new Error()
+            },
+          },
+        },
+      })
+      return { r1, r2 }
+    }
+    expect(typeof _typecheckOnly).toBe('function')
+  })
+})
diff --git a/tests/loops/fanout-vote.test.ts b/tests/loops/fanout-vote.test.ts
new file mode 100644
index 0000000..288bb99
--- /dev/null
+++ b/tests/loops/fanout-vote.test.ts
@@ -0,0 +1,281 @@
+import type {
+  AgentProfile,
+  CreateSandboxOptions,
+  SandboxEvent,
+  SandboxInstance,
+} from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import {
+  type AgentRunSpec,
+  createFanoutVoteDriver,
+  type OutputAdapter,
+  runLoop,
+  scoreFanoutVoteIterations,
+  type Validator,
+} from '../../src/loops'
+
+interface FanTask {
+  goal: string
+}
+
+interface FanOutput {
+  variantId: string
+  score: number
+}
+
+const output: OutputAdapter<FanOutput> = {
+  // Reads only the final event's payload; missing or mistyped fields fall back to '' / 0.
+  parse(events) {
+    const payload = events.at(-1)?.data as { variantId?: string; score?: number } | undefined
+    const variantId = payload?.variantId ?? ''
+    const rawScore = payload?.score
+    const score = typeof rawScore === 'number' ? rawScore : 0
+    return { variantId, score }
+  },
+}
+
+const validator: Validator<FanOutput> = {
+  // A variant is valid only when its score is strictly above 0.5;
+  // the score itself is passed through unchanged on the verdict.
+  validate: async ({ score }) => ({ valid: score > 0.5, score }),
+}
+
+function profile(name: string): AgentProfile {
+  return { name } // only `name` is set; no other AgentProfile field is populated
+}
+
+function specs(names: string[]): AgentRunSpec<FanTask>[] {
+  // One spec per name: the name labels both the spec and its stub profile.
+  const toSpec = (label: string): AgentRunSpec<FanTask> => {
+    return { profile: profile(label), name: label, taskToPrompt: (task) => task.goal }
+  }
+  return names.map((label) => toSpec(label))
+}
+
+function deterministicClient(outputs: Array<{ variantId: string; score: number }>): {
+  client: { create(opts?: CreateSandboxOptions): Promise<SandboxInstance> }
+  observed: { creates: number; concurrentMax: number } // create() count and peak overlapping streams
+} {
+  const state = { creates: 0, concurrentMax: 0, inflight: 0 } // inflight = streams started but not yet released
+  const pending: Array<() => void> = [] // FIFO of release callbacks, one pushed per create()
+  return {
+    observed: state as { creates: number; concurrentMax: number }, // same object as state, so counters stay live
+    client: {
+      async create() {
+        const i = state.creates
+        state.creates += 1
+        const variant = outputs[i] ?? { variantId: `unknown-${i}`, score: 0 } // by creation order; score-0 placeholder past the end
+        const release = new Promise<void>((resolve) => pending.push(resolve))
+        const box = {
+          async *streamPrompt() {
+            state.inflight += 1
+            state.concurrentMax = Math.max(state.concurrentMax, state.inflight)
+            // Wait one timer tick so other already-started streams can bump `inflight` first.
+            await new Promise((r) => setTimeout(r, 0))
+            // Resolve the oldest queued release (possibly this stream's own) to keep the chain moving.
+            const next = pending.shift()
+            if (next) next()
+            await release
+            state.inflight -= 1
+            yield {
+              type: 'result',
+              data: { variantId: variant.variantId, score: variant.score },
+            } satisfies SandboxEvent
+          },
+        } as unknown as SandboxInstance
+        // The first create() schedules one extra release on a timer to start the chain.
+        if (i === 0) {
+          setTimeout(() => {
+            const next = pending.shift()
+            if (next) next()
+          }, 0)
+        }
+        return box
+      },
+    },
+  }
+}
+
+describe('runLoop + createFanoutVoteDriver', () => {
+  it('spawns N parallel attempts and selects the highest-scoring valid winner', async () => {
+    const outputs = [
+      { variantId: 'a', score: 0.3 },
+      { variantId: 'b', score: 0.9 },
+      { variantId: 'c', score: 0.7 },
+    ]
+    let createdCount = 0
+    const client = {
+      async create() {
+        const i = createdCount
+        createdCount += 1
+        const variant = outputs[i]!
+        return {
+          async *streamPrompt() {
+            yield {
+              type: 'result',
+              data: { variantId: variant.variantId, score: variant.score },
+            } satisfies SandboxEvent
+          },
+        } as unknown as SandboxInstance
+      },
+    }
+
+    const result = await runLoop({
+      driver: createFanoutVoteDriver<FanTask, FanOutput>({ n: 3 }),
+      agentRun: {
+        profile: profile('uniform'),
+        name: 'uniform',
+        taskToPrompt: (t) => t.goal,
+      },
+      output,
+      validator,
+      task: { goal: 'fanout' },
+      ctx: { sandboxClient: client },
+    })
+
+    expect(result.iterations).toHaveLength(3)
+    expect(result.decision).toBe('pick-winner')
+    expect(result.winner?.output.variantId).toBe('b')
+    expect(result.winner?.verdict?.score).toBeCloseTo(0.9, 6)
+  })
+
+  it('resolves to fail when no iteration produces a valid output', async () => {
+    let createdCount = 0
+    const client = {
+      async create() {
+        const i = createdCount
+        createdCount += 1
+        return {
+          async *streamPrompt() {
+            yield {
+              type: 'result',
+              data: { variantId: `v${i}`, score: 0.1 },
+            } satisfies SandboxEvent
+          },
+        } as unknown as SandboxInstance
+      },
+    }
+
+    const result = await runLoop({
+      driver: createFanoutVoteDriver<FanTask, FanOutput>({ n: 2 }),
+      agentRun: {
+        profile: profile('weak'),
+        name: 'weak',
+        taskToPrompt: (t) => t.goal,
+      },
+      output,
+      validator,
+      task: { goal: 'fail-fanout' },
+      ctx: { sandboxClient: client },
+    })
+
+    expect(result.decision).toBe('fail')
+    expect(result.iterations).toHaveLength(2)
+  })
+
+  it('respects maxConcurrency cap on parallel fanout', async () => {
+    const { client, observed } = deterministicClient(
+      Array.from({ length: 4 }, (_, i) => ({ variantId: `v${i}`, score: 0.6 })),
+    )
+    await runLoop({
+      driver: createFanoutVoteDriver<FanTask, FanOutput>({ n: 4 }),
+      agentRun: {
+        profile: profile('uniform'),
+        name: 'uniform',
+        taskToPrompt: (t) => t.goal,
+      },
+      output,
+      validator,
+      task: { goal: 'cap' },
+      ctx: { sandboxClient: client },
+      maxConcurrency: 2,
+    })
+
+    expect(observed.creates).toBe(4)
+    expect(observed.concurrentMax).toBeLessThanOrEqual(2)
+  })
+
+  it('rotates through heterogeneous agentRuns for diversity', async () => {
+    const used: string[] = []
+    let createdCount = 0
+    const client = {
+      async create(opts?: CreateSandboxOptions) {
+        const name =
+          (opts?.backend?.profile && typeof opts.backend.profile === 'object'
+            ? opts.backend.profile.name
+            : undefined) ?? 'unknown'
+        used.push(name)
+        const i = createdCount
+        createdCount += 1
+        return {
+          async *streamPrompt() {
+            yield {
+              type: 'result',
+              data: { variantId: `${name}-${i}`, score: 0.9 },
+            } satisfies SandboxEvent
+          },
+        } as unknown as SandboxInstance
+      },
+    }
+
+    const result = await runLoop({
+      driver: createFanoutVoteDriver<FanTask, FanOutput>({ n: 3 }),
+      agentRuns: specs(['alpha', 'beta', 'gamma']),
+      output,
+      validator,
+      task: { goal: 'diversity' },
+      ctx: { sandboxClient: client },
+    })
+
+    expect(used).toEqual(['alpha', 'beta', 'gamma'])
+    expect(result.iterations.map((i) => i.agentRunName)).toEqual(['alpha', 'beta', 'gamma'])
+  })
+
+  it('scoreFanoutVoteIterations surfaces the per-iteration view', () => {
+    const scored = scoreFanoutVoteIterations<FanTask, FanOutput>([
+      {
+        index: 0,
+        task: { goal: '' },
+        agentRunName: 'a',
+        events: [],
+        startedAt: 0,
+        endedAt: 0,
+        costUsd: 0,
+        output: { variantId: 'x', score: 0.5 },
+        verdict: { valid: true, score: 0.5 },
+      },
+      {
+        index: 1,
+        task: { goal: '' },
+        agentRunName: 'b',
+        events: [],
+        startedAt: 0,
+        endedAt: 0,
+        costUsd: 0,
+        error: new Error('boom'),
+      },
+    ])
+    expect(scored).toHaveLength(1)
+    expect(scored[0]?.iterationIndex).toBe(0)
+  })
+
+  it('rejects mismatched options (agentRun + agentRuns)', async () => {
+    await expect(
+      runLoop({
+        driver: createFanoutVoteDriver<FanTask, FanOutput>({ n: 1 }),
+        agentRun: specs(['a'])[0],
+        agentRuns: specs(['a', 'b']),
+        output,
+        validator,
+        task: { goal: 'bad' },
+        ctx: {
+          sandboxClient: {
+            async create() {
+              throw new Error('unreachable')
+            },
+          },
+        },
+      }),
+    ).rejects.toThrow(/exactly one of/i)
+  })
+})
diff --git a/tests/loops/refine.test.ts b/tests/loops/refine.test.ts
new file mode 100644
index 0000000..50121de
--- /dev/null
+++ b/tests/loops/refine.test.ts
@@ -0,0 +1,283 @@
+import type {
+  AgentProfile,
+  CreateSandboxOptions,
+  SandboxEvent,
+  SandboxInstance,
+} from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import {
+  type AgentRunSpec,
+  createRefineDriver,
+  type LoopTraceEvent,
+  type OutputAdapter,
+  refineWinnerIndex,
+  runLoop,
+  type Validator,
+} from '../../src/loops'
+
+interface RefineTask {
+  goal: string
+}
+
+interface RefineOutput {
+  attempt: number
+}
+
+const profile: AgentProfile = { name: 'stub' }
+
+function spec(): AgentRunSpec<RefineTask> {
+  // Built fresh on every call: shared module-level stub profile, a fixed
+  // name, and the task goal passed through verbatim as the prompt.
+  const name = 'refiner'
+  const refiner: AgentRunSpec<RefineTask> = { profile, name, taskToPrompt: ({ goal }) => goal }
+  return refiner
+}
+
+function stubClient(eventsPerCall: SandboxEvent[][]): {
+  client: { create(opts?: CreateSandboxOptions): Promise<SandboxInstance> }
+  creates: number
+  prompts: string[]
+} {
+  // Stub sandbox client: the k-th create() replays eventsPerCall[k] (or nothing) from streamPrompt.
+  const state = { creates: 0, prompts: [] as string[] }
+  return {
+    get creates() {
+      return state.creates // read live: a number copied at return time would stay pinned at 0
+    },
+    prompts: state.prompts, // shared array reference, so pushes below are visible to the caller
+    client: {
+      async create() {
+        const events = eventsPerCall[state.creates] ?? [] // index by creation order before counting this call
+        state.creates += 1
+        return {
+          async *streamPrompt(message: string) {
+            state.prompts.push(message)
+            for (const e of events) yield e
+          },
+        } as unknown as SandboxInstance
+      },
+    },
+  }
+}
+
+const output: OutputAdapter<RefineOutput> = {
+  // Only the final event matters; anything without a numeric `attempt` maps to -1.
+  parse(events) {
+    const payload = events.at(-1)?.data as { attempt?: number } | undefined
+    return { attempt: typeof payload?.attempt === 'number' ? payload.attempt : -1 }
+  },
+}
+
+const passOnSecond: Validator<RefineOutput> = {
+  validate: async ({ attempt }) =>
+    attempt >= 2 // accept from attempt 2 onward; anything lower is rejected with a retry note
+      ? { valid: true, score: 1, scores: { attempt: 1 } }
+      : { valid: false, score: 0, scores: { attempt: 0 }, notes: 'try again' },
+}
+
+describe('runLoop + createRefineDriver', () => {
+  it('iterates until the validator returns valid=true', async () => {
+    const stub = stubClient([
+      [{ type: 'result', data: { attempt: 1 } }], // first sandbox: attempt 1, rejected by passOnSecond
+      [{ type: 'result', data: { attempt: 2 } }], // second sandbox: attempt 2, accepted
+    ])
+    const result = await runLoop({
+      driver: createRefineDriver<RefineTask, RefineOutput>(),
+      agentRun: spec(),
+      output,
+      validator: passOnSecond,
+      task: { goal: 'fix it' },
+      ctx: { sandboxClient: stub.client },
+    })
+
+    expect(result.decision).toBe('stop')
+    expect(result.iterations).toHaveLength(2)
+    expect(result.iterations[0]?.verdict?.valid).toBe(false)
+    expect(result.iterations[1]?.verdict?.valid).toBe(true)
+    expect(result.winner?.iterationIndex).toBe(1)
+    expect(result.winner?.output).toEqual({ attempt: 2 })
+    expect(stub.client).toBeDefined() // NOTE(review): always true (object literal); asserts nothing about the run
+  })
+
+  it('respects the driver-local maxIterations cap and reports stop', async () => {
+    const events: SandboxEvent[][] = Array.from({ length: 6 }, (_, i) => [
+      { type: 'result', data: { attempt: i } },
+    ])
+    const failing: Validator<RefineOutput> = {
+      async validate() {
+        return { valid: false, score: 0, scores: { attempt: 0 } }
+      },
+    }
+    const stub = stubClient(events)
+    const result = await runLoop({
+      driver: createRefineDriver<RefineTask, RefineOutput>({ maxIterations: 3 }),
+      agentRun: spec(),
+      output,
+      validator: failing,
+      task: { goal: 'never passes' },
+      ctx: { sandboxClient: stub.client },
+    })
+
+    expect(result.iterations).toHaveLength(3)
+    expect(result.decision).toBe('stop')
+    expect(result.winner?.iterationIndex).toBeDefined()
+  })
+
+  it('respects the kernel maxIterations cap and re-asks the driver for a final decision', async () => {
+    const events: SandboxEvent[][] = Array.from({ length: 4 }, () => [
+      { type: 'result', data: { attempt: 0 } },
+    ])
+    const failing: Validator<RefineOutput> = {
+      async validate() {
+        return { valid: false, score: 0, scores: { attempt: 0 } } // never valid, so only a cap can end the loop
+      },
+    }
+    const stub = stubClient(events)
+    const result = await runLoop({
+      driver: createRefineDriver<RefineTask, RefineOutput>({ maxIterations: 10 }),
+      agentRun: spec(),
+      output,
+      validator: failing,
+      task: { goal: 'never passes' },
+      ctx: { sandboxClient: stub.client },
+      maxIterations: 2, // kernel-level cap, lower than the driver's own cap of 10
+    })
+    expect(result.iterations).toHaveLength(2) // NOTE(review): title mentions a final decision, but result.decision is not asserted
+  })
+
+  it('emits trace events in canonical order', async () => {
+    const events: LoopTraceEvent[] = []
+    const stub = stubClient([
+      [{ type: 'result', data: { attempt: 1 } }],
+      [{ type: 'result', data: { attempt: 2 } }],
+    ])
+    await runLoop({
+      driver: createRefineDriver<RefineTask, RefineOutput>(),
+      agentRun: spec(),
+      output,
+      validator: passOnSecond,
+      task: { goal: 'trace order' },
+      ctx: {
+        sandboxClient: stub.client,
+        traceEmitter: { emit: (e) => void events.push(e) },
+      },
+      runId: 'fixed-run-id',
+    })
+
+    const kinds = events.map((e) => e.kind)
+    expect(kinds[0]).toBe('loop.started')
+    expect(kinds[kinds.length - 1]).toBe('loop.ended')
+    // Each iteration emits a started + ended; two iterations = two pairs.
+    const startedCount = kinds.filter((k) => k === 'loop.iteration.started').length
+    const endedCount = kinds.filter((k) => k === 'loop.iteration.ended').length
+    expect(startedCount).toBe(2)
+    expect(endedCount).toBe(2)
+    // Every event references the same runId.
+    expect(events.every((e) => e.runId === 'fixed-run-id')).toBe(true)
+    // Decision event follows each iteration.
+    const decisionCount = kinds.filter((k) => k === 'loop.decision').length
+    expect(decisionCount).toBeGreaterThanOrEqual(2)
+  })
+
+  it('captures per-iteration errors without aborting the whole loop', async () => {
+    const stub = {
+      creates: 0,
+      client: {
+        async create() {
+          stub.creates += 1
+          if (stub.creates === 1) {
+            return {
+              streamPrompt(): AsyncIterable<SandboxEvent> {
+                return {
+                  [Symbol.asyncIterator]: () => ({
+                    next: () => Promise.reject(new Error('sandbox blew up')),
+                  }),
+                }
+              },
+            } as unknown as SandboxInstance
+          }
+          return {
+            async *streamPrompt() {
+              yield { type: 'result', data: { attempt: 2 } } satisfies SandboxEvent
+            },
+          } as unknown as SandboxInstance
+        },
+      },
+    }
+
+    const result = await runLoop({
+      driver: createRefineDriver<RefineTask, RefineOutput>(),
+      agentRun: spec(),
+      output,
+      validator: passOnSecond,
+      task: { goal: 'survive errors' },
+      ctx: { sandboxClient: stub.client },
+    })
+
+    expect(result.iterations[0]?.error?.message).toContain('sandbox blew up')
+    expect(result.iterations[0]?.output).toBeUndefined()
+    expect(result.iterations[1]?.verdict?.valid).toBe(true)
+    expect(result.decision).toBe('stop')
+  })
+
+  it('aggregates costUsd from llm_call events across iterations', async () => {
+    const stub = stubClient([
+      [
+        { type: 'llm_call', data: { tokensIn: 100, tokensOut: 50, costUsd: 0.01, model: 'm' } },
+        { type: 'result', data: { attempt: 1 } },
+      ],
+      [
+        { type: 'llm_call', data: { tokensIn: 80, tokensOut: 30, costUsd: 0.02, model: 'm' } },
+        { type: 'result', data: { attempt: 2 } },
+      ],
+    ])
+    const result = await runLoop({
+      driver: createRefineDriver<RefineTask, RefineOutput>(),
+      agentRun: spec(),
+      output,
+      validator: passOnSecond,
+      task: { goal: 'cost' },
+      ctx: { sandboxClient: stub.client },
+    })
+    expect(result.iterations[0]?.costUsd).toBeCloseTo(0.01, 9)
+    expect(result.iterations[1]?.costUsd).toBeCloseTo(0.02, 9)
+    expect(result.costUsd).toBeCloseTo(0.03, 9)
+  })
+
+  it('refineWinnerIndex returns the last valid iteration', () => {
+    expect(
+      refineWinnerIndex([
+        {
+          index: 0,
+          task: {} as RefineTask,
+          agentRunName: 'refiner',
+          events: [],
+          startedAt: 0,
+          endedAt: 0,
+          costUsd: 0,
+          verdict: { valid: false, score: 0 },
+        },
+        {
+          index: 1,
+          task: {} as RefineTask,
+          agentRunName: 'refiner',
+          events: [],
+          startedAt: 0,
+          endedAt: 0,
+          costUsd: 0,
+          verdict: { valid: true, score: 1 },
+        },
+        {
+          index: 2,
+          task: {} as RefineTask,
+          agentRunName: 'refiner',
+          events: [],
+          startedAt: 0,
+          endedAt: 0,
+          costUsd: 0,
+          verdict: { valid: false, score: 0 },
+        },
+      ]),
+    ).toBe(1)
+  })
+})
diff --git a/tests/profiles/coder.test.ts b/tests/profiles/coder.test.ts
new file mode 100644
index 0000000..4b356c7
--- /dev/null
+++ b/tests/profiles/coder.test.ts
@@ -0,0 +1,186 @@
+import type { SandboxEvent } from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import {
+  type CoderOutput,
+  type CoderTask,
+  coderProfile,
+  createCoderValidator,
+  multiHarnessCoderFanout,
+} from '../../src/profiles'
+
+const ctx = { iteration: 0, signal: new AbortController().signal }
+
+function diff(filesTouched: string[], plusLines: number, minusLines: number): string {
+  // Every file gets its own three-line header followed by the same hunk-less
+  // body: `plusLines` added rows, then `minusLines` removed rows.
+  const body: string[] = []
+  for (let n = 0; n < plusLines; n += 1) body.push(`+line ${n}`)
+  for (let n = 0; n < minusLines; n += 1) body.push(`-line ${n}`)
+  const perFile = filesTouched.map((file) =>
+    [`diff --git a/${file} b/${file}`, `--- a/${file}`, `+++ b/${file}`, ...body].join('\n'),
+  )
+  return perFile.join('\n')
+}
+
+const baseTask: CoderTask = {
+  goal: 'minor fix',
+  repoRoot: '/repo',
+  forbiddenPaths: ['secrets/', 'dist/'],
+  maxDiffLines: 100,
+}
+
+describe('createCoderValidator — task-bound validator', () => {
+  it('passes when tests + typecheck + diff size + forbidden-path all clean', async () => {
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['src/foo.ts'], 10, 5),
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 1, insertions: 10, deletions: 5 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(true)
+    // score = 0.5 + 0.3 + 0.2*(1 - 15/100) = 0.5 + 0.3 + 0.17 = 0.97
+    expect(verdict.score).toBeCloseTo(0.97, 6)
+    expect(verdict.scores?.forbiddenPath).toBe(1)
+    expect(verdict.scores?.diffSize).toBeCloseTo(0.85, 6)
+  })
+
+  it('fails hard when a forbidden path is touched', async () => {
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['secrets/keys.ts'], 1, 0),
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 1, insertions: 1, deletions: 0 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.scores?.forbiddenPath).toBe(0)
+    expect(verdict.notes).toMatch(/forbidden/)
+  })
+
+  it('fails hard when diff exceeds maxDiffLines', async () => {
+    const validator = createCoderValidator({ ...baseTask, maxDiffLines: 5 })
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['src/foo.ts'], 10, 0),
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 1, insertions: 10, deletions: 0 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.scores?.diffSize).toBe(0)
+    expect(verdict.notes).toMatch(/exceeds cap 5/)
+  })
+
+  it('fails when tests fail; score still reflects partial credit elsewhere', async () => {
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['src/foo.ts'], 4, 1),
+      testResult: { passed: false, output: 'red' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 1, insertions: 4, deletions: 1 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.scores?.tests).toBe(0)
+    expect(verdict.scores?.typecheck).toBe(1)
+    // score = 0 + 0.3 + 0.2*(1 - 5/100) = 0.3 + 0.19 = 0.49
+    expect(verdict.score).toBeCloseTo(0.49, 6)
+  })
+
+  it('fails when typecheck fails', async () => {
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['src/foo.ts'], 2, 0),
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: false, output: 'TS2304' },
+      diffStats: { filesChanged: 1, insertions: 2, deletions: 0 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.scores?.typecheck).toBe(0)
+  })
+
+  it('treats subdirectory matches under a forbidden prefix as forbidden', async () => {
+    const validator = createCoderValidator({ ...baseTask, forbiddenPaths: ['vendor'] })
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['vendor/lib/file.ts'], 1, 0),
+      testResult: { passed: true, output: '' },
+      typecheckResult: { passed: true, output: '' },
+      diffStats: { filesChanged: 1, insertions: 1, deletions: 0 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.notes).toMatch(/vendor/)
+  })
+})
+
+describe('coderProfile output adapter', () => {
+  const preset = coderProfile({ task: baseTask })
+
+  it('parses a final result event with embedded coder output', () => {
+    const events: SandboxEvent[] = [
+      { type: 'text_delta', data: { text: 'working...' } },
+      {
+        type: 'result',
+        data: {
+          result: {
+            branch: 'feat/y',
+            patch: diff(['src/foo.ts'], 2, 0),
+            testResult: { passed: true, output: 'ok' },
+            typecheckResult: { passed: true, output: 'ok' },
+            diffStats: { filesChanged: 1, insertions: 2, deletions: 0 },
+            reviewerNotes: 'lgtm',
+          },
+        },
+      },
+    ]
+    const out = preset.output.parse(events)
+    expect(out.branch).toBe('feat/y')
+    expect(out.testResult.passed).toBe(true)
+    expect(out.diffStats.insertions).toBe(2)
+    expect(out.reviewerNotes).toBe('lgtm')
+  })
+
+  it('falls back to parsing a fenced JSON block out of a text delta', () => {
+    const fenced =
+      'Done. Here is the patch summary:\n```json\n' +
+      JSON.stringify({
+        branch: 'feat/z',
+        patch: '',
+        testResult: { passed: false, output: 'fail' },
+        typecheckResult: { passed: true, output: '' },
+        diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },
+      }) +
+      '\n```'
+    const events: SandboxEvent[] = [{ type: 'text_delta', data: { text: fenced } }]
+    const out = preset.output.parse(events)
+    expect(out.branch).toBe('feat/z')
+    expect(out.testResult.passed).toBe(false)
+  })
+
+  it('returns an empty CoderOutput when no structured result is present', () => {
+    const events: SandboxEvent[] = [{ type: 'text_delta', data: { text: 'hello' } }]
+    const out = preset.output.parse(events)
+    expect(out.branch).toBe('')
+    expect(out.testResult.passed).toBe(false)
+    expect(out.diffStats.filesChanged).toBe(0)
+  })
+})
+
+describe('multiHarnessCoderFanout — heterogeneous fanout bundle', () => {
+  it('produces one AgentRunSpec per harness and a fanout driver of matching n', () => {
+    const bundle = multiHarnessCoderFanout({ harnesses: ['claude-code', 'codex'] })
+    expect(bundle.agentRuns).toHaveLength(2) // one spec per requested harness
+    expect(bundle.agentRuns.map((s) => s.name)).toEqual(['coder-claude-code', 'coder-codex'])
+    expect(bundle.agentRuns.every((s) => s.profile.tools?.git === true)).toBe(true)
+  }) // NOTE(review): the "fanout driver of matching n" half of the title is not asserted here
+})
diff --git a/tsup.config.ts b/tsup.config.ts
index 1453476..7f29af4 100644
--- a/tsup.config.ts
+++ b/tsup.config.ts
@@ -6,6 +6,8 @@ export default defineConfig({
     platform: 'src/platform/index.ts',
     'analyst-loop': 'src/analyst-loop/index.ts',
     agent: 'src/agent/index.ts',
+    loops: 'src/loops/index.ts',
+    profiles: 'src/profiles/index.ts',
   },
   format: ['esm'],
   dts: true,