diff --git a/docs/api/primitive-catalog.md b/docs/api/primitive-catalog.md
index e957464e..27526a6b 100644
--- a/docs/api/primitive-catalog.md
+++ b/docs/api/primitive-catalog.md
@@ -337,7 +337,7 @@ Import from `@tangle-network/agent-runtime/intelligence` — 60 exports.
 
 ### Recursive atom + loop kernel (alias of ./runtime)
 
-Import from `@tangle-network/agent-runtime/loops` — 379 exports.
+Import from `@tangle-network/agent-runtime/loops` — 381 exports.
 
 | Symbol | Kind | Summary |
 |---|---|---|
@@ -651,6 +651,7 @@ Import from `@tangle-network/agent-runtime/loops` — 379 exports.
 | `SupervisorOpts` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `SupervisorProfile` | interface | The supervisor's profile — the subset of an `AgentProfile` that selects + shapes its brain. |
 | `SurfaceScore` | interface | _(no summary — add a TSDoc line at the declaration)_ |
+| `ToolLoopCompaction` | interface | Self-compaction — bound the loop's OWN context window the way a fresh-respawn (dumb-Ralph) loop |
 | `ToolSpec` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `TraceSource` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `TrajectoryAnalysis` | interface | _(no summary — add a TSDoc line at the declaration)_ |
@@ -712,6 +713,7 @@ Import from `@tangle-network/agent-runtime/loops` — 379 exports.
 | `SteeringDecision` | type | Terminal-or-continue decision shared by all three steering drivers. The |
 | `SupervisedResult` | type | Typed terminal result (M2) — a no-winner is NEVER coerced to a best-effort output. |
 | `ToolLoopChat` | type | One inference turn over the running conversation + the tool specs → the model's text, any |
+| `ToolLoopCompactionOptions` | type | Public supervisor-facing compaction config: same knobs as the primitive, but `distill` is optional |
 | `TrajectoryReportFn` | type | `trajectoryReport(...)` — the tree+cost reconstructor. Async (reads journal + optionally blobs). |
 | `UsageEvent` | type | Normalized usage event — the single channel every executor reports through, so the |
 | `Verify` | type | `verify(spec)` — build the 2-node implement→verifier-gate combinator. |
diff --git a/docs/api/runtime.md b/docs/api/runtime.md
index dcc9fa4b..eac1add0 100644
--- a/docs/api/runtime.md
+++ b/docs/api/runtime.md
@@ -7231,7 +7231,7 @@ What the spawn was supposed to produce — surfaced in traces/reports.
 
 ### DriverAgentOptions
 
-Defined in: [runtime/supervise/coordination-driver.ts:39](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L39)
+Defined in: [runtime/supervise/coordination-driver.ts:45](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L45)
 
 #### Properties
 
@@ -7239,13 +7239,13 @@ Defined in: [runtime/supervise/coordination-driver.ts:39](https://github.com/tan
 
 > `readonly` **name**: `string`
 
-Defined in: [runtime/supervise/coordination-driver.ts:40](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L40)
+Defined in: [runtime/supervise/coordination-driver.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L46)
 
 ##### brain
 
 > `readonly` **brain**: [`ToolLoopChat`](#toolloopchat)
 
-Defined in: [runtime/supervise/coordination-driver.ts:44](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L44)
+Defined in: [runtime/supervise/coordination-driver.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L50)
 
 The driver-LLM seam — ONE inference turn over the conversation + the coordination tool specs
  (the canonical `ToolLoopChat`): a scripted mock offline, the router's tool-calling in
@@ -7255,7 +7255,7 @@ The driver-LLM seam — ONE inference turn over the conversation + the coordinat
 
 > `readonly` **blobs**: [`ResultBlobStore`](#resultblobstore)
 
-Defined in: [runtime/supervise/coordination-driver.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L46)
+Defined in: [runtime/supervise/coordination-driver.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L52)
 
 Shared blob store — `observe_agent` reads settled outputs through it.
 
@@ -7263,7 +7263,7 @@ Shared blob store — `observe_agent` reads settled outputs through it.
 
 > `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](mcp.md#makeworkeragent)
 
-Defined in: [runtime/supervise/coordination-driver.ts:48](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L48)
+Defined in: [runtime/supervise/coordination-driver.ts:54](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L54)
 
 Resolve a spawned `profile` to a worker LEAF or a driver child (the recursion seam).
 
@@ -7271,7 +7271,7 @@ Resolve a spawned `profile` to a worker LEAF or a driver child (the recursion se
 
 > `readonly` **perWorker**: [`Budget`](#budget-10)
 
-Defined in: [runtime/supervise/coordination-driver.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L50)
+Defined in: [runtime/supervise/coordination-driver.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L56)
 
 Per-child budget reserved from the conserved pool on each spawn.
 
@@ -7279,7 +7279,7 @@ Per-child budget reserved from the conserved pool on each spawn.
 
 > `readonly` `optional` **maxLiveWorkers?**: `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L53)
+Defined in: [runtime/supervise/coordination-driver.ts:59](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L59)
 
 Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
  flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap.
@@ -7288,7 +7288,7 @@ Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this
 
 > `readonly` **systemPrompt**: `string` \| ((`task`) => `string`)
 
-Defined in: [runtime/supervise/coordination-driver.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L56)
+Defined in: [runtime/supervise/coordination-driver.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L62)
 
 The driver's stance — a string, or built from the task (the worker-driver prompt /
  the generator). INJECTED so the prompt is a pluggable, optimizable role.
@@ -7297,7 +7297,7 @@ The driver's stance — a string, or built from the task (the worker-driver prom
 
 > `readonly` `optional` **extraTools?**: readonly `object`[]
 
-Defined in: [runtime/supervise/coordination-driver.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L61)
+Defined in: [runtime/supervise/coordination-driver.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L67)
 
 WORK tools the driver may call DIRECTLY (alongside the coordination verbs) — so the driver is
  not a pure manager but a full agent that can ACT (do simple work itself) OR SPAWN (delegate).
@@ -7308,7 +7308,7 @@ WORK tools the driver may call DIRECTLY (alongside the coordination verbs) — s
 
 > `readonly` `optional` **executeExtraTool?**: (`name`, `args`) => `Promise`\<`string` \| `null` \| `undefined`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:68](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L68)
+Defined in: [runtime/supervise/coordination-driver.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L74)
 
 Runs an `extraTools` call. Returns a string result, or null/undefined to signal "not handled"
  so the call falls through to the coordination dispatch. Required iff `extraTools` is set.
@@ -7331,7 +7331,7 @@ Runs an `extraTools` call. Returns a string result, or null/undefined to signal
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L76)
+Defined in: [runtime/supervise/coordination-driver.ts:82](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L82)
 
 Max driver turns before the loop force-finalizes on the best settled child. Default 16.
  `0` lifts the turn-COUNT cap: the loop is bounded instead by the conserved budget pool,
@@ -7342,7 +7342,7 @@ Max driver turns before the loop force-finalizes on the best settled child. Defa
 
 > `readonly` `optional` **now?**: () => `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:79](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L79)
+Defined in: [runtime/supervise/coordination-driver.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L85)
 
 Injected clock for the in-loop absolute-deadline guard — keeps the deadline check
  deterministic in tests. Defaults to `Date.now`.
@@ -7351,6 +7351,21 @@ Injected clock for the in-loop absolute-deadline guard — keeps the deadline ch
 
 `number`
 
+##### compaction?
+
+> `readonly` `optional` **compaction?**: [`ToolLoopCompactionOptions`](#toolloopcompactionoptions)
+
+Defined in: [runtime/supervise/coordination-driver.ts:94](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L94)
+
+Give the driver brain a chapter-lifecycle on its OWN context window. The LLM-brain front doors
+ lose to a dumb-Ralph respawn because the brain re-bills its whole coordination transcript every
+ turn — the same context overflow a single steered agent suffers, one level up. With this set,
+ once the brain's running conversation exceeds `thresholdTokens` it distills the accumulated
+ history to a compact progress note and continues fresh: the supervisor analog of respawning
+ against external tracking state, except the live `Scope` roster IS the durable state. Default
+ off (no behavior change). `distill` defaults to a self-summary authored by the brain combined
+ with the factual settled-worker roster; override to supply your own.
+
 ***
 
 ### CoordinationMcpHandle
@@ -8306,17 +8321,29 @@ Defined in: [runtime/supervise/supervise.ts:84](https://github.com/tangle-networ
 
 Defined in: [runtime/supervise/supervise.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L85)
 
+##### compaction?
+
+> `readonly` `optional` **compaction?**: [`ToolLoopCompactionOptions`](#toolloopcompactionoptions)
+
+Defined in: [runtime/supervise/supervise.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L91)
+
+Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only): once
+ its coordination transcript exceeds `thresholdTokens` it distills to a compact progress note and
+ continues, instead of re-billing the whole transcript every turn (the cost that makes the LLM-brain
+ front door lose to a dumb-Ralph respawn). The live `Scope` roster is the durable state across
+ chapters. Default off. `distill` defaults to a brain self-summary + the settled-worker roster.
+
 ##### runId?
 
 > `readonly` `optional` **runId?**: `string`
 
-Defined in: [runtime/supervise/supervise.ts:86](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L86)
+Defined in: [runtime/supervise/supervise.ts:92](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L92)
 
 ##### now?
 
 > `readonly` `optional` **now?**: () => `number`
 
-Defined in: [runtime/supervise/supervise.ts:87](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L87)
+Defined in: [runtime/supervise/supervise.ts:93](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L93)
 
 ###### Returns
 
@@ -8326,7 +8353,7 @@ Defined in: [runtime/supervise/supervise.ts:87](https://github.com/tangle-networ
 
 > `readonly` `optional` **allowedModels?**: readonly `string`[]
 
-Defined in: [runtime/supervise/supervise.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L91)
+Defined in: [runtime/supervise/supervise.ts:97](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L97)
 
 Restrict the run to this subset of models. When set, every configured model — the
  supervisor router model, the profile's model, and the backend's model — must be a member,
@@ -8336,7 +8363,7 @@ Restrict the run to this subset of models. When set, every configured model —
 
 ### SupervisorProfile
 
-Defined in: [runtime/supervise/supervisor-agent.ts:26](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L26)
+Defined in: [runtime/supervise/supervisor-agent.ts:48](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L48)
 
 The supervisor's profile — the subset of an `AgentProfile` that selects + shapes its brain.
  `harness` is the backend-as-data discriminant; `systemPrompt` is the standing instruction.
@@ -8347,13 +8374,13 @@ The supervisor's profile — the subset of an `AgentProfile` that selects + shap
 
 > `readonly` `optional` **name?**: `string`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:27](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L27)
+Defined in: [runtime/supervise/supervisor-agent.ts:49](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L49)
 
 ##### harness?
 
 > `readonly` `optional` **harness?**: `string` \| `null`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:29](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L29)
+Defined in: [runtime/supervise/supervisor-agent.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L51)
 
 null/undefined → router brain (in-process tool-loop); a coding-CLI harness → sandboxed brain.
 
@@ -8361,7 +8388,7 @@ null/undefined → router brain (in-process tool-loop); a coding-CLI harness →
 
 > `readonly` `optional` **model?**: `string`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:31](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L31)
+Defined in: [runtime/supervise/supervisor-agent.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L53)
 
 The router model when the brain is router-driven (falls back to the deps router config).
 
@@ -8369,7 +8396,7 @@ The router model when the brain is router-driven (falls back to the deps router
 
 > `readonly` `optional` **systemPrompt?**: `string`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:33](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L33)
+Defined in: [runtime/supervise/supervisor-agent.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L55)
 
 The standing instructions ("you delegate, you do not solve").
 
@@ -8377,7 +8404,7 @@ The standing instructions ("you delegate, you do not solve").
 
 ### SupervisorAgentDeps
 
-Defined in: [runtime/supervise/supervisor-agent.ts:47](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L47)
+Defined in: [runtime/supervise/supervisor-agent.ts:69](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L69)
 
 #### Properties
 
@@ -8385,13 +8412,13 @@ Defined in: [runtime/supervise/supervisor-agent.ts:47](https://github.com/tangle
 
 > `readonly` **blobs**: [`ResultBlobStore`](#resultblobstore)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:48](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L48)
+Defined in: [runtime/supervise/supervisor-agent.ts:70](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L70)
 
 ##### makeWorkerAgent
 
 > `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](mcp.md#makeworkeragent)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L50)
+Defined in: [runtime/supervise/supervisor-agent.ts:72](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L72)
 
 Resolve a spawned worker `profile` to a leaf agent — the recursion seam (same for both arms).
 
@@ -8399,7 +8426,7 @@ Resolve a spawned worker `profile` to a leaf agent — the recursion seam (same
 
 > `readonly` **perWorker**: [`Budget`](#budget-10)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L52)
+Defined in: [runtime/supervise/supervisor-agent.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L74)
 
 Per-child budget reserved from the conserved pool on each spawn.
 
@@ -8407,7 +8434,7 @@ Per-child budget reserved from the conserved pool on each spawn.
 
 > `readonly` `optional` **maxLiveWorkers?**: `number`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L56)
+Defined in: [runtime/supervise/supervisor-agent.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L78)
 
 Hard cap on simultaneously-LIVE workers across both arms — `spawn_agent` fails closed once
  this many are in flight (a concurrency fence on top of the conserved-pool fence; bounds live
@@ -8417,7 +8444,7 @@ Hard cap on simultaneously-LIVE workers across both arms — `spawn_agent` fails
 
 > `readonly` `optional` **router?**: [`RouterConfig`](#routerconfig)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L58)
+Defined in: [runtime/supervise/supervisor-agent.ts:80](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L80)
 
 Router substrate for a router-brained supervisor (`harness` null). The profile's model wins.
 
@@ -8425,7 +8452,7 @@ Router substrate for a router-brained supervisor (`harness` null). The profile's
 
 > `readonly` `optional` **brain?**: [`ToolLoopChat`](#toolloopchat)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L60)
+Defined in: [runtime/supervise/supervisor-agent.ts:82](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L82)
 
 Inject the brain directly (tests / advanced) instead of resolving `routerBrain` from the profile.
 
@@ -8433,7 +8460,7 @@ Inject the brain directly (tests / advanced) instead of resolving `routerBrain`
 
 > `readonly` `optional` **driveHarness?**: [`DriveHarness`](#driveharness-1)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L62)
+Defined in: [runtime/supervise/supervisor-agent.ts:84](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L84)
 
 Required for a sandboxed-harness supervisor (`harness` set): runs the harness as the driver.
 
@@ -8441,7 +8468,7 @@ Required for a sandboxed-harness supervisor (`harness` set): runs the harness as
 
 > `readonly` `optional` **extraTools?**: readonly `object`[]
 
-Defined in: [runtime/supervise/supervisor-agent.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L65)
+Defined in: [runtime/supervise/supervisor-agent.ts:87](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L87)
 
 WORK tools the supervisor may call DIRECTLY (router arm) — so it can do simple work ITSELF and
  only delegate when it needs parallelism. Pair with `executeExtraTool`.
@@ -8450,7 +8477,7 @@ WORK tools the supervisor may call DIRECTLY (router arm) — so it can do simple
 
 > `readonly` `optional` **executeExtraTool?**: (`name`, `args`) => `Promise`\<`string` \| `null` \| `undefined`\>
 
-Defined in: [runtime/supervise/supervisor-agent.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L71)
+Defined in: [runtime/supervise/supervisor-agent.ts:93](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L93)
 
 Runs an `extraTools` call; null/undefined falls through to the coordination dispatch.
 
@@ -8472,7 +8499,17 @@ Runs an `extraTools` call; null/undefined falls through to the coordination disp
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:75](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L75)
+Defined in: [runtime/supervise/supervisor-agent.ts:97](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L97)
+
+##### compaction?
+
+> `readonly` `optional` **compaction?**: [`ToolLoopCompactionOptions`](#toolloopcompactionoptions)
+
+Defined in: [runtime/supervise/supervisor-agent.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L101)
+
+Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only) — it
+ distills its coordination transcript to a compact progress note once it exceeds the threshold,
+ instead of re-billing the whole thing every turn. See `DriverAgentOptions.compaction`.
 
 ***
 
@@ -9847,6 +9884,106 @@ Defined in: [runtime/supervise/worktree-fanout.ts:68](https://github.com/tangle-
 
 ***
 
+### ToolLoopCompaction
+
+Defined in: [runtime/tool-loop.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L50)
+
+Self-compaction — bound the loop's OWN context window the way a fresh-respawn (dumb-Ralph) loop
+ does, but in place. A stateless chat API re-sends the WHOLE running conversation every turn, so an
+ agent that accumulates dozens of turns of tool results re-bills its entire transcript on every
+ inference — the context-overflow-one-level-up that the conserved budget pool cannot fix. With
+ compaction set, once the conversation exceeds `thresholdTokens` the accumulated middle (every prior
+ assistant turn + tool result) is distilled into ONE compact progress note and the conversation is
+ reset to `[...head, digest]`: the preserved head (system + the original task) survives, the stale
+ turn-by-turn history does not. The model keeps deciding; it stops re-billing the whole transcript.
+ Fires at a CLEAN turn boundary (after a turn's tool results are folded in, before the next
+ inference) so it never orphans an assistant `tool_calls` from its `tool` replies.
+
+#### Properties
+
+##### thresholdTokens
+
+> `readonly` **thresholdTokens**: `number`
+
+Defined in: [runtime/tool-loop.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L52)
+
+Compact once the estimated token count of the conversation exceeds this.
+
+##### distill
+
+> `readonly` **distill**: (`messages`) => `string` \| `Promise`\<`string`\>
+
+Defined in: [runtime/tool-loop.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L55)
+
+Distill the conversation into a compact progress note that REPLACES the middle. Receives the
+ full conversation (so it can summarize everything done so far); returns the digest string.
+
+###### Parameters
+
+###### messages
+
+readonly `Msg`[]
+
+###### Returns
+
+`string` \| `Promise`\<`string`\>
+
+##### preserveHead?
+
+> `readonly` `optional` **preserveHead?**: `number`
+
+Defined in: [runtime/tool-loop.ts:57](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L57)
+
+Leading messages preserved verbatim (system + the original task). Default 2.
+
+##### estimateTokens?
+
+> `readonly` `optional` **estimateTokens?**: (`messages`) => `number`
+
+Defined in: [runtime/tool-loop.ts:59](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L59)
+
+Token estimator over the conversation. Default ≈ chars/4 (incl. tool-call arguments).
+
+###### Parameters
+
+###### messages
+
+readonly `Msg`[]
+
+###### Returns
+
+`number`
+
+##### onCompact?
+
+> `readonly` `optional` **onCompact?**: (`info`) => `void`
+
+Defined in: [runtime/tool-loop.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L61)
+
+Notified each time a compaction fires — for observability/metering.
+
+###### Parameters
+
+###### info
+
+###### turn
+
+`number`
+
+###### beforeTokens
+
+`number`
+
+###### afterTokens
+
+`number`
+
+###### Returns
+
+`void`
+
+***
+
 ### ValidationCtx
 
 Defined in: [runtime/types.ts:32](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/types.ts#L32)
@@ -12667,7 +12804,7 @@ variant carries its backend's seam (router/router-tools/bridge/cli/cli-worktree/
 
 > **DriveHarness** = (`args`) => `Promise`\<`void`\>
 
-Defined in: [runtime/supervise/supervisor-agent.ts:40](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L40)
+Defined in: [runtime/supervise/supervisor-agent.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L62)
 
 How to run a sandboxed harness as the DRIVER, with the coordination verbs mounted — the substrate
  seam the caller supplies (mirrors `makeWorkerAgent` for spawned children). It runs `profile` on
@@ -12896,6 +13033,23 @@ One inference turn over the running conversation + the tool specs → the model'
 
 ***
 
+### ToolLoopCompactionOptions
+
+> **ToolLoopCompactionOptions** = `Omit`\<[`ToolLoopCompaction`](#toolloopcompaction), `"distill"`\> & `object`
+
+Defined in: [runtime/tool-loop.ts:66](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/tool-loop.ts#L66)
+
+Public supervisor-facing compaction config: same knobs as the primitive, but `distill` is optional
+ because the supervisor has a default digest that combines a brain note with live worker state.
+
+#### Type Declaration
+
+##### distill?
+
+> `readonly` `optional` **distill?**: [`ToolLoopCompaction`](#toolloopcompaction)\[`"distill"`\]
+
+***
+
 ### MountRecorder
 
 > **MountRecorder** = (`entry`) => `void`
@@ -15240,7 +15394,7 @@ executor has produced its output. The inner `score` is preserved; only `valid` i
 
 > **driverAgent**(`opts`): [`Agent`](#agent)\<`unknown`, `unknown`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:113](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L113)
+Defined in: [runtime/supervise/coordination-driver.ts:157](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L157)
 
 Build the intelligent recursive driver. Its `act` is the LLM tool-loop; spawn it as a
 `driverChild` (`driver-executor.ts`) to run it inside a nested scope, recursively.
@@ -15261,7 +15415,7 @@ Build the intelligent recursive driver. Its `act` is the LLM tool-loop; spawn it
 
 > **finalizeBestDelivered**(`settled`, `blobs`): `Promise`\<`unknown`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:267](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L267)
+Defined in: [runtime/supervise/coordination-driver.ts:356](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L356)
 
 Keep-best finalize under the completion-oracle: return the highest-scoring DELIVERED child's
  output (settled `done` AND `valid` — its deliverable check passed). Returns undefined when no
@@ -15680,7 +15834,7 @@ Build the worker seam from a backend (WHERE workers run) + an optional completio
 
 > **supervise**(`profile`, `task`, `opts`): `Promise`\<[`SupervisedResult`](#supervisedresult)\<`unknown`\>\>
 
-Defined in: [runtime/supervise/supervise.ts:102](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L102)
+Defined in: [runtime/supervise/supervise.ts:108](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L108)
 
 #### Parameters
 
@@ -15706,7 +15860,7 @@ Defined in: [runtime/supervise/supervise.ts:102](https://github.com/tangle-netwo
 
 > **supervisorAgent**(`profile`, `deps`): [`Agent`](#agent)\<`unknown`, `unknown`\>
 
-Defined in: [runtime/supervise/supervisor-agent.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L78)
+Defined in: [runtime/supervise/supervisor-agent.ts:104](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L104)
 
 #### Parameters
 
diff --git a/src/runtime/index.ts b/src/runtime/index.ts
index b4c70256..e4bad3e0 100644
--- a/src/runtime/index.ts
+++ b/src/runtime/index.ts
@@ -482,8 +482,9 @@ export {
   worktreeFanout,
 } from './supervise/worktree-fanout'
 // The driver-brain seam type a consumer scripts (a mock) or passes (`routerBrain`) into
-// `DriverAgentOptions.brain` — the canonical one-inference-turn tool-loop chat.
-export type { ToolLoopChat } from './tool-loop'
+// `DriverAgentOptions.brain` — the canonical one-inference-turn tool-loop chat. `ToolLoopCompaction` and
+// `ToolLoopCompactionOptions` configure self-compaction of the brain's own context window (the supervisor chapter-close).
+export type { ToolLoopChat, ToolLoopCompaction, ToolLoopCompactionOptions } from './tool-loop'
 export type {
   AgentRunSpec,
   DefaultVerdict,
diff --git a/src/runtime/supervise/coordination-driver.ts b/src/runtime/supervise/coordination-driver.ts
index dcefd02e..219c5749 100644
--- a/src/runtime/supervise/coordination-driver.ts
+++ b/src/runtime/supervise/coordination-driver.ts
@@ -31,10 +31,16 @@ import {
   coordinationVerbNames,
   createCoordinationTools,
   type MakeWorkerAgent,
+  type SettledWorker,
 } from '../../mcp/tools/coordination'
 import type { ToolSpec } from '../router-client'
-import { runBrainLoop, type ToolLoopChat } from '../tool-loop'
-import type { Agent, Budget, ResultBlobStore, Scope, Spend } from './types'
+import {
+  runBrainLoop,
+  type ToolLoopChat,
+  type ToolLoopCompaction,
+  type ToolLoopCompactionOptions,
+} from '../tool-loop'
+import type { Agent, Budget, NodeSnapshot, ResultBlobStore, Scope, Spend, TreeView } from './types'
 
 export interface DriverAgentOptions {
   readonly name: string
@@ -77,6 +83,44 @@ export interface DriverAgentOptions {
   /** Injected clock for the in-loop absolute-deadline guard — keeps the deadline check
    *  deterministic in tests. Defaults to `Date.now`. */
   readonly now?: () => number
+  /** Give the driver brain a chapter-lifecycle on its OWN context window. The LLM-brain front doors
+   *  lose to a dumb-Ralph respawn because the brain re-bills its whole coordination transcript every
+   *  turn — the same context overflow a single steered agent suffers, one level up. With this set,
+   *  once the brain's running conversation exceeds `thresholdTokens` it distills the accumulated
+   *  history to a compact progress note and continues fresh: the supervisor analog of respawning
+   *  against external tracking state, except the live `Scope` roster IS the durable state. Default
+   *  off (no behavior change). `distill` defaults to a self-summary authored by the brain combined
+   *  with the factual settled-worker roster; override to supply your own. */
+  readonly compaction?: ToolLoopCompactionOptions
+}
+
+/** The default chapter-close prompt: the brain summarizes its OWN progress for its future self before
+ *  the detailed history is dropped. Emphasis on PENDING work — the part a too-eager chapter-close
+ *  loses (the coding-burn counter-finding: closing after one fix leaves integration bugs uncircled). */
+const distillInstruction =
+  'CONTEXT COMPACTION. Your detailed turn-by-turn history is about to be discarded to free your context window. Write a COMPLETE, compact handoff note for your future self so you can keep going without it. Cover: (1) what you have accomplished; (2) every worker you spawned and its current status/result; (3) what subtasks remain unfinished, failing, or unverified — be specific and exhaustive here, this is the part you must not lose; (4) your immediate next action. Do not call any tools; respond with the note only.'
+
+/** Ground-truth half of the digest: one line per node in the live Scope view, joined with that
+ *  node's settled-ledger entry when present — facts independent of the brain's own prose summary. */
+function summarizeRoster(view: TreeView, settled: ReadonlyArray<SettledWorker>): string {
+  if (view.nodes.length === 0) return 'Workers in current live scope: none yet.'
+  const ledger = new Map(settled.map((entry) => [entry.id, entry]))
+  const roster = view.nodes.map((n) => formatRosterNode(n, ledger.get(n.id))).join('\n')
+  return `Workers in current live scope (ground truth from the run, ${view.nodes.length} total, ${view.inFlight} in flight):\n${roster}`
+}
+
+/** One roster line for the digest. Starts with the node's live facts (id, status, label, runtime),
+ *  then appends the settled outcome when the ledger has one: `done` → delivered/score/outRef,
+ *  `down` → reason. Without a done/down entry it appends the node's own `outRef` if set. */
+function formatRosterNode(node: NodeSnapshot, settled?: SettledWorker): string {
+  const base = `- ${node.id}: ${node.status}, label=${node.label}, runtime=${node.runtime}`
+  if (settled?.status === 'done') {
+    const score = settled.score !== undefined ? `, score=${settled.score}` : ''
+    const ref = settled.outRef ? `, outRef=${settled.outRef}` : ''
+    return `${base}, delivered=${settled.valid ?? false}${score}${ref}`
+  }
+  if (settled?.status === 'down') return `${base}, reason=${settled.reason ?? 'unknown'}`
+  return node.outRef ? `${base}, outRef=${node.outRef}` : base
 }
 
 /** maxTurns=0 anti-runaway tripwire: a finite ceiling for the ONE case the conserved pool can't
@@ -176,8 +220,12 @@ export function driverAgent(opts: DriverAgentOptions): Agent<unknown, unknown> {
       // drains the pool → poolStarved). Wrapping the brain keeps the debit exactly where it was; a
       // scripted/mock turn reports no usage and meters nothing, so offline equal-k stays exact.
       // iterations:0 — the conserved iteration channel budgets CHILD rounds, not driver turns.
-      let turn = 0
-      const chat: ToolLoopChat = async (messages, tools) => {
+      let driverTurn = 0
+      const meteredBrain = async (
+        messages: ReadonlyArray<Record<string, unknown>>,
+        tools: ReadonlyArray<ToolSpec>,
+        detail: Record<string, unknown>,
+      ) => {
         const res = await opts.brain(messages, tools)
         if (res.usage || res.costUsd !== undefined) {
           const turnSpend: Spend = {
@@ -187,19 +235,60 @@ export function driverAgent(opts: DriverAgentOptions): Agent<unknown, unknown> {
             ms: 0,
           }
           await scope.meter(turnSpend, {
-            kind: 'driver-inference',
             driver: opts.name,
-            turn,
             toolCalls: (res.toolCalls ?? []).map((c) => c.name),
+            ...detail,
           })
         }
-        turn += 1
+        return res
+      }
+      const chat: ToolLoopChat = async (messages, tools) => {
+        const turn = driverTurn
+        const res = await meteredBrain(messages, tools, {
+          kind: 'driver-inference',
+          turn,
+        })
+        driverTurn += 1
         return res
       }
 
+      // Chapter-close on the brain's own window. The default distiller pairs the factual settled-worker
+      // roster (from the live scope) with a brain-authored progress note; the brain call runs through
+      // `meteredBrain`, so the one-time O(history) distill cost debits the conserved pool like any
+      // turn. It replaces the per-turn O(history) re-billing it removes.
+      const compaction: ToolLoopCompaction | undefined = opts.compaction
+        ? {
+            thresholdTokens: opts.compaction.thresholdTokens,
+            distill:
+              opts.compaction.distill ??
+              (async (msgs) => {
+                const roster = summarizeRoster(scope.view, coord.settled())
+                try {
+                  const res = await meteredBrain(
+                    [...msgs, { role: 'user', content: distillInstruction }],
+                    [],
+                    { kind: 'driver-compaction', compactingTurn: driverTurn },
+                  )
+                  const narrative = (res.content ?? '').trim()
+                  return narrative ? `${roster}\n\n## Progress notes\n${narrative}` : roster
+                } catch (e) {
+                  return `${roster}\n\n## Progress notes\nSummary unavailable: ${errMessage(e)}`
+                }
+              }),
+            ...(opts.compaction.onCompact ? { onCompact: opts.compaction.onCompact } : {}),
+            ...(opts.compaction.preserveHead !== undefined
+              ? { preserveHead: opts.compaction.preserveHead }
+              : {}),
+            ...(opts.compaction.estimateTokens
+              ? { estimateTokens: opts.compaction.estimateTokens }
+              : {}),
+          }
+        : undefined
+
       await runBrainLoop({
         chat,
         tools: toolSpecs,
+        ...(compaction ? { compaction } : {}),
         execute: async (name, args) => {
           // WORK FIRST: a work tool the driver runs itself (act). A non-null return is handled here;
           // null/undefined means "not mine" → fall through to the coordination dispatch (spawn/await/…).
@@ -295,3 +384,7 @@ function safeJson(v: unknown): string {
     return String(v)
   }
 }
+
+function errMessage(e: unknown): string {
+  return e instanceof Error ? e.message : String(e)
+}
diff --git a/src/runtime/supervise/supervise.ts b/src/runtime/supervise/supervise.ts
index 6e08f43a..5760d73f 100644
--- a/src/runtime/supervise/supervise.ts
+++ b/src/runtime/supervise/supervise.ts
@@ -11,7 +11,7 @@ import type { AgentProfile } from '@tangle-network/sandbox'
 import { ValidationError } from '../../errors'
 import type { MakeWorkerAgent } from '../../mcp/tools/coordination'
 import type { RouterConfig } from '../router-client'
-import type { ToolLoopChat } from '../tool-loop'
+import type { ToolLoopChat, ToolLoopCompactionOptions } from '../tool-loop'
 import { type DeliverableSpec, gateOnDeliverable } from './completion-gate'
 import { assertModelAllowed } from './model-policy'
 import { createInMemoryRunContext } from './run-context'
@@ -83,6 +83,12 @@ export interface SuperviseOptions {
   readonly blobs?: ResultBlobStore
   readonly maxDepth?: number
   readonly maxTurns?: number
+  /** Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only): once
+   *  its coordination transcript exceeds `thresholdTokens` it distills to a compact progress note and
+   *  continues, instead of re-billing the whole transcript every turn (the cost that makes the LLM-brain
+   *  front door lose to a dumb-Ralph respawn). The live `Scope` roster is the durable state across
+   *  chapters. Default off. `distill` defaults to a brain self-summary + the settled-worker roster. */
+  readonly compaction?: ToolLoopCompactionOptions
   readonly runId?: string
   readonly now?: () => number
   /** Restrict the run to this subset of models. When set, every configured model — the
@@ -135,6 +141,7 @@ export function supervise(profile: SupervisorProfile, task: unknown, opts: Super
     ...(opts.extraTools ? { extraTools: opts.extraTools } : {}),
     ...(opts.executeExtraTool ? { executeExtraTool: opts.executeExtraTool } : {}),
     ...(opts.maxTurns !== undefined ? { maxTurns: opts.maxTurns } : {}),
+    ...(opts.compaction ? { compaction: opts.compaction } : {}),
   })
 
   return createSupervisor<unknown, unknown>().run(agent, task, {
diff --git a/src/runtime/supervise/supervisor-agent.ts b/src/runtime/supervise/supervisor-agent.ts
index 59743db0..5b4a5fc6 100644
--- a/src/runtime/supervise/supervisor-agent.ts
+++ b/src/runtime/supervise/supervisor-agent.ts
@@ -16,7 +16,7 @@
 import { ValidationError } from '../../errors'
 import type { MakeWorkerAgent } from '../../mcp/tools/coordination'
 import { type RouterConfig, routerBrain } from '../router-client'
-import type { ToolLoopChat } from '../tool-loop'
+import type { ToolLoopChat, ToolLoopCompactionOptions } from '../tool-loop'
 import { driverAgent, finalizeBestDelivered } from './coordination-driver'
 import { serveCoordinationMcp } from './coordination-mcp'
 import type { Agent, Budget, ResultBlobStore, Scope } from './types'
@@ -95,6 +95,10 @@ export interface SupervisorAgentDeps {
     args: Record<string, unknown>,
   ) => Promise<string | null | undefined>
   readonly maxTurns?: number
+  /** Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only) — it
+   *  distills its coordination transcript to a compact progress note once it exceeds the threshold,
+   *  instead of re-billing the whole thing every turn. See `DriverAgentOptions.compaction`. */
+  readonly compaction?: ToolLoopCompactionOptions
 }
 
 export function supervisorAgent(
@@ -105,6 +109,12 @@ export function supervisorAgent(
   const systemPrompt = profile.systemPrompt ?? defaultSupervisorPrompt
   const harness = profile.harness ?? null
 
+  if (harness !== null && deps.compaction) {
+    throw new ValidationError(
+      'supervisorAgent: compaction is only supported for router-brained supervisors (profile.harness null)',
+    )
+  }
+
   if (harness === null) {
     // ROUTER arm: the in-process tool-loop. `routerBrain` is now an internal detail — the caller
     // passes a profile, not a hand-built brain (a test may still inject `deps.brain`).
@@ -120,6 +130,7 @@ export function supervisorAgent(
       ...(deps.extraTools ? { extraTools: deps.extraTools } : {}),
       ...(deps.executeExtraTool ? { executeExtraTool: deps.executeExtraTool } : {}),
       ...(deps.maxTurns !== undefined ? { maxTurns: deps.maxTurns } : {}),
+      ...(deps.compaction ? { compaction: deps.compaction } : {}),
     })
   }
 
diff --git a/src/runtime/tool-loop.ts b/src/runtime/tool-loop.ts
index 7382bbd6..a52725df 100644
--- a/src/runtime/tool-loop.ts
+++ b/src/runtime/tool-loop.ts
@@ -37,6 +37,87 @@ export interface ToolLoopHooks {
   onUsage?(usage: { input: number; output: number }): void
 }
 
+/** Self-compaction — bound the loop's OWN context window the way a fresh-respawn (dumb-Ralph) loop
+ *  does, but in place. A stateless chat API re-sends the WHOLE running conversation every turn, so an
+ *  agent that accumulates dozens of turns of tool results re-bills its entire transcript on every
+ *  inference — the context-overflow-one-level-up that the conserved budget pool cannot fix. With
+ *  compaction set, once the conversation exceeds `thresholdTokens` the accumulated middle (every prior
+ *  assistant turn + tool result) is distilled into ONE compact progress note and the conversation is
+ *  reset to `[...head, digest]`: the preserved head (system + the original task) survives, the stale
+ *  turn-by-turn history does not. The model keeps deciding; it stops re-billing the whole transcript.
+ *  Fires at a CLEAN turn boundary (after a turn's tool results are folded in, before the next
+ *  inference) so it never orphans an assistant `tool_calls` from its `tool` replies. */
+export interface ToolLoopCompaction {
+  /** Compact once the conversation's estimated token count is strictly greater than this. */
+  readonly thresholdTokens: number
+  /** Distill the conversation into a compact progress note that REPLACES the middle. Receives a
+   *  copy of the full conversation (so it can summarize everything so far); returns the digest. */
+  readonly distill: (messages: ReadonlyArray<Msg>) => Promise<string> | string
+  /** Leading messages preserved verbatim (system + task). Default 2; non-positive values use 2 too. */
+  readonly preserveHead?: number
+  /** Token estimator (also sizes `afterTokens`). Default ≈ chars/4 incl. tool-call arguments. */
+  readonly estimateTokens?: (messages: ReadonlyArray<Msg>) => number
+  /** Called after each compaction (observability/metering); `turn` is the turn about to run. */
+  readonly onCompact?: (info: { turn: number; beforeTokens: number; afterTokens: number }) => void
+}
+
+/** Public supervisor-facing compaction config: same knobs as the primitive, but `distill` is optional
+ *  because the supervisor falls back to a default digest (a brain note plus live worker state). */
+export type ToolLoopCompactionOptions = Omit<ToolLoopCompaction, 'distill'> & {
+  readonly distill?: ToolLoopCompaction['distill']
+}
+
+/** Cheap, provider-agnostic size proxy: ≈ chars/4 over each message's content plus any tool-call
+ *  arguments. The exact divisor is irrelevant — the estimate only has to GROW with the history so
+ *  the threshold eventually trips, and a uniform chars/4 does that. */
+function estimateConversationTokens(messages: ReadonlyArray<Msg>): number {
+  type Sized = { content?: unknown; tool_calls?: Array<{ function?: { arguments?: string } }> }
+  let total = 0
+  for (const raw of messages) {
+    const { content, tool_calls: calls } = raw as Sized
+    total += typeof content === 'string' ? content.length : safeJsonLength(content)
+    for (const call of calls || []) total += call.function?.arguments?.length ?? 0
+  }
+  return Math.ceil(total / 4)
+}
+
+function safeJsonLength(value: unknown): number {
+  if (value == null) return 0 // absent content adds nothing
+  try {
+    return (JSON.stringify(value) ?? '').length // stringify yields undefined for e.g. functions
+  } catch {
+    return String(value).length // stringify threw (e.g. a cycle): size the String() form instead
+  }
+}
+
+/** Distill the conversation's accumulated middle into one note and splice it in, IF the estimated
+ *  size exceeds the threshold. Returns true when a compaction fired. The preserved head keeps the
+ *  system + task verbatim; everything after collapses to a single `user` progress note, so no
+ *  assistant `tool_calls` is left without its `tool` replies. */
+async function maybeCompact(
+  messages: Msg[],
+  c: ToolLoopCompaction,
+  turn: number,
+): Promise<boolean> {
+  // `splice` truncates its arguments toward zero, so a negative `preserveHead` would index from the
+  // END and a fractional one would leave a stray trailing message (an orphaned `tool` reply) after
+  // the digest. Anything below 1 (or NaN) falls back to the default (keep system + task); a fraction
+  // is floored. A too-large value is already safe — the length guard below makes it a no-op.
+  const head = c.preserveHead !== undefined && c.preserveHead >= 1 ? Math.floor(c.preserveHead) : 2
+  // Nothing meaningful to collapse yet (only head + at most one trailing message).
+  if (messages.length <= head + 1) return false
+  const estimate = c.estimateTokens ?? estimateConversationTokens
+  const before = estimate(messages)
+  if (before <= c.thresholdTokens) return false
+  const digest = await c.distill([...messages])
+  messages.splice(head, messages.length - head, {
+    role: 'user',
+    content: `[earlier work compacted to save context — progress so far]\n${digest}`,
+  })
+  c.onCompact?.({ turn, beforeTokens: before, afterTokens: estimate(messages) })
+  return true
+}
+
 export interface ToolLoopResult {
   /** The model's final assistant text (where it stopped calling tools, or the budget turn). */
   final: string
@@ -58,6 +139,9 @@ export async function runBrainLoop(opts: {
   initialMessages: ReadonlyArray<Msg>
   maxTurns?: number
   hooks?: ToolLoopHooks
+  /** Bound the loop's own context window in place (the chapter-close a dumb-Ralph respawn gets for
+   *  free). Off by default — when unset the conversation accumulates exactly as before. */
+  compaction?: ToolLoopCompaction
 }): Promise<ToolLoopResult> {
   const maxTurns = opts.maxTurns ?? 4
   const messages: Msg[] = [...opts.initialMessages]
@@ -69,6 +153,10 @@ export async function runBrainLoop(opts: {
   for (let turn = 1; turn <= maxTurns; turn += 1) {
     if (opts.hooks?.stopBefore?.(turn)) break
     await opts.hooks?.beforeTurn?.(turn, messages)
+    // Close the chapter BEFORE the inference turn that would otherwise re-bill the whole transcript:
+    // distill the accumulated middle to a compact note so this and every later turn pay O(working-set),
+    // not O(total-history). A clean boundary — the prior turn's tool replies are already folded in.
+    if (opts.compaction) await maybeCompact(messages, opts.compaction, turn)
     const r = await opts.chat(messages, opts.tools)
     if (r.usage) {
       usage.input += r.usage.input
diff --git a/tests/loops/coordination-driver.test.ts b/tests/loops/coordination-driver.test.ts
index 4eb96563..fe6de46d 100644
--- a/tests/loops/coordination-driver.test.ts
+++ b/tests/loops/coordination-driver.test.ts
@@ -18,6 +18,7 @@ import type {
   UsageEvent,
 } from '../../src/runtime/supervise/types'
 import type { ToolLoopChat } from '../../src/runtime/tool-loop'
+import type { RuntimeHookEvent } from '../../src/runtime-hooks'
 import { type ScriptedTurn, scriptedBrain } from './scripted-brain'
 
 type SeenMessages = Array<ReadonlyArray<Record<string, unknown>>>
@@ -64,6 +65,37 @@ function workerLeaf(name: string, s: WorkerScript): Agent<unknown, unknown> {
   }
 }
 
+/** A worker leaf whose executor never resolves: `execute` only ever rejects with `aborted`, either
+ *  immediately (signal already aborted) or once the signal's `abort` event fires. */
+function hangingWorkerLeaf(name: string): Agent<unknown, unknown> {
+  const spec: AgentSpec = {
+    profile: { name } as AgentProfile,
+    harness: null,
+    executor: {
+      runtime: 'router',
+      execute(_task: unknown, signal: AbortSignal): Promise<ExecutorResult<unknown>> {
+        return new Promise((_resolve, reject) => {
+          const abort = () => reject(new Error('aborted'))
+          if (signal.aborted) abort()
+          else signal.addEventListener('abort', abort, { once: true })
+        })
+      },
+      teardown: () => Promise.resolve({ destroyed: true }),
+      resultArtifact(): ExecutorResult<unknown> {
+        return {
+          outRef: 'never',
+          out: {},
+          verdict: { valid: false, score: 0 },
+          spent: { iterations: 0, tokens: { input: 0, output: 0 }, usd: 0, ms: 0 },
+        }
+      },
+    },
+  }
+  return { name, act: async () => ({}), executorSpec: spec } as Agent<unknown, unknown> & {
+    executorSpec: AgentSpec
+  }
+}
+
 const perWorker: Budget = { maxIterations: 4, maxTokens: 1000 }
 
 function driverOpts(
@@ -224,6 +256,78 @@ describe('driverAgent — the driver BRAIN (LLM tool-loop drives real spawns)',
     expect(nested.some((e) => e.kind === 'spawned')).toBe(true)
     expect(nested.some((e) => e.kind === 'settled' && e.status === 'done')).toBe(true)
   })
+
+  it('default compaction keeps in-flight workers in the compressed driver memory', async () => {
+    SHARED_BLOBS = new InMemoryResultBlobStore()
+    const journal = new InMemorySpawnJournal()
+    const seenNormalTurns: SeenMessages = []
+    const turnEvents: RuntimeHookEvent[] = []
+    let normalTurn = 0
+    let compactionCalls = 0
+    const worker = hangingWorkerLeaf('slow-worker')
+
+    const chat: ToolLoopChat = async (messages, tools) => {
+      const last = String(messages[messages.length - 1]?.content ?? '')
+      if (last.includes('CONTEXT COMPACTION')) {
+        compactionCalls += 1
+        return {
+          content: 'Spawned one slow worker; it is still running.',
+          toolCalls: [],
+          usage: { input: 7, output: 3 },
+        }
+      }
+      seenNormalTurns.push(messages)
+      normalTurn += 1
+      if (normalTurn === 1) {
+        expect(tools.some((t) => t.function.name === 'spawn_agent')).toBe(true)
+        return {
+          toolCalls: [
+            {
+              id: 'spawn',
+              name: 'spawn_agent',
+              arguments: JSON.stringify({ profile: {}, task: 'go' }),
+            },
+          ],
+          usage: { input: 11, output: 5 },
+        }
+      }
+      return { content: 'stop', toolCalls: [], usage: { input: 13, output: 2 } }
+    }
+
+    const root = driverAgent({
+      ...driverOpts('root', chat, () => worker),
+      compaction: { thresholdTokens: 1 },
+    })
+    const result = await createSupervisor<unknown, unknown>().run(root, 'keep track of work', {
+      budget: { maxIterations: 100, maxTokens: 100_000 },
+      runId: 'cd-live',
+      journal,
+      blobs: SHARED_BLOBS,
+      executors: createExecutorRegistry(),
+      maxDepth: 2,
+      now: () => 0,
+      hooks: {
+        onEvent: (event) => {
+          if (event.target === 'agent.turn') turnEvents.push(event)
+        },
+      },
+    })
+
+    expect(result.kind).toBe('no-winner')
+    expect(compactionCalls).toBe(1)
+    const compacted = String(seenNormalTurns[1]?.[2]?.content ?? '')
+    expect(compacted).toContain('Workers in current live scope')
+    expect(compacted).toContain('cd-live:s0')
+    expect(compacted).toContain('running')
+
+    const kinds = turnEvents.map((event) => (event.payload as { kind?: string }).kind)
+    expect(kinds).toEqual(['driver-inference', 'driver-compaction', 'driver-inference'])
+    const driverTurns = turnEvents
+      .map((event) => event.payload as { kind?: string; turn?: number })
+      .filter((event) => event.kind === 'driver-inference')
+      .map((event) => event.turn)
+    expect(driverTurns).toEqual([0, 1])
+  })
 })
 
 /** Discover every tree key the in-memory journal has begun (test-only introspection, mirroring
diff --git a/tests/loops/tool-loop-compaction.test.ts b/tests/loops/tool-loop-compaction.test.ts
new file mode 100644
index 00000000..fdcf21d0
--- /dev/null
+++ b/tests/loops/tool-loop-compaction.test.ts
@@ -0,0 +1,224 @@
+import { describe, expect, it } from 'vitest'
+import { runBrainLoop, type ToolLoopChat } from '../../src/runtime/tool-loop'
+
+/** A brain that asks for one `work` tool call on each of its first `toolTurns` turns (forcing the
+ *  conversation to accumulate) and then replies with the final text `done`. Every call appends the
+ *  conversation length it was handed to `sizesSeen`, so a test can assert growth vs boundedness. */
+function recordingBrain(toolTurns: number, sizesSeen: number[]): ToolLoopChat {
+  let calls = 0
+  return async (conversation) => {
+    sizesSeen.push(conversation.length)
+    calls += 1
+    const wantsTool = calls <= toolTurns
+    const toolCalls = wantsTool ? [{ id: `c${calls}`, name: 'work', arguments: '{}' }] : []
+    return { content: wantsTool ? '' : 'done', toolCalls }
+  }
+}
+
+const tools = [
+  { type: 'function' as const, function: { name: 'work', description: 'w', parameters: {} } },
+]
+const init = [
+  { role: 'system', content: 'SYS' },
+  { role: 'user', content: 'TASK' },
+]
+const bigResult = 'x'.repeat(4000) // ≈ 1000 tokens per tool result, well over a small threshold
+
+describe('runBrainLoop self-compaction', () => {
+  it('without compaction the conversation grows unbounded across turns', async () => {
+    const sizes: number[] = []
+    await runBrainLoop({
+      chat: recordingBrain(6, sizes),
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 8,
+    })
+    expect(sizes[sizes.length - 1]).toBeGreaterThan(sizes[0]!)
+    expect(Math.max(...sizes)).toBeGreaterThanOrEqual(12) // 2 + 2·(6 tool turns)
+  })
+
+  it('with compaction the conversation stays bounded and preserves the head', async () => {
+    const sizes: number[] = []
+    const events: Array<{ beforeTokens: number; afterTokens: number }> = []
+    let headPreserved = true
+    const inner = recordingBrain(6, sizes)
+    const chat: ToolLoopChat = async (messages, t) => {
+      const head0 = (messages[0] as { content?: unknown }).content
+      const head1 = (messages[1] as { content?: unknown }).content
+      if (head0 !== 'SYS' || head1 !== 'TASK') headPreserved = false
+      return inner(messages, t)
+    }
+    await runBrainLoop({
+      chat,
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 8,
+      compaction: {
+        thresholdTokens: 200,
+        distill: () => 'DIGEST',
+        onCompact: (info) => events.push(info),
+      },
+    })
+    expect(headPreserved).toBe(true)
+    expect(events.length).toBeGreaterThan(0)
+    expect(events[0]!.afterTokens).toBeLessThan(events[0]!.beforeTokens)
+    // With this low threshold, compaction fires before each post-tool inference, so the brain sees
+    // the compacted 3-message state instead of the unbounded run's max (14).
+    expect(Math.max(...sizes)).toBeLessThanOrEqual(6)
+  })
+
+  it('does not fire while the conversation stays below the threshold', async () => {
+    const sizes: number[] = []
+    let fired = 0
+    await runBrainLoop({
+      chat: recordingBrain(3, sizes),
+      tools,
+      execute: async () => 'small',
+      initialMessages: init,
+      maxTurns: 5,
+      compaction: {
+        thresholdTokens: 1_000_000,
+        distill: () => 'DIGEST',
+        onCompact: () => {
+          fired += 1
+        },
+      },
+    })
+    expect(fired).toBe(0)
+  })
+
+  it('a negative preserveHead is clamped (never splices from the end / preserves the wrong messages)', async () => {
+    const sizes: number[] = []
+    let headIntact = true
+    const inner = recordingBrain(6, sizes)
+    const chat: ToolLoopChat = async (messages, t) => {
+      if ((messages[0] as { content?: unknown }).content !== 'SYS') headIntact = false
+      return inner(messages, t)
+    }
+    await runBrainLoop({
+      chat,
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 8,
+      compaction: { thresholdTokens: 200, distill: () => 'DIGEST', preserveHead: -1 },
+    })
+    // With the clamp, the system message at index 0 is never spliced away.
+    expect(headIntact).toBe(true)
+    expect(Math.max(...sizes)).toBeLessThanOrEqual(6)
+  })
+
+  it('preserveHead: 0 is treated as invalid and keeps the system/task head', async () => {
+    const sizes: number[] = []
+    let headIntact = true
+    const inner = recordingBrain(6, sizes)
+    const chat: ToolLoopChat = async (messages, t) => {
+      const head0 = (messages[0] as { content?: unknown }).content
+      const head1 = (messages[1] as { content?: unknown }).content
+      if (head0 !== 'SYS' || head1 !== 'TASK') headIntact = false
+      return inner(messages, t)
+    }
+    await runBrainLoop({
+      chat,
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 8,
+      compaction: { thresholdTokens: 200, distill: () => 'DIGEST', preserveHead: 0 },
+    })
+    expect(headIntact).toBe(true)
+  })
+
+  it('the distiller receives the full accumulated conversation (so it can summarize everything)', async () => {
+    const sizes: number[] = []
+    let distillSawAtLeast = 0
+    await runBrainLoop({
+      chat: recordingBrain(6, sizes),
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 8,
+      compaction: {
+        thresholdTokens: 200,
+        distill: (messages) => {
+          distillSawAtLeast = Math.max(distillSawAtLeast, messages.length)
+          return 'DIGEST'
+        },
+      },
+    })
+    // The distiller is handed the full pre-compaction conversation (head + at least one tool round).
+    expect(distillSawAtLeast).toBeGreaterThanOrEqual(4)
+  })
+
+  it('hands the distiller a clean boundary with every tool call paired to its tool reply', async () => {
+    let snapshot: ReadonlyArray<Record<string, unknown>> = []
+    await runBrainLoop({
+      chat: recordingBrain(2, []),
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 4,
+      compaction: {
+        thresholdTokens: 200,
+        distill: (messages) => {
+          snapshot = messages
+          return 'DIGEST'
+        },
+      },
+    })
+
+    const toolReplyIds = new Set(
+      snapshot
+        .filter((m) => m.role === 'tool')
+        .map((m) => (m as { tool_call_id?: unknown }).tool_call_id),
+    )
+    const assistantCalls = snapshot.flatMap(
+      (m) => (m as { tool_calls?: Array<{ id?: unknown }> }).tool_calls ?? [],
+    )
+    expect(assistantCalls.length).toBeGreaterThan(0)
+    expect(assistantCalls.every((tc) => toolReplyIds.has(tc.id))).toBe(true)
+  })
+
+  it('uses the custom token estimator for firing and compaction event sizes', async () => {
+    const events: Array<{ beforeTokens: number; afterTokens: number }> = []
+    let calls = 0
+    await runBrainLoop({
+      chat: recordingBrain(2, []),
+      tools,
+      execute: async () => bigResult,
+      initialMessages: init,
+      maxTurns: 4,
+      compaction: {
+        thresholdTokens: 10,
+        estimateTokens: (messages) => {
+          calls += 1
+          return messages.length > 3 ? 999 : 3
+        },
+        distill: () => 'DIGEST',
+        onCompact: (info) => events.push(info),
+      },
+    })
+    expect(calls).toBeGreaterThan(0)
+    expect(events[0]).toMatchObject({ beforeTokens: 999, afterTokens: 3 })
+  })
+
+  it('does not crash when non-string message content cannot be JSON-stringified', async () => {
+    const circular: Record<string, unknown> = {}
+    circular.self = circular
+    await expect(
+      runBrainLoop({
+        chat: recordingBrain(1, []),
+        tools,
+        execute: async () => bigResult,
+        initialMessages: [
+          { role: 'system', content: circular },
+          { role: 'user', content: 'TASK' },
+        ],
+        maxTurns: 3,
+        compaction: { thresholdTokens: 1, distill: () => 'DIGEST' },
+      }),
+    ).resolves.toMatchObject({ final: 'done' })
+  })
+})