refactor(hooks/runtime): simplify approval plumbing and trim docstrings

dgageot · dgageot · commit c34390e1c619 · 2026-04-27T16:27:53.000+02:00
Pure cleanup pass over the two preceding feature commits. No features
removed, no public API changes.

  * `LocalRuntime.runTool` now returns `toolApprovalOutcome` directly,
    so callers don't repeat `stop, msg := runTool(); return outcome{...}`
    in seven places.
  * Local `runTool` closure in `processToolCalls` is renamed to `invoke`
    (no more shadowing the method) and extracted into a new
    `toolInvoker` helper for clarity.
  * `executeWithApproval` and `askUserForConfirmation` now `return invoke()`
    in every approval path — every one of those paths used to be a
    two-line tuple-unpack-and-pack.
  * `processToolCalls` synthesised-error loops are extracted into a
    `synthesizeRemaining` closure and wrapped in a `switch` with
    `canceled`/`stopRun`/`default` arms so the control flow is visible
    at a glance.

  * `loop_detector`: inline `parseLoopDetectorArgs`; replace the
    per-call `map[string]struct{}` exempt set with `slices.Contains`
    over a slice (no allocations on the hot path); shorter state field
    names (`sig`, `count`).
  * `max_iterations`: inline `parseMaxIterationsArgs`; collapse the
    `(int, bool)` parse-result into a single `limit<=0 → no-op` check.
  * `json.go`: previously held `canonicalJSON` and `joinNonEmpty`
    helpers that were already deleted as unused; the file is now just
    `sortKeys` + `canonicalToolInput`.

Trimmed the verbose comments added in the previous two commits to
match the surrounding house style. The contract is unchanged; only
prose that earns its keep remains:

  * `emitHookDrivenShutdown`, `executeBeforeLLMCallHooks`,
    `executeSessionEndHooks`, `loopDetectorExemptTools`, the
    `maxConsecutive` normalisation block, the `builtinsState` field
    comment.
  * Package doc and `State` / `Register` / `AgentDefaults` doc in
    pkg/hooks/builtins.
  * `hooks.EventPostToolUse`, `hooks.EventBeforeLLMCall`, and
    `hooks.DecisionBlockValue` docs.

    9 files changed, 238 insertions(+), 395 deletions(-)

Lint clean. `go test ./...` and `go test -race ./pkg/hooks/...
./pkg/runtime/...` all pass.

Assisted-By: docker-agent
diff --git a/pkg/hooks/builtins/builtins.go b/pkg/hooks/builtins/builtins.go
@@ -1,4 +1,4 @@
-// Package builtins contains the stock in-process hook implementations
+// Package builtins contains the in-process hook implementations
 // shipped with docker-agent.
 //
 // Available builtins:
diff --git a/pkg/hooks/builtins/loop_detector.go b/pkg/hooks/builtins/loop_detector.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
+	"slices"
 	"strconv"
 	"sync"
 
@@ -13,70 +14,57 @@ import (
 // LoopDetector is the registered name of the loop_detector builtin.
 const LoopDetector = "loop_detector"
 
-// defaultLoopDetectorThreshold is the consecutive-identical-call count
-// at which the detector trips when no explicit threshold is configured.
-// Five matches the historical default of the inline tool-loop detector
-// previously baked into pkg/runtime.
+// defaultLoopDetectorThreshold matches the historical default of the
+// inline tool-loop detector previously baked into pkg/runtime.
 const defaultLoopDetectorThreshold = 5
 
 // loopDetectorBuiltin is the post_tool_use builtin that terminates the
-// run when the model issues the same tool call (name + canonical
-// arguments) `threshold` times in a row.
+// run when the model issues the same tool call (name + canonical args)
+// `threshold` times in a row.
 //
-// State is per-session, keyed by [hooks.Input.SessionID], so concurrent
-// runs on a shared runtime can't cross-contaminate each other's
-// counters. The state map is bounded by the number of *active* sessions
-// rather than all-time sessions — the runtime calls `clearSession` from
-// session_end so the entry is dropped when the run finishes.
+// Args layout: `[threshold, exempt1, exempt2, ...]`. An invalid or
+// missing threshold falls back to [defaultLoopDetectorThreshold].
 //
-// Semantics differ from the previous inline detector in one specific
-// way: signatures are tracked **per call**, not per batch. For the
-// common stuck-agent case (the model repeatedly emits a single tool
-// call with identical arguments) the trip point is unchanged. A model
-// stuck in an alternating multi-tool batch like `[A, B] [A, B] [A, B]`
-// is no longer flagged because each B resets A's counter; users who
-// hit that pattern should rely on `max_iterations` (interactive UX) or
-// the new max_iterations builtin (hard stop) instead.
+// State is per-session, keyed by [hooks.Input.SessionID], and cleared
+// from session_end via [State.ClearSession].
 //
-// Polling tools listed in `args` after the threshold (e.g.
-// `view_background_agent`) are silently ignored: a polling call
-// neither increments nor resets the counter, so a looping model can't
-// evade detection by sneaking a single polling call between identical
-// stuck calls.
+// Detection is per-call, not per-batch: single-tool repetition and
+// parallel-identical batches still trip; alternating multi-tool
+// patterns like `[A,B] [A,B]` do not — those should be caught by
+// max_iterations or manual threshold tuning. Tools listed in `args`
+// after the threshold (e.g. background-task pollers) neither
+// increment nor reset the counter.
 type loopDetectorBuiltin struct {
 	mu     sync.Mutex
-	states map[string]*loopDetectorState // keyed by SessionID
+	states map[string]*loopState // SessionID -> state
 }
 
-type loopDetectorState struct {
-	lastSignature string
-	consecutive   int
+type loopState struct {
+	sig   string
+	count int
 }
 
 func newLoopDetector() *loopDetectorBuiltin {
-	return &loopDetectorBuiltin{states: map[string]*loopDetectorState{}}
+	return &loopDetectorBuiltin{states: map[string]*loopState{}}
 }
 
-// hook is registered as the [hooks.BuiltinFunc] for
-// [hooks.EventPostToolUse]. Args layout: `[threshold, exempt1, exempt2, ...]`
-// where `threshold` is an optional positive integer (defaults to
-// [defaultLoopDetectorThreshold] when missing or invalid) and the
-// remaining strings are tool names to exempt from detection.
 func (d *loopDetectorBuiltin) hook(_ context.Context, in *hooks.Input, args []string) (*hooks.Output, error) {
 	if in == nil || in.SessionID == "" || in.ToolName == "" {
 		// Defensive: post_tool_use always carries SessionID and
-		// ToolName today. Skipping unbounded, unkeyed events keeps
-		// the state map from filling with anonymous entries.
+		// ToolName today. Skipping unkeyed events keeps the state
+		// map from filling with anonymous entries.
 		return nil, nil
 	}
 
-	threshold, exempt := parseLoopDetectorArgs(args)
-
-	if _, ok := exempt[in.ToolName]; ok {
-		// Polling-style tools never count toward the consecutive
-		// total and never reset it: see the type-level comment for
-		// why visibility-zero (rather than counter-reset) is the
-		// right behaviour.
+	threshold := defaultLoopDetectorThreshold
+	var exempt []string
+	if len(args) > 0 {
+		if n, err := strconv.Atoi(args[0]); err == nil && n > 0 {
+			threshold = n
+		}
+		exempt = args[1:]
+	}
+	if slices.Contains(exempt, in.ToolName) {
 		return nil, nil
 	}
 
@@ -85,69 +73,37 @@ func (d *loopDetectorBuiltin) hook(_ context.Context, in *hooks.Input, args []st
 	d.mu.Lock()
 	state, ok := d.states[in.SessionID]
 	if !ok {
-		state = &loopDetectorState{}
+		state = &loopState{}
 		d.states[in.SessionID] = state
 	}
-	if sig == state.lastSignature {
-		state.consecutive++
+	if sig == state.sig {
+		state.count++
 	} else {
-		state.lastSignature = sig
-		state.consecutive = 1
+		state.sig = sig
+		state.count = 1
 	}
-	tripped := state.consecutive >= threshold
-	count := state.consecutive
+	count := state.count
 	d.mu.Unlock()
 
-	if !tripped {
+	if count < threshold {
 		return nil, nil
 	}
 
 	slog.Warn("loop_detector tripped",
 		"tool", in.ToolName, "consecutive", count,
 		"threshold", threshold, "session_id", in.SessionID)
 
-	reason := fmt.Sprintf(
-		"Agent terminated: detected %d consecutive identical calls to %s. "+
-			"This indicates a degenerate loop where the model is not making progress.",
-		count, in.ToolName)
-
-	// "block" is the post_tool_use deny verdict: aggregate() turns it
-	// into Result.Allowed=false, which the runtime translates into
-	// the standard Error / notification / on_error fan-out before
-	// terminating the run.
 	return &hooks.Output{
 		Decision: hooks.DecisionBlockValue,
-		Reason:   reason,
+		Reason: fmt.Sprintf(
+			"Agent terminated: detected %d consecutive identical calls to %s. "+
+				"This indicates a degenerate loop where the model is not making progress.",
+			count, in.ToolName),
 	}, nil
 }
 
-// clearSession drops a session's state entry, called from a
-// session_end hook so long-running runtimes don't accumulate
-// per-session entries indefinitely.
 func (d *loopDetectorBuiltin) clearSession(sessionID string) {
 	d.mu.Lock()
 	delete(d.states, sessionID)
 	d.mu.Unlock()
 }
-
-// parseLoopDetectorArgs splits builtin args into (threshold, exempt
-// tool name set). An invalid or non-positive threshold falls back to
-// [defaultLoopDetectorThreshold] silently — a misconfigured YAML
-// shouldn't disable the detector entirely.
-func parseLoopDetectorArgs(args []string) (int, map[string]struct{}) {
-	threshold := defaultLoopDetectorThreshold
-	exempt := map[string]struct{}{}
-
-	if len(args) == 0 {
-		return threshold, exempt
-	}
-	if n, err := strconv.Atoi(args[0]); err == nil && n > 0 {
-		threshold = n
-	}
-	for _, name := range args[1:] {
-		if name != "" {
-			exempt[name] = struct{}{}
-		}
-	}
-	return threshold, exempt
-}
diff --git a/pkg/hooks/builtins/max_iterations.go b/pkg/hooks/builtins/max_iterations.go
@@ -17,20 +17,16 @@ const MaxIterations = "max_iterations"
 // and signals a terminating verdict once the configured limit is
 // exceeded.
 //
-// This is intentionally a *hard stop*: it has no resume protocol and
-// emits no special runtime event. The runtime translates the deny
-// verdict into the standard Error / notification / on_error fan-out
-// from [LocalRuntime.emitHookDrivenShutdown], same as any other
-// hook-driven shutdown. The legacy `agent.MaxIterations` flag, which
-// has its own special UX (MaxIterationsReachedEvent + a resume
-// dialog), is unchanged and continues to live inline in loop.go;
-// this builtin is the way to express "stop after N model calls,
-// period" in YAML without that interactive dance.
+// This is a hard stop with no resume protocol — distinct from the
+// agent.MaxIterations flag, which has its own special UX
+// (MaxIterationsReachedEvent + a resume dialog) and stays in
+// pkg/runtime. Use this builtin to express "stop after N model calls,
+// period" in YAML.
 //
-// State is per-session, keyed by [hooks.Input.SessionID]. The runtime
-// calls [maxIterationsBuiltin.clearSession] from session_end so a
-// long-running shared runtime does not accumulate counters
-// indefinitely.
+// Args layout: `[limit]`. Missing or invalid args make the hook a
+// no-op so a misconfigured YAML doesn't accidentally cap a run at
+// zero. State is per-session, keyed by [hooks.Input.SessionID], and
+// cleared from session_end via [State.ClearSession].
 type maxIterationsBuiltin struct {
 	mu     sync.Mutex
 	counts map[string]int // SessionID -> calls observed
@@ -40,16 +36,13 @@ func newMaxIterations() *maxIterationsBuiltin {
 	return &maxIterationsBuiltin{counts: map[string]int{}}
 }
 
-// hook is registered as the [hooks.BuiltinFunc] for
-// [hooks.EventBeforeLLMCall]. The single arg is the limit (a positive
-// integer). Missing / invalid args make the hook a no-op so a
-// misconfigured YAML doesn't accidentally cap a run at zero.
 func (b *maxIterationsBuiltin) hook(_ context.Context, in *hooks.Input, args []string) (*hooks.Output, error) {
-	if in == nil || in.SessionID == "" {
+	if in == nil || in.SessionID == "" || len(args) == 0 {
 		return nil, nil
 	}
-	limit, ok := parseMaxIterationsArgs(args)
-	if !ok {
+	limit, err := strconv.Atoi(args[0])
+	if err != nil || limit <= 0 {
+		slog.Debug("max_iterations: ignoring invalid limit", "arg", args[0], "error", err)
 		return nil, nil
 	}
 
@@ -65,42 +58,16 @@ func (b *maxIterationsBuiltin) hook(_ context.Context, in *hooks.Input, args []s
 	slog.Warn("max_iterations tripped",
 		"count", count, "limit", limit, "session_id", in.SessionID)
 
-	reason := fmt.Sprintf(
-		"Agent terminated: max_iterations builtin reached its limit of %d model call(s).",
-		limit)
-
 	return &hooks.Output{
 		Decision: hooks.DecisionBlockValue,
-		Reason:   reason,
+		Reason: fmt.Sprintf(
+			"Agent terminated: max_iterations builtin reached its limit of %d model call(s).",
+			limit),
 	}, nil
 }
 
-// clearSession drops a session's counter, called from a session_end
-// hook so a long-running runtime serving many sessions doesn't grow
-// the state map without bound.
 func (b *maxIterationsBuiltin) clearSession(sessionID string) {
 	b.mu.Lock()
 	delete(b.counts, sessionID)
 	b.mu.Unlock()
 }
-
-// parseMaxIterationsArgs returns (limit, true) when args[0] is a
-// positive integer, or (0, false) for any other input. The "valid"
-// boolean lets the caller distinguish "no limit configured" (no-op)
-// from "limit explicitly set to 0" (which would also be a no-op but
-// reads as a config error and is logged at debug).
-func parseMaxIterationsArgs(args []string) (int, bool) {
-	if len(args) == 0 {
-		return 0, false
-	}
-	n, err := strconv.Atoi(args[0])
-	switch {
-	case err != nil:
-		slog.Debug("max_iterations: ignoring non-integer limit", "arg", args[0], "error", err)
-		return 0, false
-	case n <= 0:
-		slog.Debug("max_iterations: ignoring non-positive limit", "limit", n)
-		return 0, false
-	}
-	return n, true
-}
diff --git a/pkg/hooks/types.go b/pkg/hooks/types.go
@@ -16,24 +16,20 @@ const (
 	// EventPreToolUse fires before a tool call. Can allow/deny/modify it.
 	EventPreToolUse EventType = "pre_tool_use"
 	// EventPostToolUse fires after a tool completes successfully.
-	// Returning decision="block" (or continue=false / exit code 2) stops
-	// the agent's run loop after the current tool batch finishes — useful
-	// for circuit-breaker patterns like a tool-call loop detector. The
-	// runtime emits the [Result.Message] as an Error event and fires
-	// notification / on_error before exiting.
+	// Returning decision="block" (or continue=false / exit code 2)
+	// stops the run loop after the current tool batch — useful for
+	// circuit-breaker patterns like a tool-call loop detector.
 	EventPostToolUse EventType = "post_tool_use"
 	// EventSessionStart fires when a session begins or resumes.
 	EventSessionStart EventType = "session_start"
 	// EventTurnStart fires at the start of every agent turn (each model
 	// call). AdditionalContext is injected transiently and never persisted.
 	EventTurnStart EventType = "turn_start"
 	// EventBeforeLLMCall fires immediately before each model call.
-	// Returning decision="block" (or continue=false / exit code 2) stops
-	// the agent's run loop before the model is invoked — useful for hard
-	// budget guards like a max-iterations builtin. The runtime emits the
-	// [Result.Message] as an Error event and fires notification / on_error
-	// before exiting. Use turn_start to contribute system messages; this
-	// event's AdditionalContext is not consumed.
+	// Returning decision="block" (or continue=false / exit code 2)
+	// stops the run loop before the model is invoked — useful for hard
+	// budget guards. Use turn_start to contribute system messages;
+	// this event's AdditionalContext is not consumed.
 	EventBeforeLLMCall EventType = "before_llm_call"
 	// EventAfterLLMCall fires immediately after a successful model call,
 	// before the response is recorded. Failed calls fire EventOnError.
@@ -180,7 +176,7 @@ type Output struct {
 	// SystemMessage is a warning to show the user.
 	SystemMessage string `json:"system_message,omitempty"`
 	// Decision is for blocking operations ("block", ...).
-	// See [DecisionBlockValue] for the canonical "block" string.
+	// In-process builtin hooks should use [DecisionBlockValue].
 	Decision string `json:"decision,omitempty"`
 	// Reason explains the decision.
 	Reason string `json:"reason,omitempty"`
@@ -193,8 +189,6 @@ func (o *Output) ShouldContinue() bool { return o.Continue == nil || *o.Continue
 
 // DecisionBlockValue is the canonical value of [Output.Decision] used
 // by hooks to signal a deny/terminate verdict on the current event.
-// In-process builtin hooks should set Decision to this constant rather
-// than the bare string literal.
 const DecisionBlockValue = "block"
 
 // IsBlocked reports whether the decision is "block".
diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go
@@ -393,9 +393,7 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 
 			// before_llm_call hooks fire just before the model is invoked.
 			// A terminating verdict (e.g. from the max_iterations builtin)
-			// stops the run loop here, before any tokens are spent on the
-			// model call. Use turn_start to contribute system messages;
-			// this event's AdditionalContext is intentionally not consumed.
+			// stops the run loop here, before any tokens are spent.
 			if stop, msg := r.executeBeforeLLMCallHooks(ctx, sess, a); stop {
 				slog.Warn("before_llm_call hook signalled run termination",
 					"agent", a.Name(), "session_id", sess.ID, "reason", msg)
diff --git a/pkg/runtime/tool_dispatch.go b/pkg/runtime/tool_dispatch.go

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// Package builtins contains the stock in-process hook implementations`
	`1`	`+// Package builtins contains the in-process hook implementations`
`2`	`2`	`// shipped with docker-agent.`
`3`	`3`	`//`
`4`	`4`	`// Available builtins:`