fix(runtime,hooks): review-driven fixes (duplicate before_llm_call dispatch + 4 quality)

dgageot · dgageot · commit c54be36d72fd · 2026-04-27T16:42:57.000+02:00
Review of the three feature commits surfaced one critical bug introduced
during the rebase plus a handful of low-impact issues. This commit fixes
them and adds a regression test for the critical one.

## Critical bug

**before_llm_call hook fired twice per loop iteration.** The rebase
left two identical dispatch blocks in [LocalRuntime.RunStream], so any
stateful before_llm_call hook — prominently the new max_iterations
builtin — advanced its counter twice per LLM call and tripped at half
the configured limit. Other affected handlers: any user-authored
before_llm_call hook with side effects (audit logging, cost meters).

Adds pkg/runtime/before_llm_call_test.go to pin "fires exactly once
per iteration". I verified the test fails on the buggy code and passes
on the fix before checking either in.

## Quality fixes

  * pkg/hooks/builtins/json.go: sortKeys was mutating []any slices in
    place. Currently safe because each builtin gets a freshly-decoded
    Input, but it's a foot-gun for any future caller that re-uses
    inputs. sortKeys now returns a deep copy.
  * pkg/runtime/tool_dispatch.go: processToolCalls switch had three
    arms each ending in nearly-identical span End() / SetStatus
    (codes.Ok, "...") boilerplate. Pulled span finalisation up before
    the switch so each arm only carries the logic that's actually
    distinct.
  * pkg/hooks/builtins/git.go: gitOutput's empty-dir error included
    the function name ("gitOutput: empty working directory") against
    Go style. Trimmed.
  * pkg/runtime/hooks_wiring_test.go: stale comment referred to
    "caching by agent name"; the post-rebase eager-build doesn't
    cache, it just looks up. Updated.

## Validation

    go test ./...                                  -> all packages pass
    go test -race ./pkg/hooks/... ./pkg/runtime/... -> clean
    golangci-lint run ./...                         -> 0 issues

Assisted-By: docker-agent
diff --git a/pkg/hooks/builtins/git.go b/pkg/hooks/builtins/git.go
@@ -57,7 +57,7 @@ func gitOutput(ctx context.Context, dir string, args ...string) (string, error)
 		// Defensive: every caller guards on Cwd, but bailing out
 		// here keeps a future caller from accidentally running git
 		// in the process's working directory.
-		return "", errors.New("gitOutput: empty working directory")
+		return "", errors.New("empty working directory")
 	}
 	full := append([]string{"-C", dir}, args...)
 	out, err := exec.CommandContext(ctx, "git", full...).Output()
diff --git a/pkg/hooks/builtins/json.go b/pkg/hooks/builtins/json.go
@@ -6,10 +6,10 @@ import (
 	"slices"
 )
 
-// sortKeys recursively sorts map keys so [json.Marshal] produces
-// deterministic output regardless of how the input was constructed.
-// Slices are walked in place; non-collection values are returned
-// unchanged.
+// sortKeys returns a deep, deterministic copy of v with every nested
+// map's keys ordered. Slices and maps are copied rather than mutated
+// in place so the caller's input is never modified — important when
+// the same Input is reachable from a future hook handler.
 func sortKeys(v any) any {
 	switch val := v.(type) {
 	case map[string]any:
@@ -19,10 +19,11 @@ func sortKeys(v any) any {
 		}
 		return sorted
 	case []any:
+		copied := make([]any, len(val))
 		for i, item := range val {
-			val[i] = sortKeys(item)
+			copied[i] = sortKeys(item)
 		}
-		return val
+		return copied
 	default:
 		return v
 	}
diff --git a/pkg/runtime/before_llm_call_test.go b/pkg/runtime/before_llm_call_test.go
@@ -0,0 +1,71 @@
+package runtime
+
+import (
+	"context"
+	"sync/atomic"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/config/latest"
+	"github.com/docker/docker-agent/pkg/hooks"
+	"github.com/docker/docker-agent/pkg/session"
+	"github.com/docker/docker-agent/pkg/team"
+)
+
+// TestBeforeLLMCallHookFiresOncePerLoopIteration is a regression test
+// for a duplicate dispatch in [LocalRuntime.RunStream] that fired
+// [LocalRuntime.executeBeforeLLMCallHooks] twice per iteration. The
+// bug would silently break stateful before_llm_call hooks (the
+// max_iterations builtin would have tripped at half its configured
+// limit). A single-turn session must observe exactly one fire.
+func TestBeforeLLMCallHookFiresOncePerLoopIteration(t *testing.T) {
+	t.Parallel()
+
+	const counterName = "test-before-llm-counter"
+	var calls atomic.Int32
+
+	stream := newStreamBuilder().
+		AddContent("Hello").
+		AddStopWithUsage(3, 2).
+		Build()
+	prov := &mockProvider{id: "test/mock-model", stream: stream}
+
+	root := agent.New("root", "test agent",
+		agent.WithModel(prov),
+		agent.WithHooks(&latest.HooksConfig{
+			BeforeLLMCall: []latest.HookDefinition{
+				{Type: "builtin", Command: counterName},
+			},
+		}),
+	)
+	tm := team.New(team.WithAgents(root))
+
+	rt, err := NewLocalRuntime(tm,
+		WithSessionCompaction(false),
+		WithModelStore(mockModelStore{}),
+	)
+	require.NoError(t, err)
+
+	// Builtin lookup happens at dispatch time, not at executor build,
+	// so registering after NewLocalRuntime is sufficient.
+	require.NoError(t, rt.hooksRegistry.RegisterBuiltin(
+		counterName,
+		func(_ context.Context, _ *hooks.Input, _ []string) (*hooks.Output, error) {
+			calls.Add(1)
+			return nil, nil
+		},
+	))
+
+	sess := session.New(session.WithUserMessage("hi"))
+	sess.Title = "Unit Test"
+
+	for range rt.RunStream(t.Context(), sess) {
+	}
+
+	assert.Equal(t, int32(1), calls.Load(),
+		"before_llm_call must fire exactly once per loop iteration; "+
+			"a duplicate dispatch would silently break stateful hooks like max_iterations")
+}
diff --git a/pkg/runtime/hooks_wiring_test.go b/pkg/runtime/hooks_wiring_test.go
@@ -86,10 +86,9 @@ func TestHooksExecWiresAgentFlagsToBuiltins(t *testing.T) {
 			exec := r.hooksExec(a)
 			require.NotNil(t, exec, "loop_detector is always-on, so an executor is always built")
 
-			// hooksExec caches the executor by agent name. Calling it twice
-			// returns the same pointer, so per-turn dispatches don't pay
-			// the matcher-compilation cost repeatedly.
-			assert.Same(t, exec, r.hooksExec(a), "hooksExec must cache by agent name")
+			// hooksExec is read-only after [LocalRuntime.buildHooksExecutors],
+			// so calling it twice returns the same pointer.
+			assert.Same(t, exec, r.hooksExec(a), "hooksExec must be stable across calls")
 
 			assert.Equal(t, tc.wantTurnStart, exec.Has(hooks.EventTurnStart),
 				"turn_start activation must match flags")
diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go
@@ -402,17 +402,6 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 				return
 			}
 
-			// before_llm_call hooks fire just before the model is invoked.
-			// A terminating verdict (e.g. from the max_iterations builtin)
-			// stops the run loop here, before any tokens are spent.
-			if stop, msg := r.executeBeforeLLMCallHooks(ctx, sess, a); stop {
-				slog.Warn("before_llm_call hook signalled run termination",
-					"agent", a.Name(), "session_id", sess.ID, "reason", msg)
-				r.emitHookDrivenShutdown(ctx, a, sess, msg, events)
-				streamSpan.End()
-				return
-			}
-
 			// Try primary model with fallback chain if configured
 			res, usedModel, err := r.tryModelWithFallback(streamCtx, a, model, messages, agentTools, sess, m, events)
 			if err != nil {
diff --git a/pkg/runtime/tool_dispatch.go b/pkg/runtime/tool_dispatch.go
@@ -84,24 +84,22 @@ func (r *LocalRuntime) processToolCalls(ctx context.Context, sess *session.Sessi
 
 		outcome := r.executeWithApproval(callCtx, sess, toolCall, tool, events, a, invoke)
 
+		if outcome.canceled {
+			callSpan.SetStatus(codes.Ok, "tool call canceled by user")
+		} else {
+			callSpan.SetStatus(codes.Ok, "tool call processed")
+		}
+		callSpan.End()
+
 		switch {
 		case outcome.canceled:
-			callSpan.SetStatus(codes.Ok, "tool call canceled by user")
-			callSpan.End()
 			synthesizeRemaining(calls[i+1:],
 				"The tool call was canceled because a previous tool call in the same batch was canceled by the user.")
 			return false, ""
-
 		case outcome.stopRun:
-			callSpan.SetStatus(codes.Ok, "tool call processed")
-			callSpan.End()
 			synthesizeRemaining(calls[i+1:],
 				"The tool call was skipped because a post_tool_use hook signalled run termination.")
 			return true, outcome.stopMessage
-
-		default:
-			callSpan.SetStatus(codes.Ok, "tool call processed")
-			callSpan.End()
 		}
 	}
 	return false, ""

Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ func gitOutput(ctx context.Context, dir string, args ...string) (string, error)`
`57`	`57`	`// Defensive: every caller guards on Cwd, but bailing out`
`58`	`58`	`// here keeps a future caller from accidentally running git`
`59`	`59`	`// in the process's working directory.`
`60`		`- return "", errors.New("gitOutput: empty working directory")`
	`60`	`+ return "", errors.New("empty working directory")`
`61`	`61`	`}`
`62`	`62`	`full := append([]string{"-C", dir}, args...)`
`63`	`63`	`out, err := exec.CommandContext(ctx, "git", full...).Output()`