docker
diff --git a/‎pkg/hooks/builtins/builtins.go‎
Lines changed: 76 additions & 28 deletions b/‎pkg/hooks/builtins/builtins.go‎
Lines changed: 76 additions & 28 deletions
diff --git a/‎pkg/hooks/builtins/builtins_test.go‎
Lines changed: 6 additions & 2 deletions b/‎pkg/hooks/builtins/builtins_test.go‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎pkg/hooks/builtins/json.go‎
Lines changed: 46 additions & 0 deletions b/‎pkg/hooks/builtins/json.go‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎pkg/hooks/builtins/loop_detector.go‎
Lines changed: 153 additions & 0 deletions b/‎pkg/hooks/builtins/loop_detector.go‎
Lines changed: 153 additions & 0 deletions
@@ -3,35 +3,65 @@
 //
 // Available builtins:
 //
-//   - add_date              (turn_start)    — today's date
-//   - add_environment_info  (session_start) — cwd, git, OS, arch
-//   - add_prompt_files      (turn_start)    — contents of prompt files
-//   - add_git_status        (turn_start)    — `git status --short --branch`
-//   - add_git_diff          (turn_start)    — `git diff --stat` (or full)
-//   - add_directory_listing (session_start) — top-level entries of cwd
-//   - add_user_info         (session_start) — current OS user and host
-//   - add_recent_commits    (session_start) — `git log --oneline -n N`
+//   - add_date              (turn_start)      — today's date
+//   - add_environment_info  (session_start)   — cwd, git, OS, arch
+//   - add_prompt_files      (turn_start)      — contents of prompt files
+//   - add_git_status        (turn_start)      — `git status --short --branch`
+//   - add_git_diff          (turn_start)      — `git diff --stat` (or full)
+//   - add_directory_listing (session_start)   — top-level entries of cwd
+//   - add_user_info         (session_start)   — current OS user and host
+//   - add_recent_commits    (session_start)   — `git log --oneline -n N`
+//   - loop_detector         (post_tool_use)   — block on N consecutive
+//     identical tool calls
+//   - max_iterations        (before_llm_call) — hard stop after N model calls
 //
-// They can be referenced explicitly from a hook YAML entry using
-// `{type: builtin, command: "<name>"}`. The runtime also auto-injects
-// add_date / add_environment_info / add_prompt_files when the matching
-// agent flags are set.
+// Reference any of them from a hook YAML entry as
+// `{type: builtin, command: "<name>"}`. The runtime additionally
+// auto-injects add_date / add_environment_info / add_prompt_files
+// from the matching agent flags, and loop_detector from
+// agent.MaxConsecutiveToolCalls.
 //
-// turn_start builtins recompute every turn (date, git state). session_start
-// builtins run once per session for context that's stable for its duration.
-// Each builtin lives in its own file along with its registered-name
-// constant; this file holds the shared registration plumbing.
+// turn_start builtins recompute every turn (date, git state).
+// session_start builtins run once per session for context that's
+// stable for its duration. loop_detector and max_iterations are
+// stateful: their per-session counters live on the [State] returned
+// by [Register]; the runtime clears them via [State.ClearSession]
+// from session_end.
 package builtins
 
 import (
 	"errors"
+	"strconv"
 
 	"github.com/docker/docker-agent/pkg/hooks"
 )
 
-// Register installs the stock builtin hooks on r.
-func Register(r *hooks.Registry) error {
-	return errors.Join(
+// State holds the per-runtime state of the stateful builtins.
+// It is returned by [Register] so callers can clear per-session
+// entries on session_end. Stateless builtins don't appear here.
+type State struct {
+	loopDetector  *loopDetectorBuiltin  // backs the loop_detector builtin
+	maxIterations *maxIterationsBuiltin // backs the max_iterations builtin
+}
+
+// ClearSession drops the entry for sessionID from every stateful builtin.
+// A nil receiver or an empty sessionID is a no-op.
+func (s *State) ClearSession(sessionID string) {
+	if s == nil || sessionID == "" {
+		return
+	}
+	s.loopDetector.clearSession(sessionID) // NOTE(review): nil on a zero-value State (only Register sets it) — would panic
+	s.maxIterations.clearSession(sessionID)
+}
+
+// Register installs the stock builtin hooks on r and returns a [State]
+// handle the caller must use to clear per-session state on session_end.
+func Register(r *hooks.Registry) (*State, error) {
+	state := &State{
+		loopDetector:  newLoopDetector(),
+		maxIterations: newMaxIterations(),
+	}
+	if err := errors.Join(
 		r.RegisterBuiltin(AddDate, addDate),
 		r.RegisterBuiltin(AddEnvironmentInfo, addEnvironmentInfo),
 		r.RegisterBuiltin(AddPromptFiles, addPromptFiles),
@@ -40,22 +70,33 @@ func Register(r *hooks.Registry) error {
 		r.RegisterBuiltin(AddDirectoryListing, addDirectoryListing),
 		r.RegisterBuiltin(AddUserInfo, addUserInfo),
 		r.RegisterBuiltin(AddRecentCommits, addRecentCommits),
-	)
+		r.RegisterBuiltin(LoopDetector, state.loopDetector.hook),
+		r.RegisterBuiltin(MaxIterations, state.maxIterations.hook),
+	); err != nil {
+		return nil, err
+	}
+	return state, nil
 }
 
 // AgentDefaults captures the agent-level flags that map onto stock
-// builtin hook entries. Pass each AgentConfig.AddXxx flag as-is.
+// builtin hook entries.
+//
+// MaxConsecutiveToolCalls and ExemptLoopTools together control the
+// auto-injected loop_detector entry: a positive threshold injects a
+// post_tool_use hook with that threshold and the exempt tool names.
+// The runtime supplies the exempt list so this package stays
+// decoupled from any tool constants.
 type AgentDefaults struct {
-	AddDate            bool
-	AddEnvironmentInfo bool
-	AddPromptFiles     []string
+	AddDate                 bool
+	AddEnvironmentInfo      bool
+	AddPromptFiles          []string
+	MaxConsecutiveToolCalls int      // loop_detector threshold; values <= 0 inject nothing
+	ExemptLoopTools         []string // tool names passed to loop_detector as exempt
 }
 
 // ApplyAgentDefaults appends the stock builtin hook entries implied by
-// d to cfg, returning the (possibly mutated) config.
-//
-// A nil cfg is treated as empty; the returned value is non-nil iff at
-// least one hook (user-configured or auto-injected) is present.
+// d to cfg. A nil cfg is treated as empty. Returns nil iff no hook
+// (user-configured or auto-injected) is present.
 func ApplyAgentDefaults(cfg *hooks.Config, d AgentDefaults) *hooks.Config {
 	if cfg == nil {
 		cfg = &hooks.Config{}
@@ -69,6 +110,13 @@ func ApplyAgentDefaults(cfg *hooks.Config, d AgentDefaults) *hooks.Config {
 	if d.AddEnvironmentInfo {
 		cfg.SessionStart = append(cfg.SessionStart, builtinHook(AddEnvironmentInfo))
 	}
+	if d.MaxConsecutiveToolCalls > 0 {
+		args := append([]string{strconv.Itoa(d.MaxConsecutiveToolCalls)}, d.ExemptLoopTools...)
+		cfg.PostToolUse = append(cfg.PostToolUse, hooks.MatcherConfig{
+			Matcher: "*",
+			Hooks:   []hooks.Hook{builtinHook(LoopDetector, args...)},
+		})
+	}
 	if cfg.IsEmpty() {
 		return nil
 	}
 
@@ -21,7 +21,8 @@ func TestRegisterInstallsAllBuiltins(t *testing.T) {
 	t.Parallel()
 
 	r := hooks.NewRegistry()
-	require.NoError(t, builtins.Register(r))
+	_, err := builtins.Register(r)
+	require.NoError(t, err)
 
 	for _, name := range []string{
 		builtins.AddDate,
@@ -32,6 +33,8 @@ func TestRegisterInstallsAllBuiltins(t *testing.T) {
 		builtins.AddDirectoryListing,
 		builtins.AddUserInfo,
 		builtins.AddRecentCommits,
+		builtins.LoopDetector,
+		builtins.MaxIterations,
 	} {
 		fn, ok := r.LookupBuiltin(name)
 		assert.True(t, ok, "builtin %q must be registered", name)
@@ -172,7 +175,8 @@ func TestAddPromptFilesNoArgsIsNoop(t *testing.T) {
 func lookup(t *testing.T, name string) hooks.BuiltinFunc {
 	t.Helper()
 	r := hooks.NewRegistry()
-	require.NoError(t, builtins.Register(r))
+	_, err := builtins.Register(r)
+	require.NoError(t, err)
 	fn, ok := r.LookupBuiltin(name)
 	require.True(t, ok, "builtin %q must be registered", name)
 	require.NotNil(t, fn)
 
@@ -0,0 +1,46 @@
+package builtins
+
+import (
+	"encoding/json"
+	"maps"
+	"slices"
+)
+
+// sortKeys recursively copies each map[string]any (inserting keys in sorted order) and
+// rewrites []any elements in place, so the caller's slices are mutated; other values are
+// returned unchanged. NOTE(review): Go maps keep no insertion order and [json.Marshal]
+// already sorts map keys, so this pass looks redundant for determinism — confirm.
+func sortKeys(v any) any {
+	switch val := v.(type) {
+	case map[string]any:
+		sorted := make(map[string]any, len(val))
+		for _, k := range slices.Sorted(maps.Keys(val)) {
+			sorted[k] = sortKeys(val[k])
+		}
+		return sorted
+	case []any:
+		for i, item := range val {
+			val[i] = sortKeys(item) // overwrites the element in the caller's backing array
+		}
+		return val
+	default:
+		return v
+	}
+}
+
+// canonicalToolInput returns a stable signature for a tool's input map
+// suitable for equality comparison across calls: the JSON encoding of
+// the map after a recursive sortKeys pass. An empty (or nil) map and an
+// input that fails to marshal both produce "". NOTE(review): "" is meant
+// to be a non-matching signature, not a wildcard, yet loopDetectorBuiltin.hook
+// compares signatures verbatim, so two "" results for one tool count as equal.
+func canonicalToolInput(in map[string]any) string {
+	if len(in) == 0 {
+		return ""
+	}
+	out, err := json.Marshal(sortKeys(in))
+	if err != nil {
+		return ""
+	}
+	return string(out)
+}
@@ -0,0 +1,153 @@
+package builtins
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strconv"
+	"sync"
+
+	"github.com/docker/docker-agent/pkg/hooks"
+)
+
+// LoopDetector is the name under which the loop_detector builtin is registered.
+const LoopDetector = "loop_detector"
+
+// defaultLoopDetectorThreshold is the consecutive-identical-call count
+// at which the detector trips when the threshold argument is missing,
+// unparsable, or non-positive. Five matches the historical default of
+// the inline tool-loop detector previously baked into pkg/runtime.
+const defaultLoopDetectorThreshold = 5
+
+// loopDetectorBuiltin is the post_tool_use builtin that terminates the
+// run when the model issues the same tool call (name + canonical
+// arguments) `threshold` times in a row.
+//
+// State is per-session, keyed by [hooks.Input.SessionID], so concurrent
+// runs on a shared runtime can't cross-contaminate each other's
+// counters. The state map is bounded by the number of *active* sessions
+// rather than all-time sessions — the runtime calls `clearSession` from
+// session_end so the entry is dropped when the run finishes.
+//
+// Semantics differ from the previous inline detector in one specific
+// way: signatures are tracked **per call**, not per batch. For the
+// common stuck-agent case (the model repeatedly emits a single tool
+// call with identical arguments) the trip point is unchanged. A model
+// stuck in an alternating multi-tool batch like `[A, B] [A, B] [A, B]`
+// is no longer flagged because each B resets A's counter; users who
+// hit that pattern should rely on `max_iterations` (interactive UX) or
+// the new max_iterations builtin (hard stop) instead.
+//
+// Tool names listed in `args` after the threshold (polling tools such
+// as `view_background_agent`) are ignored entirely: an exempt call
+// neither increments nor resets the counter, so a looping model can't
+// evade detection by sneaking a single polling call between identical
+// stuck calls.
+type loopDetectorBuiltin struct {
+	mu     sync.Mutex                    // guards states
+	states map[string]*loopDetectorState // keyed by SessionID
+}
+
+type loopDetectorState struct {
+	lastSignature string // tool name + NUL + canonical input of the last counted call
+	consecutive   int    // length of the current run of calls matching lastSignature
+}
+
+func newLoopDetector() *loopDetectorBuiltin {
+	return &loopDetectorBuiltin{states: map[string]*loopDetectorState{}}
+}
+
+// hook is registered as the [hooks.BuiltinFunc] for [hooks.EventPostToolUse].
+// Args layout: `[threshold, exempt1, exempt2, ...]`; `threshold` is an optional
+// positive integer (defaults to [defaultLoopDetectorThreshold] when missing or
+// invalid) and the rest are tool names to exempt. It returns (nil, nil) until a
+// session's consecutive-identical-call count reaches threshold, then a block verdict.
+func (d *loopDetectorBuiltin) hook(_ context.Context, in *hooks.Input, args []string) (*hooks.Output, error) {
+	if in == nil || in.SessionID == "" || in.ToolName == "" {
+		// Defensive: post_tool_use always carries SessionID and
+		// ToolName today. Skipping events without them keeps unkeyed
+		// calls from sharing one counter under the empty session key.
+		return nil, nil
+	}
+
+	threshold, exempt := parseLoopDetectorArgs(args)
+
+	if _, ok := exempt[in.ToolName]; ok {
+		// Polling-style tools never count toward the consecutive
+		// total and never reset it: see the type-level comment for
+		// why ignoring them entirely (rather than resetting the
+		// counter) is the right behaviour.
+		return nil, nil
+	}
+
+	sig := in.ToolName + "\x00" + canonicalToolInput(in.ToolInput) // NUL keeps name and input from running together
+
+	d.mu.Lock()
+	state, ok := d.states[in.SessionID]
+	if !ok {
+		state = &loopDetectorState{}
+		d.states[in.SessionID] = state
+	}
+	if sig == state.lastSignature {
+		state.consecutive++
+	} else {
+		state.lastSignature = sig
+		state.consecutive = 1 // the differing call starts a new run
+	}
+	tripped := state.consecutive >= threshold
+	count := state.consecutive // snapshot under the lock for use after Unlock
+	d.mu.Unlock()
+
+	if !tripped {
+		return nil, nil
+	}
+
+	slog.Warn("loop_detector tripped",
+		"tool", in.ToolName, "consecutive", count,
+		"threshold", threshold, "session_id", in.SessionID)
+
+	reason := fmt.Sprintf(
+		"Agent terminated: detected %d consecutive identical calls to %s. "+
+			"This indicates a degenerate loop where the model is not making progress.",
+		count, in.ToolName)
+
+	// "block" is the post_tool_use deny verdict: aggregate() turns it
+	// into Result.Allowed=false, which the runtime translates into
+	// the standard Error / notification / on_error fan-out before
+	// terminating the run.
+	return &hooks.Output{
+		Decision: hooks.DecisionBlockValue,
+		Reason:   reason,
+	}, nil
+}
+
+// clearSession drops sessionID's state entry (a no-op when none exists).
+// It is called from a session_end hook so long-running runtimes don't
+// accumulate per-session entries indefinitely.
+func (d *loopDetectorBuiltin) clearSession(sessionID string) {
+	d.mu.Lock()
+	delete(d.states, sessionID)
+	d.mu.Unlock()
+}
+
+// parseLoopDetectorArgs splits builtin args into (threshold, exempt
+// tool name set); empty names are skipped. An invalid or non-positive
+// threshold falls back to [defaultLoopDetectorThreshold] silently — a
+// misconfigured YAML shouldn't disable the detector entirely.
+func parseLoopDetectorArgs(args []string) (int, map[string]struct{}) {
+	threshold := defaultLoopDetectorThreshold
+	exempt := map[string]struct{}{}
+
+	if len(args) == 0 {
+		return threshold, exempt
+	}
+	if n, err := strconv.Atoi(args[0]); err == nil && n > 0 {
+		threshold = n
+	}
+	for _, name := range args[1:] {
+		if name != "" {
+			exempt[name] = struct{}{}
+		}
+	}
+	return threshold, exempt
+}