feat(hooks): add on_tool_approval_decision event

dgageot · dgageot · commit 5ac1cd9d418d · 2026-04-27T17:35:13.000+02:00
Fires after the runtime's tool approval chain (yolo / permissions /

readonly / ask) resolves a verdict for a tool call, BEFORE the call

is executed (allow) or its error response is recorded (deny /

canceled). Until now this verdict was implicit — reconstructible

only by correlating ToolCall, ToolCallConfirmation, ToolCallResponse,

and HookBlocked events from the runtime channel. The hook gives

audit pipelines a single, structured "who approved what" record.

Two new typed Input fields:

  - ApprovalDecision: "allow" | "deny" | "canceled"

  - ApprovalSource:   stable classifier for which step decided

                      (yolo, session_permissions_allow,

                      session_permissions_deny, team_permissions_allow,

                      team_permissions_deny, readonly_hint,

                      user_approved, user_approved_session,

                      user_approved_tool, user_rejected,

                      context_canceled)

Constants live on the runtime side as Approval{Decision,Source}*

so the contract between executeWithApproval and the hook protocol

is discoverable from one place. allowSourceFor / denySourceFor map

the existing permissionChecker.source labels onto the public

classifiers; unknown labels default to team_permissions to avoid

silent misclassification on future label changes.

The hook is fired at every return path of executeWithApproval and

askUserForConfirmation, so a single hook gets exactly one record

per tool call regardless of which step decided. Existing event

consumers see no change.

Assisted-By: docker-agent
diff --git a/agent-schema.json b/agent-schema.json
@@ -581,6 +581,13 @@
           "items": {
             "$ref": "#/definitions/HookDefinition"
           }
+        },
+        "on_tool_approval_decision": {
+          "type": "array",
+          "description": "Hooks that run after the runtime's tool approval chain (yolo / permissions / readonly / ask) resolves a verdict for a tool call, before the call is executed (allow) or its error response is recorded (deny / canceled). Receives approval_decision (\"allow\" | \"deny\" | \"canceled\") and approval_source (a stable classifier of which step decided). Observational; gives audit pipelines a single \"who approved what\" record without re-implementing the chain.",
+          "items": {
+            "$ref": "#/definitions/HookDefinition"
+          }
         }
       },
       "additionalProperties": false
diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go
@@ -1681,6 +1681,12 @@ type HooksConfig struct {
 	// runtime to continue past its configured max_iterations limit.
 	// Observational; useful for alerting on extended-runtime sessions.
 	OnSessionResume []HookDefinition `json:"on_session_resume,omitempty" yaml:"on_session_resume,omitempty"`
+
+	// OnToolApprovalDecision hooks run after the runtime's tool
+	// approval chain resolves a verdict for a tool call. Observational;
+	// gives audit pipelines a structured "who approved what" record
+	// without re-implementing the chain.
+	OnToolApprovalDecision []HookDefinition `json:"on_tool_approval_decision,omitempty" yaml:"on_tool_approval_decision,omitempty"`
 }
 
 // IsEmpty returns true if no hooks are configured
@@ -1701,7 +1707,8 @@ func (h *HooksConfig) IsEmpty() bool {
 		len(h.OnError) == 0 &&
 		len(h.OnMaxIterations) == 0 &&
 		len(h.OnAgentSwitch) == 0 &&
-		len(h.OnSessionResume) == 0
+		len(h.OnSessionResume) == 0 &&
+		len(h.OnToolApprovalDecision) == 0
 }
 
 // HookMatcherConfig represents a hook matcher with its hooks.
@@ -1849,6 +1856,13 @@ func (h *HooksConfig) validate() error {
 		}
 	}
 
+	// Validate OnToolApprovalDecision hooks
+	for i, hook := range h.OnToolApprovalDecision {
+		if err := hook.validate("on_tool_approval_decision", i); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
 
diff --git a/pkg/hooks/executor.go b/pkg/hooks/executor.go
@@ -73,20 +73,21 @@ func compileEvents(c *Config) map[EventType][]matcher {
 		return []matcher{{hooks: hooks}}
 	}
 	return map[EventType][]matcher{
-		EventPreToolUse:      compileMatchers(c.PreToolUse),
-		EventPostToolUse:     compileMatchers(c.PostToolUse),
-		EventSessionStart:    flat(c.SessionStart),
-		EventTurnStart:       flat(c.TurnStart),
-		EventBeforeLLMCall:   flat(c.BeforeLLMCall),
-		EventAfterLLMCall:    flat(c.AfterLLMCall),
-		EventSessionEnd:      flat(c.SessionEnd),
-		EventOnUserInput:     flat(c.OnUserInput),
-		EventStop:            flat(c.Stop),
-		EventNotification:    flat(c.Notification),
-		EventOnError:         flat(c.OnError),
-		EventOnMaxIterations: flat(c.OnMaxIterations),
-		EventOnAgentSwitch:   flat(c.OnAgentSwitch),
-		EventOnSessionResume: flat(c.OnSessionResume),
+		EventPreToolUse:             compileMatchers(c.PreToolUse),
+		EventPostToolUse:            compileMatchers(c.PostToolUse),
+		EventSessionStart:           flat(c.SessionStart),
+		EventTurnStart:              flat(c.TurnStart),
+		EventBeforeLLMCall:          flat(c.BeforeLLMCall),
+		EventAfterLLMCall:           flat(c.AfterLLMCall),
+		EventSessionEnd:             flat(c.SessionEnd),
+		EventOnUserInput:            flat(c.OnUserInput),
+		EventStop:                   flat(c.Stop),
+		EventNotification:           flat(c.Notification),
+		EventOnError:                flat(c.OnError),
+		EventOnMaxIterations:        flat(c.OnMaxIterations),
+		EventOnAgentSwitch:          flat(c.OnAgentSwitch),
+		EventOnSessionResume:        flat(c.OnSessionResume),
+		EventOnToolApprovalDecision: flat(c.OnToolApprovalDecision),
 	}
 }
 
diff --git a/pkg/hooks/types.go b/pkg/hooks/types.go
@@ -58,6 +58,13 @@ const (
 	// Observational; useful for alerting on extended-runtime sessions
 	// or for pipelines that bill / quota-track per resume.
 	EventOnSessionResume EventType = "on_session_resume"
+	// EventOnToolApprovalDecision fires after the runtime's tool
+	// approval chain (yolo / permissions / readonly / ask) has resolved
+	// a verdict for a tool call, before the call is executed (for
+	// allow) or its error response is recorded (for deny / canceled).
+	// Observational; gives audit pipelines a single, structured "who
+	// approved what" record without re-implementing the chain.
+	EventOnToolApprovalDecision EventType = "on_tool_approval_decision"
 )
 
 // consumesContext reports whether the runtime emit site for e routes
@@ -113,6 +120,16 @@ type Input struct {
 	// reconstructing it from the iteration counter.
 	PreviousMaxIterations int `json:"previous_max_iterations,omitempty"`
 	NewMaxIterations      int `json:"new_max_iterations,omitempty"`
+
+	// OnToolApprovalDecision specific: the verdict resolved by the
+	// approval chain ("allow", "deny", "canceled") and a stable
+	// classifier for what produced it ("yolo",
+	// "session_permissions_allow", "session_permissions_deny",
+	// "team_permissions_allow", "team_permissions_deny",
+	// "readonly_hint", "user_approved", "user_approved_session",
+	// "user_approved_tool", "user_rejected", "context_canceled").
+	ApprovalDecision string `json:"approval_decision,omitempty"`
+	ApprovalSource   string `json:"approval_source,omitempty"`
 }
 
 // ToJSON serializes the input.
diff --git a/pkg/runtime/hooks.go b/pkg/runtime/hooks.go
@@ -9,6 +9,7 @@ import (
 	"github.com/docker/docker-agent/pkg/hooks"
 	"github.com/docker/docker-agent/pkg/hooks/builtins"
 	"github.com/docker/docker-agent/pkg/session"
+	"github.com/docker/docker-agent/pkg/tools"
 )
 
 // buildHooksExecutors builds a [hooks.Executor] for every agent in the
@@ -215,6 +216,46 @@ func (r *LocalRuntime) executeOnSessionResumeHooks(ctx context.Context, a *agent
 	}, nil)
 }
 
+// Values reported through [hooks.EventOnToolApprovalDecision]. Named
+// constants, not literals, so a misspelled name fails to compile.
+const (
+	// Verdicts, sent to hooks in the approval_decision field.
+	ApprovalDecisionAllow    = "allow"
+	ApprovalDecisionDeny     = "deny"
+	ApprovalDecisionCanceled = "canceled"
+
+	// Sources, sent in approval_source: which step produced the verdict.
+	ApprovalSourceYolo                    = "yolo"
+	ApprovalSourceSessionPermissionsAllow = "session_permissions_allow"
+	ApprovalSourceSessionPermissionsDeny  = "session_permissions_deny"
+	ApprovalSourceTeamPermissionsAllow    = "team_permissions_allow"
+	ApprovalSourceTeamPermissionsDeny     = "team_permissions_deny"
+	ApprovalSourceReadOnlyHint            = "readonly_hint"
+	ApprovalSourceUserApproved            = "user_approved"
+	ApprovalSourceUserApprovedSession     = "user_approved_session"
+	ApprovalSourceUserApprovedTool        = "user_approved_tool"
+	ApprovalSourceUserRejected            = "user_rejected"
+	ApprovalSourceContextCanceled         = "context_canceled"
+)
+
+// executeOnToolApprovalDecisionHooks dispatches the
+// on_tool_approval_decision event for toolCall. It builds the hook
+// input from sess and toolCall, stamps it with the resolved decision
+// and its source classifier, and hands it to dispatchHook. Call sites
+// are the verdict branches of executeWithApproval and
+// askUserForConfirmation.
+func (r *LocalRuntime) executeOnToolApprovalDecisionHooks(
+	ctx context.Context,
+	sess *session.Session,
+	a *agent.Agent,
+	toolCall tools.ToolCall,
+	decision, source string,
+) {
+	in := newHooksInput(sess, toolCall)
+	in.ApprovalDecision, in.ApprovalSource = decision, source
+	r.dispatchHook(ctx, a, hooks.EventOnToolApprovalDecision, in, nil)
+}
+
 // executeBeforeLLMCallHooks fires before_llm_call just before each
 // model call. A terminating verdict (decision="block" / continue=false
 // / exit 2) stops the run loop — see [hooks.EventBeforeLLMCall] for
diff --git a/pkg/runtime/on_tool_approval_decision_test.go b/pkg/runtime/on_tool_approval_decision_test.go
@@ -0,0 +1,92 @@
+package runtime
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/hooks"
+	"github.com/docker/docker-agent/pkg/session"
+	"github.com/docker/docker-agent/pkg/team"
+	"github.com/docker/docker-agent/pkg/tools"
+)
+
+// runtimeWithRecordedToolApproval mirrors the runtimeWithRecorded*
+// helpers for the on_tool_approval_decision event. Same pattern: a
+// recording builtin on the runtime's private registry so the test
+// can assert on the dispatched verdict + source without exposing a
+// production Opt that would tempt users to inject builtins ad hoc.
+func runtimeWithRecordedToolApproval(t *testing.T) (*LocalRuntime, *recordingBuiltin) {
+	t.Helper()
+
+	rb := &recordingBuiltin{}
+	prov := &mockProvider{id: "test/mock-model", stream: &mockStream{}}
+	a := agent.New("root", "instructions",
+		agent.WithModel(prov),
+		agent.WithHooks(&hooks.Config{
+			OnToolApprovalDecision: []hooks.Hook{{
+				Type:    hooks.HookTypeBuiltin,
+				Command: "test_record_tool_approval",
+			}},
+		}),
+	)
+	tm := team.New(team.WithAgents(a))
+
+	r, err := NewLocalRuntime(tm, WithModelStore(mockModelStore{}))
+	require.NoError(t, err)
+
+	require.NoError(t, r.hooksRegistry.RegisterBuiltin("test_record_tool_approval", rb.hook))
+	r.buildHooksExecutors()
+
+	return r, rb
+}
+
+// TestExecuteOnToolApprovalDecisionHooks_ForwardsVerdictAndSource
+// checks that one call to executeOnToolApprovalDecisionHooks yields
+// exactly one recorded hook input, and that this input carries the
+// tool name, the tool-call ID, the decision and the source it was
+// given, unchanged. Those four fields are what the assertions below
+// read back from the recording builtin.
+func TestExecuteOnToolApprovalDecisionHooks_ForwardsVerdictAndSource(t *testing.T) {
+	t.Parallel()
+
+	rt, recorder := runtimeWithRecordedToolApproval(t)
+	current := rt.CurrentAgent()
+	require.NotNil(t, current)
+
+	call := tools.ToolCall{
+		ID:       "call-1",
+		Function: tools.FunctionCall{Name: "read_file", Arguments: `{"path":"/tmp/x"}`},
+	}
+	sess := &session.Session{ID: "session-z"}
+	rt.executeOnToolApprovalDecisionHooks(t.Context(), sess, current, call,
+		ApprovalDecisionAllow, ApprovalSourceReadOnlyHint)
+
+	// A single dispatch must leave exactly one recorded input.
+	recorded := recorder.snapshot()
+	require.Len(t, recorded, 1)
+
+	got := recorded[0]
+	assert.Equal(t, "read_file", got.ToolName)
+	assert.Equal(t, "call-1", got.ToolUseID)
+	assert.Equal(t, ApprovalDecisionAllow, got.ApprovalDecision)
+	assert.Equal(t, ApprovalSourceReadOnlyHint, got.ApprovalSource)
+}
+
+// TestApprovalSourceMappersAreStable pins the stable classifier
+// strings used by [allowSourceFor] and [denySourceFor]. Tests that
+// the team-permissions vs session-permissions split (today: by
+// checker.source string match) survives changes to the inner labels.
+func TestApprovalSourceMappersAreStable(t *testing.T) {
+	t.Parallel()
+
+	assert.Equal(t, ApprovalSourceSessionPermissionsAllow, allowSourceFor("session permissions"))
+	assert.Equal(t, ApprovalSourceTeamPermissionsAllow, allowSourceFor("permissions configuration"))
+	assert.Equal(t, ApprovalSourceTeamPermissionsAllow, allowSourceFor("anything-else"),
+		"unknown source must default to team_permissions to avoid silent misclassification on future label changes")
+
+	assert.Equal(t, ApprovalSourceSessionPermissionsDeny, denySourceFor("session permissions"))
+	assert.Equal(t, ApprovalSourceTeamPermissionsDeny, denySourceFor("permissions configuration"))
+}
diff --git a/pkg/runtime/tool_dispatch.go b/pkg/runtime/tool_dispatch.go
@@ -162,6 +162,7 @@ func (r *LocalRuntime) executeWithApproval(
 
 	if sess.ToolsApproved {
 		slog.Debug("Tool auto-approved by --yolo flag", "tool", toolName, "session_id", sess.ID)
+		r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionAllow, ApprovalSourceYolo)
 		return invoke()
 	}
 
@@ -178,11 +179,13 @@ func (r *LocalRuntime) executeWithApproval(
 		switch pc.checker.CheckWithArgs(toolName, toolArgs) {
 		case permissions.Deny:
 			slog.Debug("Tool denied by permissions", "tool", toolName, "source", pc.source, "session_id", sess.ID)
+			r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionDeny, denySourceFor(pc.source))
 			r.addToolErrorResponse(ctx, sess, toolCall, tool, events, a,
 				fmt.Sprintf("Tool '%s' is denied by %s.", toolName, pc.source))
 			return toolApprovalOutcome{}
 		case permissions.Allow:
 			slog.Debug("Tool auto-approved by permissions", "tool", toolName, "source", pc.source, "session_id", sess.ID)
+			r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionAllow, allowSourceFor(pc.source))
 			return invoke()
 		case permissions.ForceAsk:
 			slog.Debug("Tool requires confirmation (ask pattern)", "tool", toolName, "source", pc.source, "session_id", sess.ID)
@@ -193,11 +196,30 @@ func (r *LocalRuntime) executeWithApproval(
 	}
 
 	if tool.Annotations.ReadOnlyHint {
+		r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionAllow, ApprovalSourceReadOnlyHint)
 		return invoke()
 	}
 	return r.askUserForConfirmation(ctx, sess, toolCall, tool, events, a, invoke)
 }
 
+// allowSourceFor converts the source label of the permission checker
+// that allowed a call into an approval-source classifier. Any label
+// other than "session permissions" is reported as team permissions.
+func allowSourceFor(checkerSource string) string {
+	if checkerSource != "session permissions" {
+		return ApprovalSourceTeamPermissionsAllow
+	}
+	return ApprovalSourceSessionPermissionsAllow
+}
+
+// denySourceFor maps a denying checker's source label to its classifier.
+func denySourceFor(checkerSource string) string {
+	if checkerSource != "session permissions" {
+		return ApprovalSourceTeamPermissionsDeny // every other label
+	}
+	return ApprovalSourceSessionPermissionsDeny
+}
+
 // permissionChecker pairs a checker with a human-readable source label.
 type permissionChecker struct {
 	checker *permissions.Checker
@@ -249,10 +271,12 @@ func (r *LocalRuntime) askUserForConfirmation(
 		switch req.Type {
 		case ResumeTypeApprove:
 			slog.Debug("Resume signal received, approving tool", "tool", toolName, "session_id", sess.ID)
+			r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionAllow, ApprovalSourceUserApproved)
 			return invoke()
 		case ResumeTypeApproveSession:
 			slog.Debug("Resume signal received, approving session", "tool", toolName, "session_id", sess.ID)
 			sess.ToolsApproved = true
+			r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionAllow, ApprovalSourceUserApprovedSession)
 			return invoke()
 		case ResumeTypeApproveTool:
 			approvedTool := req.ToolName
@@ -266,9 +290,11 @@ func (r *LocalRuntime) askUserForConfirmation(
 				sess.Permissions.Allow = append(sess.Permissions.Allow, approvedTool)
 			}
 			slog.Debug("Resume signal received, approving tool permanently", "tool", approvedTool, "session_id", sess.ID)
+			r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionAllow, ApprovalSourceUserApprovedTool)
 			return invoke()
 		case ResumeTypeReject:
 			slog.Debug("Resume signal received, rejecting tool", "tool", toolName, "session_id", sess.ID, "reason", req.Reason)
+			r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionDeny, ApprovalSourceUserRejected)
 			rejectMsg := "The user rejected the tool call."
 			if strings.TrimSpace(req.Reason) != "" {
 				rejectMsg += " Reason: " + strings.TrimSpace(req.Reason)
@@ -278,6 +304,7 @@ func (r *LocalRuntime) askUserForConfirmation(
 		return toolApprovalOutcome{}
 	case <-ctx.Done():
 		slog.Debug("Context cancelled while waiting for resume", "tool", toolName, "session_id", sess.ID)
+		r.executeOnToolApprovalDecisionHooks(ctx, sess, a, toolCall, ApprovalDecisionCanceled, ApprovalSourceContextCanceled)
 		r.addToolErrorResponse(ctx, sess, toolCall, tool, events, a, "The tool call was canceled by the user.")
 		return toolApprovalOutcome{canceled: true}
 	}

Original file line number	Diff line number	Diff line change
`@@ -581,6 +581,13 @@`
`581`	`581`	`"items": {`
`582`	`582`	`"$ref": "#/definitions/HookDefinition"`
`583`	`583`	`}`
	`584`	`+ },`
	`585`	`+ "on_tool_approval_decision": {`
	`586`	`+ "type": "array",`
	`587`	`+ "description": "Hooks that run after the runtime's tool approval chain (yolo / permissions / readonly / ask) resolves a verdict for a tool call, before the call is executed (allow) or its error response is recorded (deny / canceled). Receives approval_decision (\"allow\" \| \"deny\" \| \"canceled\") and approval_source (a stable classifier of which step decided). Observational; gives audit pipelines a single \"who approved what\" record without re-implementing the chain.",`
	`588`	`+ "items": {`
	`589`	`+ "$ref": "#/definitions/HookDefinition"`
	`590`	`+ }`
`584`	`591`	`}`
`585`	`592`	`},`
`586`	`593`	`"additionalProperties": false`