Skip to content

Commit 8a03846

Browse files
authored
Merge pull request #2453 from dgageot/board/add-task-budget-output-config-to-opus-4-ba6f107e
2 parents 0787e18 + e70f8eb commit 8a03846

14 files changed

Lines changed: 710 additions & 9 deletions

File tree

agent-schema.json

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,33 @@
171171
"description": "Effort level (e.g., \"low\", \"medium\", \"high\", \"none\", \"adaptive\")"
172172
}
173173
]
174+
},
175+
"task_budget": {
176+
"description": "Default total-token budget for an agentic task (forwarded to Anthropic as `output_config.task_budget`, with the required `task-budgets-2026-03-13` beta header attached automatically). Configurable on any Claude model — docker-agent does not gate by model name — but at the time of writing only Claude Opus 4.7 honors it. Accepts an integer token count or an object {type: tokens, total: N}.",
177+
"oneOf": [
178+
{
179+
"type": "integer",
180+
"minimum": 0,
181+
"description": "Token budget for the full task (combined thinking, tool calls, and output)."
182+
},
183+
{
184+
"type": "object",
185+
"properties": {
186+
"type": {
187+
"type": "string",
188+
"enum": ["tokens"],
189+
"description": "Budget kind. Only \"tokens\" is supported today."
190+
},
191+
"total": {
192+
"type": "integer",
193+
"minimum": 0,
194+
"description": "Total budget value."
195+
}
196+
},
197+
"required": ["total"],
198+
"additionalProperties": false
199+
}
200+
]
174201
}
175202
},
176203
"additionalProperties": false
@@ -650,6 +677,38 @@
650677
32768
651678
]
652679
},
680+
"task_budget": {
681+
"description": "Total-token budget for a full agentic task (forwarded to Anthropic as `output_config.task_budget`, with the required `task-budgets-2026-03-13` beta header attached automatically). Limits the combined tokens spent on thinking, tool calls, and output across the whole task. Configurable on any Claude model — docker-agent does not gate by model name — but at the time of writing only Claude Opus 4.7 honors it. Accepts an integer token count or an object {type: tokens, total: N}.",
682+
"oneOf": [
683+
{
684+
"type": "integer",
685+
"minimum": 0,
686+
"description": "Total token budget for the task (e.g., 128000)."
687+
},
688+
{
689+
"type": "object",
690+
"properties": {
691+
"type": {
692+
"type": "string",
693+
"enum": ["tokens"],
694+
"description": "Budget kind. Only \"tokens\" is supported today."
695+
},
696+
"total": {
697+
"type": "integer",
698+
"minimum": 0,
699+
"description": "Total budget value."
700+
}
701+
},
702+
"required": ["total"],
703+
"additionalProperties": false
704+
}
705+
],
706+
"examples": [
707+
64000,
708+
128000,
709+
{ "type": "tokens", "total": 128000 }
710+
]
711+
},
653712
"routing": {
654713
"type": "array",
655714
"description": "Routing rules for request-based model selection. When configured, this model becomes a router that selects the best model based on the user's input. The model's provider/model fields define the fallback model.",

docs/concepts/models/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ See the [Model Providers]({{ '/providers/overview/' | relative_url }}) section f
7070
| `presence_penalty` | float | Encourage topic diversity: 0.0 to 2.0 |
7171
| `base_url` | string | Custom API endpoint |
7272
| `thinking_budget` | string/int | Reasoning effort configuration |
73+
| `task_budget` | int/object | Total token budget for an agentic task (Anthropic; honored by Claude Opus 4.7 today) |
7374
| `provider_opts` | object | Provider-specific options |
7475

7576
## Reasoning / Thinking Budget

docs/configuration/models/index.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ models:
2424
base_url: string # Optional: custom API endpoint
2525
token_key: string # Optional: env var for API token
2626
thinking_budget: string|int # Optional: reasoning effort
27+
task_budget: int|object # Optional: total task token budget (Anthropic)
2728
parallel_tool_calls: boolean # Optional: allow parallel tool calls
2829
track_usage: boolean # Optional: track token usage
2930
routing: [list] # Optional: rule-based model routing
@@ -45,6 +46,7 @@ models:
4546
| `base_url` | string | ✗ | Custom API endpoint URL (for self-hosted or proxied endpoints) |
4647
| `token_key` | string | ✗ | Environment variable name containing the API token (overrides provider default) |
4748
| `thinking_budget` | string/int | ✗ | Reasoning effort control |
49+
| `task_budget` | int/object | ✗ | Total token budget for an agentic task (forwarded to Anthropic; see [Task Budget](#task-budget)). |
4850
| `parallel_tool_calls` | boolean | ✗ | Allow model to call multiple tools at once |
4951
| `track_usage` | boolean | ✗ | Track and report token usage for this model |
5052
| `routing` | array | ✗ | Rule-based routing to different models. See [Model Routing]({{ '/configuration/routing/' | relative_url }}). |
@@ -110,6 +112,58 @@ Works for all providers:
110112
thinking_budget: none # or 0
111113
```
112114

115+
## Task Budget
116+
117+
**Anthropic-only.**
118+
119+
`task_budget` caps the **total** number of tokens the model may spend across a
120+
multi-step agentic task — combining thinking, tool calls, and final output
121+
tokens. It lets long-running agents self-regulate effort without having to
122+
choose a tight per-call `max_tokens`.
123+
124+
It is forwarded to Anthropic's
125+
[`output_config.task_budget`](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)
126+
request field. docker-agent automatically attaches the required
127+
`task-budgets-2026-03-13` beta header whenever this field is set.
128+
129+
You can configure `task_budget` on **any** Claude model — docker-agent never
130+
gates it by model name. At the time of writing only **Claude Opus 4.7**
131+
actually honors the field; other Claude models are expected to reject requests that
132+
include it. Check the Anthropic release notes linked above for the current
133+
list of supported models.
134+
135+
### Integer shorthand
136+
137+
```yaml
138+
models:
139+
opus:
140+
provider: anthropic
141+
model: claude-opus-4-7
142+
task_budget: 128000 # total tokens for the whole task
143+
thinking_budget: adaptive # works nicely together
144+
```
145+
146+
### Object form
147+
148+
Equivalent, and forward-compatible with future budget types:
149+
150+
```yaml
151+
models:
152+
opus:
153+
provider: anthropic
154+
model: claude-opus-4-7
155+
task_budget:
156+
type: tokens # only "tokens" is supported today
157+
total: 128000
158+
```
159+
160+
Setting `task_budget: 0` (or omitting the field) disables the feature — the
161+
model falls back to the provider's default behavior.
162+
163+
Like other inheritable model settings, `task_budget` can also be declared on a
164+
[provider definition]({{ '/providers/custom/' | relative_url }}) and is
165+
inherited by every model that references that provider.
166+
113167
## Interleaved Thinking
114168

115169
For Anthropic and Bedrock Claude models, interleaved thinking allows tool calls during model reasoning. This is enabled by default:

docs/configuration/overview/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ agents:
261261
| `temperature` | Default sampling temperature. |
262262
| `max_tokens` | Default maximum response tokens. |
263263
| `thinking_budget` | Default reasoning effort/budget. |
264+
| `task_budget` | Default total token budget for an agentic task (Anthropic; honored by Claude Opus 4.7 today). |
264265
| `top_p` | Default top-p sampling parameter. |
265266
| `frequency_penalty` | Default frequency penalty. |
266267
| `presence_penalty` | Default presence penalty. |

docs/providers/anthropic/index.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,45 @@ models:
6767
interleaved_thinking: false # disable if needed
6868
```
6969

70+
## Task Budget
71+
72+
`task_budget` caps the **total** number of tokens the model may spend across a
73+
multi-step agentic task — combined thinking, tool calls, and final output. It
74+
is forwarded as
75+
[`output_config.task_budget`](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)
76+
and is ideal for letting long-running agents self-regulate effort without
77+
tightening `max_tokens` on every call.
78+
79+
docker-agent automatically attaches the required `task-budgets-2026-03-13`
80+
beta header whenever this field is set. You can configure `task_budget` on
81+
**any** Claude model — docker-agent never gates it by model name. At the time
82+
of writing, only **Claude Opus 4.7** actually honors the field; other Claude
83+
models (Sonnet 4.5, Opus 4.5 / 4.6, etc.) are expected to reject requests
84+
that include it. Check the Anthropic release notes linked above for the
85+
current list of supported models.
86+
87+
```yaml
88+
models:
89+
opus:
90+
provider: anthropic
91+
model: claude-opus-4-7
92+
task_budget: 128000 # integer shorthand → { type: tokens, total: 128000 }
93+
thinking_budget: adaptive
94+
```
95+
96+
Object form (forward-compatible with future budget types):
97+
98+
```yaml
99+
opus:
100+
provider: anthropic
101+
model: claude-opus-4-7
102+
task_budget:
103+
type: tokens
104+
total: 128000
105+
```
106+
107+
See the full schema on the [Model Configuration]({{ '/configuration/models/#task-budget' | relative_url }}) page.
108+
70109
<div class="callout callout-info" markdown="1">
71110
<div class="callout-title">ℹ️ Note
72111
</div>

docs/providers/custom/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ agents:
108108
| `parallel_tool_calls` | boolean | Whether to enable parallel tool calls by default. | — |
109109
| `track_usage` | boolean | Whether to track token usage by default. | — |
110110
| `thinking_budget` | string/int | Default reasoning effort/budget. | — |
111+
| `task_budget` | int/object | Default total token budget for an agentic task (forwarded to Anthropic; honored by Claude Opus 4.7 today). Integer shorthand or `{type: tokens, total: N}`. | — |
111112
| `provider_opts` | object | Provider-specific options passed through to the client. | — |
112113

113114
## Default Inheritance

examples/task_budget.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env docker agent run
2+
3+
# Anthropic `task_budget` caps the total tokens a model spends across a
4+
# multi-step agentic task (thinking + tool calls + final output). docker-agent
5+
# forwards it as `output_config.task_budget` and automatically attaches the
6+
# `task-budgets-2026-03-13` beta header.
7+
#
8+
# It can be set on any Claude model; at the time of writing only Claude
9+
# Opus 4.7 actually honors it. See:
10+
# https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7
11+
#
12+
# Run the demo command with: docker agent run task_budget.yaml -c demo
13+
14+
# Declare the provider explicitly so we can reference claude-opus-4-7 before
15+
# it lands in the public models.dev catalog. For catalog-known models you can
16+
# set `task_budget` directly under `models.<name>` without this block.
17+
providers:
18+
anthropic-opus-47:
19+
provider: anthropic
20+
token_key: ANTHROPIC_API_KEY
21+
22+
agents:
23+
root:
24+
model: opus-bounded
25+
description: a helpful assistant with a bounded task token budget
26+
instruction: Stay within the configured task token budget.
27+
commands:
28+
demo: "design and sketch a small Python CLI that fetches weather data"
29+
toolsets:
30+
- type: shell
31+
32+
models:
33+
# Integer shorthand = a "tokens" budget of 128k for the whole task.
34+
opus-bounded:
35+
provider: anthropic-opus-47
36+
model: claude-opus-4-7
37+
task_budget: 128000
38+
thinking_budget: adaptive # task_budget pairs well with adaptive thinking
39+
40+
# Explicit object form, equivalent to `task_budget: 64000`.
41+
opus-bounded-tight:
42+
provider: anthropic-opus-47
43+
model: claude-opus-4-7
44+
task_budget:
45+
type: tokens
46+
total: 64000

pkg/config/latest/model_config_clone_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ func TestModelConfig_Clone_DeepCopiesPointerFields(t *testing.T) {
2424
ParallelToolCalls: &parallel,
2525
TrackUsage: &trackUsage,
2626
ThinkingBudget: &ThinkingBudget{Effort: "high"},
27+
TaskBudget: &TaskBudget{Type: "tokens", Total: 128000},
2728
ProviderOpts: map[string]any{"key": "value"},
2829
Routing: []RoutingRule{
2930
{Model: "fast", Examples: []string{"quick question"}},
@@ -39,6 +40,7 @@ func TestModelConfig_Clone_DeepCopiesPointerFields(t *testing.T) {
3940
*original.ParallelToolCalls = false
4041
*original.TrackUsage = false
4142
original.ThinkingBudget.Effort = "low"
43+
original.TaskBudget.Total = 1
4244
original.ProviderOpts["key"] = "mutated"
4345
original.Routing[0].Examples[0] = "mutated"
4446

@@ -49,6 +51,8 @@ func TestModelConfig_Clone_DeepCopiesPointerFields(t *testing.T) {
4951
assert.True(t, *clone.ParallelToolCalls)
5052
assert.True(t, *clone.TrackUsage)
5153
assert.Equal(t, "high", clone.ThinkingBudget.Effort)
54+
assert.Equal(t, 128000, clone.TaskBudget.Total)
55+
assert.Equal(t, "tokens", clone.TaskBudget.Type)
5256
assert.Equal(t, "value", clone.ProviderOpts["key"])
5357
assert.Equal(t, "quick question", clone.Routing[0].Examples[0])
5458
}

0 commit comments

Comments
 (0)