Skip to content

Commit e08bd1c

Browse files
authored
Merge pull request #2010 from dgageot/board/refacto-runtime-0fc5230f
refactor: split runtime.go and extract pkg/modelerrors
2 parents e6192d8 + 6068592 commit e08bd1c

7 files changed

Lines changed: 1066 additions & 1304 deletions

File tree

pkg/modelerrors/modelerrors.go

Lines changed: 354 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,354 @@
1+
// Package modelerrors provides error classification utilities for LLM model
2+
// providers. It determines whether errors are retryable, identifies context
3+
// window overflow conditions, extracts HTTP status codes from various SDK
4+
// error types, and computes exponential backoff durations.
5+
package modelerrors
6+
7+
import (
8+
"context"
9+
"errors"
10+
"fmt"
11+
"log/slog"
12+
"math/rand"
13+
"net"
14+
"regexp"
15+
"strings"
16+
"time"
17+
18+
"github.com/anthropics/anthropic-sdk-go"
19+
"google.golang.org/genai"
20+
)
21+
22+
// Tuning knobs for the exponential backoff computed by CalculateBackoff.
const (
	// backoffBaseDelay is the delay used for the first retry (attempt 0).
	backoffBaseDelay = 200 * time.Millisecond
	// backoffMaxDelay is the ceiling applied to the delay before jitter is added.
	backoffMaxDelay = 2 * time.Second
	// backoffFactor is the multiplier applied to the delay for each further attempt.
	backoffFactor = 2.0
	// backoffJitter is the fraction of the delay used as random jitter (0.1 = ±10%).
	backoffJitter = 0.1
)
29+
30+
// Defaults for the retry / fallback behaviour.
const (
	// DefaultRetries is how many times a model is retried (with exponential
	// backoff) after a retryable error such as a 5xx or a timeout. A value of
	// 2 means 3 attempts in total, which absorbs transient provider issues
	// without failing over immediately.
	DefaultRetries = 2

	// DefaultCooldown is how long to stay on a fallback model after a
	// non-retryable error before the primary model is tried again.
	DefaultCooldown = time.Minute
)
41+
42+
// ContextOverflowError marks a failure as being caused by the conversation
// context exceeding the model's context window. It wraps the provider error
// so the runtime loop can trigger auto-compaction instead of surfacing a raw
// HTTP error to the user.
type ContextOverflowError struct {
	// Underlying is the original provider error; it may be nil.
	Underlying error
}

// Error implements the error interface. The underlying error's message is
// appended when one is present.
func (e *ContextOverflowError) Error() string {
	if e.Underlying != nil {
		return fmt.Sprintf("context window overflow: %s", e.Underlying.Error())
	}
	return "context window overflow"
}

// Unwrap exposes the wrapped error to errors.Is / errors.As.
func (e *ContextOverflowError) Unwrap() error { return e.Underlying }
60+
61+
// contextOverflowPatterns holds the message fragments that signal a
// prompt/context larger than the model's context window. Every entry is
// lowercase because IsContextOverflowError lowercases the error message
// before searching it, which makes the match case-insensitive.
var contextOverflowPatterns = []string{
	// Explicit "context / prompt too long" wording.
	"prompt is too long",
	"maximum context length",
	"context length exceeded",
	"context_length_exceeded",

	// Token-budget wording.
	"max_tokens must be greater than",
	"maximum number of tokens",

	// Request-size wording.
	"content length exceeds",
	"request too large",
	"payload too large",
	"input is too long",

	// Token-limit wording and "make it shorter" hints.
	"exceeds the model's max token",
	"token limit",
	"reduce your prompt",
	"reduce the length",
}
80+
81+
// IsContextOverflowError checks whether the error indicates the conversation
82+
// context has exceeded the model's context window. It inspects both structured
83+
// SDK error types and raw error message patterns.
84+
//
85+
// Recognised patterns include:
86+
// - Anthropic 400 "prompt is too long: N tokens > M maximum"
87+
// - Anthropic 400 "max_tokens must be greater than thinking.budget_tokens"
88+
// (emitted when the prompt is so large that max_tokens can't accommodate
89+
// the thinking budget — a proxy for context overflow)
90+
// - OpenAI 400 "maximum context length" / "context_length_exceeded"
91+
// - Anthropic 500 that is actually a context overflow (heuristic: the error
92+
// message is opaque but the conversation was already near the limit)
93+
//
94+
// This function intentionally does NOT match generic 500 errors; callers
95+
// that want to treat an opaque 500 as overflow must check separately with
96+
// additional context (e.g., session token counts).
97+
func IsContextOverflowError(err error) bool {
98+
if err == nil {
99+
return false
100+
}
101+
102+
// Already wrapped
103+
var ctxErr *ContextOverflowError
104+
if errors.As(err, &ctxErr) {
105+
return true
106+
}
107+
108+
errMsg := strings.ToLower(err.Error())
109+
for _, pattern := range contextOverflowPatterns {
110+
if strings.Contains(errMsg, pattern) {
111+
return true
112+
}
113+
}
114+
115+
return false
116+
}
117+
118+
// statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ")
119+
var statusCodeRegex = regexp.MustCompile(`\b([45]\d{2})\b`)
120+
121+
// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error.
122+
// Checks in order:
123+
// 1. Known provider SDK error types (Anthropic, Gemini)
124+
// 2. Regex parsing of error message (fallback for OpenAI and others)
125+
// Returns 0 if no status code found.
126+
func ExtractHTTPStatusCode(err error) int {
127+
if err == nil {
128+
return 0
129+
}
130+
131+
// Check Anthropic SDK error type (public)
132+
if anthropicErr, ok := errors.AsType[*anthropic.Error](err); ok {
133+
return anthropicErr.StatusCode
134+
}
135+
136+
// Check Google Gemini SDK error type (public)
137+
if geminiErr, ok := errors.AsType[*genai.APIError](err); ok {
138+
return geminiErr.Code
139+
}
140+
141+
// For other providers (OpenAI, etc.), extract from error message using regex
142+
// OpenAI SDK error format: `POST "/v1/...": 429 Too Many Requests {...}`
143+
matches := statusCodeRegex.FindStringSubmatch(err.Error())
144+
if len(matches) >= 2 {
145+
var code int
146+
if _, err := fmt.Sscanf(matches[1], "%d", &code); err == nil {
147+
return code
148+
}
149+
}
150+
151+
return 0
152+
}
153+
154+
// IsRetryableStatusCode reports whether an HTTP status code warrants retrying
// the SAME model with exponential backoff.
//
// Retryable:
//   - 500, 502, 503, 504 (server errors)
//   - 529 (Anthropic overloaded)
//   - 408 (request timeout)
//
// Not retryable (skip to the next model immediately):
//   - 429 (rate limit): the provider is explicitly telling us to back off
//   - other 4xx client errors (400, 401, 403, 404): a retry won't help
//   - any other value, including 0 and unlisted 5xx codes
func IsRetryableStatusCode(statusCode int) bool {
	switch statusCode {
	case 408, // request timeout
		500, 502, 503, 504, // server errors
		529: // Anthropic overloaded
		return true
	}
	// Everything else, notably 429, falls through as non-retryable.
	return false
}
179+
180+
// IsRetryableModelError determines if an error should trigger a retry of the SAME model.
181+
//
182+
// Retryable errors (retry same model with backoff):
183+
// - Network timeouts
184+
// - Temporary network errors
185+
// - HTTP 5xx errors (server errors)
186+
// - HTTP 529 (Anthropic overloaded)
187+
// - HTTP 408 (request timeout)
188+
//
189+
// Non-retryable errors (skip to next model in chain immediately):
190+
// - Context cancellation
191+
// - HTTP 429 (rate limit) - provider is explicitly rate limiting us
192+
// - HTTP 4xx errors (client errors)
193+
// - Authentication errors
194+
// - Invalid request errors
195+
//
196+
// The key distinction is: 429 means "you're calling too fast, slow down" which
197+
// suggests we should try a different model, not keep hammering the same one.
198+
func IsRetryableModelError(err error) bool {
199+
if err == nil {
200+
return false
201+
}
202+
203+
// Context cancellation is never retryable
204+
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
205+
return false
206+
}
207+
208+
// Context overflow errors are never retryable — the context hasn't changed
209+
// between attempts, so retrying the same oversized payload will always fail.
210+
// This avoids wasting time on 3 attempts + exponential backoff.
211+
if IsContextOverflowError(err) {
212+
slog.Debug("Context overflow error, not retryable", "error", err)
213+
return false
214+
}
215+
216+
// First, try to extract HTTP status code from known SDK error types
217+
if statusCode := ExtractHTTPStatusCode(err); statusCode != 0 {
218+
retryable := IsRetryableStatusCode(statusCode)
219+
slog.Debug("Classified error by status code",
220+
"status_code", statusCode,
221+
"retryable", retryable)
222+
return retryable
223+
}
224+
225+
// Check for network errors
226+
if netErr, ok := errors.AsType[net.Error](err); ok {
227+
// Timeout errors are retryable
228+
if netErr.Timeout() {
229+
slog.Debug("Network timeout error, retryable", "error", err)
230+
return true
231+
}
232+
}
233+
234+
// Fall back to message-pattern matching for errors without structured status codes
235+
errMsg := strings.ToLower(err.Error())
236+
237+
// Retryable patterns (5xx, timeout, network issues)
238+
// NOTE: 429 is explicitly NOT in this list - we skip to next model for rate limits
239+
retryablePatterns := []string{
240+
"500", // Internal server error
241+
"502", // Bad gateway
242+
"503", // Service unavailable
243+
"504", // Gateway timeout
244+
"408", // Request timeout
245+
"timeout", // Generic timeout
246+
"connection reset", // Connection reset
247+
"connection refused", // Connection refused
248+
"no such host", // DNS failure
249+
"temporary failure", // Temporary failure
250+
"service unavailable", // Service unavailable
251+
"internal server error", // Server error
252+
"bad gateway", // Gateway error
253+
"gateway timeout", // Gateway timeout
254+
"overloaded", // Server overloaded
255+
"overloaded_error", // Server overloaded
256+
"other side closed", // Connection closed by peer
257+
"fetch failed", // Network fetch failure
258+
"reset before headers", // Connection reset before headers received
259+
"upstream connect", // Upstream connection error
260+
}
261+
262+
for _, pattern := range retryablePatterns {
263+
if strings.Contains(errMsg, pattern) {
264+
slog.Debug("Matched retryable error pattern", "pattern", pattern)
265+
return true
266+
}
267+
}
268+
269+
// Non-retryable patterns (skip to next model immediately)
270+
nonRetryablePatterns := []string{
271+
"429", // Rate limit - skip to next model
272+
"rate limit", // Rate limit message
273+
"too many requests", // Rate limit message
274+
"throttl", // Throttling (rate limiting)
275+
"quota", // Quota exceeded
276+
"capacity", // Capacity issues (often rate-limit related)
277+
"401", // Unauthorized
278+
"403", // Forbidden
279+
"404", // Not found
280+
"400", // Bad request
281+
"invalid", // Invalid request
282+
"unauthorized", // Auth error
283+
"authentication", // Auth error
284+
"api key", // API key error
285+
}
286+
287+
for _, pattern := range nonRetryablePatterns {
288+
if strings.Contains(errMsg, pattern) {
289+
slog.Debug("Matched non-retryable error pattern", "pattern", pattern)
290+
return false
291+
}
292+
}
293+
294+
// Default: don't retry unknown errors to be safe
295+
slog.Debug("Unknown error type, not retrying", "error", err)
296+
return false
297+
}
298+
299+
// CalculateBackoff returns the backoff duration for a given attempt (0-indexed).
300+
// Uses exponential backoff with jitter.
301+
func CalculateBackoff(attempt int) time.Duration {
302+
if attempt < 0 {
303+
attempt = 0
304+
}
305+
306+
// Calculate exponential delay
307+
delay := float64(backoffBaseDelay)
308+
for range attempt {
309+
delay *= backoffFactor
310+
}
311+
312+
// Cap at max delay
313+
if delay > float64(backoffMaxDelay) {
314+
delay = float64(backoffMaxDelay)
315+
}
316+
317+
// Add jitter (±10%)
318+
jitter := delay * backoffJitter * (2*rand.Float64() - 1)
319+
delay += jitter
320+
321+
return time.Duration(delay)
322+
}
323+
324+
// SleepWithContext sleeps for the specified duration, returning early if the
// context is cancelled.
//
// Returns true if the sleep completed, false if it was interrupted by context
// cancellation. Cancellation takes priority: if ctx is already done when the
// function is called it returns false immediately, even for a zero or
// negative duration. A non-positive duration with a live context returns true
// without waiting.
func SleepWithContext(ctx context.Context, d time.Duration) bool {
	// Check cancellation up front. Without this, a non-positive d makes both
	// select cases ready at once and Go picks between ready cases at random.
	if ctx.Err() != nil {
		return false
	}
	if d <= 0 {
		return true
	}

	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return false
	case <-timer.C:
		return true
	}
}
337+
338+
// FormatError returns a user-friendly error message for model errors.
339+
// Context overflow gets a dedicated actionable message; all other errors
340+
// pass through their original message.
341+
func FormatError(err error) string {
342+
if err == nil {
343+
return ""
344+
}
345+
346+
// Context overflow gets a dedicated, actionable message.
347+
var ctxOverflow *ContextOverflowError
348+
if errors.As(err, &ctxOverflow) {
349+
return "The conversation has exceeded the model's context window and automatic compaction is not enabled. " +
350+
"Try running /compact to reduce the conversation size, or start a new session."
351+
}
352+
353+
return err.Error()
354+
}

0 commit comments

Comments
 (0)