From 2473755279734676979627786121b0328626f9a2 Mon Sep 17 00:00:00 2001 From: Alessandro Pogliaghi Date: Fri, 15 May 2026 15:45:03 -0400 Subject: [PATCH 1/2] perf(agent-server): defer gateway models fetch off critical path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud agent server boot was waiting on the synchronous /v1/models gateway call inside createSession before /health flips hasSession=true, even when the caller already pinned a model (which cloud always does). Each new sandbox = new process = cold cache = one extra round trip. When meta?.model or settings.model is set, return a single-entry options list immediately and warm the gateway cache from deferBackgroundFetches. The available-models dropdown updates after init, and getContextWindowForModel falls back to the 200k default until the warmup completes (which fires within ~10ms of session creation). When neither is set, we still wait — there's no other source for the default model id. --- .../agent/src/adapters/claude/claude-agent.ts | 66 ++++++++++++++----- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 2b516b72d..f0399d128 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -1240,20 +1240,46 @@ export class ClaudeAcpAgent extends BaseAcpAgent { ? withTimeout(q.initializationResult(), SESSION_VALIDATION_TIMEOUT_MS) : undefined; - const [modelOptions] = await Promise.all([ - this.getModelConfigOptions( - settingsManager.getSettings().model || meta?.model || undefined, - ), - ...(meta?.taskRunId - ? 
[ - this.client.extNotification(POSTHOG_NOTIFICATIONS.SDK_SESSION, { - taskRunId: meta.taskRunId, - sessionId, - adapter: "claude", - }), - ] - : []), - ]); + // When the caller already pinned a model (cloud always does), the awaited + // gateway /v1/models call is only used to populate the UI's available-models + // dropdown — kicking it off in the background lets us return the session + // sooner. With no pinned model we still wait, since we need the gateway + // list to choose a default. + const knownModelId = + settingsManager.getSettings().model || meta?.model || undefined; + + const sdkSessionNotification = meta?.taskRunId + ? this.client.extNotification(POSTHOG_NOTIFICATIONS.SDK_SESSION, { + taskRunId: meta.taskRunId, + sessionId, + adapter: "claude", + }) + : Promise.resolve(); + + let modelOptions: { + currentModelId: string; + options: SessionConfigSelectOption[]; + }; + if (knownModelId) { + // Synthesize a minimal options list; the real list arrives via + // deferBackgroundFetches below. + modelOptions = { + currentModelId: knownModelId, + options: [ + { + value: knownModelId, + name: knownModelId, + description: "", + }, + ], + }; + void sdkSessionNotification; + } else { + [modelOptions] = await Promise.all([ + this.getModelConfigOptions(undefined), + sdkSessionNotification, + ]); + } if (initPromise) { try { @@ -1571,8 +1597,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent { // ================================ /** - * Fire-and-forget: fetch slash commands and MCP tool metadata in parallel. - * Both populate caches used later — neither is needed to return configOptions. + * Fire-and-forget: fetch slash commands, MCP tool metadata, and prime the + * gateway models cache in parallel. None of these are needed to return + * configOptions — priming the gateway cache here keeps `getContextWindowForModel` + * accurate by the time the first prompt fires. 
*/ private deferBackgroundFetches(q: Query): void { Promise.all([ @@ -1585,6 +1613,12 @@ export class ClaudeAcpAgent extends BaseAcpAgent { this.options?.onMcpServersReady?.(serverNames); } }), + // Warm the gateway models cache so subsequent context-window lookups + // don't fall back to the 200k default. Result is stored on the + // agent instance via fetchGatewayModels' internal cache. + this.getModelConfigOptions(this.session?.modelId).catch(() => { + // Best-effort: failures here just leave the default in place. + }), ]).catch((err) => this.logger.error("Background fetch failed", { error: err }), ); From 88c3a41cb71849ebc526f62d6d5a0fb4f42e31f6 Mon Sep 17 00:00:00 2001 From: Alessandro Pogliaghi Date: Fri, 15 May 2026 16:47:17 -0400 Subject: [PATCH 2/2] perf(agent-server): reuse prefetched task in initial message sendInitialTaskMessage re-fetched the task via getTask() even though _doInitializeSession had already fetched it milliseconds earlier in the same boot path (preTask). Each getTask is a sandbox->PostHog round trip on the critical path that gates /health hasSession=true. Pass preTask through as prefetchedTask and reuse it, falling back to the API call when it is absent (the catch path in _doInitializeSession can still yield null). Pure within-boot dedupe: no staleness risk since it re-uses a value fetched in the same function. 
--- packages/agent/src/server/agent-server.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/agent/src/server/agent-server.ts b/packages/agent/src/server/agent-server.ts index 12ea84c47..22e17b3ea 100644 --- a/packages/agent/src/server/agent-server.ts +++ b/packages/agent/src/server/agent-server.ts @@ -42,6 +42,7 @@ import type { GitCheckpointEvent, HandoffLocalGitState, LogLevel, + Task, TaskRun, TaskRunArtifact, } from "../types"; @@ -1026,7 +1027,7 @@ export class AgentServer { this.logger.debug("Failed to set task run to in_progress", err), ); - await this.sendInitialTaskMessage(payload, preTaskRun); + await this.sendInitialTaskMessage(payload, preTaskRun, preTask); } private extractErrorClassification(error: unknown): { @@ -1067,6 +1068,7 @@ export class AgentServer { private async sendInitialTaskMessage( payload: JwtPayload, prefetchedRun?: TaskRun | null, + prefetchedTask?: Task | null, ): Promise { if (!this.session) return; @@ -1105,7 +1107,11 @@ export class AgentServer { } try { - const task = await this.posthogAPI.getTask(payload.task_id); + // Reuse the task fetched during session init when available; it was + // fetched milliseconds ago in the same boot path, so re-fetching it + // here is a redundant sandbox->PostHog round trip on the hot path. + const task = + prefetchedTask ?? (await this.posthogAPI.getTask(payload.task_id)); const initialPromptOverride = taskRun ? this.getInitialPromptOverride(taskRun)