@@ -71,6 +71,39 @@ export async function POST(request: Request) {
7171 }
7272 if ( chatId ) rootSpan . setAttribute ( TraceAttr . ChatId , chatId )
7373
74+ // ORDER MATTERS: local abort FIRST, Go explicit-abort SECOND.
75+ //
76+ // Sim and Go each own a separate Redis instance and do not share
77+ // state through it — the only signal that crosses the service
78+ // boundary is this HTTP call. So the race we need to win is
79+ // purely Sim-internal:
80+ //
81+ // - `abortActiveStream` flips the AbortController (reason =
82+ // AbortReason.UserStop) that's wrapped around the in-flight
83+ // `fetchGo('/api/mothership', ...)` SSE stream. Once flipped,
84+ // the stream throws AbortError on the next chunk read, and
85+ // the lifecycle catch block's classifier sees
86+ // `signal.aborted = true` with an explicit-stop reason → the
87+ // root span gets stamped `cancel_reason = explicit_stop` and
88+ // the `request.cancelled` event fires correctly.
89+ //
90+ // - If we call Go first (old order), Go's context cancels from
91+ // its own explicit-abort handler, the /api/mothership stream
92+ // errors with "context canceled", and Sim's catch block fires
93+ // BEFORE we've flipped the local AbortController. At that
94+ // point `signal.aborted` is still false, so the classifier
95+ // falls through to `client_disconnect` / `unknown` and the
96+ // root ends up as `outcome = error` — which is what we saw
97+ // in trace 25f31730082078cef54653b1740caf12.
98+ //
99+ // Go's explicit-abort endpoint still runs second: it's what tells
100+ // Go-side billing "this was intentional, flush the paused ledger"
101+ // and is unaffected by the reorder (Go's context is already
102+ // cancelled by the time we get there; the endpoint's job is
103+ // billing semantics, not cancelling in-flight work).
104+ const aborted = await abortActiveStream ( streamId )
105+ rootSpan . setAttribute ( TraceAttr . CopilotAbortLocalAborted , aborted )
106+
74107 let goAbortOk = false
75108 try {
76109 const headers : Record < string , string > = { 'Content-Type' : 'application/json' }
@@ -94,7 +127,7 @@ export async function POST(request: Request) {
94127 spanName : 'sim → go /api/streams/explicit-abort' ,
95128 operation : 'explicit_abort' ,
96129 attributes : {
97- 'copilot.stream.id' : streamId ,
130+ [ TraceAttr . StreamId ] : streamId ,
98131 ...( chatId ? { [ TraceAttr . ChatId ] : chatId } : { } ) ,
99132 } ,
100133 } ) . finally ( ( ) => clearTimeout ( timeout ) )
@@ -103,16 +136,13 @@ export async function POST(request: Request) {
103136 }
104137 goAbortOk = true
105138 } catch ( err ) {
106- logger . warn ( 'Explicit abort marker request failed; proceeding with local abort' , {
139+ logger . warn ( 'Explicit abort marker request failed after local abort' , {
107140 streamId,
108141 error : err instanceof Error ? err . message : String ( err ) ,
109142 } )
110143 }
111144 rootSpan . setAttribute ( TraceAttr . CopilotAbortGoMarkerOk , goAbortOk )
112145
113- const aborted = await abortActiveStream ( streamId )
114- rootSpan . setAttribute ( TraceAttr . CopilotAbortLocalAborted , aborted )
115-
116146 if ( chatId ) {
117147 // `waitForPendingChatStream` blocks up to 8s waiting for the
118148 // prior stream's release. It's THE single most likely stall
0 commit comments