Skip to content

Commit 4448ab0

Browse files
fix: protect result-writing loops from CancelledError and catch CancelledError in tool execution
Harden the orchestrator's result-writing and tool-execution loops against CancelledError to prevent orphaned tool_calls that break the conversation state. When a CancelledError escapes a result-writing loop, the assistant message is committed without the corresponding tool result. The LLM sees a tool_use without a tool_result on the next turn, causing a protocol violation or infinite retry loop. This fix catches CancelledError at two levels: - In the result-writing loop, ensuring partial results are committed - In tool execution, ensuring the tool result is always recorded Rebased on main to pick up the defensive getattr access pattern for tool call attributes. Companion fixes already merged in loop-streaming#14 and loop-events#5. Fixes: microsoft-amplifier/amplifier-support#46 🤖 Generated with [Amplifier](https://github.com/microsoft/amplifier) Co-Authored-By: Amplifier <240397093+microsoft-amplifier@users.noreply.github.com>
1 parent 3a0246f commit 4448ab0

File tree

1 file changed

+65
-26
lines changed

1 file changed

+65
-26
lines changed

amplifier_module_loop_basic/__init__.py

Lines changed: 65 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ async def execute_single_tool(
487487
result_content = result.get_serialized_output()
488488
return (tool_call_id, result_content)
489489

490-
except Exception as te:
490+
except (Exception, asyncio.CancelledError) as te:
491491
# Emit error event
492492
await hooks.emit(
493493
TOOL_ERROR,
@@ -524,19 +524,28 @@ async def execute_single_tool(
524524
)
525525
except asyncio.CancelledError:
526526
# Immediate cancellation (second Ctrl+C) - synthesize cancelled results
527-
# for ALL tool_calls to maintain tool_use/tool_result pairing
527+
# for ALL tool_calls to maintain tool_use/tool_result pairing.
528+
# Protect from further CancelledError using kernel
529+
# catch-continue-reraise pattern so all results are written.
528530
logger.info(
529531
"Tool execution cancelled - synthesizing cancelled results"
530532
)
531533
for tc in tool_calls:
532-
if hasattr(context, "add_message"):
533-
await context.add_message(
534-
{
535-
"role": "tool",
536-
"tool_call_id": getattr(tc, "id", None)
537-
or tc.get("id"),
538-
"content": f'{{"error": "Tool execution was cancelled by user", "cancelled": true, "tool": "{getattr(tc, "name", None) or tc.get("tool")}"}}',
539-
}
534+
try:
535+
if hasattr(context, "add_message"):
536+
await context.add_message(
537+
{
538+
"role": "tool",
539+
"tool_call_id": getattr(tc, "id", None)
540+
or tc.get("id"),
541+
"content": f'{{"error": "Tool execution was cancelled by user", "cancelled": true, "tool": "{getattr(tc, "name", None) or tc.get("tool")}"}}',
542+
}
543+
)
544+
except asyncio.CancelledError:
545+
logger.info(
546+
"CancelledError during synthetic result write - "
547+
"completing remaining writes to prevent "
548+
"orphaned tool_calls"
540549
)
541550
# Re-raise to let the cancellation propagate
542551
raise
@@ -546,15 +555,30 @@ async def execute_single_tool(
546555
# MUST add tool results to context before returning
547556
# Otherwise we leave orphaned tool_calls without matching tool_results
548557
# which violates provider API contracts (Anthropic, OpenAI)
558+
# Protect from CancelledError using kernel catch-continue-reraise
559+
# pattern (coordinator.cleanup, hooks.emit) so all results are
560+
# written even if force-cancel arrives mid-loop.
561+
_cancel_error = None
549562
for tool_call_id, content in tool_results:
550-
if hasattr(context, "add_message"):
551-
await context.add_message(
552-
{
553-
"role": "tool",
554-
"tool_call_id": tool_call_id,
555-
"content": content,
556-
}
557-
)
563+
try:
564+
if hasattr(context, "add_message"):
565+
await context.add_message(
566+
{
567+
"role": "tool",
568+
"tool_call_id": tool_call_id,
569+
"content": content,
570+
}
571+
)
572+
except asyncio.CancelledError:
573+
if _cancel_error is None:
574+
_cancel_error = asyncio.CancelledError()
575+
logger.info(
576+
"CancelledError during tool result write - "
577+
"completing remaining writes to prevent "
578+
"orphaned tool_calls"
579+
)
580+
if _cancel_error is not None:
581+
raise _cancel_error
558582
await hooks.emit(
559583
ORCHESTRATOR_COMPLETE,
560584
{
@@ -566,15 +590,30 @@ async def execute_single_tool(
566590
return final_content
567591

568592
# Add all tool results to context in original order (deterministic)
593+
# Protect from CancelledError using kernel catch-continue-reraise
594+
# pattern so all results are written even if force-cancel arrives
595+
# mid-loop.
596+
_cancel_error = None
569597
for tool_call_id, content in tool_results:
570-
if hasattr(context, "add_message"):
571-
await context.add_message(
572-
{
573-
"role": "tool",
574-
"tool_call_id": tool_call_id,
575-
"content": content,
576-
}
577-
)
598+
try:
599+
if hasattr(context, "add_message"):
600+
await context.add_message(
601+
{
602+
"role": "tool",
603+
"tool_call_id": tool_call_id,
604+
"content": content,
605+
}
606+
)
607+
except asyncio.CancelledError:
608+
if _cancel_error is None:
609+
_cancel_error = asyncio.CancelledError()
610+
logger.info(
611+
"CancelledError during tool result write - "
612+
"completing remaining writes to prevent "
613+
"orphaned tool_calls"
614+
)
615+
if _cancel_error is not None:
616+
raise _cancel_error
578617

579618
# After executing tools, continue loop to get final response
580619
iteration += 1

0 commit comments

Comments
 (0)