amplifier-module-loop-basic/tests/test_thinking_block_leak.py at fa67fe23177f9afb5f19740822d3640bf75ccefb · microsoft/amplifier-module-loop-basic · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
"""Regression tests for thinking block content leaking into text extraction.

Root cause:
    There are TWO content block model systems:
      - content_models.ThinkingContent  → has .text  (was the bug hazard)
      - message_models.ThinkingBlock    → has .thinking (already safe)

    The old execute() inline text extraction used ``hasattr(block, "text")``
    which allowed ThinkingContent objects through unchanged, leaking thinking
    text into final_content and ultimately into downstream parse_json calls
    (causing the "Cannot access 'task_id' on str, not dict" failure mode).

Fix:
    Use an explicit ``block.type == "text"`` guard so only text blocks are
    included, regardless of which model system the block comes from.

RED / GREEN verification:
    Run against the unfixed code to see
    test_thinking_content_does_not_leak_into_final_content FAIL (thinking
    text present in result).
    Run after the fix to see all tests PASS.

Cross-ecosystem:
    Same fix pattern as amplifier-module-loop-streaming PR #25 (df5c0e1).
"""

import pytest

from amplifier_core.testing import EventRecorder, MockContextManager

from amplifier_module_loop_basic import BasicOrchestrator


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _orch():
    """Return a new BasicOrchestrator constructed with an empty config dict."""
    empty_config = {}
    return BasicOrchestrator(empty_config)


def _make_fake_response(blocks):
    """Create a fake provider response with the given content blocks."""

    class FakeResponse:
        content = blocks
        tool_calls = None
        usage = None
        content_blocks = None
        metadata = None

    return FakeResponse()


class FakeProvider:
    """Stub provider whose ``complete`` always hands back one canned response."""

    # Provider name, exposed as a class attribute.
    name = "mock-thinking"

    def __init__(self, response):
        # Keep the response object so complete() can return it unchanged.
        self._canned = response

    async def complete(self, request, **kwargs):
        """Return the stored response; ``request`` and ``kwargs`` are ignored."""
        canned = self._canned
        return canned


# ---------------------------------------------------------------------------
# Primary regression test: ThinkingContent (content_models) must be filtered
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_thinking_content_does_not_leak_into_final_content():
    """ThinkingContent text must be absent from what execute() returns.

    Primary regression test.  content_models.ThinkingContent carries a
    ``.text`` attribute, so a ``hasattr(block, "text")`` guard lets it
    through; only a check on the block type keeps it out.

    RED (before fix):  result contains "internal reasoning" — thinking text leaked.
    GREEN (after fix): result is ONLY "real response".
    """
    from amplifier_core.content_models import TextContent, ThinkingContent

    # One thinking block followed by one genuine text block.
    blocks = [
        ThinkingContent(text="internal reasoning"),
        TextContent(text="real response"),
    ]
    fake_provider = FakeProvider(_make_fake_response(blocks))

    result = await _orch().execute(
        prompt="Test",
        context=MockContextManager(),
        providers={"default": fake_provider},
        tools={},
        hooks=EventRecorder(),
    )

    # The thinking payload has to be filtered out ...
    assert "internal reasoning" not in result, (
        f"Thinking text leaked into final_content: {result!r}"
    )
    # ... while the TextContent payload has to survive.
    assert "real response" in result, (
        f"Expected 'real response' in result but got: {result!r}"
    )


# ---------------------------------------------------------------------------
# Complementary test: ThinkingBlock (message_models) was already safe
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_thinking_block_does_not_leak_into_final_content():
    """ThinkingBlock content must be absent from what execute() returns.

    message_models.ThinkingBlock stores its payload in ``.thinking`` rather
    than ``.text``, so the original bug never affected it.  This test pins
    that it stays excluded after the switch to a type check.
    """
    from amplifier_core.message_models import TextBlock, ThinkingBlock

    # One thinking block followed by one genuine text block.
    blocks = [
        ThinkingBlock(thinking="internal reasoning", signature="sig"),
        TextBlock(text="real response"),
    ]
    fake_provider = FakeProvider(_make_fake_response(blocks))

    result = await _orch().execute(
        prompt="Test",
        context=MockContextManager(),
        providers={"default": fake_provider},
        tools={},
        hooks=EventRecorder(),
    )

    assert "internal reasoning" not in result, (
        f"ThinkingBlock content leaked into final_content: {result!r}"
    )
    assert "real response" in result, (
        f"Expected 'real response' in result but got: {result!r}"
    )


# ---------------------------------------------------------------------------
# Smoke tests: TextContent, TextBlock, and dict blocks are handled correctly
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_text_content_passes_through():
    """A lone content_models.TextContent block becomes the whole result."""
    from amplifier_core.content_models import TextContent

    only_block = TextContent(text="hello from TextContent")
    fake_provider = FakeProvider(_make_fake_response([only_block]))

    result = await _orch().execute(
        prompt="Test",
        context=MockContextManager(),
        providers={"default": fake_provider},
        tools={},
        hooks=EventRecorder(),
    )

    assert result == "hello from TextContent", f"Unexpected result: {result!r}"


@pytest.mark.asyncio
async def test_text_block_passes_through():
    """A lone message_models.TextBlock block becomes the whole result."""
    from amplifier_core.message_models import TextBlock

    only_block = TextBlock(text="hello from TextBlock")
    fake_provider = FakeProvider(_make_fake_response([only_block]))

    result = await _orch().execute(
        prompt="Test",
        context=MockContextManager(),
        providers={"default": fake_provider},
        tools={},
        hooks=EventRecorder(),
    )

    assert result == "hello from TextBlock", f"Unexpected result: {result!r}"


@pytest.mark.asyncio
async def test_dict_text_block_passes_through():
    """A lone plain-dict block with type='text' becomes the whole result."""
    only_block = {"type": "text", "text": "dict text block"}
    fake_provider = FakeProvider(_make_fake_response([only_block]))

    result = await _orch().execute(
        prompt="Test",
        context=MockContextManager(),
        providers={"default": fake_provider},
        tools={},
        hooks=EventRecorder(),
    )

    assert result == "dict text block", f"Unexpected result: {result!r}"


@pytest.mark.asyncio
async def test_dict_thinking_block_is_filtered():
    """A plain-dict block with type='thinking' must not reach the result.

    The thinking dict carries its payload under the ``"text"`` key, so only
    its ``"type"`` distinguishes it from the real text block beside it.
    """
    blocks = [
        {"type": "thinking", "text": "dict thinking block"},
        {"type": "text", "text": "dict real response"},
    ]
    fake_provider = FakeProvider(_make_fake_response(blocks))

    result = await _orch().execute(
        prompt="Test",
        context=MockContextManager(),
        providers={"default": fake_provider},
        tools={},
        hooks=EventRecorder(),
    )

    assert "dict thinking block" not in result, (
        f"Dict thinking block leaked into final_content: {result!r}"
    )
    assert "dict real response" in result, (
        f"Expected 'dict real response' in result but got: {result!r}"
    )