Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Aksel Joonas Reedi
Fix CLI rendering corruption and split CLI/frontend model defaults (#121)
# 3eec386 unverified
"""Regression test for the malformed-JSON loop in observatory session
7750e82f (2026-04-25): GLM-5.1 produced six consecutive ``write`` calls
whose ``arguments`` strings JSON-parse-failed (truncated mid-stream by
the provider). The soft retry hint didn't move the model. The detector
in ``_detect_repeated_malformed`` looks for the streak so the agent loop
can inject a hard system-prompt forcing a different strategy.
"""
| from litellm import Message | |
| from agent.core.agent_loop import _detect_repeated_malformed | |
def _malformed_tool_msg(name: str, call_id: str) -> Message:
    """Build a ``tool``-role message reporting malformed JSON arguments.

    Args:
        name: Tool name; interpolated into the error text and also set as
            the message ``name``.
        call_id: Value passed through as the message ``tool_call_id``.

    Returns:
        A ``Message`` with ``role="tool"`` whose content is the error text.
    """
    # The retry advice always mentions 'write'/'edit', whatever ``name`` is.
    error_text = (
        f"ERROR: Tool call to '{name}' had malformed JSON arguments and "
        f"was NOT executed. Retry with smaller content — for 'write', "
        f"split into multiple smaller writes using 'edit'."
    )
    return Message(
        role="tool",
        content=error_text,
        tool_call_id=call_id,
        name=name,
    )
def test_two_consecutive_malformed_same_tool_triggers():
    """Two malformed ``write`` results with ``threshold=2`` yield ``"write"``."""
    history = [Message(role="user", content="write a big plan")]
    # Each round is an assistant turn followed by a malformed tool result.
    for call_id in ("1", "2"):
        history.append(Message(role="assistant", content=None))
        history.append(_malformed_tool_msg("write", call_id))

    detected = _detect_repeated_malformed(history, threshold=2)
    assert detected == "write"
def test_one_malformed_does_not_trigger():
    """One malformed ``write`` result with ``threshold=2`` yields ``None``."""
    user_turn = Message(role="user", content="write a plan")
    assistant_turn = Message(role="assistant", content=None)
    failed_write = _malformed_tool_msg("write", "1")

    outcome = _detect_repeated_malformed(
        [user_turn, assistant_turn, failed_write],
        threshold=2,
    )
    assert outcome is None
def test_two_malformed_different_tools_does_not_trigger():
    """Malformed results for ``write`` then ``bash`` yield ``None``."""
    history = []
    # Same shape as a streak, but the tool name differs between rounds.
    for tool_name, call_id in (("write", "1"), ("bash", "2")):
        history.append(Message(role="assistant", content=None))
        history.append(_malformed_tool_msg(tool_name, call_id))

    outcome = _detect_repeated_malformed(history, threshold=2)
    assert outcome is None
def test_streak_broken_by_successful_tool_call_does_not_trigger():
    """An ``"ok"`` ``write`` result between two malformed ones yields ``None``."""
    first_failure = [
        Message(role="assistant", content=None),
        _malformed_tool_msg("write", "1"),
    ]
    # A tool result with plain "ok" content sits between the two failures.
    ok_round = [
        Message(role="assistant", content=None),
        Message(role="tool", content="ok", tool_call_id="2", name="write"),
    ]
    second_failure = [
        Message(role="assistant", content=None),
        _malformed_tool_msg("write", "3"),
    ]
    history = [*first_failure, *ok_round, *second_failure]

    verdict = _detect_repeated_malformed(history, threshold=2)
    assert verdict is None