akseljoonas HF Staff Claude Opus 4.6 committed on
Commit
c7899fc
·
1 Parent(s): e9064f3

fix: recover malformed tool calls instead of crashing session

Browse files

When the LLM produces malformed JSON in tool call arguments (common with
large write calls), the session would crash. Now
ContextManager.recover_malformed_tool_calls() sanitizes the JSON, injects
an error result telling the agent to retry with smaller content, and the
agent loop skips execution of those tools.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

agent/context_manager/manager.py CHANGED
@@ -133,14 +133,111 @@ class ContextManager:
133
  def get_messages(self) -> list[Message]:
134
  """Get all messages for sending to LLM.
135
 
136
- Automatically patches any dangling tool_calls (assistant messages
137
- with tool_calls that have no matching tool-result message). This
138
- can happen after errors or cancellations and would cause the LLM
139
- API to reject the request.
 
140
  """
 
141
  self._patch_dangling_tool_calls()
142
  return self.items
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def _patch_dangling_tool_calls(self) -> None:
145
  """Add stub tool results for any tool_calls that lack a matching result.
146
 
@@ -165,6 +262,7 @@ class ContextManager:
165
  if not assistant_msg:
166
  return
167
 
 
168
  answered_ids = {
169
  getattr(m, "tool_call_id", None)
170
  for m in self.items
 
133
  def get_messages(self) -> list[Message]:
134
  """Get all messages for sending to LLM.
135
 
136
+ Automatically recovers malformed tool_call arguments and patches
137
+ any dangling tool_calls (assistant messages with tool_calls that
138
+ have no matching tool-result message). Both can happen after
139
+ errors or cancellations and would cause the LLM API to reject the
140
+ request.
141
  """
142
+ self.recover_malformed_tool_calls()
143
  self._patch_dangling_tool_calls()
144
  return self.items
145
 
146
@staticmethod
def _normalize_tool_calls(msg: Message) -> None:
    """Convert any raw-dict entries of ``msg.tool_calls`` into ToolCall objects.

    litellm's Message has validate_assignment=False (the Pydantic v2
    default), so a direct attribute assignment (e.g. inside litellm's
    streaming handler) can leave plain dicts in the list. Building each
    dict through the ToolCall constructor restores proper objects.

    The message is modified in place. Nothing is reassigned when
    ``tool_calls`` is missing, empty, or already free of dicts.
    """
    from litellm import ChatCompletionMessageToolCall as ToolCall

    calls = getattr(msg, "tool_calls", None)
    # Nothing to do: no tool calls at all, or none of them is a raw dict.
    if not calls or not any(isinstance(call, dict) for call in calls):
        return

    rebuilt = []
    for call in calls:
        if isinstance(call, dict):
            call = ToolCall(**call)
        rebuilt.append(call)
    msg.tool_calls = rebuilt
166
+
167
def recover_malformed_tool_calls(self) -> set[str]:
    """Sanitize malformed tool_call arguments and inject error results.

    For every tool_call on an assistant message whose arguments cannot be
    parsed as JSON:

    1. The arguments are replaced with ``"{}"`` so the context stays
       valid for the LLM API.
    2. If no ``tool`` message answers that call yet, one is inserted
       directly after the assistant message that issued the call. It
       explains the error and asks the agent to retry with smaller
       content. Inserting next to the owning message (rather than
       appending at the end of the history) keeps the result adjacent
       to its tool_calls even when the malformed call is not in the
       last message.

    Repeated calls are safe: once arguments have been replaced with
    ``"{}"`` they parse cleanly, so a later call changes nothing. Note
    that this also means the IDs are reported only by the call that
    performs the sanitization; later calls return an empty set.

    Returns:
        Set of tool_call IDs whose arguments were found malformed (and
        sanitized) during this call.
    """
    import json

    malformed_ids: set[str] = set()

    # 1. Find and sanitize malformed arguments.
    for msg in self.items:
        if getattr(msg, "role", None) != "assistant":
            continue
        if not getattr(msg, "tool_calls", None):
            continue
        # Raw dicts have no ``.function`` attribute; convert them first.
        self._normalize_tool_calls(msg)
        for tc in msg.tool_calls:
            try:
                json.loads(tc.function.arguments)
            except (json.JSONDecodeError, TypeError, ValueError) as e:
                logger.warning(
                    "Malformed arguments for tool_call %s (%s): %s",
                    tc.id, tc.function.name, e,
                )
                tc.function.arguments = "{}"
                malformed_ids.add(tc.id)

    if not malformed_ids:
        return malformed_ids

    # 2. Inject error results for malformed calls that don't have one yet.
    answered_ids = {
        getattr(m, "tool_call_id", None)
        for m in self.items
        if getattr(m, "role", None) == "tool"
    }

    # Walk a snapshot so the live list is never mutated while it is being
    # iterated. ``insert_at`` is the live-list index just past the message
    # currently being examined, including results inserted so far.
    insert_at = 0
    for msg in list(self.items):
        insert_at += 1
        if getattr(msg, "role", None) != "assistant":
            continue
        for tc in getattr(msg, "tool_calls", None) or ():
            if tc.id not in malformed_ids or tc.id in answered_ids:
                continue
            self.items.insert(
                insert_at,
                Message(
                    role="tool",
                    content=(
                        f"ERROR: Your tool call to '{tc.function.name}' had malformed "
                        f"JSON arguments and was NOT executed. This usually happens "
                        f"when the content is too large and gets truncated. "
                        f"Please retry with smaller content — for 'write', split the "
                        f"file into multiple smaller writes using 'edit' to build up "
                        f"the file incrementally."
                    ),
                    tool_call_id=tc.id,
                    name=tc.function.name,
                ),
            )
            insert_at += 1
            answered_ids.add(tc.id)

    return malformed_ids
240
+
241
  def _patch_dangling_tool_calls(self) -> None:
242
  """Add stub tool results for any tool_calls that lack a matching result.
243
 
 
262
  if not assistant_msg:
263
  return
264
 
265
+ self._normalize_tool_calls(assistant_msg)
266
  answered_ids = {
267
  getattr(m, "tool_call_id", None)
268
  for m in self.items
agent/core/agent_loop.py CHANGED
@@ -357,11 +357,32 @@ class Handlers:
357
  if session.is_cancelled:
358
  break
359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  # Separate tools into those requiring approval and those that don't
361
  approval_required_tools = []
362
  non_approval_tools = []
363
 
364
  for tc in tool_calls:
 
 
365
  tool_name = tc.function.name
366
  try:
367
  tool_args = json.loads(tc.function.arguments)
 
357
  if session.is_cancelled:
358
  break
359
 
360
+ # Recover any malformed tool calls (sanitize JSON + inject
361
+ # error results). Returns IDs to skip during execution.
362
+ malformed_ids = session.context_manager.recover_malformed_tool_calls()
363
+ for mid in malformed_ids:
364
+ await session.send_event(
365
+ Event(
366
+ event_type="tool_output",
367
+ data={
368
+ "tool": next(
369
+ (tc.function.name for tc in tool_calls if tc.id == mid),
370
+ "unknown",
371
+ ),
372
+ "tool_call_id": mid,
373
+ "output": "Malformed tool call — see error in context.",
374
+ "success": False,
375
+ },
376
+ )
377
+ )
378
+
379
  # Separate tools into those requiring approval and those that don't
380
  approval_required_tools = []
381
  non_approval_tools = []
382
 
383
  for tc in tool_calls:
384
+ if tc.id in malformed_ids:
385
+ continue
386
  tool_name = tc.function.name
387
  try:
388
  tool_args = json.loads(tc.function.arguments)