Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

akseljoonas HF Staff Claude Opus 4.6 committed on Mar 11

Commit

c7899fc

1 Parent(s): e9064f3

fix: recover malformed tool calls instead of crashing session

When the LLM produces malformed JSON in tool call arguments (common with
large write calls), the session would crash. Now
ContextManager.recover_malformed_tool_calls() sanitizes the JSON, injects
an error result telling the agent to retry with smaller content, and the
agent loop skips execution of those tools.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

agent/context_manager/manager.py +102 -4
agent/core/agent_loop.py +21 -0

agent/context_manager/manager.py CHANGED Viewed

@@ -133,14 +133,111 @@ class ContextManager:
     def get_messages(self) -> list[Message]:
         """Get all messages for sending to LLM.
-        Automatically patches any dangling tool_calls (assistant messages
-        with tool_calls that have no matching tool-result message).  This
-        can happen after errors or cancellations and would cause the LLM
-        API to reject the request.
         """
         self._patch_dangling_tool_calls()
         return self.items
     def _patch_dangling_tool_calls(self) -> None:
         """Add stub tool results for any tool_calls that lack a matching result.
@@ -165,6 +262,7 @@ class ContextManager:
         if not assistant_msg:
             return
         answered_ids = {
             getattr(m, "tool_call_id", None)
             for m in self.items

     def get_messages(self) -> list[Message]:
         """Get all messages for sending to LLM.
+
+        Automatically recovers malformed tool_call arguments and patches
+        any dangling tool_calls (assistant messages with tool_calls that
+        have no matching tool-result message).  Both can happen after
+        errors or cancellations and would cause the LLM API to reject the
+        request.
+
+        Returns:
+            The context's own ``items`` list (not a copy), after both
+            repair steps have run.
         """
+        # Sanitize malformed arguments first so that any error results it
+        # appends already exist when dangling tool_calls are patched.  The
+        # returned set of IDs is intentionally ignored here.
+        self.recover_malformed_tool_calls()
         self._patch_dangling_tool_calls()
         return self.items
+    @staticmethod
+    def _normalize_tool_calls(msg: Message) -> None:
+        """Replace raw-dict entries in ``msg.tool_calls`` with ToolCall objects.
+
+        litellm's Message has validate_assignment=False (Pydantic v2 default),
+        so direct attribute assignment (e.g. inside litellm's streaming handler)
+        can leave raw dicts behind.  Each dict entry is rebuilt through the
+        ToolCall constructor; entries that are already objects are kept as-is.
+        Messages without tool_calls, or without any dict entries, are left
+        untouched.
+        """
+        from litellm import ChatCompletionMessageToolCall as ToolCall
+
+        calls = getattr(msg, "tool_calls", None)
+        # Nothing to do when there are no tool calls or none of them is a dict.
+        if not calls or not any(isinstance(call, dict) for call in calls):
+            return
+
+        rebuilt = []
+        for call in calls:
+            if isinstance(call, dict):
+                rebuilt.append(ToolCall(**call))
+            else:
+                rebuilt.append(call)
+        msg.tool_calls = rebuilt
+    def recover_malformed_tool_calls(self) -> set[str]:
+        """Sanitize malformed tool_call arguments and inject error results.
+
+        For every assistant tool_call whose arguments do not parse as a
+        JSON object:
+
+        1. Replaces the arguments with ``"{}"`` so the context stays
+           valid for the LLM API.
+        2. Appends a ``tool`` result message (unless a result with the same
+           ``tool_call_id`` already exists) explaining the error and
+           asking the agent to retry with smaller content.
+
+        Repeated calls never inject duplicate results, so it is safe to
+        call this from both the agent loop (before tool execution) and from
+        :meth:`get_messages` (safety net).  Note, however, that only the
+        call that performs the sanitization reports the affected IDs: once
+        the arguments have been rewritten to ``"{}"`` they parse cleanly,
+        so later calls return an empty set.
+
+        Returns:
+            Set of tool_call IDs whose arguments were sanitized by this call.
+        """
+        import json
+
+        malformed_ids: set[str] = set()
+        # (tool_call id, tool name) for each sanitized call, in history order.
+        sanitized: list[tuple[str, str]] = []
+
+        # 1. Find and sanitize malformed arguments
+        for msg in self.items:
+            if getattr(msg, "role", None) != "assistant":
+                continue
+            if not getattr(msg, "tool_calls", None):
+                continue
+            self._normalize_tool_calls(msg)
+            for tc in msg.tool_calls:
+                try:
+                    parsed = json.loads(tc.function.arguments)
+                except (json.JSONDecodeError, TypeError, ValueError) as e:
+                    problem: object = e
+                else:
+                    if isinstance(parsed, dict):
+                        continue
+                    # Valid JSON that is not an object (e.g. ``null`` or a
+                    # list) is still unusable: tool arguments are expected
+                    # to be a JSON object.
+                    problem = f"expected a JSON object, got {type(parsed).__name__}"
+                logger.warning(
+                    "Malformed arguments for tool_call %s (%s): %s",
+                    tc.id, tc.function.name, problem,
+                )
+                tc.function.arguments = "{}"
+                malformed_ids.add(tc.id)
+                sanitized.append((tc.id, tc.function.name))
+
+        if not malformed_ids:
+            return malformed_ids
+
+        # 2. Build error results for malformed calls that don't have one yet
+        answered_ids = {
+            getattr(m, "tool_call_id", None)
+            for m in self.items
+            if getattr(m, "role", None) == "tool"
+        }
+        error_results = []
+        for call_id, tool_name in sanitized:
+            if call_id in answered_ids:
+                continue
+            error_results.append(
+                Message(
+                    role="tool",
+                    content=(
+                        f"ERROR: Your tool call to '{tool_name}' had malformed "
+                        f"JSON arguments and was NOT executed. This usually happens "
+                        f"when the content is too large and gets truncated. "
+                        f"Please retry with smaller content — for 'write', split the "
+                        f"file into multiple smaller writes using 'edit' to build up "
+                        f"the file incrementally."
+                    ),
+                    tool_call_id=call_id,
+                    name=tool_name,
+                )
+            )
+            answered_ids.add(call_id)
+
+        # Extend only after every loop over self.items has finished, so the
+        # list is never mutated while it is being iterated.
+        self.items.extend(error_results)
+        return malformed_ids
     def _patch_dangling_tool_calls(self) -> None:
         """Add stub tool results for any tool_calls that lack a matching result.
         if not assistant_msg:
             return
+        self._normalize_tool_calls(assistant_msg)
         answered_ids = {
             getattr(m, "tool_call_id", None)
             for m in self.items

agent/core/agent_loop.py CHANGED Viewed

@@ -357,11 +357,32 @@ class Handlers:
                 if session.is_cancelled:
                     break
                 # Separate tools into those requiring approval and those that don't
                 approval_required_tools = []
                 non_approval_tools = []
                 for tc in tool_calls:
                     tool_name = tc.function.name
                     try:
                         tool_args = json.loads(tc.function.arguments)

                 if session.is_cancelled:
                     break
+                # Recover any malformed tool calls (sanitize JSON + inject
+                # error results).  Returns IDs to skip during execution.
+                malformed_ids = session.context_manager.recover_malformed_tool_calls()
+                for mid in malformed_ids:
+                    await session.send_event(
+                        Event(
+                            event_type="tool_output",
+                            data={
+                                "tool": next(
+                                    (tc.function.name for tc in tool_calls if tc.id == mid),
+                                    "unknown",
+                                ),
+                                "tool_call_id": mid,
+                                "output": "Malformed tool call — see error in context.",
+                                "success": False,
+                            },
+                        )
+                    )
                 # Separate tools into those requiring approval and those that don't
                 approval_required_tools = []
                 non_approval_tools = []
                 for tc in tool_calls:
+                    if tc.id in malformed_ids:
+                        continue
                     tool_name = tc.function.name
                     try:
                         tool_args = json.loads(tc.function.arguments)