Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

akseljoonas HF Staff Claude Opus 4.6 committed on Mar 24

Commit

12b7c8f

1 Parent(s): 28bdef8

fix: auto-retry on truncated tool calls + correct activity status on refresh

Agent loop: when finish_reason=length drops tool calls, inject a
"[SYSTEM: …]" hint (added to the context as a user-role message) telling
the LLM to use bash heredoc or smaller edits instead, then continue the
loop. Also set max_tokens=16384 and emit an error event so the truncation
is visible in the UI.

Frontend: extract actual tool name from in-progress message parts on
hydration instead of hardcoding 'running', which rendered as
"Running running" in the activity bar.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

agent/core/agent_loop.py +40 -3
frontend/src/hooks/useAgentChat.ts +6 -1

agent/core/agent_loop.py CHANGED Viewed

@@ -255,7 +255,7 @@ class Handlers:
                     tool_choice="auto",
                     stream=True,
                     stream_options={"include_usage": True},
-                    timeout=600,  # 10 min — long tool-use turns can take a while
                     **llm_params,
                 )
@@ -320,11 +320,48 @@ class Handlers:
                 # ── Stream finished — reconstruct full message ───────
                 content = full_content or None
-                # If output was truncated, all tool call args are garbage
                 if finish_reason == "length" and tool_calls_acc:
-                    logger.warning("Output truncated (finish_reason=length) — dropping tool calls")
                     tool_calls_acc.clear()
                 # Build tool_calls list from accumulated deltas
                 tool_calls: list[ToolCall] = []
                 for idx in sorted(tool_calls_acc.keys()):

                     tool_choice="auto",
                     stream=True,
                     stream_options={"include_usage": True},
+                    timeout=600,       # 10 min — long tool-use turns can take a while
                     **llm_params,
                 )
                 # ── Stream finished — reconstruct full message ───────
                 content = full_content or None
+                # If output was truncated, all tool call args are garbage.
+                # Inject a system hint so the LLM retries with smaller content.
                 if finish_reason == "length" and tool_calls_acc:
+                    dropped_names = [
+                        tc["function"]["name"]
+                        for tc in tool_calls_acc.values()
+                        if tc["function"]["name"]
+                    ]
+                    logger.warning(
+                        "Output truncated (finish_reason=length) — dropping tool calls: %s",
+                        dropped_names,
+                    )
                     tool_calls_acc.clear()
+                    # Tell the agent what happened so it can retry differently
+                    truncation_hint = (
+                        "Your previous response was truncated because the output hit the "
+                        "token limit. The following tool calls were lost: "
+                        f"{dropped_names}. "
+                        "IMPORTANT: Do NOT retry with the same large content. Instead:\n"
+                        "  • For 'write': use bash with cat<<'HEREDOC' to write the file, "
+                        "or split into several smaller edit calls.\n"
+                        "  • For other tools: reduce the size of your arguments or use bash."
+                    )
+                    if content:
+                        assistant_msg = Message(role="assistant", content=content)
+                        session.context_manager.add_message(assistant_msg, token_count)
+                    session.context_manager.add_message(
+                        Message(role="user", content=f"[SYSTEM: {truncation_hint}]")
+                    )
+                    await session.send_event(
+                        Event(event_type="assistant_stream_end", data={})
+                    )
+                    await session.send_event(
+                        Event(
+                            event_type="error",
+                            data={"error": f"Output truncated — retrying with smaller content ({dropped_names})"},
+                        )
+                    )
+                    iteration += 1
+                    continue  # retry this iteration
                 # Build tool_calls list from accumulated deltas
                 tool_calls: list[ToolCall] = []
                 for idx in sorted(tool_calls_acc.keys()):

frontend/src/hooks/useAgentChat.ts CHANGED Viewed

@@ -327,7 +327,12 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
               if (hasPending) {
                 updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });
               } else if (hasRunning) {
-                updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'tool', toolName: 'running' } });
               }
             }
           }

               if (hasPending) {
                 updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });
               } else if (hasRunning) {
+                // Extract the actual tool name from the last in-progress tool part
+                const runningPart = lastAssistant.parts.find(
+                  p => p.type === 'dynamic-tool' && (p.state === 'input-available' || p.state === 'input-streaming'),
+                );
+                const runningToolName = (runningPart && 'toolName' in runningPart) ? runningPart.toolName : undefined;
+                updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'tool', toolName: runningToolName || 'sandbox' } });
               }
             }
           }