Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
12b7c8f
1
Parent(s): 28bdef8
fix: auto-retry on truncated tool calls + correct activity status on refresh
Browse files
Agent loop: when finish_reason=length drops tool calls, inject a system
hint telling the LLM to use bash heredoc or smaller edits instead, then
continue the loop. Also set max_tokens=16384 and emit an error event so
the truncation is visible in the UI.
Frontend: extract actual tool name from in-progress message parts on
hydration instead of hardcoding 'running', which rendered as
"Running running" in the activity bar.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
agent/core/agent_loop.py
CHANGED
|
@@ -255,7 +255,7 @@ class Handlers:
|
|
| 255 |
tool_choice="auto",
|
| 256 |
stream=True,
|
| 257 |
stream_options={"include_usage": True},
|
| 258 |
-
timeout=600,
|
| 259 |
**llm_params,
|
| 260 |
)
|
| 261 |
|
|
@@ -320,11 +320,48 @@ class Handlers:
|
|
| 320 |
# ── Stream finished — reconstruct full message ───────
|
| 321 |
content = full_content or None
|
| 322 |
|
| 323 |
-
# If output was truncated, all tool call args are garbage
|
|
|
|
| 324 |
if finish_reason == "length" and tool_calls_acc:
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
tool_calls_acc.clear()
|
| 327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
# Build tool_calls list from accumulated deltas
|
| 329 |
tool_calls: list[ToolCall] = []
|
| 330 |
for idx in sorted(tool_calls_acc.keys()):
|
|
|
|
| 255 |
tool_choice="auto",
|
| 256 |
stream=True,
|
| 257 |
stream_options={"include_usage": True},
|
| 258 |
+
timeout=600, # 10 min — long tool-use turns can take a while
|
| 259 |
**llm_params,
|
| 260 |
)
|
| 261 |
|
|
|
|
| 320 |
# ── Stream finished — reconstruct full message ───────
|
| 321 |
content = full_content or None
|
| 322 |
|
| 323 |
+
# If output was truncated, all tool call args are garbage.
|
| 324 |
+
# Inject a system hint so the LLM retries with smaller content.
|
| 325 |
if finish_reason == "length" and tool_calls_acc:
|
| 326 |
+
dropped_names = [
|
| 327 |
+
tc["function"]["name"]
|
| 328 |
+
for tc in tool_calls_acc.values()
|
| 329 |
+
if tc["function"]["name"]
|
| 330 |
+
]
|
| 331 |
+
logger.warning(
|
| 332 |
+
"Output truncated (finish_reason=length) — dropping tool calls: %s",
|
| 333 |
+
dropped_names,
|
| 334 |
+
)
|
| 335 |
tool_calls_acc.clear()
|
| 336 |
|
| 337 |
+
# Tell the agent what happened so it can retry differently
|
| 338 |
+
truncation_hint = (
|
| 339 |
+
"Your previous response was truncated because the output hit the "
|
| 340 |
+
"token limit. The following tool calls were lost: "
|
| 341 |
+
f"{dropped_names}. "
|
| 342 |
+
"IMPORTANT: Do NOT retry with the same large content. Instead:\n"
|
| 343 |
+
" • For 'write': use bash with cat<<'HEREDOC' to write the file, "
|
| 344 |
+
"or split into several smaller edit calls.\n"
|
| 345 |
+
" • For other tools: reduce the size of your arguments or use bash."
|
| 346 |
+
)
|
| 347 |
+
if content:
|
| 348 |
+
assistant_msg = Message(role="assistant", content=content)
|
| 349 |
+
session.context_manager.add_message(assistant_msg, token_count)
|
| 350 |
+
session.context_manager.add_message(
|
| 351 |
+
Message(role="user", content=f"[SYSTEM: {truncation_hint}]")
|
| 352 |
+
)
|
| 353 |
+
await session.send_event(
|
| 354 |
+
Event(event_type="assistant_stream_end", data={})
|
| 355 |
+
)
|
| 356 |
+
await session.send_event(
|
| 357 |
+
Event(
|
| 358 |
+
event_type="error",
|
| 359 |
+
data={"error": f"Output truncated — retrying with smaller content ({dropped_names})"},
|
| 360 |
+
)
|
| 361 |
+
)
|
| 362 |
+
iteration += 1
|
| 363 |
+
continue # retry this iteration
|
| 364 |
+
|
| 365 |
# Build tool_calls list from accumulated deltas
|
| 366 |
tool_calls: list[ToolCall] = []
|
| 367 |
for idx in sorted(tool_calls_acc.keys()):
|
frontend/src/hooks/useAgentChat.ts
CHANGED
|
@@ -327,7 +327,12 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
|
|
| 327 |
if (hasPending) {
|
| 328 |
updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });
|
| 329 |
} else if (hasRunning) {
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
}
|
| 332 |
}
|
| 333 |
}
|
|
|
|
| 327 |
if (hasPending) {
|
| 328 |
updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });
|
| 329 |
} else if (hasRunning) {
|
| 330 |
+
// Extract the actual tool name from the last in-progress tool part
|
| 331 |
+
const runningPart = lastAssistant.parts.find(
|
| 332 |
+
p => p.type === 'dynamic-tool' && (p.state === 'input-available' || p.state === 'input-streaming'),
|
| 333 |
+
);
|
| 334 |
+
const runningToolName = (runningPart && 'toolName' in runningPart) ? runningPart.toolName : undefined;
|
| 335 |
+
updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'tool', toolName: runningToolName || 'sandbox' } });
|
| 336 |
}
|
| 337 |
}
|
| 338 |
}
|