Spaces:
Running on CPU Upgrade
Fix premature agent loop exit in long-running autonomous tasks
- Fix context_length initialization bug: was set to max_context (180K) on
startup, which is above the compaction threshold (170K), causing spurious
compaction on the very first iteration. Now starts at 0.
- Add diagnostics at loop exit point: log finish_reason, token_count,
context_length, and truncated response text whenever the agent loop
breaks due to no tool calls. Also emit a tool_log event for CLI visibility.
- Add logging at compaction and ContextWindowExceededError points so we
can trace context pressure in headless run logs.
- Add autonomous mode guidance to system prompt: instruct the model to
always include tool calls (text-only response kills the loop), continue
after training to evaluate/iterate/save, and check the timer.
- Add action hint to truncated bash output so the model doesn't stall
after receiving large training output.
- agent/context_manager/manager.py +1 -1
- agent/core/agent_loop.py +46 -0
- agent/main.py +2 -0
- agent/prompts/system_prompt_v3.yaml +20 -0
- agent/tools/local_tools.py +1 -0
|
@@ -89,7 +89,7 @@ class ContextManager:
|
|
| 89 |
)
|
| 90 |
self.max_context = max_context - 10000
|
| 91 |
self.compact_size = int(max_context * compact_size)
|
| 92 |
-
self.context_length =
|
| 93 |
self.untouched_messages = untouched_messages
|
| 94 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 95 |
|
|
|
|
| 89 |
)
|
| 90 |
self.max_context = max_context - 10000
|
| 91 |
self.compact_size = int(max_context * compact_size)
|
| 92 |
+
self.context_length = 0 # Updated after each LLM call with actual usage
|
| 93 |
self.untouched_messages = untouched_messages
|
| 94 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 95 |
|
|
@@ -204,6 +204,11 @@ async def _compact_and_notify(session: Session) -> None:
|
|
| 204 |
model_name=session.config.model_name,
|
| 205 |
)
|
| 206 |
old_length = session.context_manager.context_length
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
tool_specs = session.tool_router.get_tool_specs_for_llm()
|
| 208 |
await session.context_manager.compact(
|
| 209 |
model_name=session.config.model_name,
|
|
@@ -211,6 +216,11 @@ async def _compact_and_notify(session: Session) -> None:
|
|
| 211 |
)
|
| 212 |
new_length = session.context_manager.context_length
|
| 213 |
if new_length != old_length:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
await session.send_event(
|
| 215 |
Event(
|
| 216 |
event_type="compacted",
|
|
@@ -582,6 +592,34 @@ class Handlers:
|
|
| 582 |
|
| 583 |
# If no tool calls, add assistant message and we're done
|
| 584 |
if not tool_calls:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 585 |
if content:
|
| 586 |
assistant_msg = Message(role="assistant", content=content)
|
| 587 |
session.context_manager.add_message(assistant_msg, token_count)
|
|
@@ -788,6 +826,14 @@ class Handlers:
|
|
| 788 |
|
| 789 |
except ContextWindowExceededError:
|
| 790 |
# Force compact and retry this iteration
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 791 |
session.context_manager.context_length = (
|
| 792 |
session.context_manager.max_context + 1
|
| 793 |
)
|
|
|
|
| 204 |
model_name=session.config.model_name,
|
| 205 |
)
|
| 206 |
old_length = session.context_manager.context_length
|
| 207 |
+
max_ctx = session.context_manager.max_context
|
| 208 |
+
logger.debug(
|
| 209 |
+
"Compaction check: context_length=%d, max_context=%d, needs_compact=%s",
|
| 210 |
+
old_length, max_ctx, old_length > max_ctx,
|
| 211 |
+
)
|
| 212 |
tool_specs = session.tool_router.get_tool_specs_for_llm()
|
| 213 |
await session.context_manager.compact(
|
| 214 |
model_name=session.config.model_name,
|
|
|
|
| 216 |
)
|
| 217 |
new_length = session.context_manager.context_length
|
| 218 |
if new_length != old_length:
|
| 219 |
+
logger.warning(
|
| 220 |
+
"Context compacted: %d -> %d tokens (max=%d, %d messages)",
|
| 221 |
+
old_length, new_length, max_ctx,
|
| 222 |
+
len(session.context_manager.items),
|
| 223 |
+
)
|
| 224 |
await session.send_event(
|
| 225 |
Event(
|
| 226 |
event_type="compacted",
|
|
|
|
| 592 |
|
| 593 |
# If no tool calls, add assistant message and we're done
|
| 594 |
if not tool_calls:
|
| 595 |
+
logger.warning(
|
| 596 |
+
"Agent loop ending: no tool calls. "
|
| 597 |
+
"finish_reason=%s, token_count=%d, "
|
| 598 |
+
"context_length=%d, max_context=%d, "
|
| 599 |
+
"iteration=%d/%d, "
|
| 600 |
+
"response_text=%s",
|
| 601 |
+
finish_reason,
|
| 602 |
+
token_count,
|
| 603 |
+
session.context_manager.context_length,
|
| 604 |
+
session.context_manager.max_context,
|
| 605 |
+
iteration,
|
| 606 |
+
effective_max,
|
| 607 |
+
(content or "")[:500],
|
| 608 |
+
)
|
| 609 |
+
await session.send_event(
|
| 610 |
+
Event(
|
| 611 |
+
event_type="tool_log",
|
| 612 |
+
data={
|
| 613 |
+
"tool": "system",
|
| 614 |
+
"log": (
|
| 615 |
+
f"Loop exit: no tool calls. "
|
| 616 |
+
f"finish_reason={finish_reason}, "
|
| 617 |
+
f"tokens={token_count}/{session.context_manager.max_context}, "
|
| 618 |
+
f"iter={iteration}/{effective_max}"
|
| 619 |
+
),
|
| 620 |
+
},
|
| 621 |
+
)
|
| 622 |
+
)
|
| 623 |
if content:
|
| 624 |
assistant_msg = Message(role="assistant", content=content)
|
| 625 |
session.context_manager.add_message(assistant_msg, token_count)
|
|
|
|
| 826 |
|
| 827 |
except ContextWindowExceededError:
|
| 828 |
# Force compact and retry this iteration
|
| 829 |
+
logger.warning(
|
| 830 |
+
"ContextWindowExceededError at iteration %d — forcing compaction "
|
| 831 |
+
"(context_length=%d, max_context=%d, messages=%d)",
|
| 832 |
+
iteration,
|
| 833 |
+
session.context_manager.context_length,
|
| 834 |
+
session.context_manager.max_context,
|
| 835 |
+
len(session.context_manager.items),
|
| 836 |
+
)
|
| 837 |
session.context_manager.context_length = (
|
| 838 |
session.context_manager.max_context + 1
|
| 839 |
)
|
|
@@ -973,6 +973,8 @@ async def headless_main(prompt: str, model: str | None = None) -> None:
|
|
| 973 |
elif event.event_type in ("turn_complete", "interrupted"):
|
| 974 |
shimmer.stop()
|
| 975 |
stream_buf.discard()
|
|
|
|
|
|
|
| 976 |
break
|
| 977 |
|
| 978 |
# Shutdown
|
|
|
|
| 973 |
elif event.event_type in ("turn_complete", "interrupted"):
|
| 974 |
shimmer.stop()
|
| 975 |
stream_buf.discard()
|
| 976 |
+
history_size = event.data.get("history_size", "?") if event.data else "?"
|
| 977 |
+
print(f"\n--- Agent {event.event_type} (history_size={history_size}) ---", file=sys.stderr)
|
| 978 |
break
|
| 979 |
|
| 980 |
# Shutdown
|
|
@@ -112,6 +112,26 @@ system_prompt: |
|
|
| 112 |
Do not stop after describing what you plan to do. Continue calling tools until the task is verifiably done.
|
| 113 |
Do not mark plan tasks as completed if they failed or are only partially done.
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
# Communication
|
| 116 |
|
| 117 |
- Be concise and direct. No filler, no restating what the user said.
|
|
|
|
| 112 |
Do not stop after describing what you plan to do. Continue calling tools until the task is verifiably done.
|
| 113 |
Do not mark plan tasks as completed if they failed or are only partially done.
|
| 114 |
|
| 115 |
+
# Autonomous / headless mode
|
| 116 |
+
|
| 117 |
+
When running autonomously (no human in the loop), you MUST follow these rules:
|
| 118 |
+
|
| 119 |
+
NEVER respond with only text. Every response MUST include at least one tool call. If you have nothing to do, check the plan, check the timer, or verify outputs. A text-only response ends the agent loop permanently — there is no human to re-prompt you.
|
| 120 |
+
|
| 121 |
+
After training completes:
|
| 122 |
+
1. Check the output for errors or warnings
|
| 123 |
+
2. Copy/save the trained model to the required output location (e.g. final_model/)
|
| 124 |
+
3. Run evaluation to measure performance
|
| 125 |
+
4. If time remains and performance can improve: iterate (adjust hyperparameters, train longer, try different data)
|
| 126 |
+
5. Verify the final output exists and is valid before stopping
|
| 127 |
+
|
| 128 |
+
Check the remaining time periodically with the timer command specified in the task prompt. Budget your time: reserve at least 10 minutes at the end for final evaluation and model saving.
|
| 129 |
+
|
| 130 |
+
The task is NOT done until:
|
| 131 |
+
- The required output directory exists (e.g. final_model/) with a valid model
|
| 132 |
+
- You have evaluated the model and confirmed it works
|
| 133 |
+
- You have used all available time productively
|
| 134 |
+
|
| 135 |
# Communication
|
| 136 |
|
| 137 |
- Be concise and direct. No filler, no restating what the user said.
|
|
@@ -78,6 +78,7 @@ def _truncate_output(output: str, max_chars: int = MAX_OUTPUT_CHARS, head_ratio:
|
|
| 78 |
meta = f"\n\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\n"
|
| 79 |
if spill_path:
|
| 80 |
meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\n"
|
|
|
|
| 81 |
return head + meta + tail
|
| 82 |
|
| 83 |
|
|
|
|
| 78 |
meta = f"\n\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\n"
|
| 79 |
if spill_path:
|
| 80 |
meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\n"
|
| 81 |
+
meta += "IMPORTANT: The command has finished. Analyze the output above and continue with your next action.\n"
|
| 82 |
return head + meta + tail
|
| 83 |
|
| 84 |
|