akseljoonas HF Staff committed on
Commit
fdddeaa
·
1 Parent(s): 9f009a3

Fix premature agent loop exit in long-running autonomous tasks

Browse files

- Fix context_length initialization bug: it was set to max_context (180K) on
startup, which is above the compaction threshold (170K), causing spurious
compaction on the very first iteration. It now starts at 0.

- Add diagnostics at loop exit point: log finish_reason, token_count,
context_length, and truncated response text whenever the agent loop
breaks due to no tool calls. Also emit a tool_log event for CLI visibility.

- Add logging at compaction and ContextWindowExceededError points so we
can trace context pressure in headless run logs.

- Add autonomous mode guidance to system prompt: instruct the model to
always include tool calls (text-only response kills the loop), continue
after training to evaluate/iterate/save, and check the timer.

- Add action hint to truncated bash output so the model doesn't stall
after receiving large training output.

agent/context_manager/manager.py CHANGED
@@ -89,7 +89,7 @@ class ContextManager:
89
  )
90
  self.max_context = max_context - 10000
91
  self.compact_size = int(max_context * compact_size)
92
- self.context_length = max_context
93
  self.untouched_messages = untouched_messages
94
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
95
 
 
89
  )
90
  self.max_context = max_context - 10000
91
  self.compact_size = int(max_context * compact_size)
92
+ self.context_length = 0 # Updated after each LLM call with actual usage
93
  self.untouched_messages = untouched_messages
94
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
95
 
agent/core/agent_loop.py CHANGED
@@ -204,6 +204,11 @@ async def _compact_and_notify(session: Session) -> None:
204
  model_name=session.config.model_name,
205
  )
206
  old_length = session.context_manager.context_length
 
 
 
 
 
207
  tool_specs = session.tool_router.get_tool_specs_for_llm()
208
  await session.context_manager.compact(
209
  model_name=session.config.model_name,
@@ -211,6 +216,11 @@ async def _compact_and_notify(session: Session) -> None:
211
  )
212
  new_length = session.context_manager.context_length
213
  if new_length != old_length:
 
 
 
 
 
214
  await session.send_event(
215
  Event(
216
  event_type="compacted",
@@ -582,6 +592,34 @@ class Handlers:
582
 
583
  # If no tool calls, add assistant message and we're done
584
  if not tool_calls:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
  if content:
586
  assistant_msg = Message(role="assistant", content=content)
587
  session.context_manager.add_message(assistant_msg, token_count)
@@ -788,6 +826,14 @@ class Handlers:
788
 
789
  except ContextWindowExceededError:
790
  # Force compact and retry this iteration
 
 
 
 
 
 
 
 
791
  session.context_manager.context_length = (
792
  session.context_manager.max_context + 1
793
  )
 
204
  model_name=session.config.model_name,
205
  )
206
  old_length = session.context_manager.context_length
207
+ max_ctx = session.context_manager.max_context
208
+ logger.debug(
209
+ "Compaction check: context_length=%d, max_context=%d, needs_compact=%s",
210
+ old_length, max_ctx, old_length > max_ctx,
211
+ )
212
  tool_specs = session.tool_router.get_tool_specs_for_llm()
213
  await session.context_manager.compact(
214
  model_name=session.config.model_name,
 
216
  )
217
  new_length = session.context_manager.context_length
218
  if new_length != old_length:
219
+ logger.warning(
220
+ "Context compacted: %d -> %d tokens (max=%d, %d messages)",
221
+ old_length, new_length, max_ctx,
222
+ len(session.context_manager.items),
223
+ )
224
  await session.send_event(
225
  Event(
226
  event_type="compacted",
 
592
 
593
  # If no tool calls, add assistant message and we're done
594
  if not tool_calls:
595
+ logger.warning(
596
+ "Agent loop ending: no tool calls. "
597
+ "finish_reason=%s, token_count=%d, "
598
+ "context_length=%d, max_context=%d, "
599
+ "iteration=%d/%d, "
600
+ "response_text=%s",
601
+ finish_reason,
602
+ token_count,
603
+ session.context_manager.context_length,
604
+ session.context_manager.max_context,
605
+ iteration,
606
+ effective_max,
607
+ (content or "")[:500],
608
+ )
609
+ await session.send_event(
610
+ Event(
611
+ event_type="tool_log",
612
+ data={
613
+ "tool": "system",
614
+ "log": (
615
+ f"Loop exit: no tool calls. "
616
+ f"finish_reason={finish_reason}, "
617
+ f"tokens={token_count}/{session.context_manager.max_context}, "
618
+ f"iter={iteration}/{effective_max}"
619
+ ),
620
+ },
621
+ )
622
+ )
623
  if content:
624
  assistant_msg = Message(role="assistant", content=content)
625
  session.context_manager.add_message(assistant_msg, token_count)
 
826
 
827
  except ContextWindowExceededError:
828
  # Force compact and retry this iteration
829
+ logger.warning(
830
+ "ContextWindowExceededError at iteration %d — forcing compaction "
831
+ "(context_length=%d, max_context=%d, messages=%d)",
832
+ iteration,
833
+ session.context_manager.context_length,
834
+ session.context_manager.max_context,
835
+ len(session.context_manager.items),
836
+ )
837
  session.context_manager.context_length = (
838
  session.context_manager.max_context + 1
839
  )
agent/main.py CHANGED
@@ -973,6 +973,8 @@ async def headless_main(prompt: str, model: str | None = None) -> None:
973
  elif event.event_type in ("turn_complete", "interrupted"):
974
  shimmer.stop()
975
  stream_buf.discard()
 
 
976
  break
977
 
978
  # Shutdown
 
973
  elif event.event_type in ("turn_complete", "interrupted"):
974
  shimmer.stop()
975
  stream_buf.discard()
976
+ history_size = event.data.get("history_size", "?") if event.data else "?"
977
+ print(f"\n--- Agent {event.event_type} (history_size={history_size}) ---", file=sys.stderr)
978
  break
979
 
980
  # Shutdown
agent/prompts/system_prompt_v3.yaml CHANGED
@@ -112,6 +112,26 @@ system_prompt: |
112
  Do not stop after describing what you plan to do. Continue calling tools until the task is verifiably done.
113
  Do not mark plan tasks as completed if they failed or are only partially done.
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  # Communication
116
 
117
  - Be concise and direct. No filler, no restating what the user said.
 
112
  Do not stop after describing what you plan to do. Continue calling tools until the task is verifiably done.
113
  Do not mark plan tasks as completed if they failed or are only partially done.
114
 
115
+ # Autonomous / headless mode
116
+
117
+ When running autonomously (no human in the loop), you MUST follow these rules:
118
+
119
+ NEVER respond with only text. Every response MUST include at least one tool call. If you have nothing to do, check the plan, check the timer, or verify outputs. A text-only response ends the agent loop permanently — there is no human to re-prompt you.
120
+
121
+ After training completes:
122
+ 1. Check the output for errors or warnings
123
+ 2. Copy/save the trained model to the required output location (e.g. final_model/)
124
+ 3. Run evaluation to measure performance
125
+ 4. If time remains and performance can improve: iterate (adjust hyperparameters, train longer, try different data)
126
+ 5. Verify the final output exists and is valid before stopping
127
+
128
+ Check the remaining time periodically with the timer command specified in the task prompt. Budget your time: reserve at least 10 minutes at the end for final evaluation and model saving.
129
+
130
+ The task is NOT done until:
131
+ - The required output directory exists (e.g. final_model/) with a valid model
132
+ - You have evaluated the model and confirmed it works
133
+ - You have used all available time productively
134
+
135
  # Communication
136
 
137
  - Be concise and direct. No filler, no restating what the user said.
agent/tools/local_tools.py CHANGED
@@ -78,6 +78,7 @@ def _truncate_output(output: str, max_chars: int = MAX_OUTPUT_CHARS, head_ratio:
78
  meta = f"\n\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\n"
79
  if spill_path:
80
  meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\n"
 
81
  return head + meta + tail
82
 
83
 
 
78
  meta = f"\n\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\n"
79
  if spill_path:
80
  meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\n"
81
+ meta += "IMPORTANT: The command has finished. Analyze the output above and continue with your next action.\n"
82
  return head + meta + tail
83
 
84