akseljoonas HF Staff committed on
Commit
950ca5c
·
1 Parent(s): 0c2eb90

Add per-tool error limits and per-turn request cap

Browse files
Files changed (2) hide show
  1. agent/config.py +2 -0
  2. agent/core/agent_loop.py +40 -1
agent/config.py CHANGED
@@ -23,6 +23,8 @@ class Config(BaseModel):
23
  session_dataset_repo: str = "akseljoonas/hf-agent-sessions"
24
  auto_save_interval: int = 3 # Save every N user turns (0 = disabled)
25
  yolo_mode: bool = False # Auto-approve all tool calls without confirmation
 
 
26
 
27
  # Permission control parameters
28
  confirm_cpu_jobs: bool = True
 
23
  session_dataset_repo: str = "akseljoonas/hf-agent-sessions"
24
  auto_save_interval: int = 3 # Save every N user turns (0 = disabled)
25
  yolo_mode: bool = False # Auto-approve all tool calls without confirmation
26
+ max_tool_failures_per_turn: int = 3 # After this many consecutive failures of a tool in one turn, its output tells the LLM to switch approach (the tool is not disabled)
27
+ max_requests_per_turn: int = 50 # Hard cap on LLM requests per agent turn (0 = no requests are made, not "disabled")
28
 
29
  # Permission control parameters
30
  confirm_cpu_jobs: bool = True
agent/core/agent_loop.py CHANGED
@@ -152,6 +152,35 @@ _MAX_LLM_RETRIES = 3
152
  _LLM_RETRY_DELAYS = [5, 15, 30] # seconds between retries
153
 
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def _is_transient_error(error: Exception) -> bool:
156
  """Return True for errors that are likely transient and worth retrying."""
157
  err_str = str(error).lower()
@@ -283,8 +312,10 @@ class Handlers:
283
  iteration = 0
284
  final_response = None
285
  errored = False
 
286
 
287
- while iteration < max_iterations:
 
288
  # ── Cancellation check: before LLM call ──
289
  if session.is_cancelled:
290
  break
@@ -608,7 +639,15 @@ class Handlers:
608
  results = gather_task.result()
609
 
610
  # 4. Record results and send outputs (order preserved)
 
611
  for tc, tool_name, tool_args, output, success in results:
 
 
 
 
 
 
 
612
  tool_msg = Message(
613
  role="tool",
614
  content=output,
 
152
  _LLM_RETRY_DELAYS = [5, 15, 30] # seconds between retries
153
 
154
 
155
+ def _append_failure_warning(
156
+ output: str,
157
+ tool_name: str,
158
+ tool_error_counts: dict[str, int],
159
+ max_failures: int,
160
+ ) -> str:
161
+ """Track a tool failure and append a warning to the output.
162
+
163
+ Returns the output with an appended warning indicating how many
164
+ failures have occurred and whether the LLM should switch approach.
165
+ """
166
+ tool_error_counts[tool_name] = tool_error_counts.get(tool_name, 0) + 1
167
+ count = tool_error_counts[tool_name]
168
+ if count >= max_failures:
169
+ return output + (
170
+ f"\n\n⚠ Tool '{tool_name}' has now failed "
171
+ f"{count} times this turn. You should try a "
172
+ f"different approach instead of calling this "
173
+ f"tool again."
174
+ )
175
+ remaining = max_failures - count
176
+ return output + (
177
+ f"\n\n⚠ Tool '{tool_name}' has failed "
178
+ f"{count}/{max_failures} times this turn. "
179
+ f"{remaining} attempt(s) before you should "
180
+ f"switch to a different approach."
181
+ )
182
+
183
+
184
  def _is_transient_error(error: Exception) -> bool:
185
  """Return True for errors that are likely transient and worth retrying."""
186
  err_str = str(error).lower()
 
312
  iteration = 0
313
  final_response = None
314
  errored = False
315
+ tool_error_counts: dict[str, int] = {}
316
 
317
+ effective_max = min(max_iterations, session.config.max_requests_per_turn)
318
+ while iteration < effective_max:
319
  # ── Cancellation check: before LLM call ──
320
  if session.is_cancelled:
321
  break
 
639
  results = gather_task.result()
640
 
641
  # 4. Record results and send outputs (order preserved)
642
+ max_failures = session.config.max_tool_failures_per_turn
643
  for tc, tool_name, tool_args, output, success in results:
644
+ if not success:
645
+ output = _append_failure_warning(
646
+ output, tool_name, tool_error_counts, max_failures,
647
+ )
648
+ else:
649
+ tool_error_counts.pop(tool_name, None)
650
+
651
  tool_msg = Message(
652
  role="tool",
653
  content=output,