Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
950ca5c
1
Parent(s): 0c2eb90
Add per-tool error limits and per-turn request cap
Browse files- agent/config.py +2 -0
- agent/core/agent_loop.py +40 -1
agent/config.py
CHANGED
|
@@ -23,6 +23,8 @@ class Config(BaseModel):
|
|
| 23 |
session_dataset_repo: str = "akseljoonas/hf-agent-sessions"
|
| 24 |
auto_save_interval: int = 3 # Save every N user turns (0 = disabled)
|
| 25 |
yolo_mode: bool = False # Auto-approve all tool calls without confirmation
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# Permission control parameters
|
| 28 |
confirm_cpu_jobs: bool = True
|
|
|
|
| 23 |
session_dataset_repo: str = "akseljoonas/hf-agent-sessions"
|
| 24 |
auto_save_interval: int = 3 # Save every N user turns (0 = disabled)
|
| 25 |
yolo_mode: bool = False # Auto-approve all tool calls without confirmation
|
| 26 |
+
max_tool_failures_per_turn: int = 3 # Disable a tool after this many failures in one turn
|
| 27 |
+
max_requests_per_turn: int = 50 # Hard cap on LLM requests per agent turn
|
| 28 |
|
| 29 |
# Permission control parameters
|
| 30 |
confirm_cpu_jobs: bool = True
|
agent/core/agent_loop.py
CHANGED
|
@@ -152,6 +152,35 @@ _MAX_LLM_RETRIES = 3
|
|
| 152 |
_LLM_RETRY_DELAYS = [5, 15, 30] # seconds between retries
|
| 153 |
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
def _is_transient_error(error: Exception) -> bool:
|
| 156 |
"""Return True for errors that are likely transient and worth retrying."""
|
| 157 |
err_str = str(error).lower()
|
|
@@ -283,8 +312,10 @@ class Handlers:
|
|
| 283 |
iteration = 0
|
| 284 |
final_response = None
|
| 285 |
errored = False
|
|
|
|
| 286 |
|
| 287 |
-
|
|
|
|
| 288 |
# ── Cancellation check: before LLM call ──
|
| 289 |
if session.is_cancelled:
|
| 290 |
break
|
|
@@ -608,7 +639,15 @@ class Handlers:
|
|
| 608 |
results = gather_task.result()
|
| 609 |
|
| 610 |
# 4. Record results and send outputs (order preserved)
|
|
|
|
| 611 |
for tc, tool_name, tool_args, output, success in results:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
tool_msg = Message(
|
| 613 |
role="tool",
|
| 614 |
content=output,
|
|
|
|
| 152 |
_LLM_RETRY_DELAYS = [5, 15, 30] # seconds between retries
|
| 153 |
|
| 154 |
|
| 155 |
+
def _append_failure_warning(
|
| 156 |
+
output: str,
|
| 157 |
+
tool_name: str,
|
| 158 |
+
tool_error_counts: dict[str, int],
|
| 159 |
+
max_failures: int,
|
| 160 |
+
) -> str:
|
| 161 |
+
"""Track a tool failure and append a warning to the output.
|
| 162 |
+
|
| 163 |
+
Returns the output with an appended warning indicating how many
|
| 164 |
+
failures have occurred and whether the LLM should switch approach.
|
| 165 |
+
"""
|
| 166 |
+
tool_error_counts[tool_name] = tool_error_counts.get(tool_name, 0) + 1
|
| 167 |
+
count = tool_error_counts[tool_name]
|
| 168 |
+
if count >= max_failures:
|
| 169 |
+
return output + (
|
| 170 |
+
f"\n\n⚠ Tool '{tool_name}' has now failed "
|
| 171 |
+
f"{count} times this turn. You should try a "
|
| 172 |
+
f"different approach instead of calling this "
|
| 173 |
+
f"tool again."
|
| 174 |
+
)
|
| 175 |
+
remaining = max_failures - count
|
| 176 |
+
return output + (
|
| 177 |
+
f"\n\n⚠ Tool '{tool_name}' has failed "
|
| 178 |
+
f"{count}/{max_failures} times this turn. "
|
| 179 |
+
f"{remaining} attempt(s) before you should "
|
| 180 |
+
f"switch to a different approach."
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
def _is_transient_error(error: Exception) -> bool:
|
| 185 |
"""Return True for errors that are likely transient and worth retrying."""
|
| 186 |
err_str = str(error).lower()
|
|
|
|
| 312 |
iteration = 0
|
| 313 |
final_response = None
|
| 314 |
errored = False
|
| 315 |
+
tool_error_counts: dict[str, int] = {}
|
| 316 |
|
| 317 |
+
effective_max = min(max_iterations, session.config.max_requests_per_turn)
|
| 318 |
+
while iteration < effective_max:
|
| 319 |
# ── Cancellation check: before LLM call ──
|
| 320 |
if session.is_cancelled:
|
| 321 |
break
|
|
|
|
| 639 |
results = gather_task.result()
|
| 640 |
|
| 641 |
# 4. Record results and send outputs (order preserved)
|
| 642 |
+
max_failures = session.config.max_tool_failures_per_turn
|
| 643 |
for tc, tool_name, tool_args, output, success in results:
|
| 644 |
+
if not success:
|
| 645 |
+
output = _append_failure_warning(
|
| 646 |
+
output, tool_name, tool_error_counts, max_failures,
|
| 647 |
+
)
|
| 648 |
+
else:
|
| 649 |
+
tool_error_counts.pop(tool_name, None)
|
| 650 |
+
|
| 651 |
tool_msg = Message(
|
| 652 |
role="tool",
|
| 653 |
content=output,
|