Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

akseljoonas HF Staff Claude Opus 4.6 (1M context) commited on Apr 3

Commit

263e229

1 Parent(s): 2383b4b

feat: LLM-based pruning for complex tool outputs (hf_jobs)

Tools in _LLM_PRUNE_TOOLS get a cheap LLM call (≤600 tokens) that
extracts the answer from the raw output, preserving job IDs, metrics,
and error messages exactly. Falls back to deterministic truncation if
the LLM call fails.

Three tool lists now control pruning behavior:
- _PRUNE_SKIP_TOOLS: never pruned (research, plan_tool)
- _LLM_PRUNE_TOOLS: LLM-summarised (hf_jobs)
- Everything else: deterministic 80-char truncation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

agent/context_manager/manager.py +70 -12
agent/core/agent_loop.py +3 -1

agent/context_manager/manager.py CHANGED Viewed

@@ -224,17 +224,38 @@ class ContextManager:
     # Tools whose outputs should never be pruned (too valuable to summarise)
     _PRUNE_SKIP_TOOLS: set[str] = {"research", "plan_tool"}
-    def prune_old_tool_outputs(self) -> None:
-        """Stage 1 compaction: deterministically truncate old tool outputs.
-        For any tool message older than the last 6 messages, replace content
-        exceeding 500 chars with a short one-line summary preserving
-        tool_call_id and name.
         """
         if len(self.items) <= 6:
             return
         cutoff = len(self.items) - 6
         for i in range(cutoff - 1, -1, -1):
             msg = self.items[i]
             if getattr(msg, "role", None) != "tool":
@@ -246,18 +267,55 @@ class ContextManager:
             tool_name = getattr(msg, "name", None) or "tool"
             if tool_name in self._PRUNE_SKIP_TOOLS:
                 continue
             preview = content[:80]
             total = len(content)
-            if tool_name == "hf_jobs":
-                summary = f"[hf_jobs: {preview}... ({total} chars)]"
-            elif tool_name == "bash":
-                # Try to extract exit_code from content
                 exit_code_part = ""
                 if "exit_code" in content[:200]:
                     for line in content[:200].splitlines():
                         if "exit_code" in line:
-                            exit_code_part = f"exit_code visible if present, "
                             break
                 summary = f"[bash: {exit_code_part}{preview}... ({total} chars)]"
             else:
@@ -269,7 +327,7 @@ class ContextManager:
         self, model_name: str, tool_specs: list[dict] | None = None
     ) -> None:
         """Remove old messages to keep history under target size"""
-        self.prune_old_tool_outputs()
         if (self.context_length <= self.max_context) or not self.items:
             return

     # Tools whose outputs should never be pruned (too valuable to summarise)
     _PRUNE_SKIP_TOOLS: set[str] = {"research", "plan_tool"}
+    # Tools whose outputs are pruned via a cheap LLM call instead of
+    # deterministic truncation (the output structure is too complex for
+    # a fixed head-slice to capture the answer reliably).
+    _LLM_PRUNE_TOOLS: set[str] = {"hf_jobs"}
+    async def prune_old_tool_outputs(self, model_name: str | None = None) -> None:
+        """Stage 1 compaction: shrink old tool outputs.
+        For any tool message older than the last 6 messages whose content
+        exceeds 500 chars:
+        - Tools in _LLM_PRUNE_TOOLS get a cheap LLM summarisation (≤600 tokens).
+        - All other tools get a deterministic one-line summary.
+        tool_call_id and name are always preserved.
         """
         if len(self.items) <= 6:
             return
         cutoff = len(self.items) - 6
+        # Find the preceding assistant tool_call arguments so the LLM
+        # knows what question the tool output was answering.
+        def _find_tool_call_args(tool_call_id: str) -> str | None:
+            for msg in self.items:
+                if getattr(msg, "role", None) != "assistant":
+                    continue
+                for tc in getattr(msg, "tool_calls", None) or []:
+                    tc_id = tc.id if hasattr(tc, "id") else tc.get("id")
+                    if tc_id == tool_call_id:
+                        fn = tc.function if hasattr(tc, "function") else tc.get("function", {})
+                        return fn.arguments if hasattr(fn, "arguments") else fn.get("arguments", "")
+            return None
         for i in range(cutoff - 1, -1, -1):
             msg = self.items[i]
             if getattr(msg, "role", None) != "tool":
             tool_name = getattr(msg, "name", None) or "tool"
             if tool_name in self._PRUNE_SKIP_TOOLS:
                 continue
+            # --- LLM-based pruning for complex tool outputs ---
+            if tool_name in self._LLM_PRUNE_TOOLS and model_name:
+                call_args = _find_tool_call_args(getattr(msg, "tool_call_id", ""))
+                context_line = (
+                    f"The tool was called with: {call_args}\n\n" if call_args else ""
+                )
+                try:
+                    hf_key = os.environ.get("INFERENCE_TOKEN")
+                    resp = await acompletion(
+                        model=model_name,
+                        messages=[
+                            Message(
+                                role="user",
+                                content=(
+                                    f"{context_line}"
+                                    f"Below is the raw output of the '{tool_name}' tool.\n"
+                                    "Give the answer to the original request unchanged — "
+                                    "preserve all job IDs, numbers, status values, error "
+                                    "messages, and metrics exactly. Omit filler/boilerplate. "
+                                    "Stay under 600 tokens.\n\n"
+                                    f"{content}"
+                                ),
+                            )
+                        ],
+                        max_completion_tokens=600,
+                        api_key=hf_key
+                        if hf_key and model_name.startswith("huggingface/")
+                        else None,
+                    )
+                    msg.content = resp.choices[0].message.content
+                    continue
+                except Exception:
+                    logger.warning(
+                        "LLM prune failed for %s, falling back to deterministic",
+                        tool_name,
+                    )
+                    # fall through to deterministic pruning below
+            # --- Deterministic pruning ---
             preview = content[:80]
             total = len(content)
+            if tool_name == "bash":
                 exit_code_part = ""
                 if "exit_code" in content[:200]:
                     for line in content[:200].splitlines():
                         if "exit_code" in line:
+                            exit_code_part = "exit_code visible if present, "
                             break
                 summary = f"[bash: {exit_code_part}{preview}... ({total} chars)]"
             else:
         self, model_name: str, tool_specs: list[dict] | None = None
     ) -> None:
         """Remove old messages to keep history under target size"""
+        await self.prune_old_tool_outputs(model_name=model_name)
         if (self.context_length <= self.max_context) or not self.items:
             return

agent/core/agent_loop.py CHANGED Viewed

@@ -199,7 +199,9 @@ def _is_transient_error(error: Exception) -> bool:
 async def _compact_and_notify(session: Session) -> None:
     """Run compaction and send event if context was reduced."""
-    session.context_manager.prune_old_tool_outputs()
     old_length = session.context_manager.context_length
     tool_specs = session.tool_router.get_tool_specs_for_llm()
     await session.context_manager.compact(

 async def _compact_and_notify(session: Session) -> None:
     """Run compaction and send event if context was reduced."""
+    await session.context_manager.prune_old_tool_outputs(
+        model_name=session.config.model_name,
+    )
     old_length = session.context_manager.context_length
     tool_specs = session.tool_router.get_tool_specs_for_llm()
     await session.context_manager.compact(