akseljoonas HF Staff Claude Opus 4.6 commited on
Commit
82b0c13
·
1 Parent(s): 288473a

feat: CLI local mode, slash commands, interrupt support; remove lmnr; frontend fixes

Browse files

CLI:
- Add local tools (bash/read/write/edit) via local_tools.py for CLI mode
- Add ToolRouter local_mode to use local tools instead of sandbox
- Add slash commands: /help, /undo, /compact, /model, /yolo, /status
- Add Ctrl+C interrupt support (single=cancel, double=exit)
- Add HF token auto-loading from env/huggingface-cli
- Add session_holder for interrupt/model/status access
- Add Session.update_model() method

Cleanup:
- Remove lmnr dependency and all observe/Laminar references
- Delete unused logo PNGs

Frontend:
- Show hardware pricing in tool approval UI
- Add sandbox explanation text
- Revert bash streaming to panel (caused issues)
- Fix setPanelOutput to not force panel view

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

agent/core/agent_loop.py CHANGED
@@ -9,7 +9,6 @@ import os
9
 
10
  from litellm import ChatCompletionMessageToolCall, Message, acompletion
11
  from litellm.exceptions import ContextWindowExceededError
12
- from lmnr import observe
13
 
14
  from agent.config import Config
15
  from agent.core.session import Event, OpType, Session
@@ -207,7 +206,6 @@ class Handlers:
207
  logger.info("Abandoned %d pending approval tool(s)", len(tool_calls))
208
 
209
  @staticmethod
210
- @observe(name="run_agent")
211
  async def run_agent(
212
  session: Session, text: str, max_iterations: int = 300
213
  ) -> str | None:
@@ -215,12 +213,6 @@ class Handlers:
215
  Handle user input (like user_input_or_turn in codex.rs:1291)
216
  Returns the final assistant response content, if any.
217
  """
218
- # Set session ID for this trace
219
- if hasattr(session, "session_id"):
220
- from lmnr import Laminar
221
-
222
- Laminar.set_trace_session_id(session_id=session.session_id)
223
-
224
  # Clear any stale cancellation flag from a previous run
225
  session.reset_cancel()
226
 
@@ -861,12 +853,13 @@ async def process_submission(session: Session, submission) -> bool:
861
  return True
862
 
863
 
864
- @observe(name="submission_loop")
865
  async def submission_loop(
866
  submission_queue: asyncio.Queue,
867
  event_queue: asyncio.Queue,
868
  config: Config | None = None,
869
  tool_router: ToolRouter | None = None,
 
 
870
  ) -> None:
871
  """
872
  Main agent loop - processes submissions and dispatches to handlers.
 
9
 
10
  from litellm import ChatCompletionMessageToolCall, Message, acompletion
11
  from litellm.exceptions import ContextWindowExceededError
 
12
 
13
  from agent.config import Config
14
  from agent.core.session import Event, OpType, Session
 
206
  logger.info("Abandoned %d pending approval tool(s)", len(tool_calls))
207
 
208
  @staticmethod
 
209
  async def run_agent(
210
  session: Session, text: str, max_iterations: int = 300
211
  ) -> str | None:
 
213
  Handle user input (like user_input_or_turn in codex.rs:1291)
214
  Returns the final assistant response content, if any.
215
  """
 
 
 
 
 
 
216
  # Clear any stale cancellation flag from a previous run
217
  session.reset_cancel()
218
 
 
853
  return True
854
 
855
 
 
856
  async def submission_loop(
857
  submission_queue: asyncio.Queue,
858
  event_queue: asyncio.Queue,
859
  config: Config | None = None,
860
  tool_router: ToolRouter | None = None,
861
+ session_holder: list | None = None,
862
+ hf_token: str | None = None,
863
  ) -> None:
864
  """
865
  Main agent loop - processes submissions and dispatches to handlers.
agent/core/session.py CHANGED
@@ -135,6 +135,11 @@ class Session:
135
  def is_cancelled(self) -> bool:
136
  return self._cancelled.is_set()
137
 
 
 
 
 
 
138
  def increment_turn(self) -> None:
139
  """Increment turn counter (called after each user interaction)"""
140
  self.turn_count += 1
 
135
  def is_cancelled(self) -> bool:
136
  return self._cancelled.is_set()
137
 
138
+ def update_model(self, model_name: str) -> None:
139
+ """Switch the active model and update the context window limit."""
140
+ self.config.model_name = model_name
141
+ self.context_manager.max_context = _get_max_tokens_safe(model_name)
142
+
143
  def increment_turn(self) -> None:
144
  """Increment turn counter (called after each user interaction)"""
145
  self.turn_count += 1
agent/core/tools.py CHANGED
@@ -12,7 +12,6 @@ logger = logging.getLogger(__name__)
12
 
13
  from fastmcp import Client
14
  from fastmcp.exceptions import ToolError
15
- from lmnr import observe
16
  from mcp.types import EmbeddedResource, ImageContent, TextContent
17
 
18
  from agent.config import MCPServerConfig
@@ -129,11 +128,11 @@ class ToolRouter:
129
  Based on codex-rs/core/src/tools/router.rs
130
  """
131
 
132
- def __init__(self, mcp_servers: dict[str, MCPServerConfig], hf_token: str | None = None):
133
  self.tools: dict[str, ToolSpec] = {}
134
  self.mcp_servers: dict[str, dict[str, Any]] = {}
135
 
136
- for tool in create_builtin_tools():
137
  self.register_tool(tool)
138
 
139
  self.mcp_client: Client | None = None
@@ -226,7 +225,6 @@ class ToolRouter:
226
  await self.mcp_client.__aexit__(exc_type, exc, tb)
227
  self._mcp_initialized = False
228
 
229
- @observe(name="call_tool")
230
  async def call_tool(
231
  self,
232
  tool_name: str,
@@ -275,7 +273,7 @@ class ToolRouter:
275
  # ============================================================================
276
 
277
 
278
- def create_builtin_tools() -> list[ToolSpec]:
279
  """Create built-in tool specifications"""
280
  # in order of importance
281
  tools = [
@@ -352,8 +350,12 @@ def create_builtin_tools() -> list[ToolSpec]:
352
  ),
353
  ]
354
 
355
- # Sandbox tools (highest priority)
356
- tools = get_sandbox_tools() + tools
 
 
 
 
357
 
358
  tool_names = ", ".join([t.name for t in tools])
359
  logger.info(f"Loaded {len(tools)} built-in tools: {tool_names}")
 
12
 
13
  from fastmcp import Client
14
  from fastmcp.exceptions import ToolError
 
15
  from mcp.types import EmbeddedResource, ImageContent, TextContent
16
 
17
  from agent.config import MCPServerConfig
 
128
  Based on codex-rs/core/src/tools/router.rs
129
  """
130
 
131
+ def __init__(self, mcp_servers: dict[str, MCPServerConfig], hf_token: str | None = None, local_mode: bool = False):
132
  self.tools: dict[str, ToolSpec] = {}
133
  self.mcp_servers: dict[str, dict[str, Any]] = {}
134
 
135
+ for tool in create_builtin_tools(local_mode=local_mode):
136
  self.register_tool(tool)
137
 
138
  self.mcp_client: Client | None = None
 
225
  await self.mcp_client.__aexit__(exc_type, exc, tb)
226
  self._mcp_initialized = False
227
 
 
228
  async def call_tool(
229
  self,
230
  tool_name: str,
 
273
  # ============================================================================
274
 
275
 
276
+ def create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]:
277
  """Create built-in tool specifications"""
278
  # in order of importance
279
  tools = [
 
350
  ),
351
  ]
352
 
353
+ # Sandbox or local tools (highest priority)
354
+ if local_mode:
355
+ from agent.tools.local_tools import get_local_tools
356
+ tools = get_local_tools() + tools
357
+ else:
358
+ tools = get_sandbox_tools() + tools
359
 
360
  tool_names = ", ".join([t.name for t in tools])
361
  logger.info(f"Loaded {len(tools)} built-in tools: {tool_names}")
agent/main.py CHANGED
@@ -5,12 +5,12 @@ Interactive CLI chat with the agent
5
  import asyncio
6
  import json
7
  import os
 
8
  from dataclasses import dataclass
9
  from pathlib import Path
10
  from typing import Any, Optional
11
 
12
  import litellm
13
- from lmnr import Laminar, LaminarLiteLLMCallback
14
  from prompt_toolkit import PromptSession
15
 
16
  from agent.config import load_config
@@ -31,6 +31,15 @@ from agent.utils.terminal_display import (
31
 
32
  litellm.drop_params = True
33
 
 
 
 
 
 
 
 
 
 
34
 
35
  def _safe_get_args(arguments: dict) -> dict:
36
  """Safely extract args dict from arguments, handling cases where LLM passes string."""
@@ -41,15 +50,20 @@ def _safe_get_args(arguments: dict) -> dict:
41
  return args if isinstance(args, dict) else {}
42
 
43
 
44
- lmnr_api_key = os.environ.get("LMNR_API_KEY")
45
- if lmnr_api_key:
 
 
 
46
  try:
47
- Laminar.initialize(project_api_key=lmnr_api_key)
48
- litellm.callbacks = [LaminarLiteLLMCallback()]
49
- print("Laminar initialized")
50
- except Exception as e:
51
- print(f"Failed to initialize Laminar: {e}")
52
-
 
 
53
 
54
  @dataclass
55
  class Operation:
@@ -112,6 +126,22 @@ async def event_listener(
112
  if plan_display:
113
  print(plan_display)
114
  turn_complete_event.set()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  elif event.event_type == "error":
116
  error = (
117
  event.data.get("error", "Unknown error")
@@ -127,7 +157,7 @@ async def event_listener(
127
  elif event.event_type == "compacted":
128
  old_tokens = event.data.get("old_tokens", 0) if event.data else 0
129
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
130
- print(f"Compacted context: {old_tokens} {new_tokens} tokens")
131
  elif event.event_type == "approval_required":
132
  # Handle batch approval format
133
  tools_data = event.data.get("tools", []) if event.data else []
@@ -143,7 +173,7 @@ async def event_listener(
143
  }
144
  for t in tools_data
145
  ]
146
- print(f"\n YOLO MODE: Auto-approving {count} item(s)")
147
  submission_id[0] += 1
148
  approval_submission = Submission(
149
  id=f"approval_{submission_id[0]}",
@@ -387,7 +417,7 @@ async def event_listener(
387
  if response == "yolo":
388
  config.yolo_mode = True
389
  print(
390
- "YOLO MODE ACTIVATED - Auto-approving all future tool calls"
391
  )
392
  # Auto-approve this item and all remaining
393
  approvals.append(
@@ -444,6 +474,93 @@ async def get_user_input(prompt_session: PromptSession) -> str:
444
  return await prompt_session.prompt_async(HTML("\n<b><cyan>></cyan></b> "))
445
 
446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  async def main():
448
  """Interactive chat with the agent"""
449
  from agent.utils.terminal_display import Colors
@@ -452,21 +569,28 @@ async def main():
452
  os.system("clear" if os.name != "nt" else "cls")
453
 
454
  banner = r"""
455
- _ _ _ _____ _ _
456
- | | | |_ _ __ _ __ _(_)_ __ __ _ | ___|_ _ ___ ___ / \ __ _ ___ _ __ | |_
457
  | |_| | | | |/ _` |/ _` | | '_ \ / _` | | |_ / _` |/ __/ _ \ / _ \ / _` |/ _ \ '_ \| __|
458
- | _ | |_| | (_| | (_| | | | | | (_| | | _| (_| | (_| __/ / ___ \ (_| | __/ | | | |_
459
  |_| |_|\__,_|\__, |\__, |_|_| |_|\__, | |_| \__,_|\___\___| /_/ \_\__, |\___|_| |_|\__|
460
  |___/ |___/ |___/ |___/
461
  """
462
 
463
  print(format_separator())
464
  print(f"{Colors.YELLOW} {banner}{Colors.RESET}")
465
- print("Type your messages below. Type 'exit', 'quit', or '/quit' to end.\n")
466
  print(format_separator())
467
  # Wait for agent to initialize
468
  print("Initializing agent...")
469
 
 
 
 
 
 
 
 
470
  # Create queues for communication
471
  submission_queue = asyncio.Queue()
472
  event_queue = asyncio.Queue()
@@ -480,19 +604,24 @@ async def main():
480
  config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
481
  config = load_config(config_path)
482
 
483
- # Create tool router
484
  print(f"Loading MCP servers: {', '.join(config.mcpServers.keys())}")
485
- tool_router = ToolRouter(config.mcpServers)
486
 
487
  # Create prompt session for input
488
  prompt_session = PromptSession()
489
 
 
 
 
490
  agent_task = asyncio.create_task(
491
  submission_loop(
492
  submission_queue,
493
  event_queue,
494
  config=config,
495
  tool_router=tool_router,
 
 
496
  )
497
  )
498
 
@@ -510,12 +639,16 @@ async def main():
510
 
511
  await ready_event.wait()
512
 
513
- submission_id = 0
 
514
 
515
  try:
516
  while True:
517
- # Wait for previous turn to complete
518
- await turn_complete_event.wait()
 
 
 
519
  turn_complete_event.clear()
520
 
521
  # Get user input
@@ -523,6 +656,21 @@ async def main():
523
  user_input = await get_user_input(prompt_session)
524
  except EOFError:
525
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
  # Check for exit commands
528
  if user_input.strip().lower() in ["exit", "quit", "/quit", "/exit"]:
@@ -533,35 +681,50 @@ async def main():
533
  turn_complete_event.set()
534
  continue
535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  # Submit to agent
537
- submission_id += 1
538
  submission = Submission(
539
- id=f"sub_{submission_id}",
540
  operation=Operation(
541
  op_type=OpType.USER_INPUT, data={"text": user_input}
542
  ),
543
  )
544
- # print(f"Main submitting: {submission.operation.op_type}")
545
  await submission_queue.put(submission)
546
 
547
  except KeyboardInterrupt:
548
  print("\n\nInterrupted by user")
549
 
550
  # Shutdown
551
- print("\n🛑 Shutting down agent...")
552
  shutdown_submission = Submission(
553
  id="sub_shutdown", operation=Operation(op_type=OpType.SHUTDOWN)
554
  )
555
  await submission_queue.put(shutdown_submission)
556
 
557
- await asyncio.wait_for(agent_task, timeout=5.0)
 
 
 
558
  listener_task.cancel()
559
 
560
- print("Goodbye!\n")
561
 
562
 
563
  if __name__ == "__main__":
564
  try:
565
  asyncio.run(main())
566
  except KeyboardInterrupt:
567
- print("\n\n✨ Goodbye!")
 
5
  import asyncio
6
  import json
7
  import os
8
+ import time
9
  from dataclasses import dataclass
10
  from pathlib import Path
11
  from typing import Any, Optional
12
 
13
  import litellm
 
14
  from prompt_toolkit import PromptSession
15
 
16
  from agent.config import load_config
 
31
 
32
  litellm.drop_params = True
33
 
34
+ # ── Available models (mirrors backend/routes/agent.py) ──────────────────
35
+ AVAILABLE_MODELS = [
36
+ {"id": "anthropic/claude-opus-4-6", "label": "Claude Opus 4.6"},
37
+ {"id": "huggingface/fireworks-ai/MiniMaxAI/MiniMax-M2.5", "label": "MiniMax M2.5"},
38
+ {"id": "huggingface/novita/moonshotai/kimi-k2.5", "label": "Kimi K2.5"},
39
+ {"id": "huggingface/novita/zai-org/glm-5", "label": "GLM 5"},
40
+ ]
41
+ VALID_MODEL_IDS = {m["id"] for m in AVAILABLE_MODELS}
42
+
43
 
44
  def _safe_get_args(arguments: dict) -> dict:
45
  """Safely extract args dict from arguments, handling cases where LLM passes string."""
 
50
  return args if isinstance(args, dict) else {}
51
 
52
 
53
def _get_hf_token() -> str | None:
    """Get the Hugging Face token from the environment or the local login cache.

    Resolution order:
      1. ``HF_TOKEN`` environment variable.
      2. ``HUGGING_FACE_HUB_TOKEN`` (legacy variable still honored by hub tools).
      3. The token stored by ``huggingface-cli login`` via ``huggingface_hub``.

    Returns None when no token can be found.
    """
    for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        token = os.environ.get(var)
        if token:
            return token
    try:
        # Best-effort: tolerate huggingface_hub being absent or misconfigured.
        from huggingface_hub import HfApi

        token = HfApi().token
        if token:
            return token
    except Exception:
        pass
    return None
67
 
68
  @dataclass
69
  class Operation:
 
126
  if plan_display:
127
  print(plan_display)
128
  turn_complete_event.set()
129
+ elif event.event_type == "interrupted":
130
+ print("\n(interrupted)")
131
+ turn_complete_event.set()
132
+ elif event.event_type == "undo_complete":
133
+ print("Undo complete.")
134
+ turn_complete_event.set()
135
+ elif event.event_type == "tool_log":
136
+ tool = event.data.get("tool", "") if event.data else ""
137
+ log = event.data.get("log", "") if event.data else ""
138
+ if log:
139
+ print(f" [{tool}] {log}")
140
+ elif event.event_type == "tool_state_change":
141
+ tool = event.data.get("tool", "") if event.data else ""
142
+ state = event.data.get("state", "") if event.data else ""
143
+ if state in ("approved", "rejected", "running"):
144
+ print(f" {tool}: {state}")
145
  elif event.event_type == "error":
146
  error = (
147
  event.data.get("error", "Unknown error")
 
157
  elif event.event_type == "compacted":
158
  old_tokens = event.data.get("old_tokens", 0) if event.data else 0
159
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
160
+ print(f"Compacted context: {old_tokens} -> {new_tokens} tokens")
161
  elif event.event_type == "approval_required":
162
  # Handle batch approval format
163
  tools_data = event.data.get("tools", []) if event.data else []
 
173
  }
174
  for t in tools_data
175
  ]
176
+ print(f"\n YOLO MODE: Auto-approving {count} item(s)")
177
  submission_id[0] += 1
178
  approval_submission = Submission(
179
  id=f"approval_{submission_id[0]}",
 
417
  if response == "yolo":
418
  config.yolo_mode = True
419
  print(
420
+ "YOLO MODE ACTIVATED - Auto-approving all future tool calls"
421
  )
422
  # Auto-approve this item and all remaining
423
  approvals.append(
 
474
  return await prompt_session.prompt_async(HTML("\n<b><cyan>></cyan></b> "))
475
 
476
 
477
+ # ── Slash command helpers ────────────────────────────────────────────────
478
+
479
+ HELP_TEXT = """\
480
+ Commands:
481
+ /help Show this help
482
+ /undo Undo last turn
483
+ /compact Compact context window
484
+ /model [id] Show available models or switch model
485
+ /yolo Toggle auto-approve mode
486
+ /status Show current model, turn count
487
+ /quit, /exit Exit the CLI
488
+ """
489
+
490
+
491
def _handle_slash_command(
    cmd: str,
    config,
    session_holder: list,
    submission_queue: asyncio.Queue,  # kept for interface; not used today
    submission_id: list[int],
) -> Submission | None:
    """
    Handle a slash command. Returns a Submission to enqueue, or None if
    the command was handled locally (caller should set turn_complete_event).

    Args:
        cmd: Raw user input starting with '/'.
        config: Agent config; mutated by /model and /yolo.
        session_holder: Single-element list holding the live Session (or None).
        submission_queue: Unused here; retained so callers don't change.
        submission_id: Single-element counter list, incremented for new submissions.
    """
    parts = cmd.strip().split(None, 1)
    command = parts[0].lower()
    arg = parts[1].strip() if len(parts) > 1 else ""

    if command == "/help":
        print(HELP_TEXT)
        return None

    if command == "/undo":
        submission_id[0] += 1
        return Submission(
            id=f"sub_{submission_id[0]}",
            operation=Operation(op_type=OpType.UNDO),
        )

    if command == "/compact":
        submission_id[0] += 1
        return Submission(
            id=f"sub_{submission_id[0]}",
            operation=Operation(op_type=OpType.COMPACT),
        )

    if command == "/model":
        if not arg:
            print("Available models:")
            current = config.model_name if config else ""
            for m in AVAILABLE_MODELS:
                marker = " <-- current" if m["id"] == current else ""
                print(f"  {m['id']} ({m['label']}){marker}")
            return None
        if arg not in VALID_MODEL_IDS:
            print(f"Unknown model: {arg}")
            # sorted() so the error output is deterministic (sets have no order).
            print(f"Valid: {', '.join(sorted(VALID_MODEL_IDS))}")
            return None
        session = session_holder[0] if session_holder else None
        if session:
            session.update_model(arg)
            print(f"Model switched to {arg}")
        else:
            # No live session yet: record the choice on the config so the
            # session picks it up when it starts.
            config.model_name = arg
            print(f"Model set to {arg} (session not started yet)")
        return None

    if command == "/yolo":
        config.yolo_mode = not config.yolo_mode
        state = "ON" if config.yolo_mode else "OFF"
        print(f"YOLO mode: {state}")
        return None

    if command == "/status":
        session = session_holder[0] if session_holder else None
        print(f"Model: {config.model_name}")
        if session:
            print(f"Turns: {session.turn_count}")
            print(f"Context items: {len(session.context_manager.items)}")
        return None

    print(f"Unknown command: {command}. Type /help for available commands.")
    return None
562
+
563
+
564
  async def main():
565
  """Interactive chat with the agent"""
566
  from agent.utils.terminal_display import Colors
 
569
  os.system("clear" if os.name != "nt" else "cls")
570
 
571
  banner = r"""
572
+ _ _ _ _____ _ _
573
+ | | | |_ _ __ _ __ _(_)_ __ __ _ | ___|_ _ ___ ___ / \ __ _ ___ _ __ | |_
574
  | |_| | | | |/ _` |/ _` | | '_ \ / _` | | |_ / _` |/ __/ _ \ / _ \ / _` |/ _ \ '_ \| __|
575
+ | _ | |_| | (_| | (_| | | | | | (_| | | _| (_| | (_| __/ / ___ \ (_| | __/ | | | |_
576
  |_| |_|\__,_|\__, |\__, |_|_| |_|\__, | |_| \__,_|\___\___| /_/ \_\__, |\___|_| |_|\__|
577
  |___/ |___/ |___/ |___/
578
  """
579
 
580
  print(format_separator())
581
  print(f"{Colors.YELLOW} {banner}{Colors.RESET}")
582
+ print("Type your messages below. Type /help for commands, /quit to exit.\n")
583
  print(format_separator())
584
  # Wait for agent to initialize
585
  print("Initializing agent...")
586
 
587
+ # HF token
588
+ hf_token = _get_hf_token()
589
+ if hf_token:
590
+ print("HF token loaded")
591
+ else:
592
+ print("Warning: No HF token found. Set HF_TOKEN or run `huggingface-cli login`.")
593
+
594
  # Create queues for communication
595
  submission_queue = asyncio.Queue()
596
  event_queue = asyncio.Queue()
 
604
  config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
605
  config = load_config(config_path)
606
 
607
+ # Create tool router with local mode
608
  print(f"Loading MCP servers: {', '.join(config.mcpServers.keys())}")
609
+ tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)
610
 
611
  # Create prompt session for input
612
  prompt_session = PromptSession()
613
 
614
+ # Session holder for interrupt/model/status access
615
+ session_holder = [None]
616
+
617
  agent_task = asyncio.create_task(
618
  submission_loop(
619
  submission_queue,
620
  event_queue,
621
  config=config,
622
  tool_router=tool_router,
623
+ session_holder=session_holder,
624
+ hf_token=hf_token,
625
  )
626
  )
627
 
 
639
 
640
  await ready_event.wait()
641
 
642
+ submission_id = [0]
643
+ last_interrupt_time = 0.0
644
 
645
  try:
646
  while True:
647
+ # Wait for previous turn to complete, with interrupt support
648
+ try:
649
+ await turn_complete_event.wait()
650
+ except asyncio.CancelledError:
651
+ break
652
  turn_complete_event.clear()
653
 
654
  # Get user input
 
656
  user_input = await get_user_input(prompt_session)
657
  except EOFError:
658
  break
659
+ except KeyboardInterrupt:
660
+ now = time.monotonic()
661
+ if now - last_interrupt_time < 3.0:
662
+ print("\nDouble Ctrl+C, exiting...")
663
+ break
664
+ last_interrupt_time = now
665
+ # If agent is busy, cancel it
666
+ session = session_holder[0]
667
+ if session and not turn_complete_event.is_set():
668
+ session.cancel()
669
+ print("\nInterrupting agent...")
670
+ else:
671
+ print("\n(Ctrl+C again within 3s to exit)")
672
+ turn_complete_event.set()
673
+ continue
674
 
675
  # Check for exit commands
676
  if user_input.strip().lower() in ["exit", "quit", "/quit", "/exit"]:
 
681
  turn_complete_event.set()
682
  continue
683
 
684
+ # Handle slash commands
685
+ if user_input.strip().startswith("/"):
686
+ sub = _handle_slash_command(
687
+ user_input.strip(), config, session_holder, submission_queue, submission_id
688
+ )
689
+ if sub is None:
690
+ # Command handled locally, loop back for input
691
+ turn_complete_event.set()
692
+ continue
693
+ else:
694
+ await submission_queue.put(sub)
695
+ continue
696
+
697
  # Submit to agent
698
+ submission_id[0] += 1
699
  submission = Submission(
700
+ id=f"sub_{submission_id[0]}",
701
  operation=Operation(
702
  op_type=OpType.USER_INPUT, data={"text": user_input}
703
  ),
704
  )
 
705
  await submission_queue.put(submission)
706
 
707
  except KeyboardInterrupt:
708
  print("\n\nInterrupted by user")
709
 
710
  # Shutdown
711
+ print("\nShutting down agent...")
712
  shutdown_submission = Submission(
713
  id="sub_shutdown", operation=Operation(op_type=OpType.SHUTDOWN)
714
  )
715
  await submission_queue.put(shutdown_submission)
716
 
717
+ try:
718
+ await asyncio.wait_for(agent_task, timeout=5.0)
719
+ except asyncio.TimeoutError:
720
+ agent_task.cancel()
721
  listener_task.cancel()
722
 
723
+ print("Goodbye!\n")
724
 
725
 
726
  if __name__ == "__main__":
727
  try:
728
  asyncio.run(main())
729
  except KeyboardInterrupt:
730
+ print("\n\nGoodbye!")
agent/tools/local_tools.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Local tool implementations — bash/read/write/edit running on the user's machine.
3
+
4
+ Drop-in replacement for sandbox tools when running in CLI (local) mode.
5
+ Same tool specs (names, parameters) but handlers execute locally via
6
+ subprocess/pathlib instead of going through a remote sandbox.
7
+ """
8
+
9
from __future__ import annotations

import asyncio
import subprocess
from pathlib import Path
from typing import Any

from agent.tools.sandbox_client import Sandbox
16
+
17
+ MAX_OUTPUT_CHARS = 30_000
18
+ MAX_LINE_LENGTH = 2000
19
+ DEFAULT_READ_LINES = 2000
20
+ DEFAULT_TIMEOUT = 120
21
+ MAX_TIMEOUT = 600
22
+
23
+
24
+ # ── Handlers ────────────────────────────────────────────────────────────
25
+
26
async def _bash_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Run a shell command locally and return (output, success).

    Output combines stdout and stderr, truncated to MAX_OUTPUT_CHARS.
    The timeout is clamped to MAX_TIMEOUT seconds.
    """
    command = args.get("command", "")
    if not command:
        return "No command provided.", False
    work_dir = args.get("work_dir", ".")
    timeout = min(args.get("timeout") or DEFAULT_TIMEOUT, MAX_TIMEOUT)
    try:
        # Run in a worker thread: subprocess.run() blocks, and calling it
        # directly inside this coroutine would stall the event loop — freezing
        # event processing and the CLI's Ctrl+C interrupt handling for the
        # duration of the command.
        result = await asyncio.to_thread(
            subprocess.run,
            command,
            shell=True,
            capture_output=True,
            text=True,
            cwd=work_dir,
            timeout=timeout,
        )
        output = result.stdout + result.stderr
        if len(output) > MAX_OUTPUT_CHARS:
            output = output[:MAX_OUTPUT_CHARS] + "\n... (output truncated)"
        if not output.strip():
            output = "(no output)"
        return output, result.returncode == 0
    except subprocess.TimeoutExpired:
        return f"Command timed out after {timeout}s.", False
    except Exception as e:
        return f"bash error: {e}", False
51
+
52
+
53
async def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Read a local text file and return numbered lines (like `cat -n`).

    Honors optional 'offset' (1-based first line) and 'limit' arguments;
    long lines are truncated to MAX_LINE_LENGTH characters.
    """
    path_arg = args.get("path", "")
    if not path_arg:
        return "No path provided.", False

    target = Path(path_arg)
    if not target.exists():
        return f"File not found: {path_arg}", False
    if target.is_dir():
        return "Cannot read a directory. Use bash with 'ls' instead.", False

    try:
        all_lines = target.read_text().splitlines()
    except Exception as e:
        return f"read error: {e}", False

    start = max(args.get("offset") or 1, 1)
    max_lines = args.get("limit") or DEFAULT_READ_LINES
    window = all_lines[start - 1 : start - 1 + max_lines]

    rendered: list[str] = []
    for lineno, text in enumerate(window, start=start):
        if len(text) > MAX_LINE_LENGTH:
            text = text[:MAX_LINE_LENGTH] + "..."
        rendered.append(f"{lineno:>6}\t{text}")
    return "\n".join(rendered), True
77
+
78
+
79
async def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Write content to a local file, creating parent directories as needed.

    Returns (message, success); overwrites any existing file at 'path'.
    """
    file_path = args.get("path", "")
    content = args.get("content", "")
    if not file_path:
        return "No path provided.", False
    p = Path(file_path)
    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content)
        # len(content) counts characters, not encoded bytes — say so.
        return f"Wrote {len(content)} characters to {file_path}", True
    except Exception as e:
        return f"write error: {e}", False
91
+
92
+
93
async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Replace old_str with new_str in a local file.

    old_str must be non-empty and (unless replace_all is true) unique
    in the file. Returns (message, success).
    """
    file_path = args.get("path", "")
    old_str = args.get("old_str", "")
    new_str = args.get("new_str", "")
    replace_all = args.get("replace_all", False)

    if not file_path:
        return "No path provided.", False
    if not old_str:
        # Guard: str.count("") == len(text) + 1 and replace("") would insert
        # new_str between every character, silently corrupting the file.
        return "old_str must not be empty.", False
    if old_str == new_str:
        return "old_str and new_str must differ.", False

    p = Path(file_path)
    if not p.exists():
        return f"File not found: {file_path}", False

    try:
        text = p.read_text()
    except Exception as e:
        return f"edit read error: {e}", False

    count = text.count(old_str)
    if count == 0:
        return "old_str not found in file.", False
    if count > 1 and not replace_all:
        return (
            f"old_str appears {count} times. Use replace_all=true to replace all, "
            "or provide a more specific old_str."
        ), False

    new_text = text.replace(old_str, new_str) if replace_all else text.replace(old_str, new_str, 1)
    try:
        p.write_text(new_text)
    except Exception as e:
        return f"edit write error: {e}", False

    replacements = count if replace_all else 1
    return f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})", True
130
+
131
+
132
+ # ── Public API ──────────────────────────────────────────────────────────
133
+
134
+ _HANDLERS = {
135
+ "bash": _bash_handler,
136
+ "read": _read_handler,
137
+ "write": _write_handler,
138
+ "edit": _edit_handler,
139
+ }
140
+
141
+
142
def get_local_tools():
    """Return local ToolSpecs for bash/read/write/edit (no sandbox_create)."""
    # Imported lazily to avoid a circular import with agent.core.tools.
    from agent.core.tools import ToolSpec

    specs = []
    for tool_name, sandbox_spec in Sandbox.TOOLS.items():
        local_handler = _HANDLERS.get(tool_name)
        if local_handler is None:
            # Sandbox-only tools (e.g. sandbox_create) have no local equivalent.
            continue
        specs.append(
            ToolSpec(
                name=tool_name,
                description=sandbox_spec["description"],
                parameters=sandbox_spec["parameters"],
                handler=local_handler,
            )
        )
    return specs
eval/hf_agent_connector.py CHANGED
@@ -5,7 +5,6 @@ import sys
5
  from pathlib import Path
6
  from typing import Any
7
 
8
- from lmnr import observe
9
 
10
  from agent.config import Config, load_config
11
  from agent.core.agent_loop import Handlers
@@ -40,7 +39,6 @@ class AgentResponseGenerator:
40
  """Expose the agent model name for downstream logging."""
41
  return self.config.model_name
42
 
43
- @observe(name="eval_run")
44
  async def run(self, prompt: str) -> str:
45
  """
46
  Execute the agent loop for a single prompt and return the assistant reply.
 
5
  from pathlib import Path
6
  from typing import Any
7
 
 
8
 
9
  from agent.config import Config, load_config
10
  from agent.core.agent_loop import Handlers
 
39
  """Expose the agent model name for downstream logging."""
40
  return self.config.model_name
41
 
 
42
  async def run(self, prompt: str) -> str:
43
  """
44
  Execute the agent loop for a single prompt and return the assistant reply.
eval/solvers.py CHANGED
@@ -14,7 +14,6 @@ import litellm
14
  from inspect_ai.model import ChatMessageAssistant, ModelOutput
15
  from inspect_ai.solver import Solver, solver
16
  from inspect_ai.solver._task_state import TaskState
17
- from lmnr import Laminar, LaminarLiteLLMCallback
18
 
19
  from eval.hf_agent_connector import AgentResponseGenerator
20
 
@@ -39,10 +38,6 @@ def hf_agent(
39
  config_path: str = "agent/config_mcp_example.json",
40
  max_iterations: int = 10,
41
  ) -> Solver:
42
- # init lmnr for observability
43
- Laminar.initialize(project_api_key=os.environ.get("LMNR_API_KEY"))
44
- litellm.callbacks = [LaminarLiteLLMCallback()]
45
- print("✅ Laminar initialized")
46
 
47
  runner = AgentResponseGenerator(
48
  config_path=config_path,
 
14
  from inspect_ai.model import ChatMessageAssistant, ModelOutput
15
  from inspect_ai.solver import Solver, solver
16
  from inspect_ai.solver._task_state import TaskState
 
17
 
18
  from eval.hf_agent_connector import AgentResponseGenerator
19
 
 
38
  config_path: str = "agent/config_mcp_example.json",
39
  max_iterations: int = 10,
40
  ) -> Solver:
 
 
 
 
41
 
42
  runner = AgentResponseGenerator(
43
  config_path=config_path,
frontend/src/components/Chat/ToolCallGroup.tsx CHANGED
@@ -24,6 +24,32 @@ interface ToolCallGroupProps {
24
  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => Promise<boolean>;
25
  }
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  // ---------------------------------------------------------------------------
28
  // Visual helpers
29
  // ---------------------------------------------------------------------------
@@ -108,29 +134,49 @@ function InlineApproval({
108
 
109
  return (
110
  <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
111
- {toolName === 'sandbox_create' && args && (
112
- <Box sx={{ mb: 1.5 }}>
113
- <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
114
- Create sandbox on{' '}
115
- <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
116
- {String(args.hardware || 'cpu-basic')}
117
- </Box>
118
- {!!args.private && (
119
- <Box component="span" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>
120
- )}
121
- </Typography>
122
- </Box>
123
- )}
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- {toolName === 'hf_jobs' && args && (
 
 
 
126
  <Box sx={{ mb: 1.5 }}>
127
  <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
128
  Execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{scriptLabel.replace('Script', 'Job')}</Box> on{' '}
129
  <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
130
- {String(args.hardware_flavor || 'default')}
131
  </Box>
 
 
 
 
 
132
  {!!args.timeout && (
133
- <> with timeout <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
134
  {String(args.timeout)}
135
  </Box></>
136
  )}
@@ -184,7 +230,8 @@ function InlineApproval({
184
  </Box>
185
  )}
186
  </Box>
187
- )}
 
188
 
189
  <Box sx={{ display: 'flex', gap: 1, mb: 1 }}>
190
  <TextField
 
24
  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => Promise<boolean>;
25
  }
26
 
27
+ // ---------------------------------------------------------------------------
28
+ // Hardware pricing ($/hr) — from HF Spaces & Jobs pricing
29
+ // ---------------------------------------------------------------------------
30
+ const HARDWARE_PRICING: Record<string, string> = {
31
+ 'cpu-basic': 'free',
32
+ 'cpu-upgrade': '$0.03/hr',
33
+ 't4-small': '$0.60/hr',
34
+ 't4-medium': '$1.00/hr',
35
+ 'a10g-small': '$1.05/hr',
36
+ 'a10g-large': '$3.15/hr',
37
+ 'a10g-largex2': '$6.30/hr',
38
+ 'a10g-largex4': '$12.60/hr',
39
+ 'a100-large': '$4.13/hr',
40
+ 'a100x4': '$16.52/hr',
41
+ 'a100x8': '$33.04/hr',
42
+ 'l4x1': '$0.80/hr',
43
+ 'l4x4': '$3.20/hr',
44
+ 'l40sx1': '$1.80/hr',
45
+ 'l40sx4': '$7.20/hr',
46
+ 'l40sx8': '$14.40/hr',
47
+ };
48
+
49
+ function costLabel(hardware: string): string | null {
50
+ return HARDWARE_PRICING[hardware] || null;
51
+ }
52
+
53
  // ---------------------------------------------------------------------------
54
  // Visual helpers
55
  // ---------------------------------------------------------------------------
 
134
 
135
  return (
136
  <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
137
+ {toolName === 'sandbox_create' && args && (() => {
138
+ const hw = String(args.hardware || 'cpu-basic');
139
+ const cost = costLabel(hw);
140
+ return (
141
+ <Box sx={{ mb: 1.5 }}>
142
+ <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 0.5 }}>
143
+ Create a remote dev environment on{' '}
144
+ <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
145
+ {hw}
146
+ </Box>
147
+ {cost && (
148
+ <Box component="span" sx={{ color: cost === 'free' ? 'var(--accent-green)' : 'var(--accent-yellow)', fontWeight: 500 }}>
149
+ {' '}({cost})
150
+ </Box>
151
+ )}
152
+ {!!args.private && (
153
+ <Box component="span" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>
154
+ )}
155
+ </Typography>
156
+ <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem', opacity: 0.7 }}>
157
+ Creates a temporary HF Space to develop and test scripts before running jobs. Takes 1-2 min to start.
158
+ </Typography>
159
+ </Box>
160
+ );
161
+ })()}
162
 
163
+ {toolName === 'hf_jobs' && args && (() => {
164
+ const hw = String(args.hardware_flavor || 'cpu-basic');
165
+ const cost = costLabel(hw);
166
+ return (
167
  <Box sx={{ mb: 1.5 }}>
168
  <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
169
  Execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{scriptLabel.replace('Script', 'Job')}</Box> on{' '}
170
  <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
171
+ {hw}
172
  </Box>
173
+ {cost && (
174
+ <Box component="span" sx={{ color: cost === 'free' ? 'var(--accent-green)' : 'var(--accent-yellow)', fontWeight: 500 }}>
175
+ {' '}({cost})
176
+ </Box>
177
+ )}
178
  {!!args.timeout && (
179
+ <> for up to <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
180
  {String(args.timeout)}
181
  </Box></>
182
  )}
 
230
  </Box>
231
  )}
232
  </Box>
233
+ );
234
+ })()}
235
 
236
  <Box sx={{ display: 'flex', gap: 1, mb: 1 }}>
237
  <TextField
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -98,28 +98,19 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
98
  },
99
  onToolLog: (tool: string, log: string) => {
100
  if (!isActiveRef.current) return;
101
- const STREAMABLE_TOOLS = new Set(['hf_jobs', 'sandbox', 'bash']);
102
- if (!STREAMABLE_TOOLS.has(tool)) return;
 
 
 
 
 
103
 
104
- const state = useAgentStore.getState();
105
- const existingOutput = state.panelData?.output?.content || '';
106
- const newContent = existingOutput
107
- ? existingOutput + '\n' + log
108
- : log;
109
-
110
- if (!state.panelData) {
111
- // Initialize panel when it doesn't exist (bash bypasses approval, so no panel yet)
112
- const title = tool === 'hf_jobs' ? 'Job Output' : 'Sandbox';
113
- setPanel(
114
- { title, output: { content: newContent, language: 'text' } },
115
- 'output',
116
- );
117
- } else {
118
  setPanelOutput({ content: newContent, language: 'text' });
119
- }
120
 
121
- if (!useLayoutStore.getState().isRightPanelOpen) {
122
- setRightPanelOpen(true);
 
123
  }
124
  },
125
  onConnectionChange: (connected: boolean) => {
@@ -178,12 +169,6 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
178
  });
179
  setRightPanelOpen(true);
180
  setLeftSidebarOpen(false);
181
- } else if (toolName === 'bash' && args.command) {
182
- // Initialize panel for sandbox bash — command in script tab, output tab active for streaming
183
- setPanel(
184
- { title: 'Sandbox', script: { content: String(args.command), language: 'bash' } },
185
- 'output',
186
- );
187
  }
188
  },
189
  onToolOutputPanel: (toolName: string, _toolCallId: string, output: string, success: boolean) => {
@@ -191,9 +176,6 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
191
  if (toolName === 'hf_jobs' && output) {
192
  setPanelOutput({ content: output, language: 'markdown' });
193
  if (!success) useAgentStore.getState().setPanelView('output');
194
- } else if (toolName === 'bash') {
195
- // Streaming already populated the output — ensure output view on error
196
- if (!success) useAgentStore.getState().setPanelView('output');
197
  }
198
  },
199
  onStreaming: () => {
 
98
  },
99
  onToolLog: (tool: string, log: string) => {
100
  if (!isActiveRef.current) return;
101
+ if (tool === 'hf_jobs' || tool === 'sandbox') {
102
+ const state = useAgentStore.getState();
103
+ const existingOutput = state.panelData?.output?.content || '';
104
+ const header = tool === 'sandbox' ? '--- Sandbox creation ---' : '--- Job execution started ---';
105
+ const newContent = existingOutput
106
+ ? existingOutput + '\n' + log
107
+ : header + '\n' + log;
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  setPanelOutput({ content: newContent, language: 'text' });
 
110
 
111
+ if (!useLayoutStore.getState().isRightPanelOpen) {
112
+ setRightPanelOpen(true);
113
+ }
114
  }
115
  },
116
  onConnectionChange: (connected: boolean) => {
 
169
  });
170
  setRightPanelOpen(true);
171
  setLeftSidebarOpen(false);
 
 
 
 
 
 
172
  }
173
  },
174
  onToolOutputPanel: (toolName: string, _toolCallId: string, output: string, success: boolean) => {
 
176
  if (toolName === 'hf_jobs' && output) {
177
  setPanelOutput({ content: output, language: 'markdown' });
178
  if (!success) useAgentStore.getState().setPanelView('output');
 
 
 
179
  }
180
  },
181
  onStreaming: () => {
frontend/src/store/agentStore.ts CHANGED
@@ -135,10 +135,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
135
  setPanelView: (view) => set({ panelView: view }),
136
 
137
  setPanelOutput: (output) => set((state) => ({
138
- panelData: state.panelData
139
- ? { ...state.panelData, output }
140
- : { title: 'Output', output },
141
- panelView: 'output',
142
  })),
143
 
144
  updatePanelScript: (content) => set((state) => ({
 
135
  setPanelView: (view) => set({ panelView: view }),
136
 
137
  setPanelOutput: (output) => set((state) => ({
138
+ panelData: state.panelData ? { ...state.panelData, output } : null,
 
 
 
139
  })),
140
 
141
  updatePanelScript: (content) => set((state) => ({
pyproject.toml CHANGED
@@ -18,7 +18,6 @@ agent = [
18
  "litellm>=1.0.0",
19
  "huggingface-hub>=1.0.1",
20
  "fastmcp>=2.4.0",
21
- "lmnr>=0.7.23", # Note: Using base package to avoid torch/transformers from [all] extra
22
  "prompt-toolkit>=3.0.0",
23
  "thefuzz>=0.22.1",
24
  "nbconvert>=7.16.6",
 
18
  "litellm>=1.0.0",
19
  "huggingface-hub>=1.0.1",
20
  "fastmcp>=2.4.0",
 
21
  "prompt-toolkit>=3.0.0",
22
  "thefuzz>=0.22.1",
23
  "nbconvert>=7.16.6",