akseljoonas HF Staff Claude Opus 4.6 commited on
Commit
82b0c13
·
1 Parent(s): 288473a

feat: CLI local mode, slash commands, interrupt support; remove lmnr; frontend fixes

Browse files

CLI:
- Add local tools (bash/read/write/edit) via local_tools.py for CLI mode
- Add ToolRouter local_mode to use local tools instead of sandbox
- Add slash commands: /help, /undo, /compact, /model, /yolo, /status
- Add Ctrl+C interrupt support (single=cancel, double=exit)
- Add HF token auto-loading from env/huggingface-cli
- Add session_holder for interrupt/model/status access
- Add Session.update_model() method

Cleanup:
- Remove lmnr dependency and all observe/Laminar references
- Delete unused logo PNGs

Frontend:
- Show hardware pricing in tool approval UI
- Add sandbox explanation text
- Revert bash streaming to panel (caused issues)
- Fix setPanelOutput to not force panel view

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

agent/core/agent_loop.py CHANGED
@@ -9,7 +9,6 @@ import os
9
 
10
  from litellm import ChatCompletionMessageToolCall, Message, acompletion
11
  from litellm.exceptions import ContextWindowExceededError
12
- from lmnr import observe
13
 
14
  from agent.config import Config
15
  from agent.core.session import Event, OpType, Session
@@ -207,7 +206,6 @@ class Handlers:
207
  logger.info("Abandoned %d pending approval tool(s)", len(tool_calls))
208
 
209
  @staticmethod
210
- @observe(name="run_agent")
211
  async def run_agent(
212
  session: Session, text: str, max_iterations: int = 300
213
  ) -> str | None:
@@ -215,12 +213,6 @@ class Handlers:
215
  Handle user input (like user_input_or_turn in codex.rs:1291)
216
  Returns the final assistant response content, if any.
217
  """
218
- # Set session ID for this trace
219
- if hasattr(session, "session_id"):
220
- from lmnr import Laminar
221
-
222
- Laminar.set_trace_session_id(session_id=session.session_id)
223
-
224
  # Clear any stale cancellation flag from a previous run
225
  session.reset_cancel()
226
 
@@ -861,12 +853,13 @@ async def process_submission(session: Session, submission) -> bool:
861
  return True
862
 
863
 
864
- @observe(name="submission_loop")
865
  async def submission_loop(
866
  submission_queue: asyncio.Queue,
867
  event_queue: asyncio.Queue,
868
  config: Config | None = None,
869
  tool_router: ToolRouter | None = None,
 
 
870
  ) -> None:
871
  """
872
  Main agent loop - processes submissions and dispatches to handlers.
 
9
 
10
  from litellm import ChatCompletionMessageToolCall, Message, acompletion
11
  from litellm.exceptions import ContextWindowExceededError
 
12
 
13
  from agent.config import Config
14
  from agent.core.session import Event, OpType, Session
 
206
  logger.info("Abandoned %d pending approval tool(s)", len(tool_calls))
207
 
208
  @staticmethod
 
209
  async def run_agent(
210
  session: Session, text: str, max_iterations: int = 300
211
  ) -> str | None:
 
213
  Handle user input (like user_input_or_turn in codex.rs:1291)
214
  Returns the final assistant response content, if any.
215
  """
 
 
 
 
 
 
216
  # Clear any stale cancellation flag from a previous run
217
  session.reset_cancel()
218
 
 
853
  return True
854
 
855
 
 
856
  async def submission_loop(
857
  submission_queue: asyncio.Queue,
858
  event_queue: asyncio.Queue,
859
  config: Config | None = None,
860
  tool_router: ToolRouter | None = None,
861
+ session_holder: list | None = None,
862
+ hf_token: str | None = None,
863
  ) -> None:
864
  """
865
  Main agent loop - processes submissions and dispatches to handlers.
agent/core/session.py CHANGED
@@ -135,6 +135,11 @@ class Session:
135
  def is_cancelled(self) -> bool:
136
  return self._cancelled.is_set()
137
 
 
 
 
 
 
138
  def increment_turn(self) -> None:
139
  """Increment turn counter (called after each user interaction)"""
140
  self.turn_count += 1
 
135
  def is_cancelled(self) -> bool:
136
  return self._cancelled.is_set()
137
 
138
+ def update_model(self, model_name: str) -> None:
139
+ """Switch the active model and update the context window limit."""
140
+ self.config.model_name = model_name
141
+ self.context_manager.max_context = _get_max_tokens_safe(model_name)
142
+
143
  def increment_turn(self) -> None:
144
  """Increment turn counter (called after each user interaction)"""
145
  self.turn_count += 1
agent/core/tools.py CHANGED
@@ -12,7 +12,6 @@ logger = logging.getLogger(__name__)
12
 
13
  from fastmcp import Client
14
  from fastmcp.exceptions import ToolError
15
- from lmnr import observe
16
  from mcp.types import EmbeddedResource, ImageContent, TextContent
17
 
18
  from agent.config import MCPServerConfig
@@ -129,11 +128,11 @@ class ToolRouter:
129
  Based on codex-rs/core/src/tools/router.rs
130
  """
131
 
132
- def __init__(self, mcp_servers: dict[str, MCPServerConfig], hf_token: str | None = None):
133
  self.tools: dict[str, ToolSpec] = {}
134
  self.mcp_servers: dict[str, dict[str, Any]] = {}
135
 
136
- for tool in create_builtin_tools():
137
  self.register_tool(tool)
138
 
139
  self.mcp_client: Client | None = None
@@ -226,7 +225,6 @@ class ToolRouter:
226
  await self.mcp_client.__aexit__(exc_type, exc, tb)
227
  self._mcp_initialized = False
228
 
229
- @observe(name="call_tool")
230
  async def call_tool(
231
  self,
232
  tool_name: str,
@@ -275,7 +273,7 @@ class ToolRouter:
275
  # ============================================================================
276
 
277
 
278
- def create_builtin_tools() -> list[ToolSpec]:
279
  """Create built-in tool specifications"""
280
  # in order of importance
281
  tools = [
@@ -352,8 +350,12 @@ def create_builtin_tools() -> list[ToolSpec]:
352
  ),
353
  ]
354
 
355
- # Sandbox tools (highest priority)
356
- tools = get_sandbox_tools() + tools
 
 
 
 
357
 
358
  tool_names = ", ".join([t.name for t in tools])
359
  logger.info(f"Loaded {len(tools)} built-in tools: {tool_names}")
 
12
 
13
  from fastmcp import Client
14
  from fastmcp.exceptions import ToolError
 
15
  from mcp.types import EmbeddedResource, ImageContent, TextContent
16
 
17
  from agent.config import MCPServerConfig
 
128
  Based on codex-rs/core/src/tools/router.rs
129
  """
130
 
131
+ def __init__(self, mcp_servers: dict[str, MCPServerConfig], hf_token: str | None = None, local_mode: bool = False):
132
  self.tools: dict[str, ToolSpec] = {}
133
  self.mcp_servers: dict[str, dict[str, Any]] = {}
134
 
135
+ for tool in create_builtin_tools(local_mode=local_mode):
136
  self.register_tool(tool)
137
 
138
  self.mcp_client: Client | None = None
 
225
  await self.mcp_client.__aexit__(exc_type, exc, tb)
226
  self._mcp_initialized = False
227
 
 
228
  async def call_tool(
229
  self,
230
  tool_name: str,
 
273
  # ============================================================================
274
 
275
 
276
+ def create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]:
277
  """Create built-in tool specifications"""
278
  # in order of importance
279
  tools = [
 
350
  ),
351
  ]
352
 
353
+ # Sandbox or local tools (highest priority)
354
+ if local_mode:
355
+ from agent.tools.local_tools import get_local_tools
356
+ tools = get_local_tools() + tools
357
+ else:
358
+ tools = get_sandbox_tools() + tools
359
 
360
  tool_names = ", ".join([t.name for t in tools])
361
  logger.info(f"Loaded {len(tools)} built-in tools: {tool_names}")
agent/main.py CHANGED
@@ -5,12 +5,12 @@ Interactive CLI chat with the agent
5
  import asyncio
6
  import json
7
  import os
 
8
  from dataclasses import dataclass
9
  from pathlib import Path
10
  from typing import Any, Optional
11
 
12
  import litellm
13
- from lmnr import Laminar, LaminarLiteLLMCallback
14
  from prompt_toolkit import PromptSession
15
 
16
  from agent.config import load_config
@@ -31,6 +31,15 @@ from agent.utils.terminal_display import (
31
 
32
  litellm.drop_params = True
33
 
 
 
 
 
 
 
 
 
 
34
 
35
  def _safe_get_args(arguments: dict) -> dict:
36
  """Safely extract args dict from arguments, handling cases where LLM passes string."""
@@ -41,15 +50,20 @@ def _safe_get_args(arguments: dict) -> dict:
41
  return args if isinstance(args, dict) else {}
42
 
43
 
44
- lmnr_api_key = os.environ.get("LMNR_API_KEY")
45
- if lmnr_api_key:
 
 
 
46
  try:
47
- Laminar.initialize(project_api_key=lmnr_api_key)
48
- litellm.callbacks = [LaminarLiteLLMCallback()]
49
- print("Laminar initialized")
50
- except Exception as e:
51
- print(f"Failed to initialize Laminar: {e}")
52
-
 
 
53
 
54
  @dataclass
55
  class Operation:
@@ -112,6 +126,22 @@ async def event_listener(
112
  if plan_display:
113
  print(plan_display)
114
  turn_complete_event.set()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  elif event.event_type == "error":
116
  error = (
117
  event.data.get("error", "Unknown error")
@@ -127,7 +157,7 @@ async def event_listener(
127
  elif event.event_type == "compacted":
128
  old_tokens = event.data.get("old_tokens", 0) if event.data else 0
129
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
130
- print(f"Compacted context: {old_tokens} {new_tokens} tokens")
131
  elif event.event_type == "approval_required":
132
  # Handle batch approval format
133
  tools_data = event.data.get("tools", []) if event.data else []
@@ -143,7 +173,7 @@ async def event_listener(
143
  }
144
  for t in tools_data
145
  ]
146
- print(f"\n YOLO MODE: Auto-approving {count} item(s)")
147
  submission_id[0] += 1
148
  approval_submission = Submission(
149
  id=f"approval_{submission_id[0]}",
@@ -387,7 +417,7 @@ async def event_listener(
387
  if response == "yolo":
388
  config.yolo_mode = True
389
  print(
390
- "YOLO MODE ACTIVATED - Auto-approving all future tool calls"
391
  )
392
  # Auto-approve this item and all remaining
393
  approvals.append(
@@ -444,6 +474,93 @@ async def get_user_input(prompt_session: PromptSession) -> str:
444
  return await prompt_session.prompt_async(HTML("\n<b><cyan>></cyan></b> "))
445
 
446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  async def main():
448
  """Interactive chat with the agent"""
449
  from agent.utils.terminal_display import Colors
@@ -452,21 +569,28 @@ async def main():
452
  os.system("clear" if os.name != "nt" else "cls")
453
 
454
  banner = r"""
455
- _ _ _ _____ _ _
456
- | | | |_ _ __ _ __ _(_)_ __ __ _ | ___|_ _ ___ ___ / \ __ _ ___ _ __ | |_
457
  | |_| | | | |/ _` |/ _` | | '_ \ / _` | | |_ / _` |/ __/ _ \ / _ \ / _` |/ _ \ '_ \| __|
458
- | _ | |_| | (_| | (_| | | | | | (_| | | _| (_| | (_| __/ / ___ \ (_| | __/ | | | |_
459
  |_| |_|\__,_|\__, |\__, |_|_| |_|\__, | |_| \__,_|\___\___| /_/ \_\__, |\___|_| |_|\__|
460
  |___/ |___/ |___/ |___/
461
  """
462
 
463
  print(format_separator())
464
  print(f"{Colors.YELLOW} {banner}{Colors.RESET}")
465
- print("Type your messages below. Type 'exit', 'quit', or '/quit' to end.\n")
466
  print(format_separator())
467
  # Wait for agent to initialize
468
  print("Initializing agent...")
469
 
 
 
 
 
 
 
 
470
  # Create queues for communication
471
  submission_queue = asyncio.Queue()
472
  event_queue = asyncio.Queue()
@@ -480,19 +604,24 @@ async def main():
480
  config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
481
  config = load_config(config_path)
482
 
483
- # Create tool router
484
  print(f"Loading MCP servers: {', '.join(config.mcpServers.keys())}")
485
- tool_router = ToolRouter(config.mcpServers)
486
 
487
  # Create prompt session for input
488
  prompt_session = PromptSession()
489
 
 
 
 
490
  agent_task = asyncio.create_task(
491
  submission_loop(
492
  submission_queue,
493
  event_queue,
494
  config=config,
495
  tool_router=tool_router,
 
 
496
  )
497
  )
498
 
@@ -510,12 +639,16 @@ async def main():
510
 
511
  await ready_event.wait()
512
 
513
- submission_id = 0
 
514
 
515
  try:
516
  while True:
517
- # Wait for previous turn to complete
518
- await turn_complete_event.wait()
 
 
 
519
  turn_complete_event.clear()
520
 
521
  # Get user input
@@ -523,6 +656,21 @@ async def main():
523
  user_input = await get_user_input(prompt_session)
524
  except EOFError:
525
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
  # Check for exit commands
528
  if user_input.strip().lower() in ["exit", "quit", "/quit", "/exit"]:
@@ -533,35 +681,50 @@ async def main():
533
  turn_complete_event.set()
534
  continue
535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  # Submit to agent
537
- submission_id += 1
538
  submission = Submission(
539
- id=f"sub_{submission_id}",
540
  operation=Operation(
541
  op_type=OpType.USER_INPUT, data={"text": user_input}
542
  ),
543
  )
544
- # print(f"Main submitting: {submission.operation.op_type}")
545
  await submission_queue.put(submission)
546
 
547
  except KeyboardInterrupt:
548
  print("\n\nInterrupted by user")
549
 
550
  # Shutdown
551
- print("\n🛑 Shutting down agent...")
552
  shutdown_submission = Submission(
553
  id="sub_shutdown", operation=Operation(op_type=OpType.SHUTDOWN)
554
  )
555
  await submission_queue.put(shutdown_submission)
556
 
557
- await asyncio.wait_for(agent_task, timeout=5.0)
 
 
 
558
  listener_task.cancel()
559
 
560
- print("Goodbye!\n")
561
 
562
 
563
  if __name__ == "__main__":
564
  try:
565
  asyncio.run(main())
566
  except KeyboardInterrupt:
567
- print("\n\n✨ Goodbye!")
 
5
  import asyncio
6
  import json
7
  import os
8
+ import time
9
  from dataclasses import dataclass
10
  from pathlib import Path
11
  from typing import Any, Optional
12
 
13
  import litellm
 
14
  from prompt_toolkit import PromptSession
15
 
16
  from agent.config import load_config
 
31
 
32
  litellm.drop_params = True
33
 
34
+ # ── Available models (mirrors backend/routes/agent.py) ──────────────────
35
+ AVAILABLE_MODELS = [
36
+ {"id": "anthropic/claude-opus-4-6", "label": "Claude Opus 4.6"},
37
+ {"id": "huggingface/fireworks-ai/MiniMaxAI/MiniMax-M2.5", "label": "MiniMax M2.5"},
38
+ {"id": "huggingface/novita/moonshotai/kimi-k2.5", "label": "Kimi K2.5"},
39
+ {"id": "huggingface/novita/zai-org/glm-5", "label": "GLM 5"},
40
+ ]
41
+ VALID_MODEL_IDS = {m["id"] for m in AVAILABLE_MODELS}
42
+
43
 
44
  def _safe_get_args(arguments: dict) -> dict:
45
  """Safely extract args dict from arguments, handling cases where LLM passes string."""
 
50
  return args if isinstance(args, dict) else {}
51
 
52
 
53
def _get_hf_token() -> str | None:
    """Get the Hugging Face token from the environment or the local login cache.

    Resolution order:
      1. ``HF_TOKEN`` environment variable.
      2. ``HUGGING_FACE_HUB_TOKEN`` (legacy variable still honored by hub tools).
      3. The token stored by ``huggingface-cli login`` via ``huggingface_hub``.

    Returns None when no token can be found.
    """
    for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        token = os.environ.get(var)
        if token:
            return token
    try:
        # Best-effort: tolerate huggingface_hub being absent or misconfigured.
        from huggingface_hub import HfApi

        token = HfApi().token
        if token:
            return token
    except Exception:
        pass
    return None
67
 
68
  @dataclass
69
  class Operation:
 
126
  if plan_display:
127
  print(plan_display)
128
  turn_complete_event.set()
129
+ elif event.event_type == "interrupted":
130
+ print("\n(interrupted)")
131
+ turn_complete_event.set()
132
+ elif event.event_type == "undo_complete":
133
+ print("Undo complete.")
134
+ turn_complete_event.set()
135
+ elif event.event_type == "tool_log":
136
+ tool = event.data.get("tool", "") if event.data else ""
137
+ log = event.data.get("log", "") if event.data else ""
138
+ if log:
139
+ print(f" [{tool}] {log}")
140
+ elif event.event_type == "tool_state_change":
141
+ tool = event.data.get("tool", "") if event.data else ""
142
+ state = event.data.get("state", "") if event.data else ""
143
+ if state in ("approved", "rejected", "running"):
144
+ print(f" {tool}: {state}")
145
  elif event.event_type == "error":
146
  error = (
147
  event.data.get("error", "Unknown error")
 
157
  elif event.event_type == "compacted":
158
  old_tokens = event.data.get("old_tokens", 0) if event.data else 0
159
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
160
+ print(f"Compacted context: {old_tokens} -> {new_tokens} tokens")
161
  elif event.event_type == "approval_required":
162
  # Handle batch approval format
163
  tools_data = event.data.get("tools", []) if event.data else []
 
173
  }
174
  for t in tools_data
175
  ]
176
+ print(f"\n YOLO MODE: Auto-approving {count} item(s)")
177
  submission_id[0] += 1
178
  approval_submission = Submission(
179
  id=f"approval_{submission_id[0]}",
 
417
  if response == "yolo":
418
  config.yolo_mode = True
419
  print(
420
+ "YOLO MODE ACTIVATED - Auto-approving all future tool calls"
421
  )
422
  # Auto-approve this item and all remaining
423
  approvals.append(
 
474
  return await prompt_session.prompt_async(HTML("\n<b><cyan>></cyan></b> "))
475
 
476
 
477
+ # ── Slash command helpers ────────────────────────────────────────────────
478
+
479
+ HELP_TEXT = """\
480
+ Commands:
481
+ /help Show this help
482
+ /undo Undo last turn
483
+ /compact Compact context window
484
+ /model [id] Show available models or switch model
485
+ /yolo Toggle auto-approve mode
486
+ /status Show current model, turn count
487
+ /quit, /exit Exit the CLI
488
+ """
489
+
490
+
491
def _handle_slash_command(
    cmd: str,
    config,
    session_holder: list,
    submission_queue: asyncio.Queue,  # kept for interface; not used today
    submission_id: list[int],
) -> Submission | None:
    """
    Handle a slash command. Returns a Submission to enqueue, or None if
    the command was handled locally (caller should set turn_complete_event).

    Args:
        cmd: Raw user input starting with '/'.
        config: Agent config; mutated by /model and /yolo.
        session_holder: Single-element list holding the live Session (or None).
        submission_queue: Unused here; retained so callers don't change.
        submission_id: Single-element counter list, incremented for new submissions.
    """
    parts = cmd.strip().split(None, 1)
    command = parts[0].lower()
    arg = parts[1].strip() if len(parts) > 1 else ""

    if command == "/help":
        print(HELP_TEXT)
        return None

    if command == "/undo":
        submission_id[0] += 1
        return Submission(
            id=f"sub_{submission_id[0]}",
            operation=Operation(op_type=OpType.UNDO),
        )

    if command == "/compact":
        submission_id[0] += 1
        return Submission(
            id=f"sub_{submission_id[0]}",
            operation=Operation(op_type=OpType.COMPACT),
        )

    if command == "/model":
        if not arg:
            print("Available models:")
            current = config.model_name if config else ""
            for m in AVAILABLE_MODELS:
                marker = " <-- current" if m["id"] == current else ""
                print(f"  {m['id']} ({m['label']}){marker}")
            return None
        if arg not in VALID_MODEL_IDS:
            print(f"Unknown model: {arg}")
            # sorted() so the error output is deterministic (sets have no order).
            print(f"Valid: {', '.join(sorted(VALID_MODEL_IDS))}")
            return None
        session = session_holder[0] if session_holder else None
        if session:
            session.update_model(arg)
            print(f"Model switched to {arg}")
        else:
            # No live session yet: record the choice on the config so the
            # session picks it up when it starts.
            config.model_name = arg
            print(f"Model set to {arg} (session not started yet)")
        return None

    if command == "/yolo":
        config.yolo_mode = not config.yolo_mode
        state = "ON" if config.yolo_mode else "OFF"
        print(f"YOLO mode: {state}")
        return None

    if command == "/status":
        session = session_holder[0] if session_holder else None
        print(f"Model: {config.model_name}")
        if session:
            print(f"Turns: {session.turn_count}")
            print(f"Context items: {len(session.context_manager.items)}")
        return None

    print(f"Unknown command: {command}. Type /help for available commands.")
    return None
562
+
563
+
564
  async def main():
565
  """Interactive chat with the agent"""
566
  from agent.utils.terminal_display import Colors
 
569
  os.system("clear" if os.name != "nt" else "cls")
570
 
571
  banner = r"""
572
+ _ _ _ _____ _ _
573
+ | | | |_ _ __ _ __ _(_)_ __ __ _ | ___|_ _ ___ ___ / \ __ _ ___ _ __ | |_
574
  | |_| | | | |/ _` |/ _` | | '_ \ / _` | | |_ / _` |/ __/ _ \ / _ \ / _` |/ _ \ '_ \| __|
575
+ | _ | |_| | (_| | (_| | | | | | (_| | | _| (_| | (_| __/ / ___ \ (_| | __/ | | | |_
576
  |_| |_|\__,_|\__, |\__, |_|_| |_|\__, | |_| \__,_|\___\___| /_/ \_\__, |\___|_| |_|\__|
577
  |___/ |___/ |___/ |___/
578
  """
579
 
580
  print(format_separator())
581
  print(f"{Colors.YELLOW} {banner}{Colors.RESET}")
582
+ print("Type your messages below. Type /help for commands, /quit to exit.\n")
583
  print(format_separator())
584
  # Wait for agent to initialize
585
  print("Initializing agent...")
586
 
587
+ # HF token
588
+ hf_token = _get_hf_token()
589
+ if hf_token:
590
+ print("HF token loaded")
591
+ else:
592
+ print("Warning: No HF token found. Set HF_TOKEN or run `huggingface-cli login`.")
593
+
594
  # Create queues for communication
595
  submission_queue = asyncio.Queue()
596
  event_queue = asyncio.Queue()
 
604
  config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
605
  config = load_config(config_path)
606
 
607
+ # Create tool router with local mode
608
  print(f"Loading MCP servers: {', '.join(config.mcpServers.keys())}")
609
+ tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)
610
 
611
  # Create prompt session for input
612
  prompt_session = PromptSession()
613
 
614
+ # Session holder for interrupt/model/status access
615
+ session_holder = [None]
616
+
617
  agent_task = asyncio.create_task(
618
  submission_loop(
619
  submission_queue,
620
  event_queue,
621
  config=config,
622
  tool_router=tool_router,
623
+ session_holder=session_holder,
624
+ hf_token=hf_token,
625
  )
626
  )
627
 
 
639
 
640
  await ready_event.wait()
641
 
642
+ submission_id = [0]
643
+ last_interrupt_time = 0.0
644
 
645
  try:
646
  while True:
647
+ # Wait for previous turn to complete, with interrupt support
648
+ try:
649
+ await turn_complete_event.wait()
650
+ except asyncio.CancelledError:
651
+ break
652
  turn_complete_event.clear()
653
 
654
  # Get user input
 
656
  user_input = await get_user_input(prompt_session)
657
  except EOFError:
658
  break
659
+ except KeyboardInterrupt:
660
+ now = time.monotonic()
661
+ if now - last_interrupt_time < 3.0:
662
+ print("\nDouble Ctrl+C, exiting...")
663
+ break
664
+ last_interrupt_time = now
665
+ # If agent is busy, cancel it
666
+ session = session_holder[0]
667
+ if session and not turn_complete_event.is_set():
668
+ session.cancel()
669
+ print("\nInterrupting agent...")
670
+ else:
671
+ print("\n(Ctrl+C again within 3s to exit)")
672
+ turn_complete_event.set()
673
+ continue
674
 
675
  # Check for exit commands
676
  if user_input.strip().lower() in ["exit", "quit", "/quit", "/exit"]:
 
681
  turn_complete_event.set()
682
  continue
683
 
684
+ # Handle slash commands
685
+ if user_input.strip().startswith("/"):
686
+ sub = _handle_slash_command(
687
+ user_input.strip(), config, session_holder, submission_queue, submission_id
688
+ )
689
+ if sub is None:
690
+ # Command handled locally, loop back for input
691
+ turn_complete_event.set()
692
+ continue
693
+ else:
694
+ await submission_queue.put(sub)
695
+ continue
696
+
697
  # Submit to agent
698
+ submission_id[0] += 1
699
  submission = Submission(
700
+ id=f"sub_{submission_id[0]}",
701
  operation=Operation(
702
  op_type=OpType.USER_INPUT, data={"text": user_input}
703
  ),
704
  )
 
705
  await submission_queue.put(submission)
706
 
707
  except KeyboardInterrupt:
708
  print("\n\nInterrupted by user")
709
 
710
  # Shutdown
711
+ print("\nShutting down agent...")
712
  shutdown_submission = Submission(
713
  id="sub_shutdown", operation=Operation(op_type=OpType.SHUTDOWN)
714
  )
715
  await submission_queue.put(shutdown_submission)
716
 
717
+ try:
718
+ await asyncio.wait_for(agent_task, timeout=5.0)
719
+ except asyncio.TimeoutError:
720
+ agent_task.cancel()
721
  listener_task.cancel()
722
 
723
+ print("Goodbye!\n")
724
 
725
 
726
  if __name__ == "__main__":
727
  try:
728
  asyncio.run(main())
729
  except KeyboardInterrupt:
730
+ print("\n\nGoodbye!")
agent/tools/local_tools.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Local tool implementations — bash/read/write/edit running on the user's machine.
3
+
4
+ Drop-in replacement for sandbox tools when running in CLI (local) mode.
5
+ Same tool specs (names, parameters) but handlers execute locally via
6
+ subprocess/pathlib instead of going through a remote sandbox.
7
+ """
8
+
9
from __future__ import annotations

import asyncio
import subprocess
from pathlib import Path
from typing import Any

from agent.tools.sandbox_client import Sandbox
16
+
17
+ MAX_OUTPUT_CHARS = 30_000
18
+ MAX_LINE_LENGTH = 2000
19
+ DEFAULT_READ_LINES = 2000
20
+ DEFAULT_TIMEOUT = 120
21
+ MAX_TIMEOUT = 600
22
+
23
+
24
+ # ── Handlers ────────────────────────────────────────────────────────────
25
+
26
async def _bash_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Run a shell command locally and return (output, success).

    Output combines stdout and stderr, truncated to MAX_OUTPUT_CHARS.
    The timeout is clamped to MAX_TIMEOUT seconds.
    """
    command = args.get("command", "")
    if not command:
        return "No command provided.", False
    work_dir = args.get("work_dir", ".")
    timeout = min(args.get("timeout") or DEFAULT_TIMEOUT, MAX_TIMEOUT)
    try:
        # Run in a worker thread: subprocess.run() blocks, and calling it
        # directly inside this coroutine would stall the event loop — freezing
        # event processing and the CLI's Ctrl+C interrupt handling for the
        # duration of the command.
        result = await asyncio.to_thread(
            subprocess.run,
            command,
            shell=True,
            capture_output=True,
            text=True,
            cwd=work_dir,
            timeout=timeout,
        )
        output = result.stdout + result.stderr
        if len(output) > MAX_OUTPUT_CHARS:
            output = output[:MAX_OUTPUT_CHARS] + "\n... (output truncated)"
        if not output.strip():
            output = "(no output)"
        return output, result.returncode == 0
    except subprocess.TimeoutExpired:
        return f"Command timed out after {timeout}s.", False
    except Exception as e:
        return f"bash error: {e}", False
51
+
52
+
53
async def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Read a local text file and return numbered lines (like `cat -n`).

    Honors optional 'offset' (1-based first line) and 'limit' arguments;
    long lines are truncated to MAX_LINE_LENGTH characters.
    """
    path_arg = args.get("path", "")
    if not path_arg:
        return "No path provided.", False

    target = Path(path_arg)
    if not target.exists():
        return f"File not found: {path_arg}", False
    if target.is_dir():
        return "Cannot read a directory. Use bash with 'ls' instead.", False

    try:
        all_lines = target.read_text().splitlines()
    except Exception as e:
        return f"read error: {e}", False

    start = max(args.get("offset") or 1, 1)
    max_lines = args.get("limit") or DEFAULT_READ_LINES
    window = all_lines[start - 1 : start - 1 + max_lines]

    rendered: list[str] = []
    for lineno, text in enumerate(window, start=start):
        if len(text) > MAX_LINE_LENGTH:
            text = text[:MAX_LINE_LENGTH] + "..."
        rendered.append(f"{lineno:>6}\t{text}")
    return "\n".join(rendered), True
77
+
78
+
79
async def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Write content to a local file, creating parent directories as needed.

    Returns (message, success); overwrites any existing file at 'path'.
    """
    file_path = args.get("path", "")
    content = args.get("content", "")
    if not file_path:
        return "No path provided.", False
    p = Path(file_path)
    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content)
        # len(content) counts characters, not encoded bytes — say so.
        return f"Wrote {len(content)} characters to {file_path}", True
    except Exception as e:
        return f"write error: {e}", False
91
+
92
+
93
async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
    """Replace old_str with new_str in a local file.

    old_str must be non-empty and (unless replace_all is true) unique
    in the file. Returns (message, success).
    """
    file_path = args.get("path", "")
    old_str = args.get("old_str", "")
    new_str = args.get("new_str", "")
    replace_all = args.get("replace_all", False)

    if not file_path:
        return "No path provided.", False
    if not old_str:
        # Guard: str.count("") == len(text) + 1 and replace("") would insert
        # new_str between every character, silently corrupting the file.
        return "old_str must not be empty.", False
    if old_str == new_str:
        return "old_str and new_str must differ.", False

    p = Path(file_path)
    if not p.exists():
        return f"File not found: {file_path}", False

    try:
        text = p.read_text()
    except Exception as e:
        return f"edit read error: {e}", False

    count = text.count(old_str)
    if count == 0:
        return "old_str not found in file.", False
    if count > 1 and not replace_all:
        return (
            f"old_str appears {count} times. Use replace_all=true to replace all, "
            "or provide a more specific old_str."
        ), False

    new_text = text.replace(old_str, new_str) if replace_all else text.replace(old_str, new_str, 1)
    try:
        p.write_text(new_text)
    except Exception as e:
        return f"edit write error: {e}", False

    replacements = count if replace_all else 1
    return f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})", True
130
+
131
+
132
+ # ── Public API ──────────────────────────────────────────────────────────
133
+
134
+ _HANDLERS = {
135
+ "bash": _bash_handler,
136
+ "read": _read_handler,
137
+ "write": _write_handler,
138
+ "edit": _edit_handler,
139
+ }
140
+
141
+
142
def get_local_tools():
    """Return local ToolSpecs for bash/read/write/edit (no sandbox_create)."""
    # Imported lazily to avoid a circular import with agent.core.tools.
    from agent.core.tools import ToolSpec

    specs = []
    for tool_name, sandbox_spec in Sandbox.TOOLS.items():
        local_handler = _HANDLERS.get(tool_name)
        if local_handler is None:
            # Sandbox-only tools (e.g. sandbox_create) have no local equivalent.
            continue
        specs.append(
            ToolSpec(
                name=tool_name,
                description=sandbox_spec["description"],
                parameters=sandbox_spec["parameters"],
                handler=local_handler,
            )
        )
    return specs
eval/hf_agent_connector.py CHANGED
@@ -5,7 +5,6 @@ import sys
5
  from pathlib import Path
6
  from typing import Any
7
 
8
- from lmnr import observe
9
 
10
  from agent.config import Config, load_config
11
  from agent.core.agent_loop import Handlers
@@ -40,7 +39,6 @@ class AgentResponseGenerator:
40
  """Expose the agent model name for downstream logging."""
41
  return self.config.model_name
42
 
43
- @observe(name="eval_run")
44
  async def run(self, prompt: str) -> str:
45
  """
46
  Execute the agent loop for a single prompt and return the assistant reply.
 
5
  from pathlib import Path
6
  from typing import Any
7
 
 
8
 
9
  from agent.config import Config, load_config
10
  from agent.core.agent_loop import Handlers
 
39
  """Expose the agent model name for downstream logging."""
40
  return self.config.model_name
41
 
 
42
  async def run(self, prompt: str) -> str:
43
  """
44
  Execute the agent loop for a single prompt and return the assistant reply.
eval/solvers.py CHANGED
@@ -14,7 +14,6 @@ import litellm
14
  from inspect_ai.model import ChatMessageAssistant, ModelOutput
15
  from inspect_ai.solver import Solver, solver
16
  from inspect_ai.solver._task_state import TaskState
17
- from lmnr import Laminar, LaminarLiteLLMCallback
18
 
19
  from eval.hf_agent_connector import AgentResponseGenerator
20
 
@@ -39,10 +38,6 @@ def hf_agent(
39
  config_path: str = "agent/config_mcp_example.json",
40
  max_iterations: int = 10,
41
  ) -> Solver:
42
- # init lmnr for observability
43
- Laminar.initialize(project_api_key=os.environ.get("LMNR_API_KEY"))
44
- litellm.callbacks = [LaminarLiteLLMCallback()]
45
- print("✅ Laminar initialized")
46
 
47
  runner = AgentResponseGenerator(
48
  config_path=config_path,
 
14
  from inspect_ai.model import ChatMessageAssistant, ModelOutput
15
  from inspect_ai.solver import Solver, solver
16
  from inspect_ai.solver._task_state import TaskState
 
17
 
18
  from eval.hf_agent_connector import AgentResponseGenerator
19
 
 
38
  config_path: str = "agent/config_mcp_example.json",
39
  max_iterations: int = 10,
40
  ) -> Solver:
 
 
 
 
41
 
42
  runner = AgentResponseGenerator(
43
  config_path=config_path,
frontend/src/components/Chat/ToolCallGroup.tsx CHANGED
@@ -24,6 +24,32 @@ interface ToolCallGroupProps {
24
  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => Promise<boolean>;
25
  }
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  // ---------------------------------------------------------------------------
28
  // Visual helpers
29
  // ---------------------------------------------------------------------------
@@ -108,29 +134,49 @@ function InlineApproval({
108
 
109
  return (
110
  <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
111
- {toolName === 'sandbox_create' && args && (
112
- <Box sx={{ mb: 1.5 }}>
113
- <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
114
- Create sandbox on{' '}
115
- <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
116
- {String(args.hardware || 'cpu-basic')}
117
- </Box>
118
- {!!args.private && (
119
- <Box component="span" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>
120
- )}
121
- </Typography>
122
- </Box>
123
- )}
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- {toolName === 'hf_jobs' && args && (
 
 
 
126
  <Box sx={{ mb: 1.5 }}>
127
  <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
128
  Execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{scriptLabel.replace('Script', 'Job')}</Box> on{' '}
129
  <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
130
- {String(args.hardware_flavor || 'default')}
131
  </Box>
 
 
 
 
 
132
  {!!args.timeout && (
133
- <> with timeout <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
134
  {String(args.timeout)}
135
  </Box></>
136
  )}
@@ -184,7 +230,8 @@ function InlineApproval({
184
  </Box>
185
  )}
186
  </Box>
187
- )}
 
188
 
189
  <Box sx={{ display: 'flex', gap: 1, mb: 1 }}>
190
  <TextField
 
24
  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => Promise<boolean>;
25
  }
26
 
27
+ // ---------------------------------------------------------------------------
28
+ // Hardware pricing ($/hr) — from HF Spaces & Jobs pricing
29
+ // ---------------------------------------------------------------------------
30
+ const HARDWARE_PRICING: Record<string, string> = {
31
+ 'cpu-basic': 'free',
32
+ 'cpu-upgrade': '$0.03/hr',
33
+ 't4-small': '$0.60/hr',
34
+ 't4-medium': '$1.00/hr',
35
+ 'a10g-small': '$1.05/hr',
36
+ 'a10g-large': '$3.15/hr',
37
+ 'a10g-largex2': '$6.30/hr',
38
+ 'a10g-largex4': '$12.60/hr',
39
+ 'a100-large': '$4.13/hr',
40
+ 'a100x4': '$16.52/hr',
41
+ 'a100x8': '$33.04/hr',
42
+ 'l4x1': '$0.80/hr',
43
+ 'l4x4': '$3.20/hr',
44
+ 'l40sx1': '$1.80/hr',
45
+ 'l40sx4': '$7.20/hr',
46
+ 'l40sx8': '$14.40/hr',
47
+ };
48
+
49
+ function costLabel(hardware: string): string | null {
50
+ return HARDWARE_PRICING[hardware] || null;
51
+ }
52
+
53
  // ---------------------------------------------------------------------------
54
  // Visual helpers
55
  // ---------------------------------------------------------------------------
 
134
 
135
  return (
136
  <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>
137
+ {toolName === 'sandbox_create' && args && (() => {
138
+ const hw = String(args.hardware || 'cpu-basic');
139
+ const cost = costLabel(hw);
140
+ return (
141
+ <Box sx={{ mb: 1.5 }}>
142
+ <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 0.5 }}>
143
+ Create a remote dev environment on{' '}
144
+ <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
145
+ {hw}
146
+ </Box>
147
+ {cost && (
148
+ <Box component="span" sx={{ color: cost === 'free' ? 'var(--accent-green)' : 'var(--accent-yellow)', fontWeight: 500 }}>
149
+ {' '}({cost})
150
+ </Box>
151
+ )}
152
+ {!!args.private && (
153
+ <Box component="span" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>
154
+ )}
155
+ </Typography>
156
+ <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem', opacity: 0.7 }}>
157
+ Creates a temporary HF Space to develop and test scripts before running jobs. Takes 1-2 min to start.
158
+ </Typography>
159
+ </Box>
160
+ );
161
+ })()}
162
 
163
+ {toolName === 'hf_jobs' && args && (() => {
164
+ const hw = String(args.hardware_flavor || 'cpu-basic');
165
+ const cost = costLabel(hw);
166
+ return (
167
  <Box sx={{ mb: 1.5 }}>
168
  <Typography variant="body2" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>
169
  Execute <Box component="span" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{scriptLabel.replace('Script', 'Job')}</Box> on{' '}
170
  <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
171
+ {hw}
172
  </Box>
173
+ {cost && (
174
+ <Box component="span" sx={{ color: cost === 'free' ? 'var(--accent-green)' : 'var(--accent-yellow)', fontWeight: 500 }}>
175
+ {' '}({cost})
176
+ </Box>
177
+ )}
178
  {!!args.timeout && (
179
+ <> for up to <Box component="span" sx={{ fontWeight: 500, color: 'var(--text)' }}>
180
  {String(args.timeout)}
181
  </Box></>
182
  )}
 
230
  </Box>
231
  )}
232
  </Box>
233
+ );
234
+ })()}
235
 
236
  <Box sx={{ display: 'flex', gap: 1, mb: 1 }}>
237
  <TextField
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -98,28 +98,19 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
98
  },
99
  onToolLog: (tool: string, log: string) => {
100
  if (!isActiveRef.current) return;
101
- const STREAMABLE_TOOLS = new Set(['hf_jobs', 'sandbox', 'bash']);
102
- if (!STREAMABLE_TOOLS.has(tool)) return;
 
 
 
 
 
103
 
104
- const state = useAgentStore.getState();
105
- const existingOutput = state.panelData?.output?.content || '';
106
- const newContent = existingOutput
107
- ? existingOutput + '\n' + log
108
- : log;
109
-
110
- if (!state.panelData) {
111
- // Initialize panel when it doesn't exist (bash bypasses approval, so no panel yet)
112
- const title = tool === 'hf_jobs' ? 'Job Output' : 'Sandbox';
113
- setPanel(
114
- { title, output: { content: newContent, language: 'text' } },
115
- 'output',
116
- );
117
- } else {
118
  setPanelOutput({ content: newContent, language: 'text' });
119
- }
120
 
121
- if (!useLayoutStore.getState().isRightPanelOpen) {
122
- setRightPanelOpen(true);
 
123
  }
124
  },
125
  onConnectionChange: (connected: boolean) => {
@@ -178,12 +169,6 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
178
  });
179
  setRightPanelOpen(true);
180
  setLeftSidebarOpen(false);
181
- } else if (toolName === 'bash' && args.command) {
182
- // Initialize panel for sandbox bash — command in script tab, output tab active for streaming
183
- setPanel(
184
- { title: 'Sandbox', script: { content: String(args.command), language: 'bash' } },
185
- 'output',
186
- );
187
  }
188
  },
189
  onToolOutputPanel: (toolName: string, _toolCallId: string, output: string, success: boolean) => {
@@ -191,9 +176,6 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
191
  if (toolName === 'hf_jobs' && output) {
192
  setPanelOutput({ content: output, language: 'markdown' });
193
  if (!success) useAgentStore.getState().setPanelView('output');
194
- } else if (toolName === 'bash') {
195
- // Streaming already populated the output — ensure output view on error
196
- if (!success) useAgentStore.getState().setPanelView('output');
197
  }
198
  },
199
  onStreaming: () => {
 
98
  },
99
  onToolLog: (tool: string, log: string) => {
100
  if (!isActiveRef.current) return;
101
+ if (tool === 'hf_jobs' || tool === 'sandbox') {
102
+ const state = useAgentStore.getState();
103
+ const existingOutput = state.panelData?.output?.content || '';
104
+ const header = tool === 'sandbox' ? '--- Sandbox creation ---' : '--- Job execution started ---';
105
+ const newContent = existingOutput
106
+ ? existingOutput + '\n' + log
107
+ : header + '\n' + log;
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  setPanelOutput({ content: newContent, language: 'text' });
 
110
 
111
+ if (!useLayoutStore.getState().isRightPanelOpen) {
112
+ setRightPanelOpen(true);
113
+ }
114
  }
115
  },
116
  onConnectionChange: (connected: boolean) => {
 
169
  });
170
  setRightPanelOpen(true);
171
  setLeftSidebarOpen(false);
 
 
 
 
 
 
172
  }
173
  },
174
  onToolOutputPanel: (toolName: string, _toolCallId: string, output: string, success: boolean) => {
 
176
  if (toolName === 'hf_jobs' && output) {
177
  setPanelOutput({ content: output, language: 'markdown' });
178
  if (!success) useAgentStore.getState().setPanelView('output');
 
 
 
179
  }
180
  },
181
  onStreaming: () => {
frontend/src/store/agentStore.ts CHANGED
@@ -135,10 +135,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
135
  setPanelView: (view) => set({ panelView: view }),
136
 
137
  setPanelOutput: (output) => set((state) => ({
138
- panelData: state.panelData
139
- ? { ...state.panelData, output }
140
- : { title: 'Output', output },
141
- panelView: 'output',
142
  })),
143
 
144
  updatePanelScript: (content) => set((state) => ({
 
135
  setPanelView: (view) => set({ panelView: view }),
136
 
137
  setPanelOutput: (output) => set((state) => ({
138
+ panelData: state.panelData ? { ...state.panelData, output } : null,
 
 
 
139
  })),
140
 
141
  updatePanelScript: (content) => set((state) => ({
pyproject.toml CHANGED
@@ -18,7 +18,6 @@ agent = [
18
  "litellm>=1.0.0",
19
  "huggingface-hub>=1.0.1",
20
  "fastmcp>=2.4.0",
21
- "lmnr>=0.7.23", # Note: Using base package to avoid torch/transformers from [all] extra
22
  "prompt-toolkit>=3.0.0",
23
  "thefuzz>=0.22.1",
24
  "nbconvert>=7.16.6",
 
18
  "litellm>=1.0.0",
19
  "huggingface-hub>=1.0.1",
20
  "fastmcp>=2.4.0",
 
21
  "prompt-toolkit>=3.0.0",
22
  "thefuzz>=0.22.1",
23
  "nbconvert>=7.16.6",