akseljoonas HF Staff commited on
Commit
66a2425
Β·
1 Parent(s): fee4f16

sandbox start

Browse files
agent/core/session.py CHANGED
@@ -59,6 +59,7 @@ class Session:
59
  self.is_running = True
60
  self.current_task: asyncio.Task | None = None
61
  self.pending_approval: Optional[dict[str, Any]] = None
 
62
 
63
  # Session trajectory logging
64
  self.logged_events: list[dict] = []
 
59
  self.is_running = True
60
  self.current_task: asyncio.Task | None = None
61
  self.pending_approval: Optional[dict[str, Any]] = None
62
+ self.sandbox = None
63
 
64
  # Session trajectory logging
65
  self.logged_events: list[dict] = []
agent/core/tools.py CHANGED
@@ -45,6 +45,7 @@ from agent.tools.hf_repo_git_tool import (
45
  )
46
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
47
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 
48
 
49
  # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
50
  # from agent.tools.private_hf_repo_tools import (
@@ -327,6 +328,9 @@ def create_builtin_tools() -> list[ToolSpec]:
327
  ),
328
  ]
329
 
 
 
 
330
  tool_names = ", ".join([t.name for t in tools])
331
  print(f"Loaded {len(tools)} built-in tools: {tool_names}")
332
 
 
45
  )
46
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
47
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
48
+ from agent.tools.sandbox_tool import get_sandbox_tools
49
 
50
  # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
51
  # from agent.tools.private_hf_repo_tools import (
 
328
  ),
329
  ]
330
 
331
+ # Sandbox tools
332
+ tools = get_sandbox_tools() + tools
333
+
334
  tool_names = ", ".join([t.name for t in tools])
335
  print(f"Loaded {len(tools)} built-in tools: {tool_names}")
336
 
agent/prompts/system_prompt_v2.yaml CHANGED
@@ -341,6 +341,20 @@ system_prompt: |
341
  - ⚠️ Include HF_TOKEN for Hub operations
342
  - ⚠️ Storage is EPHEMERAL - must push_to_hub
343
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  **hf_private_repos:**
345
  - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
346
  - Upload logs, scripts, results that can't push_to_hub
 
341
  - ⚠️ Include HF_TOKEN for Hub operations
342
  - ⚠️ Storage is EPHEMERAL - must push_to_hub
343
 
344
+ ## Sandbox (Interactive Development Environment)
345
+
346
+ **sandbox_create:**
347
+ - Persistent remote Linux environment on HF Spaces for interactive development
348
+ - First call sandbox_create with hardware choice, then use bash/read/write/edit/glob/grep freely
349
+ - Hardware: cpu-basic (free tier), cpu-upgrade (8vCPU/32GB), t4-small (16GB GPU), a10g-small (24GB GPU), a10g-large (24GB GPU + 46GB RAM), a100-large (80GB GPU)
350
+ - Use for: iterative development, debugging, multi-step workflows, testing code, installing packages
351
+ - Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training
352
+
353
+ **bash / read / write / edit / glob / grep / upload:**
354
+ - Available after sandbox_create β€” no additional approvals needed
355
+ - Same semantics as local file/shell operations, but run on the remote sandbox
356
+ - bash: run shell commands; read/write/edit: file operations; glob/grep: search; upload: transfer files
357
+
358
  **hf_private_repos:**
359
  - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
360
  - Upload logs, scripts, results that can't push_to_hub
agent/tools/sandbox_client.py ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = ["huggingface_hub>=0.20.0", "httpx>=0.27.0"]
5
+ # ///
6
+ """
7
+ Sandbox Tools β€” Agent-native primitives for HF Space dev-mode sandboxes.
8
+
9
+ Architecture:
10
+ - Creates a sandbox by duplicating a template Space (runs sandbox_server.py)
11
+ - Waits for it to come online
12
+ - Communicates via HTTPS to the Space's API
13
+ - Optionally deletes the Space when done
14
+
15
+ Lifecycle:
16
+ sb = Sandbox.create(owner="burtenshaw") # duplicate, wait, connect
17
+ sb = Sandbox.create(owner="burtenshaw", # with options
18
+ hardware="t4-small",
19
+ private=True,
20
+ sleep_time=3600)
21
+ sb = Sandbox.connect("burtenshaw/my-sandbox-abc") # attach to existing
22
+
23
+ sb.bash("uv run train.py")
24
+ sb.read("/app/train.py")
25
+ sb.edit("/app/train.py", old_str="lr=1e-3", new_str="lr=1e-4")
26
+
27
+ sb.delete() # tear down when done
28
+
29
+ # Or use as a context manager for automatic cleanup
30
+ with Sandbox.create(owner="burtenshaw") as sb:
31
+ sb.bash("python train.py")
32
+ # Space deleted on exit
33
+
34
+ Tools: bash, read, write, edit, glob, grep, upload
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import io
40
+ import os
41
+ import sys
42
+ import time
43
+ import uuid
44
+ from dataclasses import dataclass, field
45
+ from pathlib import Path
46
+ from typing import Any
47
+
48
+ import httpx
49
+ from huggingface_hub import CommitOperationAdd, HfApi
50
+
51
+ TEMPLATE_SPACE = "burtenshaw/sandbox"
52
+ HARDWARE_OPTIONS = [
53
+ "cpu-basic",
54
+ "cpu-upgrade",
55
+ "t4-small",
56
+ "t4-medium",
57
+ "a10g-small",
58
+ "a10g-large",
59
+ "a100-large",
60
+ ]
61
+ OUTPUT_LIMIT = 30000
62
+ LINE_LIMIT = 2000
63
+ DEFAULT_READ_LIMIT = 2000
64
+ DEFAULT_TIMEOUT = 120
65
+ MAX_TIMEOUT = 600
66
+ WAIT_TIMEOUT = 300
67
+ WAIT_INTERVAL = 5
68
+ API_WAIT_TIMEOUT = 180
69
+
70
+ _DOCKERFILE = """\
71
+ FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
72
+
73
+ RUN apt-get update && \\
74
+ apt-get install -y \\
75
+ bash git git-lfs wget curl procps \\
76
+ htop vim nano jq tmux \\
77
+ build-essential grep && \\
78
+ rm -rf /var/lib/apt/lists/*
79
+
80
+ # Install server dependencies (as root, before USER switch)
81
+ RUN uv pip install --system fastapi uvicorn python-multipart
82
+
83
+ RUN useradd -m -u 1000 user
84
+ USER user
85
+
86
+ ENV HOME=/home/user \\
87
+ PATH=/home/user/.local/bin:$PATH
88
+
89
+ WORKDIR /app
90
+ COPY --chown=user . /app
91
+
92
+ EXPOSE 7860
93
+
94
+ CMD ["python", "sandbox_server.py"]
95
+ """
96
+
97
+
98
@dataclass
class ToolResult:
    """Normalized outcome of a sandbox tool invocation.

    Attributes:
        success: True when the operation completed without error.
        output: Captured stdout / tool output (may be empty).
        error: Error description when success is False.
    """

    success: bool
    output: str = ""
    error: str = ""

    def __str__(self):
        """Human-readable form: the output on success, 'ERROR: ...' otherwise."""
        if not self.success:
            return f"ERROR: {self.error}"
        return self.output if self.output else "(no output)"

    def to_dict(self) -> dict:
        """Serialize to a plain dict (for JSON transport or logging)."""
        return {
            "success": self.success,
            "output": self.output,
            "error": self.error,
        }
111
+
112
+
113
+ @dataclass
114
+ class Sandbox:
115
+ """
116
+ A handle to an HF Space sandbox.
117
+
118
+ Use Sandbox.create() to spin up a new one, or Sandbox.connect() to
119
+ attach to an existing running Space.
120
+ """
121
+
122
+ space_id: str
123
+ token: str | None = None
124
+ work_dir: str = "/app"
125
+ timeout: int = DEFAULT_TIMEOUT
126
+ _owns_space: bool = field(default=False, repr=False)
127
+ _base_url: str = field(init=False, repr=False)
128
+ _client: httpx.Client = field(init=False, repr=False)
129
+ _hf_api: HfApi = field(init=False, repr=False)
130
+ _files_read: set = field(init=False, repr=False, default_factory=set)
131
+
132
+ def __post_init__(self):
133
+ self.token = self.token or os.environ.get("HF_TOKEN")
134
+ slug = self.space_id.replace("/", "-")
135
+ # Trailing slash is critical: httpx resolves relative paths against base_url.
136
+ # Without it, client.get("health") resolves to /health instead of /api/health.
137
+ self._base_url = f"https://{slug}.hf.space/api/"
138
+ self._client = httpx.Client(
139
+ base_url=self._base_url,
140
+ headers={"Authorization": f"Bearer {self.token}"} if self.token else {},
141
+ timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),
142
+ follow_redirects=True,
143
+ )
144
+ self._hf_api = HfApi(token=self.token)
145
+
146
+ # ── Lifecycle ─────────────────────────────────────────────────
147
+
148
+ @classmethod
149
+ def create(
150
+ cls,
151
+ owner: str,
152
+ *,
153
+ name: str | None = None,
154
+ template: str = TEMPLATE_SPACE,
155
+ hardware: str = "cpu-basic",
156
+ private: bool = False,
157
+ sleep_time: int | None = None,
158
+ token: str | None = None,
159
+ wait_timeout: int = WAIT_TIMEOUT,
160
+ ) -> Sandbox:
161
+ """
162
+ Create a new sandbox by duplicating the template Space.
163
+
164
+ Generates a unique space name, duplicates the template, waits for it
165
+ to come online, then returns a connected Sandbox.
166
+
167
+ Args:
168
+ owner: HF username or org (e.g. "burtenshaw").
169
+ name: Base name for the space. Defaults to "sandbox".
170
+ A unique suffix is always appended.
171
+ template: Source Space to duplicate (default: burtenshaw/sandbox).
172
+ hardware: Hardware tier (cpu-basic, t4-small, etc.).
173
+ private: Whether the Space should be private.
174
+ sleep_time: Auto-sleep after N seconds of inactivity.
175
+ token: HF API token. Falls back to HF_TOKEN env var.
176
+ wait_timeout: Max seconds to wait for Space to start (default: 300).
177
+
178
+ Returns:
179
+ A Sandbox instance connected to the running Space.
180
+ """
181
+ token = token or os.environ.get("HF_TOKEN")
182
+ api = HfApi(token=token)
183
+
184
+ base = name or "sandbox"
185
+ suffix = uuid.uuid4().hex[:8]
186
+ space_id = f"{owner}/{base}-{suffix}"
187
+
188
+ print(f"Creating sandbox: {space_id} (from {template})...")
189
+
190
+ kwargs = {
191
+ "from_id": template,
192
+ "to_id": space_id,
193
+ "private": private,
194
+ "hardware": hardware,
195
+ }
196
+ if sleep_time is not None:
197
+ kwargs["sleep_time"] = sleep_time
198
+
199
+ api.duplicate_space(**kwargs)
200
+ print(f"Space created: https://huggingface.co/spaces/{space_id}")
201
+
202
+ # Upload sandbox server and Dockerfile (triggers rebuild)
203
+ cls._setup_server(space_id, api)
204
+
205
+ # Wait for it to come online (rebuild + start)
206
+ print(f"Waiting for Space to start (timeout: {wait_timeout}s)...")
207
+ deadline = time.time() + wait_timeout
208
+ while time.time() < deadline:
209
+ runtime = api.get_space_runtime(space_id)
210
+ if runtime.stage == "RUNNING":
211
+ print(f"Space is running (hardware: {runtime.hardware})")
212
+ break
213
+ if runtime.stage in ("RUNTIME_ERROR", "BUILD_ERROR"):
214
+ raise RuntimeError(
215
+ f"Space failed to start: {runtime.stage}. "
216
+ f"Check https://huggingface.co/spaces/{space_id}"
217
+ )
218
+ print(f" {runtime.stage}...")
219
+ time.sleep(WAIT_INTERVAL)
220
+ else:
221
+ raise TimeoutError(
222
+ f"Space did not start within {wait_timeout}s. "
223
+ f"Check https://huggingface.co/spaces/{space_id}"
224
+ )
225
+
226
+ # Wait for the API server to be responsive (non-fatal)
227
+ sb = cls(space_id=space_id, token=token, _owns_space=True)
228
+ try:
229
+ sb._wait_for_api(timeout=API_WAIT_TIMEOUT)
230
+ except TimeoutError as e:
231
+ print(
232
+ f"Warning: API health check timed out ({e}), but Space is RUNNING. Continuing."
233
+ )
234
+ return sb
235
+
236
+ @staticmethod
237
+ def _setup_server(space_id: str, api: HfApi) -> None:
238
+ """Upload FastAPI server + Dockerfile to the sandbox Space (single commit)."""
239
+ server_path = Path(__file__).parent / "example_sandbox_server.py"
240
+ server_code = server_path.read_text()
241
+
242
+ print(f"Uploading sandbox server to {space_id}...")
243
+ api.create_commit(
244
+ repo_id=space_id,
245
+ repo_type="space",
246
+ operations=[
247
+ CommitOperationAdd(
248
+ path_in_repo="sandbox_server.py",
249
+ path_or_fileobj=io.BytesIO(server_code.encode()),
250
+ ),
251
+ CommitOperationAdd(
252
+ path_in_repo="Dockerfile",
253
+ path_or_fileobj=io.BytesIO(_DOCKERFILE.encode()),
254
+ ),
255
+ ],
256
+ commit_message="Setup sandbox server",
257
+ )
258
+ print("Server files uploaded, rebuild triggered.")
259
+
260
+ @classmethod
261
+ def connect(cls, space_id: str, *, token: str | None = None) -> Sandbox:
262
+ """
263
+ Connect to an existing running Space.
264
+
265
+ Does a health check to verify the Space is reachable.
266
+ """
267
+ sb = cls(space_id=space_id, token=token, _owns_space=False)
268
+ sb._wait_for_api(timeout=60)
269
+ return sb
270
+
271
+ def _wait_for_api(self, timeout: int = API_WAIT_TIMEOUT):
272
+ """Poll the health endpoint until the server responds."""
273
+ deadline = time.time() + timeout
274
+ last_err = None
275
+ last_status = None
276
+ while time.time() < deadline:
277
+ try:
278
+ resp = self._client.get("health", timeout=10)
279
+ last_status = resp.status_code
280
+ if resp.status_code == 200:
281
+ print(f"API is responsive at {self._base_url}")
282
+ return
283
+ except Exception as e:
284
+ last_err = e
285
+ time.sleep(3)
286
+ raise TimeoutError(
287
+ f"Sandbox API at {self._base_url} not responding after {timeout}s. "
288
+ f"Last status: {last_status}, last error: {last_err}"
289
+ )
290
+
291
+ def delete(self):
292
+ """Delete the Space. Only works if this Sandbox created it."""
293
+ if not self._owns_space:
294
+ raise RuntimeError(
295
+ f"This Sandbox did not create {self.space_id}. "
296
+ f"Use self._hf_api.delete_repo() directly if you're sure."
297
+ )
298
+ print(f"Deleting sandbox: {self.space_id}...")
299
+ self._hf_api.delete_repo(self.space_id, repo_type="space")
300
+ self._client.close()
301
+ print("Deleted.")
302
+
303
+ def pause(self):
304
+ """Pause the Space (stops billing, preserves state)."""
305
+ self._hf_api.pause_space(self.space_id)
306
+
307
+ def restart(self):
308
+ """Restart the Space."""
309
+ self._hf_api.restart_space(self.space_id)
310
+ self._wait_for_api()
311
+
312
+ @property
313
+ def url(self) -> str:
314
+ """Public URL of the Space."""
315
+ return f"https://huggingface.co/spaces/{self.space_id}"
316
+
317
+ @property
318
+ def status(self) -> str:
319
+ """Current Space stage (RUNNING, BUILDING, PAUSED, etc.)."""
320
+ return self._hf_api.get_space_runtime(self.space_id).stage
321
+
322
+ def __enter__(self) -> Sandbox:
323
+ return self
324
+
325
+ def __exit__(self, *exc):
326
+ if self._owns_space:
327
+ try:
328
+ self.delete()
329
+ except Exception as e:
330
+ print(f"Warning: failed to delete sandbox: {e}", file=sys.stderr)
331
+ self._client.close()
332
+
333
+ # ── HTTP plumbing ─────────────────────────────────────────────
334
+
335
+ def _call(
336
+ self, endpoint: str, payload: dict, timeout: float | None = None
337
+ ) -> ToolResult:
338
+ # Strip leading slash for correct httpx base_url resolution
339
+ endpoint = endpoint.lstrip("/")
340
+ try:
341
+ resp = self._client.post(
342
+ endpoint,
343
+ json=payload,
344
+ timeout=timeout or self.timeout,
345
+ )
346
+ data = resp.json()
347
+ if resp.status_code == 200:
348
+ return ToolResult(
349
+ success=data.get("success", True),
350
+ output=data.get("output", ""),
351
+ error=data.get("error", ""),
352
+ )
353
+ return ToolResult(
354
+ success=False,
355
+ error=data.get("error", f"HTTP {resp.status_code}"),
356
+ )
357
+ except httpx.TimeoutException:
358
+ return ToolResult(
359
+ success=False, error=f"Timeout after {timeout or self.timeout}s"
360
+ )
361
+ except httpx.ConnectError:
362
+ return ToolResult(
363
+ success=False,
364
+ error=f"Cannot connect to sandbox. Is {self.space_id} running? Status: {self.status}",
365
+ )
366
+ except Exception as e:
367
+ return ToolResult(success=False, error=str(e))
368
+
369
+ # ── Tools ─────────────────────────────────────────────────────
370
+
371
+ def bash(
372
+ self,
373
+ command: str,
374
+ *,
375
+ work_dir: str | None = None,
376
+ timeout: int | None = None,
377
+ description: str | None = None,
378
+ ) -> ToolResult:
379
+ return self._call(
380
+ "bash",
381
+ {
382
+ "command": command,
383
+ "work_dir": work_dir or self.work_dir,
384
+ "timeout": min(timeout or self.timeout, MAX_TIMEOUT),
385
+ },
386
+ timeout=timeout,
387
+ )
388
+
389
+ def read(
390
+ self, path: str, *, offset: int | None = None, limit: int | None = None
391
+ ) -> ToolResult:
392
+ self._files_read.add(path)
393
+ return self._call(
394
+ "read",
395
+ {
396
+ "path": path,
397
+ "offset": offset,
398
+ "limit": limit or (DEFAULT_READ_LIMIT if offset is None else None),
399
+ },
400
+ )
401
+
402
+ def write(self, path: str, content: str) -> ToolResult:
403
+ if path not in self._files_read:
404
+ check = self._call("exists", {"path": path})
405
+ if check.success and check.output == "true":
406
+ return ToolResult(
407
+ success=False,
408
+ error=(
409
+ f"File {path} exists but has not been read this session. "
410
+ f"Read it first, or use sandbox_edit for targeted changes."
411
+ ),
412
+ )
413
+ result = self._call("write", {"path": path, "content": content})
414
+ if result.success:
415
+ self._files_read.add(path)
416
+ return result
417
+
418
+ def edit(
419
+ self, path: str, old_str: str, new_str: str, *, replace_all: bool = False
420
+ ) -> ToolResult:
421
+ if old_str == new_str:
422
+ return ToolResult(success=False, error="old_str and new_str are identical.")
423
+ if path not in self._files_read:
424
+ return ToolResult(
425
+ success=False,
426
+ error=f"File {path} has not been read this session. Read it first.",
427
+ )
428
+ return self._call(
429
+ "edit",
430
+ {
431
+ "path": path,
432
+ "old_str": old_str,
433
+ "new_str": new_str,
434
+ "replace_all": replace_all,
435
+ },
436
+ )
437
+
438
+ def glob(self, pattern: str, *, path: str | None = None) -> ToolResult:
439
+ return self._call(
440
+ "glob",
441
+ {
442
+ "pattern": pattern,
443
+ "path": path or self.work_dir,
444
+ },
445
+ )
446
+
447
+ def grep(
448
+ self,
449
+ pattern: str,
450
+ *,
451
+ path: str | None = None,
452
+ include: str | None = None,
453
+ output_mode: str = "files_with_matches",
454
+ case_insensitive: bool = False,
455
+ n: bool = False,
456
+ A: int | None = None,
457
+ B: int | None = None,
458
+ C: int | None = None,
459
+ head_limit: int | None = None,
460
+ ) -> ToolResult:
461
+ return self._call(
462
+ "grep",
463
+ {
464
+ "pattern": pattern,
465
+ "path": path or self.work_dir,
466
+ "include": include,
467
+ "output_mode": output_mode,
468
+ "case_insensitive": case_insensitive,
469
+ "n": n,
470
+ "A": A,
471
+ "B": B,
472
+ "C": C,
473
+ "head_limit": head_limit,
474
+ },
475
+ )
476
+
477
+ # ── Tool schemas & dispatch ───────────────────────────────────
478
+
479
+ TOOLS = {
480
+ "bash": {
481
+ "description": (
482
+ "Run a shell command in the remote sandbox and return stdout/stderr.\n"
483
+ "\n"
484
+ "Commands run in a shell at the working directory (default /app). "
485
+ "Each invocation is independent β€” use files in /app to persist state.\n"
486
+ "\n"
487
+ "AVOID using bash for operations covered by specialized tools:\n"
488
+ "- File reading: use read (not cat/head/tail)\n"
489
+ "- File search: use grep (not grep/rg)\n"
490
+ "- File finding: use glob (not find)\n"
491
+ "- File editing: use edit (not sed/awk)\n"
492
+ "- File writing: use write (not echo/cat <<EOF)\n"
493
+ "\n"
494
+ "For long-running tasks, background them:\n"
495
+ " nohup uv run train.py > /app/train.log 2>&1 &\n"
496
+ "Then check with read on the log file.\n"
497
+ "\n"
498
+ "Chain dependent commands with &&. Independent commands should be "
499
+ "separate bash calls (they can run in parallel).\n"
500
+ "\n"
501
+ "Timeout default 120s, max 600s."
502
+ ),
503
+ "parameters": {
504
+ "type": "object",
505
+ "required": ["command"],
506
+ "additionalProperties": False,
507
+ "properties": {
508
+ "command": {
509
+ "type": "string",
510
+ "description": "The shell command to execute.",
511
+ },
512
+ "description": {
513
+ "type": "string",
514
+ "description": "Short description (5-10 words, active voice). E.g. 'Install dependencies', 'Run training script'.",
515
+ },
516
+ "work_dir": {
517
+ "type": "string",
518
+ "description": "Working directory (default: /app).",
519
+ },
520
+ "timeout": {
521
+ "type": "integer",
522
+ "description": "Timeout in seconds (default: 120, max: 600).",
523
+ },
524
+ },
525
+ },
526
+ },
527
+ "read": {
528
+ "description": (
529
+ "Read file contents with line numbers (cat -n format).\n"
530
+ "\n"
531
+ "Returns the first 2000 lines by default. For large files, use offset/limit "
532
+ "to read a specific range. Line numbers always match the original file.\n"
533
+ "\n"
534
+ "Lines longer than 2000 chars are truncated.\n"
535
+ "Cannot read directories β€” use bash with 'ls' instead."
536
+ ),
537
+ "parameters": {
538
+ "type": "object",
539
+ "required": ["path"],
540
+ "additionalProperties": False,
541
+ "properties": {
542
+ "path": {
543
+ "type": "string",
544
+ "description": "Absolute path to the file to read.",
545
+ },
546
+ "offset": {
547
+ "type": "integer",
548
+ "description": "Start from this line (1-based). Only if file is too large.",
549
+ },
550
+ "limit": {
551
+ "type": "integer",
552
+ "description": "Number of lines to read. Only if file is too large.",
553
+ },
554
+ },
555
+ },
556
+ },
557
+ "write": {
558
+ "description": (
559
+ "Create or overwrite a file. Creates parent directories as needed.\n"
560
+ "\n"
561
+ "For existing files, you MUST read the file first (system enforced). "
562
+ "Prefer edit for modifications."
563
+ ),
564
+ "parameters": {
565
+ "type": "object",
566
+ "required": ["path", "content"],
567
+ "additionalProperties": False,
568
+ "properties": {
569
+ "path": {
570
+ "type": "string",
571
+ "description": "Absolute path to the file to write.",
572
+ },
573
+ "content": {
574
+ "type": "string",
575
+ "description": "Complete file content.",
576
+ },
577
+ },
578
+ },
579
+ },
580
+ "edit": {
581
+ "description": (
582
+ "Targeted edit via exact string replacement.\n"
583
+ "\n"
584
+ "Rules:\n"
585
+ "- old_str must appear EXACTLY once (unless replace_all is true).\n"
586
+ "- Include enough context in old_str for uniqueness.\n"
587
+ "- old_str and new_str must differ.\n"
588
+ "- Preserve indentation exactly.\n"
589
+ "- To delete code, set new_str to empty string.\n"
590
+ "- File MUST have been read this session (system enforced).\n"
591
+ "- Do NOT include line number prefixes in old_str/new_str.\n"
592
+ "\n"
593
+ "Use replace_all=true for batch operations like variable renaming."
594
+ ),
595
+ "parameters": {
596
+ "type": "object",
597
+ "required": ["path", "old_str", "new_str"],
598
+ "additionalProperties": False,
599
+ "properties": {
600
+ "path": {
601
+ "type": "string",
602
+ "description": "Absolute path to the file.",
603
+ },
604
+ "old_str": {
605
+ "type": "string",
606
+ "description": "Exact text to find (must differ from new_str).",
607
+ },
608
+ "new_str": {"type": "string", "description": "Replacement text."},
609
+ "replace_all": {
610
+ "type": "boolean",
611
+ "description": "Replace all occurrences (default: false).",
612
+ "default": False,
613
+ },
614
+ },
615
+ },
616
+ },
617
+ "glob": {
618
+ "description": (
619
+ "Find files by glob pattern, sorted by modification time (newest first).\n"
620
+ "\n"
621
+ "Patterns: * (any), ** (recursive), ? (one char), {a,b}, [abc], [!abc].\n"
622
+ "Examples: '*.py', '*.{json,yaml}', 'test_*'"
623
+ ),
624
+ "parameters": {
625
+ "type": "object",
626
+ "required": ["pattern"],
627
+ "additionalProperties": False,
628
+ "properties": {
629
+ "pattern": {
630
+ "type": "string",
631
+ "description": "Glob pattern to match file names.",
632
+ },
633
+ "path": {
634
+ "type": "string",
635
+ "description": "Directory to search (default: /app). Omit for default.",
636
+ },
637
+ },
638
+ },
639
+ },
640
+ "grep": {
641
+ "description": (
642
+ "Search file contents. ALWAYS use this β€” NEVER bash with grep.\n"
643
+ "\n"
644
+ "Output modes:\n"
645
+ "- 'files_with_matches' (default): file paths only\n"
646
+ "- 'content': matching lines (supports -n, -A/-B/-C context)\n"
647
+ "- 'count': match counts per file\n"
648
+ "\n"
649
+ "Supports regex. Use glob for name matching, grep for content."
650
+ ),
651
+ "parameters": {
652
+ "type": "object",
653
+ "required": ["pattern"],
654
+ "additionalProperties": False,
655
+ "properties": {
656
+ "pattern": {
657
+ "type": "string",
658
+ "description": "Search string or regex.",
659
+ },
660
+ "path": {
661
+ "type": "string",
662
+ "description": "Directory to search (default: /app).",
663
+ },
664
+ "include": {
665
+ "type": "string",
666
+ "description": "Glob filter (e.g. '*.py').",
667
+ },
668
+ "output_mode": {
669
+ "type": "string",
670
+ "enum": ["content", "files_with_matches", "count"],
671
+ "description": "Default: 'files_with_matches'.",
672
+ },
673
+ "-i": {"type": "boolean", "description": "Case-insensitive."},
674
+ "-n": {
675
+ "type": "boolean",
676
+ "description": "Line numbers (content mode only).",
677
+ },
678
+ "-A": {
679
+ "type": "integer",
680
+ "description": "Lines after match (content mode only).",
681
+ },
682
+ "-B": {
683
+ "type": "integer",
684
+ "description": "Lines before match (content mode only).",
685
+ },
686
+ "-C": {
687
+ "type": "integer",
688
+ "description": "Lines around match (content mode only).",
689
+ },
690
+ "head_limit": {
691
+ "type": "integer",
692
+ "description": "Limit output entries.",
693
+ },
694
+ },
695
+ },
696
+ },
697
+ }
698
+
699
+ @classmethod
700
+ def tool_definitions(cls) -> list[dict]:
701
+ return [{"name": name, **spec} for name, spec in cls.TOOLS.items()]
702
+
703
+ def call_tool(self, name: str, arguments: dict[str, Any]) -> ToolResult:
704
+ dispatch = {
705
+ "bash": lambda a: self.bash(
706
+ a["command"],
707
+ work_dir=a.get("work_dir"),
708
+ timeout=a.get("timeout"),
709
+ description=a.get("description"),
710
+ ),
711
+ "read": lambda a: self.read(
712
+ a["path"],
713
+ offset=a.get("offset"),
714
+ limit=a.get("limit"),
715
+ ),
716
+ "write": lambda a: self.write(a["path"], a["content"]),
717
+ "edit": lambda a: self.edit(
718
+ a["path"],
719
+ a["old_str"],
720
+ a["new_str"],
721
+ replace_all=a.get("replace_all", False),
722
+ ),
723
+ "glob": lambda a: self.glob(a["pattern"], path=a.get("path")),
724
+ "grep": lambda a: self.grep(
725
+ a["pattern"],
726
+ path=a.get("path"),
727
+ include=a.get("include"),
728
+ output_mode=a.get("output_mode", "files_with_matches"),
729
+ case_insensitive=a.get("-i", False),
730
+ n=a.get("-n", False),
731
+ A=a.get("-A"),
732
+ B=a.get("-B"),
733
+ C=a.get("-C"),
734
+ head_limit=a.get("head_limit"),
735
+ ),
736
+ }
737
+ fn = dispatch.get(name)
738
+ if not fn:
739
+ return ToolResult(success=False, error=f"Unknown tool: {name}")
740
+ return fn(arguments)
agent/tools/sandbox_tool.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sandbox tools β€” expose the Sandbox client as agent tools.
3
+
4
+ 6 tools total:
5
+ bash, read, write, edit, glob, grep β€” operations on the sandbox
6
+
7
+ Auto-creation: if any operation tool is called without an active sandbox,
8
+ a cpu-basic sandbox is auto-created (no approval needed).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import os
15
+ from typing import Any
16
+
17
+ from huggingface_hub import HfApi
18
+
19
+ from agent.core.session import Event
20
+ from agent.tools.sandbox_client import Sandbox
21
+
22
+ # ── Tool name mapping (short agent names β†’ Sandbox client names) ──────
23
+
24
+
25
async def _ensure_sandbox(
    session: Any, hardware: str = "cpu-basic", **create_kwargs
) -> tuple[Sandbox | None, str | None]:
    """Return the session's sandbox, auto-creating one with `hardware` if absent.

    Returns:
        (sandbox, error_message) β€” exactly one of the pair is None.
    """
    existing = getattr(session, "sandbox", None) if session else None
    if existing:
        return existing, None

    if not session:
        return None, "No session available."

    token = os.environ.get("HF_TOKEN")
    if not token:
        return None, "HF_TOKEN environment variable not set. Cannot create sandbox."

    # Derive the Space owner from the token's identity.
    user_info = HfApi(token=token).whoami()
    owner = user_info.get("name", user_info.get("user", ""))
    if not owner:
        return None, "Could not determine HF username from token."

    await session.send_event(
        Event(
            event_type="tool_log",
            data={
                "tool": "sandbox",
                "log": f"Auto-creating sandbox for {owner} ({hardware})...",
            },
        )
    )

    # Sandbox.create blocks on network + Space boot; run off the event loop.
    sb = await asyncio.to_thread(
        Sandbox.create, owner=owner, hardware=hardware, token=token, **create_kwargs
    )
    session.sandbox = sb

    await session.send_event(
        Event(
            event_type="tool_log",
            data={"tool": "sandbox", "log": f"Sandbox ready: {sb.space_id} ({sb.url})"},
        )
    )

    return sb, None
72
+
73
+
74
def _make_tool_handler(sandbox_tool_name: str):
    """Factory: wrap one Sandbox tool as an async agent handler.

    The handler returns (text, success) and lazily auto-creates a cpu-basic
    sandbox on first use.
    """

    async def handler(args: dict[str, Any], session: Any = None) -> tuple[str, bool]:
        # Make sure a sandbox exists (may create one on the fly).
        try:
            sb, error = await _ensure_sandbox(session)
        except Exception as e:
            return f"Failed to auto-create sandbox: {e}", False
        if error:
            return error, False

        # Run the (blocking) tool call off the event loop.
        try:
            result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
        except Exception as e:
            return f"Sandbox operation failed: {e}", False

        if result.success:
            return result.output or "(no output)", True
        error_msg = result.error or "Unknown error"
        if result.output:
            # Preserve partial output (e.g. a command's stdout before failure).
            return f"{result.output}\n\nERROR: {error_msg}", False
        return f"ERROR: {error_msg}", False

    return handler
101
+
102
+
103
def get_sandbox_tools():
    """Build a ToolSpec for every sandbox tool defined in Sandbox.TOOLS.

    Returns:
        list[ToolSpec]: one spec per sandbox tool (bash, read, write, edit,
        glob, grep β€” whatever Sandbox.TOOLS declares), each wired to a handler
        that lazily auto-creates a cpu-basic sandbox on first use.
    """
    # NOTE: the previous docstring claimed "all 8" tools, but Sandbox.TOOLS
    # defines 6 and there is no upload handler β€” the count now follows TOOLS.
    # Local import avoids a circular import: agent.core.tools imports this module.
    from agent.core.tools import ToolSpec

    return [
        ToolSpec(
            name=name,
            description=spec["description"],
            parameters=spec["parameters"],
            handler=_make_tool_handler(name),
        )
        for name, spec in Sandbox.TOOLS.items()
    ]