Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
786b8bc
1
Parent(s): 2464c2e
sandbox working and agent can use it
Browse files
- agent/core/agent_loop.py +22 -11
- agent/prompts/system_prompt_v2.yaml +3 -3
- agent/tools/sandbox_client.py +114 -144
- agent/tools/sandbox_tool.py +12 -16
agent/core/agent_loop.py
CHANGED
|
@@ -38,7 +38,9 @@ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
|
|
| 38 |
return True, None
|
| 39 |
|
| 40 |
|
| 41 |
-
def _needs_approval(
|
|
|
|
|
|
|
| 42 |
"""Check if a tool call requires user approval before execution."""
|
| 43 |
# Yolo mode: skip all approvals
|
| 44 |
if config and config.yolo_mode:
|
|
@@ -49,28 +51,31 @@ def _needs_approval(tool_name: str, tool_args: dict, config: Config | None = Non
|
|
| 49 |
if not args_valid:
|
| 50 |
return False
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
if tool_name in SANDBOX_TOOLS:
|
| 55 |
-
return tool_name == "sandbox_create"
|
| 56 |
|
| 57 |
if tool_name == "hf_jobs":
|
| 58 |
operation = tool_args.get("operation", "")
|
| 59 |
if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
|
| 60 |
return False
|
| 61 |
-
|
| 62 |
# Check if this is a CPU-only job
|
| 63 |
# hardware_flavor is at top level of tool_args, not nested in args
|
| 64 |
-
hardware_flavor =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
is_cpu_job = hardware_flavor in CPU_FLAVORS
|
| 66 |
-
|
| 67 |
if is_cpu_job:
|
| 68 |
if config and not config.confirm_cpu_jobs:
|
| 69 |
return False
|
| 70 |
return True
|
| 71 |
-
|
| 72 |
return True
|
| 73 |
-
|
| 74 |
# Check for file upload operations (hf_private_repos or other tools)
|
| 75 |
if tool_name == "hf_private_repos":
|
| 76 |
operation = tool_args.get("operation", "")
|
|
@@ -91,7 +96,13 @@ def _needs_approval(tool_name: str, tool_args: dict, config: Config | None = Non
|
|
| 91 |
# hf_repo_git: destructive operations require approval
|
| 92 |
if tool_name == "hf_repo_git":
|
| 93 |
operation = tool_args.get("operation", "")
|
| 94 |
-
if operation in [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
return True
|
| 96 |
|
| 97 |
return False
|
|
|
|
| 38 |
return True, None
|
| 39 |
|
| 40 |
|
| 41 |
+
def _needs_approval(
|
| 42 |
+
tool_name: str, tool_args: dict, config: Config | None = None
|
| 43 |
+
) -> bool:
|
| 44 |
"""Check if a tool call requires user approval before execution."""
|
| 45 |
# Yolo mode: skip all approvals
|
| 46 |
if config and config.yolo_mode:
|
|
|
|
| 51 |
if not args_valid:
|
| 52 |
return False
|
| 53 |
|
| 54 |
+
if tool_name == "sandbox_create":
|
| 55 |
+
return True
|
|
|
|
|
|
|
| 56 |
|
| 57 |
if tool_name == "hf_jobs":
|
| 58 |
operation = tool_args.get("operation", "")
|
| 59 |
if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
|
| 60 |
return False
|
| 61 |
+
|
| 62 |
# Check if this is a CPU-only job
|
| 63 |
# hardware_flavor is at top level of tool_args, not nested in args
|
| 64 |
+
hardware_flavor = (
|
| 65 |
+
tool_args.get("hardware_flavor")
|
| 66 |
+
or tool_args.get("flavor")
|
| 67 |
+
or tool_args.get("hardware")
|
| 68 |
+
or "cpu-basic"
|
| 69 |
+
)
|
| 70 |
is_cpu_job = hardware_flavor in CPU_FLAVORS
|
| 71 |
+
|
| 72 |
if is_cpu_job:
|
| 73 |
if config and not config.confirm_cpu_jobs:
|
| 74 |
return False
|
| 75 |
return True
|
| 76 |
+
|
| 77 |
return True
|
| 78 |
+
|
| 79 |
# Check for file upload operations (hf_private_repos or other tools)
|
| 80 |
if tool_name == "hf_private_repos":
|
| 81 |
operation = tool_args.get("operation", "")
|
|
|
|
| 96 |
# hf_repo_git: destructive operations require approval
|
| 97 |
if tool_name == "hf_repo_git":
|
| 98 |
operation = tool_args.get("operation", "")
|
| 99 |
+
if operation in [
|
| 100 |
+
"delete_branch",
|
| 101 |
+
"delete_tag",
|
| 102 |
+
"merge_pr",
|
| 103 |
+
"create_repo",
|
| 104 |
+
"update_repo",
|
| 105 |
+
]:
|
| 106 |
return True
|
| 107 |
|
| 108 |
return False
|
agent/prompts/system_prompt_v2.yaml
CHANGED
|
@@ -345,15 +345,15 @@ system_prompt: |
|
|
| 345 |
|
| 346 |
**sandbox_create:**
|
| 347 |
- Persistent remote Linux environment on HF Spaces for interactive development
|
| 348 |
-
- First call sandbox_create with hardware choice, then use bash/read/write/edit
|
| 349 |
- Hardware: cpu-basic (free tier), cpu-upgrade (8vCPU/32GB), t4-small (16GB GPU), a10g-small (24GB GPU), a10g-large (24GB GPU + 46GB RAM), a100-large (80GB GPU)
|
| 350 |
- Use for: iterative development, debugging, multi-step workflows, testing code, installing packages
|
| 351 |
- Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training
|
| 352 |
|
| 353 |
-
**bash / read / write / edit /
|
| 354 |
- Available after sandbox_create — no additional approvals needed
|
| 355 |
- Same semantics as local file/shell operations, but run on the remote sandbox
|
| 356 |
-
- bash: run shell commands; read/write/edit: file operations;
|
| 357 |
|
| 358 |
**hf_private_repos:**
|
| 359 |
- Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
|
|
|
|
| 345 |
|
| 346 |
**sandbox_create:**
|
| 347 |
- Persistent remote Linux environment on HF Spaces for interactive development
|
| 348 |
+
- First call sandbox_create with hardware choice, then use bash/read/write/edit freely
|
| 349 |
- Hardware: cpu-basic (free tier), cpu-upgrade (8vCPU/32GB), t4-small (16GB GPU), a10g-small (24GB GPU), a10g-large (24GB GPU + 46GB RAM), a100-large (80GB GPU)
|
| 350 |
- Use for: iterative development, debugging, multi-step workflows, testing code, installing packages
|
| 351 |
- Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training
|
| 352 |
|
| 353 |
+
**bash / read / write / edit / upload:**
|
| 354 |
- Available after sandbox_create — no additional approvals needed
|
| 355 |
- Same semantics as local file/shell operations, but run on the remote sandbox
|
| 356 |
+
- bash: run shell commands; read/write/edit: file operations; upload: transfer files
|
| 357 |
|
| 358 |
**hf_private_repos:**
|
| 359 |
- Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)
|
agent/tools/sandbox_client.py
CHANGED
|
@@ -31,7 +31,7 @@ Lifecycle:
|
|
| 31 |
sb.bash("python train.py")
|
| 32 |
# Space deleted on exit
|
| 33 |
|
| 34 |
-
Tools: bash, read, write, edit,
|
| 35 |
"""
|
| 36 |
|
| 37 |
from __future__ import annotations
|
|
@@ -42,7 +42,6 @@ import sys
|
|
| 42 |
import time
|
| 43 |
import uuid
|
| 44 |
from dataclasses import dataclass, field
|
| 45 |
-
from pathlib import Path
|
| 46 |
from typing import Any
|
| 47 |
|
| 48 |
import httpx
|
|
@@ -74,17 +73,17 @@ RUN apt-get update && \\
|
|
| 74 |
apt-get install -y \\
|
| 75 |
bash git git-lfs wget curl procps \\
|
| 76 |
htop vim nano jq tmux \\
|
| 77 |
-
build-essential
|
| 78 |
rm -rf /var/lib/apt/lists/*
|
| 79 |
|
| 80 |
-
# Install server dependencies (as root, before USER switch)
|
| 81 |
RUN uv pip install --system fastapi uvicorn python-multipart
|
| 82 |
|
| 83 |
RUN useradd -m -u 1000 user
|
| 84 |
USER user
|
| 85 |
|
| 86 |
ENV HOME=/home/user \\
|
| 87 |
-
PATH=/home/user/.local/bin:$PATH
|
|
|
|
| 88 |
|
| 89 |
WORKDIR /app
|
| 90 |
COPY --chown=user . /app
|
|
@@ -94,6 +93,114 @@ EXPOSE 7860
|
|
| 94 |
CMD ["python", "sandbox_server.py"]
|
| 95 |
"""
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
@dataclass
|
| 99 |
class ToolResult:
|
|
@@ -235,10 +342,7 @@ class Sandbox:
|
|
| 235 |
|
| 236 |
@staticmethod
|
| 237 |
def _setup_server(space_id: str, api: HfApi) -> None:
|
| 238 |
-
"""Upload
|
| 239 |
-
server_path = Path(__file__).parent / "example_sandbox_server.py"
|
| 240 |
-
server_code = server_path.read_text()
|
| 241 |
-
|
| 242 |
print(f"Uploading sandbox server to {space_id}...")
|
| 243 |
api.create_commit(
|
| 244 |
repo_id=space_id,
|
|
@@ -246,7 +350,7 @@ class Sandbox:
|
|
| 246 |
operations=[
|
| 247 |
CommitOperationAdd(
|
| 248 |
path_in_repo="sandbox_server.py",
|
| 249 |
-
path_or_fileobj=io.BytesIO(
|
| 250 |
),
|
| 251 |
CommitOperationAdd(
|
| 252 |
path_in_repo="Dockerfile",
|
|
@@ -435,45 +539,6 @@ class Sandbox:
|
|
| 435 |
},
|
| 436 |
)
|
| 437 |
|
| 438 |
-
def glob(self, pattern: str, *, path: str | None = None) -> ToolResult:
|
| 439 |
-
return self._call(
|
| 440 |
-
"glob",
|
| 441 |
-
{
|
| 442 |
-
"pattern": pattern,
|
| 443 |
-
"path": path or self.work_dir,
|
| 444 |
-
},
|
| 445 |
-
)
|
| 446 |
-
|
| 447 |
-
def grep(
|
| 448 |
-
self,
|
| 449 |
-
pattern: str,
|
| 450 |
-
*,
|
| 451 |
-
path: str | None = None,
|
| 452 |
-
include: str | None = None,
|
| 453 |
-
output_mode: str = "files_with_matches",
|
| 454 |
-
case_insensitive: bool = False,
|
| 455 |
-
n: bool = False,
|
| 456 |
-
A: int | None = None,
|
| 457 |
-
B: int | None = None,
|
| 458 |
-
C: int | None = None,
|
| 459 |
-
head_limit: int | None = None,
|
| 460 |
-
) -> ToolResult:
|
| 461 |
-
return self._call(
|
| 462 |
-
"grep",
|
| 463 |
-
{
|
| 464 |
-
"pattern": pattern,
|
| 465 |
-
"path": path or self.work_dir,
|
| 466 |
-
"include": include,
|
| 467 |
-
"output_mode": output_mode,
|
| 468 |
-
"case_insensitive": case_insensitive,
|
| 469 |
-
"n": n,
|
| 470 |
-
"A": A,
|
| 471 |
-
"B": B,
|
| 472 |
-
"C": C,
|
| 473 |
-
"head_limit": head_limit,
|
| 474 |
-
},
|
| 475 |
-
)
|
| 476 |
-
|
| 477 |
# ── Tool schemas & dispatch ───────────────────────────────────
|
| 478 |
|
| 479 |
TOOLS = {
|
|
@@ -486,8 +551,6 @@ class Sandbox:
|
|
| 486 |
"\n"
|
| 487 |
"AVOID using bash for operations covered by specialized tools:\n"
|
| 488 |
"- File reading: use read (not cat/head/tail)\n"
|
| 489 |
-
"- File search: use grep (not grep/rg)\n"
|
| 490 |
-
"- File finding: use glob (not find)\n"
|
| 491 |
"- File editing: use edit (not sed/awk)\n"
|
| 492 |
"- File writing: use write (not echo/cat <<EOF)\n"
|
| 493 |
"\n"
|
|
@@ -614,86 +677,6 @@ class Sandbox:
|
|
| 614 |
},
|
| 615 |
},
|
| 616 |
},
|
| 617 |
-
"glob": {
|
| 618 |
-
"description": (
|
| 619 |
-
"Find files by glob pattern, sorted by modification time (newest first).\n"
|
| 620 |
-
"\n"
|
| 621 |
-
"Patterns: * (any), ** (recursive), ? (one char), {a,b}, [abc], [!abc].\n"
|
| 622 |
-
"Examples: '*.py', '*.{json,yaml}', 'test_*'"
|
| 623 |
-
),
|
| 624 |
-
"parameters": {
|
| 625 |
-
"type": "object",
|
| 626 |
-
"required": ["pattern"],
|
| 627 |
-
"additionalProperties": False,
|
| 628 |
-
"properties": {
|
| 629 |
-
"pattern": {
|
| 630 |
-
"type": "string",
|
| 631 |
-
"description": "Glob pattern to match file names.",
|
| 632 |
-
},
|
| 633 |
-
"path": {
|
| 634 |
-
"type": "string",
|
| 635 |
-
"description": "Directory to search (default: /app). Omit for default.",
|
| 636 |
-
},
|
| 637 |
-
},
|
| 638 |
-
},
|
| 639 |
-
},
|
| 640 |
-
"grep": {
|
| 641 |
-
"description": (
|
| 642 |
-
"Search file contents. ALWAYS use this — NEVER bash with grep.\n"
|
| 643 |
-
"\n"
|
| 644 |
-
"Output modes:\n"
|
| 645 |
-
"- 'files_with_matches' (default): file paths only\n"
|
| 646 |
-
"- 'content': matching lines (supports -n, -A/-B/-C context)\n"
|
| 647 |
-
"- 'count': match counts per file\n"
|
| 648 |
-
"\n"
|
| 649 |
-
"Supports regex. Use glob for name matching, grep for content."
|
| 650 |
-
),
|
| 651 |
-
"parameters": {
|
| 652 |
-
"type": "object",
|
| 653 |
-
"required": ["pattern"],
|
| 654 |
-
"additionalProperties": False,
|
| 655 |
-
"properties": {
|
| 656 |
-
"pattern": {
|
| 657 |
-
"type": "string",
|
| 658 |
-
"description": "Search string or regex.",
|
| 659 |
-
},
|
| 660 |
-
"path": {
|
| 661 |
-
"type": "string",
|
| 662 |
-
"description": "Directory to search (default: /app).",
|
| 663 |
-
},
|
| 664 |
-
"include": {
|
| 665 |
-
"type": "string",
|
| 666 |
-
"description": "Glob filter (e.g. '*.py').",
|
| 667 |
-
},
|
| 668 |
-
"output_mode": {
|
| 669 |
-
"type": "string",
|
| 670 |
-
"enum": ["content", "files_with_matches", "count"],
|
| 671 |
-
"description": "Default: 'files_with_matches'.",
|
| 672 |
-
},
|
| 673 |
-
"-i": {"type": "boolean", "description": "Case-insensitive."},
|
| 674 |
-
"-n": {
|
| 675 |
-
"type": "boolean",
|
| 676 |
-
"description": "Line numbers (content mode only).",
|
| 677 |
-
},
|
| 678 |
-
"-A": {
|
| 679 |
-
"type": "integer",
|
| 680 |
-
"description": "Lines after match (content mode only).",
|
| 681 |
-
},
|
| 682 |
-
"-B": {
|
| 683 |
-
"type": "integer",
|
| 684 |
-
"description": "Lines before match (content mode only).",
|
| 685 |
-
},
|
| 686 |
-
"-C": {
|
| 687 |
-
"type": "integer",
|
| 688 |
-
"description": "Lines around match (content mode only).",
|
| 689 |
-
},
|
| 690 |
-
"head_limit": {
|
| 691 |
-
"type": "integer",
|
| 692 |
-
"description": "Limit output entries.",
|
| 693 |
-
},
|
| 694 |
-
},
|
| 695 |
-
},
|
| 696 |
-
},
|
| 697 |
}
|
| 698 |
|
| 699 |
@classmethod
|
|
@@ -720,19 +703,6 @@ class Sandbox:
|
|
| 720 |
a["new_str"],
|
| 721 |
replace_all=a.get("replace_all", False),
|
| 722 |
),
|
| 723 |
-
"glob": lambda a: self.glob(a["pattern"], path=a.get("path")),
|
| 724 |
-
"grep": lambda a: self.grep(
|
| 725 |
-
a["pattern"],
|
| 726 |
-
path=a.get("path"),
|
| 727 |
-
include=a.get("include"),
|
| 728 |
-
output_mode=a.get("output_mode", "files_with_matches"),
|
| 729 |
-
case_insensitive=a.get("-i", False),
|
| 730 |
-
n=a.get("-n", False),
|
| 731 |
-
A=a.get("-A"),
|
| 732 |
-
B=a.get("-B"),
|
| 733 |
-
C=a.get("-C"),
|
| 734 |
-
head_limit=a.get("head_limit"),
|
| 735 |
-
),
|
| 736 |
}
|
| 737 |
fn = dispatch.get(name)
|
| 738 |
if not fn:
|
|
|
|
| 31 |
sb.bash("python train.py")
|
| 32 |
# Space deleted on exit
|
| 33 |
|
| 34 |
+
Tools: bash, read, write, edit, upload
|
| 35 |
"""
|
| 36 |
|
| 37 |
from __future__ import annotations
|
|
|
|
| 42 |
import time
|
| 43 |
import uuid
|
| 44 |
from dataclasses import dataclass, field
|
|
|
|
| 45 |
from typing import Any
|
| 46 |
|
| 47 |
import httpx
|
|
|
|
| 73 |
apt-get install -y \\
|
| 74 |
bash git git-lfs wget curl procps \\
|
| 75 |
htop vim nano jq tmux \\
|
| 76 |
+
build-essential && \\
|
| 77 |
rm -rf /var/lib/apt/lists/*
|
| 78 |
|
|
|
|
| 79 |
RUN uv pip install --system fastapi uvicorn python-multipart
|
| 80 |
|
| 81 |
RUN useradd -m -u 1000 user
|
| 82 |
USER user
|
| 83 |
|
| 84 |
ENV HOME=/home/user \\
|
| 85 |
+
PATH=/home/user/.local/bin:$PATH \\
|
| 86 |
+
PIP_USER=1
|
| 87 |
|
| 88 |
WORKDIR /app
|
| 89 |
COPY --chown=user . /app
|
|
|
|
| 93 |
CMD ["python", "sandbox_server.py"]
|
| 94 |
"""
|
| 95 |
|
| 96 |
+
_SANDBOX_SERVER = '''\
|
| 97 |
+
"""Minimal FastAPI server for sandbox operations."""
|
| 98 |
+
import os, subprocess, pathlib
|
| 99 |
+
from fastapi import FastAPI
|
| 100 |
+
from pydantic import BaseModel
|
| 101 |
+
from typing import Optional
|
| 102 |
+
import uvicorn
|
| 103 |
+
|
| 104 |
+
app = FastAPI()
|
| 105 |
+
|
| 106 |
+
class BashReq(BaseModel):
|
| 107 |
+
command: str
|
| 108 |
+
work_dir: str = "/app"
|
| 109 |
+
timeout: int = 120
|
| 110 |
+
|
| 111 |
+
class ReadReq(BaseModel):
|
| 112 |
+
path: str
|
| 113 |
+
offset: Optional[int] = None
|
| 114 |
+
limit: Optional[int] = 2000
|
| 115 |
+
|
| 116 |
+
class WriteReq(BaseModel):
|
| 117 |
+
path: str
|
| 118 |
+
content: str
|
| 119 |
+
|
| 120 |
+
class EditReq(BaseModel):
|
| 121 |
+
path: str
|
| 122 |
+
old_str: str
|
| 123 |
+
new_str: str
|
| 124 |
+
replace_all: bool = False
|
| 125 |
+
|
| 126 |
+
class ExistsReq(BaseModel):
|
| 127 |
+
path: str
|
| 128 |
+
|
| 129 |
+
@app.get("/api/health")
|
| 130 |
+
def health():
|
| 131 |
+
return {"status": "ok"}
|
| 132 |
+
|
| 133 |
+
@app.post("/api/bash")
|
| 134 |
+
def bash(req: BashReq):
|
| 135 |
+
try:
|
| 136 |
+
r = subprocess.run(
|
| 137 |
+
req.command, shell=True, capture_output=True, text=True,
|
| 138 |
+
cwd=req.work_dir, timeout=req.timeout,
|
| 139 |
+
)
|
| 140 |
+
output = r.stdout + r.stderr
|
| 141 |
+
if len(output) > 30000:
|
| 142 |
+
output = output[:30000] + "\\n... (truncated)"
|
| 143 |
+
return {"success": r.returncode == 0, "output": output, "error": "" if r.returncode == 0 else f"Exit code {r.returncode}"}
|
| 144 |
+
except subprocess.TimeoutExpired:
|
| 145 |
+
return {"success": False, "output": "", "error": f"Timeout after {req.timeout}s"}
|
| 146 |
+
except Exception as e:
|
| 147 |
+
return {"success": False, "output": "", "error": str(e)}
|
| 148 |
+
|
| 149 |
+
@app.post("/api/read")
|
| 150 |
+
def read(req: ReadReq):
|
| 151 |
+
try:
|
| 152 |
+
p = pathlib.Path(req.path)
|
| 153 |
+
if not p.exists():
|
| 154 |
+
return {"success": False, "output": "", "error": f"File not found: {req.path}"}
|
| 155 |
+
if p.is_dir():
|
| 156 |
+
return {"success": False, "output": "", "error": f"Is a directory: {req.path}"}
|
| 157 |
+
lines = p.read_text().splitlines()
|
| 158 |
+
start = (req.offset or 1) - 1
|
| 159 |
+
end = start + (req.limit or len(lines))
|
| 160 |
+
selected = lines[start:end]
|
| 161 |
+
numbered = "\\n".join(f"{start + i + 1}\\t{line}" for i, line in enumerate(selected))
|
| 162 |
+
return {"success": True, "output": numbered, "error": ""}
|
| 163 |
+
except Exception as e:
|
| 164 |
+
return {"success": False, "output": "", "error": str(e)}
|
| 165 |
+
|
| 166 |
+
@app.post("/api/write")
|
| 167 |
+
def write(req: WriteReq):
|
| 168 |
+
try:
|
| 169 |
+
p = pathlib.Path(req.path)
|
| 170 |
+
p.parent.mkdir(parents=True, exist_ok=True)
|
| 171 |
+
p.write_text(req.content)
|
| 172 |
+
return {"success": True, "output": f"Wrote {len(req.content)} bytes to {req.path}", "error": ""}
|
| 173 |
+
except Exception as e:
|
| 174 |
+
return {"success": False, "output": "", "error": str(e)}
|
| 175 |
+
|
| 176 |
+
@app.post("/api/edit")
|
| 177 |
+
def edit(req: EditReq):
|
| 178 |
+
try:
|
| 179 |
+
p = pathlib.Path(req.path)
|
| 180 |
+
if not p.exists():
|
| 181 |
+
return {"success": False, "output": "", "error": f"File not found: {req.path}"}
|
| 182 |
+
content = p.read_text()
|
| 183 |
+
if req.old_str not in content:
|
| 184 |
+
return {"success": False, "output": "", "error": f"old_str not found in {req.path}"}
|
| 185 |
+
if not req.replace_all and content.count(req.old_str) > 1:
|
| 186 |
+
return {"success": False, "output": "", "error": f"old_str appears {content.count(req.old_str)} times. Use replace_all=true or provide more context."}
|
| 187 |
+
if req.replace_all:
|
| 188 |
+
new_content = content.replace(req.old_str, req.new_str)
|
| 189 |
+
else:
|
| 190 |
+
new_content = content.replace(req.old_str, req.new_str, 1)
|
| 191 |
+
p.write_text(new_content)
|
| 192 |
+
return {"success": True, "output": f"Edited {req.path}", "error": ""}
|
| 193 |
+
except Exception as e:
|
| 194 |
+
return {"success": False, "output": "", "error": str(e)}
|
| 195 |
+
|
| 196 |
+
@app.post("/api/exists")
|
| 197 |
+
def exists(req: ExistsReq):
|
| 198 |
+
return {"success": True, "output": str(pathlib.Path(req.path).exists()).lower(), "error": ""}
|
| 199 |
+
|
| 200 |
+
if __name__ == "__main__":
|
| 201 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 202 |
+
'''
|
| 203 |
+
|
| 204 |
|
| 205 |
@dataclass
|
| 206 |
class ToolResult:
|
|
|
|
| 342 |
|
| 343 |
@staticmethod
|
| 344 |
def _setup_server(space_id: str, api: HfApi) -> None:
|
| 345 |
+
"""Upload embedded sandbox server + Dockerfile to the Space (single commit)."""
|
|
|
|
|
|
|
|
|
|
| 346 |
print(f"Uploading sandbox server to {space_id}...")
|
| 347 |
api.create_commit(
|
| 348 |
repo_id=space_id,
|
|
|
|
| 350 |
operations=[
|
| 351 |
CommitOperationAdd(
|
| 352 |
path_in_repo="sandbox_server.py",
|
| 353 |
+
path_or_fileobj=io.BytesIO(_SANDBOX_SERVER.encode()),
|
| 354 |
),
|
| 355 |
CommitOperationAdd(
|
| 356 |
path_in_repo="Dockerfile",
|
|
|
|
| 539 |
},
|
| 540 |
)
|
| 541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
# ── Tool schemas & dispatch ───────────────────────────────────
|
| 543 |
|
| 544 |
TOOLS = {
|
|
|
|
| 551 |
"\n"
|
| 552 |
"AVOID using bash for operations covered by specialized tools:\n"
|
| 553 |
"- File reading: use read (not cat/head/tail)\n"
|
|
|
|
|
|
|
| 554 |
"- File editing: use edit (not sed/awk)\n"
|
| 555 |
"- File writing: use write (not echo/cat <<EOF)\n"
|
| 556 |
"\n"
|
|
|
|
| 677 |
},
|
| 678 |
},
|
| 679 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
}
|
| 681 |
|
| 682 |
@classmethod
|
|
|
|
| 703 |
a["new_str"],
|
| 704 |
replace_all=a.get("replace_all", False),
|
| 705 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
}
|
| 707 |
fn = dispatch.get(name)
|
| 708 |
if not fn:
|
agent/tools/sandbox_tool.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
Sandbox tools — expose the Sandbox client as agent tools.
|
| 3 |
|
| 4 |
-
|
| 5 |
sandbox_create — explicit sandbox creation (requires approval)
|
| 6 |
-
bash, read, write, edit
|
| 7 |
|
| 8 |
If any operation tool is called without an active sandbox,
|
| 9 |
a cpu-basic sandbox is auto-created (no approval needed).
|
|
@@ -78,12 +78,14 @@ SANDBOX_CREATE_TOOL_SPEC = {
|
|
| 78 |
"name": "sandbox_create",
|
| 79 |
"description": (
|
| 80 |
"Create a persistent remote Linux sandbox on HF Spaces for interactive development.\n"
|
| 81 |
-
"YOU MUST DO THIS BEFORE USING bash/read/write/edit
|
| 82 |
"\n"
|
| 83 |
-
"Spins up a new sandbox
|
| 84 |
-
"install packages, and debug iteratively. The sandbox persists across tool calls within "
|
| 85 |
-
"
|
| 86 |
-
"You can choose from the following hardware tiers
|
|
|
|
|
|
|
| 87 |
"Use sandbox for: iterative development, debugging, multi-step workflows, testing code.\n"
|
| 88 |
"Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training.\n"
|
| 89 |
),
|
|
@@ -101,10 +103,6 @@ SANDBOX_CREATE_TOOL_SPEC = {
|
|
| 101 |
"type": "boolean",
|
| 102 |
"description": "If true, create a private Space",
|
| 103 |
},
|
| 104 |
-
"sleep_time": {
|
| 105 |
-
"type": "integer",
|
| 106 |
-
"description": "Auto-sleep after N seconds of inactivity",
|
| 107 |
-
},
|
| 108 |
},
|
| 109 |
},
|
| 110 |
}
|
|
@@ -120,15 +118,13 @@ async def sandbox_create_handler(
|
|
| 120 |
return (
|
| 121 |
f"Sandbox already active: {sb.space_id}\n"
|
| 122 |
f"URL: {sb.url}\n"
|
| 123 |
-
f"Use bash/read/write/edit
|
| 124 |
), True
|
| 125 |
|
| 126 |
hardware = args.get("hardware", "cpu-basic")
|
| 127 |
create_kwargs = {}
|
| 128 |
if "private" in args:
|
| 129 |
create_kwargs["private"] = args["private"]
|
| 130 |
-
if "sleep_time" in args:
|
| 131 |
-
create_kwargs["sleep_time"] = args["sleep_time"]
|
| 132 |
|
| 133 |
try:
|
| 134 |
sb, error = await _ensure_sandbox(session, hardware=hardware, **create_kwargs)
|
|
@@ -142,7 +138,7 @@ async def sandbox_create_handler(
|
|
| 142 |
f"Sandbox created: {sb.space_id}\n"
|
| 143 |
f"URL: {sb.url}\n"
|
| 144 |
f"Hardware: {hardware}\n"
|
| 145 |
-
f"Use bash/read/write/edit
|
| 146 |
), True
|
| 147 |
|
| 148 |
|
|
@@ -176,7 +172,7 @@ def _make_tool_handler(sandbox_tool_name: str):
|
|
| 176 |
|
| 177 |
|
| 178 |
def get_sandbox_tools():
|
| 179 |
-
"""Return all
|
| 180 |
from agent.core.tools import ToolSpec
|
| 181 |
|
| 182 |
tools = []
|
|
|
|
| 1 |
"""
|
| 2 |
Sandbox tools — expose the Sandbox client as agent tools.
|
| 3 |
|
| 4 |
+
5 tools total:
|
| 5 |
sandbox_create — explicit sandbox creation (requires approval)
|
| 6 |
+
bash, read, write, edit — operations on the sandbox
|
| 7 |
|
| 8 |
If any operation tool is called without an active sandbox,
|
| 9 |
a cpu-basic sandbox is auto-created (no approval needed).
|
|
|
|
| 78 |
"name": "sandbox_create",
|
| 79 |
"description": (
|
| 80 |
"Create a persistent remote Linux sandbox on HF Spaces for interactive development.\n"
|
| 81 |
+
"YOU MUST DO THIS BEFORE USING bash/read/write/edit tools.\n"
|
| 82 |
"\n"
|
| 83 |
+
"Spins up a new sandbox with a given hardware tier where you can run commands, read/write/edit files, "
|
| 84 |
+
"install packages, and debug iteratively. The sandbox persists across tool calls within the session."
|
| 85 |
+
"\n"
|
| 86 |
+
"You can choose from the following hardware tiers (GPU is required for model development or other tasks that benefit from and utilize the GPU): "
|
| 87 |
+
+ ", ".join([e.value for e in SpaceHardware])
|
| 88 |
+
+ ".\n"
|
| 89 |
"Use sandbox for: iterative development, debugging, multi-step workflows, testing code.\n"
|
| 90 |
"Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training.\n"
|
| 91 |
),
|
|
|
|
| 103 |
"type": "boolean",
|
| 104 |
"description": "If true, create a private Space",
|
| 105 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
},
|
| 107 |
},
|
| 108 |
}
|
|
|
|
| 118 |
return (
|
| 119 |
f"Sandbox already active: {sb.space_id}\n"
|
| 120 |
f"URL: {sb.url}\n"
|
| 121 |
+
f"Use bash/read/write/edit to interact with it."
|
| 122 |
), True
|
| 123 |
|
| 124 |
hardware = args.get("hardware", "cpu-basic")
|
| 125 |
create_kwargs = {}
|
| 126 |
if "private" in args:
|
| 127 |
create_kwargs["private"] = args["private"]
|
|
|
|
|
|
|
| 128 |
|
| 129 |
try:
|
| 130 |
sb, error = await _ensure_sandbox(session, hardware=hardware, **create_kwargs)
|
|
|
|
| 138 |
f"Sandbox created: {sb.space_id}\n"
|
| 139 |
f"URL: {sb.url}\n"
|
| 140 |
f"Hardware: {hardware}\n"
|
| 141 |
+
f"Use bash/read/write/edit to interact with it."
|
| 142 |
), True
|
| 143 |
|
| 144 |
|
|
|
|
| 172 |
|
| 173 |
|
| 174 |
def get_sandbox_tools():
|
| 175 |
+
"""Return all 5 sandbox ToolSpecs (sandbox_create + 4 operation tools)."""
|
| 176 |
from agent.core.tools import ToolSpec
|
| 177 |
|
| 178 |
tools = []
|