Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

akseljoonas HF Staff committed on Feb 25

Commit

786b8bc

1 Parent(s): 2464c2e

sandbox working and agent can use it

Browse files

Files changed (4) hide show

agent/core/agent_loop.py +22 -11
agent/prompts/system_prompt_v2.yaml +3 -3
agent/tools/sandbox_client.py +114 -144
agent/tools/sandbox_tool.py +12 -16

agent/core/agent_loop.py CHANGED Viewed

@@ -38,7 +38,9 @@ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
     return True, None
-def _needs_approval(tool_name: str, tool_args: dict, config: Config | None = None) -> bool:
     """Check if a tool call requires user approval before execution."""
     # Yolo mode: skip all approvals
     if config and config.yolo_mode:
@@ -49,28 +51,31 @@ def _needs_approval(tool_name: str, tool_args: dict, config: Config | None = Non
     if not args_valid:
         return False
-    # Sandbox tools: only sandbox_create requires approval
-    SANDBOX_TOOLS = {"sandbox_create", "bash", "read", "write", "edit", "glob", "grep"}
-    if tool_name in SANDBOX_TOOLS:
-        return tool_name == "sandbox_create"
     if tool_name == "hf_jobs":
         operation = tool_args.get("operation", "")
         if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
             return False
         # Check if this is a CPU-only job
         # hardware_flavor is at top level of tool_args, not nested in args
-        hardware_flavor = tool_args.get("hardware_flavor") or tool_args.get("flavor") or tool_args.get("hardware") or "cpu-basic"
         is_cpu_job = hardware_flavor in CPU_FLAVORS
         if is_cpu_job:
             if config and not config.confirm_cpu_jobs:
                 return False
             return True
         return True
     # Check for file upload operations (hf_private_repos or other tools)
     if tool_name == "hf_private_repos":
         operation = tool_args.get("operation", "")
@@ -91,7 +96,13 @@ def _needs_approval(tool_name: str, tool_args: dict, config: Config | None = Non
     # hf_repo_git: destructive operations require approval
     if tool_name == "hf_repo_git":
         operation = tool_args.get("operation", "")
-        if operation in ["delete_branch", "delete_tag", "merge_pr", "create_repo", "update_repo"]:
             return True
     return False

     return True, None
+def _needs_approval(
+    tool_name: str, tool_args: dict, config: Config | None = None
+) -> bool:
     """Check if a tool call requires user approval before execution."""
     # Yolo mode: skip all approvals
     if config and config.yolo_mode:
     if not args_valid:
         return False
+    if tool_name == "sandbox_create":
+        return True
     if tool_name == "hf_jobs":
         operation = tool_args.get("operation", "")
         if operation not in ["run", "uv", "scheduled run", "scheduled uv"]:
             return False
         # Check if this is a CPU-only job
         # hardware_flavor is at top level of tool_args, not nested in args
+        hardware_flavor = (
+            tool_args.get("hardware_flavor")
+            or tool_args.get("flavor")
+            or tool_args.get("hardware")
+            or "cpu-basic"
+        )
         is_cpu_job = hardware_flavor in CPU_FLAVORS
         if is_cpu_job:
             if config and not config.confirm_cpu_jobs:
                 return False
             return True
         return True
     # Check for file upload operations (hf_private_repos or other tools)
     if tool_name == "hf_private_repos":
         operation = tool_args.get("operation", "")
     # hf_repo_git: destructive operations require approval
     if tool_name == "hf_repo_git":
         operation = tool_args.get("operation", "")
+        if operation in [
+            "delete_branch",
+            "delete_tag",
+            "merge_pr",
+            "create_repo",
+            "update_repo",
+        ]:
             return True
     return False

agent/prompts/system_prompt_v2.yaml CHANGED Viewed

@@ -345,15 +345,15 @@ system_prompt: |
   **sandbox_create:**
   - Persistent remote Linux environment on HF Spaces for interactive development
-  - First call sandbox_create with hardware choice, then use bash/read/write/edit/glob/grep freely
   - Hardware: cpu-basic (free tier), cpu-upgrade (8vCPU/32GB), t4-small (16GB GPU), a10g-small (24GB GPU), a10g-large (24GB GPU + 46GB RAM), a100-large (80GB GPU)
   - Use for: iterative development, debugging, multi-step workflows, testing code, installing packages
   - Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training
-  **bash / read / write / edit / glob / grep / upload:**
   - Available after sandbox_create — no additional approvals needed
   - Same semantics as local file/shell operations, but run on the remote sandbox
-  - bash: run shell commands; read/write/edit: file operations; glob/grep: search; upload: transfer files
   **hf_private_repos:**
   - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)

   **sandbox_create:**
   - Persistent remote Linux environment on HF Spaces for interactive development
+  - First call sandbox_create with hardware choice, then use bash/read/write/edit freely
   - Hardware: cpu-basic (free tier), cpu-upgrade (8vCPU/32GB), t4-small (16GB GPU), a10g-small (24GB GPU), a10g-large (24GB GPU + 46GB RAM), a100-large (80GB GPU)
   - Use for: iterative development, debugging, multi-step workflows, testing code, installing packages
   - Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training
+  **bash / read / write / edit / upload:**
   - Available after sandbox_create — no additional approvals needed
   - Same semantics as local file/shell operations, but run on the remote sandbox
+  - bash: run shell commands; read/write/edit: file operations; upload: transfer files
   **hf_private_repos:**
   - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)

agent/tools/sandbox_client.py CHANGED Viewed

@@ -31,7 +31,7 @@ Lifecycle:
         sb.bash("python train.py")
     # Space deleted on exit
-Tools: bash, read, write, edit, glob, grep, upload
 """
 from __future__ import annotations
@@ -42,7 +42,6 @@ import sys
 import time
 import uuid
 from dataclasses import dataclass, field
-from pathlib import Path
 from typing import Any
 import httpx
@@ -74,17 +73,17 @@ RUN apt-get update && \\
     apt-get install -y \\
       bash git git-lfs wget curl procps \\
       htop vim nano jq tmux \\
-      build-essential grep && \\
     rm -rf /var/lib/apt/lists/*
-# Install server dependencies (as root, before USER switch)
 RUN uv pip install --system fastapi uvicorn python-multipart
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \\
-    PATH=/home/user/.local/bin:$PATH
 WORKDIR /app
 COPY --chown=user . /app
@@ -94,6 +93,114 @@ EXPOSE 7860
 CMD ["python", "sandbox_server.py"]
 """
 @dataclass
 class ToolResult:
@@ -235,10 +342,7 @@ class Sandbox:
     @staticmethod
     def _setup_server(space_id: str, api: HfApi) -> None:
-        """Upload FastAPI server + Dockerfile to the sandbox Space (single commit)."""
-        server_path = Path(__file__).parent / "example_sandbox_server.py"
-        server_code = server_path.read_text()
         print(f"Uploading sandbox server to {space_id}...")
         api.create_commit(
             repo_id=space_id,
@@ -246,7 +350,7 @@ class Sandbox:
             operations=[
                 CommitOperationAdd(
                     path_in_repo="sandbox_server.py",
-                    path_or_fileobj=io.BytesIO(server_code.encode()),
                 ),
                 CommitOperationAdd(
                     path_in_repo="Dockerfile",
@@ -435,45 +539,6 @@ class Sandbox:
             },
         )
-    def glob(self, pattern: str, *, path: str | None = None) -> ToolResult:
-        return self._call(
-            "glob",
-            {
-                "pattern": pattern,
-                "path": path or self.work_dir,
-            },
-        )
-    def grep(
-        self,
-        pattern: str,
-        *,
-        path: str | None = None,
-        include: str | None = None,
-        output_mode: str = "files_with_matches",
-        case_insensitive: bool = False,
-        n: bool = False,
-        A: int | None = None,
-        B: int | None = None,
-        C: int | None = None,
-        head_limit: int | None = None,
-    ) -> ToolResult:
-        return self._call(
-            "grep",
-            {
-                "pattern": pattern,
-                "path": path or self.work_dir,
-                "include": include,
-                "output_mode": output_mode,
-                "case_insensitive": case_insensitive,
-                "n": n,
-                "A": A,
-                "B": B,
-                "C": C,
-                "head_limit": head_limit,
-            },
-        )
     # ── Tool schemas & dispatch ───────────────────────────────────
     TOOLS = {
@@ -486,8 +551,6 @@ class Sandbox:
                 "\n"
                 "AVOID using bash for operations covered by specialized tools:\n"
                 "- File reading: use read (not cat/head/tail)\n"
-                "- File search: use grep (not grep/rg)\n"
-                "- File finding: use glob (not find)\n"
                 "- File editing: use edit (not sed/awk)\n"
                 "- File writing: use write (not echo/cat <<EOF)\n"
                 "\n"
@@ -614,86 +677,6 @@ class Sandbox:
                 },
             },
         },
-        "glob": {
-            "description": (
-                "Find files by glob pattern, sorted by modification time (newest first).\n"
-                "\n"
-                "Patterns: * (any), ** (recursive), ? (one char), {a,b}, [abc], [!abc].\n"
-                "Examples: '*.py', '*.{json,yaml}', 'test_*'"
-            ),
-            "parameters": {
-                "type": "object",
-                "required": ["pattern"],
-                "additionalProperties": False,
-                "properties": {
-                    "pattern": {
-                        "type": "string",
-                        "description": "Glob pattern to match file names.",
-                    },
-                    "path": {
-                        "type": "string",
-                        "description": "Directory to search (default: /app). Omit for default.",
-                    },
-                },
-            },
-        },
-        "grep": {
-            "description": (
-                "Search file contents. ALWAYS use this — NEVER bash with grep.\n"
-                "\n"
-                "Output modes:\n"
-                "- 'files_with_matches' (default): file paths only\n"
-                "- 'content': matching lines (supports -n, -A/-B/-C context)\n"
-                "- 'count': match counts per file\n"
-                "\n"
-                "Supports regex. Use glob for name matching, grep for content."
-            ),
-            "parameters": {
-                "type": "object",
-                "required": ["pattern"],
-                "additionalProperties": False,
-                "properties": {
-                    "pattern": {
-                        "type": "string",
-                        "description": "Search string or regex.",
-                    },
-                    "path": {
-                        "type": "string",
-                        "description": "Directory to search (default: /app).",
-                    },
-                    "include": {
-                        "type": "string",
-                        "description": "Glob filter (e.g. '*.py').",
-                    },
-                    "output_mode": {
-                        "type": "string",
-                        "enum": ["content", "files_with_matches", "count"],
-                        "description": "Default: 'files_with_matches'.",
-                    },
-                    "-i": {"type": "boolean", "description": "Case-insensitive."},
-                    "-n": {
-                        "type": "boolean",
-                        "description": "Line numbers (content mode only).",
-                    },
-                    "-A": {
-                        "type": "integer",
-                        "description": "Lines after match (content mode only).",
-                    },
-                    "-B": {
-                        "type": "integer",
-                        "description": "Lines before match (content mode only).",
-                    },
-                    "-C": {
-                        "type": "integer",
-                        "description": "Lines around match (content mode only).",
-                    },
-                    "head_limit": {
-                        "type": "integer",
-                        "description": "Limit output entries.",
-                    },
-                },
-            },
-        },
     }
     @classmethod
@@ -720,19 +703,6 @@ class Sandbox:
                 a["new_str"],
                 replace_all=a.get("replace_all", False),
             ),
-            "glob": lambda a: self.glob(a["pattern"], path=a.get("path")),
-            "grep": lambda a: self.grep(
-                a["pattern"],
-                path=a.get("path"),
-                include=a.get("include"),
-                output_mode=a.get("output_mode", "files_with_matches"),
-                case_insensitive=a.get("-i", False),
-                n=a.get("-n", False),
-                A=a.get("-A"),
-                B=a.get("-B"),
-                C=a.get("-C"),
-                head_limit=a.get("head_limit"),
-            ),
         }
         fn = dispatch.get(name)
         if not fn:

         sb.bash("python train.py")
     # Space deleted on exit
+Tools: bash, read, write, edit, upload
 """
 from __future__ import annotations
 import time
 import uuid
 from dataclasses import dataclass, field
 from typing import Any
 import httpx
     apt-get install -y \\
       bash git git-lfs wget curl procps \\
       htop vim nano jq tmux \\
+      build-essential && \\
     rm -rf /var/lib/apt/lists/*
 RUN uv pip install --system fastapi uvicorn python-multipart
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \\
+    PATH=/home/user/.local/bin:$PATH \\
+    PIP_USER=1
 WORKDIR /app
 COPY --chown=user . /app
 CMD ["python", "sandbox_server.py"]
 """
+_SANDBOX_SERVER = '''\
+"""Minimal FastAPI server for sandbox operations."""
+import os, subprocess, pathlib
+from fastapi import FastAPI
+from pydantic import BaseModel
+from typing import Optional
+import uvicorn
+app = FastAPI()
+class BashReq(BaseModel):
+    command: str
+    work_dir: str = "/app"
+    timeout: int = 120
+class ReadReq(BaseModel):
+    path: str
+    offset: Optional[int] = None
+    limit: Optional[int] = 2000
+class WriteReq(BaseModel):
+    path: str
+    content: str
+class EditReq(BaseModel):
+    path: str
+    old_str: str
+    new_str: str
+    replace_all: bool = False
+class ExistsReq(BaseModel):
+    path: str
+@app.get("/api/health")
+def health():
+    return {"status": "ok"}
+@app.post("/api/bash")
+def bash(req: BashReq):
+    try:
+        r = subprocess.run(
+            req.command, shell=True, capture_output=True, text=True,
+            cwd=req.work_dir, timeout=req.timeout,
+        )
+        output = r.stdout + r.stderr
+        if len(output) > 30000:
+            output = output[:30000] + "\\n... (truncated)"
+        return {"success": r.returncode == 0, "output": output, "error": "" if r.returncode == 0 else f"Exit code {r.returncode}"}
+    except subprocess.TimeoutExpired:
+        return {"success": False, "output": "", "error": f"Timeout after {req.timeout}s"}
+    except Exception as e:
+        return {"success": False, "output": "", "error": str(e)}
+@app.post("/api/read")
+def read(req: ReadReq):
+    try:
+        p = pathlib.Path(req.path)
+        if not p.exists():
+            return {"success": False, "output": "", "error": f"File not found: {req.path}"}
+        if p.is_dir():
+            return {"success": False, "output": "", "error": f"Is a directory: {req.path}"}
+        lines = p.read_text().splitlines()
+        start = (req.offset or 1) - 1
+        end = start + (req.limit or len(lines))
+        selected = lines[start:end]
+        numbered = "\\n".join(f"{start + i + 1}\\t{line}" for i, line in enumerate(selected))
+        return {"success": True, "output": numbered, "error": ""}
+    except Exception as e:
+        return {"success": False, "output": "", "error": str(e)}
+@app.post("/api/write")
+def write(req: WriteReq):
+    try:
+        p = pathlib.Path(req.path)
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.write_text(req.content)
+        return {"success": True, "output": f"Wrote {len(req.content)} bytes to {req.path}", "error": ""}
+    except Exception as e:
+        return {"success": False, "output": "", "error": str(e)}
+@app.post("/api/edit")
+def edit(req: EditReq):
+    try:
+        p = pathlib.Path(req.path)
+        if not p.exists():
+            return {"success": False, "output": "", "error": f"File not found: {req.path}"}
+        content = p.read_text()
+        if req.old_str not in content:
+            return {"success": False, "output": "", "error": f"old_str not found in {req.path}"}
+        if not req.replace_all and content.count(req.old_str) > 1:
+            return {"success": False, "output": "", "error": f"old_str appears {content.count(req.old_str)} times. Use replace_all=true or provide more context."}
+        if req.replace_all:
+            new_content = content.replace(req.old_str, req.new_str)
+        else:
+            new_content = content.replace(req.old_str, req.new_str, 1)
+        p.write_text(new_content)
+        return {"success": True, "output": f"Edited {req.path}", "error": ""}
+    except Exception as e:
+        return {"success": False, "output": "", "error": str(e)}
+@app.post("/api/exists")
+def exists(req: ExistsReq):
+    return {"success": True, "output": str(pathlib.Path(req.path).exists()).lower(), "error": ""}
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
+'''
 @dataclass
 class ToolResult:
     @staticmethod
     def _setup_server(space_id: str, api: HfApi) -> None:
+        """Upload embedded sandbox server + Dockerfile to the Space (single commit)."""
         print(f"Uploading sandbox server to {space_id}...")
         api.create_commit(
             repo_id=space_id,
             operations=[
                 CommitOperationAdd(
                     path_in_repo="sandbox_server.py",
+                    path_or_fileobj=io.BytesIO(_SANDBOX_SERVER.encode()),
                 ),
                 CommitOperationAdd(
                     path_in_repo="Dockerfile",
             },
         )
     # ── Tool schemas & dispatch ───────────────────────────────────
     TOOLS = {
                 "\n"
                 "AVOID using bash for operations covered by specialized tools:\n"
                 "- File reading: use read (not cat/head/tail)\n"
                 "- File editing: use edit (not sed/awk)\n"
                 "- File writing: use write (not echo/cat <<EOF)\n"
                 "\n"
                 },
             },
         },
     }
     @classmethod
                 a["new_str"],
                 replace_all=a.get("replace_all", False),
             ),
         }
         fn = dispatch.get(name)
         if not fn:

agent/tools/sandbox_tool.py CHANGED Viewed

@@ -1,9 +1,9 @@
 """
 Sandbox tools — expose the Sandbox client as agent tools.
-7 tools total:
   sandbox_create — explicit sandbox creation (requires approval)
-  bash, read, write, edit, glob, grep — operations on the sandbox
 If any operation tool is called without an active sandbox,
 a cpu-basic sandbox is auto-created (no approval needed).
@@ -78,12 +78,14 @@ SANDBOX_CREATE_TOOL_SPEC = {
     "name": "sandbox_create",
     "description": (
         "Create a persistent remote Linux sandbox on HF Spaces for interactive development.\n"
-        "YOU MUST DO THIS BEFORE USING bash/read/write/edit/glob/grep tools.\n"
         "\n"
-        "Spins up a new sandbox environment where you can run commands, read/write/edit files, "
-        "install packages, and debug iteratively. The sandbox persists across tool calls within "
-        "the session.\n"
-        "You can choose from the following hardware tiers: " + ", ".join([e.value for e in SpaceHardware]) + ".\n"
         "Use sandbox for: iterative development, debugging, multi-step workflows, testing code.\n"
         "Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training.\n"
     ),
@@ -101,10 +103,6 @@ SANDBOX_CREATE_TOOL_SPEC = {
                 "type": "boolean",
                 "description": "If true, create a private Space",
             },
-            "sleep_time": {
-                "type": "integer",
-                "description": "Auto-sleep after N seconds of inactivity",
-            },
         },
     },
 }
@@ -120,15 +118,13 @@ async def sandbox_create_handler(
         return (
             f"Sandbox already active: {sb.space_id}\n"
             f"URL: {sb.url}\n"
-            f"Use bash/read/write/edit/glob/grep to interact with it."
         ), True
     hardware = args.get("hardware", "cpu-basic")
     create_kwargs = {}
     if "private" in args:
         create_kwargs["private"] = args["private"]
-    if "sleep_time" in args:
-        create_kwargs["sleep_time"] = args["sleep_time"]
     try:
         sb, error = await _ensure_sandbox(session, hardware=hardware, **create_kwargs)
@@ -142,7 +138,7 @@ async def sandbox_create_handler(
         f"Sandbox created: {sb.space_id}\n"
         f"URL: {sb.url}\n"
         f"Hardware: {hardware}\n"
-        f"Use bash/read/write/edit/glob/grep to interact with it."
     ), True
@@ -176,7 +172,7 @@ def _make_tool_handler(sandbox_tool_name: str):
 def get_sandbox_tools():
-    """Return all 7 sandbox ToolSpecs (sandbox_create + 6 operation tools)."""
     from agent.core.tools import ToolSpec
     tools = []

 """
 Sandbox tools — expose the Sandbox client as agent tools.
+5 tools total:
   sandbox_create — explicit sandbox creation (requires approval)
+  bash, read, write, edit — operations on the sandbox
 If any operation tool is called without an active sandbox,
 a cpu-basic sandbox is auto-created (no approval needed).
     "name": "sandbox_create",
     "description": (
         "Create a persistent remote Linux sandbox on HF Spaces for interactive development.\n"
+        "YOU MUST DO THIS BEFORE USING bash/read/write/edit tools.\n"
         "\n"
+        "Spins up a new sandbox with a given hardware tier where you can run commands, read/write/edit files, "
+        "install packages, and debug iteratively. The sandbox persists across tool calls within the session."
+        "\n"
+        "You can choose from the following hardware tiers (GPU is required for model development or other tasks that benefit from and utilize the GPU): "
+        + ", ".join([e.value for e in SpaceHardware])
+        + ".\n"
         "Use sandbox for: iterative development, debugging, multi-step workflows, testing code.\n"
         "Use hf_jobs instead for: one-shot batch runs, scheduled tasks, fire-and-forget training.\n"
     ),
                 "type": "boolean",
                 "description": "If true, create a private Space",
             },
         },
     },
 }
         return (
             f"Sandbox already active: {sb.space_id}\n"
             f"URL: {sb.url}\n"
+            f"Use bash/read/write/edit to interact with it."
         ), True
     hardware = args.get("hardware", "cpu-basic")
     create_kwargs = {}
     if "private" in args:
         create_kwargs["private"] = args["private"]
     try:
         sb, error = await _ensure_sandbox(session, hardware=hardware, **create_kwargs)
         f"Sandbox created: {sb.space_id}\n"
         f"URL: {sb.url}\n"
         f"Hardware: {hardware}\n"
+        f"Use bash/read/write/edit to interact with it."
     ), True
 def get_sandbox_tools():
+    """Return all 5 sandbox ToolSpecs (sandbox_create + 4 operation tools)."""
     from agent.core.tools import ToolSpec
     tools = []