Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
0cee198
1
Parent(s): 40c02f5
feat: cancel sandbox creation on user interrupt
Browse files
Sandbox.create() now accepts a cancel_event checked during polling loops.
When set, the partially-created Space is deleted and Sandbox.Cancelled is
raised. _ensure_sandbox bridges the asyncio cancel signal to a threading
event so the blocking create call can respond within ~5s.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- agent/tools/sandbox_client.py +24 -0
- agent/tools/sandbox_tool.py +19 -1
agent/tools/sandbox_client.py
CHANGED
|
@@ -287,6 +287,9 @@ class Sandbox:
|
|
| 287 |
|
| 288 |
# ── Lifecycle ─────────────────────────────────────────────────
|
| 289 |
|
|
|
|
|
|
|
|
|
|
| 290 |
@classmethod
|
| 291 |
def create(
|
| 292 |
cls,
|
|
@@ -300,6 +303,7 @@ class Sandbox:
|
|
| 300 |
token: str | None = None,
|
| 301 |
wait_timeout: int = WAIT_TIMEOUT,
|
| 302 |
log: "Callable[[str], object] | None" = None,
|
|
|
|
| 303 |
) -> Sandbox:
|
| 304 |
"""
|
| 305 |
Create a new sandbox by duplicating the template Space.
|
|
@@ -317,6 +321,9 @@ class Sandbox:
|
|
| 317 |
sleep_time: Auto-sleep after N seconds of inactivity.
|
| 318 |
token: HF API token (from user's OAuth session).
|
| 319 |
wait_timeout: Max seconds to wait for Space to start (default: 300).
|
|
|
|
|
|
|
|
|
|
| 320 |
|
| 321 |
Returns:
|
| 322 |
A Sandbox instance connected to the running Space.
|
|
@@ -324,6 +331,16 @@ class Sandbox:
|
|
| 324 |
_log = log or print
|
| 325 |
api = HfApi(token=token)
|
| 326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
base = name or "sandbox"
|
| 328 |
suffix = uuid.uuid4().hex[:8]
|
| 329 |
space_id = f"{owner}/{base}-{suffix}"
|
|
@@ -342,13 +359,18 @@ class Sandbox:
|
|
| 342 |
api.duplicate_space(**kwargs)
|
| 343 |
_log(f"Space created: https://huggingface.co/spaces/{space_id}")
|
| 344 |
|
|
|
|
|
|
|
| 345 |
# Upload sandbox server and Dockerfile (triggers rebuild)
|
| 346 |
cls._setup_server(space_id, api, log=_log)
|
| 347 |
|
|
|
|
|
|
|
| 348 |
# Wait for it to come online (rebuild + start)
|
| 349 |
_log(f"Waiting for Space to start (timeout: {wait_timeout}s)...")
|
| 350 |
deadline = time.time() + wait_timeout
|
| 351 |
while time.time() < deadline:
|
|
|
|
| 352 |
runtime = api.get_space_runtime(space_id)
|
| 353 |
if runtime.stage == "RUNNING":
|
| 354 |
_log(f"Space is running (hardware: {runtime.hardware})")
|
|
@@ -366,6 +388,8 @@ class Sandbox:
|
|
| 366 |
f"Check https://huggingface.co/spaces/{space_id}"
|
| 367 |
)
|
| 368 |
|
|
|
|
|
|
|
| 369 |
# Wait for the API server to be responsive (non-fatal)
|
| 370 |
sb = cls(space_id=space_id, token=token, _owns_space=True)
|
| 371 |
try:
|
|
|
|
| 287 |
|
| 288 |
# ── Lifecycle ─────────────────────────────────────────────────
|
| 289 |
|
| 290 |
+
class Cancelled(Exception):
|
| 291 |
+
"""Raised when sandbox creation is cancelled by the user."""
|
| 292 |
+
|
| 293 |
@classmethod
|
| 294 |
def create(
|
| 295 |
cls,
|
|
|
|
| 303 |
token: str | None = None,
|
| 304 |
wait_timeout: int = WAIT_TIMEOUT,
|
| 305 |
log: "Callable[[str], object] | None" = None,
|
| 306 |
+
cancel_event: "Any | None" = None,
|
| 307 |
) -> Sandbox:
|
| 308 |
"""
|
| 309 |
Create a new sandbox by duplicating the template Space.
|
|
|
|
| 321 |
sleep_time: Auto-sleep after N seconds of inactivity.
|
| 322 |
token: HF API token (from user's OAuth session).
|
| 323 |
wait_timeout: Max seconds to wait for Space to start (default: 300).
|
| 324 |
+
cancel_event: A threading.Event (or compatible) checked during
|
| 325 |
+
polling loops. When set, the Space is deleted and
|
| 326 |
+
Sandbox.Cancelled is raised.
|
| 327 |
|
| 328 |
Returns:
|
| 329 |
A Sandbox instance connected to the running Space.
|
|
|
|
| 331 |
_log = log or print
|
| 332 |
api = HfApi(token=token)
|
| 333 |
|
| 334 |
+
def _check_cancel():
|
| 335 |
+
if cancel_event and cancel_event.is_set():
|
| 336 |
+
_log("Sandbox creation cancelled by user, cleaning up...")
|
| 337 |
+
try:
|
| 338 |
+
api.delete_repo(space_id, repo_type="space")
|
| 339 |
+
_log(f"Deleted Space {space_id}")
|
| 340 |
+
except Exception:
|
| 341 |
+
pass
|
| 342 |
+
raise cls.Cancelled(f"Sandbox creation cancelled: {space_id}")
|
| 343 |
+
|
| 344 |
base = name or "sandbox"
|
| 345 |
suffix = uuid.uuid4().hex[:8]
|
| 346 |
space_id = f"{owner}/{base}-{suffix}"
|
|
|
|
| 359 |
api.duplicate_space(**kwargs)
|
| 360 |
_log(f"Space created: https://huggingface.co/spaces/{space_id}")
|
| 361 |
|
| 362 |
+
_check_cancel()
|
| 363 |
+
|
| 364 |
# Upload sandbox server and Dockerfile (triggers rebuild)
|
| 365 |
cls._setup_server(space_id, api, log=_log)
|
| 366 |
|
| 367 |
+
_check_cancel()
|
| 368 |
+
|
| 369 |
# Wait for it to come online (rebuild + start)
|
| 370 |
_log(f"Waiting for Space to start (timeout: {wait_timeout}s)...")
|
| 371 |
deadline = time.time() + wait_timeout
|
| 372 |
while time.time() < deadline:
|
| 373 |
+
_check_cancel()
|
| 374 |
runtime = api.get_space_runtime(space_id)
|
| 375 |
if runtime.stage == "RUNNING":
|
| 376 |
_log(f"Space is running (hardware: {runtime.hardware})")
|
|
|
|
| 388 |
f"Check https://huggingface.co/spaces/{space_id}"
|
| 389 |
)
|
| 390 |
|
| 391 |
+
_check_cancel()
|
| 392 |
+
|
| 393 |
# Wait for the API server to be responsive (non-fatal)
|
| 394 |
sb = cls(space_id=space_id, token=token, _owns_space=True)
|
| 395 |
try:
|
agent/tools/sandbox_tool.py
CHANGED
|
@@ -13,6 +13,7 @@ from __future__ import annotations
|
|
| 13 |
|
| 14 |
import asyncio
|
| 15 |
import shlex
|
|
|
|
| 16 |
from typing import Any
|
| 17 |
|
| 18 |
from huggingface_hub import HfApi, SpaceHardware
|
|
@@ -103,16 +104,33 @@ async def _ensure_sandbox(
|
|
| 103 |
Event(event_type="tool_log", data={"tool": "sandbox", "log": msg}),
|
| 104 |
)
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
kwargs = {
|
| 107 |
"owner": owner,
|
| 108 |
"hardware": hardware,
|
| 109 |
"token": token,
|
| 110 |
"log": _log,
|
|
|
|
| 111 |
**create_kwargs,
|
| 112 |
}
|
| 113 |
if hardware != "cpu-basic":
|
| 114 |
kwargs["sleep_time"] = 2700
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
session.sandbox = sb
|
| 117 |
|
| 118 |
# Set a descriptive title (template title is inherited on duplicate)
|
|
|
|
| 13 |
|
| 14 |
import asyncio
|
| 15 |
import shlex
|
| 16 |
+
import threading
|
| 17 |
from typing import Any
|
| 18 |
|
| 19 |
from huggingface_hub import HfApi, SpaceHardware
|
|
|
|
| 104 |
Event(event_type="tool_log", data={"tool": "sandbox", "log": msg}),
|
| 105 |
)
|
| 106 |
|
| 107 |
+
# Bridge asyncio cancel event to a threading.Event for the blocking create call.
|
| 108 |
+
# We poll session._cancelled from the main loop in a background task and set
|
| 109 |
+
# a threading.Event that Sandbox.create checks during its polling loops.
|
| 110 |
+
cancel_flag = threading.Event()
|
| 111 |
+
|
| 112 |
+
async def _watch_cancel():
|
| 113 |
+
await session._cancelled.wait()
|
| 114 |
+
cancel_flag.set()
|
| 115 |
+
|
| 116 |
+
watcher_task = asyncio.create_task(_watch_cancel())
|
| 117 |
+
|
| 118 |
kwargs = {
|
| 119 |
"owner": owner,
|
| 120 |
"hardware": hardware,
|
| 121 |
"token": token,
|
| 122 |
"log": _log,
|
| 123 |
+
"cancel_event": cancel_flag,
|
| 124 |
**create_kwargs,
|
| 125 |
}
|
| 126 |
if hardware != "cpu-basic":
|
| 127 |
kwargs["sleep_time"] = 2700
|
| 128 |
+
try:
|
| 129 |
+
sb = await asyncio.to_thread(Sandbox.create, **kwargs)
|
| 130 |
+
except Sandbox.Cancelled:
|
| 131 |
+
return None, "Sandbox creation cancelled by user."
|
| 132 |
+
finally:
|
| 133 |
+
watcher_task.cancel()
|
| 134 |
session.sandbox = sb
|
| 135 |
|
| 136 |
# Set a descriptive title (template title is inherited on duplicate)
|