Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
33f29a8
1
Parent(s): 061ebdc
fix: route all HF token usage through session.hf_token from OAuth
Browse files
Remove all os.environ.get("HF_TOKEN") fallbacks. Token now flows
exclusively through OAuth -> session.hf_token -> tools.
- Pass hf_token through Session -> ContextManager -> _get_hf_username
- Add session param to docs, repo_files, repo_git handlers
- Jobs use HF_ADMIN_TOKEN for creation, user token for job secrets
- Remove HF_TOKEN env setup from backend/main.py
- Remove module-level username cache (broken for multi-user)
- agent/context_manager/manager.py +14 -24
- agent/core/agent_loop.py +2 -3
- agent/core/session.py +3 -2
- agent/tools/dataset_tools.py +0 -2
- agent/tools/docs_tools.py +60 -26
- agent/tools/hf_repo_files_tool.py +3 -2
- agent/tools/hf_repo_git_tool.py +3 -2
- agent/tools/jobs_tool.py +12 -12
- agent/tools/sandbox_client.py +1 -4
- agent/tools/sandbox_tool.py +1 -6
- backend/main.py +2 -8
- backend/session_manager.py +4 -4
agent/context_manager/manager.py
CHANGED
|
@@ -10,21 +10,17 @@ from pathlib import Path
|
|
| 10 |
from typing import Any
|
| 11 |
|
| 12 |
import yaml
|
| 13 |
-
from huggingface_hub import HfApi
|
| 14 |
from jinja2 import Template
|
| 15 |
from litellm import Message, acompletion
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
| 19 |
-
# Module-level cache for HF username — avoids repeating the slow whoami() call
|
| 20 |
-
_hf_username_cache: str | None = None
|
| 21 |
-
|
| 22 |
_HF_WHOAMI_URL = "https://huggingface.co/api/whoami-v2"
|
| 23 |
_HF_WHOAMI_TIMEOUT = 5 # seconds
|
| 24 |
|
| 25 |
|
| 26 |
-
def _get_hf_username() -> str:
|
| 27 |
-
"""Return the HF username
|
| 28 |
|
| 29 |
Uses subprocess + curl to avoid Python HTTP client IPv6 issues that
|
| 30 |
cause 40+ second hangs (httpx/urllib try IPv6 first which times out
|
|
@@ -34,15 +30,9 @@ def _get_hf_username() -> str:
|
|
| 34 |
import subprocess
|
| 35 |
import time as _t
|
| 36 |
|
| 37 |
-
global _hf_username_cache
|
| 38 |
-
if _hf_username_cache is not None:
|
| 39 |
-
return _hf_username_cache
|
| 40 |
-
|
| 41 |
-
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
|
| 42 |
if not hf_token:
|
| 43 |
-
logger.warning("No
|
| 44 |
-
|
| 45 |
-
return _hf_username_cache
|
| 46 |
|
| 47 |
t0 = _t.monotonic()
|
| 48 |
try:
|
|
@@ -64,21 +54,18 @@ def _get_hf_username() -> str:
|
|
| 64 |
t1 = _t.monotonic()
|
| 65 |
if result.returncode == 0 and result.stdout:
|
| 66 |
data = json.loads(result.stdout)
|
| 67 |
-
|
| 68 |
-
logger.info(
|
| 69 |
-
|
| 70 |
-
)
|
| 71 |
else:
|
| 72 |
logger.warning(
|
| 73 |
f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
|
| 74 |
)
|
| 75 |
-
|
| 76 |
except Exception as e:
|
| 77 |
t1 = _t.monotonic()
|
| 78 |
logger.warning(f"HF whoami failed in {t1 - t0:.2f}s: {e}")
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
return _hf_username_cache
|
| 82 |
|
| 83 |
|
| 84 |
class ContextManager:
|
|
@@ -91,10 +78,12 @@ class ContextManager:
|
|
| 91 |
untouched_messages: int = 5,
|
| 92 |
tool_specs: list[dict[str, Any]] | None = None,
|
| 93 |
prompt_file_suffix: str = "system_prompt_v3.yaml",
|
|
|
|
| 94 |
):
|
| 95 |
self.system_prompt = self._load_system_prompt(
|
| 96 |
tool_specs or [],
|
| 97 |
prompt_file_suffix="system_prompt_v3.yaml",
|
|
|
|
| 98 |
)
|
| 99 |
self.max_context = max_context
|
| 100 |
self.compact_size = int(max_context * compact_size)
|
|
@@ -106,6 +95,7 @@ class ContextManager:
|
|
| 106 |
self,
|
| 107 |
tool_specs: list[dict[str, Any]],
|
| 108 |
prompt_file_suffix: str = "system_prompt.yaml",
|
|
|
|
| 109 |
):
|
| 110 |
"""Load and render the system prompt from YAML file with Jinja2"""
|
| 111 |
prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
|
|
@@ -121,8 +111,8 @@ class ContextManager:
|
|
| 121 |
current_time = now.strftime("%H:%M:%S.%f")[:-3]
|
| 122 |
current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
|
| 123 |
|
| 124 |
-
# Get HF user info
|
| 125 |
-
hf_user_info = _get_hf_username()
|
| 126 |
|
| 127 |
template = Template(template_str)
|
| 128 |
return template.render(
|
|
|
|
| 10 |
from typing import Any
|
| 11 |
|
| 12 |
import yaml
|
|
|
|
| 13 |
from jinja2 import Template
|
| 14 |
from litellm import Message, acompletion
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
_HF_WHOAMI_URL = "https://huggingface.co/api/whoami-v2"
|
| 19 |
_HF_WHOAMI_TIMEOUT = 5 # seconds
|
| 20 |
|
| 21 |
|
| 22 |
+
def _get_hf_username(hf_token: str | None = None) -> str:
|
| 23 |
+
"""Return the HF username for the given token.
|
| 24 |
|
| 25 |
Uses subprocess + curl to avoid Python HTTP client IPv6 issues that
|
| 26 |
cause 40+ second hangs (httpx/urllib try IPv6 first which times out
|
|
|
|
| 30 |
import subprocess
|
| 31 |
import time as _t
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
if not hf_token:
|
| 34 |
+
logger.warning("No hf_token provided, using 'unknown' as username")
|
| 35 |
+
return "unknown"
|
|
|
|
| 36 |
|
| 37 |
t0 = _t.monotonic()
|
| 38 |
try:
|
|
|
|
| 54 |
t1 = _t.monotonic()
|
| 55 |
if result.returncode == 0 and result.stdout:
|
| 56 |
data = json.loads(result.stdout)
|
| 57 |
+
username = data.get("name", "unknown")
|
| 58 |
+
logger.info(f"HF username resolved to '{username}' in {t1 - t0:.2f}s")
|
| 59 |
+
return username
|
|
|
|
| 60 |
else:
|
| 61 |
logger.warning(
|
| 62 |
f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
|
| 63 |
)
|
| 64 |
+
return "unknown"
|
| 65 |
except Exception as e:
|
| 66 |
t1 = _t.monotonic()
|
| 67 |
logger.warning(f"HF whoami failed in {t1 - t0:.2f}s: {e}")
|
| 68 |
+
return "unknown"
|
|
|
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
class ContextManager:
|
|
|
|
| 78 |
untouched_messages: int = 5,
|
| 79 |
tool_specs: list[dict[str, Any]] | None = None,
|
| 80 |
prompt_file_suffix: str = "system_prompt_v3.yaml",
|
| 81 |
+
hf_token: str | None = None,
|
| 82 |
):
|
| 83 |
self.system_prompt = self._load_system_prompt(
|
| 84 |
tool_specs or [],
|
| 85 |
prompt_file_suffix="system_prompt_v3.yaml",
|
| 86 |
+
hf_token=hf_token,
|
| 87 |
)
|
| 88 |
self.max_context = max_context
|
| 89 |
self.compact_size = int(max_context * compact_size)
|
|
|
|
| 95 |
self,
|
| 96 |
tool_specs: list[dict[str, Any]],
|
| 97 |
prompt_file_suffix: str = "system_prompt.yaml",
|
| 98 |
+
hf_token: str | None = None,
|
| 99 |
):
|
| 100 |
"""Load and render the system prompt from YAML file with Jinja2"""
|
| 101 |
prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
|
|
|
|
| 111 |
current_time = now.strftime("%H:%M:%S.%f")[:-3]
|
| 112 |
current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
|
| 113 |
|
| 114 |
+
# Get HF user info from OAuth token
|
| 115 |
+
hf_user_info = _get_hf_username(hf_token)
|
| 116 |
|
| 117 |
template = Template(template_str)
|
| 118 |
return template.render(
|
agent/core/agent_loop.py
CHANGED
|
@@ -19,8 +19,7 @@ from agent.tools.jobs_tool import CPU_FLAVORS
|
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
ToolCall = ChatCompletionMessageToolCall
|
| 22 |
-
# Explicit inference token
|
| 23 |
-
# HUGGINGFACE_API_KEY, and HF_TOKEN (used for Hub ops) may lack inference permissions.
|
| 24 |
_INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")
|
| 25 |
|
| 26 |
|
|
@@ -45,7 +44,7 @@ def _resolve_hf_router_params(model_name: str) -> dict:
|
|
| 45 |
|
| 46 |
router_provider = parts[1]
|
| 47 |
actual_model = parts[2]
|
| 48 |
-
api_key = _INFERENCE_API_KEY
|
| 49 |
|
| 50 |
return {
|
| 51 |
"model": f"openai/{actual_model}",
|
|
|
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
ToolCall = ChatCompletionMessageToolCall
|
| 22 |
+
# Explicit inference token for LLM API calls (separate from user OAuth tokens).
|
|
|
|
| 23 |
_INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")
|
| 24 |
|
| 25 |
|
|
|
|
| 44 |
|
| 45 |
router_provider = parts[1]
|
| 46 |
actual_model = parts[2]
|
| 47 |
+
api_key = _INFERENCE_API_KEY
|
| 48 |
|
| 49 |
return {
|
| 50 |
"model": f"openai/{actual_model}",
|
agent/core/session.py
CHANGED
|
@@ -80,7 +80,9 @@ class Session:
|
|
| 80 |
config: Config | None = None,
|
| 81 |
tool_router=None,
|
| 82 |
context_manager: ContextManager | None = None,
|
|
|
|
| 83 |
):
|
|
|
|
| 84 |
self.tool_router = tool_router
|
| 85 |
tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
|
| 86 |
self.context_manager = context_manager or ContextManager(
|
|
@@ -88,6 +90,7 @@ class Session:
|
|
| 88 |
compact_size=0.1,
|
| 89 |
untouched_messages=5,
|
| 90 |
tool_specs=tool_specs,
|
|
|
|
| 91 |
)
|
| 92 |
self.event_queue = event_queue
|
| 93 |
self.session_id = str(uuid.uuid4())
|
|
@@ -97,8 +100,6 @@ class Session:
|
|
| 97 |
self.is_running = True
|
| 98 |
self._cancelled = asyncio.Event()
|
| 99 |
self.pending_approval: Optional[dict[str, Any]] = None
|
| 100 |
-
# User's HF OAuth token — set by session_manager after construction
|
| 101 |
-
self.hf_token: Optional[str] = None
|
| 102 |
self.sandbox = None
|
| 103 |
|
| 104 |
# Session trajectory logging
|
|
|
|
| 80 |
config: Config | None = None,
|
| 81 |
tool_router=None,
|
| 82 |
context_manager: ContextManager | None = None,
|
| 83 |
+
hf_token: str | None = None,
|
| 84 |
):
|
| 85 |
+
self.hf_token: Optional[str] = hf_token
|
| 86 |
self.tool_router = tool_router
|
| 87 |
tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
|
| 88 |
self.context_manager = context_manager or ContextManager(
|
|
|
|
| 90 |
compact_size=0.1,
|
| 91 |
untouched_messages=5,
|
| 92 |
tool_specs=tool_specs,
|
| 93 |
+
hf_token=hf_token,
|
| 94 |
)
|
| 95 |
self.event_queue = event_queue
|
| 96 |
self.session_id = str(uuid.uuid4())
|
|
|
|
| 100 |
self.is_running = True
|
| 101 |
self._cancelled = asyncio.Event()
|
| 102 |
self.pending_approval: Optional[dict[str, Any]] = None
|
|
|
|
|
|
|
| 103 |
self.sandbox = None
|
| 104 |
|
| 105 |
# Session trajectory logging
|
agent/tools/dataset_tools.py
CHANGED
|
@@ -6,7 +6,6 @@ to provide everything needed for ML tasks in a single tool call.
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import asyncio
|
| 9 |
-
import os
|
| 10 |
from typing import Any, TypedDict
|
| 11 |
|
| 12 |
import httpx
|
|
@@ -29,7 +28,6 @@ class SplitConfig(TypedDict):
|
|
| 29 |
|
| 30 |
def _get_headers(token: str | None = None) -> dict:
|
| 31 |
"""Get auth headers for private/gated datasets"""
|
| 32 |
-
token = token or os.environ.get("HF_TOKEN")
|
| 33 |
if token:
|
| 34 |
return {"Authorization": f"Bearer {token}"}
|
| 35 |
return {}
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import asyncio
|
|
|
|
| 9 |
from typing import Any, TypedDict
|
| 10 |
|
| 11 |
import httpx
|
|
|
|
| 28 |
|
| 29 |
def _get_headers(token: str | None = None) -> dict:
|
| 30 |
"""Get auth headers for private/gated datasets"""
|
|
|
|
| 31 |
if token:
|
| 32 |
return {"Authorization": f"Bearer {token}"}
|
| 33 |
return {}
|
agent/tools/docs_tools.py
CHANGED
|
@@ -4,7 +4,6 @@ Documentation search tools for exploring HuggingFace and Gradio documentation.
|
|
| 4 |
|
| 5 |
import asyncio
|
| 6 |
import json
|
| 7 |
-
import os
|
| 8 |
from typing import Any
|
| 9 |
|
| 10 |
import httpx
|
|
@@ -287,7 +286,9 @@ def _format_results(
|
|
| 287 |
# ---------------------------------------------------------------------------
|
| 288 |
|
| 289 |
|
| 290 |
-
async def explore_hf_docs_handler(
|
|
|
|
|
|
|
| 291 |
"""Explore documentation structure with optional search query."""
|
| 292 |
endpoint = arguments.get("endpoint", "").lstrip("/")
|
| 293 |
query = arguments.get("query")
|
|
@@ -316,9 +317,9 @@ async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]
|
|
| 316 |
return f"Error fetching Gradio docs: {str(e)}", False
|
| 317 |
|
| 318 |
# HF docs
|
| 319 |
-
hf_token =
|
| 320 |
if not hf_token:
|
| 321 |
-
return "Error:
|
| 322 |
|
| 323 |
try:
|
| 324 |
max_results_int = int(max_results) if max_results is not None else None
|
|
@@ -378,15 +379,17 @@ async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]
|
|
| 378 |
return f"Unexpected error: {str(e)}", False
|
| 379 |
|
| 380 |
|
| 381 |
-
async def hf_docs_fetch_handler(
|
|
|
|
|
|
|
| 382 |
"""Fetch full markdown content of a documentation page."""
|
| 383 |
url = arguments.get("url", "")
|
| 384 |
if not url:
|
| 385 |
return "Error: No URL provided", False
|
| 386 |
|
| 387 |
-
hf_token =
|
| 388 |
if not hf_token:
|
| 389 |
-
return "Error:
|
| 390 |
|
| 391 |
if not url.endswith(".md"):
|
| 392 |
url = f"{url}.md"
|
|
@@ -454,20 +457,30 @@ def _extract_all_endpoints(spec: dict[str, Any]) -> list[dict[str, Any]]:
|
|
| 454 |
endpoints = []
|
| 455 |
for path, path_item in spec.get("paths", {}).items():
|
| 456 |
for method, op in path_item.items():
|
| 457 |
-
if method not in [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
continue
|
| 459 |
-
endpoints.append(
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
| 471 |
return endpoints
|
| 472 |
|
| 473 |
|
|
@@ -511,7 +524,12 @@ async def _build_openapi_index() -> tuple[Any, MultifieldParser, list[dict[str,
|
|
| 511 |
parser = MultifieldParser(
|
| 512 |
["summary", "description", "operationId", "tags", "param_names"],
|
| 513 |
schema=schema,
|
| 514 |
-
fieldboosts={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
group=OrGroup,
|
| 516 |
)
|
| 517 |
|
|
@@ -532,11 +550,20 @@ async def _search_openapi(
|
|
| 532 |
return [], "Query contained unsupported syntax."
|
| 533 |
|
| 534 |
with index.searcher() as searcher:
|
| 535 |
-
results = searcher.search(
|
|
|
|
|
|
|
| 536 |
matches = []
|
| 537 |
for hit in results:
|
| 538 |
# Find full endpoint data
|
| 539 |
-
ep = next(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
if ep is None:
|
| 541 |
continue
|
| 542 |
# Filter by tag if provided
|
|
@@ -713,7 +740,10 @@ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
|
|
| 713 |
query = arguments.get("query", "").strip() or None
|
| 714 |
|
| 715 |
if not tag and not query:
|
| 716 |
-
return
|
|
|
|
|
|
|
|
|
|
| 717 |
|
| 718 |
try:
|
| 719 |
note = None
|
|
@@ -724,7 +754,9 @@ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
|
|
| 724 |
|
| 725 |
# If Whoosh found results, return them
|
| 726 |
if results:
|
| 727 |
-
return _format_openapi_results(
|
|
|
|
|
|
|
| 728 |
|
| 729 |
# Whoosh found nothing - fall back to tag-based if tag provided
|
| 730 |
if tag:
|
|
@@ -737,7 +769,9 @@ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
|
|
| 737 |
if tag:
|
| 738 |
_, _, endpoints = await _build_openapi_index()
|
| 739 |
results = [ep for ep in endpoints if tag in ep.get("tags", "")]
|
| 740 |
-
return _format_openapi_results(
|
|
|
|
|
|
|
| 741 |
|
| 742 |
return "Error: No results found", False
|
| 743 |
|
|
|
|
| 4 |
|
| 5 |
import asyncio
|
| 6 |
import json
|
|
|
|
| 7 |
from typing import Any
|
| 8 |
|
| 9 |
import httpx
|
|
|
|
| 286 |
# ---------------------------------------------------------------------------
|
| 287 |
|
| 288 |
|
| 289 |
+
async def explore_hf_docs_handler(
|
| 290 |
+
arguments: dict[str, Any], session=None
|
| 291 |
+
) -> tuple[str, bool]:
|
| 292 |
"""Explore documentation structure with optional search query."""
|
| 293 |
endpoint = arguments.get("endpoint", "").lstrip("/")
|
| 294 |
query = arguments.get("query")
|
|
|
|
| 317 |
return f"Error fetching Gradio docs: {str(e)}", False
|
| 318 |
|
| 319 |
# HF docs
|
| 320 |
+
hf_token = session.hf_token if session else None
|
| 321 |
if not hf_token:
|
| 322 |
+
return "Error: No HF token available (not logged in)", False
|
| 323 |
|
| 324 |
try:
|
| 325 |
max_results_int = int(max_results) if max_results is not None else None
|
|
|
|
| 379 |
return f"Unexpected error: {str(e)}", False
|
| 380 |
|
| 381 |
|
| 382 |
+
async def hf_docs_fetch_handler(
|
| 383 |
+
arguments: dict[str, Any], session=None
|
| 384 |
+
) -> tuple[str, bool]:
|
| 385 |
"""Fetch full markdown content of a documentation page."""
|
| 386 |
url = arguments.get("url", "")
|
| 387 |
if not url:
|
| 388 |
return "Error: No URL provided", False
|
| 389 |
|
| 390 |
+
hf_token = session.hf_token if session else None
|
| 391 |
if not hf_token:
|
| 392 |
+
return "Error: No HF token available (not logged in)", False
|
| 393 |
|
| 394 |
if not url.endswith(".md"):
|
| 395 |
url = f"{url}.md"
|
|
|
|
| 457 |
endpoints = []
|
| 458 |
for path, path_item in spec.get("paths", {}).items():
|
| 459 |
for method, op in path_item.items():
|
| 460 |
+
if method not in [
|
| 461 |
+
"get",
|
| 462 |
+
"post",
|
| 463 |
+
"put",
|
| 464 |
+
"delete",
|
| 465 |
+
"patch",
|
| 466 |
+
"head",
|
| 467 |
+
"options",
|
| 468 |
+
]:
|
| 469 |
continue
|
| 470 |
+
endpoints.append(
|
| 471 |
+
{
|
| 472 |
+
"path": path,
|
| 473 |
+
"method": method.upper(),
|
| 474 |
+
"operationId": op.get("operationId", ""),
|
| 475 |
+
"summary": op.get("summary", ""),
|
| 476 |
+
"description": op.get("description", ""),
|
| 477 |
+
"tags": " ".join(op.get("tags", [])),
|
| 478 |
+
"parameters": op.get("parameters", []),
|
| 479 |
+
"request_body": op.get("requestBody", {}),
|
| 480 |
+
"responses": op.get("responses", {}),
|
| 481 |
+
"base_url": base_url,
|
| 482 |
+
}
|
| 483 |
+
)
|
| 484 |
return endpoints
|
| 485 |
|
| 486 |
|
|
|
|
| 524 |
parser = MultifieldParser(
|
| 525 |
["summary", "description", "operationId", "tags", "param_names"],
|
| 526 |
schema=schema,
|
| 527 |
+
fieldboosts={
|
| 528 |
+
"summary": 3.0,
|
| 529 |
+
"operationId": 2.0,
|
| 530 |
+
"description": 1.0,
|
| 531 |
+
"tags": 1.5,
|
| 532 |
+
},
|
| 533 |
group=OrGroup,
|
| 534 |
)
|
| 535 |
|
|
|
|
| 550 |
return [], "Query contained unsupported syntax."
|
| 551 |
|
| 552 |
with index.searcher() as searcher:
|
| 553 |
+
results = searcher.search(
|
| 554 |
+
query_obj, limit=limit * 2
|
| 555 |
+
) # Get extra for tag filtering
|
| 556 |
matches = []
|
| 557 |
for hit in results:
|
| 558 |
# Find full endpoint data
|
| 559 |
+
ep = next(
|
| 560 |
+
(
|
| 561 |
+
e
|
| 562 |
+
for e in endpoints
|
| 563 |
+
if e["path"] == hit["path"] and e["method"] == hit["method"]
|
| 564 |
+
),
|
| 565 |
+
None,
|
| 566 |
+
)
|
| 567 |
if ep is None:
|
| 568 |
continue
|
| 569 |
# Filter by tag if provided
|
|
|
|
| 740 |
query = arguments.get("query", "").strip() or None
|
| 741 |
|
| 742 |
if not tag and not query:
|
| 743 |
+
return (
|
| 744 |
+
"Error: Provide either 'query' (keyword search) or 'tag' (category filter), or both.",
|
| 745 |
+
False,
|
| 746 |
+
)
|
| 747 |
|
| 748 |
try:
|
| 749 |
note = None
|
|
|
|
| 754 |
|
| 755 |
# If Whoosh found results, return them
|
| 756 |
if results:
|
| 757 |
+
return _format_openapi_results(
|
| 758 |
+
results, tag=tag, query=query, note=search_note
|
| 759 |
+
), True
|
| 760 |
|
| 761 |
# Whoosh found nothing - fall back to tag-based if tag provided
|
| 762 |
if tag:
|
|
|
|
| 769 |
if tag:
|
| 770 |
_, _, endpoints = await _build_openapi_index()
|
| 771 |
results = [ep for ep in endpoints if tag in ep.get("tags", "")]
|
| 772 |
+
return _format_openapi_results(
|
| 773 |
+
results, tag=tag, query=None, note=note
|
| 774 |
+
), True
|
| 775 |
|
| 776 |
return "Error: No results found", False
|
| 777 |
|
agent/tools/hf_repo_files_tool.py
CHANGED
|
@@ -312,10 +312,11 @@ HF_REPO_FILES_TOOL_SPEC = {
|
|
| 312 |
}
|
| 313 |
|
| 314 |
|
| 315 |
-
async def hf_repo_files_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
|
| 316 |
"""Handler for agent tool router."""
|
| 317 |
try:
|
| 318 |
-
|
|
|
|
| 319 |
result = await tool.execute(arguments)
|
| 320 |
return result["formatted"], not result.get("isError", False)
|
| 321 |
except Exception as e:
|
|
|
|
| 312 |
}
|
| 313 |
|
| 314 |
|
| 315 |
+
async def hf_repo_files_handler(arguments: Dict[str, Any], session=None) -> tuple[str, bool]:
|
| 316 |
"""Handler for agent tool router."""
|
| 317 |
try:
|
| 318 |
+
hf_token = session.hf_token if session else None
|
| 319 |
+
tool = HfRepoFilesTool(hf_token=hf_token)
|
| 320 |
result = await tool.execute(arguments)
|
| 321 |
return result["formatted"], not result.get("isError", False)
|
| 322 |
except Exception as e:
|
agent/tools/hf_repo_git_tool.py
CHANGED
|
@@ -653,10 +653,11 @@ HF_REPO_GIT_TOOL_SPEC = {
|
|
| 653 |
}
|
| 654 |
|
| 655 |
|
| 656 |
-
async def hf_repo_git_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
|
| 657 |
"""Handler for agent tool router."""
|
| 658 |
try:
|
| 659 |
-
|
|
|
|
| 660 |
result = await tool.execute(arguments)
|
| 661 |
return result["formatted"], not result.get("isError", False)
|
| 662 |
except Exception as e:
|
|
|
|
| 653 |
}
|
| 654 |
|
| 655 |
|
| 656 |
+
async def hf_repo_git_handler(arguments: Dict[str, Any], session=None) -> tuple[str, bool]:
|
| 657 |
"""Handler for agent tool router."""
|
| 658 |
try:
|
| 659 |
+
hf_token = session.hf_token if session else None
|
| 660 |
+
tool = HfRepoGitTool(hf_token=hf_token)
|
| 661 |
result = await tool.execute(arguments)
|
| 662 |
return result["formatted"], not result.get("isError", False)
|
| 663 |
except Exception as e:
|
agent/tools/jobs_tool.py
CHANGED
|
@@ -135,8 +135,7 @@ def _add_default_env(params: Dict[str, Any] | None) -> Dict[str, Any]:
|
|
| 135 |
def _add_environment_variables(
|
| 136 |
params: Dict[str, Any] | None, user_token: str | None = None
|
| 137 |
) -> Dict[str, Any]:
|
| 138 |
-
|
| 139 |
-
token = user_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
|
| 140 |
|
| 141 |
# Start with user-provided env vars, then force-set token last
|
| 142 |
result = dict(params or {})
|
|
@@ -294,8 +293,11 @@ class HfJobsTool:
|
|
| 294 |
log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
|
| 295 |
session: Any = None,
|
| 296 |
tool_call_id: Optional[str] = None,
|
|
|
|
| 297 |
):
|
| 298 |
self.hf_token = hf_token
|
|
|
|
|
|
|
| 299 |
self.api = HfApi(token=hf_token)
|
| 300 |
self.namespace = namespace
|
| 301 |
self.log_callback = log_callback
|
|
@@ -520,7 +522,7 @@ class HfJobsTool:
|
|
| 520 |
image=image,
|
| 521 |
command=command,
|
| 522 |
env=_add_default_env(args.get("env")),
|
| 523 |
-
secrets=_add_environment_variables(args.get("secrets"), self.
|
| 524 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 525 |
timeout=args.get("timeout", "30m"),
|
| 526 |
namespace=self.namespace,
|
|
@@ -752,7 +754,7 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}
|
|
| 752 |
command=command,
|
| 753 |
schedule=schedule,
|
| 754 |
env=_add_default_env(args.get("env")),
|
| 755 |
-
secrets=_add_environment_variables(args.get("secrets"), self.
|
| 756 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 757 |
timeout=args.get("timeout", "30m"),
|
| 758 |
namespace=self.namespace,
|
|
@@ -1055,17 +1057,15 @@ async def hf_jobs_handler(
|
|
| 1055 |
return f"Failed to read {script} from sandbox: {result.error}", False
|
| 1056 |
arguments = {**arguments, "script": result.output}
|
| 1057 |
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
or os.environ.get("HUGGINGFACE_HUB_TOKEN")
|
| 1063 |
-
)
|
| 1064 |
-
namespace = os.environ.get("HF_NAMESPACE") or (HfApi(token=hf_token).whoami().get("name") if hf_token else None)
|
| 1065 |
|
| 1066 |
tool = HfJobsTool(
|
| 1067 |
namespace=namespace,
|
| 1068 |
-
hf_token=
|
|
|
|
| 1069 |
log_callback=log_callback if session else None,
|
| 1070 |
session=session,
|
| 1071 |
tool_call_id=tool_call_id,
|
|
|
|
| 135 |
def _add_environment_variables(
|
| 136 |
params: Dict[str, Any] | None, user_token: str | None = None
|
| 137 |
) -> Dict[str, Any]:
|
| 138 |
+
token = user_token or ""
|
|
|
|
| 139 |
|
| 140 |
# Start with user-provided env vars, then force-set token last
|
| 141 |
result = dict(params or {})
|
|
|
|
| 293 |
log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
|
| 294 |
session: Any = None,
|
| 295 |
tool_call_id: Optional[str] = None,
|
| 296 |
+
user_token: Optional[str] = None,
|
| 297 |
):
|
| 298 |
self.hf_token = hf_token
|
| 299 |
+
# user_token is injected into job secrets; hf_token is for API calls (job creation)
|
| 300 |
+
self.user_token = user_token or hf_token
|
| 301 |
self.api = HfApi(token=hf_token)
|
| 302 |
self.namespace = namespace
|
| 303 |
self.log_callback = log_callback
|
|
|
|
| 522 |
image=image,
|
| 523 |
command=command,
|
| 524 |
env=_add_default_env(args.get("env")),
|
| 525 |
+
secrets=_add_environment_variables(args.get("secrets"), self.user_token),
|
| 526 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 527 |
timeout=args.get("timeout", "30m"),
|
| 528 |
namespace=self.namespace,
|
|
|
|
| 754 |
command=command,
|
| 755 |
schedule=schedule,
|
| 756 |
env=_add_default_env(args.get("env")),
|
| 757 |
+
secrets=_add_environment_variables(args.get("secrets"), self.user_token),
|
| 758 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 759 |
timeout=args.get("timeout", "30m"),
|
| 760 |
namespace=self.namespace,
|
|
|
|
| 1057 |
return f"Failed to read {script} from sandbox: {result.error}", False
|
| 1058 |
arguments = {**arguments, "script": result.output}
|
| 1059 |
|
| 1060 |
+
user_token = session.hf_token if session else None
|
| 1061 |
+
# HF_ADMIN_TOKEN creates jobs under the org; user token is injected into job secrets
|
| 1062 |
+
admin_token = os.environ.get("HF_ADMIN_TOKEN") or user_token
|
| 1063 |
+
namespace = os.environ.get("HF_NAMESPACE") or (HfApi(token=admin_token).whoami().get("name") if admin_token else None)
|
|
|
|
|
|
|
|
|
|
| 1064 |
|
| 1065 |
tool = HfJobsTool(
|
| 1066 |
namespace=namespace,
|
| 1067 |
+
hf_token=admin_token,
|
| 1068 |
+
user_token=user_token,
|
| 1069 |
log_callback=log_callback if session else None,
|
| 1070 |
session=session,
|
| 1071 |
tool_call_id=tool_call_id,
|
agent/tools/sandbox_client.py
CHANGED
|
@@ -37,7 +37,6 @@ Tools: bash, read, write, edit, upload
|
|
| 37 |
from __future__ import annotations
|
| 38 |
|
| 39 |
import io
|
| 40 |
-
import os
|
| 41 |
import sys
|
| 42 |
import time
|
| 43 |
import uuid
|
|
@@ -240,7 +239,6 @@ class Sandbox:
|
|
| 240 |
_files_read: set = field(init=False, repr=False, default_factory=set)
|
| 241 |
|
| 242 |
def __post_init__(self):
|
| 243 |
-
self.token = self.token or os.environ.get("HF_TOKEN")
|
| 244 |
slug = self.space_id.replace("/", "-")
|
| 245 |
# Trailing slash is critical: httpx resolves relative paths against base_url.
|
| 246 |
# Without it, client.get("health") resolves to /health instead of /api/health.
|
|
@@ -282,13 +280,12 @@ class Sandbox:
|
|
| 282 |
hardware: Hardware tier (cpu-basic, t4-small, etc.).
|
| 283 |
private: Whether the Space should be private.
|
| 284 |
sleep_time: Auto-sleep after N seconds of inactivity.
|
| 285 |
-
token: HF API token
|
| 286 |
wait_timeout: Max seconds to wait for Space to start (default: 300).
|
| 287 |
|
| 288 |
Returns:
|
| 289 |
A Sandbox instance connected to the running Space.
|
| 290 |
"""
|
| 291 |
-
token = token or os.environ.get("HF_TOKEN")
|
| 292 |
api = HfApi(token=token)
|
| 293 |
|
| 294 |
base = name or "sandbox"
|
|
|
|
| 37 |
from __future__ import annotations
|
| 38 |
|
| 39 |
import io
|
|
|
|
| 40 |
import sys
|
| 41 |
import time
|
| 42 |
import uuid
|
|
|
|
| 239 |
_files_read: set = field(init=False, repr=False, default_factory=set)
|
| 240 |
|
| 241 |
def __post_init__(self):
|
|
|
|
| 242 |
slug = self.space_id.replace("/", "-")
|
| 243 |
# Trailing slash is critical: httpx resolves relative paths against base_url.
|
| 244 |
# Without it, client.get("health") resolves to /health instead of /api/health.
|
|
|
|
| 280 |
hardware: Hardware tier (cpu-basic, t4-small, etc.).
|
| 281 |
private: Whether the Space should be private.
|
| 282 |
sleep_time: Auto-sleep after N seconds of inactivity.
|
| 283 |
+
token: HF API token (from user's OAuth session).
|
| 284 |
wait_timeout: Max seconds to wait for Space to start (default: 300).
|
| 285 |
|
| 286 |
Returns:
|
| 287 |
A Sandbox instance connected to the running Space.
|
| 288 |
"""
|
|
|
|
| 289 |
api = HfApi(token=token)
|
| 290 |
|
| 291 |
base = name or "sandbox"
|
agent/tools/sandbox_tool.py
CHANGED
|
@@ -12,7 +12,6 @@ a cpu-basic sandbox is auto-created (no approval needed).
|
|
| 12 |
from __future__ import annotations
|
| 13 |
|
| 14 |
import asyncio
|
| 15 |
-
import os
|
| 16 |
from typing import Any
|
| 17 |
|
| 18 |
from huggingface_hub import HfApi, SpaceHardware
|
|
@@ -38,11 +37,7 @@ async def _ensure_sandbox(
|
|
| 38 |
if not session:
|
| 39 |
return None, "No session available."
|
| 40 |
|
| 41 |
-
token =
|
| 42 |
-
getattr(session, "hf_token", None)
|
| 43 |
-
or os.environ.get("HF_TOKEN")
|
| 44 |
-
or os.environ.get("HUGGINGFACE_HUB_TOKEN")
|
| 45 |
-
)
|
| 46 |
if not token:
|
| 47 |
return None, "No HF token available. Cannot create sandbox."
|
| 48 |
|
|
|
|
| 12 |
from __future__ import annotations
|
| 13 |
|
| 14 |
import asyncio
|
|
|
|
| 15 |
from typing import Any
|
| 16 |
|
| 17 |
from huggingface_hub import HfApi, SpaceHardware
|
|
|
|
| 37 |
if not session:
|
| 38 |
return None, "No session available."
|
| 39 |
|
| 40 |
+
token = session.hf_token
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
if not token:
|
| 42 |
return None, "No HF token available. Cannot create sandbox."
|
| 43 |
|
backend/main.py
CHANGED
|
@@ -6,20 +6,14 @@ from contextlib import asynccontextmanager
|
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
-
|
| 10 |
-
load_dotenv()
|
| 11 |
-
|
| 12 |
-
# Ensure HF_TOKEN is set — fall back to HF_ADMIN_TOKEN if available (HF Spaces)
|
| 13 |
-
if not os.environ.get("HF_TOKEN") and os.environ.get("HF_ADMIN_TOKEN"):
|
| 14 |
-
os.environ["HF_TOKEN"] = os.environ["HF_ADMIN_TOKEN"]
|
| 15 |
-
|
| 16 |
from fastapi import FastAPI
|
| 17 |
from fastapi.middleware.cors import CORSMiddleware
|
| 18 |
from fastapi.staticfiles import StaticFiles
|
| 19 |
-
|
| 20 |
from routes.agent import router as agent_router
|
| 21 |
from routes.auth import router as auth_router
|
| 22 |
|
|
|
|
|
|
|
| 23 |
# Configure logging
|
| 24 |
logging.basicConfig(
|
| 25 |
level=logging.INFO,
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from fastapi import FastAPI
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
|
|
|
| 12 |
from routes.agent import router as agent_router
|
| 13 |
from routes.auth import router as auth_router
|
| 14 |
|
| 15 |
+
load_dotenv()
|
| 16 |
+
|
| 17 |
# Configure logging
|
| 18 |
logging.basicConfig(
|
| 19 |
level=logging.INFO,
|
backend/session_manager.py
CHANGED
|
@@ -132,16 +132,16 @@ class SessionManager:
|
|
| 132 |
def _create_session_sync():
|
| 133 |
t0 = _time.monotonic()
|
| 134 |
tool_router = ToolRouter(self.config.mcpServers)
|
| 135 |
-
session = Session(
|
|
|
|
|
|
|
|
|
|
| 136 |
t1 = _time.monotonic()
|
| 137 |
logger.info(f"Session initialized in {t1 - t0:.2f}s")
|
| 138 |
return tool_router, session
|
| 139 |
|
| 140 |
tool_router, session = await asyncio.to_thread(_create_session_sync)
|
| 141 |
|
| 142 |
-
# Store user's HF token on the session so tools can use it
|
| 143 |
-
session.hf_token = hf_token
|
| 144 |
-
|
| 145 |
# Create wrapper
|
| 146 |
agent_session = AgentSession(
|
| 147 |
session_id=session_id,
|
|
|
|
| 132 |
def _create_session_sync():
|
| 133 |
t0 = _time.monotonic()
|
| 134 |
tool_router = ToolRouter(self.config.mcpServers)
|
| 135 |
+
session = Session(
|
| 136 |
+
event_queue, config=self.config, tool_router=tool_router,
|
| 137 |
+
hf_token=hf_token,
|
| 138 |
+
)
|
| 139 |
t1 = _time.monotonic()
|
| 140 |
logger.info(f"Session initialized in {t1 - t0:.2f}s")
|
| 141 |
return tool_router, session
|
| 142 |
|
| 143 |
tool_router, session = await asyncio.to_thread(_create_session_sync)
|
| 144 |
|
|
|
|
|
|
|
|
|
|
| 145 |
# Create wrapper
|
| 146 |
agent_session = AgentSession(
|
| 147 |
session_id=session_id,
|