akseljoonas HF Staff committed on
Commit
5510397
·
1 Parent(s): a25dfc5

fix: route all HF token usage through session.hf_token from OAuth

Browse files

Remove all os.environ.get("HF_TOKEN") fallbacks. Token now flows
exclusively through OAuth -> session.hf_token -> tools.

- Pass hf_token through Session -> ContextManager -> _get_hf_username
- Add session param to docs, repo_files, repo_git handlers
- Jobs use HF_ADMIN_TOKEN for creation (falling back to the user's OAuth token when it is unset); the user token is injected into job secrets
- Remove HF_TOKEN env setup from backend/main.py
- Remove module-level username cache (broken for multi-user)

agent/context_manager/manager.py CHANGED
@@ -10,21 +10,17 @@ from pathlib import Path
10
  from typing import Any
11
 
12
  import yaml
13
- from huggingface_hub import HfApi
14
  from jinja2 import Template
15
  from litellm import Message, acompletion
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
- # Module-level cache for HF username — avoids repeating the slow whoami() call
20
- _hf_username_cache: str | None = None
21
-
22
  _HF_WHOAMI_URL = "https://huggingface.co/api/whoami-v2"
23
  _HF_WHOAMI_TIMEOUT = 5 # seconds
24
 
25
 
26
- def _get_hf_username() -> str:
27
- """Return the HF username, cached after the first call.
28
 
29
  Uses subprocess + curl to avoid Python HTTP client IPv6 issues that
30
  cause 40+ second hangs (httpx/urllib try IPv6 first which times out
@@ -34,15 +30,9 @@ def _get_hf_username() -> str:
34
  import subprocess
35
  import time as _t
36
 
37
- global _hf_username_cache
38
- if _hf_username_cache is not None:
39
- return _hf_username_cache
40
-
41
- hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
42
  if not hf_token:
43
- logger.warning("No HF_TOKEN set, using 'unknown' as username")
44
- _hf_username_cache = "unknown"
45
- return _hf_username_cache
46
 
47
  t0 = _t.monotonic()
48
  try:
@@ -64,21 +54,18 @@ def _get_hf_username() -> str:
64
  t1 = _t.monotonic()
65
  if result.returncode == 0 and result.stdout:
66
  data = json.loads(result.stdout)
67
- _hf_username_cache = data.get("name", "unknown")
68
- logger.info(
69
- f"HF username resolved to '{_hf_username_cache}' in {t1 - t0:.2f}s"
70
- )
71
  else:
72
  logger.warning(
73
  f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
74
  )
75
- _hf_username_cache = "unknown"
76
  except Exception as e:
77
  t1 = _t.monotonic()
78
  logger.warning(f"HF whoami failed in {t1 - t0:.2f}s: {e}")
79
- _hf_username_cache = "unknown"
80
-
81
- return _hf_username_cache
82
 
83
 
84
  class ContextManager:
@@ -91,10 +78,12 @@ class ContextManager:
91
  untouched_messages: int = 5,
92
  tool_specs: list[dict[str, Any]] | None = None,
93
  prompt_file_suffix: str = "system_prompt_v3.yaml",
 
94
  ):
95
  self.system_prompt = self._load_system_prompt(
96
  tool_specs or [],
97
  prompt_file_suffix="system_prompt_v3.yaml",
 
98
  )
99
  self.max_context = max_context
100
  self.compact_size = int(max_context * compact_size)
@@ -106,6 +95,7 @@ class ContextManager:
106
  self,
107
  tool_specs: list[dict[str, Any]],
108
  prompt_file_suffix: str = "system_prompt.yaml",
 
109
  ):
110
  """Load and render the system prompt from YAML file with Jinja2"""
111
  prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
@@ -121,8 +111,8 @@ class ContextManager:
121
  current_time = now.strftime("%H:%M:%S.%f")[:-3]
122
  current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
123
 
124
- # Get HF user info (cached after the first call)
125
- hf_user_info = _get_hf_username()
126
 
127
  template = Template(template_str)
128
  return template.render(
 
10
  from typing import Any
11
 
12
  import yaml
 
13
  from jinja2 import Template
14
  from litellm import Message, acompletion
15
 
16
  logger = logging.getLogger(__name__)
17
 
 
 
 
18
  _HF_WHOAMI_URL = "https://huggingface.co/api/whoami-v2"
19
  _HF_WHOAMI_TIMEOUT = 5 # seconds
20
 
21
 
22
+ def _get_hf_username(hf_token: str | None = None) -> str:
23
+ """Return the HF username for the given token.
24
 
25
  Uses subprocess + curl to avoid Python HTTP client IPv6 issues that
26
  cause 40+ second hangs (httpx/urllib try IPv6 first which times out
 
30
  import subprocess
31
  import time as _t
32
 
 
 
 
 
 
33
  if not hf_token:
34
+ logger.warning("No hf_token provided, using 'unknown' as username")
35
+ return "unknown"
 
36
 
37
  t0 = _t.monotonic()
38
  try:
 
54
  t1 = _t.monotonic()
55
  if result.returncode == 0 and result.stdout:
56
  data = json.loads(result.stdout)
57
+ username = data.get("name", "unknown")
58
+ logger.info(f"HF username resolved to '{username}' in {t1 - t0:.2f}s")
59
+ return username
 
60
  else:
61
  logger.warning(
62
  f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
63
  )
64
+ return "unknown"
65
  except Exception as e:
66
  t1 = _t.monotonic()
67
  logger.warning(f"HF whoami failed in {t1 - t0:.2f}s: {e}")
68
+ return "unknown"
 
 
69
 
70
 
71
  class ContextManager:
 
78
  untouched_messages: int = 5,
79
  tool_specs: list[dict[str, Any]] | None = None,
80
  prompt_file_suffix: str = "system_prompt_v3.yaml",
81
+ hf_token: str | None = None,
82
  ):
83
  self.system_prompt = self._load_system_prompt(
84
  tool_specs or [],
85
  prompt_file_suffix="system_prompt_v3.yaml",
86
+ hf_token=hf_token,
87
  )
88
  self.max_context = max_context
89
  self.compact_size = int(max_context * compact_size)
 
95
  self,
96
  tool_specs: list[dict[str, Any]],
97
  prompt_file_suffix: str = "system_prompt.yaml",
98
+ hf_token: str | None = None,
99
  ):
100
  """Load and render the system prompt from YAML file with Jinja2"""
101
  prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
 
111
  current_time = now.strftime("%H:%M:%S.%f")[:-3]
112
  current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
113
 
114
+ # Get HF user info from OAuth token
115
+ hf_user_info = _get_hf_username(hf_token)
116
 
117
  template = Template(template_str)
118
  return template.render(
agent/core/agent_loop.py CHANGED
@@ -19,8 +19,7 @@ from agent.tools.jobs_tool import CPU_FLAVORS
19
  logger = logging.getLogger(__name__)
20
 
21
  ToolCall = ChatCompletionMessageToolCall
22
- # Explicit inference token needed because litellm checks HF_TOKEN before
23
- # HUGGINGFACE_API_KEY, and HF_TOKEN (used for Hub ops) may lack inference permissions.
24
  _INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")
25
 
26
 
@@ -45,7 +44,7 @@ def _resolve_hf_router_params(model_name: str) -> dict:
45
 
46
  router_provider = parts[1]
47
  actual_model = parts[2]
48
- api_key = _INFERENCE_API_KEY or os.environ.get("HF_TOKEN")
49
 
50
  return {
51
  "model": f"openai/{actual_model}",
 
19
  logger = logging.getLogger(__name__)
20
 
21
  ToolCall = ChatCompletionMessageToolCall
22
+ # Explicit inference token for LLM API calls (separate from user OAuth tokens).
 
23
  _INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")
24
 
25
 
 
44
 
45
  router_provider = parts[1]
46
  actual_model = parts[2]
47
+ api_key = _INFERENCE_API_KEY
48
 
49
  return {
50
  "model": f"openai/{actual_model}",
agent/core/session.py CHANGED
@@ -80,7 +80,9 @@ class Session:
80
  config: Config | None = None,
81
  tool_router=None,
82
  context_manager: ContextManager | None = None,
 
83
  ):
 
84
  self.tool_router = tool_router
85
  tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
86
  self.context_manager = context_manager or ContextManager(
@@ -88,6 +90,7 @@ class Session:
88
  compact_size=0.1,
89
  untouched_messages=5,
90
  tool_specs=tool_specs,
 
91
  )
92
  self.event_queue = event_queue
93
  self.session_id = str(uuid.uuid4())
@@ -97,8 +100,6 @@ class Session:
97
  self.is_running = True
98
  self._cancelled = asyncio.Event()
99
  self.pending_approval: Optional[dict[str, Any]] = None
100
- # User's HF OAuth token — set by session_manager after construction
101
- self.hf_token: Optional[str] = None
102
  self.sandbox = None
103
 
104
  # Session trajectory logging
 
80
  config: Config | None = None,
81
  tool_router=None,
82
  context_manager: ContextManager | None = None,
83
+ hf_token: str | None = None,
84
  ):
85
+ self.hf_token: Optional[str] = hf_token
86
  self.tool_router = tool_router
87
  tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
88
  self.context_manager = context_manager or ContextManager(
 
90
  compact_size=0.1,
91
  untouched_messages=5,
92
  tool_specs=tool_specs,
93
+ hf_token=hf_token,
94
  )
95
  self.event_queue = event_queue
96
  self.session_id = str(uuid.uuid4())
 
100
  self.is_running = True
101
  self._cancelled = asyncio.Event()
102
  self.pending_approval: Optional[dict[str, Any]] = None
 
 
103
  self.sandbox = None
104
 
105
  # Session trajectory logging
agent/tools/dataset_tools.py CHANGED
@@ -6,7 +6,6 @@ to provide everything needed for ML tasks in a single tool call.
6
  """
7
 
8
  import asyncio
9
- import os
10
  from typing import Any, TypedDict
11
 
12
  import httpx
@@ -26,9 +25,8 @@ class SplitConfig(TypedDict):
26
  splits: list[str]
27
 
28
 
29
- def _get_headers() -> dict:
30
  """Get auth headers for private/gated datasets"""
31
- token = os.environ.get("HF_TOKEN")
32
  if token:
33
  return {"Authorization": f"Bearer {token}"}
34
  return {}
@@ -39,12 +37,13 @@ async def inspect_dataset(
39
  config: str | None = None,
40
  split: str | None = None,
41
  sample_rows: int = 3,
 
42
  ) -> ToolResult:
43
  """
44
  Get comprehensive dataset info in one call.
45
  All API calls made in parallel for speed.
46
  """
47
- headers = _get_headers()
48
  output_parts = []
49
  errors = []
50
 
@@ -424,14 +423,16 @@ HF_INSPECT_DATASET_TOOL_SPEC = {
424
  }
425
 
426
 
427
- async def hf_inspect_dataset_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
428
  """Handler for agent tool router"""
429
  try:
 
430
  result = await inspect_dataset(
431
  dataset=arguments["dataset"],
432
  config=arguments.get("config"),
433
  split=arguments.get("split"),
434
  sample_rows=min(arguments.get("sample_rows", 3), 10),
 
435
  )
436
  return result["formatted"], not result.get("isError", False)
437
  except Exception as e:
 
6
  """
7
 
8
  import asyncio
 
9
  from typing import Any, TypedDict
10
 
11
  import httpx
 
25
  splits: list[str]
26
 
27
 
28
+ def _get_headers(token: str | None = None) -> dict:
29
  """Get auth headers for private/gated datasets"""
 
30
  if token:
31
  return {"Authorization": f"Bearer {token}"}
32
  return {}
 
37
  config: str | None = None,
38
  split: str | None = None,
39
  sample_rows: int = 3,
40
+ hf_token: str | None = None,
41
  ) -> ToolResult:
42
  """
43
  Get comprehensive dataset info in one call.
44
  All API calls made in parallel for speed.
45
  """
46
+ headers = _get_headers(hf_token)
47
  output_parts = []
48
  errors = []
49
 
 
423
  }
424
 
425
 
426
+ async def hf_inspect_dataset_handler(arguments: dict[str, Any], session=None) -> tuple[str, bool]:
427
  """Handler for agent tool router"""
428
  try:
429
+ hf_token = session.hf_token if session else None
430
  result = await inspect_dataset(
431
  dataset=arguments["dataset"],
432
  config=arguments.get("config"),
433
  split=arguments.get("split"),
434
  sample_rows=min(arguments.get("sample_rows", 3), 10),
435
+ hf_token=hf_token,
436
  )
437
  return result["formatted"], not result.get("isError", False)
438
  except Exception as e:
agent/tools/docs_tools.py CHANGED
@@ -4,7 +4,6 @@ Documentation search tools for exploring HuggingFace and Gradio documentation.
4
 
5
  import asyncio
6
  import json
7
- import os
8
  from typing import Any
9
 
10
  import httpx
@@ -287,7 +286,9 @@ def _format_results(
287
  # ---------------------------------------------------------------------------
288
 
289
 
290
- async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
 
 
291
  """Explore documentation structure with optional search query."""
292
  endpoint = arguments.get("endpoint", "").lstrip("/")
293
  query = arguments.get("query")
@@ -316,9 +317,9 @@ async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]
316
  return f"Error fetching Gradio docs: {str(e)}", False
317
 
318
  # HF docs
319
- hf_token = os.environ.get("HF_TOKEN")
320
  if not hf_token:
321
- return "Error: HF_TOKEN environment variable not set", False
322
 
323
  try:
324
  max_results_int = int(max_results) if max_results is not None else None
@@ -378,15 +379,17 @@ async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]
378
  return f"Unexpected error: {str(e)}", False
379
 
380
 
381
- async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
 
 
382
  """Fetch full markdown content of a documentation page."""
383
  url = arguments.get("url", "")
384
  if not url:
385
  return "Error: No URL provided", False
386
 
387
- hf_token = os.environ.get("HF_TOKEN")
388
  if not hf_token:
389
- return "Error: HF_TOKEN environment variable not set", False
390
 
391
  if not url.endswith(".md"):
392
  url = f"{url}.md"
@@ -454,20 +457,30 @@ def _extract_all_endpoints(spec: dict[str, Any]) -> list[dict[str, Any]]:
454
  endpoints = []
455
  for path, path_item in spec.get("paths", {}).items():
456
  for method, op in path_item.items():
457
- if method not in ["get", "post", "put", "delete", "patch", "head", "options"]:
 
 
 
 
 
 
 
 
458
  continue
459
- endpoints.append({
460
- "path": path,
461
- "method": method.upper(),
462
- "operationId": op.get("operationId", ""),
463
- "summary": op.get("summary", ""),
464
- "description": op.get("description", ""),
465
- "tags": " ".join(op.get("tags", [])),
466
- "parameters": op.get("parameters", []),
467
- "request_body": op.get("requestBody", {}),
468
- "responses": op.get("responses", {}),
469
- "base_url": base_url,
470
- })
 
 
471
  return endpoints
472
 
473
 
@@ -511,7 +524,12 @@ async def _build_openapi_index() -> tuple[Any, MultifieldParser, list[dict[str,
511
  parser = MultifieldParser(
512
  ["summary", "description", "operationId", "tags", "param_names"],
513
  schema=schema,
514
- fieldboosts={"summary": 3.0, "operationId": 2.0, "description": 1.0, "tags": 1.5},
 
 
 
 
 
515
  group=OrGroup,
516
  )
517
 
@@ -532,11 +550,20 @@ async def _search_openapi(
532
  return [], "Query contained unsupported syntax."
533
 
534
  with index.searcher() as searcher:
535
- results = searcher.search(query_obj, limit=limit * 2) # Get extra for tag filtering
 
 
536
  matches = []
537
  for hit in results:
538
  # Find full endpoint data
539
- ep = next((e for e in endpoints if e["path"] == hit["path"] and e["method"] == hit["method"]), None)
 
 
 
 
 
 
 
540
  if ep is None:
541
  continue
542
  # Filter by tag if provided
@@ -713,7 +740,10 @@ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
713
  query = arguments.get("query", "").strip() or None
714
 
715
  if not tag and not query:
716
- return "Error: Provide either 'query' (keyword search) or 'tag' (category filter), or both.", False
 
 
 
717
 
718
  try:
719
  note = None
@@ -724,7 +754,9 @@ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
724
 
725
  # If Whoosh found results, return them
726
  if results:
727
- return _format_openapi_results(results, tag=tag, query=query, note=search_note), True
 
 
728
 
729
  # Whoosh found nothing - fall back to tag-based if tag provided
730
  if tag:
@@ -737,7 +769,9 @@ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
737
  if tag:
738
  _, _, endpoints = await _build_openapi_index()
739
  results = [ep for ep in endpoints if tag in ep.get("tags", "")]
740
- return _format_openapi_results(results, tag=tag, query=None, note=note), True
 
 
741
 
742
  return "Error: No results found", False
743
 
 
4
 
5
  import asyncio
6
  import json
 
7
  from typing import Any
8
 
9
  import httpx
 
286
  # ---------------------------------------------------------------------------
287
 
288
 
289
+ async def explore_hf_docs_handler(
290
+ arguments: dict[str, Any], session=None
291
+ ) -> tuple[str, bool]:
292
  """Explore documentation structure with optional search query."""
293
  endpoint = arguments.get("endpoint", "").lstrip("/")
294
  query = arguments.get("query")
 
317
  return f"Error fetching Gradio docs: {str(e)}", False
318
 
319
  # HF docs
320
+ hf_token = session.hf_token if session else None
321
  if not hf_token:
322
+ return "Error: No HF token available (not logged in)", False
323
 
324
  try:
325
  max_results_int = int(max_results) if max_results is not None else None
 
379
  return f"Unexpected error: {str(e)}", False
380
 
381
 
382
+ async def hf_docs_fetch_handler(
383
+ arguments: dict[str, Any], session=None
384
+ ) -> tuple[str, bool]:
385
  """Fetch full markdown content of a documentation page."""
386
  url = arguments.get("url", "")
387
  if not url:
388
  return "Error: No URL provided", False
389
 
390
+ hf_token = session.hf_token if session else None
391
  if not hf_token:
392
+ return "Error: No HF token available (not logged in)", False
393
 
394
  if not url.endswith(".md"):
395
  url = f"{url}.md"
 
457
  endpoints = []
458
  for path, path_item in spec.get("paths", {}).items():
459
  for method, op in path_item.items():
460
+ if method not in [
461
+ "get",
462
+ "post",
463
+ "put",
464
+ "delete",
465
+ "patch",
466
+ "head",
467
+ "options",
468
+ ]:
469
  continue
470
+ endpoints.append(
471
+ {
472
+ "path": path,
473
+ "method": method.upper(),
474
+ "operationId": op.get("operationId", ""),
475
+ "summary": op.get("summary", ""),
476
+ "description": op.get("description", ""),
477
+ "tags": " ".join(op.get("tags", [])),
478
+ "parameters": op.get("parameters", []),
479
+ "request_body": op.get("requestBody", {}),
480
+ "responses": op.get("responses", {}),
481
+ "base_url": base_url,
482
+ }
483
+ )
484
  return endpoints
485
 
486
 
 
524
  parser = MultifieldParser(
525
  ["summary", "description", "operationId", "tags", "param_names"],
526
  schema=schema,
527
+ fieldboosts={
528
+ "summary": 3.0,
529
+ "operationId": 2.0,
530
+ "description": 1.0,
531
+ "tags": 1.5,
532
+ },
533
  group=OrGroup,
534
  )
535
 
 
550
  return [], "Query contained unsupported syntax."
551
 
552
  with index.searcher() as searcher:
553
+ results = searcher.search(
554
+ query_obj, limit=limit * 2
555
+ ) # Get extra for tag filtering
556
  matches = []
557
  for hit in results:
558
  # Find full endpoint data
559
+ ep = next(
560
+ (
561
+ e
562
+ for e in endpoints
563
+ if e["path"] == hit["path"] and e["method"] == hit["method"]
564
+ ),
565
+ None,
566
+ )
567
  if ep is None:
568
  continue
569
  # Filter by tag if provided
 
740
  query = arguments.get("query", "").strip() or None
741
 
742
  if not tag and not query:
743
+ return (
744
+ "Error: Provide either 'query' (keyword search) or 'tag' (category filter), or both.",
745
+ False,
746
+ )
747
 
748
  try:
749
  note = None
 
754
 
755
  # If Whoosh found results, return them
756
  if results:
757
+ return _format_openapi_results(
758
+ results, tag=tag, query=query, note=search_note
759
+ ), True
760
 
761
  # Whoosh found nothing - fall back to tag-based if tag provided
762
  if tag:
 
769
  if tag:
770
  _, _, endpoints = await _build_openapi_index()
771
  results = [ep for ep in endpoints if tag in ep.get("tags", "")]
772
+ return _format_openapi_results(
773
+ results, tag=tag, query=None, note=note
774
+ ), True
775
 
776
  return "Error: No results found", False
777
 
agent/tools/hf_repo_files_tool.py CHANGED
@@ -312,10 +312,11 @@ HF_REPO_FILES_TOOL_SPEC = {
312
  }
313
 
314
 
315
- async def hf_repo_files_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
316
  """Handler for agent tool router."""
317
  try:
318
- tool = HfRepoFilesTool()
 
319
  result = await tool.execute(arguments)
320
  return result["formatted"], not result.get("isError", False)
321
  except Exception as e:
 
312
  }
313
 
314
 
315
+ async def hf_repo_files_handler(arguments: Dict[str, Any], session=None) -> tuple[str, bool]:
316
  """Handler for agent tool router."""
317
  try:
318
+ hf_token = session.hf_token if session else None
319
+ tool = HfRepoFilesTool(hf_token=hf_token)
320
  result = await tool.execute(arguments)
321
  return result["formatted"], not result.get("isError", False)
322
  except Exception as e:
agent/tools/hf_repo_git_tool.py CHANGED
@@ -653,10 +653,11 @@ HF_REPO_GIT_TOOL_SPEC = {
653
  }
654
 
655
 
656
- async def hf_repo_git_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
657
  """Handler for agent tool router."""
658
  try:
659
- tool = HfRepoGitTool()
 
660
  result = await tool.execute(arguments)
661
  return result["formatted"], not result.get("isError", False)
662
  except Exception as e:
 
653
  }
654
 
655
 
656
+ async def hf_repo_git_handler(arguments: Dict[str, Any], session=None) -> tuple[str, bool]:
657
  """Handler for agent tool router."""
658
  try:
659
+ hf_token = session.hf_token if session else None
660
+ tool = HfRepoGitTool(hf_token=hf_token)
661
  result = await tool.execute(arguments)
662
  return result["formatted"], not result.get("isError", False)
663
  except Exception as e:
agent/tools/jobs_tool.py CHANGED
@@ -135,8 +135,7 @@ def _add_default_env(params: Dict[str, Any] | None) -> Dict[str, Any]:
135
  def _add_environment_variables(
136
  params: Dict[str, Any] | None, user_token: str | None = None
137
  ) -> Dict[str, Any]:
138
- # Prefer the authenticated user's OAuth token, fall back to global env var
139
- token = user_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
140
 
141
  # Start with user-provided env vars, then force-set token last
142
  result = dict(params or {})
@@ -294,8 +293,11 @@ class HfJobsTool:
294
  log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
295
  session: Any = None,
296
  tool_call_id: Optional[str] = None,
 
297
  ):
298
  self.hf_token = hf_token
 
 
299
  self.api = HfApi(token=hf_token)
300
  self.namespace = namespace
301
  self.log_callback = log_callback
@@ -520,7 +522,7 @@ class HfJobsTool:
520
  image=image,
521
  command=command,
522
  env=_add_default_env(args.get("env")),
523
- secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
524
  flavor=args.get("hardware_flavor", "cpu-basic"),
525
  timeout=args.get("timeout", "30m"),
526
  namespace=self.namespace,
@@ -752,7 +754,7 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}
752
  command=command,
753
  schedule=schedule,
754
  env=_add_default_env(args.get("env")),
755
- secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
756
  flavor=args.get("hardware_flavor", "cpu-basic"),
757
  timeout=args.get("timeout", "30m"),
758
  namespace=self.namespace,
@@ -1055,17 +1057,15 @@ async def hf_jobs_handler(
1055
  return f"Failed to read {script} from sandbox: {result.error}", False
1056
  arguments = {**arguments, "script": result.output}
1057
 
1058
- # Prefer the authenticated user's OAuth token, fall back to global env
1059
- hf_token = (
1060
- (getattr(session, "hf_token", None) if session else None)
1061
- or os.environ.get("HF_TOKEN")
1062
- or os.environ.get("HUGGINGFACE_HUB_TOKEN")
1063
- )
1064
- namespace = os.environ.get("HF_NAMESPACE") or (HfApi(token=hf_token).whoami().get("name") if hf_token else None)
1065
 
1066
  tool = HfJobsTool(
1067
  namespace=namespace,
1068
- hf_token=hf_token,
 
1069
  log_callback=log_callback if session else None,
1070
  session=session,
1071
  tool_call_id=tool_call_id,
 
135
  def _add_environment_variables(
136
  params: Dict[str, Any] | None, user_token: str | None = None
137
  ) -> Dict[str, Any]:
138
+ token = user_token or ""
 
139
 
140
  # Start with user-provided env vars, then force-set token last
141
  result = dict(params or {})
 
293
  log_callback: Optional[Callable[[str], Awaitable[None]]] = None,
294
  session: Any = None,
295
  tool_call_id: Optional[str] = None,
296
+ user_token: Optional[str] = None,
297
  ):
298
  self.hf_token = hf_token
299
+ # user_token is injected into job secrets; hf_token is for API calls (job creation)
300
+ self.user_token = user_token or hf_token
301
  self.api = HfApi(token=hf_token)
302
  self.namespace = namespace
303
  self.log_callback = log_callback
 
522
  image=image,
523
  command=command,
524
  env=_add_default_env(args.get("env")),
525
+ secrets=_add_environment_variables(args.get("secrets"), self.user_token),
526
  flavor=args.get("hardware_flavor", "cpu-basic"),
527
  timeout=args.get("timeout", "30m"),
528
  namespace=self.namespace,
 
754
  command=command,
755
  schedule=schedule,
756
  env=_add_default_env(args.get("env")),
757
+ secrets=_add_environment_variables(args.get("secrets"), self.user_token),
758
  flavor=args.get("hardware_flavor", "cpu-basic"),
759
  timeout=args.get("timeout", "30m"),
760
  namespace=self.namespace,
 
1057
  return f"Failed to read {script} from sandbox: {result.error}", False
1058
  arguments = {**arguments, "script": result.output}
1059
 
1060
+ user_token = session.hf_token if session else None
1061
+ # HF_ADMIN_TOKEN creates jobs under the org; user token is injected into job secrets
1062
+ admin_token = os.environ.get("HF_ADMIN_TOKEN") or user_token
1063
+ namespace = os.environ.get("HF_NAMESPACE") or (HfApi(token=admin_token).whoami().get("name") if admin_token else None)
 
 
 
1064
 
1065
  tool = HfJobsTool(
1066
  namespace=namespace,
1067
+ hf_token=admin_token,
1068
+ user_token=user_token,
1069
  log_callback=log_callback if session else None,
1070
  session=session,
1071
  tool_call_id=tool_call_id,
agent/tools/sandbox_client.py CHANGED
@@ -37,7 +37,6 @@ Tools: bash, read, write, edit, upload
37
  from __future__ import annotations
38
 
39
  import io
40
- import os
41
  import sys
42
  import time
43
  import uuid
@@ -240,7 +239,6 @@ class Sandbox:
240
  _files_read: set = field(init=False, repr=False, default_factory=set)
241
 
242
  def __post_init__(self):
243
- self.token = self.token or os.environ.get("HF_TOKEN")
244
  slug = self.space_id.replace("/", "-")
245
  # Trailing slash is critical: httpx resolves relative paths against base_url.
246
  # Without it, client.get("health") resolves to /health instead of /api/health.
@@ -282,13 +280,12 @@ class Sandbox:
282
  hardware: Hardware tier (cpu-basic, t4-small, etc.).
283
  private: Whether the Space should be private.
284
  sleep_time: Auto-sleep after N seconds of inactivity.
285
- token: HF API token. Falls back to HF_TOKEN env var.
286
  wait_timeout: Max seconds to wait for Space to start (default: 300).
287
 
288
  Returns:
289
  A Sandbox instance connected to the running Space.
290
  """
291
- token = token or os.environ.get("HF_TOKEN")
292
  api = HfApi(token=token)
293
 
294
  base = name or "sandbox"
 
37
  from __future__ import annotations
38
 
39
  import io
 
40
  import sys
41
  import time
42
  import uuid
 
239
  _files_read: set = field(init=False, repr=False, default_factory=set)
240
 
241
  def __post_init__(self):
 
242
  slug = self.space_id.replace("/", "-")
243
  # Trailing slash is critical: httpx resolves relative paths against base_url.
244
  # Without it, client.get("health") resolves to /health instead of /api/health.
 
280
  hardware: Hardware tier (cpu-basic, t4-small, etc.).
281
  private: Whether the Space should be private.
282
  sleep_time: Auto-sleep after N seconds of inactivity.
283
+ token: HF API token (from user's OAuth session).
284
  wait_timeout: Max seconds to wait for Space to start (default: 300).
285
 
286
  Returns:
287
  A Sandbox instance connected to the running Space.
288
  """
 
289
  api = HfApi(token=token)
290
 
291
  base = name or "sandbox"
agent/tools/sandbox_tool.py CHANGED
@@ -12,7 +12,6 @@ a cpu-basic sandbox is auto-created (no approval needed).
12
  from __future__ import annotations
13
 
14
  import asyncio
15
- import os
16
  from typing import Any
17
 
18
  from huggingface_hub import HfApi, SpaceHardware
@@ -38,11 +37,7 @@ async def _ensure_sandbox(
38
  if not session:
39
  return None, "No session available."
40
 
41
- token = (
42
- getattr(session, "hf_token", None)
43
- or os.environ.get("HF_TOKEN")
44
- or os.environ.get("HUGGINGFACE_HUB_TOKEN")
45
- )
46
  if not token:
47
  return None, "No HF token available. Cannot create sandbox."
48
 
 
12
  from __future__ import annotations
13
 
14
  import asyncio
 
15
  from typing import Any
16
 
17
  from huggingface_hub import HfApi, SpaceHardware
 
37
  if not session:
38
  return None, "No session available."
39
 
40
+ token = session.hf_token
 
 
 
 
41
  if not token:
42
  return None, "No HF token available. Cannot create sandbox."
43
 
backend/main.py CHANGED
@@ -6,20 +6,14 @@ from contextlib import asynccontextmanager
6
  from pathlib import Path
7
 
8
  from dotenv import load_dotenv
9
-
10
- load_dotenv()
11
-
12
- # Ensure HF_TOKEN is set — fall back to HF_ADMIN_TOKEN if available (HF Spaces)
13
- if not os.environ.get("HF_TOKEN") and os.environ.get("HF_ADMIN_TOKEN"):
14
- os.environ["HF_TOKEN"] = os.environ["HF_ADMIN_TOKEN"]
15
-
16
  from fastapi import FastAPI
17
  from fastapi.middleware.cors import CORSMiddleware
18
  from fastapi.staticfiles import StaticFiles
19
-
20
  from routes.agent import router as agent_router
21
  from routes.auth import router as auth_router
22
 
 
 
23
  # Configure logging
24
  logging.basicConfig(
25
  level=logging.INFO,
 
6
  from pathlib import Path
7
 
8
  from dotenv import load_dotenv
 
 
 
 
 
 
 
9
  from fastapi import FastAPI
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.staticfiles import StaticFiles
 
12
  from routes.agent import router as agent_router
13
  from routes.auth import router as auth_router
14
 
15
+ load_dotenv()
16
+
17
  # Configure logging
18
  logging.basicConfig(
19
  level=logging.INFO,
backend/session_manager.py CHANGED
@@ -132,16 +132,16 @@ class SessionManager:
132
  def _create_session_sync():
133
  t0 = _time.monotonic()
134
  tool_router = ToolRouter(self.config.mcpServers)
135
- session = Session(event_queue, config=self.config, tool_router=tool_router)
 
 
 
136
  t1 = _time.monotonic()
137
  logger.info(f"Session initialized in {t1 - t0:.2f}s")
138
  return tool_router, session
139
 
140
  tool_router, session = await asyncio.to_thread(_create_session_sync)
141
 
142
- # Store user's HF token on the session so tools can use it
143
- session.hf_token = hf_token
144
-
145
  # Create wrapper
146
  agent_session = AgentSession(
147
  session_id=session_id,
 
132
  def _create_session_sync():
133
  t0 = _time.monotonic()
134
  tool_router = ToolRouter(self.config.mcpServers)
135
+ session = Session(
136
+ event_queue, config=self.config, tool_router=tool_router,
137
+ hf_token=hf_token,
138
+ )
139
  t1 = _time.monotonic()
140
  logger.info(f"Session initialized in {t1 - t0:.2f}s")
141
  return tool_router, session
142
 
143
  tool_router, session = await asyncio.to_thread(_create_session_sync)
144
 
 
 
 
145
  # Create wrapper
146
  agent_session = AgentSession(
147
  session_id=session_id,