akseljoonas HF Staff committed on
Commit
9934918
·
1 Parent(s): 75cb10f

Deleted the help-style call, moved its content into the tool description, and auto-loaded the HF token into the env

Browse files
Files changed (1) hide show
  1. agent/tools/jobs_tool.py +65 -154
agent/tools/jobs_tool.py CHANGED
@@ -10,7 +10,7 @@ import os
10
  from typing import Any, Dict, Literal, Optional
11
 
12
  from huggingface_hub import HfApi
13
- from huggingface_hub.utils import HfHubHTTPError
14
 
15
  from agent.tools.types import ToolResult
16
  from agent.tools.utilities import (
@@ -64,26 +64,21 @@ OperationType = Literal[
64
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
65
 
66
 
67
- def _substitute_hf_token(params: Dict[str, Any] | None) -> Dict[str, Any] | None:
68
  """
69
- Substitute HF_TOKEN key with actual token value from environment.
70
 
71
  Args:
72
- params: Dictionary that may contain "HF_TOKEN" as a key
73
 
74
  Returns:
75
- Dictionary with HF_TOKEN value substituted from environment
76
  """
77
- print("DEBUG !! : ", params)
78
- if params is None:
79
- return None
80
 
81
- result = {}
82
- for key, value in params.items():
83
- if key == "HF_TOKEN":
84
- result[key] = os.environ.get("HF_TOKEN", "")
85
- else:
86
- result[key] = value
87
 
88
  return result
89
 
@@ -231,17 +226,18 @@ class HfJobsTool:
231
  operation = params.get("operation")
232
  args = params.get("args", {})
233
 
234
- # If no operation provided, return usage instructions
235
  if not operation:
236
- return self._show_help()
 
 
 
 
 
237
 
238
  # Normalize operation name
239
  operation = operation.lower()
240
 
241
- # Check if help is requested
242
- if args.get("help"):
243
- return self._show_operation_help(operation)
244
-
245
  try:
246
  # Route to appropriate handler
247
  if operation == "run":
@@ -298,104 +294,6 @@ class HfJobsTool:
298
  "isError": True,
299
  }
300
 
301
- def _show_help(self) -> ToolResult:
302
- """Show usage instructions when tool is called with no arguments"""
303
- cpu_flavors_list = ", ".join(CPU_FLAVORS)
304
- gpu_flavors_list = ", ".join(GPU_FLAVORS)
305
- specialized_flavors_list = ", ".join(SPECIALIZED_FLAVORS)
306
-
307
- hardware_section = f"**CPU:** {cpu_flavors_list}\n"
308
- if GPU_FLAVORS:
309
- hardware_section += f"**GPU:** {gpu_flavors_list}\n"
310
- if SPECIALIZED_FLAVORS:
311
- hardware_section += f"**Specialized:** {specialized_flavors_list}"
312
-
313
- usage_text = f"""# HuggingFace Jobs API
314
-
315
- Manage compute jobs on Hugging Face infrastructure.
316
-
317
- ## Available Commands
318
-
319
- ### Job Management
320
- - **run** - Run a job with a Docker image
321
- - **uv** - Run a Python script with UV (inline dependencies)
322
- - **ps** - List jobs
323
- - **logs** - Fetch job logs
324
- - **inspect** - Get detailed job information
325
- - **cancel** - Cancel a running job
326
-
327
- ### Scheduled Jobs
328
- - **scheduled run** - Create a scheduled job
329
- - **scheduled uv** - Create a scheduled UV job
330
- - **scheduled ps** - List scheduled jobs
331
- - **scheduled inspect** - Get scheduled job details
332
- - **scheduled delete** - Delete a scheduled job
333
- - **scheduled suspend** - Pause a scheduled job
334
- - **scheduled resume** - Resume a suspended job
335
-
336
- ## Examples
337
-
338
- ### Run a simple job
339
- Call this tool with:
340
- ```json
341
- {{
342
- "operation": "run",
343
- "args": {{
344
- "image": "python:3.12",
345
- "command": ["python", "-c", "print('Hello from HF Jobs!')"],
346
- "flavor": "cpu-basic"
347
- }}
348
- }}
349
- ```
350
-
351
- ### Run a Python script with UV
352
- Call this tool with:
353
- ```json
354
- {{
355
- "operation": "uv",
356
- "args": {{
357
- "script": "import random\\nprint(42 + random.randint(1, 5))",
358
- "dependencies": ["torch", "huggingface_hub"],
359
- "secrets": {{"HF_TOKEN": "$HF_TOKEN"}}
360
- }}
361
- }}
362
- ```
363
-
364
- ## Hardware Flavors
365
-
366
- {hardware_section}
367
-
368
- ## Command Format Guidelines
369
-
370
- **Array format (default):**
371
- - Recommended for every command—JSON keeps arguments intact (URLs with `&`, spaces, etc.)
372
- - Use `["/bin/sh", "-lc", "..."]` when you need shell operators like `&&`, `|`, or redirections
373
- - Works with any language: Python, bash, node, npm, uv, etc.
374
-
375
- **String format (simple cases only):**
376
- - Still accepted for backwards compatibility, parsed with POSIX shell semantics
377
- - Rejects shell operators and can mis-handle characters such as `&`; switch to arrays when things turn complex
378
-
379
- ### Show command-specific help
380
- Call this tool with:
381
- ```json
382
- {{"operation": "<operation>", "args": {{"help": true}}}}
383
- ```
384
-
385
- ## Tips
386
-
387
- - Jobs default to non-detached mode (stream logs until completion). Set `detach: true` to return immediately.
388
- - Prefer array commands to avoid shell parsing surprises
389
- - To access, create, or modify private Hub assets (spaces, private models, datasets, collections), pass `secrets: {{ "HF_TOKEN": "$HF_TOKEN" }}`. This is important. Without it, you will encounter authentification issues. Do not assume the user is connected on the jobs' server.
390
- - Before calling a job, think about dependencies (they must be specified), which hardware flavor to run on (choose simplest for task), and whether to include secrets.
391
- """
392
- return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
393
-
394
- def _show_operation_help(self, operation: str) -> ToolResult:
395
- """Show help for a specific operation"""
396
- help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
397
- return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
398
-
399
  async def _wait_for_job_completion(
400
  self, job_id: str, namespace: Optional[str] = None
401
  ) -> tuple[str, list[str]]:
@@ -430,26 +328,14 @@ Call this tool with:
430
  self.api.run_job,
431
  image=args.get("image", "python:3.12"),
432
  command=args.get("command"),
433
- env=_substitute_hf_token(args.get("env")),
434
- secrets=_substitute_hf_token(args.get("secrets")),
435
  flavor=args.get("flavor", "cpu-basic"),
436
  timeout=args.get("timeout", "30m"),
437
  namespace=args.get("namespace") or self.namespace,
438
  )
439
 
440
- # If detached, return immediately
441
- if args.get("detach", False):
442
- response = f"""Job started successfully!
443
-
444
- **Job ID:** {job.id}
445
- **Status:** {job.status.stage}
446
- **View at:** {job.url}
447
-
448
- To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "{job.id}"}}}}`
449
- To inspect, call this tool with `{{"operation": "inspect", "args": {{"job_id": "{job.id}"}}}}`"""
450
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
451
-
452
- # Not detached - wait for completion and stream logs
453
  print(f"Job started: {job.url}")
454
  print("Streaming logs...\n---\n")
455
 
@@ -504,25 +390,14 @@ To inspect, call this tool with `{{"operation": "inspect", "args": {{"job_id": "
504
  self.api.run_job,
505
  image=UV_DEFAULT_IMAGE,
506
  command=command,
507
- env=_substitute_hf_token(args.get("env")),
508
- secrets=_substitute_hf_token(args.get("secrets")),
509
  flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
510
  timeout=args.get("timeout", "30m"),
511
  namespace=args.get("namespace") or self.namespace,
512
  )
513
 
514
- # If detached, return immediately
515
- if args.get("detach", False):
516
- response = f"""UV Job started successfully!
517
-
518
- **Job ID:** {job.id}
519
- **Status:** {job.status.stage}
520
- **View at:** {job.url}
521
-
522
- To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "{job.id}"}}}}`"""
523
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
524
-
525
- # Not detached - wait for completion and stream logs
526
  print(f"UV Job started: {job.url}")
527
  print("Streaming logs...\n---\n")
528
 
@@ -693,8 +568,8 @@ To verify, call this tool with `{{"operation": "inspect", "args": {{"job_id": "{
693
  image=args.get("image", "python:3.12"),
694
  command=args.get("command"),
695
  schedule=args.get("schedule"),
696
- env=_substitute_hf_token(args.get("env")),
697
- secrets=_substitute_hf_token(args.get("secrets")),
698
  flavor=args.get("flavor", "cpu-basic"),
699
  timeout=args.get("timeout", "30m"),
700
  namespace=args.get("namespace") or self.namespace,
@@ -750,8 +625,8 @@ To list all, call this tool with `{{"operation": "scheduled ps"}}`"""
750
  image=UV_DEFAULT_IMAGE,
751
  command=command,
752
  schedule=schedule,
753
- env=_substitute_hf_token(args.get("env")),
754
- secrets=_substitute_hf_token(args.get("secrets")),
755
  flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
756
  timeout=args.get("timeout", "30m"),
757
  namespace=args.get("namespace") or self.namespace,
@@ -911,8 +786,44 @@ HF_JOBS_TOOL_SPEC = {
911
  "description": (
912
  "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
913
  "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
914
- "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
915
- "Call this tool with no operation for full usage instructions and examples."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
916
  ),
917
  "parameters": {
918
  "type": "object",
@@ -958,7 +869,7 @@ HF_JOBS_TOOL_SPEC = {
958
  async def hf_jobs_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
959
  """Handler for agent tool router"""
960
  try:
961
- tool = HfJobsTool()
962
  result = await tool.execute(arguments)
963
  return result["formatted"], not result.get("isError", False)
964
  except Exception as e:
 
10
  from typing import Any, Dict, Literal, Optional
11
 
12
  from huggingface_hub import HfApi
13
+ from huggingface_hub.utils import HfHubHTTPError, get_token_to_send
14
 
15
  from agent.tools.types import ToolResult
16
  from agent.tools.utilities import (
 
64
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
65
 
66
 
67
def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
    """
    Merge the variables passed by the LLM with an automatically resolved HF token.

    The token comes from the HF_TOKEN environment variable when it is set and
    non-empty. Otherwise resolution is delegated to huggingface_hub through
    ``get_token_to_send(None)``, which is expected to fall back to the locally
    saved login token.

    Args:
        params: Variables supplied by the caller (may be None or empty). An
            "HF_TOKEN" entry that is empty or equal to the literal placeholder
            "$HF_TOKEN" is treated as a request for the real token.

    Returns:
        A new dictionary containing the caller's variables. "HF_TOKEN" is set
        to the resolved token when one is available; an explicit, non-placeholder
        value supplied by the caller is kept as is. When no token can be
        resolved, an empty or placeholder "HF_TOKEN" entry is omitted rather
        than forwarded to the job. The input dictionary is never mutated.
    """
    # NOTE(review): the call sites pass both `env` and `secrets` through this
    # helper, so the token also ends up in the plain `env` mapping - confirm
    # that this is intended rather than injecting it into `secrets` only.

    # An empty string must become None: get_token_to_send() returns any str
    # unchanged, so "" would suppress its fallback to the saved token.
    token = get_token_to_send(os.environ.get("HF_TOKEN") or None)

    result: Dict[str, Any] = dict(params) if params else {}

    # The tool description instructs the LLM to pass the literal "$HF_TOKEN";
    # that placeholder (or an empty/missing value) means "use the real token".
    supplied = result.get("HF_TOKEN")
    if not supplied or supplied == "$HF_TOKEN":
        if token:
            result["HF_TOKEN"] = token
        else:
            # No usable token: do not forward an empty value or the placeholder.
            result.pop("HF_TOKEN", None)

    return result
84
 
 
226
  operation = params.get("operation")
227
  args = params.get("args", {})
228
 
229
+ # If no operation provided, return error
230
  if not operation:
231
+ return {
232
+ "formatted": "Error: 'operation' parameter is required. See tool description for available operations and usage examples.",
233
+ "totalResults": 0,
234
+ "resultsShared": 0,
235
+ "isError": True,
236
+ }
237
 
238
  # Normalize operation name
239
  operation = operation.lower()
240
 
 
 
 
 
241
  try:
242
  # Route to appropriate handler
243
  if operation == "run":
 
294
  "isError": True,
295
  }
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  async def _wait_for_job_completion(
298
  self, job_id: str, namespace: Optional[str] = None
299
  ) -> tuple[str, list[str]]:
 
328
  self.api.run_job,
329
  image=args.get("image", "python:3.12"),
330
  command=args.get("command"),
331
+ env=_add_environment_variables(args.get("env")),
332
+ secrets=_add_environment_variables(args.get("secrets")),
333
  flavor=args.get("flavor", "cpu-basic"),
334
  timeout=args.get("timeout", "30m"),
335
  namespace=args.get("namespace") or self.namespace,
336
  )
337
 
338
+ # Wait for completion and stream logs
 
 
 
 
 
 
 
 
 
 
 
 
339
  print(f"Job started: {job.url}")
340
  print("Streaming logs...\n---\n")
341
 
 
390
  self.api.run_job,
391
  image=UV_DEFAULT_IMAGE,
392
  command=command,
393
+ env=_add_environment_variables(args.get("env")),
394
+ secrets=_add_environment_variables(args.get("secrets")),
395
  flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
396
  timeout=args.get("timeout", "30m"),
397
  namespace=args.get("namespace") or self.namespace,
398
  )
399
 
400
+ # Wait for completion and stream logs
 
 
 
 
 
 
 
 
 
 
 
401
  print(f"UV Job started: {job.url}")
402
  print("Streaming logs...\n---\n")
403
 
 
568
  image=args.get("image", "python:3.12"),
569
  command=args.get("command"),
570
  schedule=args.get("schedule"),
571
+ env=_add_environment_variables(args.get("env")),
572
+ secrets=_add_environment_variables(args.get("secrets")),
573
  flavor=args.get("flavor", "cpu-basic"),
574
  timeout=args.get("timeout", "30m"),
575
  namespace=args.get("namespace") or self.namespace,
 
625
  image=UV_DEFAULT_IMAGE,
626
  command=command,
627
  schedule=schedule,
628
+ env=_add_environment_variables(args.get("env")),
629
+ secrets=_add_environment_variables(args.get("secrets")),
630
  flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
631
  timeout=args.get("timeout", "30m"),
632
  namespace=args.get("namespace") or self.namespace,
 
786
  "description": (
787
  "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
788
  "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
789
+ "The 'operation' argument is required; available operations and usage examples are listed below.\n\n"
790
+ "## Available Operations\n"
791
+ "**Job Management:**\n"
792
+ "- run: Run job with Docker image\n"
793
+ "- uv: Run Python script with inline dependencies (recommended for Python)\n"
794
+ "- ps: List jobs (shows running by default, use args: {'all': true} for all)\n"
795
+ "- logs: Fetch job logs (args: {'job_id': 'xxx'})\n"
796
+ "- inspect: Get job details (args: {'job_id': 'xxx'})\n"
797
+ "- cancel: Cancel running job (args: {'job_id': 'xxx'})\n\n"
798
+ "**Scheduled Jobs:**\n"
799
+ "- same functionality as Job Management, but recurring periodically\n"
800
+ "- schedule: One of '@annually', '@yearly', '@monthly', '@weekly', '@daily', '@hourly', or a CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).\n"
801
+ "- scheduled run/uv/ps/inspect/delete/suspend/resume\n\n"
802
+ "## Available Hardware Flavors\n"
803
+ "**CPU:** cpu-basic, cpu-upgrade, cpu-performance, cpu-xl\n"
804
+ "**GPU:** t4-small, t4-medium, l4x1, l4x4, a10g-small, a10g-large, a10g-largex2, a10g-largex4, a100-large, h100, h100x8\n"
805
+ "**Specialized:** inf2x6\n\n"
806
+ "## Usage Examples\n"
807
+ "**Run Python with UV (recommended):**\n"
808
+ "{'operation': 'uv', 'args': {'script': 'import torch\\nprint(torch.cuda.is_available())', "
809
+ "'dependencies': ['torch', 'transformers'], 'flavor': 'a10g-small', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}}\n\n"
810
+ "**Run Docker command:**\n"
811
+ "{'operation': 'run', 'args': {'image': 'python:3.12', 'command': ['python', '-c', 'print(42)'], 'flavor': 'cpu-basic'}}\n\n"
812
+ "**List running jobs:**\n"
813
+ "{'operation': 'ps'}\n\n"
814
+ "## Key Parameters\n"
815
+ "- script: Python code (for uv) - can be inline string, URL, or file path\n"
816
+ "- dependencies/packages: List of pip packages (for uv)\n"
817
+ "- command: Array format ['cmd', 'arg1', 'arg2'] (for run)\n"
818
+ "- flavor/hardware: Choose appropriate size (default: cpu-basic)\n"
819
+ "- secrets: {'HF_TOKEN': '$HF_TOKEN'} for Hub access\n"
820
+ "- timeout: Max runtime (default: '30m')\n\n"
821
+ "## Important Notes\n"
822
+ "- **CRITICAL: Job files are EPHEMERAL** - ALL files created in HF Jobs (trained models, datasets, outputs, completions etc.) are DELETED when the job completes. You MUST upload any outputs to HF Hub in the script itself (using model.push_to_hub() when training models, dataset.push_to_hub() when creating text based outputs, etc.).\n"
823
+ "- Always pass full script content - no local files available on server\n"
824
+ "- Use array format for commands: ['/bin/sh', '-lc', 'cmd'] for shell features\n"
825
+ "- hf-transfer is auto-included in uv jobs for faster downloads\n"
826
+ "- **Remember to upload outputs to Hub before job finishes!**"
827
  ),
828
  "parameters": {
829
  "type": "object",
 
869
  async def hf_jobs_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
870
  """Handler for agent tool router"""
871
  try:
872
+ tool = HfJobsTool(namespace=os.environ.get("HF_NAMESPACE", ""))
873
  result = await tool.execute(arguments)
874
  return result["formatted"], not result.get("isError", False)
875
  except Exception as e: