Spaces: Running on CPU Upgrade

Commit 103298c · Parent(s): a644598

feat: use hf_agent tool descriptions, hardware flavors, and default env

Changed files:
- agent/tools/dataset_tools.py        +9  -16
- agent/tools/docs_tools.py           +10 -21
- agent/tools/github_find_examples.py +10 -49
- agent/tools/github_read_file.py     +6  -52
- agent/tools/jobs_tool.py            +77 -89
- agent/tools/plan_tool.py            +5  -12
agent/tools/dataset_tools.py
CHANGED
@@ -388,22 +388,15 @@ def _format_parquet_files(data: dict, max_rows: int = 10) -> str | None:
 HF_INSPECT_DATASET_TOOL_SPEC = {
     "name": "hf_inspect_dataset",
     "description": (
-        "Inspect a
-        "- Column names for your dataloader\n"
-        "- Data types and format\n"
-        "- Available splits (train/test/validation)\n\n"
-        "Supports private/gated datasets when HF_TOKEN is set.\n\n"
-        "## Examples\n"
-        '{"dataset": "stanfordnlp/imdb"}\n'
-        '{"dataset": "nyu-mll/glue", "config": "mrpc", "sample_rows": 5}\n'
+        "Inspect a HF dataset in one call: status, configs/splits, schema, sample rows, parquet info.\n\n"
+        "REQUIRED before any training job to verify dataset format matches training method:\n"
+        "  SFT: needs 'messages', 'text', or 'prompt'/'completion'\n"
+        "  DPO: needs 'prompt', 'chosen', 'rejected'\n"
+        "  GRPO: needs 'prompt'\n"
+        "All datasets used for training have to be in conversational ChatML format to be compatible with HF libraries.\n"
+        "Training will fail with KeyError if columns don't match.\n\n"
+        "Also use to get example datapoints, understand column names, data types, and available splits before writing any data loading code. "
+        "Supports private/gated datasets when HF_TOKEN is set."
     ),
     "parameters": {
         "type": "object",
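A minimal sketch of the conversational format the new description requires. This is not part of the commit: the row content is made up for illustration, and the "messages" role/content layout follows the TRL convention the description names.

from datasets import Dataset

# One SFT-ready row: a "messages" column holding role/content turns (ChatML-style).
rows = [
    {
        "messages": [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "Paris."},
        ]
    }
]

ds = Dataset.from_list(rows)
print(ds.column_names)  # ['messages'] -- the column the SFT check above looks for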
agent/tools/docs_tools.py
CHANGED
@@ -845,17 +845,12 @@ DOC_ENDPOINTS = [
 EXPLORE_HF_DOCS_TOOL_SPEC = {
     "name": "explore_hf_docs",
     "description": (
-        "**Pattern:** explore (discover structure) → fetch_hf_docs (get details) → implement with researched approach. "
-        "Returns: Sidebar navigation with titles, URLs, and glimpses of all pages in the selected documentation. "
-        "**Then:** Use fetch_hf_docs with specific URLs from results to get full content. "
-        "**Critical for reliability:** Never implement based on internal knowledge without checking current docs first - APIs change frequently."
-        " By default returns the top 20 results; set max_results (max 50) to adjust."
+        "Browse HF documentation structure - discover all available documentation with 200-char previews.\n\n"
+        "Use this to find relevant documentation and/or examples with detailed parameter docs and API reference. "
+        "To be used together with github_find_examples and github_read_file to find working examples and documentation.\n\n"
+        "Pattern: explore_hf_docs (find relevant pages) → fetch_hf_docs (get full content).\n\n"
+        "For training tasks: fetch the trainer config docs (SFTConfig, DPOConfig, GRPOConfig) to verify parameter names. "
+        "Returns top 20 results by default; set max_results (max 50) to adjust."
     ),
     "parameters": {
         "type": "object",

@@ -928,16 +923,10 @@ EXPLORE_HF_DOCS_TOOL_SPEC = {
 HF_DOCS_FETCH_TOOL_SPEC = {
     "name": "fetch_hf_docs",
     "description": (
-        "Fetch full markdown content of
-        "(5) Need parameter descriptions and usage patterns. "
-        "**Pattern:** explore_hf_docs (find relevant page) → fetch_hf_docs (get full content) → implement using documented approach. "
-        "Provide full URL from explore_hf_docs results (e.g., 'https://huggingface.co/docs/trl/sft_trainer'). "
-        "Returns: Complete markdown documentation with examples, parameters, and usage patterns. "
-        "**For training tasks:** ALWAYS fetch trainer docs (SFTConfig, DPOConfig, etc.) before creating training scripts. "
-        "**Critical for reliability:** This ensures you use current APIs and best practices."
+        "Fetch full markdown content of an HF documentation page. Use after explore_hf_docs.\n\n"
+        "Critical for finding documentation, e.g. current trainer configuration parameters (SFTConfig, DPOConfig, etc.). "
+        "Use for researching solutions and before writing training scripts. Your internal knowledge is outdated.\n\n"
+        "Provide the full URL from explore_hf_docs results. The .md extension is added automatically."
     ),
     "parameters": {
         "type": "object",
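A hedged sketch of the explore → fetch sequence the two descriptions above encode, written as tool-call payloads. The argument names are assumptions (the parameter schemas are truncated in this diff); the URL is the one cited in the old fetch_hf_docs description.

# Step 1: discover pages (argument names are illustrative, not from the spec).
explore_call = {"name": "explore_hf_docs", "arguments": {"query": "SFTConfig", "max_results": 20}}

# Step 2: fetch full content for a URL returned by step 1.
fetch_call = {"name": "fetch_hf_docs", "arguments": {"url": "https://huggingface.co/docs/trl/sft_trainer"}}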
agent/tools/github_find_examples.py
CHANGED
@@ -405,55 +405,16 @@ def find_examples(
 GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
     "name": "github_find_examples",
     "description": (
-        "## How it works\n\n"
-        "1. Fetches all example files (examples/, scripts/, tutorials/, demos/, notebooks/, etc.) from repository\n"
-        "2. If keyword provided, scores files against keyword using fuzzy matching\n"
-        "3. Returns best matches sorted by relevance and pattern priority\n"
-        "4. Provides copyable parameters for github_read_file tool\n\n"
-        "## Examples\n\n"
-        "<example>\n"
-        "// ML Workflow Step: Find GRPO training examples before implementation\n"
-        "// Task: Starting GRPO fine-tuning project, need reference implementation\n"
-        "{\n"
-        "  keyword: 'grpo',\n"
-        "  repo: 'trl',\n"
-        "  org: 'huggingface'\n"
-        "}\n"
-        "// Returns: examples/scripts/grpo_agent.py, examples/scripts/grpo_vlm.py\n"
-        "// Next step: github_read_file to study working implementation\n"
-        "</example>\n\n"
-        "<example>\n"
-        "// ML Workflow Step: Discover all available training methods\n"
-        "// Task: Exploring TRL training options before choosing approach\n"
-        "{\n"
-        "  repo: 'trl',\n"
-        "  org: 'huggingface',\n"
-        "  max_results: 20\n"
-        "}\n"
-        "// Lists: SFT, DPO, GRPO, PPO, reward modeling examples\n"
-        "// Helps user choose appropriate method\n"
-        "</example>\n\n"
-        "<example>\n"
-        "// ML Workflow Step: Find LoRA fine-tuning examples\n"
-        "// Task: Learning parameter-efficient fine-tuning patterns\n"
-        "{\n"
-        "  keyword: 'lora',\n"
-        "  repo: 'peft',\n"
-        "  org: 'huggingface'\n"
-        "}\n"
-        "// Discovers LoRA configuration and training examples\n"
-        "// Shows current PEFT API usage patterns\n"
-        "</example>"
+        "Find working example scripts in GitHub repositories (from a list of predetermined directories, e.g. examples/, scripts/, tutorials/, etc.). "
+        "Uses fuzzy keyword matching.\n\n"
+        "MANDATORY before writing any ML training, fine-tuning, or inference code. "
+        "Your internal knowledge of library APIs is outdated - working examples show current API patterns.\n\n"
+        "Sequence: github_find_examples → github_read_file (study the example) → implement based on what you found.\n\n"
+        "Skip this only for: simple data queries, status checks, non-code tasks.\n\n"
+        "Examples:\n"
+        "  {keyword: 'sft', repo: 'trl'} → finds examples/scripts/sft.py\n"
+        "  {keyword: 'grpo', repo: 'trl'} → finds GRPO training examples\n"
+        "  {repo: 'trl', max_results: 20} → lists all available training method examples"
     ),
     "parameters": {
         "type": "object",
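The removed "How it works" list mentions scoring files against the keyword with fuzzy matching. A minimal sketch of that idea, using only the standard library; this is an assumption for illustration, not the repo's actual scorer.

from difflib import SequenceMatcher

def score(path: str, keyword: str) -> float:
    # Compare the keyword against the filename, case-insensitively.
    name = path.rsplit("/", 1)[-1].lower()
    return SequenceMatcher(None, keyword.lower(), name).ratio()

paths = ["examples/scripts/sft.py", "examples/scripts/grpo_vlm.py"]
best = max(paths, key=lambda p: score(p, "grpo"))
print(best)  # examples/scripts/grpo_vlm.py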
agent/tools/github_read_file.py
CHANGED
@@ -250,59 +250,13 @@ def read_file(
 GITHUB_READ_FILE_TOOL_SPEC = {
     "name": "github_read_file",
     "description": (
-        "Read file contents from GitHub repositories
-        "**Pattern:** github_find_examples (discover files) → github_read_file (read code) → implement using researched patterns. "
-        "Returns: File contents with line numbers, formatted for LLM reading. Auto-converts Jupyter notebooks to markdown. "
-        "**Then:** Implement using patterns and APIs from the example code. "
-        "**Critical for reliability:** Reading working examples prevents API errors and shows current best practices. "
+        "Read file contents from GitHub repositories. Returns first 300 lines by default. "
+        "Auto-converts Jupyter notebooks to markdown.\n\n"
+        "Use AFTER github_find_examples to study the working implementation. "
+        "The purpose is to learn current API patterns - imports, trainer configs, dataset handling - "
+        "so your implementation uses correct, up-to-date code.\n\n"
         "Use line_start/line_end for large files (>300 lines) to read specific sections.\n\n"
-        "## When to use this tool\n\n"
-        "- When reading example code, trainer implementations, or configuration files\n"
-        "- After github_find_examples returns file paths you want to study\n"
-        "- When investigating specific code sections with line ranges\n"
-        "- When reading from specific branches, tags, or commits (use ref parameter)\n\n"
-        "## When NOT to use this tool\n\n"
-        "- When you don't know exact file path (use github_find_examples or github_search_code first)\n"
-        "- When searching for code patterns across repos (use github_search_code instead)\n\n"
-        "## Examples\n\n"
-        "<example>\n"
-        "// ML Workflow Step: Read GRPO trainer class after finding via github_find_examples\n"
-        "// Use case: Understand GRPOTrainer API, parameters, and methods\n"
-        "{\n"
-        "  repo: 'huggingface/trl',\n"
-        "  path: 'trl/trainer/grpo_trainer.py',\n"
-        "  line_start: 1,\n"
-        "  line_end: 200\n"
-        "}\n"
-        "// Read class definition and constructor to understand current API\n"
-        "// Shows: __init__ parameters, configuration, required arguments\n"
-        "</example>\n\n"
-        "<example>\n"
-        "// ML Workflow Step: Study complete training script from examples\n"
-        "// Use case: Learn end-to-end VLM fine-tuning workflow\n"
-        "{\n"
-        "  repo: 'huggingface/trl',\n"
-        "  path: 'examples/scripts/grpo_vlm.py'\n"
-        "}\n"
-        "// Returns first 300 lines - shows full training setup\n"
-        "// Use line_start/line_end if need to read more\n"
-        "</example>\n\n"
-        "<example>\n"
-        "// ML Workflow Step: Check TrainingArguments configuration patterns\n"
-        "// Use case: Learn how to structure training configs correctly\n"
-        "{\n"
-        "  repo: 'huggingface/transformers',\n"
-        "  path: 'examples/pytorch/language-modeling/run_clm.py',\n"
-        "  line_start: 50,\n"
-        "  line_end: 150\n"
-        "}\n"
-        "// Read argument parsing and config setup section\n"
-        "// Shows: current parameter names, default values, best practices\n"
-        "</example>"
+        "When NOT to use: when you don't know the file path (use github_find_examples first)."
     ),
     "parameters": {
         "type": "object",
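The line_start/line_end guidance above amounts to paging. Reusing the parameter names that appear in the removed examples, two back-to-back payloads reading a large file in 300-line windows might look like this (a sketch, not a documented recipe):

# Hypothetical successive tool payloads paging through a file >300 lines.
page_1 = {"repo": "huggingface/trl", "path": "trl/trainer/grpo_trainer.py", "line_start": 1, "line_end": 300}
page_2 = {"repo": "huggingface/trl", "path": "trl/trainer/grpo_trainer.py", "line_start": 301, "line_end": 600}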
agent/tools/jobs_tool.py
CHANGED
@@ -29,38 +29,33 @@ from agent.tools.utilities import (
 )
 
 # Hardware flavors
-CPU_FLAVORS = ["cpu-basic", "cpu-upgrade", "cpu-performance", "cpu-xl"]
+CPU_FLAVORS = ["cpu-basic", "cpu-upgrade"]
 GPU_FLAVORS = [
-    "sprx8",
-    "zero-a10g",
     "t4-small",
     "t4-medium",
-    "l4x1",
-    "l4x4",
-    "l40sx1",
-    "l40sx4",
-    "l40sx8",
     "a10g-small",
     "a10g-large",
     "a10g-largex2",
     "a10g-largex4",
     "a100-large",
+    "a100x4",
+    "a100x8",
+    "l4x1",
+    "l4x4",
+    "l40sx1",
+    "l40sx4",
+    "l40sx8",
 ]
 
 # Detailed specs for display (vCPU/RAM/GPU VRAM)
-CPU_FLAVORS_DESC = (
-    "cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB), cpu-performance, cpu-xl"
-)
+CPU_FLAVORS_DESC = "cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB)"
 GPU_FLAVORS_DESC = (
     "t4-small(4vCPU/15GB/GPU 16GB), t4-medium(8vCPU/30GB/GPU 16GB), "
-    "l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB), "
-    "a10g-small(4vCPU/14GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), "
+    "a10g-small(4vCPU/15GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), "
     "a10g-largex2(24vCPU/92GB/GPU 48GB), a10g-largex4(48vCPU/184GB/GPU 96GB), "
-    "a100-large(12vCPU/142GB/GPU 80GB),
+    "a100-large(12vCPU/142GB/GPU 80GB), a100x4(48vCPU/568GB/GPU 320GB), a100x8(96vCPU/1136GB/GPU 640GB), "
+    "l4x1(8vCPU/30GB/GPU 24GB), l4x4(48vCPU/186GB/GPU 96GB), "
+    "l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB)"
 )
 SPECIALIZED_FLAVORS = ["inf2x6"]
 ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS

@@ -122,6 +117,21 @@ def _filter_uv_install_output(logs: list[str]) -> list[str]:
     return logs
 
 
+_DEFAULT_ENV = {
+    "HF_HUB_DISABLE_PROGRESS_BARS": "1",
+    "TQDM_DISABLE": "1",
+    "TRANSFORMERS_VERBOSITY": "warning",
+    "HF_HUB_ENABLE_HF_TRANSFER": "1",
+}
+
+
+def _add_default_env(params: Dict[str, Any] | None) -> Dict[str, Any]:
+    """Inject default env vars for clean, agent-friendly output."""
+    result = dict(_DEFAULT_ENV)
+    result.update(params or {})  # user-provided values override defaults
+    return result
+
+
 def _add_environment_variables(
     params: Dict[str, Any] | None, user_token: str | None = None
 ) -> Dict[str, Any]:

@@ -509,7 +519,7 @@ class HfJobsTool:
             self.api.run_job,
             image=image,
             command=command,
-            env=args.get("env"),
+            env=_add_default_env(args.get("env")),
             secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
             flavor=args.get("hardware_flavor", "cpu-basic"),
             timeout=args.get("timeout", "30m"),

@@ -741,7 +751,7 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}`
             image=image,
             command=command,
             schedule=schedule,
-            env=args.get("env"),
+            env=_add_default_env(args.get("env")),
             secrets=_add_environment_variables(args.get("secrets"), self.hf_token),
             flavor=args.get("hardware_flavor", "cpu-basic"),
             timeout=args.get("timeout", "30m"),
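The merge order in _add_default_env means user-supplied values win over the defaults, key by key. A quick check against the function as committed above (MY_FLAG is an arbitrary illustrative key):

# Defaults are applied first, then user values override per key.
merged = _add_default_env({"TQDM_DISABLE": "0", "MY_FLAG": "on"})
assert merged["TQDM_DISABLE"] == "0"               # user override wins
assert merged["HF_HUB_ENABLE_HF_TRANSFER"] == "1"  # default preserved
assert merged["MY_FLAG"] == "on"                   # extra user key kept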
@@ -901,56 +911,31 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_job_id}"}}`
 HF_JOBS_TOOL_SPEC = {
     "name": "hf_jobs",
     "description": (
-        "Execute Python scripts or Docker containers on HF cloud infrastructure
-        "❌ DON'T poll logs automatically\n"
-        "❌ DON'T wait for completion\n"
-        "❌ DON'T check status unless user asks\n\n"
-        "**For Training Tasks:**\n"
-        "• ALWAYS research TRL docs first: explore_hf_docs('trl') → fetch_hf_docs(<trainer_url>)\n"
-        "• ALWAYS validate dataset format with hub_repo_details (SFT needs messages/text, DPO needs chosen/rejected)\n"
-        "• ALWAYS include Trackio monitoring in script (explore_hf_docs('trackio'))\n"
-        "• ALWAYS enable push_to_hub=True in training config\n"
-        "• Set timeout 2-8h for training (NOT default 30m)\n"
-        "• Confirm model/dataset choices with user before submitting\n\n"
-        "**Examples:**\n\n"
-        "**Training - Fine-tune LLM:**\n"
-        "{'operation': 'run', 'script': '# Training script with TRL\\nfrom trl import SFTConfig, SFTTrainer\\nfrom transformers import AutoModelForCausalLM\\nmodel = AutoModelForCausalLM.from_pretrained(\"Qwen/Qwen3-4B\")\\n# ... researched implementation from docs ...\\ntrainer.train()\\ntrainer.push_to_hub(\"user-name/my-model\")', 'dependencies': ['transformers', 'trl', 'torch', 'datasets', 'trackio'], 'hardware_flavor': 'a10g-large', 'timeout': '4h'}\n\n"
-        "**Data Processing:**\n"
-        "{'operation': 'run', 'script': 'from datasets import load_dataset\\nds = load_dataset(\"data\")\\n# process...\\nds.push_to_hub(\"user/processed\")', 'dependencies': ['datasets', 'pandas'], 'hardware_flavor': 'cpu-upgrade', 'timeout': '2h'}\n\n"
-        "**Scheduled Daily Job:**\n"
-        "{'operation': 'scheduled run', 'schedule': '@daily', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'hardware_flavor': 'cpu-basic'}\n\n"
-        "**Docker Mode:**\n"
-        "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
-        "**Monitor Operations:**\n"
-        "{'operation': 'ps'} - List all jobs\n"
-        "{'operation': 'logs', 'job_id': 'xxx'} - Stream logs (only when user requests)\n"
-        "{'operation': 'inspect', 'job_id': 'xxx'} - Get job details\n"
-        "{'operation': 'cancel', 'job_id': 'xxx'} - Stop job\n\n"
-        "⚠️ CRITICAL: Files created during execution are DELETED when job finishes. MUST push_to_hub() all outputs (models, datasets, artifacts) in script. For logs/scripts, use hf_private_repos after completion."
+        "Execute Python scripts or Docker containers on HF cloud infrastructure.\n\n"
+        "Two modes (mutually exclusive): Python mode (script + dependencies) or Docker mode (command + image). "
+        "Provide exactly ONE of 'script' or 'command'.\n\n"
+        "BEFORE submitting training/fine-tuning jobs:\n"
+        "- You MUST have called github_find_examples + github_read_file to find a working reference implementation. "
+        "Scripts based on your internal knowledge WILL use outdated APIs and fail.\n"
+        "- You MUST have validated dataset format via hf_inspect_dataset or hub_repo_details.\n"
+        "- Training config MUST include push_to_hub=True and hub_model_id. "
+        "Job storage is EPHEMERAL - all files are deleted when the job ends. Without push_to_hub, trained models are lost permanently.\n"
+        "- Include trackio monitoring and provide the dashboard URL to the user.\n\n"
+        "BATCH/ABLATION JOBS: Submit ONE job first. Check logs to confirm it starts training successfully. "
+        "Only then submit the remaining jobs. Never submit all at once - if there's a bug, all jobs fail.\n\n"
+        "Operations: run, ps, logs, inspect, cancel, scheduled run/ps/inspect/delete/suspend/resume.\n\n"
+        f"Hardware: CPU: {CPU_FLAVORS_DESC}. GPU: {GPU_FLAVORS_DESC}.\n"
+        "Common picks: t4-small ($0.60/hr, 1-3B), a10g-large ($2/hr, 7-13B), a100-large ($4/hr, 30B+), h100 ($6/hr, 70B+). "
+        "Note: a10g-small and a10g-large have the SAME 24GB GPU - the difference is CPU/RAM only.\n\n"
+        "OOM RECOVERY: When a training job fails with CUDA OOM:\n"
+        "1. Reduce per_device_train_batch_size and increase gradient_accumulation_steps proportionally (keep effective batch size identical)\n"
+        "2. Enable gradient_checkpointing=True\n"
+        "3. Upgrade to larger GPU (a10g → a100 → h100)\n"
+        "Do NOT switch training methods (e.g. full SFT to LoRA) or reduce max_length - those change what the user gets and require explicit approval.\n\n"
+        "Examples:\n"
+        "Training: {'operation': 'run', 'script': '/app/train.py', 'dependencies': ['transformers', 'trl', 'torch', 'datasets', 'trackio'], 'hardware_flavor': 'a100-large', 'timeout': '8h'}\n"
+        "Monitor: {'operation': 'ps'}, {'operation': 'logs', 'job_id': 'xxx'}, {'operation': 'cancel', 'job_id': 'xxx'}\n"
+        "Docker: {'operation': 'run', 'command': ['duckdb', '-c', 'select 1 + 2'], 'image': 'duckdb/duckdb', 'hardware_flavor': 'cpu-basic', 'timeout': '1h'}"
     ),
     "parameters": {
         "type": "object",

@@ -970,13 +955,8 @@ HF_JOBS_TOOL_SPEC = {
             "scheduled suspend",
             "scheduled resume",
         ],
-        "description": (
-            "Operation to execute. Valid values: [run, ps, logs, inspect, cancel, "
-            "scheduled run, scheduled ps, scheduled inspect, scheduled delete, "
-            "scheduled suspend, scheduled resume]"
-        ),
+        "description": "Operation to execute.",
     },
-    # Python/UV specific parameters
     "script": {
         "type": "string",
         "description": (

@@ -988,44 +968,52 @@ HF_JOBS_TOOL_SPEC = {
     "dependencies": {
         "type": "array",
         "items": {"type": "string"},
+        "description": (
+            "Pip packages to install. Include ALL required packages. "
+            "Common training set: ['transformers', 'trl', 'torch', 'datasets', 'trackio', 'accelerate']. "
+            "Only used with 'script'."
+        ),
     },
-    # Docker specific parameters
     "image": {
         "type": "string",
-        "description": "Docker image.
+        "description": "Docker image. Optional - auto-selected if not provided. Use with 'command'.",
     },
     "command": {
        "type": "array",
        "items": {"type": "string"},
-        "description": "Command to execute as list.
+        "description": "Command to execute as list. Triggers Docker mode. Mutually exclusive with 'script'.",
     },
-    # Hardware and environment
     "hardware_flavor": {
         "type": "string",
-        "description":
+        "description": (
+            "Hardware type. Sizing guide: 1-3B params → t4-small/a10g-small, "
+            "7-13B → a10g-large, 30B+ → a100-large, 70B+ → h100/h100x8. "
+            f"All options: CPU: {CPU_FLAVORS}. GPU: {GPU_FLAVORS}."
+        ),
     },
     "timeout": {
         "type": "string",
-        "description":
+        "description": (
+            "Maximum job runtime. MUST be >2h for any training job - default 30m kills training mid-run. "
+            "Guidelines: 1-3B models: 3-4h, 7-13B: 6-8h, 30B+: 12-24h. "
+            "Use 30m-1h only for quick data processing or inference tasks. Default: '30m'."
+        ),
     },
     "env": {
         "type": "object",
-        "description": "Environment variables
+        "description": "Environment variables {'KEY': 'VALUE'}. HF_TOKEN is auto-included.",
     },
-    # Job management parameters
     "job_id": {
         "type": "string",
-        "description": "Job ID
+        "description": "Job ID. Required for: logs, inspect, cancel.",
     },
-    # Scheduled job parameters
     "scheduled_job_id": {
         "type": "string",
-        "description": "Scheduled job ID. Required for:
+        "description": "Scheduled job ID. Required for: scheduled inspect/delete/suspend/resume.",
     },
     "schedule": {
         "type": "string",
-        "description": "
+        "description": "Cron schedule or preset (@hourly, @daily, @weekly, @monthly). Required for: scheduled run.",
     },
 },
 "required": ["operation"],
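The OOM recovery rule above keeps the effective batch size constant while cutting per-device memory. A minimal sketch of what that adjustment looks like in a TRL config (SFTConfig is the class the descriptions reference; the concrete numbers, output_dir, and hub_model_id are illustrative):

from trl import SFTConfig

# Before OOM: effective batch = 8 per device * 2 accumulation steps = 16.
config = SFTConfig(per_device_train_batch_size=8, gradient_accumulation_steps=2,
                   output_dir="out", push_to_hub=True, hub_model_id="user-name/my-model")

# After OOM: cut the per-device batch 4x and raise accumulation 4x,
# so the effective batch is still 2 * 8 = 16; checkpointing trades compute for memory.
config = SFTConfig(per_device_train_batch_size=2, gradient_accumulation_steps=8,
                   gradient_checkpointing=True,
                   output_dir="out", push_to_hub=True, hub_model_id="user-name/my-model")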
agent/tools/plan_tool.py
CHANGED
@@ -85,18 +85,11 @@ def get_current_plan() -> List[Dict[str, str]]:
 PLAN_TOOL_SPEC = {
     "name": "plan_tool",
     "description": (
-        "**Pattern:** Create plan at start → Mark in_progress when starting task → Mark completed immediately after finishing → User sees clear progress. "
-        "Each call replaces entire plan (full list required). "
-        "**Critical for reliability:** Exactly ONE task in_progress at a time (not zero, not multiple). "
-        "Mark tasks completed IMMEDIATELY after finishing - don't batch completions. "
-        "**For long-running tasks:** Update plan after each major step to keep user informed. "
-        "**Only mark completed when:** Task fully accomplished, no errors, all requirements met. "
-        "Keep tasks pending if blocked/errors occur - create new task to resolve blockers."
+        "Track progress on multi-step tasks with a todo list (pending/in_progress/completed).\n\n"
+        "Use for tasks with 3+ steps. Each call replaces the entire plan (send full list).\n\n"
+        "Rules: exactly ONE task in_progress at a time. Mark completed immediately after finishing. "
+        "Only mark completed when the task fully succeeded - keep in_progress if there are errors. "
+        "Update frequently so the user sees progress."
     ),
     "parameters": {
         "type": "object",