Aksel Joonas Reedi committed on
Commit
907c9fd
·
2 Parent(s): eaf2575374fa24

updated jobs tool, system prompts and search capability

Browse files

Streamline agent architecture: integrate documentation search and improve job handling

agent/core/agent_loop.py CHANGED
@@ -103,32 +103,23 @@ class Handlers:
103
  Event(event_type="assistant_message", data={"content": content})
104
  )
105
 
106
- # Execute tools
 
 
 
107
  for tc in tool_calls:
108
  tool_name = tc.function.name
109
  tool_args = json.loads(tc.function.arguments)
110
 
111
- # Check if this tool requires user approval
112
  if _needs_approval(tool_name, tool_args):
113
- await session.send_event(
114
- Event(
115
- event_type="approval_required",
116
- data={
117
- "tool": tool_name,
118
- "arguments": tool_args,
119
- "tool_call_id": tc.id,
120
- },
121
- )
122
- )
123
-
124
- # Store pending approval and return early
125
- session.pending_approval = {
126
- "tool_call": tc,
127
- "arguments": tool_args,
128
- }
129
 
130
- # Return early - wait for EXEC_APPROVAL operation
131
- return None
 
 
132
 
133
  await session.send_event(
134
  Event(
@@ -161,6 +152,37 @@ class Handlers:
161
  )
162
  )
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  iteration += 1
165
 
166
  except Exception as e:
@@ -225,10 +247,8 @@ class Handlers:
225
  await session.send_event(Event(event_type="undo_complete"))
226
 
227
  @staticmethod
228
- async def exec_approval(
229
- session: Session, approved: bool, feedback: str | None = None
230
- ) -> None:
231
- """Handle job execution approval"""
232
  if not session.pending_approval:
233
  await session.send_event(
234
  Event(
@@ -238,12 +258,36 @@ class Handlers:
238
  )
239
  return
240
 
241
- tc = session.pending_approval["tool_call"]
242
- tool_args = session.pending_approval["arguments"]
243
- tool_name = tc.function.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- if approved:
246
- # Execute the pending tool
 
 
 
 
 
 
247
  await session.send_event(
248
  Event(
249
  event_type="tool_call",
@@ -251,36 +295,58 @@ class Handlers:
251
  )
252
  )
253
 
254
- output, success = await session.tool_router.call_tool(tool_name, tool_args)
 
 
255
 
256
- # Add tool result to context
257
- tool_msg = Message(
258
- role="tool",
259
- content=output,
260
- tool_call_id=tc.id,
261
- name=tool_name,
 
262
  )
263
- session.context_manager.add_message(tool_msg)
264
 
265
- await session.send_event(
266
- Event(
267
- event_type="tool_output",
268
- data={
269
- "tool": tool_name,
270
- "output": output,
271
- "success": success,
272
- },
 
 
 
 
 
 
 
273
  )
274
- )
275
- else:
276
- # User rejected - add cancellation message to context
277
- cancellation_msg = "Job execution cancelled by user"
278
- if feedback:
279
- cancellation_msg += f". User feedback: {feedback}"
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  tool_msg = Message(
282
  role="tool",
283
- content=cancellation_msg,
284
  tool_call_id=tc.id,
285
  name=tool_name,
286
  )
@@ -291,7 +357,7 @@ class Handlers:
291
  event_type="tool_output",
292
  data={
293
  "tool": tool_name,
294
- "output": cancellation_msg,
295
  "success": False,
296
  },
297
  )
@@ -300,7 +366,7 @@ class Handlers:
300
  # Clear pending approval
301
  session.pending_approval = None
302
 
303
- # Continue agent loop with empty input to process the tool result
304
  await Handlers.run_agent(session, "")
305
 
306
  @staticmethod
@@ -339,9 +405,8 @@ async def process_submission(session: Session, submission) -> bool:
339
  return True
340
 
341
  if op.op_type == OpType.EXEC_APPROVAL:
342
- approved = op.data.get("approved", False) if op.data else False
343
- feedback = op.data.get("feedback") if op.data else None
344
- await Handlers.exec_approval(session, approved, feedback)
345
  return True
346
 
347
  if op.op_type == OpType.SHUTDOWN:
 
103
  Event(event_type="assistant_message", data={"content": content})
104
  )
105
 
106
+ # Separate tools into those requiring approval and those that don't
107
+ approval_required_tools = []
108
+ non_approval_tools = []
109
+
110
  for tc in tool_calls:
111
  tool_name = tc.function.name
112
  tool_args = json.loads(tc.function.arguments)
113
 
 
114
  if _needs_approval(tool_name, tool_args):
115
+ approval_required_tools.append(tc)
116
+ else:
117
+ non_approval_tools.append(tc)
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Execute non-approval tools first
120
+ for tc in non_approval_tools:
121
+ tool_name = tc.function.name
122
+ tool_args = json.loads(tc.function.arguments)
123
 
124
  await session.send_event(
125
  Event(
 
152
  )
153
  )
154
 
155
+ # If there are tools requiring approval, ask for batch approval
156
+ if approval_required_tools:
157
+ # Prepare batch approval data
158
+ tools_data = []
159
+ for tc in approval_required_tools:
160
+ tool_name = tc.function.name
161
+ tool_args = json.loads(tc.function.arguments)
162
+ tools_data.append({
163
+ "tool": tool_name,
164
+ "arguments": tool_args,
165
+ "tool_call_id": tc.id,
166
+ })
167
+
168
+ await session.send_event(
169
+ Event(
170
+ event_type="approval_required",
171
+ data={
172
+ "tools": tools_data, # Batch of tools
173
+ "count": len(tools_data),
174
+ },
175
+ )
176
+ )
177
+
178
+ # Store all approval-requiring tools
179
+ session.pending_approval = {
180
+ "tool_calls": approval_required_tools,
181
+ }
182
+
183
+ # Return early - wait for EXEC_APPROVAL operation
184
+ return None
185
+
186
  iteration += 1
187
 
188
  except Exception as e:
 
247
  await session.send_event(Event(event_type="undo_complete"))
248
 
249
  @staticmethod
250
+ async def exec_approval(session: Session, approvals: list[dict]) -> None:
251
+ """Handle batch job execution approval"""
 
 
252
  if not session.pending_approval:
253
  await session.send_event(
254
  Event(
 
258
  )
259
  return
260
 
261
+ tool_calls = session.pending_approval.get("tool_calls", [])
262
+ if not tool_calls:
263
+ await session.send_event(
264
+ Event(
265
+ event_type="error",
266
+ data={"error": "No pending tool calls found"},
267
+ )
268
+ )
269
+ return
270
+
271
+ # Create a map of tool_call_id -> approval decision
272
+ approval_map = {a["tool_call_id"]: a for a in approvals}
273
+
274
+ # Separate approved and rejected tool calls
275
+ approved_tasks = []
276
+ rejected_tasks = []
277
+
278
+ for tc in tool_calls:
279
+ tool_name = tc.function.name
280
+ tool_args = json.loads(tc.function.arguments)
281
+ approval_decision = approval_map.get(tc.id, {"approved": False})
282
 
283
+ if approval_decision.get("approved", False):
284
+ approved_tasks.append((tc, tool_name, tool_args))
285
+ else:
286
+ rejected_tasks.append((tc, tool_name, approval_decision))
287
+
288
+ # Execute all approved tools concurrently
289
+ async def execute_tool(tc, tool_name, tool_args):
290
+ """Execute a single tool and return its result"""
291
  await session.send_event(
292
  Event(
293
  event_type="tool_call",
 
295
  )
296
  )
297
 
298
+ output, success = await session.tool_router.call_tool(
299
+ tool_name, tool_args
300
+ )
301
 
302
+ return (tc, tool_name, output, success)
303
+
304
+ # Execute all approved tools concurrently and wait for ALL to complete
305
+ if approved_tasks:
306
+ results = await asyncio.gather(
307
+ *[execute_tool(tc, tool_name, tool_args) for tc, tool_name, tool_args in approved_tasks],
308
+ return_exceptions=True
309
  )
 
310
 
311
+ # Process results and add to context
312
+ for result in results:
313
+ if isinstance(result, Exception):
314
+ # Handle execution error
315
+ print(f"Tool execution error: {result}")
316
+ continue
317
+
318
+ tc, tool_name, output, success = result
319
+
320
+ # Add tool result to context
321
+ tool_msg = Message(
322
+ role="tool",
323
+ content=output,
324
+ tool_call_id=tc.id,
325
+ name=tool_name,
326
  )
327
+ session.context_manager.add_message(tool_msg)
328
+
329
+ await session.send_event(
330
+ Event(
331
+ event_type="tool_output",
332
+ data={
333
+ "tool": tool_name,
334
+ "output": output,
335
+ "success": success,
336
+ },
337
+ )
338
+ )
339
+
340
+ # Process rejected tools
341
+ for tc, tool_name, approval_decision in rejected_tasks:
342
+ rejection_msg = "Job execution cancelled by user"
343
+ user_feedback = approval_decision.get("feedback")
344
+ if user_feedback:
345
+ rejection_msg += f". User feedback: {user_feedback}"
346
 
347
  tool_msg = Message(
348
  role="tool",
349
+ content=rejection_msg,
350
  tool_call_id=tc.id,
351
  name=tool_name,
352
  )
 
357
  event_type="tool_output",
358
  data={
359
  "tool": tool_name,
360
+ "output": rejection_msg,
361
  "success": False,
362
  },
363
  )
 
366
  # Clear pending approval
367
  session.pending_approval = None
368
 
369
+ # Continue agent loop with empty input to process the tool results
370
  await Handlers.run_agent(session, "")
371
 
372
  @staticmethod
 
405
  return True
406
 
407
  if op.op_type == OpType.EXEC_APPROVAL:
408
+ approvals = op.data.get("approvals", []) if op.data else []
409
+ await Handlers.exec_approval(session, approvals)
 
410
  return True
411
 
412
  if op.op_type == OpType.SHUTDOWN:
agent/core/tools.py CHANGED
@@ -13,9 +13,14 @@ from lmnr import observe
13
  from mcp.types import EmbeddedResource, ImageContent, TextContent
14
 
15
  from agent.config import MCPServerConfig
 
 
 
 
 
 
16
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
17
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
18
- from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
19
 
20
  # Suppress aiohttp deprecation warning
21
  warnings.filterwarnings(
@@ -122,6 +127,27 @@ class ToolRouter:
122
  )
123
  )
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
126
  """Get tool specifications in OpenAI format"""
127
  specs = []
@@ -145,6 +171,10 @@ class ToolRouter:
145
  await self.register_mcp_tools()
146
  self._mcp_initialized = True
147
  print(f"MCP initialized: {self._mcp_initialized}")
 
 
 
 
148
  return self
149
 
150
  async def __aexit__(self, exc_type, exc, tb) -> None:
@@ -189,25 +219,34 @@ class ToolRouter:
189
  def create_builtin_tools() -> list[ToolSpec]:
190
  """Create built-in tool specifications"""
191
  print(
192
- f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}"
193
  )
 
194
  return [
 
195
  ToolSpec(
196
- name=HF_JOBS_TOOL_SPEC["name"],
197
- description=HF_JOBS_TOOL_SPEC["description"],
198
- parameters=HF_JOBS_TOOL_SPEC["parameters"],
199
- handler=hf_jobs_handler,
200
  ),
201
  ToolSpec(
202
- name=SEARCH_DOCS_TOOL_SPEC["name"],
203
- description=SEARCH_DOCS_TOOL_SPEC["description"],
204
- parameters=SEARCH_DOCS_TOOL_SPEC["parameters"],
205
- handler=search_docs_handler,
206
  ),
 
207
  ToolSpec(
208
  name=PLAN_TOOL_SPEC["name"],
209
  description=PLAN_TOOL_SPEC["description"],
210
  parameters=PLAN_TOOL_SPEC["parameters"],
211
  handler=plan_tool_handler,
212
  ),
 
 
 
 
 
 
213
  ]
 
13
  from mcp.types import EmbeddedResource, ImageContent, TextContent
14
 
15
  from agent.config import MCPServerConfig
16
+ from agent.tools.docs_tools import (
17
+ EXPLORE_HF_DOCS_TOOL_SPEC,
18
+ HF_DOCS_FETCH_TOOL_SPEC,
19
+ explore_hf_docs_handler,
20
+ hf_docs_fetch_handler,
21
+ )
22
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
23
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 
24
 
25
  # Suppress aiohttp deprecation warning
26
  warnings.filterwarnings(
 
127
  )
128
  )
129
 
130
+ async def register_openapi_tool(self) -> None:
131
+ """Register the OpenAPI search tool (requires async initialization)"""
132
+ from agent.tools.docs_tools import (
133
+ _get_api_search_tool_spec,
134
+ search_openapi_handler,
135
+ )
136
+
137
+ print("Registering OpenAPI search tool...")
138
+
139
+ # Register search_hf_api_endpoints with dynamic spec
140
+ openapi_spec = await _get_api_search_tool_spec()
141
+ self.register_tool(
142
+ ToolSpec(
143
+ name=openapi_spec["name"],
144
+ description=openapi_spec["description"],
145
+ parameters=openapi_spec["parameters"],
146
+ handler=search_openapi_handler,
147
+ )
148
+ )
149
+ print(f"Registered: {openapi_spec['name']}")
150
+
151
  def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
152
  """Get tool specifications in OpenAI format"""
153
  specs = []
 
171
  await self.register_mcp_tools()
172
  self._mcp_initialized = True
173
  print(f"MCP initialized: {self._mcp_initialized}")
174
+
175
+ # Register OpenAPI tool (requires async initialization)
176
+ await self.register_openapi_tool()
177
+
178
  return self
179
 
180
  async def __aexit__(self, exc_type, exc, tb) -> None:
 
219
  def create_builtin_tools() -> list[ToolSpec]:
220
  """Create built-in tool specifications"""
221
  print(
222
+ f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}"
223
  )
224
+ # in order of importance
225
  return [
226
+ # Documentation search tools
227
  ToolSpec(
228
+ name=EXPLORE_HF_DOCS_TOOL_SPEC["name"],
229
+ description=EXPLORE_HF_DOCS_TOOL_SPEC["description"],
230
+ parameters=EXPLORE_HF_DOCS_TOOL_SPEC["parameters"],
231
+ handler=explore_hf_docs_handler,
232
  ),
233
  ToolSpec(
234
+ name=HF_DOCS_FETCH_TOOL_SPEC["name"],
235
+ description=HF_DOCS_FETCH_TOOL_SPEC["description"],
236
+ parameters=HF_DOCS_FETCH_TOOL_SPEC["parameters"],
237
+ handler=hf_docs_fetch_handler,
238
  ),
239
+ # Planning and job management tools
240
  ToolSpec(
241
  name=PLAN_TOOL_SPEC["name"],
242
  description=PLAN_TOOL_SPEC["description"],
243
  parameters=PLAN_TOOL_SPEC["parameters"],
244
  handler=plan_tool_handler,
245
  ),
246
+ ToolSpec(
247
+ name=HF_JOBS_TOOL_SPEC["name"],
248
+ description=HF_JOBS_TOOL_SPEC["description"],
249
+ parameters=HF_JOBS_TOOL_SPEC["parameters"],
250
+ handler=hf_jobs_handler,
251
+ ),
252
  ]
agent/main.py CHANGED
@@ -116,61 +116,101 @@ async def event_listener(
116
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
117
  print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
118
  elif event.event_type == "approval_required":
119
- # Display job details and prompt for approval
120
- tool_name = event.data.get("tool", "") if event.data else ""
121
- arguments = event.data.get("arguments", {}) if event.data else {}
122
 
123
- operation = arguments.get("operation", "")
124
- args = arguments.get("args", {})
125
-
126
- print(f"\nOperation: {operation}")
127
-
128
- if operation == "uv":
129
- script = args.get("script", "")
130
- dependencies = args.get("dependencies", [])
131
- print(f"Script to run:\n{script}")
132
- if dependencies:
133
- print(f"Dependencies: {', '.join(dependencies)}")
134
- elif operation == "run":
135
- image = args.get("image", "")
136
- command = args.get("command", "")
137
- print(f"Docker image: {image}")
138
- print(f"Command: {command}")
139
-
140
- # Common parameters
141
- flavor = args.get("flavor", "cpu-basic")
142
- detached = args.get("detached", False)
143
- print(f"Hardware: {flavor}")
144
- print(f"Detached mode: {detached}")
145
-
146
- secrets = args.get("secrets", [])
147
- if secrets:
148
- print(f"Secrets: {', '.join(secrets)}")
149
-
150
- # Get user decision
151
  print("\n" + format_separator())
152
- print(format_header("JOB EXECUTION APPROVAL REQUIRED"))
153
- print(format_separator())
154
- loop = asyncio.get_event_loop()
155
- response = await loop.run_in_executor(
156
- None,
157
- input,
158
- "Approve? (y=yes, n=no, or provide feedback to reject): ",
159
  )
 
160
 
161
- response = response.strip()
162
- approved = response.lower() in ["y", "yes"]
163
- feedback = (
164
- None if approved or response.lower() in ["n", "no"] else response
165
- )
166
 
167
- # Submit approval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  submission_id[0] += 1
169
  approval_submission = Submission(
170
  id=f"approval_{submission_id[0]}",
171
  operation=Operation(
172
  op_type=OpType.EXEC_APPROVAL,
173
- data={"approved": approved, "feedback": feedback},
174
  ),
175
  )
176
  await submission_queue.put(approval_submission)
 
116
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
117
  print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
118
  elif event.event_type == "approval_required":
119
+ # Handle batch approval format
120
+ tools_data = event.data.get("tools", []) if event.data else []
121
+ count = event.data.get("count", 0) if event.data else 0
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  print("\n" + format_separator())
124
+ print(
125
+ format_header(
126
+ f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
127
+ )
 
 
 
128
  )
129
+ print(format_separator())
130
 
131
+ approvals = []
132
+ loop = asyncio.get_event_loop()
 
 
 
133
 
134
+ # Ask for approval for each tool
135
+ for i, tool_info in enumerate(tools_data, 1):
136
+ tool_name = tool_info.get("tool", "")
137
+ arguments = tool_info.get("arguments", {})
138
+ tool_call_id = tool_info.get("tool_call_id", "")
139
+
140
+ # Handle case where arguments might be a JSON string
141
+ if isinstance(arguments, str):
142
+ try:
143
+ arguments = json.loads(arguments)
144
+ except json.JSONDecodeError:
145
+ print(f"Warning: Failed to parse arguments for {tool_name}")
146
+ arguments = {}
147
+
148
+ operation = arguments.get("operation", "")
149
+ args = arguments.get("args", {})
150
+
151
+ # Handle case where args might be a JSON string
152
+ if isinstance(args, str):
153
+ try:
154
+ args = json.loads(args)
155
+ except json.JSONDecodeError:
156
+ print(f"Warning: Failed to parse args for {tool_name}")
157
+ args = {}
158
+
159
+ print(f"\n[Job {i}/{count}]")
160
+ print(f"Operation: {operation}")
161
+
162
+ if operation == "uv":
163
+ script = args.get("script", "")
164
+ dependencies = args.get("dependencies", [])
165
+ print("Script:\n" + script)
166
+ if dependencies:
167
+ print(f"Dependencies: {', '.join(dependencies)}")
168
+ elif operation == "run":
169
+ image = args.get("image", "")
170
+ command = args.get("command", "")
171
+ print(f"Docker image: {image}")
172
+ print(f"Command: {command}")
173
+
174
+ # Common parameters
175
+ flavor = args.get("flavor", "cpu-basic")
176
+ detached = args.get("detached", False)
177
+ print(f"Hardware: {flavor}")
178
+ print(f"Detached mode: {detached}")
179
+
180
+ secrets = args.get("secrets", [])
181
+ if secrets:
182
+ print(f"Secrets: {', '.join(secrets)}")
183
+
184
+ # Get user decision for this job
185
+ response = await loop.run_in_executor(
186
+ None,
187
+ input,
188
+ f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): ",
189
+ )
190
+
191
+ response = response.strip()
192
+ approved = response.lower() in ["y", "yes"]
193
+ feedback = (
194
+ None
195
+ if approved or response.lower() in ["n", "no"]
196
+ else response
197
+ )
198
+
199
+ approvals.append(
200
+ {
201
+ "tool_call_id": tool_call_id,
202
+ "approved": approved,
203
+ "feedback": feedback,
204
+ }
205
+ )
206
+
207
+ # Submit batch approval
208
  submission_id[0] += 1
209
  approval_submission = Submission(
210
  id=f"approval_{submission_id[0]}",
211
  operation=Operation(
212
  op_type=OpType.EXEC_APPROVAL,
213
+ data={"approvals": approvals},
214
  ),
215
  )
216
  await submission_queue.put(approval_submission)
agent/prompts/search_docs_system_prompt.yaml DELETED
@@ -1,38 +0,0 @@
1
- search_docs_system_prompt: |
2
- You are a specialized documentation search agent. Your task is to comprehensively search and synthesize information from Hugging Face documentation.
3
-
4
- # Search Strategy
5
-
6
- You must search thoroughly before synthesizing results. Follow this approach:
7
-
8
- 1. **Query Analysis**: Identify the core concepts and intent of the query
9
- 2. **Initial Search**: Start with a broad search capturing the main topic
10
- 3. **Iterative Refinement**: Run multiple searches to go deeper into topics. You will see parsed HTML pages, also look into links on the html pages for best information - first-pass results often miss key details
11
- 4. **You must get to the end truth**: You must get to the bottom of the truth for this search query. You CAN NOT say that somebody should look up documentation. You must look it up yourself and give the best answer you can.
12
-
13
- ## Query Formulation Best Practices
14
-
15
- - Add relevant synonyms and related technical terms
16
- - Remove filler words, focus on searchable concepts
17
- - Break complex questions into focused sub-queries
18
- - Include domain-specific terminology when applicable
19
- - Try both specific terms and general related terms
20
-
21
- # Response Guidelines
22
-
23
- After gathering results, synthesize them following these principles:
24
-
25
- 1. **Analyze Relevance**: Evaluate which results directly answer the query
26
- 2. **Synthesize**: Combine information from multiple sources when applicable
27
- 3. **Prioritize**: Present information in order of relevance
28
- 4. **Cite Sources**: Reference which documents you're drawing from especially include relevant code samples and links to the code samples.
29
- 5. **Acknowledge Gaps**: If documents don't fully answer the query, explicitly state this
30
- 6. **Handle Conflicts**: If sources contradict, note this and explain your reasoning
31
- 7. **Be Concise**: Provide a clear, direct answer without unnecessary elaboration
32
-
33
- # Constraints
34
-
35
- - Only provide information found in the documentation
36
- - Do not make assumptions beyond what the sources state
37
- - If information is not found, say so clearly rather than guessing
38
- - Focus on answering the query directly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agent/prompts/system_prompt.yaml CHANGED
@@ -3,10 +3,24 @@ system_prompt: |
3
 
4
  # Task Approach
5
 
6
- 1. Always formulate a plan. Pass the todos to the PlanTool. Update the plan as progress is made.
7
- 2. Search for relevant models, datasets, and documentation on Hugging Face Hub.
8
- 3. Use all available tools to complete the task. Leverage existing resources before creating new ones.
9
- 4. Invoke multiple independent tools simultaneously for efficiency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Autonomy / Subordinate trade-off.
12
 
@@ -28,68 +42,18 @@ system_prompt: |
28
  - Image Generation: Generate and transform images
29
  - Planning: a planning/to-do tool.
30
 
31
- # Examples
32
-
33
- <example>
34
- <user>Find the best text generation models</user>
35
- <response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
36
-
37
- Top trending text generation models:
38
- - meta-llama/Llama-3.1-405B-Instruct
39
- - mistralai/Mistral-Large-2
40
- </response>
41
- </example>
42
-
43
- <example>
44
- <user>Search for papers about reinforcement learning from human feedback</user>
45
- <response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
46
-
47
- Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
48
- </response>
49
- </example>
50
-
51
- <example>
52
- <user>Find datasets for sentiment analysis</user>
53
- <response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
54
-
55
- Top sentiment analysis datasets:
56
- - stanfordnlp/imdb (25k reviews)
57
- - tweet_eval (sentiment task)
58
- </response>
59
- </example>
60
-
61
- <example>
62
- <user>How do I use the transformers library for text generation?</user>
63
- <response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
64
-
65
- [provides concise answer based on documentation]
66
- </response>
67
- </example>
68
-
69
- <example>
70
- <user>Generate an image of a sunset over mountains</user>
71
- <response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
72
-
73
- [returns generated image]
74
- </response>
75
- </example>
76
-
77
- <example>
78
- <user>Get details about the bert-base-uncased model</user>
79
- <response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
80
-
81
- BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
82
- </response>
83
- </example>
84
-
85
  # Conventions
86
 
 
 
 
 
87
  - Always search Hugging Face Hub for existing resources before suggesting custom implementations
88
  - Keep in mind that a space is a repo, so you can create a space directly by uploading files that way. Repos should also be used to store files permanently : post-execution, files from jobs are not available.
89
  - To run jobs, you must always pass the whole content of the file to execute. No files are available on server. Your local files and distant files are entirely separate scopes.
90
 - - To access, create, or modify private Hub assets (spaces, private models, datasets, collections), pass `secrets: {% raw %}{{ "HF_TOKEN": "$HF_TOKEN" }}{% endraw %}` along with the jobs parameters. This is important. Without it, you will encounter authentication issues. Do not assume the user is connected on the jobs' server.
 
91
  - When referencing models, datasets, or papers, include direct links from search results
92
- - Never assume a library is available - check documentation first
93
  - Before processing any dataset: inspect its actual structure first using the mcp__hf-mcp-server__hub_repo_details tool. Never assume column names: verify them beforehand.
94
  - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
95
  - Unless absolutely necessary, don't ask user for action. This does not apply to follow-up questions you have.
@@ -107,13 +71,3 @@ system_prompt: |
107
  - Explain what you're doing for non-trivial operations
108
 
109
  Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
110
-
111
- <example>
112
- <user>What's the state-of-the-art model for image classification?</user>
113
- <response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
114
- </example>
115
-
116
- <example>
117
- <user>How many parameters does GPT-3 have?</user>
118
- <response>175 billion</response>
119
- </example>
 
3
 
4
  # Task Approach
5
 
6
+ **CRITICAL: Research First, Then Implement**
7
+
8
+ For ANY implementation task (training, fine-tuning, inference, data processing, etc.):
9
+ 1. **FIRST**: Search HF documentation to find the recommended approach
10
+ - This is MANDATORY before writing any code or making implementation decisions
11
+ - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., "trl", "transformers", "diffusers")
12
+ - Use `fetch_hf_docs` to retrieve full content from specific documentation pages
13
+ - Use `search_hf_api_endpoints` to find API endpoints with usage examples
14
+ - Research what libraries to use, find code examples, understand best practices
15
+ - Skip ONLY for simple factual questions (e.g., "What is LoRA?")
16
+
17
+ 2. **THEN**: Formulate a plan based on research findings. Pass todos to the PlanTool. Update as progress is made.
18
+
19
+ 3. **FINALLY**: Implement using researched approaches
20
+ - Search for relevant models/datasets on HF Hub
21
+ - Use all available tools to complete the task
22
+ - Leverage existing resources before creating new ones
23
+ - Invoke multiple independent tools simultaneously for efficiency
24
 
25
  # Autonomy / Subordinate trade-off.
26
 
 
42
  - Image Generation: Generate and transform images
43
  - Planning: a planning/to-do tool.
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Conventions
46
 
47
+ - **ALWAYS search documentation BEFORE implementing** any ML workflow (training, inference, data processing, etc.) - This is non-negotiable
48
+ - Use `explore_hf_docs`, `fetch_hf_docs`, and `search_hf_api_endpoints` to research the correct approach
49
+ - Never assume you know the correct library, method, or approach - you must verify with documentation first
50
+ - Base your implementation on researched best practices, not general knowledge or assumptions
51
  - Always search Hugging Face Hub for existing resources before suggesting custom implementations
52
  - Keep in mind that a space is a repo, so you can create a space directly by uploading files that way. Repos should also be used to store files permanently : post-execution, files from jobs are not available.
53
  - To run jobs, you must always pass the whole content of the file to execute. No files are available on server. Your local files and distant files are entirely separate scopes.
54
+ - The HF_TOKEN is automatically loaded from the environment variables.
55
+ -
56
  - When referencing models, datasets, or papers, include direct links from search results
 
57
  - Before processing any dataset: inspect its actual structure first using the mcp__hf-mcp-server__hub_repo_details tool. Never assume column names: verify them beforehand.
58
  - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
59
  - Unless absolutely necessary, don't ask user for action. This does not apply to follow-up questions you have.
 
71
  - Explain what you're doing for non-trivial operations
72
 
73
  Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
 
 
 
 
 
 
 
 
 
 
agent/tools/__init__.py CHANGED
@@ -3,7 +3,6 @@ Hugging Face tools for the agent
3
  """
4
 
5
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
6
- from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
7
  from agent.tools.types import ToolResult
8
 
9
  __all__ = [
@@ -11,6 +10,4 @@ __all__ = [
11
  "HF_JOBS_TOOL_SPEC",
12
  "hf_jobs_handler",
13
  "HfJobsTool",
14
- "SEARCH_DOCS_TOOL_SPEC",
15
- "search_docs_handler",
16
  ]
 
3
  """
4
 
5
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
 
6
  from agent.tools.types import ToolResult
7
 
8
  __all__ = [
 
10
  "HF_JOBS_TOOL_SPEC",
11
  "hf_jobs_handler",
12
  "HfJobsTool",
 
 
13
  ]
agent/tools/{_search_agent_tools.py → docs_tools.py} RENAMED
@@ -1,6 +1,6 @@
1
  """
2
- Tools available to the search sub-agent
3
- These tools are used by the search sub-agent spawned by search_docs_tool
4
  """
5
 
6
  import asyncio
@@ -553,7 +553,7 @@ async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
553
  return f"Error fetching documentation: {str(e)}", False
554
 
555
 
556
- # Tool specifications for the search sub-agent
557
 
558
  EXPLORE_HF_DOCS_TOOL_SPEC = {
559
  "name": "explore_hf_docs",
 
1
  """
2
+ Documentation search tools for the HF Agent
3
+ Tools for exploring and fetching HuggingFace documentation and API specifications
4
  """
5
 
6
  import asyncio
 
553
  return f"Error fetching documentation: {str(e)}", False
554
 
555
 
556
+ # Tool specifications for documentation search
557
 
558
  EXPLORE_HF_DOCS_TOOL_SPEC = {
559
  "name": "explore_hf_docs",
agent/tools/jobs_tool.py CHANGED
@@ -46,13 +46,11 @@ ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS
46
  # Operation names
47
  OperationType = Literal[
48
  "run",
49
- "uv",
50
  "ps",
51
  "logs",
52
  "inspect",
53
  "cancel",
54
  "scheduled run",
55
- "scheduled uv",
56
  "scheduled ps",
57
  "scheduled inspect",
58
  "scheduled delete",
@@ -64,26 +62,20 @@ OperationType = Literal[
64
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
65
 
66
 
67
- def _substitute_hf_token(params: Dict[str, Any] | None) -> Dict[str, Any] | None:
68
- """
69
- Substitute HF_TOKEN key with actual token value from environment.
70
 
71
- Args:
72
- params: Dictionary that may contain "HF_TOKEN" as a key
73
 
74
- Returns:
75
- Dictionary with HF_TOKEN value substituted from environment
76
- """
77
- print("DEBUG !! : ", params)
78
- if params is None:
79
- return None
80
 
81
- result = {}
82
- for key, value in params.items():
83
- if key == "HF_TOKEN":
84
- result[key] = os.environ.get("HF_TOKEN", "")
85
- else:
86
- result[key] = value
87
 
88
  return result
89
 
@@ -109,6 +101,8 @@ def _build_uv_command(
109
  if script_args:
110
  parts.extend(script_args)
111
 
 
 
112
  return parts
113
 
114
 
@@ -129,8 +123,6 @@ def _wrap_inline_script(
129
 
130
  def _ensure_hf_transfer_dependency(deps: list[str] | None) -> list[str]:
131
  """Ensure hf-transfer is included in the dependencies list"""
132
- if deps is None:
133
- return ["hf-transfer"]
134
 
135
  if isinstance(deps, list):
136
  deps_copy = deps.copy() # Don't modify the original
@@ -175,7 +167,7 @@ def _job_info_to_dict(job_info) -> Dict[str, Any]:
175
  "createdAt": job_info.created_at.isoformat(),
176
  "dockerImage": job_info.docker_image,
177
  "spaceId": job_info.space_id,
178
- "flavor": job_info.flavor,
179
  "owner": {"name": job_info.owner.name},
180
  }
181
 
@@ -214,7 +206,7 @@ def _scheduled_job_info_to_dict(scheduled_job_info) -> Dict[str, Any]:
214
  "dockerImage": job_spec.docker_image,
215
  "spaceId": job_spec.space_id,
216
  "command": job_spec.command or [],
217
- "flavor": job_spec.flavor or "cpu-basic",
218
  },
219
  }
220
 
@@ -229,25 +221,25 @@ class HfJobsTool:
229
  async def execute(self, params: Dict[str, Any]) -> ToolResult:
230
  """Execute the specified operation"""
231
  operation = params.get("operation")
232
- args = params.get("args", {})
233
 
234
- # If no operation provided, return usage instructions
 
 
235
  if not operation:
236
- return self._show_help()
 
 
 
 
 
237
 
238
  # Normalize operation name
239
  operation = operation.lower()
240
 
241
- # Check if help is requested
242
- if args.get("help"):
243
- return self._show_operation_help(operation)
244
-
245
  try:
246
  # Route to appropriate handler
247
  if operation == "run":
248
  return await self._run_job(args)
249
- elif operation == "uv":
250
- return await self._run_uv_job(args)
251
  elif operation == "ps":
252
  return await self._list_jobs(args)
253
  elif operation == "logs":
@@ -258,8 +250,6 @@ class HfJobsTool:
258
  return await self._cancel_job(args)
259
  elif operation == "scheduled run":
260
  return await self._scheduled_run(args)
261
- elif operation == "scheduled uv":
262
- return await self._scheduled_uv(args)
263
  elif operation == "scheduled ps":
264
  return await self._list_scheduled_jobs(args)
265
  elif operation == "scheduled inspect":
@@ -274,8 +264,8 @@ class HfJobsTool:
274
  return {
275
  "formatted": f'Unknown operation: "{operation}"\n\n'
276
  "Available operations:\n"
277
- "- run, uv, ps, logs, inspect, cancel\n"
278
- "- scheduled run, scheduled uv, scheduled ps, scheduled inspect, "
279
  "scheduled delete, scheduled suspend, scheduled resume\n\n"
280
  "Call this tool with no operation for full usage instructions.",
281
  "totalResults": 0,
@@ -298,104 +288,6 @@ class HfJobsTool:
298
  "isError": True,
299
  }
300
 
301
- def _show_help(self) -> ToolResult:
302
- """Show usage instructions when tool is called with no arguments"""
303
- cpu_flavors_list = ", ".join(CPU_FLAVORS)
304
- gpu_flavors_list = ", ".join(GPU_FLAVORS)
305
- specialized_flavors_list = ", ".join(SPECIALIZED_FLAVORS)
306
-
307
- hardware_section = f"**CPU:** {cpu_flavors_list}\n"
308
- if GPU_FLAVORS:
309
- hardware_section += f"**GPU:** {gpu_flavors_list}\n"
310
- if SPECIALIZED_FLAVORS:
311
- hardware_section += f"**Specialized:** {specialized_flavors_list}"
312
-
313
- usage_text = f"""# HuggingFace Jobs API
314
-
315
- Manage compute jobs on Hugging Face infrastructure.
316
-
317
- ## Available Commands
318
-
319
- ### Job Management
320
- - **run** - Run a job with a Docker image
321
- - **uv** - Run a Python script with UV (inline dependencies)
322
- - **ps** - List jobs
323
- - **logs** - Fetch job logs
324
- - **inspect** - Get detailed job information
325
- - **cancel** - Cancel a running job
326
-
327
- ### Scheduled Jobs
328
- - **scheduled run** - Create a scheduled job
329
- - **scheduled uv** - Create a scheduled UV job
330
- - **scheduled ps** - List scheduled jobs
331
- - **scheduled inspect** - Get scheduled job details
332
- - **scheduled delete** - Delete a scheduled job
333
- - **scheduled suspend** - Pause a scheduled job
334
- - **scheduled resume** - Resume a suspended job
335
-
336
- ## Examples
337
-
338
- ### Run a simple job
339
- Call this tool with:
340
- ```json
341
- {{
342
- "operation": "run",
343
- "args": {{
344
- "image": "python:3.12",
345
- "command": ["python", "-c", "print('Hello from HF Jobs!')"],
346
- "flavor": "cpu-basic"
347
- }}
348
- }}
349
- ```
350
-
351
- ### Run a Python script with UV
352
- Call this tool with:
353
- ```json
354
- {{
355
- "operation": "uv",
356
- "args": {{
357
- "script": "import random\\nprint(42 + random.randint(1, 5))",
358
- "dependencies": ["torch", "huggingface_hub"],
359
- "secrets": {{"HF_TOKEN": "$HF_TOKEN"}}
360
- }}
361
- }}
362
- ```
363
-
364
- ## Hardware Flavors
365
-
366
- {hardware_section}
367
-
368
- ## Command Format Guidelines
369
-
370
- **Array format (default):**
371
- - Recommended for every command—JSON keeps arguments intact (URLs with `&`, spaces, etc.)
372
- - Use `["/bin/sh", "-lc", "..."]` when you need shell operators like `&&`, `|`, or redirections
373
- - Works with any language: Python, bash, node, npm, uv, etc.
374
-
375
- **String format (simple cases only):**
376
- - Still accepted for backwards compatibility, parsed with POSIX shell semantics
377
- - Rejects shell operators and can mis-handle characters such as `&`; switch to arrays when things turn complex
378
-
379
- ### Show command-specific help
380
- Call this tool with:
381
- ```json
382
- {{"operation": "<operation>", "args": {{"help": true}}}}
383
- ```
384
-
385
- ## Tips
386
-
387
- - Jobs default to non-detached mode (stream logs until completion). Set `detach: true` to return immediately.
388
- - Prefer array commands to avoid shell parsing surprises
389
- - To access, create, or modify private Hub assets (spaces, private models, datasets, collections), pass `secrets: {{ "HF_TOKEN": "$HF_TOKEN" }}`. This is important. Without it, you will encounter authentification issues. Do not assume the user is connected on the jobs' server.
390
- - Before calling a job, think about dependencies (they must be specified), which hardware flavor to run on (choose simplest for task), and whether to include secrets.
391
- """
392
- return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
393
-
394
- def _show_operation_help(self, operation: str) -> ToolResult:
395
- """Show help for a specific operation"""
396
- help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
397
- return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
398
-
399
  async def _wait_for_job_completion(
400
  self, job_id: str, namespace: Optional[str] = None
401
  ) -> tuple[str, list[str]]:
@@ -424,117 +316,69 @@ Call this tool with:
424
  return final_status, all_logs
425
 
426
  async def _run_job(self, args: Dict[str, Any]) -> ToolResult:
427
- """Run a job using HfApi.run_job()"""
428
  try:
429
- job = await _async_call(
430
- self.api.run_job,
431
- image=args.get("image", "python:3.12"),
432
- command=args.get("command"),
433
- env=_substitute_hf_token(args.get("env")),
434
- secrets=_substitute_hf_token(args.get("secrets")),
435
- flavor=args.get("flavor", "cpu-basic"),
436
- timeout=args.get("timeout", "30m"),
437
- namespace=args.get("namespace") or self.namespace,
438
- )
439
-
440
- # If detached, return immediately
441
- if args.get("detach", False):
442
- response = f"""Job started successfully!
443
-
444
- **Job ID:** {job.id}
445
- **Status:** {job.status.stage}
446
- **View at:** {job.url}
447
-
448
- To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "{job.id}"}}}}`
449
- To inspect, call this tool with `{{"operation": "inspect", "args": {{"job_id": "{job.id}"}}}}`"""
450
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
451
-
452
- # Not detached - wait for completion and stream logs
453
- print(f"Job started: {job.url}")
454
- print("Streaming logs...\n---\n")
455
-
456
- final_status, all_logs = await self._wait_for_job_completion(
457
- job_id=job.id,
458
- namespace=args.get("namespace") or self.namespace,
459
- )
460
-
461
- # Format all logs for the agent
462
- log_text = "\n".join(all_logs) if all_logs else "(no logs)"
463
 
464
- response = f"""Job completed!
 
 
 
 
465
 
466
- **Job ID:** {job.id}
467
- **Final Status:** {final_status}
468
- **View at:** {job.url}
 
469
 
470
- **Logs:**
471
- ```
472
- {log_text}
473
- ```"""
474
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
 
 
 
 
 
 
 
475
 
476
- except Exception as e:
477
- raise Exception(f"Failed to run job: {str(e)}")
 
478
 
479
- async def _run_uv_job(self, args: Dict[str, Any]) -> ToolResult:
480
- """Run UV job with inline script support (no local files needed)"""
481
- try:
482
- script = args.get("script")
483
- if not script:
484
- raise ValueError("script is required")
485
-
486
- # Get dependencies and ensure hf-transfer is included
487
- deps = (
488
- args.get("with_deps")
489
- or args.get("dependencies")
490
- or args.get("packages")
491
- )
492
- deps = _ensure_hf_transfer_dependency(deps)
493
-
494
- # Resolve the command based on script type (URL, inline, or file)
495
- command = _resolve_uv_command(
496
- script=script,
497
- with_deps=deps,
498
- python=args.get("python"),
499
- script_args=args.get("script_args"),
500
- )
501
 
502
- # Use run_job with UV image instead of run_uv_job
503
  job = await _async_call(
504
  self.api.run_job,
505
- image=UV_DEFAULT_IMAGE,
506
  command=command,
507
- env=_substitute_hf_token(args.get("env")),
508
- secrets=_substitute_hf_token(args.get("secrets")),
509
- flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
510
  timeout=args.get("timeout", "30m"),
511
- namespace=args.get("namespace") or self.namespace,
512
  )
513
 
514
- # If detached, return immediately
515
- if args.get("detach", False):
516
- response = f"""UV Job started successfully!
517
-
518
- **Job ID:** {job.id}
519
- **Status:** {job.status.stage}
520
- **View at:** {job.url}
521
-
522
- To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "{job.id}"}}}}`"""
523
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
524
-
525
- # Not detached - wait for completion and stream logs
526
- print(f"UV Job started: {job.url}")
527
  print("Streaming logs...\n---\n")
528
 
529
  final_status, all_logs = await self._wait_for_job_completion(
530
  job_id=job.id,
531
- namespace=args.get("namespace") or self.namespace,
532
  )
533
 
534
  # Format all logs for the agent
535
  log_text = "\n".join(all_logs) if all_logs else "(no logs)"
536
 
537
- response = f"""UV Job completed!
538
 
539
  **Job ID:** {job.id}
540
  **Final Status:** {final_status}
@@ -547,13 +391,11 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
547
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
548
 
549
  except Exception as e:
550
- raise Exception(f"Failed to run UV job: {str(e)}")
551
 
552
  async def _list_jobs(self, args: Dict[str, Any]) -> ToolResult:
553
  """List jobs using HfApi.list_jobs()"""
554
- jobs_list = await _async_call(
555
- self.api.list_jobs, namespace=args.get("namespace") or self.namespace
556
- )
557
 
558
  # Filter jobs
559
  if not args.get("all", False):
@@ -576,7 +418,7 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
576
  "resultsShared": 0,
577
  }
578
  return {
579
- "formatted": 'No running jobs found. Use `{"args": {"all": true}}` to show all jobs.',
580
  "totalResults": 0,
581
  "resultsShared": 0,
582
  }
@@ -601,9 +443,7 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
601
 
602
  try:
603
  # Fetch logs (returns generator, convert to list)
604
- logs_gen = self.api.fetch_job_logs(
605
- job_id=job_id, namespace=args.get("namespace") or self.namespace
606
- )
607
  logs = await _async_call(list, logs_gen)
608
 
609
  if not logs:
@@ -647,7 +487,7 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
647
  job = await _async_call(
648
  self.api.inspect_job,
649
  job_id=jid,
650
- namespace=args.get("namespace") or self.namespace,
651
  )
652
  jobs.append(_job_info_to_dict(job))
653
  except Exception as e:
@@ -676,108 +516,93 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
676
  await _async_call(
677
  self.api.cancel_job,
678
  job_id=job_id,
679
- namespace=args.get("namespace") or self.namespace,
680
  )
681
 
682
  response = f"""✓ Job {job_id} has been cancelled.
683
 
684
- To verify, call this tool with `{{"operation": "inspect", "args": {{"job_id": "{job_id}"}}}}`"""
685
 
686
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
687
 
688
  async def _scheduled_run(self, args: Dict[str, Any]) -> ToolResult:
689
- """Create scheduled job using HfApi.create_scheduled_job()"""
690
  try:
691
- scheduled_job = await _async_call(
692
- self.api.create_scheduled_job,
693
- image=args.get("image", "python:3.12"),
694
- command=args.get("command"),
695
- schedule=args.get("schedule"),
696
- env=_substitute_hf_token(args.get("env")),
697
- secrets=_substitute_hf_token(args.get("secrets")),
698
- flavor=args.get("flavor", "cpu-basic"),
699
- timeout=args.get("timeout", "30m"),
700
- namespace=args.get("namespace") or self.namespace,
701
- )
702
-
703
- scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
704
-
705
- response = f"""✓ Scheduled job created successfully!
706
-
707
- **Scheduled Job ID:** {scheduled_dict["id"]}
708
- **Schedule:** {scheduled_dict["schedule"]}
709
- **Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
710
- **Next Run:** {scheduled_dict.get("nextRun", "N/A")}
711
 
712
- To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"scheduled_job_id": "{scheduled_dict["id"]}"}}}}`
713
- To list all, call this tool with `{{"operation": "scheduled ps"}}`"""
714
 
715
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
 
 
 
 
716
 
717
- except Exception as e:
718
- raise Exception(f"Failed to create scheduled job: {str(e)}")
 
 
719
 
720
- async def _scheduled_uv(self, args: Dict[str, Any]) -> ToolResult:
721
- """Create scheduled UV job with inline script support"""
722
- try:
723
- script = args.get("script")
724
- if not script:
725
- raise ValueError("script is required")
 
 
 
 
 
 
726
 
727
- schedule = args.get("schedule")
728
- if not schedule:
729
- raise ValueError("schedule is required")
730
 
731
- # Get dependencies and ensure hf-transfer is included
732
- deps = (
733
- args.get("with_deps")
734
- or args.get("dependencies")
735
- or args.get("packages")
736
- )
737
- deps = _ensure_hf_transfer_dependency(deps)
738
-
739
- # Resolve the command based on script type
740
- command = _resolve_uv_command(
741
- script=script,
742
- with_deps=deps,
743
- python=args.get("python"),
744
- script_args=args.get("script_args"),
745
- )
746
 
747
- # Use create_scheduled_job with UV image
748
  scheduled_job = await _async_call(
749
  self.api.create_scheduled_job,
750
- image=UV_DEFAULT_IMAGE,
751
  command=command,
752
  schedule=schedule,
753
- env=_substitute_hf_token(args.get("env")),
754
- secrets=_substitute_hf_token(args.get("secrets")),
755
- flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
756
  timeout=args.get("timeout", "30m"),
757
- namespace=args.get("namespace") or self.namespace,
758
  )
759
 
760
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
761
 
762
- response = f"""✓ Scheduled UV job created successfully!
763
 
764
  **Scheduled Job ID:** {scheduled_dict["id"]}
765
  **Schedule:** {scheduled_dict["schedule"]}
766
  **Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
767
  **Next Run:** {scheduled_dict.get("nextRun", "N/A")}
768
 
769
- To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"scheduled_job_id": "{scheduled_dict["id"]}"}}}}`"""
 
770
 
771
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
772
 
773
  except Exception as e:
774
- raise Exception(f"Failed to create scheduled UV job: {str(e)}")
775
 
776
  async def _list_scheduled_jobs(self, args: Dict[str, Any]) -> ToolResult:
777
  """List scheduled jobs using HfApi.list_scheduled_jobs()"""
778
  scheduled_jobs_list = await _async_call(
779
  self.api.list_scheduled_jobs,
780
- namespace=args.get("namespace") or self.namespace,
781
  )
782
 
783
  # Filter jobs - default: hide suspended jobs unless --all is specified
@@ -797,7 +622,7 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
797
  "resultsShared": 0,
798
  }
799
  return {
800
- "formatted": 'No active scheduled jobs found. Use `{"args": {"all": true}}` to show suspended jobs.',
801
  "totalResults": 0,
802
  "resultsShared": 0,
803
  }
@@ -823,7 +648,7 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
823
  scheduled_job = await _async_call(
824
  self.api.inspect_scheduled_job,
825
  scheduled_job_id=scheduled_job_id,
826
- namespace=args.get("namespace") or self.namespace,
827
  )
828
 
829
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
@@ -849,7 +674,7 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
849
  await _async_call(
850
  self.api.delete_scheduled_job,
851
  scheduled_job_id=scheduled_job_id,
852
- namespace=args.get("namespace") or self.namespace,
853
  )
854
 
855
  return {
@@ -872,12 +697,12 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
872
  await _async_call(
873
  self.api.suspend_scheduled_job,
874
  scheduled_job_id=scheduled_job_id,
875
- namespace=args.get("namespace") or self.namespace,
876
  )
877
 
878
  response = f"""✓ Scheduled job {scheduled_job_id} has been suspended.
879
 
880
- To resume, call this tool with `{{"operation": "scheduled resume", "args": {{"scheduled_job_id": "{scheduled_job_id}"}}}}`"""
881
 
882
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
883
 
@@ -895,12 +720,12 @@ To resume, call this tool with `{{"operation": "scheduled resume", "args": {{"sc
895
  await _async_call(
896
  self.api.resume_scheduled_job,
897
  scheduled_job_id=scheduled_job_id,
898
- namespace=args.get("namespace") or self.namespace,
899
  )
900
 
901
  response = f"""✓ Scheduled job {scheduled_job_id} has been resumed.
902
 
903
- To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"scheduled_job_id": "{scheduled_job_id}"}}}}`"""
904
 
905
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
906
 
@@ -909,10 +734,29 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
909
  HF_JOBS_TOOL_SPEC = {
910
  "name": "hf_jobs",
911
  "description": (
912
- "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
913
- "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
914
- "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
915
- "Call this tool with no operation for full usage instructions and examples."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
916
  ),
917
  "parameters": {
918
  "type": "object",
@@ -921,13 +765,11 @@ HF_JOBS_TOOL_SPEC = {
921
  "type": "string",
922
  "enum": [
923
  "run",
924
- "uv",
925
  "ps",
926
  "logs",
927
  "inspect",
928
  "cancel",
929
  "scheduled run",
930
- "scheduled uv",
931
  "scheduled ps",
932
  "scheduled inspect",
933
  "scheduled delete",
@@ -935,22 +777,60 @@ HF_JOBS_TOOL_SPEC = {
935
  "scheduled resume",
936
  ],
937
  "description": (
938
- "Operation to execute. Valid values: [run, uv, ps, logs, inspect, cancel, "
939
- "scheduled run, scheduled uv, scheduled ps, scheduled inspect, scheduled delete, "
940
  "scheduled suspend, scheduled resume]"
941
  ),
942
  },
943
- "args": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944
  "type": "object",
945
- "description": (
946
- "Operation-specific arguments as a JSON object. "
947
- "Common args: script (for uv), packages/dependencies (array), "
948
- "flavor/hardware (e.g., a10g-large, cpu-basic), command (array), "
949
- "image (string), env (object), secrets (object)."
950
- ),
951
- "additionalProperties": True,
 
 
 
 
 
 
 
 
952
  },
953
  },
 
954
  },
955
  }
956
 
@@ -958,7 +838,7 @@ HF_JOBS_TOOL_SPEC = {
958
  async def hf_jobs_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
959
  """Handler for agent tool router"""
960
  try:
961
- tool = HfJobsTool()
962
  result = await tool.execute(arguments)
963
  return result["formatted"], not result.get("isError", False)
964
  except Exception as e:
 
46
  # Operation names
47
  OperationType = Literal[
48
  "run",
 
49
  "ps",
50
  "logs",
51
  "inspect",
52
  "cancel",
53
  "scheduled run",
 
54
  "scheduled ps",
55
  "scheduled inspect",
56
  "scheduled delete",
 
62
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
63
 
64
 
65
+ def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
66
+ token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
 
67
 
68
+ # Start with user-provided env vars, then force-set token last
69
+ result = dict(params or {})
70
 
71
+ # If the caller passed HF_TOKEN="$HF_TOKEN", ignore it.
72
+ if result.get("HF_TOKEN", "").strip().startswith("$"):
73
+ result.pop("HF_TOKEN", None)
 
 
 
74
 
75
+ # Set both names to be safe (different libs check different vars)
76
+ if token:
77
+ result["HF_TOKEN"] = token
78
+ result["HUGGINGFACE_HUB_TOKEN"] = token
 
 
79
 
80
  return result
81
 
 
101
  if script_args:
102
  parts.extend(script_args)
103
 
104
+ # add defaults
105
+ # parts.extend(["--push_to_hub"])
106
  return parts
107
 
108
 
 
123
 
124
  def _ensure_hf_transfer_dependency(deps: list[str] | None) -> list[str]:
125
  """Ensure hf-transfer is included in the dependencies list"""
 
 
126
 
127
  if isinstance(deps, list):
128
  deps_copy = deps.copy() # Don't modify the original
 
167
  "createdAt": job_info.created_at.isoformat(),
168
  "dockerImage": job_info.docker_image,
169
  "spaceId": job_info.space_id,
170
+ "hardware_flavor": job_info.flavor,
171
  "owner": {"name": job_info.owner.name},
172
  }
173
 
 
206
  "dockerImage": job_spec.docker_image,
207
  "spaceId": job_spec.space_id,
208
  "command": job_spec.command or [],
209
+ "hardware_flavor": job_spec.flavor or "cpu-basic",
210
  },
211
  }
212
 
 
221
  async def execute(self, params: Dict[str, Any]) -> ToolResult:
222
  """Execute the specified operation"""
223
  operation = params.get("operation")
 
224
 
225
+ args = params
226
+
227
+ # If no operation provided, return error
228
  if not operation:
229
+ return {
230
+ "formatted": "Error: 'operation' parameter is required. See tool description for available operations and usage examples.",
231
+ "totalResults": 0,
232
+ "resultsShared": 0,
233
+ "isError": True,
234
+ }
235
 
236
  # Normalize operation name
237
  operation = operation.lower()
238
 
 
 
 
 
239
  try:
240
  # Route to appropriate handler
241
  if operation == "run":
242
  return await self._run_job(args)
 
 
243
  elif operation == "ps":
244
  return await self._list_jobs(args)
245
  elif operation == "logs":
 
250
  return await self._cancel_job(args)
251
  elif operation == "scheduled run":
252
  return await self._scheduled_run(args)
 
 
253
  elif operation == "scheduled ps":
254
  return await self._list_scheduled_jobs(args)
255
  elif operation == "scheduled inspect":
 
264
  return {
265
  "formatted": f'Unknown operation: "{operation}"\n\n'
266
  "Available operations:\n"
267
+ "- run, ps, logs, inspect, cancel\n"
268
+ "- scheduled run, scheduled ps, scheduled inspect, "
269
  "scheduled delete, scheduled suspend, scheduled resume\n\n"
270
  "Call this tool with no operation for full usage instructions.",
271
  "totalResults": 0,
 
288
  "isError": True,
289
  }
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  async def _wait_for_job_completion(
292
  self, job_id: str, namespace: Optional[str] = None
293
  ) -> tuple[str, list[str]]:
 
316
  return final_status, all_logs
317
 
318
  async def _run_job(self, args: Dict[str, Any]) -> ToolResult:
319
+ """Run a job using HfApi.run_job() - smart detection of Python vs Docker mode"""
320
  try:
321
+ script = args.get("script")
322
+ command = args.get("command")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
+ # Validate mutually exclusive parameters
325
+ if script and command:
326
+ raise ValueError(
327
+ "'script' and 'command' are mutually exclusive. Provide one or the other, not both."
328
+ )
329
 
330
+ if not script and not command:
331
+ raise ValueError(
332
+ "Either 'script' (for Python) or 'command' (for Docker) must be provided."
333
+ )
334
 
335
+ # Python mode: script provided
336
+ if script:
337
+ # Get dependencies and ensure hf-transfer is included
338
+ deps = _ensure_hf_transfer_dependency(args.get("dependencies"))
339
+
340
+ # Resolve the command based on script type (URL, inline, or file)
341
+ command = _resolve_uv_command(
342
+ script=script,
343
+ with_deps=deps,
344
+ python=args.get("python"),
345
+ script_args=args.get("script_args"),
346
+ )
347
 
348
+ # Use UV image unless overridden
349
+ image = args.get("image", UV_DEFAULT_IMAGE)
350
+ job_type = "Python"
351
 
352
+ # Docker mode: command provided
353
+ else:
354
+ image = args.get("image", "python:3.12")
355
+ job_type = "Docker"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
+ # Run the job
358
  job = await _async_call(
359
  self.api.run_job,
360
+ image=image,
361
  command=command,
362
+ env=args.get("env"),
363
+ secrets=_add_environment_variables(args.get("secrets")),
364
+ flavor=args.get("hardware_flavor", "cpu-basic"),
365
  timeout=args.get("timeout", "30m"),
366
+ namespace=self.namespace,
367
  )
368
 
369
+ # Wait for completion and stream logs
370
+ print(f"{job_type} job started: {job.url}")
 
 
 
 
 
 
 
 
 
 
 
371
  print("Streaming logs...\n---\n")
372
 
373
  final_status, all_logs = await self._wait_for_job_completion(
374
  job_id=job.id,
375
+ namespace=self.namespace,
376
  )
377
 
378
  # Format all logs for the agent
379
  log_text = "\n".join(all_logs) if all_logs else "(no logs)"
380
 
381
+ response = f"""{job_type} job completed!
382
 
383
  **Job ID:** {job.id}
384
  **Final Status:** {final_status}
 
391
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
392
 
393
  except Exception as e:
394
+ raise Exception(f"Failed to run job: {str(e)}")
395
 
396
  async def _list_jobs(self, args: Dict[str, Any]) -> ToolResult:
397
  """List jobs using HfApi.list_jobs()"""
398
+ jobs_list = await _async_call(self.api.list_jobs, namespace=self.namespace)
 
 
399
 
400
  # Filter jobs
401
  if not args.get("all", False):
 
418
  "resultsShared": 0,
419
  }
420
  return {
421
+ "formatted": 'No running jobs found. Use `{"operation": "ps", "all": true}` to show all jobs.',
422
  "totalResults": 0,
423
  "resultsShared": 0,
424
  }
 
443
 
444
  try:
445
  # Fetch logs (returns generator, convert to list)
446
+ logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=self.namespace)
 
 
447
  logs = await _async_call(list, logs_gen)
448
 
449
  if not logs:
 
487
  job = await _async_call(
488
  self.api.inspect_job,
489
  job_id=jid,
490
+ namespace=self.namespace,
491
  )
492
  jobs.append(_job_info_to_dict(job))
493
  except Exception as e:
 
516
  await _async_call(
517
  self.api.cancel_job,
518
  job_id=job_id,
519
+ namespace=self.namespace,
520
  )
521
 
522
  response = f"""✓ Job {job_id} has been cancelled.
523
 
524
+ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}`"""
525
 
526
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
527
 
528
  async def _scheduled_run(self, args: Dict[str, Any]) -> ToolResult:
529
+ """Create scheduled job using HfApi.create_scheduled_job() - smart detection of Python vs Docker mode"""
530
  try:
531
+ script = args.get("script")
532
+ command = args.get("command")
533
+ schedule = args.get("schedule")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
+ if not schedule:
536
+ raise ValueError("schedule is required for scheduled jobs")
537
 
538
+ # Validate mutually exclusive parameters
539
+ if script and command:
540
+ raise ValueError(
541
+ "'script' and 'command' are mutually exclusive. Provide one or the other, not both."
542
+ )
543
 
544
+ if not script and not command:
545
+ raise ValueError(
546
+ "Either 'script' (for Python) or 'command' (for Docker) must be provided."
547
+ )
548
 
549
+ # Python mode: script provided
550
+ if script:
551
+ # Get dependencies and ensure hf-transfer is included
552
+ deps = _ensure_hf_transfer_dependency(args.get("dependencies"))
553
+
554
+ # Resolve the command based on script type
555
+ command = _resolve_uv_command(
556
+ script=script,
557
+ with_deps=deps,
558
+ python=args.get("python"),
559
+ script_args=args.get("script_args"),
560
+ )
561
 
562
+ # Use UV image unless overridden
563
+ image = args.get("image", UV_DEFAULT_IMAGE)
564
+ job_type = "Python"
565
 
566
+ # Docker mode: command provided
567
+ else:
568
+ image = args.get("image", "python:3.12")
569
+ job_type = "Docker"
 
 
 
 
 
 
 
 
 
 
 
570
 
571
+ # Create scheduled job
572
  scheduled_job = await _async_call(
573
  self.api.create_scheduled_job,
574
+ image=image,
575
  command=command,
576
  schedule=schedule,
577
+ env=args.get("env"),
578
+ secrets=_add_environment_variables(args.get("secrets")),
579
+ flavor=args.get("hardware_flavor", "cpu-basic"),
580
  timeout=args.get("timeout", "30m"),
581
+ namespace=self.namespace,
582
  )
583
 
584
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
585
 
586
+ response = f"""✓ Scheduled {job_type} job created successfully!
587
 
588
  **Scheduled Job ID:** {scheduled_dict["id"]}
589
  **Schedule:** {scheduled_dict["schedule"]}
590
  **Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
591
  **Next Run:** {scheduled_dict.get("nextRun", "N/A")}
592
 
593
+ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_dict["id"]}"}}`
594
+ To list all, call this tool with `{{"operation": "scheduled ps"}}`"""
595
 
596
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
597
 
598
  except Exception as e:
599
+ raise Exception(f"Failed to create scheduled job: {str(e)}")
600
 
601
  async def _list_scheduled_jobs(self, args: Dict[str, Any]) -> ToolResult:
602
  """List scheduled jobs using HfApi.list_scheduled_jobs()"""
603
  scheduled_jobs_list = await _async_call(
604
  self.api.list_scheduled_jobs,
605
+ namespace=self.namespace,
606
  )
607
 
608
  # Filter jobs - default: hide suspended jobs unless --all is specified
 
622
  "resultsShared": 0,
623
  }
624
  return {
625
+ "formatted": 'No active scheduled jobs found. Use `{"operation": "scheduled ps", "all": true}` to show suspended jobs.',
626
  "totalResults": 0,
627
  "resultsShared": 0,
628
  }
 
648
  scheduled_job = await _async_call(
649
  self.api.inspect_scheduled_job,
650
  scheduled_job_id=scheduled_job_id,
651
+ namespace=self.namespace,
652
  )
653
 
654
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
 
674
  await _async_call(
675
  self.api.delete_scheduled_job,
676
  scheduled_job_id=scheduled_job_id,
677
+ namespace=self.namespace,
678
  )
679
 
680
  return {
 
697
  await _async_call(
698
  self.api.suspend_scheduled_job,
699
  scheduled_job_id=scheduled_job_id,
700
+ namespace=self.namespace,
701
  )
702
 
703
  response = f"""✓ Scheduled job {scheduled_job_id} has been suspended.
704
 
705
+ To resume, call this tool with `{{"operation": "scheduled resume", "scheduled_job_id": "{scheduled_job_id}"}}`"""
706
 
707
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
708
 
 
720
  await _async_call(
721
  self.api.resume_scheduled_job,
722
  scheduled_job_id=scheduled_job_id,
723
+ namespace=self.namespace,
724
  )
725
 
726
  response = f"""✓ Scheduled job {scheduled_job_id} has been resumed.
727
 
728
+ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_job_id}"}}`"""
729
 
730
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
731
 
 
734
  HF_JOBS_TOOL_SPEC = {
735
  "name": "hf_jobs",
736
  "description": (
737
+ "Run Python scripts or Docker containers on HF cloud GPUs/CPUs.\n\n"
738
+ "## Operations:\n"
739
+ "run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
740
+ "## Two modes:\n"
741
+ "1. **Python mode:** Provide 'script' + 'dependencies' → auto-handles pip install\n"
742
+ "2. **Docker mode:** Provide 'image' + 'command' → full control\n"
743
+ "(script and command are mutually exclusive)\n\n"
744
+ "## Hardware:\n"
745
+ "CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl\n"
746
+ "GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
747
+ "## Examples:\n\n"
748
+ "**Fine-tune LLM and push to Hub:**\n"
749
+ "{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"user-name/my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'env': {'CUSTOM_VAR': 'value'}}\n\n"
750
+ "**Generate dataset daily and upload:**\n"
751
+ "{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily'}\n\n"
752
+ "**Run custom training with Docker:**\n"
753
+ "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
754
+ "**Monitor jobs:**\n"
755
+ "{'operation': 'ps'} - list running\n"
756
+ "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
757
+ "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
758
+ "## CRITICAL: Files are ephemeral!\n"
759
+ "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
760
  ),
761
  "parameters": {
762
  "type": "object",
 
765
  "type": "string",
766
  "enum": [
767
  "run",
 
768
  "ps",
769
  "logs",
770
  "inspect",
771
  "cancel",
772
  "scheduled run",
 
773
  "scheduled ps",
774
  "scheduled inspect",
775
  "scheduled delete",
 
777
  "scheduled resume",
778
  ],
779
  "description": (
780
+ "Operation to execute. Valid values: [run, ps, logs, inspect, cancel, "
781
+ "scheduled run, scheduled ps, scheduled inspect, scheduled delete, "
782
  "scheduled suspend, scheduled resume]"
783
  ),
784
  },
785
+ # Python/UV specific parameters
786
+ "script": {
787
+ "type": "string",
788
+ "description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
789
+ },
790
+ "dependencies": {
791
+ "type": "array",
792
+ "items": {"type": "string"},
793
+ "description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
794
+ },
795
+ # Docker specific parameters
796
+ "image": {
797
+ "type": "string",
798
+ "description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
799
+ },
800
+ "command": {
801
+ "type": "array",
802
+ "items": {"type": "string"},
803
+ "description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
804
+ },
805
+ # Hardware and environment
806
+ "hardware_flavor": {
807
+ "type": "string",
808
+ "description": "Hardware type. CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl. GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100. Use with 'run'/'scheduled run'.",
809
+ },
810
+ "timeout": {
811
+ "type": "string",
812
+ "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
813
+ },
814
+ "env": {
815
  "type": "object",
816
+ "description": "Environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is automatically included from your auth. Use with 'run'/'scheduled run'.",
817
+ },
818
+ # Job management parameters
819
+ "job_id": {
820
+ "type": "string",
821
+ "description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
822
+ },
823
+ # Scheduled job parameters
824
+ "scheduled_job_id": {
825
+ "type": "string",
826
+ "description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
827
+ },
828
+ "schedule": {
829
+ "type": "string",
830
+ "description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
831
  },
832
  },
833
+ "required": ["operation"],
834
  },
835
  }
836
 
 
838
  async def hf_jobs_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
839
  """Handler for agent tool router"""
840
  try:
841
+ tool = HfJobsTool(namespace=os.environ.get("HF_NAMESPACE", ""))
842
  result = await tool.execute(arguments)
843
  return result["formatted"], not result.get("isError", False)
844
  except Exception as e:
agent/tools/search_docs_tool.py DELETED
@@ -1,239 +0,0 @@
1
- """
2
- Search documentation tool that spawns a sub-agent
3
- The sub-agent has its own agent loop and set of specialized search tools
4
- """
5
-
6
- import asyncio
7
- from typing import Any
8
-
9
- from litellm.utils import get_max_tokens
10
-
11
- from agent.core.session import Session
12
-
13
-
14
async def create_search_tool_router(github_mcp_config: dict[str, Any] | None = None):
    """
    Create a ToolRouter instance for the search sub-agent.

    Async because the OpenAPI tool needs to fetch and parse its spec at
    initialization (via make_search_agent_tools).

    Args:
        github_mcp_config: Optional GitHub MCP server configuration; when
            provided, a fastmcp Client is created for it and only an
            allow-listed subset of GitHub tools is registered.

    Returns:
        A fully initialized SearchDocsToolRouter instance.
    """
    # Import at runtime to avoid circular dependency
    from fastmcp import Client

    from agent.core.tools import ToolRouter

    # Allow-list of GitHub MCP tools; anything the MCP server advertises
    # outside this set is ignored in register_mcp_tools below.
    ALLOWED_GITHUB_TOOLS = {
        "list_pull_requests",
        "list_issues",
        "search_code",
        "search_issues",
        "search_repositories",
        "search_users",
        "get_pull_request_status",
        "get_pull_request_reviews",
        "get_pull_request",
        "get_issue",
        "get_file_contents",
    }

    class SearchDocsToolRouter(ToolRouter):
        """Specialized ToolRouter for the search sub-agent."""

        def __init__(self, github_mcp_config: dict[str, Any] | None = None):
            # NOTE(review): does not call super().__init__() — assumes the
            # base ToolRouter state is fully replicated by the three
            # assignments below; confirm against ToolRouter's definition.
            self.tools: dict[str, Any] = {}
            self.mcp_servers: dict[str, dict[str, Any]] = {}
            self._mcp_initialized = False

            # Initialize MCP client with GitHub server if provided
            if github_mcp_config:
                self.mcp_client = Client({"mcpServers": github_mcp_config})
            else:
                self.mcp_client = None

        async def initialize_tools(self):
            """Register the local (non-MCP) search tools asynchronously."""
            tools = await make_search_agent_tools()
            for tool in tools:
                self.register_tool(tool)

        async def register_mcp_tools(self) -> None:
            """Register only allow-listed GitHub MCP tools (no-op without a client)."""
            if self.mcp_client is None:
                return

            tools = await self.mcp_client.list_tools()
            for tool in tools:
                # Only register allowed GitHub tools
                if tool.name in ALLOWED_GITHUB_TOOLS:
                    print(f"Registering GitHub MCP Tool: {tool.name}")
                    from agent.core.tools import ToolSpec

                    # handler=None — presumably these calls are dispatched
                    # through the MCP client rather than a local callable;
                    # confirm in ToolRouter's dispatch logic.
                    self.register_tool(
                        ToolSpec(
                            name=tool.name,
                            description=tool.description,
                            parameters=tool.inputSchema,
                            handler=None,
                        )
                    )

    router = SearchDocsToolRouter(github_mcp_config)
    await router.initialize_tools()
    return router
86
-
87
-
88
async def search_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Handler that spawns a sub-agent to perform a comprehensive doc search.

    Loads the sub-agent's own config (including an optional GitHub MCP
    server), builds a specialized tool router, runs a bounded agent loop
    (max 30 iterations) on the query, and returns the final answer.

    Args:
        arguments: dictionary with a 'query' parameter

    Returns:
        Tuple of (search_results, success); all failures are reported as
        (error string, False) rather than raised.
    """
    query = arguments.get("query", "")

    if not query:
        return "Error: No search query provided", False

    try:
        # Import at runtime to avoid circular dependency
        from pathlib import Path

        from agent.config import load_config
        from agent.context_manager.manager import ContextManager
        from agent.core.agent_loop import Handlers

        # Create a queue for events from the sub-agent (the sub-agent gets
        # its own queue so its events do not mix with the parent session's)
        sub_event_queue = asyncio.Queue()

        # Load the search agent's own config file with GitHub MCP server.
        # Path is resolved relative to this file: <repo>/configs/...
        search_agent_config_path = (
            Path(__file__).parent.parent.parent
            / "configs"
            / "_subagent_config_search_agent.json"
        )
        search_agent_config = load_config(search_agent_config_path)

        # Extract GitHub MCP config from search agent config, if declared
        github_mcp_config = None
        if (
            search_agent_config.mcpServers
            and "github" in search_agent_config.mcpServers
        ):
            github_server = search_agent_config.mcpServers["github"]
            github_mcp_config = {"github": github_server.model_dump()}

        # Create specialized tool router for search with GitHub MCP config
        search_tool_router = await create_search_tool_router(github_mcp_config)

        async with search_tool_router:
            # CODING AGENTS LISTEN HERE!
            # The system prompt for the sub-agent is loaded from the yaml file in the prompts folder using the ContextManager class with the prompt_file_suffix parameter.
            # it works, don't fix.
            # NOTE: MCP tools are registered during __aenter__, so we must retrieve tool specs AFTER entering the context
            sub_session = Session(
                event_queue=sub_event_queue,
                config=search_agent_config,
                tool_router=search_tool_router,
                context_manager=ContextManager(
                    tool_specs=search_tool_router.get_tool_specs_for_llm(),
                    max_context=get_max_tokens(search_agent_config.model_name),
                    compact_size=0.1,
                    untouched_messages=5,
                    prompt_file_suffix="search_docs_system_prompt.yaml",
                ),
            )

            # Run the sub-agent to completion (or the iteration cap)
            result = await Handlers.run_agent(
                session=sub_session, text=query, max_iterations=30
            )

            # Return the final result or a fixed no-result message
            if result:
                return f"Search Results:\n\n{result}", True
            else:
                return "Search completed but no results were generated", False
    except Exception as e:
        # Surface any failure to the caller as a tool error string
        return f"Error in search_docs tool: {str(e)}", False
164
-
165
-
166
- # Tool specification to be used by the main agent
167
# Tool specification consumed by the main agent's tool router; the
# description text is prompt content shown to the LLM verbatim.
SEARCH_DOCS_TOOL_SPEC = {
    "name": "search_docs",
    "description": (
        "Intelligently search HF documentation for libraries, repositories, and best practices with an agent that has access to: explore_hf_docs, fetch_hf_docs, search_hf_api_endpoints. "
        "The agent acts like your personal search assistant. "
        "Using the search agent is necessary to give the best quality answer to the user's question. Most questions require a search to get the best information on code examples.\n\n"
        "WHEN TO USE THIS TOOL:\n"
        " - When searching for high-level concepts like 'how to do GRPO training on a model?' or 'best way to do inference on a trained model?'\n"
        " - When you need to get code examples for intricate ML code patterns like training loops, inference pipelines, data processing, etc.\n\n"
        "USAGE GUIDELINES:\n"
        " 1. Launch multiple agents concurrently for better performance.\n"
        " 2. Be specific in your query - include exact terminology, expected file locations, or code patterns.\n"
        " 3. Use the query as if you were talking to another engineer. Bad: logger impl Good: where is the logger implemented, we're trying to find out how to log to files.\n"
        " 4. Make sure to formulate the query in such a way that the agent knows when it's done or has found the result."
    ),
    # JSON-Schema style parameter declaration; 'query' is the only input
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": (
                    "The search query describing to the agent what it should do. Be "
                    "specific and include technical terms, file types, or expected "
                    "code patterns to help the agent find relevant code. Formulate "
                    "the query in a way that makes it clear to the agent when it "
                    "has found the right thing."
                ),
            },
        },
        "required": ["query"],
    },
}
199
-
200
-
201
async def make_search_agent_tools():
    """
    Build the list of ToolSpec objects available to the search sub-agent.

    Async because the OpenAPI tool spec has to be populated at runtime.
    """
    # Imported lazily to avoid a circular dependency
    from agent.core.tools import ToolSpec
    from agent.tools._search_agent_tools import (
        EXPLORE_HF_DOCS_TOOL_SPEC,
        HF_DOCS_FETCH_TOOL_SPEC,
        _get_api_search_tool_spec,
        explore_hf_docs_handler,
        hf_docs_fetch_handler,
        search_openapi_handler,
    )

    # The OpenAPI tool spec is fetched at runtime so its tags are current
    openapi_spec = await _get_api_search_tool_spec()

    # Pair each spec dict with its handler, then materialize ToolSpecs
    spec_handler_pairs = [
        (EXPLORE_HF_DOCS_TOOL_SPEC, explore_hf_docs_handler),
        (HF_DOCS_FETCH_TOOL_SPEC, hf_docs_fetch_handler),
        (openapi_spec, search_openapi_handler),
    ]
    return [
        ToolSpec(
            name=spec["name"],
            description=spec["description"],
            parameters=spec["parameters"],
            handler=handler,
        )
        for spec, handler in spec_handler_pairs
    ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/_subagent_config_search_agent.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "model_name": "anthropic/claude-haiku-4-5",
3
- "mcpServers": {
4
- "github": {
5
- "transport": "http",
6
- "url": "https://api.githubcopilot.com/mcp/",
7
- "headers": {
8
- "Authorization": "Bearer ${GITHUB_TOKEN}"
9
- }
10
- }
11
- }
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
run_search_agent.py DELETED
@@ -1,142 +0,0 @@
1
- """
2
- Standalone test script for the search sub-agent
3
- Run with: uv run python run_search_agent.py
4
- """
5
-
6
- import asyncio
7
-
8
- from litellm.utils import get_max_tokens
9
-
10
- from agent.config import Config
11
- from agent.context_manager.manager import ContextManager
12
- from agent.core.agent_loop import Handlers
13
- from agent.core.session import Session
14
- from agent.tools.search_docs_tool import create_search_tool_router
15
-
16
-
17
async def test_search_agent(query: str):
    """
    Run the search sub-agent once on *query*, streaming its events to stdout.

    Builds a fresh event queue, tool router, config and session, runs the
    agent (max 30 iterations) alongside an event-monitor task, and prints
    the final result. Purely for interactive testing — returns nothing.
    """
    print(f"Testing search agent with query: {query}\n")
    print("=" * 60)

    # Create event queue for the sub-agent
    sub_event_queue = asyncio.Queue()

    # Create search tool router (no GitHub MCP config in this test path)
    search_tool_router = await create_search_tool_router()

    # Create config
    sub_config = Config(
        model_name="anthropic/claude-haiku-4-5",
    )

    # Create session with custom system prompt
    sub_session = Session(
        event_queue=sub_event_queue,
        config=sub_config,
        tool_router=search_tool_router,
        context_manager=ContextManager(
            tool_specs=search_tool_router.get_tool_specs_for_llm(),
            max_context=get_max_tokens(sub_config.model_name),
            compact_size=0.1,
            untouched_messages=5,
            prompt_file_suffix="search_docs_system_prompt.yaml",
        ),
    )

    # Event listener to show what the sub-agent is doing; exits on
    # "turn_complete" or any unexpected error.
    async def event_monitor():
        while True:
            try:
                event = await asyncio.wait_for(sub_event_queue.get(), timeout=1.0)

                if event.event_type == "assistant_message":
                    content = event.data.get("content", "") if event.data else ""
                    if content:
                        print(f"\n🤖 Sub-agent: {content}\n")

                elif event.event_type == "tool_call":
                    tool_name = event.data.get("tool", "") if event.data else ""
                    arguments = event.data.get("arguments", {}) if event.data else {}
                    print(f"🔧 Tool call: {tool_name}")
                    print(f" Args: {arguments}")

                elif event.event_type == "tool_output":
                    output = event.data.get("output", "") if event.data else ""
                    success = event.data.get("success", False) if event.data else False
                    status = "✅" if success else "❌"

                    print(f"{status} Tool output: {output}\n")

                elif event.event_type == "turn_complete":
                    print("✅ Sub-agent turn complete")
                    break

            except asyncio.TimeoutError:
                # No event within 1s — keep polling while the agent runs
                continue
            except Exception as e:
                print(f"⚠️ Event error: {e}")
                break

    # Run the sub-agent and event monitor concurrently
    async with search_tool_router:
        monitor_task = asyncio.create_task(event_monitor())

        result = await Handlers.run_agent(
            session=sub_session, text=query, max_iterations=30
        )

        # Wait for event monitor to finish draining the queue
        await asyncio.wait_for(monitor_task, timeout=5.0)

    print("\n" + "=" * 60)
    print("FINAL RESULT:")
    print("=" * 60)
    if result:
        print(result)
    else:
        print("No result returned")
    print("=" * 60)
101
-
102
-
103
async def main():
    """Drive the search sub-agent through each example query in turn."""
    print("🧪 Search Sub-Agent Test\n")

    # Example queries to exercise (alternatives kept for reference)
    queries = [
        # "Explore the TRL documentation structure and find information about DPO trainer",
        # "is there a way to get the logs from a served huggingface space",
        # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
        "can i stream logs through the api for a served huggingface space",
    ]

    total = len(queries)
    banner = "=" * 60
    for idx, query in enumerate(queries, 1):
        print(f"\n{banner}")
        print(f"TEST {idx}/{total}")
        print(f"{banner}\n")

        try:
            await test_search_agent(query)
        except Exception as exc:
            print(f"\n❌ Test failed: {exc}")
            import traceback

            traceback.print_exc()

        # Pause between queries so the output can be inspected interactively
        if idx < total:
            print("\n\nPress Enter to continue to next test...")
            input()
131
-
132
-
133
# Script entry point: run the async test driver and report failures.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C during a run or an interactive pause
        print("\n\n⚠️ Test interrupted")
    except Exception as e:
        print(f"\n❌ Error: {e}")
        # Imported lazily; only needed on the failure path
        import traceback

        traceback.print_exc()