Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Merge pull request #1 from huggingface/compacting-context
Browse files- agent/codex_agent_demo.py +1 -1
- agent/config.py +0 -1
- agent/context_manager/manager.py +73 -15
- agent/core/agent_loop.py +26 -13
- agent/core/session.py +11 -3
- agent/main.py +4 -0
- agent/prompts/system_prompt.yaml +112 -0
agent/codex_agent_demo.py
CHANGED
|
@@ -201,7 +201,7 @@ class Session:
|
|
| 201 |
"""
|
| 202 |
|
| 203 |
def __init__(self, event_queue: asyncio.Queue):
|
| 204 |
-
self.context_manager = ContextManager()
|
| 205 |
self.event_queue = event_queue
|
| 206 |
self.is_running = True
|
| 207 |
self.current_task: Optional[asyncio.Task] = None
|
|
|
|
| 201 |
"""
|
| 202 |
|
| 203 |
def __init__(self, event_queue: asyncio.Queue):
|
| 204 |
+
self.context_manager = ContextManager(tool_specs=[])
|
| 205 |
self.event_queue = event_queue
|
| 206 |
self.is_running = True
|
| 207 |
self.current_task: Optional[asyncio.Task] = None
|
agent/config.py
CHANGED
|
@@ -20,7 +20,6 @@ class Config(BaseModel):
|
|
| 20 |
|
| 21 |
model_name: str
|
| 22 |
tools: list[Tool] = []
|
| 23 |
-
system_prompt_path: str = ""
|
| 24 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 25 |
|
| 26 |
|
|
|
|
| 20 |
|
| 21 |
model_name: str
|
| 22 |
tools: list[Tool] = []
|
|
|
|
| 23 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 24 |
|
| 25 |
|
agent/context_manager/manager.py
CHANGED
|
@@ -2,43 +2,101 @@
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
class ContextManager:
|
| 9 |
"""Manages conversation context and message history for the agent"""
|
| 10 |
|
| 11 |
-
def __init__(
|
| 12 |
-
self
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 14 |
|
| 15 |
-
def _load_system_prompt(self):
|
| 16 |
-
"""Load the system prompt"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
return
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
def add_message(self, message: Message) -> None:
|
| 22 |
"""Add a message to the history"""
|
|
|
|
|
|
|
|
|
|
| 23 |
self.items.append(message)
|
| 24 |
|
| 25 |
def get_messages(self) -> list[Message]:
|
| 26 |
"""Get all messages for sending to LLM"""
|
| 27 |
return self.items
|
| 28 |
|
| 29 |
-
def compact(self,
|
| 30 |
"""Remove old messages to keep history under target size"""
|
| 31 |
-
|
| 32 |
-
if len(self.items) <= target_size:
|
| 33 |
return
|
| 34 |
|
| 35 |
-
# Always keep system prompt
|
| 36 |
system_msg = (
|
| 37 |
self.items[0] if self.items and self.items[0].role == "system" else None
|
| 38 |
)
|
| 39 |
-
messages_to_keep = self.items[-(target_size - 1) :]
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
if system_msg:
|
| 42 |
-
self.items = [system_msg] +
|
| 43 |
else:
|
| 44 |
-
self.items =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
import yaml
|
| 9 |
+
from jinja2 import Template
|
| 10 |
+
from litellm import Message, acompletion
|
| 11 |
|
| 12 |
|
| 13 |
class ContextManager:
    """Manages conversation context and message history for the agent.

    The history always starts with a system message rendered from
    ``prompts/system_prompt.yaml``. ``context_length`` tracks an approximate
    token count for the history; when it exceeds ``max_context``, ``compact``
    replaces the older messages with an LLM-written summary.
    """

    def __init__(
        self,
        max_context: int = 180_000,
        compact_size: float = 0.1,
        untouched_messages: int = 5,
        tool_specs: list[dict[str, Any]] | None = None,
    ):
        """Create a context seeded with the rendered system prompt.

        Args:
            max_context: Token threshold above which ``compact`` summarizes.
            compact_size: Fraction of ``max_context`` used as the token budget
                for the summary completion.
            untouched_messages: Number of trailing messages that compaction
                leaves verbatim.
            tool_specs: Tool specifications passed to the prompt template;
                ``None`` is treated as an empty list.
        """
        self.system_prompt = self._load_system_prompt(tool_specs or [])
        self.max_context = max_context
        self.compact_size = int(max_context * compact_size)
        # Rough heuristic of ~4 characters per token until real usage is reported.
        self.context_length = len(self.system_prompt) // 4
        self.untouched_messages = untouched_messages
        self.items: list[Message] = [Message(role="system", content=self.system_prompt)]

    def _load_system_prompt(self, tool_specs: list[dict[str, Any]]) -> str:
        """Load and render the system prompt from the YAML file with Jinja2.

        Args:
            tool_specs: Tool specifications exposed to the template as
                ``tools`` (and their count as ``num_tools``).

        Returns:
            The rendered system prompt text.
        """
        prompt_file = Path(__file__).parent.parent / "prompts" / "system_prompt.yaml"

        # Explicit encoding: the platform default is not guaranteed to be UTF-8.
        with open(prompt_file, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty file; fall back to an empty mapping.
            prompt_data = yaml.safe_load(f) or {}
        template_str = prompt_data.get("system_prompt", "")

        template = Template(template_str)
        return template.render(
            tools=tool_specs,
            num_tools=len(tool_specs),
        )

    @staticmethod
    def _estimate_tokens(messages: list[Message]) -> int:
        """Roughly estimate tokens for messages using ~4 characters per token.

        Only the ``content`` attribute is counted; messages without content
        contribute nothing, so this is a lower bound.
        """
        total = 0
        for msg in messages:
            content = getattr(msg, "content", None)
            if content is not None:
                total += len(str(content)) // 4
        return total

    def add_message(self, message: Message, token_count: int | None = None) -> None:
        """Add a message to the history.

        Args:
            message: The message to append.
            token_count: Total token usage reported for the call that produced
                this message. ``None`` or ``0`` means no usage was reported,
                and the previous ``context_length`` is kept.
        """
        if token_count:
            self.context_length = token_count
        self.items.append(message)

    def get_messages(self) -> list[Message]:
        """Get all messages for sending to LLM"""
        return self.items

    async def compact(self, model_name: str) -> None:
        """Summarize older messages once the context exceeds ``max_context``.

        The system message (if present) and the most recent messages are kept
        verbatim; everything in between is replaced by a single assistant
        message containing a summary generated by ``model_name``. Does nothing
        when the context is within budget, when there is nothing old enough to
        summarize, or when the model returns an empty summary.

        Args:
            model_name: Model identifier passed to ``acompletion`` for the
                summary request.
        """
        if (self.context_length <= self.max_context) or not self.items:
            return

        system_msg = self.items[0] if self.items[0].role == "system" else None
        # Index of the first message eligible for summarization; without a
        # system message the very first item must be summarized, not dropped.
        start = 1 if system_msg is not None else 0

        # Don't summarize a certain number of just-preceding messages.
        # Clamp the split point into [start, len(items)] so a short history
        # cannot produce a negative index (which would wrap around) and
        # untouched_messages == 0 cannot index past the end.
        idx = len(self.items) - self.untouched_messages
        idx = min(max(start, idx), len(self.items))

        # Walk back to find a user message to make sure we keep an
        # assistant -> user -> assistant general conversation structure.
        while start < idx < len(self.items) and self.items[idx].role != "user":
            idx -= 1

        recent_messages = self.items[idx:]
        messages_to_summarize = self.items[start:idx]

        # Improbable: the recent messages alone would have to be very long.
        if not messages_to_summarize:
            return

        messages_to_summarize.append(
            Message(
                role="user",
                content="Please provide a concise summary of the conversation above, focusing on key decisions, code changes, problems solved, and important context needed for future turns.",
            )
        )

        response = await acompletion(
            model=model_name,
            messages=messages_to_summarize,
            max_completion_tokens=self.compact_size,
        )
        summary_text = response.choices[0].message.content
        # Never replace real history with an empty summary.
        if not summary_text:
            return

        summarized_message = Message(role="assistant", content=summary_text)

        # Reconstruct: system + summary + recent messages (includes tools)
        if system_msg:
            self.items = [system_msg, summarized_message] + recent_messages
        else:
            self.items = [summarized_message] + recent_messages

        usage = getattr(response, "usage", None)
        summary_tokens = getattr(usage, "completion_tokens", None)
        if summary_tokens is None:
            summary_tokens = len(summary_text) // 4

        # Approximate the new size: system prompt + summary + the retained
        # recent messages. The next reported token_count overwrites this.
        self.context_length = (
            len(self.system_prompt) // 4
            + summary_tokens
            + self._estimate_tokens(recent_messages)
        )
|
agent/core/agent_loop.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""
|
| 2 |
Main agent implementation with integrated tool system and MCP support
|
| 3 |
"""
|
| 4 |
|
|
@@ -58,17 +58,17 @@ class Handlers:
|
|
| 58 |
tool_choice="auto",
|
| 59 |
)
|
| 60 |
|
|
|
|
| 61 |
message = response.choices[0].message
|
| 62 |
-
|
| 63 |
-
# Extract content and tool calls
|
| 64 |
content = message.content
|
|
|
|
| 65 |
tool_calls: list[ToolCall] = message.get("tool_calls", [])
|
| 66 |
|
| 67 |
# If no tool calls, add assistant message and we're done
|
| 68 |
if not tool_calls:
|
| 69 |
if content:
|
| 70 |
assistant_msg = Message(role="assistant", content=content)
|
| 71 |
-
session.context_manager.add_message(assistant_msg)
|
| 72 |
await session.send_event(
|
| 73 |
Event(
|
| 74 |
event_type="assistant_message",
|
|
@@ -81,9 +81,11 @@ class Handlers:
|
|
| 81 |
# Add assistant message with tool calls to history
|
| 82 |
# LiteLLM will format this correctly for the provider
|
| 83 |
assistant_msg = Message(
|
| 84 |
-
role="assistant",
|
|
|
|
|
|
|
| 85 |
)
|
| 86 |
-
session.context_manager.add_message(assistant_msg)
|
| 87 |
|
| 88 |
if content:
|
| 89 |
await session.send_event(
|
|
@@ -139,6 +141,18 @@ class Handlers:
|
|
| 139 |
)
|
| 140 |
break
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
await session.send_event(
|
| 143 |
Event(
|
| 144 |
event_type="turn_complete",
|
|
@@ -156,14 +170,14 @@ class Handlers:
|
|
| 156 |
@staticmethod
|
| 157 |
async def compact(session: Session) -> None:
|
| 158 |
"""Handle compact (like compact in codex.rs:1317)"""
|
| 159 |
-
|
| 160 |
-
session.context_manager.compact(
|
| 161 |
-
|
| 162 |
|
| 163 |
await session.send_event(
|
| 164 |
Event(
|
| 165 |
event_type="compacted",
|
| 166 |
-
data={"removed":
|
| 167 |
)
|
| 168 |
)
|
| 169 |
|
|
@@ -231,9 +245,8 @@ async def submission_loop(
|
|
| 231 |
This is the core of the agent (like submission_loop in codex.rs:1259-1340)
|
| 232 |
"""
|
| 233 |
|
| 234 |
-
# Create session
|
| 235 |
-
session = Session(event_queue, config=config)
|
| 236 |
-
session.tool_router = tool_router
|
| 237 |
print("🤖 Agent loop started")
|
| 238 |
|
| 239 |
# Main processing loop
|
|
|
|
| 1 |
+
"""
|
| 2 |
Main agent implementation with integrated tool system and MCP support
|
| 3 |
"""
|
| 4 |
|
|
|
|
| 58 |
tool_choice="auto",
|
| 59 |
)
|
| 60 |
|
| 61 |
+
# Extract text response, token usage, and tool calls
|
| 62 |
message = response.choices[0].message
|
|
|
|
|
|
|
| 63 |
content = message.content
|
| 64 |
+
token_count = response.usage.total_tokens
|
| 65 |
tool_calls: list[ToolCall] = message.get("tool_calls", [])
|
| 66 |
|
| 67 |
# If no tool calls, add assistant message and we're done
|
| 68 |
if not tool_calls:
|
| 69 |
if content:
|
| 70 |
assistant_msg = Message(role="assistant", content=content)
|
| 71 |
+
session.context_manager.add_message(assistant_msg, token_count)
|
| 72 |
await session.send_event(
|
| 73 |
Event(
|
| 74 |
event_type="assistant_message",
|
|
|
|
| 81 |
# Add assistant message with tool calls to history
|
| 82 |
# LiteLLM will format this correctly for the provider
|
| 83 |
assistant_msg = Message(
|
| 84 |
+
role="assistant",
|
| 85 |
+
content=content,
|
| 86 |
+
tool_calls=tool_calls,
|
| 87 |
)
|
| 88 |
+
session.context_manager.add_message(assistant_msg, token_count)
|
| 89 |
|
| 90 |
if content:
|
| 91 |
await session.send_event(
|
|
|
|
| 141 |
)
|
| 142 |
break
|
| 143 |
|
| 144 |
+
old_length = session.context_manager.context_length
|
| 145 |
+
await session.context_manager.compact(model_name=session.config.model_name)
|
| 146 |
+
new_length = session.context_manager.context_length
|
| 147 |
+
|
| 148 |
+
if new_length != old_length:
|
| 149 |
+
await session.send_event(
|
| 150 |
+
Event(
|
| 151 |
+
event_type="compacted",
|
| 152 |
+
data={"old_tokens": old_length, "new_tokens": new_length},
|
| 153 |
+
)
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
await session.send_event(
|
| 157 |
Event(
|
| 158 |
event_type="turn_complete",
|
|
|
|
| 170 |
@staticmethod
async def compact(session: Session) -> None:
    """Handle an explicit compact request (like compact in codex.rs:1317).

    Runs the session's context compaction and then emits a ``compacted``
    event with the context length before and after.

    Args:
        session: Session whose context manager is compacted and whose event
            channel receives the ``compacted`` event.
    """
    old_length = session.context_manager.context_length
    await session.context_manager.compact(model_name=session.config.model_name)
    new_length = session.context_manager.context_length

    await session.send_event(
        Event(
            event_type="compacted",
            data={
                # Keys read by the event listener and used by the automatic
                # compaction event; without them the listener prints 0 -> 0.
                "old_tokens": old_length,
                "new_tokens": new_length,
                # Legacy keys kept for existing consumers; "removed" now
                # carries the actual reduction instead of the old total.
                "removed": max(old_length - new_length, 0),
                "remaining": new_length,
            },
        )
    )
|
| 183 |
|
|
|
|
| 245 |
This is the core of the agent (like submission_loop in codex.rs:1259-1340)
|
| 246 |
"""
|
| 247 |
|
| 248 |
+
# Create session with tool router
|
| 249 |
+
session = Session(event_queue, config=config, tool_router=tool_router)
|
|
|
|
| 250 |
print("🤖 Agent loop started")
|
| 251 |
|
| 252 |
# Main processing loop
|
agent/core/session.py
CHANGED
|
@@ -4,6 +4,8 @@ from dataclasses import dataclass
|
|
| 4 |
from enum import Enum
|
| 5 |
from typing import Any, Optional
|
| 6 |
|
|
|
|
|
|
|
| 7 |
from agent.config import Config
|
| 8 |
from agent.context_manager.manager import ContextManager
|
| 9 |
|
|
@@ -33,18 +35,24 @@ class Session:
|
|
| 33 |
self,
|
| 34 |
event_queue: asyncio.Queue,
|
| 35 |
config: Config | None = None,
|
|
|
|
| 36 |
):
|
| 37 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
self.event_queue = event_queue
|
| 39 |
self.session_id = str(uuid.uuid4())
|
| 40 |
self.config = config or Config(
|
| 41 |
model_name="anthropic/claude-sonnet-4-5-20250929",
|
| 42 |
tools=[],
|
| 43 |
-
system_prompt_path="",
|
| 44 |
)
|
| 45 |
self.is_running = True
|
| 46 |
self.current_task: asyncio.Task | None = None
|
| 47 |
-
self.tool_router = None # Set by submission_loop
|
| 48 |
|
| 49 |
async def send_event(self, event: Event) -> None:
|
| 50 |
"""Send event back to client"""
|
|
|
|
| 4 |
from enum import Enum
|
| 5 |
from typing import Any, Optional
|
| 6 |
|
| 7 |
+
from litellm import get_max_tokens
|
| 8 |
+
|
| 9 |
from agent.config import Config
|
| 10 |
from agent.context_manager.manager import ContextManager
|
| 11 |
|
|
|
|
| 35 |
self,
|
| 36 |
event_queue: asyncio.Queue,
|
| 37 |
config: Config | None = None,
|
| 38 |
+
tool_router=None,
|
| 39 |
):
|
| 40 |
+
self.tool_router = tool_router
|
| 41 |
+
tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
|
| 42 |
+
self.context_manager = ContextManager(
|
| 43 |
+
max_context=get_max_tokens(config.model_name),
|
| 44 |
+
compact_size=0.1,
|
| 45 |
+
untouched_messages=5,
|
| 46 |
+
tool_specs=tool_specs,
|
| 47 |
+
)
|
| 48 |
self.event_queue = event_queue
|
| 49 |
self.session_id = str(uuid.uuid4())
|
| 50 |
self.config = config or Config(
|
| 51 |
model_name="anthropic/claude-sonnet-4-5-20250929",
|
| 52 |
tools=[],
|
|
|
|
| 53 |
)
|
| 54 |
self.is_running = True
|
| 55 |
self.current_task: asyncio.Task | None = None
|
|
|
|
| 56 |
|
| 57 |
async def send_event(self, event: Event) -> None:
|
| 58 |
"""Send event back to client"""
|
agent/main.py
CHANGED
|
@@ -88,6 +88,10 @@ async def event_listener(
|
|
| 88 |
break
|
| 89 |
elif event.event_type == "processing":
|
| 90 |
print("⏳ Processing...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# Silently ignore other events
|
| 92 |
|
| 93 |
except asyncio.CancelledError:
|
|
|
|
| 88 |
break
|
| 89 |
elif event.event_type == "processing":
|
| 90 |
print("⏳ Processing...", flush=True)
|
| 91 |
+
elif event.event_type == "compacted":
|
| 92 |
+
old_tokens = event.data.get("old_tokens", 0) if event.data else 0
|
| 93 |
+
new_tokens = event.data.get("new_tokens", 0) if event.data else 0
|
| 94 |
+
print(f"📦 Compacted context: {old_tokens} → {new_tokens} tokens")
|
| 95 |
# Silently ignore other events
|
| 96 |
|
| 97 |
except asyncio.CancelledError:
|
agent/prompts/system_prompt.yaml
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
system_prompt: |
|
| 2 |
+
You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
|
| 3 |
+
|
| 4 |
+
# Available Tools
|
| 5 |
+
|
| 6 |
+
You have access to the following categories of tools:
|
| 7 |
+
|
| 8 |
+
- Hugging Face Hub: Search and interact with models, datasets, papers, and documentation
|
| 9 |
+
- Spaces: Use and discover ML applications
|
| 10 |
+
- Jobs: Manage compute jobs for training and inference
|
| 11 |
+
- Image Generation: Generate and transform images
|
| 12 |
+
|
| 13 |
+
# Agency
|
| 14 |
+
|
| 15 |
+
You take initiative when the user asks you to do something, maintaining an appropriate balance between:
|
| 16 |
+
|
| 17 |
+
1. Doing the right thing when asked, including taking actions and follow-up actions
|
| 18 |
+
2. Not surprising the user with actions you take without asking
|
| 19 |
+
3. Not adding unnecessary explanations after completing tasks
|
| 20 |
+
|
| 21 |
+
# Task Approach
|
| 22 |
+
|
| 23 |
+
For ML engineering tasks:
|
| 24 |
+
1. Use all available tools to complete the task
|
| 25 |
+
2. Search for relevant models, datasets, and documentation on Hugging Face Hub
|
| 26 |
+
3. Leverage existing resources before creating new ones
|
| 27 |
+
4. Invoke multiple independent tools simultaneously for efficiency
|
| 28 |
+
|
| 29 |
+
# Examples
|
| 30 |
+
|
| 31 |
+
<example>
|
| 32 |
+
<user>Find the best text generation models</user>
|
| 33 |
+
<response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
|
| 34 |
+
|
| 35 |
+
Top trending text generation models:
|
| 36 |
+
- meta-llama/Llama-3.1-405B-Instruct
|
| 37 |
+
- mistralai/Mistral-Large-2
|
| 38 |
+
</response>
|
| 39 |
+
</example>
|
| 40 |
+
|
| 41 |
+
<example>
|
| 42 |
+
<user>Search for papers about reinforcement learning from human feedback</user>
|
| 43 |
+
<response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
|
| 44 |
+
|
| 45 |
+
Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
|
| 46 |
+
</response>
|
| 47 |
+
</example>
|
| 48 |
+
|
| 49 |
+
<example>
|
| 50 |
+
<user>Find datasets for sentiment analysis</user>
|
| 51 |
+
<response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
|
| 52 |
+
|
| 53 |
+
Top sentiment analysis datasets:
|
| 54 |
+
- stanfordnlp/imdb (25k reviews)
|
| 55 |
+
- tweet_eval (sentiment task)
|
| 56 |
+
</response>
|
| 57 |
+
</example>
|
| 58 |
+
|
| 59 |
+
<example>
|
| 60 |
+
<user>How do I use the transformers library for text generation?</user>
|
| 61 |
+
<response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
|
| 62 |
+
|
| 63 |
+
[provides concise answer based on documentation]
|
| 64 |
+
</response>
|
| 65 |
+
</example>
|
| 66 |
+
|
| 67 |
+
<example>
|
| 68 |
+
<user>Generate an image of a sunset over mountains</user>
|
| 69 |
+
<response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
|
| 70 |
+
|
| 71 |
+
[returns generated image]
|
| 72 |
+
</response>
|
| 73 |
+
</example>
|
| 74 |
+
|
| 75 |
+
<example>
|
| 76 |
+
<user>Get details about the bert-base-uncased model</user>
|
| 77 |
+
<response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
|
| 78 |
+
|
| 79 |
+
BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
|
| 80 |
+
</response>
|
| 81 |
+
</example>
|
| 82 |
+
|
| 83 |
+
# Conventions
|
| 84 |
+
|
| 85 |
+
- Always search Hugging Face Hub for existing resources before suggesting custom implementations
|
| 86 |
+
- When referencing models, datasets, or papers, include direct links from search results
|
| 87 |
+
- Never assume a library is available - check documentation first
|
| 88 |
+
- Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
|
| 89 |
+
- For training tasks, consider compute requirements and suggest appropriate hardware
|
| 90 |
+
- Never expose or log API keys, tokens, or secrets
|
| 91 |
+
|
| 92 |
+
# Communication Style
|
| 93 |
+
|
| 94 |
+
- Be concise and direct
|
| 95 |
+
- Skip flattery and unnecessary preamble
|
| 96 |
+
- Respond in 1-3 sentences when possible
|
| 97 |
+
- No emojis, minimal exclamation points
|
| 98 |
+
- Don't apologize for limitations - offer alternatives or keep responses short
|
| 99 |
+
- Don't thank the user for results
|
| 100 |
+
- Explain what you're doing for non-trivial operations
|
| 101 |
+
|
| 102 |
+
Answer the user's question directly without elaboration unless they ask for detail. One-word answers are best when appropriate.
|
| 103 |
+
|
| 104 |
+
<example>
|
| 105 |
+
<user>What's the state-of-the-art model for image classification?</user>
|
| 106 |
+
<response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
|
| 107 |
+
</example>
|
| 108 |
+
|
| 109 |
+
<example>
|
| 110 |
+
<user>How many parameters does GPT-3 have?</user>
|
| 111 |
+
<response>175 billion</response>
|
| 112 |
+
</example>
|