Henri Bonamy committed on
Commit
3df534a
·
2 Parent(s): 7534b92a3268b6

Merge pull request #1 from huggingface/compacting-context

Browse files
agent/codex_agent_demo.py CHANGED
@@ -201,7 +201,7 @@ class Session:
201
  """
202
 
203
  def __init__(self, event_queue: asyncio.Queue):
204
- self.context_manager = ContextManager()
205
  self.event_queue = event_queue
206
  self.is_running = True
207
  self.current_task: Optional[asyncio.Task] = None
 
201
  """
202
 
203
  def __init__(self, event_queue: asyncio.Queue):
204
+ self.context_manager = ContextManager(tool_specs=[])
205
  self.event_queue = event_queue
206
  self.is_running = True
207
  self.current_task: Optional[asyncio.Task] = None
agent/config.py CHANGED
@@ -20,7 +20,6 @@ class Config(BaseModel):
20
 
21
  model_name: str
22
  tools: list[Tool] = []
23
- system_prompt_path: str = ""
24
  mcpServers: dict[str, MCPServerConfig] = {}
25
 
26
 
 
20
 
21
  model_name: str
22
  tools: list[Tool] = []
 
23
  mcpServers: dict[str, MCPServerConfig] = {}
24
 
25
 
agent/context_manager/manager.py CHANGED
@@ -2,43 +2,101 @@
2
  Context management for conversation history
3
  """
4
 
5
- from litellm import Message
 
 
 
 
 
6
 
7
 
8
  class ContextManager:
9
  """Manages conversation context and message history for the agent"""
10
 
11
- def __init__(self):
12
- self.system_prompt = self._load_system_prompt()
 
 
 
 
 
 
 
 
 
 
13
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
14
 
15
- def _load_system_prompt(self):
16
- """Load the system prompt"""
 
 
 
 
 
17
 
18
- # TODO: get system prompt from jinja template
19
- return "You are a helpful assistant."
 
 
 
20
 
21
- def add_message(self, message: Message) -> None:
22
  """Add a message to the history"""
 
 
 
23
  self.items.append(message)
24
 
25
  def get_messages(self) -> list[Message]:
26
  """Get all messages for sending to LLM"""
27
  return self.items
28
 
29
- def compact(self, target_size: int) -> None:
30
  """Remove old messages to keep history under target size"""
31
- # Keep system prompt (first message) and remove oldest user/assistant messages
32
- if len(self.items) <= target_size:
33
  return
34
 
35
- # Always keep system prompt
36
  system_msg = (
37
  self.items[0] if self.items and self.items[0].role == "system" else None
38
  )
39
- messages_to_keep = self.items[-(target_size - 1) :]
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  if system_msg:
42
- self.items = [system_msg] + messages_to_keep
43
  else:
44
- self.items = messages_to_keep
 
 
 
 
 
2
  Context management for conversation history
3
  """
4
 
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import yaml
9
+ from jinja2 import Template
10
+ from litellm import Message, acompletion
11
 
12
 
13
class ContextManager:
    """Manages conversation context and message history for the agent.

    Tracks an approximate token count for the history and, when it exceeds
    ``max_context``, replaces the older portion of the conversation with an
    LLM-generated summary (see :meth:`compact`).
    """

    def __init__(
        self,
        max_context: int = 180_000,
        compact_size: float = 0.1,
        untouched_messages: int = 5,
        tool_specs: list[dict[str, Any]] | None = None,
    ):
        """Initialize the context manager.

        Args:
            max_context: Token budget; compaction triggers above this.
            compact_size: Fraction of ``max_context`` allowed for the
                summary (used as ``max_completion_tokens`` when summarizing).
            untouched_messages: Number of trailing messages never summarized.
            tool_specs: Tool specifications rendered into the system prompt.
        """
        self.system_prompt = self._load_system_prompt(tool_specs or [])
        self.max_context = max_context
        self.compact_size = int(max_context * compact_size)
        # Rough chars/4 heuristic until the first real usage report arrives.
        self.context_length = len(self.system_prompt) // 4
        self.untouched_messages = untouched_messages
        self.items: list[Message] = [Message(role="system", content=self.system_prompt)]

    def _load_system_prompt(self, tool_specs: list[dict[str, Any]]) -> str:
        """Load and render the system prompt from the YAML file with Jinja2."""
        prompt_file = Path(__file__).parent.parent / "prompts" / "system_prompt.yaml"

        with open(prompt_file, "r", encoding="utf-8") as f:
            prompt_data = yaml.safe_load(f)
        template_str = prompt_data.get("system_prompt", "")

        template = Template(template_str)
        return template.render(
            tools=tool_specs,
            num_tools=len(tool_specs),
        )

    def add_message(self, message: Message, token_count: int | None = None) -> None:
        """Add a message to the history.

        Args:
            message: The message to append.
            token_count: Total token usage reported by the provider for the
                turn; when given, it replaces the tracked context length.
        """
        # `is not None` so a legitimate count of 0 still updates the tracker.
        if token_count is not None:
            self.context_length = token_count
        self.items.append(message)

    def get_messages(self) -> list[Message]:
        """Get all messages for sending to the LLM."""
        return self.items

    async def compact(self, model_name: str) -> None:
        """Summarize older messages once the history exceeds the budget.

        Keeps the system prompt (when present) and the most recent messages
        verbatim, replaces everything in between with a single assistant
        summary produced by ``model_name``, then resets the tracked length.
        No-op while under ``max_context`` or when there is nothing to fold.
        """
        if (self.context_length <= self.max_context) or not self.items:
            return

        system_msg = (
            self.items[0] if self.items and self.items[0].role == "system" else None
        )
        # First index eligible for summarization: skip the system prompt
        # only when one actually exists, so items[0] is never silently lost.
        start = 1 if system_msg else 0

        # Don't summarize a certain number of just-preceding messages.
        # Walk back to find a user message to make sure we keep an
        # assistant -> user -> assistant general conversation structure.
        idx = len(self.items) - self.untouched_messages
        if idx <= start:
            # History is shorter than the protected tail; nothing to fold.
            return
        while idx > start and self.items[idx].role != "user":
            idx -= 1

        recent_messages = self.items[idx:]
        messages_to_summarize = self.items[start:idx]

        # Improbable — messages would have to be very long.
        if not messages_to_summarize:
            return

        messages_to_summarize.append(
            Message(
                role="user",
                content="Please provide a concise summary of the conversation above, focusing on key decisions, code changes, problems solved, and important context needed for future turns.",
            )
        )

        response = await acompletion(
            model=model_name,
            messages=messages_to_summarize,
            max_completion_tokens=self.compact_size,
        )
        summarized_message = Message(
            role="assistant", content=response.choices[0].message.content
        )

        # Reconstruct: system + summary + recent messages (includes tools)
        if system_msg:
            self.items = [system_msg, summarized_message] + recent_messages
        else:
            self.items = [summarized_message] + recent_messages

        self.context_length = (
            len(self.system_prompt) // 4 + response.usage.completion_tokens
        )
agent/core/agent_loop.py CHANGED
@@ -1,4 +1,4 @@
1
- """
2
  Main agent implementation with integrated tool system and MCP support
3
  """
4
 
@@ -58,17 +58,17 @@ class Handlers:
58
  tool_choice="auto",
59
  )
60
 
 
61
  message = response.choices[0].message
62
-
63
- # Extract content and tool calls
64
  content = message.content
 
65
  tool_calls: list[ToolCall] = message.get("tool_calls", [])
66
 
67
  # If no tool calls, add assistant message and we're done
68
  if not tool_calls:
69
  if content:
70
  assistant_msg = Message(role="assistant", content=content)
71
- session.context_manager.add_message(assistant_msg)
72
  await session.send_event(
73
  Event(
74
  event_type="assistant_message",
@@ -81,9 +81,11 @@ class Handlers:
81
  # Add assistant message with tool calls to history
82
  # LiteLLM will format this correctly for the provider
83
  assistant_msg = Message(
84
- role="assistant", content=content, tool_calls=tool_calls
 
 
85
  )
86
- session.context_manager.add_message(assistant_msg)
87
 
88
  if content:
89
  await session.send_event(
@@ -139,6 +141,18 @@ class Handlers:
139
  )
140
  break
141
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  await session.send_event(
143
  Event(
144
  event_type="turn_complete",
@@ -156,14 +170,14 @@ class Handlers:
156
  @staticmethod
157
  async def compact(session: Session) -> None:
158
  """Handle compact (like compact in codex.rs:1317)"""
159
- old_size = len(session.context_manager.items)
160
- session.context_manager.compact(target_size=10)
161
- new_size = len(session.context_manager.items)
162
 
163
  await session.send_event(
164
  Event(
165
  event_type="compacted",
166
- data={"removed": old_size - new_size, "remaining": new_size},
167
  )
168
  )
169
 
@@ -231,9 +245,8 @@ async def submission_loop(
231
  This is the core of the agent (like submission_loop in codex.rs:1259-1340)
232
  """
233
 
234
- # Create session and assign tool router
235
- session = Session(event_queue, config=config)
236
- session.tool_router = tool_router
237
  print("🤖 Agent loop started")
238
 
239
  # Main processing loop
 
1
+ """loop
2
  Main agent implementation with integrated tool system and MCP support
3
  """
4
 
 
58
  tool_choice="auto",
59
  )
60
 
61
+ # Extract text response, token usage, and tool calls
62
  message = response.choices[0].message
 
 
63
  content = message.content
64
+ token_count = response.usage.total_tokens
65
  tool_calls: list[ToolCall] = message.get("tool_calls", [])
66
 
67
  # If no tool calls, add assistant message and we're done
68
  if not tool_calls:
69
  if content:
70
  assistant_msg = Message(role="assistant", content=content)
71
+ session.context_manager.add_message(assistant_msg, token_count)
72
  await session.send_event(
73
  Event(
74
  event_type="assistant_message",
 
81
  # Add assistant message with tool calls to history
82
  # LiteLLM will format this correctly for the provider
83
  assistant_msg = Message(
84
+ role="assistant",
85
+ content=content,
86
+ tool_calls=tool_calls,
87
  )
88
+ session.context_manager.add_message(assistant_msg, token_count)
89
 
90
  if content:
91
  await session.send_event(
 
141
  )
142
  break
143
 
144
+ old_length = session.context_manager.context_length
145
+ await session.context_manager.compact(model_name=session.config.model_name)
146
+ new_length = session.context_manager.context_length
147
+
148
+ if new_length != old_length:
149
+ await session.send_event(
150
+ Event(
151
+ event_type="compacted",
152
+ data={"old_tokens": old_length, "new_tokens": new_length},
153
+ )
154
+ )
155
+
156
  await session.send_event(
157
  Event(
158
  event_type="turn_complete",
 
170
  @staticmethod
171
  async def compact(session: Session) -> None:
172
  """Handle compact (like compact in codex.rs:1317)"""
173
+ old_length = session.context_manager.context_length
174
+ await session.context_manager.compact(model_name=session.config.model_name)
175
+ new_length = session.context_manager.context_length
176
 
177
  await session.send_event(
178
  Event(
179
  event_type="compacted",
180
+ data={"removed": old_length, "remaining": new_length},
181
  )
182
  )
183
 
 
245
  This is the core of the agent (like submission_loop in codex.rs:1259-1340)
246
  """
247
 
248
+ # Create session with tool router
249
+ session = Session(event_queue, config=config, tool_router=tool_router)
 
250
  print("🤖 Agent loop started")
251
 
252
  # Main processing loop
agent/core/session.py CHANGED
@@ -4,6 +4,8 @@ from dataclasses import dataclass
4
  from enum import Enum
5
  from typing import Any, Optional
6
 
 
 
7
  from agent.config import Config
8
  from agent.context_manager.manager import ContextManager
9
 
@@ -33,18 +35,24 @@ class Session:
33
  self,
34
  event_queue: asyncio.Queue,
35
  config: Config | None = None,
 
36
  ):
37
- self.context_manager = ContextManager()
 
 
 
 
 
 
 
38
  self.event_queue = event_queue
39
  self.session_id = str(uuid.uuid4())
40
  self.config = config or Config(
41
  model_name="anthropic/claude-sonnet-4-5-20250929",
42
  tools=[],
43
- system_prompt_path="",
44
  )
45
  self.is_running = True
46
  self.current_task: asyncio.Task | None = None
47
- self.tool_router = None # Set by submission_loop
48
 
49
  async def send_event(self, event: Event) -> None:
50
  """Send event back to client"""
 
4
  from enum import Enum
5
  from typing import Any, Optional
6
 
7
+ from litellm import get_max_tokens
8
+
9
  from agent.config import Config
10
  from agent.context_manager.manager import ContextManager
11
 
 
35
  self,
36
  event_queue: asyncio.Queue,
37
  config: Config | None = None,
38
+ tool_router=None,
39
  ):
40
+ self.tool_router = tool_router
41
+ tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
42
+ self.context_manager = ContextManager(
43
+ max_context=get_max_tokens(config.model_name),
44
+ compact_size=0.1,
45
+ untouched_messages=5,
46
+ tool_specs=tool_specs,
47
+ )
48
  self.event_queue = event_queue
49
  self.session_id = str(uuid.uuid4())
50
  self.config = config or Config(
51
  model_name="anthropic/claude-sonnet-4-5-20250929",
52
  tools=[],
 
53
  )
54
  self.is_running = True
55
  self.current_task: asyncio.Task | None = None
 
56
 
57
  async def send_event(self, event: Event) -> None:
58
  """Send event back to client"""
agent/main.py CHANGED
@@ -88,6 +88,10 @@ async def event_listener(
88
  break
89
  elif event.event_type == "processing":
90
  print("⏳ Processing...", flush=True)
 
 
 
 
91
  # Silently ignore other events
92
 
93
  except asyncio.CancelledError:
 
88
  break
89
  elif event.event_type == "processing":
90
  print("⏳ Processing...", flush=True)
91
+ elif event.event_type == "compacted":
92
+ old_tokens = event.data.get("old_tokens", 0) if event.data else 0
93
+ new_tokens = event.data.get("new_tokens", 0) if event.data else 0
94
+ print(f"📦 Compacted context: {old_tokens} → {new_tokens} tokens")
95
  # Silently ignore other events
96
 
97
  except asyncio.CancelledError:
agent/prompts/system_prompt.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompt: |
2
+ You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
3
+
4
+ # Available Tools
5
+
6
+ You have access to the following categories of tools:
7
+
8
+ - Hugging Face Hub: Search and interact with models, datasets, papers, and documentation
9
+ - Spaces: Use and discover ML applications
10
+ - Jobs: Manage compute jobs for training and inference
11
+ - Image Generation: Generate and transform images
12
+
13
+ # Agency
14
+
15
+ You take initiative when the user asks you to do something, maintaining an appropriate balance between:
16
+
17
+ 1. Doing the right thing when asked, including taking actions and follow-up actions
18
+ 2. Not surprising the user with actions you take without asking
19
+ 3. Not adding unnecessary explanations after completing tasks
20
+
21
+ # Task Approach
22
+
23
+ For ML engineering tasks:
24
+ 1. Use all available tools to complete the task
25
+ 2. Search for relevant models, datasets, and documentation on Hugging Face Hub
26
+ 3. Leverage existing resources before creating new ones
27
+ 4. Invoke multiple independent tools simultaneously for efficiency
28
+
29
+ # Examples
30
+
31
+ <example>
32
+ <user>Find the best text generation models</user>
33
+ <response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
34
+
35
+ Top trending text generation models:
36
+ - meta-llama/Llama-3.1-405B-Instruct
37
+ - mistralai/Mistral-Large-2
38
+ </response>
39
+ </example>
40
+
41
+ <example>
42
+ <user>Search for papers about reinforcement learning from human feedback</user>
43
+ <response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
44
+
45
+ Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
46
+ </response>
47
+ </example>
48
+
49
+ <example>
50
+ <user>Find datasets for sentiment analysis</user>
51
+ <response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
52
+
53
+ Top sentiment analysis datasets:
54
+ - stanfordnlp/imdb (25k reviews)
55
+ - tweet_eval (sentiment task)
56
+ </response>
57
+ </example>
58
+
59
+ <example>
60
+ <user>How do I use the transformers library for text generation?</user>
61
+ <response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
62
+
63
+ [provides concise answer based on documentation]
64
+ </response>
65
+ </example>
66
+
67
+ <example>
68
+ <user>Generate an image of a sunset over mountains</user>
69
+ <response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
70
+
71
+ [returns generated image]
72
+ </response>
73
+ </example>
74
+
75
+ <example>
76
+ <user>Get details about the bert-base-uncased model</user>
77
+ <response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
78
+
79
+ BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
80
+ </response>
81
+ </example>
82
+
83
+ # Conventions
84
+
85
+ - Always search Hugging Face Hub for existing resources before suggesting custom implementations
86
+ - When referencing models, datasets, or papers, include direct links from search results
87
+ - Never assume a library is available - check documentation first
88
+ - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
89
+ - For training tasks, consider compute requirements and suggest appropriate hardware
90
+ - Never expose or log API keys, tokens, or secrets
91
+
92
+ # Communication Style
93
+
94
+ - Be concise and direct
95
+ - Skip flattery and unnecessary preamble
96
+ - Respond in 1-3 sentences when possible
97
+ - No emojis, minimal exclamation points
98
+ - Don't apologize for limitations - offer alternatives or keep responses short
99
+ - Don't thank the user for results
100
+ - Explain what you're doing for non-trivial operations
101
+
102
+ Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
103
+
104
+ <example>
105
+ <user>What's the state-of-the-art model for image classification?</user>
106
+ <response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
107
+ </example>
108
+
109
+ <example>
110
+ <user>How many parameters does GPT-3 have?</user>
111
+ <response>175 billion</response>
112
+ </example>