camdog920 committed
Commit 786ed57 · verified · 1 Parent(s): ece3f12

Upload aether/agents.py

Files changed (1)
  1. aether/agents.py +363 -0
aether/agents.py ADDED
"""
AETHER Agent Orchestration.

Integrates:
- smolagents multi-agent hierarchy (Manager + Workers)
- MLPO: Multi-agent guided Leader Policy Optimization
- BabyAGI task creation/prioritization/execution loop
- Agentic Neural Networks: textual backpropagation
- Yunjue Agent: Manager/Executor/Developer/Integrator/Merger/Aggregator roles
"""

import torch
import torch.nn as nn
from typing import Dict, List, Any, Optional
import logging
import time
from collections import deque

logger = logging.getLogger("AETHER.Agents")

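# NOTE: AetherAgentOrchestrator (below) reads only `macro_policy_dim` and
# `micro_policy_dim` from the `config` object it receives. The real config
# class lives elsewhere in the AETHER package; a minimal stand-in for
# exercising this module in isolation (a hypothetical sketch, not the
# project's actual API) could be:
#
#     from dataclasses import dataclass
#
#     @dataclass
#     class AetherConfig:
#         macro_policy_dim: int = 256
#         micro_policy_dim: int = 128
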

class AgentRole:
    """Role definitions inspired by the Yunjue Agent multi-agent system."""

    MANAGER = "manager"
    EXECUTOR = "executor"
    DEVELOPER = "developer"
    INTEGRATOR = "integrator"
    MERGER = "merger"
    AGGREGATOR = "aggregator"
    RESEARCHER = "researcher"


class BaseAgent(nn.Module):
    """Base agent with a small policy network. Implements an MLPO-style leader policy."""

    def __init__(self, role: str, hidden_dim: int = 128,
                 vocab_size: int = 32000):
        super().__init__()
        self.role = role
        self.hidden_dim = hidden_dim

        # Token embedding + LSTM encoder, kept as separate attributes:
        # nn.LSTM returns (output, state), so it cannot be chained through
        # an nn.Sequential.
        self.embedding = nn.Embedding(vocab_size, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.policy_head = nn.Linear(hidden_dim, hidden_dim)
        self.value_head = nn.Linear(hidden_dim, 1)

        self.task_history: deque = deque(maxlen=100)
        self.performance_log: List[float] = []

    def forward(self, input_ids: torch.Tensor) -> Dict[str, torch.Tensor]:
        embeds = self.embedding(input_ids)
        lstm_out, _ = self.lstm(embeds)
        # Use the final timestep's hidden state as the sequence summary.
        hidden = lstm_out[:, -1, :]

        return {
            "policy_logits": self.policy_head(hidden),
            "value": self.value_head(hidden),
            "hidden": hidden,
        }

    def act(self, observation: str) -> str:
        """Return a role-specific textual action for the given observation."""
        self.task_history.append({
            "observation": observation,
            "timestamp": time.time(),
        })

        role_actions = {
            AgentRole.MANAGER: f"[MANAGER] Decomposing task: '{observation[:50]}...'",
            AgentRole.EXECUTOR: f"[EXECUTOR] Executing: '{observation[:50]}...'",
            AgentRole.DEVELOPER: f"[DEVELOPER] Synthesizing tool for: '{observation[:50]}...'",
            AgentRole.INTEGRATOR: f"[INTEGRATOR] Integrating components for: '{observation[:50]}...'",
            AgentRole.MERGER: f"[MERGER] Consolidating tools for: '{observation[:50]}...'",
            AgentRole.AGGREGATOR: f"[AGGREGATOR] Aggregating results for: '{observation[:50]}...'",
            AgentRole.RESEARCHER: f"[RESEARCHER] Exploring knowledge for: '{observation[:50]}...'",
        }

        return role_actions.get(self.role, f"[{self.role.upper()}] Processing: '{observation}'")

    def update(self, reward: float):
        """Record a scalar reward for later performance analysis."""
        self.performance_log.append(reward)

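# Usage sketch for BaseAgent (illustrative; shapes follow the defaults above):
#
#     agent = BaseAgent(AgentRole.EXECUTOR, hidden_dim=64)
#     ids = torch.randint(0, 32000, (2, 16))    # batch of 2, 16 tokens each
#     out = agent(ids)
#     out["policy_logits"].shape                # torch.Size([2, 64])
#     out["value"].shape                        # torch.Size([2, 1])
#     agent.act("compile the weekly report")    # "[EXECUTOR] Executing: ..."
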

class HierarchicalAgent(nn.Module):
    """
    HiMAC-style hierarchical agent with a Macro-Policy and a Micro-Policy.
    Macro: generates a blueprint (sequence of sub-goals).
    Micro: executes atomic actions conditioned on the active sub-goal.
    """

    def __init__(self, macro_dim: int = 256, micro_dim: int = 128,
                 num_subgoals: int = 5):
        super().__init__()
        self.macro_dim = macro_dim
        self.micro_dim = micro_dim
        self.num_subgoals = num_subgoals

        # Macro-policy: the encoder is defined for symmetry with the decoder
        # but is unused in this sketch; the decoder autoregressively emits
        # sub-goal tokens.
        self.macro_encoder = nn.LSTM(macro_dim, macro_dim, batch_first=True)
        self.macro_decoder = nn.LSTM(macro_dim, macro_dim, batch_first=True)
        self.subgoal_head = nn.Linear(macro_dim, num_subgoals)
        # Learned token whose similarity to the decoder state signals "stop".
        self.termination_token = nn.Parameter(torch.randn(macro_dim))

        # Micro-policy: conditions on observation + active sub-goal embedding.
        self.micro_encoder = nn.LSTM(micro_dim + macro_dim, micro_dim, batch_first=True)
        self.action_head = nn.Linear(micro_dim, 50)  # 50 = discrete action space size

        self.current_blueprint: Optional[List[str]] = None
        self.active_subgoal_idx = 0

    def generate_blueprint(self, task_embedding: torch.Tensor) -> List[str]:
        """Autoregressively decode up to `num_subgoals` sub-goal IDs."""
        batch_size = task_embedding.size(0)
        device = task_embedding.device
        hidden = (torch.zeros(1, batch_size, self.macro_dim, device=device),
                  torch.zeros(1, batch_size, self.macro_dim, device=device))

        blueprints = []
        input_token = task_embedding.unsqueeze(1)

        for _ in range(self.num_subgoals):
            out, hidden = self.macro_decoder(input_token, hidden)
            subgoal_logits = self.subgoal_head(out.squeeze(1))
            subgoal_id = torch.argmax(subgoal_logits, dim=-1)

            # Stop decoding once the state aligns with the termination token
            # (assumes batch_size == 1, as used by the orchestrator).
            similarity = torch.cosine_similarity(out.squeeze(1),
                                                 self.termination_token.unsqueeze(0))
            if similarity.item() > 0.9:
                break

            blueprints.append(f"subgoal_{subgoal_id.item()}")
            input_token = out

        self.current_blueprint = blueprints
        self.active_subgoal_idx = 0
        return blueprints

    def execute_action(self, observation: torch.Tensor,
                       blueprint: Optional[List[str]] = None) -> torch.Tensor:
        if blueprint is not None:
            self.current_blueprint = blueprint

        if not self.current_blueprint:
            # No blueprint: return zero logits over the action space.
            return torch.zeros(1, self.action_head.out_features,
                               device=observation.device)

        # Clamp the index so we keep conditioning on the last sub-goal once
        # the blueprint is exhausted.
        active_subgoal = self.current_blueprint[
            min(self.active_subgoal_idx, len(self.current_blueprint) - 1)
        ]

        # Placeholder embedding for `active_subgoal`; a full implementation
        # would look up or encode the sub-goal string instead.
        subgoal_embed = torch.randn(1, self.macro_dim, device=observation.device)
        combined = torch.cat([observation, subgoal_embed], dim=-1)

        out, _ = self.micro_encoder(combined.unsqueeze(1))
        action_logits = self.action_head(out.squeeze(1))

        return action_logits

    def advance_subgoal(self):
        self.active_subgoal_idx += 1

    def reset(self):
        self.current_blueprint = None
        self.active_subgoal_idx = 0

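# Usage sketch for HierarchicalAgent (illustrative):
#
#     hier = HierarchicalAgent(macro_dim=256, micro_dim=128)
#     blueprint = hier.generate_blueprint(torch.randn(1, 256))  # e.g. ["subgoal_3", ...]
#     logits = hier.execute_action(torch.randn(1, 128))         # (1, 50) action logits
#     hier.advance_subgoal()                                    # move to the next sub-goal
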

class BabyAGILoop:
    """BabyAGI-inspired task-driven autonomous loop."""

    def __init__(self, objective: str, max_iterations: int = 50):
        self.objective = objective
        self.max_iterations = max_iterations
        self.task_list: deque = deque()
        self.completed_tasks: List[Dict] = []
        self.results: Dict[int, Any] = {}
        self.iteration = 0

    def create_tasks(self, previous_result: str, task_description: str) -> List[str]:
        """Spawn follow-up tasks from the latest result.

        `task_description` is accepted for API symmetry with BabyAGI but is
        unused by this stub generator.
        """
        new_tasks = [
            f"Sub-task {len(self.task_list) + i}: Analyze {previous_result[:30]}..."
            for i in range(3)
        ]
        return new_tasks

    def prioritize_tasks(self) -> List[str]:
        """Rank pending tasks by keyword overlap with the objective."""
        tasks = list(self.task_list)
        scores = []
        for task in tasks:
            overlap = sum(1 for word in self.objective.lower().split()
                          if word in task.lower())
            scores.append(overlap)

        # Sort on score only, so ties keep their original order instead of
        # falling back to lexicographic comparison of the task strings.
        sorted_tasks = [t for _, t in sorted(zip(scores, tasks),
                                             key=lambda pair: pair[0],
                                             reverse=True)]
        return sorted_tasks

    def execute_task(self, task: str, agent: BaseAgent) -> str:
        result = agent.act(task)
        self.completed_tasks.append({
            "task": task,
            "result": result,
            "iteration": self.iteration,
        })
        return result

    def run(self, execution_agent: BaseAgent) -> Dict[str, Any]:
        """Run the prioritize/execute/create cycle until the task list
        empties or `max_iterations` is reached."""
        self.task_list.append(self.objective)

        while self.iteration < self.max_iterations and self.task_list:
            prioritized = self.prioritize_tasks()
            self.task_list = deque(prioritized)

            current_task = self.task_list.popleft()
            result = self.execute_task(current_task, execution_agent)
            self.results[self.iteration] = result

            # Spawn follow-up tasks, skipping duplicates already queued.
            new_tasks = self.create_tasks(result, current_task)
            for t in new_tasks:
                if t not in self.task_list:
                    self.task_list.append(t)

            self.iteration += 1

            logger.info(f"BabyAGI iteration {self.iteration}: "
                        f"tasks_remaining={len(self.task_list)}, "
                        f"completed={len(self.completed_tasks)}")

        return {
            "completed_tasks": self.completed_tasks,
            "results": self.results,
            "iterations": self.iteration,
            "objective": self.objective,
        }

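# Usage sketch for BabyAGILoop (illustrative): the stub create_tasks() always
# spawns three follow-ups, so the loop runs until max_iterations is reached.
#
#     worker = BaseAgent(AgentRole.EXECUTOR)
#     loop = BabyAGILoop("summarize the codebase", max_iterations=5)
#     summary = loop.run(worker)
#     summary["iterations"]              # 5
#     len(summary["completed_tasks"])    # 5, one per iteration
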

class AetherAgentOrchestrator(nn.Module):
    """
    Multi-agent orchestrator combining:
    - smolagents hierarchical delegation
    - MLPO: train a single leader; peer agents stay untrained
    - Agentic Neural Networks: textual backpropagation
    - CoMAS: co-evolution via interaction rewards
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

        # Worker pool keyed by role (an nn.ModuleDict, so parameters register).
        self.agents = nn.ModuleDict({
            "manager": BaseAgent(AgentRole.MANAGER, hidden_dim=config.macro_policy_dim),
            "executor": BaseAgent(AgentRole.EXECUTOR, hidden_dim=config.micro_policy_dim),
            "developer": BaseAgent(AgentRole.DEVELOPER, hidden_dim=config.micro_policy_dim),
            "researcher": BaseAgent(AgentRole.RESEARCHER, hidden_dim=config.micro_policy_dim),
        })

        # MLPO leader: the one agent intended to receive policy updates.
        self.leader = BaseAgent(AgentRole.MANAGER, hidden_dim=config.macro_policy_dim)

        self.hierarchical = HierarchicalAgent(
            macro_dim=config.macro_policy_dim,
            micro_dim=config.micro_policy_dim,
        )

        # Learnable per-agent routing logits, normalized to a distribution.
        self.routing_weights = nn.Parameter(torch.ones(len(self.agents)))
        self.aggregation_gate = nn.Softmax(dim=0)

        self.agent_tasks: Dict[str, BabyAGILoop] = {}

        self.task_count = 0
        self.agent_interactions: List[Dict] = []

    def forward(self, task: str, context: Dict[str, Any]) -> Dict[str, Any]:
        # Placeholder task embedding; a full implementation would encode
        # `task` (and `context`) instead of sampling random features.
        task_embed = torch.randn(1, self.config.macro_policy_dim)
        blueprint = self.hierarchical.generate_blueprint(task_embed)

        routing_probs = self.aggregation_gate(self.routing_weights)

        agent_outputs = {}
        for i, (name, agent) in enumerate(self.agents.items()):
            # The manager's role is subsumed by the leader's synthesis step.
            if name == "manager":
                continue

            # Skip agents the router assigns negligible probability.
            weight = routing_probs[i].item()
            if weight < 0.15:
                continue

            sub_task = blueprint[min(i, len(blueprint) - 1)] if blueprint else task
            output = agent.act(f"[{name}] {sub_task}")
            agent_outputs[name] = {
                "output": output,
                "weight": weight,
                "sub_task": sub_task,
            }

        # Leader synthesizes the workers' outputs (MLPO-style aggregation).
        synthesized = self.leader.act(
            f"Synthesize: {task} with inputs: {list(agent_outputs.keys())}"
        )

        self.agent_interactions.append({
            "task": task,
            "blueprint": blueprint,
            "agent_outputs": agent_outputs,
            "leader_synthesis": synthesized,
            "routing_probs": routing_probs.detach().cpu().tolist(),
            "timestamp": time.time(),
        })

        self.task_count += 1

        return {
            "output": synthesized,
            "blueprint": blueprint,
            "agent_outputs": agent_outputs,
            "routing_weights": routing_probs.detach().cpu().tolist(),
        }

    def execute(self, task: str, kg_context: Any, context: Dict[str, Any]) -> Dict[str, Any]:
        """Thin wrapper for callers that also pass knowledge-graph context
        (`kg_context` is currently unused)."""
        return self.forward(task, context)

    def textual_backprop(self, global_gradient: str,
                         performance_feedback: float,
                         beta: float = 0.7) -> Dict[str, str]:
        """Agentic-NN-style textual backpropagation: each agent receives a
        local textual gradient blended with its previous one, with `beta`
        weighting the new gradient."""
        updates = {}
        for name, agent in self.agents.items():
            local_grad = f"{global_gradient} + Agent {name} performance: {performance_feedback}"

            if hasattr(agent, 'previous_gradient'):
                # Blend new and previous textual gradients, weighted by beta.
                blended = f"{beta:g}*{local_grad} + {1 - beta:g}*{agent.previous_gradient}"
            else:
                blended = local_grad

            agent.previous_gradient = blended
            updates[name] = blended

        # Nudge all routing logits by the scalar feedback signal.
        self.routing_weights.data += performance_feedback * 0.01

        return updates

    def co_evolve_interactions(self) -> List[Dict]:
        """CoMAS-style intrinsic rewards over recent interactions: more
        participating agents and richer blueprints earn higher reward."""
        rewards = []

        for interaction in self.agent_interactions[-10:]:
            num_agents_involved = len(interaction.get("agent_outputs", {}))
            blueprint_complexity = len(interaction.get("blueprint", []))

            # 0.1 per participating agent, plus up to 0.5 for blueprint depth.
            reward = num_agents_involved * 0.1 + min(blueprint_complexity * 0.05, 0.5)
            rewards.append({
                "interaction_id": id(interaction),
                "reward": reward,
                "agents_involved": num_agents_involved,
            })

        return rewards

    def run_babyagi(self, objective: str, max_iterations: int = 20) -> Dict[str, Any]:
        loop = BabyAGILoop(objective, max_iterations)
        result = loop.run(self.agents["manager"])
        self.agent_tasks[objective] = loop
        return result

    def stats(self) -> Dict[str, Any]:
        return {
            "total_tasks": self.task_count,
            "num_agents": len(self.agents),
            "total_interactions": len(self.agent_interactions),
            "routing_weights": self.routing_weights.detach().cpu().tolist(),
            "active_tasks": len(self.agent_tasks),
        }
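

# End-to-end usage sketch (illustrative; AetherConfig is the hypothetical
# stand-in sketched near the top of this module):
#
#     config = AetherConfig(macro_policy_dim=256, micro_policy_dim=128)
#     orchestrator = AetherAgentOrchestrator(config)
#     result = orchestrator.forward("draft a deployment plan", context={})
#     result["blueprint"], result["agent_outputs"]
#     orchestrator.textual_backprop("tighten the plan's scope", performance_feedback=0.8)
#     orchestrator.co_evolve_interactions()
#     orchestrator.stats()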