""" AETHER Agent Orchestration. Integrates: - smolagents multi-agent hierarchy (Manager + Workers) - MLPO: Multi-agent guided Leader Policy Optimization - BabyAGI task creation/prioritization/execution loop - Agentic Neural Networks: textual backpropagation - Yunjue Agent: Manager/Executor/Developer/Integrator/Merger/Aggregator roles """ import torch import torch.nn as nn from typing import Dict, List, Any, Optional, Callable import logging import time from collections import deque logger = logging.getLogger("AETHER.Agents") class AgentRole: """Role definitions inspired by Yunjue Agent multi-agent system.""" MANAGER = "manager" EXECUTOR = "executor" DEVELOPER = "developer" INTEGRATOR = "integrator" MERGER = "merger" AGGREGATOR = "aggregator" RESEARCHER = "researcher" class BaseAgent(nn.Module): """Base agent with policy network. Implements MLPO-style leader policy.""" def __init__(self, role: str, hidden_dim: int = 128, vocab_size: int = 32000): super().__init__() self.role = role self.hidden_dim = hidden_dim self.encoder = nn.Sequential( nn.Embedding(vocab_size, hidden_dim), nn.LSTM(hidden_dim, hidden_dim, batch_first=True), ) self.policy_head = nn.Linear(hidden_dim, hidden_dim) self.value_head = nn.Linear(hidden_dim, 1) self.task_history: deque = deque(maxlen=100) self.performance_log: List[float] = [] def forward(self, input_ids: torch.Tensor) -> Dict[str, torch.Tensor]: embeds = self.encoder[0](input_ids) lstm_out, _ = self.encoder[1](embeds) hidden = lstm_out[:, -1, :] return { "policy_logits": self.policy_head(hidden), "value": self.value_head(hidden), "hidden": hidden, } def act(self, observation: str) -> str: self.task_history.append({ "observation": observation, "timestamp": time.time(), }) role_actions = { AgentRole.MANAGER: f"[MANAGER] Decomposing task: '{observation[:50]}...'", AgentRole.EXECUTOR: f"[EXECUTOR] Executing: '{observation[:50]}...'", AgentRole.DEVELOPER: f"[DEVELOPER] Synthesizing tool for: '{observation[:50]}...'", AgentRole.INTEGRATOR: f"[INTEGRATOR] Integrating components for: '{observation[:50]}...'", AgentRole.MERGER: f"[MERGER] Consolidating tools for: '{observation[:50]}...'", AgentRole.AGGREGATOR: f"[AGGREGATOR] Aggregating results for: '{observation[:50]}...'", AgentRole.RESEARCHER: f"[RESEARCHER] Exploring knowledge for: '{observation[:50]}...'", } return role_actions.get(self.role, f"[{self.role.upper()}] Processing: '{observation}'") def update(self, reward: float): self.performance_log.append(reward) class HierarchicalAgent(nn.Module): """ HiMAC-style hierarchical agent with Macro-Policy and Micro-Policy. 
class HierarchicalAgent(nn.Module):
    """
    HiMAC-style hierarchical agent with a Macro-Policy and a Micro-Policy.

    Macro: generates a blueprint (sequence of sub-goals).
    Micro: executes atomic actions conditioned on the blueprint.
    """

    def __init__(self, macro_dim: int = 256, micro_dim: int = 128, num_subgoals: int = 5):
        super().__init__()
        self.macro_dim = macro_dim
        self.micro_dim = micro_dim
        self.num_subgoals = num_subgoals
        self.macro_encoder = nn.LSTM(macro_dim, macro_dim, batch_first=True)
        self.macro_decoder = nn.LSTM(macro_dim, macro_dim, batch_first=True)
        self.subgoal_head = nn.Linear(macro_dim, num_subgoals)
        # Learned token: blueprint decoding stops when the decoder state aligns with it.
        self.termination_token = nn.Parameter(torch.randn(macro_dim))
        self.micro_encoder = nn.LSTM(micro_dim + macro_dim, micro_dim, batch_first=True)
        self.action_head = nn.Linear(micro_dim, 50)
        self.current_blueprint: Optional[List[str]] = None
        self.active_subgoal_idx = 0

    def generate_blueprint(self, task_embedding: torch.Tensor) -> List[str]:
        """Autoregressively decode up to num_subgoals sub-goals (assumes batch size 1)."""
        batch_size = task_embedding.size(0)
        hidden = (
            torch.zeros(1, batch_size, self.macro_dim, device=task_embedding.device),
            torch.zeros(1, batch_size, self.macro_dim, device=task_embedding.device),
        )
        blueprints = []
        input_token = task_embedding.unsqueeze(1)
        for _ in range(self.num_subgoals):
            out, hidden = self.macro_decoder(input_token, hidden)
            subgoal_logits = self.subgoal_head(out.squeeze(1))
            subgoal_id = torch.argmax(subgoal_logits, dim=-1)
            # Stop decoding once the state closely matches the termination token.
            similarity = torch.cosine_similarity(out.squeeze(1), self.termination_token.unsqueeze(0))
            if similarity.item() > 0.9:
                break
            blueprints.append(f"subgoal_{subgoal_id.item()}")
            input_token = out
        self.current_blueprint = blueprints
        self.active_subgoal_idx = 0
        return blueprints

    def execute_action(self, observation: torch.Tensor,
                       blueprint: Optional[List[str]] = None) -> torch.Tensor:
        if blueprint is not None:
            self.current_blueprint = blueprint
        if not self.current_blueprint:
            return torch.zeros(1, self.action_head.out_features)
        active_subgoal = self.current_blueprint[
            min(self.active_subgoal_idx, len(self.current_blueprint) - 1)
        ]
        # Placeholder: a random vector stands in for a learned encoding of `active_subgoal`.
        subgoal_embed = torch.randn(1, self.macro_dim)
        combined = torch.cat([observation, subgoal_embed], dim=-1)
        out, _ = self.micro_encoder(combined.unsqueeze(1))
        action_logits = self.action_head(out.squeeze(1))
        return action_logits

    def advance_subgoal(self):
        self.active_subgoal_idx += 1

    def reset(self):
        self.current_blueprint = None
        self.active_subgoal_idx = 0
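# Usage sketch (illustrative only): macro pass first, then micro. The tensor
# shapes below assume the default dimensions and a batch size of 1.
#
#   h = HierarchicalAgent(macro_dim=256, micro_dim=128, num_subgoals=5)
#   plan = h.generate_blueprint(torch.randn(1, 256))  # e.g. ["subgoal_3", ...]
#   logits = h.execute_action(torch.randn(1, 128))    # (1, 50) action logits
#   h.advance_subgoal()                               # move on to the next sub-goal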
class BabyAGILoop:
    """BabyAGI-inspired task-driven autonomous loop."""

    def __init__(self, objective: str, max_iterations: int = 50):
        self.objective = objective
        self.max_iterations = max_iterations
        self.task_list: deque = deque()
        self.completed_tasks: List[Dict] = []
        self.results: Dict[int, Any] = {}
        self.iteration = 0

    def create_tasks(self, previous_result: str, task_description: str) -> List[str]:
        """Stub task-creation step: spawn follow-up tasks from the latest result."""
        new_tasks = [
            f"Sub-task {len(self.task_list) + i}: Analyze {previous_result[:30]}..."
            for i in range(3)
        ]
        return new_tasks

    def prioritize_tasks(self) -> List[str]:
        """Rank pending tasks by word overlap with the objective (highest first)."""
        tasks = list(self.task_list)
        scores = []
        for task in tasks:
            overlap = sum(1 for word in self.objective.lower().split() if word in task.lower())
            scores.append(overlap)
        sorted_tasks = [t for _, t in sorted(zip(scores, tasks),
                                             key=lambda pair: pair[0], reverse=True)]
        return sorted_tasks

    def execute_task(self, task: str, agent: BaseAgent) -> str:
        result = agent.act(task)
        self.completed_tasks.append({
            "task": task,
            "result": result,
            "iteration": self.iteration,
        })
        return result

    def run(self, execution_agent: BaseAgent) -> Dict[str, Any]:
        self.task_list.append(self.objective)
        while self.iteration < self.max_iterations and self.task_list:
            prioritized = self.prioritize_tasks()
            self.task_list = deque(prioritized)
            current_task = self.task_list.popleft()
            result = self.execute_task(current_task, execution_agent)
            self.results[self.iteration] = result
            new_tasks = self.create_tasks(result, current_task)
            for t in new_tasks:
                if t not in self.task_list:
                    self.task_list.append(t)
            self.iteration += 1
            logger.info(f"BabyAGI iteration {self.iteration}: "
                        f"tasks_remaining={len(self.task_list)}, "
                        f"completed={len(self.completed_tasks)}")
        return {
            "completed_tasks": self.completed_tasks,
            "results": self.results,
            "iterations": self.iteration,
            "objective": self.objective,
        }
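# Usage sketch (illustrative only): one objective drives repeated
# create/prioritize/execute cycles until max_iterations is reached.
#
#   worker = BaseAgent(AgentRole.EXECUTOR)
#   loop = BabyAGILoop("Map the competitive landscape", max_iterations=5)
#   summary = loop.run(worker)
#   print(summary["iterations"], len(summary["completed_tasks"]))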
class AetherAgentOrchestrator(nn.Module):
    """
    Multi-agent orchestrator combining:
    - smolagents hierarchical delegation
    - MLPO: train a single leader, peers untrained
    - Agentic Neural Networks: textual backpropagation
    - CoMAS: co-evolution via interaction rewards
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.agents = nn.ModuleDict({
            "manager": BaseAgent(AgentRole.MANAGER, hidden_dim=config.macro_policy_dim),
            "executor": BaseAgent(AgentRole.EXECUTOR, hidden_dim=config.micro_policy_dim),
            "developer": BaseAgent(AgentRole.DEVELOPER, hidden_dim=config.micro_policy_dim),
            "researcher": BaseAgent(AgentRole.RESEARCHER, hidden_dim=config.micro_policy_dim),
        })
        # MLPO: a single trained leader synthesizes the peer agents' outputs.
        self.leader = BaseAgent(AgentRole.MANAGER, hidden_dim=config.macro_policy_dim)
        self.hierarchical = HierarchicalAgent(
            macro_dim=config.macro_policy_dim,
            micro_dim=config.micro_policy_dim,
        )
        self.routing_weights = nn.Parameter(torch.ones(len(self.agents)))
        self.aggregation_gate = nn.Softmax(dim=0)
        self.agent_tasks: Dict[str, BabyAGILoop] = {}
        self.task_count = 0
        self.agent_interactions: List[Dict] = []

    def forward(self, task: str, context: Dict[str, Any]) -> Dict[str, Any]:
        # Placeholder: a random vector stands in for a learned task embedding.
        task_embed = torch.randn(1, self.config.macro_policy_dim)
        blueprint = self.hierarchical.generate_blueprint(task_embed)
        routing_probs = self.aggregation_gate(self.routing_weights)
        agent_outputs = {}
        for i, (name, agent) in enumerate(self.agents.items()):
            if name == "manager":
                continue  # the manager slot is handled by the leader below
            weight = routing_probs[i].item()
            if weight < 0.15:
                continue  # prune agents the router considers irrelevant
            sub_task = blueprint[min(i, len(blueprint) - 1)] if blueprint else task
            output = agent.act(f"[{name}] {sub_task}")
            agent_outputs[name] = {
                "output": output,
                "weight": weight,
                "sub_task": sub_task,
            }
        synthesized = self.leader.act(
            f"Synthesize: {task} with inputs: {list(agent_outputs.keys())}"
        )
        self.agent_interactions.append({
            "task": task,
            "blueprint": blueprint,
            "agent_outputs": agent_outputs,
            "leader_synthesis": synthesized,
            "routing_probs": routing_probs.detach().cpu().tolist(),
            "timestamp": time.time(),
        })
        self.task_count += 1
        return {
            "output": synthesized,
            "blueprint": blueprint,
            "agent_outputs": agent_outputs,
            "routing_weights": routing_probs.detach().cpu().tolist(),
        }

    def execute(self, task: str, kg_context: Any, context: Dict[str, Any]) -> Dict[str, Any]:
        return self.forward(task, context)

    def textual_backprop(self, global_gradient: str, performance_feedback: float,
                         beta: float = 0.7) -> Dict[str, str]:
        """Agentic-NN-style textual backpropagation: blend the global textual
        gradient with each agent's previous gradient using mixing weight beta."""
        updates = {}
        for name, agent in self.agents.items():
            local_grad = f"{global_gradient} + Agent {name} performance: {performance_feedback}"
            if hasattr(agent, 'previous_gradient'):
                blended = f"{beta}*{local_grad} + {1 - beta}*{agent.previous_gradient}"
            else:
                blended = local_grad
            agent.previous_gradient = blended
            updates[name] = blended
        self.routing_weights.data += performance_feedback * 0.01
        return updates

    def co_evolve_interactions(self) -> List[Dict]:
        """CoMAS-style intrinsic rewards derived from recent interactions."""
        rewards = []
        for interaction in self.agent_interactions[-10:]:
            num_agents_involved = len(interaction.get("agent_outputs", {}))
            blueprint_complexity = len(interaction.get("blueprint", []))
            reward = num_agents_involved * 0.1 + min(blueprint_complexity * 0.05, 0.5)
            rewards.append({
                "interaction_id": id(interaction),
                "reward": reward,
                "agents_involved": num_agents_involved,
            })
        return rewards

    def run_babyagi(self, objective: str, max_iterations: int = 20) -> Dict[str, Any]:
        loop = BabyAGILoop(objective, max_iterations)
        result = loop.run(self.agents["manager"])
        self.agent_tasks[objective] = loop
        return result

    def stats(self) -> Dict[str, Any]:
        return {
            "total_tasks": self.task_count,
            "num_agents": len(self.agents),
            "total_interactions": len(self.agent_interactions),
            "routing_weights": self.routing_weights.detach().cpu().tolist(),
            "active_tasks": len(self.agent_tasks),
        }
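if __name__ == "__main__":
    # Minimal smoke test for the orchestrator. `SimpleNamespace` stands in for
    # the real AETHER config object; only the two attributes read above
    # (macro_policy_dim, micro_policy_dim) are assumed to exist.
    from types import SimpleNamespace

    logging.basicConfig(level=logging.INFO)
    cfg = SimpleNamespace(macro_policy_dim=256, micro_policy_dim=128)
    orchestrator = AetherAgentOrchestrator(cfg)

    result = orchestrator.execute("Draft a deployment plan", kg_context=None, context={})
    print(result["output"])
    print(orchestrator.textual_backprop("Improve decomposition depth", performance_feedback=0.8))
    print(orchestrator.stats())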