camdog920 committed
Commit 786ed57 · verified · 1 Parent(s): ece3f12

Upload aether/agents.py

Files changed (1)
  1. aether/agents.py +363 -0
aether/agents.py ADDED
"""
AETHER Agent Orchestration.

Integrates:
- smolagents multi-agent hierarchy (Manager + Workers)
- MLPO: Multi-agent guided Leader Policy Optimization
- BabyAGI task creation/prioritization/execution loop
- Agentic Neural Networks: textual backpropagation
- Yunjue Agent: Manager/Executor/Developer/Integrator/Merger/Aggregator roles
"""

import torch
import torch.nn as nn
from typing import Dict, List, Any, Optional
import logging
import time
from collections import deque

logger = logging.getLogger("AETHER.Agents")

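# NOTE: AetherAgentOrchestrator (below) reads only `macro_policy_dim` and
# `micro_policy_dim` from the `config` object it receives. The real config
# class lives elsewhere in the AETHER package; a minimal stand-in for
# exercising this module in isolation (a hypothetical sketch, not the
# project's actual API) could be:
#
#     from dataclasses import dataclass
#
#     @dataclass
#     class AetherConfig:
#         macro_policy_dim: int = 256
#         micro_policy_dim: int = 128
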

class AgentRole:
    """Role definitions inspired by the Yunjue Agent multi-agent system."""

    MANAGER = "manager"
    EXECUTOR = "executor"
    DEVELOPER = "developer"
    INTEGRATOR = "integrator"
    MERGER = "merger"
    AGGREGATOR = "aggregator"
    RESEARCHER = "researcher"


class BaseAgent(nn.Module):
    """Base agent with a small policy network. Implements an MLPO-style leader policy."""

    def __init__(self, role: str, hidden_dim: int = 128,
                 vocab_size: int = 32000):
        super().__init__()
        self.role = role
        self.hidden_dim = hidden_dim

        # Token embedding + LSTM encoder, kept as separate attributes:
        # nn.LSTM returns (output, state), so it cannot be chained through
        # an nn.Sequential.
        self.embedding = nn.Embedding(vocab_size, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.policy_head = nn.Linear(hidden_dim, hidden_dim)
        self.value_head = nn.Linear(hidden_dim, 1)

        self.task_history: deque = deque(maxlen=100)
        self.performance_log: List[float] = []

    def forward(self, input_ids: torch.Tensor) -> Dict[str, torch.Tensor]:
        embeds = self.embedding(input_ids)
        lstm_out, _ = self.lstm(embeds)
        # Use the final timestep's hidden state as the sequence summary.
        hidden = lstm_out[:, -1, :]

        return {
            "policy_logits": self.policy_head(hidden),
            "value": self.value_head(hidden),
            "hidden": hidden,
        }

    def act(self, observation: str) -> str:
        """Return a role-specific textual action for the given observation."""
        self.task_history.append({
            "observation": observation,
            "timestamp": time.time(),
        })

        role_actions = {
            AgentRole.MANAGER: f"[MANAGER] Decomposing task: '{observation[:50]}...'",
            AgentRole.EXECUTOR: f"[EXECUTOR] Executing: '{observation[:50]}...'",
            AgentRole.DEVELOPER: f"[DEVELOPER] Synthesizing tool for: '{observation[:50]}...'",
            AgentRole.INTEGRATOR: f"[INTEGRATOR] Integrating components for: '{observation[:50]}...'",
            AgentRole.MERGER: f"[MERGER] Consolidating tools for: '{observation[:50]}...'",
            AgentRole.AGGREGATOR: f"[AGGREGATOR] Aggregating results for: '{observation[:50]}...'",
            AgentRole.RESEARCHER: f"[RESEARCHER] Exploring knowledge for: '{observation[:50]}...'",
        }

        return role_actions.get(self.role, f"[{self.role.upper()}] Processing: '{observation}'")

    def update(self, reward: float):
        """Record a scalar reward for later performance analysis."""
        self.performance_log.append(reward)

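# Usage sketch for BaseAgent (illustrative; shapes follow the defaults above):
#
#     agent = BaseAgent(AgentRole.EXECUTOR, hidden_dim=64)
#     ids = torch.randint(0, 32000, (2, 16))    # batch of 2, 16 tokens each
#     out = agent(ids)
#     out["policy_logits"].shape                # torch.Size([2, 64])
#     out["value"].shape                        # torch.Size([2, 1])
#     agent.act("compile the weekly report")    # "[EXECUTOR] Executing: ..."
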

class HierarchicalAgent(nn.Module):
    """
    HiMAC-style hierarchical agent with a Macro-Policy and a Micro-Policy.
    Macro: generates a blueprint (sequence of sub-goals).
    Micro: executes atomic actions conditioned on the active sub-goal.
    """

    def __init__(self, macro_dim: int = 256, micro_dim: int = 128,
                 num_subgoals: int = 5):
        super().__init__()
        self.macro_dim = macro_dim
        self.micro_dim = micro_dim
        self.num_subgoals = num_subgoals

        # Macro-policy: the encoder is defined for symmetry with the decoder
        # but is unused in this sketch; the decoder autoregressively emits
        # sub-goal tokens.
        self.macro_encoder = nn.LSTM(macro_dim, macro_dim, batch_first=True)
        self.macro_decoder = nn.LSTM(macro_dim, macro_dim, batch_first=True)
        self.subgoal_head = nn.Linear(macro_dim, num_subgoals)
        # Learned token whose similarity to the decoder state signals "stop".
        self.termination_token = nn.Parameter(torch.randn(macro_dim))

        # Micro-policy: conditions on observation + active sub-goal embedding.
        self.micro_encoder = nn.LSTM(micro_dim + macro_dim, micro_dim, batch_first=True)
        self.action_head = nn.Linear(micro_dim, 50)  # 50 = discrete action space size

        self.current_blueprint: Optional[List[str]] = None
        self.active_subgoal_idx = 0

    def generate_blueprint(self, task_embedding: torch.Tensor) -> List[str]:
        """Autoregressively decode up to `num_subgoals` sub-goal IDs."""
        batch_size = task_embedding.size(0)
        device = task_embedding.device
        hidden = (torch.zeros(1, batch_size, self.macro_dim, device=device),
                  torch.zeros(1, batch_size, self.macro_dim, device=device))

        blueprints = []
        input_token = task_embedding.unsqueeze(1)

        for _ in range(self.num_subgoals):
            out, hidden = self.macro_decoder(input_token, hidden)
            subgoal_logits = self.subgoal_head(out.squeeze(1))
            subgoal_id = torch.argmax(subgoal_logits, dim=-1)

            # Stop decoding once the state aligns with the termination token
            # (assumes batch_size == 1, as used by the orchestrator).
            similarity = torch.cosine_similarity(out.squeeze(1),
                                                 self.termination_token.unsqueeze(0))
            if similarity.item() > 0.9:
                break

            blueprints.append(f"subgoal_{subgoal_id.item()}")
            input_token = out

        self.current_blueprint = blueprints
        self.active_subgoal_idx = 0
        return blueprints

    def execute_action(self, observation: torch.Tensor,
                       blueprint: Optional[List[str]] = None) -> torch.Tensor:
        if blueprint is not None:
            self.current_blueprint = blueprint

        if not self.current_blueprint:
            # No blueprint: return zero logits over the action space.
            return torch.zeros(1, self.action_head.out_features,
                               device=observation.device)

        # Clamp the index so we keep conditioning on the last sub-goal once
        # the blueprint is exhausted.
        active_subgoal = self.current_blueprint[
            min(self.active_subgoal_idx, len(self.current_blueprint) - 1)
        ]

        # Placeholder embedding for `active_subgoal`; a full implementation
        # would look up or encode the sub-goal string instead.
        subgoal_embed = torch.randn(1, self.macro_dim, device=observation.device)
        combined = torch.cat([observation, subgoal_embed], dim=-1)

        out, _ = self.micro_encoder(combined.unsqueeze(1))
        action_logits = self.action_head(out.squeeze(1))

        return action_logits

    def advance_subgoal(self):
        self.active_subgoal_idx += 1

    def reset(self):
        self.current_blueprint = None
        self.active_subgoal_idx = 0

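# Usage sketch for HierarchicalAgent (illustrative):
#
#     hier = HierarchicalAgent(macro_dim=256, micro_dim=128)
#     blueprint = hier.generate_blueprint(torch.randn(1, 256))  # e.g. ["subgoal_3", ...]
#     logits = hier.execute_action(torch.randn(1, 128))         # (1, 50) action logits
#     hier.advance_subgoal()                                    # move to the next sub-goal
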

class BabyAGILoop:
    """BabyAGI-inspired task-driven autonomous loop."""

    def __init__(self, objective: str, max_iterations: int = 50):
        self.objective = objective
        self.max_iterations = max_iterations
        self.task_list: deque = deque()
        self.completed_tasks: List[Dict] = []
        self.results: Dict[int, Any] = {}
        self.iteration = 0

    def create_tasks(self, previous_result: str, task_description: str) -> List[str]:
        """Spawn follow-up tasks from the latest result.

        `task_description` is accepted for API symmetry with BabyAGI but is
        unused by this stub generator.
        """
        new_tasks = [
            f"Sub-task {len(self.task_list) + i}: Analyze {previous_result[:30]}..."
            for i in range(3)
        ]
        return new_tasks

    def prioritize_tasks(self) -> List[str]:
        """Rank pending tasks by keyword overlap with the objective."""
        tasks = list(self.task_list)
        scores = []
        for task in tasks:
            overlap = sum(1 for word in self.objective.lower().split()
                          if word in task.lower())
            scores.append(overlap)

        # Sort on score only, so ties keep their original order instead of
        # falling back to lexicographic comparison of the task strings.
        sorted_tasks = [t for _, t in sorted(zip(scores, tasks),
                                             key=lambda pair: pair[0],
                                             reverse=True)]
        return sorted_tasks

    def execute_task(self, task: str, agent: BaseAgent) -> str:
        result = agent.act(task)
        self.completed_tasks.append({
            "task": task,
            "result": result,
            "iteration": self.iteration,
        })
        return result

    def run(self, execution_agent: BaseAgent) -> Dict[str, Any]:
        """Run the prioritize/execute/create cycle until the task list
        empties or `max_iterations` is reached."""
        self.task_list.append(self.objective)

        while self.iteration < self.max_iterations and self.task_list:
            prioritized = self.prioritize_tasks()
            self.task_list = deque(prioritized)

            current_task = self.task_list.popleft()
            result = self.execute_task(current_task, execution_agent)
            self.results[self.iteration] = result

            # Spawn follow-up tasks, skipping duplicates already queued.
            new_tasks = self.create_tasks(result, current_task)
            for t in new_tasks:
                if t not in self.task_list:
                    self.task_list.append(t)

            self.iteration += 1

            logger.info(f"BabyAGI iteration {self.iteration}: "
                        f"tasks_remaining={len(self.task_list)}, "
                        f"completed={len(self.completed_tasks)}")

        return {
            "completed_tasks": self.completed_tasks,
            "results": self.results,
            "iterations": self.iteration,
            "objective": self.objective,
        }

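# Usage sketch for BabyAGILoop (illustrative): the stub create_tasks() always
# spawns three follow-ups, so the loop runs until max_iterations is reached.
#
#     worker = BaseAgent(AgentRole.EXECUTOR)
#     loop = BabyAGILoop("summarize the codebase", max_iterations=5)
#     summary = loop.run(worker)
#     summary["iterations"]              # 5
#     len(summary["completed_tasks"])    # 5, one per iteration
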

class AetherAgentOrchestrator(nn.Module):
    """
    Multi-agent orchestrator combining:
    - smolagents hierarchical delegation
    - MLPO: train a single leader; peer agents stay untrained
    - Agentic Neural Networks: textual backpropagation
    - CoMAS: co-evolution via interaction rewards
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

        # Worker pool keyed by role (an nn.ModuleDict, so parameters register).
        self.agents = nn.ModuleDict({
            "manager": BaseAgent(AgentRole.MANAGER, hidden_dim=config.macro_policy_dim),
            "executor": BaseAgent(AgentRole.EXECUTOR, hidden_dim=config.micro_policy_dim),
            "developer": BaseAgent(AgentRole.DEVELOPER, hidden_dim=config.micro_policy_dim),
            "researcher": BaseAgent(AgentRole.RESEARCHER, hidden_dim=config.micro_policy_dim),
        })

        # MLPO leader: the one agent intended to receive policy updates.
        self.leader = BaseAgent(AgentRole.MANAGER, hidden_dim=config.macro_policy_dim)

        self.hierarchical = HierarchicalAgent(
            macro_dim=config.macro_policy_dim,
            micro_dim=config.micro_policy_dim,
        )

        # Learnable per-agent routing logits, normalized to a distribution.
        self.routing_weights = nn.Parameter(torch.ones(len(self.agents)))
        self.aggregation_gate = nn.Softmax(dim=0)

        self.agent_tasks: Dict[str, BabyAGILoop] = {}

        self.task_count = 0
        self.agent_interactions: List[Dict] = []

    def forward(self, task: str, context: Dict[str, Any]) -> Dict[str, Any]:
        # Placeholder task embedding; a full implementation would encode
        # `task` (and `context`) instead of sampling random features.
        task_embed = torch.randn(1, self.config.macro_policy_dim)
        blueprint = self.hierarchical.generate_blueprint(task_embed)

        routing_probs = self.aggregation_gate(self.routing_weights)

        agent_outputs = {}
        for i, (name, agent) in enumerate(self.agents.items()):
            # The manager's role is subsumed by the leader's synthesis step.
            if name == "manager":
                continue

            # Skip agents the router assigns negligible probability.
            weight = routing_probs[i].item()
            if weight < 0.15:
                continue

            sub_task = blueprint[min(i, len(blueprint) - 1)] if blueprint else task
            output = agent.act(f"[{name}] {sub_task}")
            agent_outputs[name] = {
                "output": output,
                "weight": weight,
                "sub_task": sub_task,
            }

        # Leader synthesizes the workers' outputs (MLPO-style aggregation).
        synthesized = self.leader.act(
            f"Synthesize: {task} with inputs: {list(agent_outputs.keys())}"
        )

        self.agent_interactions.append({
            "task": task,
            "blueprint": blueprint,
            "agent_outputs": agent_outputs,
            "leader_synthesis": synthesized,
            "routing_probs": routing_probs.detach().cpu().tolist(),
            "timestamp": time.time(),
        })

        self.task_count += 1

        return {
            "output": synthesized,
            "blueprint": blueprint,
            "agent_outputs": agent_outputs,
            "routing_weights": routing_probs.detach().cpu().tolist(),
        }

    def execute(self, task: str, kg_context: Any, context: Dict[str, Any]) -> Dict[str, Any]:
        """Thin wrapper for callers that also pass knowledge-graph context
        (`kg_context` is currently unused)."""
        return self.forward(task, context)

    def textual_backprop(self, global_gradient: str,
                         performance_feedback: float,
                         beta: float = 0.7) -> Dict[str, str]:
        """Agentic-NN-style textual backpropagation: each agent receives a
        local textual gradient blended with its previous one, with `beta`
        weighting the new gradient."""
        updates = {}
        for name, agent in self.agents.items():
            local_grad = f"{global_gradient} + Agent {name} performance: {performance_feedback}"

            if hasattr(agent, 'previous_gradient'):
                # Blend new and previous textual gradients, weighted by beta.
                blended = f"{beta:g}*{local_grad} + {1 - beta:g}*{agent.previous_gradient}"
            else:
                blended = local_grad

            agent.previous_gradient = blended
            updates[name] = blended

        # Nudge all routing logits by the scalar feedback signal.
        self.routing_weights.data += performance_feedback * 0.01

        return updates

    def co_evolve_interactions(self) -> List[Dict]:
        """CoMAS-style intrinsic rewards over recent interactions: more
        participating agents and richer blueprints earn higher reward."""
        rewards = []

        for interaction in self.agent_interactions[-10:]:
            num_agents_involved = len(interaction.get("agent_outputs", {}))
            blueprint_complexity = len(interaction.get("blueprint", []))

            # 0.1 per participating agent, plus up to 0.5 for blueprint depth.
            reward = num_agents_involved * 0.1 + min(blueprint_complexity * 0.05, 0.5)
            rewards.append({
                "interaction_id": id(interaction),
                "reward": reward,
                "agents_involved": num_agents_involved,
            })

        return rewards

    def run_babyagi(self, objective: str, max_iterations: int = 20) -> Dict[str, Any]:
        loop = BabyAGILoop(objective, max_iterations)
        result = loop.run(self.agents["manager"])
        self.agent_tasks[objective] = loop
        return result

    def stats(self) -> Dict[str, Any]:
        return {
            "total_tasks": self.task_count,
            "num_agents": len(self.agents),
            "total_interactions": len(self.agent_interactions),
            "routing_weights": self.routing_weights.detach().cpu().tolist(),
            "active_tasks": len(self.agent_tasks),
        }
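

# End-to-end usage sketch (illustrative; AetherConfig is the hypothetical
# stand-in sketched near the top of this module):
#
#     config = AetherConfig(macro_policy_dim=256, micro_policy_dim=128)
#     orchestrator = AetherAgentOrchestrator(config)
#     result = orchestrator.forward("draft a deployment plan", context={})
#     result["blueprint"], result["agent_outputs"]
#     orchestrator.textual_backprop("tighten the plan's scope", performance_feedback=0.8)
#     orchestrator.co_evolve_interactions()
#     orchestrator.stats()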