Rohan03 committed on
Commit
be73428
·
verified ·
1 Parent(s): 80a4e8f

Sprint 6: mas_generator.py — use-case → generated multi-agent system

Browse files
Files changed (1) hide show
  1. purpose_agent/mas_generator.py +255 -0
purpose_agent/mas_generator.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ mas_generator.py — Multi-Agent System Generator.
3
+
4
+ Takes a use-case description and outputs a complete generated system:
5
+ - Agent specs with roles
6
+ - Flow (workflow graph)
7
+ - Tools needed
8
+ - Memory budget
9
+ - Eval suite
10
+ - Routing policy
11
+
12
+ Usage:
13
+ from purpose_agent.mas_generator import generate
14
+
15
+ mas = generate("Monitor GitHub repos for CVEs and alert the team")
16
+ # → GeneratedMAS with agents, flow, tools, evals, routing
17
+
18
+ # Run it
19
+ team = mas.to_team(model=backend)
20
+ result = team.run("Check for new CVEs today")
21
+ """
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ from dataclasses import dataclass, field
26
+ from typing import Any
27
+
28
+ from purpose_agent.routing import RoutingPolicy, TaskComplexity
29
+ from purpose_agent.memory_homeostasis import MemoryBudget
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
@dataclass
class GeneratedAgent:
    """Specification of one agent inside a generated system.

    Pure data holder: two required strings plus two optional string lists.
    Each list defaults to a fresh empty list per instance.
    """

    # Short identifier for the agent (e.g. "coder").
    name: str
    # Plain-English description of what the agent is responsible for.
    role: str
    # Skill / topic tags describing what the agent is good at.
    expertise: list[str] = field(default_factory=list)
    # Names of the tools this particular agent requires.
    tools_needed: list[str] = field(default_factory=list)
41
+
42
+
43
@dataclass
class GeneratedFlow:
    """Description of a generated workflow graph.

    All three containers default to fresh, empty instances.
    """

    # Node names, in execution order.
    nodes: list[str] = field(default_factory=list)
    # Directed edges as (from_node, to_node) pairs.
    edges: list[tuple[str, str]] = field(default_factory=list)
    # Conditional routing: node name -> {condition label: target node name}.
    conditional: dict[str, dict[str, str]] = field(default_factory=dict)
49
+
50
+
51
@dataclass
class GeneratedEval:
    """One evaluation case belonging to a generated system's eval suite."""

    # Identifier of the evaluation case (e.g. "eval_runs").
    id: str
    # What this case is meant to check.
    purpose: str
    # Description of the outcome that counts as success.
    expected_behavior: str
    # Grouping label; "general" unless the creator says otherwise.
    category: str = "general"
58
+
59
+
60
@dataclass
class GeneratedMAS:
    """A complete generated multi-agent system.

    Bundles everything the generator produces for one use case: the agent
    specs, the workflow graph, the tool list, a memory budget, an eval suite,
    a routing policy and free-form metadata. Only ``purpose`` is required;
    every other field defaults to a fresh instance of its type.
    """

    # The use-case text the system was generated for.
    purpose: str
    # Agent specifications that make up the system.
    agents: list[GeneratedAgent] = field(default_factory=list)
    # Workflow graph connecting the agents.
    flow: GeneratedFlow = field(default_factory=GeneratedFlow)
    # Names of the tools the system as a whole needs.
    tools: list[str] = field(default_factory=list)
    # Memory limits; a default-constructed MemoryBudget when omitted.
    memory_budget: MemoryBudget = field(default_factory=MemoryBudget)
    # Evaluation cases for the system.
    eval_suite: list[GeneratedEval] = field(default_factory=list)
    # Routing settings; a default-constructed RoutingPolicy when omitted.
    routing_policy: RoutingPolicy = field(default_factory=RoutingPolicy)
    # Arbitrary extra information about how the system was generated.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_team(self, model=None):
        """Build a Purpose Agent ``Team`` from this spec for execution.

        Only each agent's ``name`` and ``role`` are forwarded; the other
        agent fields are not passed on.

        Args:
            model: Passed through unchanged as the ``model`` argument of
                ``Team``.

        Returns:
            The ``Team`` constructed from ``purpose``, the agent specs and
            ``model``.
        """
        # Imported lazily, at call time rather than at module import.
        from purpose_agent.easy import Team

        specs = [
            {"name": member.name, "role": member.role}
            for member in self.agents
        ]
        return Team(purpose=self.purpose, agents=specs, model=model)
77
+
78
+
79
+ # ═══════════════════════════════════════════════════════════════
80
+ # Templates — deterministic generation (no LLM needed)
81
+ # ═══════════════════════════════════════════════════════════════
82
+
83
# Template registry. Each entry maps a domain name to:
#   "keywords"   - words that select this template when found in the use case
#   "agents"     - GeneratedAgent specs for the domain
#   "flow_nodes" - agent names in execution order
#   "flow_type"  - label that controls which conditional edges get added
#   "tools"      - tool names for the system as a whole
# Entry order matters: on a keyword-score tie the earlier entry wins.
_TEMPLATES: dict[str, dict[str, Any]] = {
    "code": {
        "keywords": [
            "code", "program", "develop", "build", "software",
            "python", "debug", "function", "api", "script",
        ],
        "agents": [
            GeneratedAgent(
                name="architect",
                role="Design solution architecture and break into subtasks",
                expertise=["design", "planning"],
            ),
            GeneratedAgent(
                name="coder",
                role="Write clean, tested code following best practices",
                expertise=["coding", "python"],
                tools_needed=["python_exec"],
            ),
            GeneratedAgent(
                name="tester",
                role="Review code for bugs, edge cases, and improvements",
                expertise=["testing", "review"],
            ),
        ],
        "flow_nodes": ["architect", "coder", "tester"],
        "flow_type": "sequential_with_review",
        "tools": ["python_exec", "read_file", "write_file"],
    },
    "security": {
        "keywords": [
            "security", "cve", "vulnerability", "audit",
            "penetration", "threat", "monitor",
        ],
        "agents": [
            GeneratedAgent(
                name="scanner",
                role="Scan and identify potential security issues",
                expertise=["scanning", "detection"],
            ),
            GeneratedAgent(
                name="analyst",
                role="Analyze severity and impact of findings",
                expertise=["analysis", "risk"],
            ),
            GeneratedAgent(
                name="reporter",
                role="Create clear security reports with recommendations",
                expertise=["reporting"],
            ),
            GeneratedAgent(
                name="security_critic",
                role="Verify findings and check for false positives",
                expertise=["verification"],
            ),
        ],
        "flow_nodes": ["scanner", "analyst", "security_critic", "reporter"],
        "flow_type": "sequential_with_critic",
        "tools": ["read_file", "calculator"],
    },
    "research": {
        "keywords": [
            "research", "find", "search", "discover",
            "papers", "study", "investigate", "analyze",
        ],
        "agents": [
            GeneratedAgent(
                name="researcher",
                role="Find and gather relevant information",
                expertise=["search", "gathering"],
            ),
            GeneratedAgent(
                name="verifier",
                role="Cross-check facts and verify sources",
                expertise=["verification", "fact-check"],
            ),
            GeneratedAgent(
                name="synthesizer",
                role="Combine findings into coherent summary",
                expertise=["synthesis", "writing"],
            ),
        ],
        "flow_nodes": ["researcher", "verifier", "synthesizer"],
        "flow_type": "sequential",
        "tools": ["calculator"],
    },
    "data": {
        "keywords": [
            "data", "csv", "excel", "database",
            "analytics", "chart", "statistics", "report",
        ],
        "agents": [
            GeneratedAgent(
                name="loader",
                role="Load and validate data from sources",
                expertise=["data_loading"],
                tools_needed=["read_file"],
            ),
            GeneratedAgent(
                name="analyst",
                role="Analyze data, compute statistics, find patterns",
                expertise=["analysis"],
                tools_needed=["python_exec", "calculator"],
            ),
            GeneratedAgent(
                name="validator",
                role="Validate results and check for errors",
                expertise=["validation"],
            ),
            GeneratedAgent(
                name="reporter",
                role="Present findings in clear format",
                expertise=["reporting"],
            ),
        ],
        "flow_nodes": ["loader", "analyst", "validator", "reporter"],
        "flow_type": "sequential",
        "tools": ["python_exec", "read_file", "calculator"],
    },
    "operations": {
        "keywords": [
            "deploy", "monitor", "operate", "maintain",
            "alert", "incident", "pipeline",
        ],
        "agents": [
            GeneratedAgent(
                name="planner",
                role="Plan operations and identify risks",
                expertise=["planning"],
            ),
            GeneratedAgent(
                name="executor",
                role="Execute planned operations carefully",
                expertise=["execution"],
                tools_needed=["python_exec"],
            ),
            GeneratedAgent(
                name="auditor",
                role="Verify operations completed correctly",
                expertise=["auditing"],
            ),
        ],
        "flow_nodes": ["planner", "executor", "auditor"],
        "flow_type": "sequential_with_approval",
        "tools": ["python_exec", "read_file"],
    },
}
142
+
143
+
144
def _match_template(use_case: str) -> dict:
    """Pick the template whose keywords best overlap the use-case text.

    The text is lower-cased and every non-alphanumeric character is treated
    as a separator, so punctuation glued to a word ("code," or "(api)") no
    longer hides a keyword. Words ending in "s" are also tried without that
    final "s", so a plural such as "CVEs" still hits the "cve" keyword.

    Args:
        use_case: Plain-English description of the desired system.

    Returns:
        The ``_TEMPLATES`` entry with the highest keyword-overlap score.
        On a tie the template defined first wins; when nothing overlaps,
        the "research" template is returned.
    """
    # Replace punctuation/symbols by spaces before splitting into words.
    normalized = "".join(ch if ch.isalnum() else " " for ch in use_case.lower())
    words = set(normalized.split())
    # Cheap plural handling: keep the original word and add its "s"-less form.
    words |= {w[:-1] for w in words if len(w) > 1 and w.endswith("s")}

    best_template = None
    best_score = 0
    for template in _TEMPLATES.values():
        score = len(words.intersection(template["keywords"]))
        # Strict ">" keeps the earliest template on ties and skips zero scores.
        if score > best_score:
            best_score = score
            best_template = template

    # Default to research if no keyword matched at all.
    if best_template is None:
        return _TEMPLATES["research"]
    return best_template
158
+
159
+
160
def _generate_flow(template: dict) -> GeneratedFlow:
    """Build the workflow graph described by a template.

    Every node is linked to its successor in ``template["flow_nodes"]``.
    Depending on ``template["flow_type"]`` one conditional entry is added:

    - "sequential_with_review": the last node routes "pass" to "__END__"
      and "fail" back to the second node.
    - "sequential_with_critic": the first node whose name contains "critic"
      routes "approve" to the last node and "reject" to the first node.

    Any other flow type gets the plain sequential edges only.

    Args:
        template: Template dict providing "flow_nodes" and, optionally,
            "flow_type".

    Returns:
        A GeneratedFlow holding a copy of the node list, the sequential
        edges and any conditional routing.
    """
    nodes = template["flow_nodes"]
    # Pair each node with the one that follows it.
    chain = list(zip(nodes, nodes[1:]))
    flow = GeneratedFlow(nodes=list(nodes), edges=chain)

    kind = template.get("flow_type")
    if kind == "sequential_with_review":
        # Reviewer (last node) either finishes or sends work back to node two.
        flow.conditional[nodes[-1]] = {"pass": "__END__", "fail": nodes[1]}
    elif kind == "sequential_with_critic":
        # The critic either lets the final node run or restarts from the top.
        critics = [candidate for candidate in nodes if "critic" in candidate]
        if critics:
            flow.conditional[critics[0]] = {"approve": nodes[-1], "reject": nodes[0]}

    return flow
180
+
181
+
182
def _generate_evals(use_case: str, agents: list[GeneratedAgent]) -> list[GeneratedEval]:
    """Assemble the evaluation suite for a generated system.

    The suite always starts with two system-level cases ("eval_runs" and
    "eval_agents_contribute"), followed by one "agent_role" case for each of
    the first three agents. Agents beyond the third get no case of their own.

    Args:
        use_case: Use-case text, embedded in the purpose of "eval_runs".
        agents: Agent specs of the system being generated.

    Returns:
        List of GeneratedEval cases, system-level cases first.
    """
    system_cases = [
        GeneratedEval(
            id="eval_runs",
            purpose=f"System can process: {use_case}",
            expected_behavior="Completes without error",
            category="basic",
        ),
        GeneratedEval(
            id="eval_agents_contribute",
            purpose="Each agent contributes to the output",
            expected_behavior="All agents produce non-empty output",
            category="coverage",
        ),
    ]
    # One role check per agent, capped at the first three agents.
    role_cases = [
        GeneratedEval(
            id=f"eval_{member.name}",
            purpose=f"{member.name} performs its role: {member.role}",
            expected_behavior=f"{member.name} produces relevant output for its expertise",
            category="agent_role",
        )
        for member in agents[:3]
    ]
    return system_cases + role_cases
207
+
208
+
209
def generate(use_case: str, constraints: dict[str, Any] | None = None) -> GeneratedMAS:
    """
    Generate a complete multi-agent system from a use-case description.

    Uses deterministic templates (no LLM required). The template's agents and
    tool list are copied into the result, so modifying the returned system
    never alters the shared module-level templates used by later calls.

    Args:
        use_case: Plain English description of what the system should do
        constraints: Optional overrides. Keys read by this function:
            "prefer_local" (default True) and "max_cost" (default 0.10,
            forwarded as the routing policy's max_cost_per_task_usd).
            Any other key is currently ignored.

    Returns:
        GeneratedMAS with agents, flow, tools, evals, and routing policy
    """
    constraints = constraints or {}
    template = _match_template(use_case)

    # Copy template data instead of aliasing it: the template dict is shared
    # module state, and handing out its own lists/objects would let a
    # caller's mutation leak into every later generation.
    agents = [
        GeneratedAgent(
            name=spec.name,
            role=spec.role,
            expertise=list(spec.expertise),
            tools_needed=list(spec.tools_needed),
        )
        for spec in template["agents"]
    ]
    flow = _generate_flow(template)
    tools = list(template["tools"])
    evals = _generate_evals(use_case, agents)

    # Routing policy built from caller constraints (with defaults).
    n_agents = len(agents)
    policy = RoutingPolicy(
        prefer_local=constraints.get("prefer_local", True),
        max_cost_per_task_usd=constraints.get("max_cost", 0.10),
    )

    # Memory budget scaled to system size, capped at 512 cards / 500 tokens.
    budget = MemoryBudget(
        max_active_cards=min(512, n_agents * 128),
        max_injected_tokens=min(500, n_agents * 150),
    )

    # Recover the template's name by identity; "research" is the fallback label.
    template_name = next(
        (key for key, value in _TEMPLATES.items() if value is template),
        "research",
    )

    mas = GeneratedMAS(
        purpose=use_case,
        agents=agents,
        flow=flow,
        tools=tools,
        memory_budget=budget,
        eval_suite=evals,
        routing_policy=policy,
        metadata={"template": template_name},
    )

    logger.info(
        "MAS generated: %d agents, %d nodes, %d evals",
        len(agents), len(flow.nodes), len(evals),
    )
    return mas