"""
mas_generator.py β€” Multi-Agent System Generator.

Takes a use-case description and outputs a complete generated system:
  - Agent specs with roles
  - Flow (workflow graph)
  - Tools needed
  - Memory budget
  - Eval suite
  - Routing policy

Usage:
    from purpose_agent.mas_generator import generate
    
    mas = generate("Monitor GitHub repos for CVEs and alert the team")
    # β†’ GeneratedMAS with agents, flow, tools, evals, routing
    
    # Run it
    team = mas.to_team(model=backend)
    result = team.run("Check for new CVEs today")
"""
from __future__ import annotations

import copy
import logging
import re
from dataclasses import dataclass, field
from typing import Any

from purpose_agent.memory_homeostasis import MemoryBudget
from purpose_agent.routing import RoutingPolicy

logger = logging.getLogger(__name__)


@dataclass
class GeneratedAgent:
    """Spec for a generated agent."""
    name: str
    role: str
    expertise: list[str] = field(default_factory=list)
    tools_needed: list[str] = field(default_factory=list)


@dataclass
class GeneratedFlow:
    """Spec for a generated workflow."""
    nodes: list[str] = field(default_factory=list)         # Node names in order
    edges: list[tuple[str, str]] = field(default_factory=list)  # (from, to)
    conditional: dict[str, dict[str, str]] = field(default_factory=dict)  # node β†’ {condition: target}
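
    # Illustrative shape (using the "code" template defined below): the
    # tester node ends the flow on "pass" and loops back to the coder on
    # "fail":
    #   conditional = {"tester": {"pass": "__END__", "fail": "coder"}}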


@dataclass
class GeneratedEval:
    """A generated evaluation case."""
    id: str
    purpose: str
    expected_behavior: str
    category: str = "general"


@dataclass
class GeneratedMAS:
    """Complete generated multi-agent system."""
    purpose: str
    agents: list[GeneratedAgent] = field(default_factory=list)
    flow: GeneratedFlow = field(default_factory=GeneratedFlow)
    tools: list[str] = field(default_factory=list)
    memory_budget: MemoryBudget = field(default_factory=MemoryBudget)
    eval_suite: list[GeneratedEval] = field(default_factory=list)
    routing_policy: RoutingPolicy = field(default_factory=RoutingPolicy)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_team(self, model=None):
        """Convert to a Purpose Agent Team for execution."""
        from purpose_agent.easy import Team  # imported lazily, only when a team is built
        agent_specs = [{"name": a.name, "role": a.role} for a in self.agents]
        return Team(purpose=self.purpose, agents=agent_specs, model=model)


# ═══════════════════════════════════════════════════════════════
# Templates β€” deterministic generation (no LLM needed)
# ═══════════════════════════════════════════════════════════════

_TEMPLATES = {
    "code": {
        "keywords": ["code", "program", "develop", "build", "software", "python", "debug", "function", "api", "script"],
        "agents": [
            GeneratedAgent("architect", "Design solution architecture and break into subtasks", ["design", "planning"]),
            GeneratedAgent("coder", "Write clean, tested code following best practices", ["coding", "python"], ["python_exec"]),
            GeneratedAgent("tester", "Review code for bugs, edge cases, and improvements", ["testing", "review"]),
        ],
        "flow_nodes": ["architect", "coder", "tester"],
        "flow_type": "sequential_with_review",
        "tools": ["python_exec", "read_file", "write_file"],
    },
    "security": {
        "keywords": ["security", "cve", "cves", "vulnerability", "audit", "penetration", "threat", "monitor", "alert"],
        "agents": [
            GeneratedAgent("scanner", "Scan and identify potential security issues", ["scanning", "detection"]),
            GeneratedAgent("analyst", "Analyze severity and impact of findings", ["analysis", "risk"]),
            GeneratedAgent("reporter", "Create clear security reports with recommendations", ["reporting"]),
            GeneratedAgent("security_critic", "Verify findings and check for false positives", ["verification"]),
        ],
        "flow_nodes": ["scanner", "analyst", "security_critic", "reporter"],
        "flow_type": "sequential_with_critic",
        "tools": ["read_file", "calculator"],
    },
    "research": {
        "keywords": ["research", "find", "search", "discover", "papers", "study", "investigate", "analyze"],
        "agents": [
            GeneratedAgent("researcher", "Find and gather relevant information", ["search", "gathering"]),
            GeneratedAgent("verifier", "Cross-check facts and verify sources", ["verification", "fact-check"]),
            GeneratedAgent("synthesizer", "Combine findings into coherent summary", ["synthesis", "writing"]),
        ],
        "flow_nodes": ["researcher", "verifier", "synthesizer"],
        "flow_type": "sequential",
        "tools": ["calculator"],
    },
    "data": {
        "keywords": ["data", "csv", "excel", "database", "analytics", "chart", "statistics", "report"],
        "agents": [
            GeneratedAgent("loader", "Load and validate data from sources", ["data_loading"], ["read_file"]),
            GeneratedAgent("analyst", "Analyze data, compute statistics, find patterns", ["analysis"], ["python_exec", "calculator"]),
            GeneratedAgent("validator", "Validate results and check for errors", ["validation"]),
            GeneratedAgent("reporter", "Present findings in clear format", ["reporting"]),
        ],
        "flow_nodes": ["loader", "analyst", "validator", "reporter"],
        "flow_type": "sequential",
        "tools": ["python_exec", "read_file", "calculator"],
    },
    "operations": {
        "keywords": ["deploy", "monitor", "operate", "maintain", "alert", "incident", "pipeline"],
        "agents": [
            GeneratedAgent("planner", "Plan operations and identify risks", ["planning"]),
            GeneratedAgent("executor", "Execute planned operations carefully", ["execution"], ["python_exec"]),
            GeneratedAgent("auditor", "Verify operations completed correctly", ["auditing"]),
        ],
        "flow_nodes": ["planner", "executor", "auditor"],
        "flow_type": "sequential_with_approval",
        "tools": ["python_exec", "read_file"],
    },
}


def _match_template(use_case: str) -> tuple[str, dict]:
    """Match the use case to the best template by keyword overlap."""
    # Tokenize on word characters so punctuation ("CVEs,") can't block a match
    words = set(re.findall(r"[a-z0-9_]+", use_case.lower()))
    best_name = "research"  # default template when nothing matches
    best_score = 0

    for name, template in _TEMPLATES.items():
        score = len(words & set(template["keywords"]))
        if score > best_score:
            best_score = score
            best_name = name

    return best_name, _TEMPLATES[best_name]
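
# Worked example (illustrative): "Build a Python API" lowercases and splits
# to {"build", "a", "python", "api"}, which overlaps three "code" keywords
# ("build", "python", "api"), so _match_template returns ("code", ...).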


def _generate_flow(template: dict) -> GeneratedFlow:
    """Generate flow graph from template."""
    nodes = template["flow_nodes"]
    flow = GeneratedFlow(nodes=list(nodes))

    # Sequential edges
    for i in range(len(nodes) - 1):
        flow.edges.append((nodes[i], nodes[i + 1]))

    # Add feedback loops for flow types with a review/critic stage
    # (note: "sequential_with_approval" keeps plain sequential edges)
    if template.get("flow_type") == "sequential_with_review":
        # Last node can loop back to the second node on failure
        flow.conditional[nodes[-1]] = {"pass": "__END__", "fail": nodes[1]}
    elif template.get("flow_type") == "sequential_with_critic":
        # Critic either approves (continue to the final node) or rejects
        # (restart from the first node)
        critic = [n for n in nodes if "critic" in n]
        if critic:
            flow.conditional[critic[0]] = {"approve": nodes[-1], "reject": nodes[0]}

    return flow
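
# For the "security" template this yields (illustrative):
#   nodes:       ["scanner", "analyst", "security_critic", "reporter"]
#   edges:       [("scanner", "analyst"), ("analyst", "security_critic"),
#                 ("security_critic", "reporter")]
#   conditional: {"security_critic": {"approve": "reporter", "reject": "scanner"}}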


def _generate_evals(use_case: str, agents: list[GeneratedAgent]) -> list[GeneratedEval]:
    """Generate evaluation cases for the system."""
    evals = [
        GeneratedEval(
            id="eval_runs",
            purpose=f"System can process: {use_case}",
            expected_behavior="Completes without error",
            category="basic",
        ),
        GeneratedEval(
            id="eval_agents_contribute",
            purpose="Each agent contributes to the output",
            expected_behavior="All agents produce non-empty output",
            category="coverage",
        ),
    ]
    # Per-agent evals (capped at the first three agents)
    for agent in agents[:3]:
        evals.append(GeneratedEval(
            id=f"eval_{agent.name}",
            purpose=f"{agent.name} performs its role: {agent.role}",
            expected_behavior=f"{agent.name} produces relevant output for its expertise",
            category="agent_role",
        ))
    return evals
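
# For the "research" template this yields five cases (illustrative):
#   eval_runs, eval_agents_contribute, eval_researcher, eval_verifier,
#   eval_synthesizer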


def generate(use_case: str, constraints: dict[str, Any] | None = None) -> GeneratedMAS:
    """
    Generate a complete multi-agent system from a use-case description.
    
    Uses deterministic templates (no LLM required).
    
    Args:
        use_case: Plain English description of what the system should do
        constraints: Optional overrides; supported keys are "prefer_local"
            (bool, default True) and "max_cost" (max USD per task, default 0.10)
        
    Returns:
        GeneratedMAS with agents, flow, tools, evals, and routing policy
    """
    constraints = constraints or {}
    template_name, template = _match_template(use_case)

    # Copy the template specs so callers can mutate the generated system
    # without corrupting the shared _TEMPLATES definitions
    agents = copy.deepcopy(template["agents"])
    flow = _generate_flow(template)
    tools = list(template["tools"])
    evals = _generate_evals(use_case, agents)

    # Routing policy from caller constraints (prefer cheap/local by default)
    policy = RoutingPolicy(
        prefer_local=constraints.get("prefer_local", True),
        max_cost_per_task_usd=constraints.get("max_cost", 0.10),
    )

    # Memory budget scaled to the number of agents
    n_agents = len(agents)
    budget = MemoryBudget(
        max_active_cards=min(512, n_agents * 128),
        max_injected_tokens=min(500, n_agents * 150),
    )

    mas = GeneratedMAS(
        purpose=use_case,
        agents=agents,
        flow=flow,
        tools=tools,
        memory_budget=budget,
        eval_suite=evals,
        routing_policy=policy,
        metadata={"template": template_name},
    )

    logger.info(f"MAS generated: {len(agents)} agents, {len(flow.nodes)} nodes, {len(evals)} evals")
    return mas
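

# ═══════════════════════════════════════════════════════════════
# Demo — minimal smoke-test sketch (illustrative, not part of the
# public API); exercises the docstring's example use case
# ═══════════════════════════════════════════════════════════════

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo = generate("Monitor GitHub repos for CVEs and alert the team")
    print(f"template: {demo.metadata['template']}")  # → security
    print(f"agents:   {[a.name for a in demo.agents]}")
    print(f"flow:     {demo.flow.nodes}")
    print(f"branches: {demo.flow.conditional}")
    print(f"evals:    {[e.id for e in demo.eval_suite]}")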