camdog920 committed on
Commit
ece3f12
·
verified ·
1 Parent(s): 22026ba

Upload aether/evolution.py

Browse files
Files changed (1) hide show
  1. aether/evolution.py +382 -0
aether/evolution.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AETHER Evolution Engine.
3
+ Integrates AlphaEvolve-style code diff evolution,
4
+ GEA-style group experience sharing,
5
+ MAP-Elites diversity maintenance,
6
+ and HiMAC hierarchical co-evolution phases.
7
+ """
8
+
9
+ import numpy as np
10
+ import torch
11
+ from typing import List, Dict, Any, Callable, Optional, Tuple
12
+ import random
13
+ import copy
14
+ import logging
15
+ from dataclasses import dataclass, fields
16
+
17
+ logger = logging.getLogger("AETHER.Evolution")
18
+
19
+
20
@dataclass
class ArchitectureDNA:
    """Genotype encoding for AETHER architecture variants.

    A genotype can be flattened into an 8-element float vector with
    :meth:`to_vector` and rebuilt from such a vector with
    :meth:`from_vector`, which clips every gene to a fixed range.
    """
    population_size: int
    mutation_rate: float
    learning_rate: float
    macro_policy_dim: int
    micro_policy_dim: int
    num_agents: int
    kg_embedding_dim: int
    symbolic_bias: float = 0.5  # 0=neural, 1=symbolic

    @staticmethod
    def _int_gene(value: float, low: int, high: int) -> int:
        """Clip *value* to ``[low, high]`` and round it to the nearest int.

        Rounding (rather than ``int()`` truncation) keeps symmetric noise
        symmetric: truncation would turn e.g. 7.9 into 7 and systematically
        push integer genes towards their lower bound over many mutations.
        """
        return int(round(float(np.clip(value, low, high))))

    def to_vector(self) -> np.ndarray:
        """Return the genes as a float array in declaration order.

        ``learning_rate`` is multiplied by 1e5 and ``symbolic_bias`` by 10;
        :meth:`from_vector` undoes both scalings.
        """
        return np.array([
            self.population_size,
            self.mutation_rate,
            self.learning_rate * 1e5,  # scale for numerical stability
            self.macro_policy_dim,
            self.micro_policy_dim,
            self.num_agents,
            self.kg_embedding_dim,
            self.symbolic_bias * 10,
        ])

    @classmethod
    def from_vector(cls, vec: np.ndarray) -> "ArchitectureDNA":
        """Build a genotype from a vector laid out as in :meth:`to_vector`.

        Every gene is clipped to its allowed range; integer genes are then
        rounded to the nearest integer. Only the first eight entries of
        *vec* are read.
        """
        return cls(
            population_size=cls._int_gene(vec[0], 2, 64),
            mutation_rate=float(np.clip(vec[1], 0.01, 0.5)),
            learning_rate=float(np.clip(vec[2] / 1e5, 1e-6, 1e-3)),
            macro_policy_dim=cls._int_gene(vec[3], 64, 512),
            micro_policy_dim=cls._int_gene(vec[4], 32, 256),
            num_agents=cls._int_gene(vec[5], 1, 16),
            kg_embedding_dim=cls._int_gene(vec[6], 32, 512),
            symbolic_bias=float(np.clip(vec[7] / 10, 0.0, 1.0)),
        )

    def to_config_dict(self) -> Dict[str, Any]:
        """Return the genes as keyword arguments for a config object.

        ``symbolic_bias`` is intentionally not included in the mapping.
        """
        return {
            "population_size": self.population_size,
            "mutation_rate": self.mutation_rate,
            "learning_rate": self.learning_rate,
            "macro_policy_dim": self.macro_policy_dim,
            "micro_policy_dim": self.micro_policy_dim,
            "num_agents": self.num_agents,
            "kg_embedding_dim": self.kg_embedding_dim,
        }
67
+
68
+
69
class MAPelitesArchive:
    """
    MAP-Elites archive for quality-diversity optimization.
    Cells indexed by behavioral descriptors (capability dimensions).

    Each cell keeps the single highest-fitness ``(dna, fitness)`` pair seen
    for that cell. The grid may have any number of dimensions as long as
    ``dims`` and ``ranges`` have one entry per measure.
    """

    def __init__(self, dims: Tuple[int, ...] = (10, 10),
                 ranges: Optional[List[Tuple[float, float]]] = None):
        """
        Args:
            dims: Number of cells along each behavioral dimension.
            ranges: ``(low, high)`` bounds per dimension; defaults to
                ``[(0, 1), (0, 1)]`` when omitted or empty.
        """
        self.dims = dims
        self.ranges = ranges or [(0, 1), (0, 1)]
        self.archive: Dict[Tuple[int, ...], Tuple["ArchitectureDNA", float]] = {}

    def _get_index(self, measures: np.ndarray) -> Tuple[int, ...]:
        """Map continuous measures to discrete cell indices.

        Values outside a dimension's range are clamped to its first or
        last cell.
        """
        indices = []
        for m, (low, high), dim in zip(measures, self.ranges, self.dims):
            # The epsilon avoids division by zero for a degenerate range.
            normalized = (m - low) / (high - low + 1e-8)
            idx = int(np.clip(normalized * dim, 0, dim - 1))
            indices.append(idx)
        return tuple(indices)

    def add(self, dna: "ArchitectureDNA", fitness: float,
            measures: np.ndarray) -> bool:
        """Add solution to archive. Returns True if improved cell.

        Solutions with a NaN fitness or a NaN measure are rejected
        (returns False): a NaN fitness could never be displaced because
        every comparison against it is False, and a NaN measure cannot be
        mapped to a cell.
        """
        if np.isnan(fitness) or np.isnan(np.asarray(measures, dtype=float)).any():
            return False
        idx = self._get_index(measures)
        if idx not in self.archive or self.archive[idx][1] < fitness:
            self.archive[idx] = (dna, fitness)
            return True
        return False

    def sample(self, n: int = 1) -> List["ArchitectureDNA"]:
        """Sample up to *n* distinct random solutions from the archive."""
        if not self.archive:
            return []
        items = list(self.archive.values())
        selected = random.sample(items, min(n, len(items)))
        return [dna for dna, _ in selected]

    def get_best(self) -> Optional[Tuple["ArchitectureDNA", float]]:
        """Get highest fitness solution, or None if the archive is empty."""
        if not self.archive:
            return None
        return max(self.archive.values(), key=lambda x: x[1])

    def stats(self) -> Dict[str, float]:
        """Return coverage, QD score (sum of fitness) and max fitness.

        Coverage is the fraction of occupied cells over the full grid,
        computed from the product of all entries in ``dims`` so it stays
        correct for grids that are not two-dimensional.
        """
        total_cells = int(np.prod(self.dims))
        return {
            "coverage": len(self.archive) / total_cells if total_cells else 0.0,
            "qd_score": sum(f for _, f in self.archive.values()),
            "max_fitness": max((f for _, f in self.archive.values()), default=0),
        }
119
+
120
+
121
class AetherEvolutionEngine:
    """
    Evolutionary engine combining:
    - Gaussian mutation of architecture configs (labelled AlphaEvolve-style
      in the original comments)
    - GEA-style group experience sharing
    - MAP-Elites quality-diversity
    - HiMAC hierarchical co-evolution phases
    """

    # Settings that are not genes: they are copied unchanged from the parent
    # config onto every config derived from an ArchitectureDNA.
    _RUN_LEVEL_FIELDS = (
        "generations",
        "sandbox_timeout",
        "max_architecture_depth",
        "enable_self_modification",
    )

    def __init__(self, config):
        """Store *config* and create an empty 10x10 MAP-Elites archive."""
        self.config = config
        self.archive = MAPelitesArchive(
            dims=(10, 10),
            ranges=[(0, 1), (0, 1)],  # (symbolic_bias, task_complexity)
        )
        self.generation = 0
        self.experience_log: List[Dict] = []  # GEA experience sharing

    @staticmethod
    def _dna_from_config(cfg) -> "ArchitectureDNA":
        """Extract the genotype of *cfg*; symbolic_bias defaults to 0.5."""
        return ArchitectureDNA(
            population_size=cfg.population_size,
            mutation_rate=cfg.mutation_rate,
            learning_rate=cfg.learning_rate,
            macro_policy_dim=cfg.macro_policy_dim,
            micro_policy_dim=cfg.micro_policy_dim,
            num_agents=cfg.num_agents,
            kg_embedding_dim=cfg.kg_embedding_dim,
            symbolic_bias=getattr(cfg, "symbolic_bias", 0.5),
        )

    @classmethod
    def _config_from_dna(cls, dna, template) -> Any:
        """Build an AetherConfig from *dna*, inheriting run settings from *template*."""
        # Imported lazily, as in the original code.
        from .core import AetherConfig

        new_config = AetherConfig(**dna.to_config_dict())
        for name in cls._RUN_LEVEL_FIELDS:
            setattr(new_config, name, getattr(template, name))
        # to_config_dict() omits symbolic_bias, so carry the gene over
        # explicitly, but only when the template config exposes that attribute.
        if hasattr(template, "symbolic_bias"):
            new_config.symbolic_bias = dna.symbolic_bias
        return new_config

    @staticmethod
    def _normalize_fitness(fitness_scores) -> np.ndarray:
        """Min-max normalise fitness to [0, 1], tolerating non-finite values.

        NaN and -inf are treated as the worst finite score and +inf as the
        best, so one failed evaluation does not turn every score into NaN.
        """
        f = np.asarray(fitness_scores, dtype=float)
        finite = np.isfinite(f)
        if not finite.any():
            return np.zeros_like(f)
        lo = f[finite].min()
        hi = f[finite].max()
        f = np.where(np.isposinf(f), hi, np.where(finite, f, lo))
        return (f - lo) / (hi - lo + 1e-8)

    def generate_candidates(self, base_config,
                            population_size: int = 8) -> List[Any]:
        """
        Generate candidate architecture variants.
        Uses mutation + archive seeding.

        The result is ordered as: ``base_config``, mutated copies of it,
        then up to two configs rebuilt from archive entries. Archive seeds
        never push the total above ``population_size`` (the base config is
        always included, so at least one candidate is returned).
        """
        # Seed from archive if available, leaving room for the base config.
        n_seeds = min(2, len(self.archive.archive), max(0, population_size - 1))
        archive_seeds = self.archive.sample(n=n_seeds)

        # Always include base config
        candidates = [base_config]

        # Mutate base config
        for _ in range(population_size - len(archive_seeds) - 1):
            candidates.append(self._mutate_config(base_config))

        # Add archive seeds (converted back to config format)
        for dna in archive_seeds:
            candidates.append(self._config_from_dna(dna, base_config))

        return candidates

    def _mutate_config(self, config) -> Any:
        """Apply constrained mutation to config.

        Each gene is perturbed by Gaussian noise proportional to its own
        value, with standard deviation ``config.mutation_rate``; the result
        is clipped back into range by ``ArchitectureDNA.from_vector``.
        """
        vec = self._dna_from_config(config).to_vector()

        # Gaussian mutation: small perturbations proportional to each gene
        noise = np.random.normal(0, config.mutation_rate, size=vec.shape)
        mutated_vec = vec + noise * vec

        new_dna = ArchitectureDNA.from_vector(mutated_vec)
        return self._config_from_dna(new_dna, config)

    def select(self, candidates: List[Any], fitness_scores: List[float],
               alpha_exploration: float = 0.3) -> List[Any]:
        """
        Select candidates using Performance-Novelty scoring (from GEA).
        score(i) = performance_i * sqrt(novelty_i)

        Performance is min-max normalised fitness; novelty is the mean
        distance to the (up to 4) nearest other candidates in genotype
        space. The better-scoring half (at least one) is returned in
        ascending score order.

        NOTE: ``alpha_exploration`` is accepted but not used by the scoring.

        Raises:
            ValueError: if both inputs are non-empty but differ in length.
        """
        if len(candidates) == 0 or len(fitness_scores) == 0:
            return candidates[:2] if len(candidates) >= 2 else candidates
        if len(candidates) != len(fitness_scores):
            raise ValueError(
                f"Got {len(candidates)} candidates but "
                f"{len(fitness_scores)} fitness scores"
            )

        vectors = np.array(
            [self._dna_from_config(cfg).to_vector() for cfg in candidates]
        )
        f_norm = self._normalize_fitness(fitness_scores)

        k = min(4, len(candidates) - 1)
        novelties = np.zeros(len(candidates))
        if k > 0:  # a lone candidate has no neighbours; its novelty stays 0
            for i, v in enumerate(vectors):
                distances = np.linalg.norm(vectors - v, axis=1)
                distances[i] = np.inf  # exclude self
                novelties[i] = np.mean(np.partition(distances, k)[:k])

        nov_norm = novelties / (novelties.max() + 1e-8)
        scores = f_norm * np.sqrt(nov_norm + 1e-8)

        n_select = max(1, len(candidates) // 2)
        top_indices = np.argsort(scores)[-n_select:]
        selected = [candidates[i] for i in top_indices]

        logger.info(f"Selected {len(selected)} candidates. "
                    f"Score range: [{scores.min():.3f}, {scores.max():.3f}]")

        return selected

    def mutate(self, candidates: List[Any], mutation_rate: float = 0.15,
               max_depth: int = 5) -> List[Any]:
        """
        Apply constrained mutations.
        Enforces max architecture depth and safety constraints.

        NOTE: ``mutation_rate`` is accepted but not used; the noise scale
        comes from each candidate's own ``mutation_rate`` attribute.
        ``max_depth`` caps ``num_agents`` at ``max_depth * 2``.
        """
        mutated = []
        for cfg in candidates:
            new_cfg = self._mutate_config(cfg)

            if new_cfg.macro_policy_dim > 512:
                new_cfg.macro_policy_dim = 512
            if new_cfg.micro_policy_dim > new_cfg.macro_policy_dim:
                new_cfg.micro_policy_dim = new_cfg.macro_policy_dim // 2
            if new_cfg.num_agents > max_depth * 2:
                new_cfg.num_agents = max_depth * 2

            mutated.append(new_cfg)

        return mutated

    def co_evolve_phases(self, macro_policy, micro_policy,
                         macro_env_fn, micro_env_fn,
                         num_iterations: int = 10,
                         samples_per_iteration: int = 8) -> Tuple[Any, Any]:
        """
        HiMAC-style iterative co-evolution.
        Phase A: Macro-exploration (freeze micro)
        Phase B: Micro-adaptation (freeze macro, train on best blueprint)

        Args:
            macro_policy: object providing ``sample()`` and
                ``update(blueprints, advantages)``.
            micro_policy: object providing ``update(blueprint, env_fn)``.
            macro_env_fn: called as ``macro_env_fn(blueprint, micro_policy)``
                and expected to return a reward.
            micro_env_fn: passed through to ``micro_policy.update``.
            num_iterations: number of Phase A / Phase B rounds.
            samples_per_iteration: blueprints sampled per Phase A round.

        Returns:
            The same ``(macro_policy, micro_policy)`` objects after updates.
        """
        logger.info(f"Starting hierarchical co-evolution for {num_iterations} iterations")

        best_blueprint = None
        best_reward = -float('inf')

        for iteration in range(num_iterations):
            logger.info(f"Iteration {iteration}: Phase A - Macro Exploration")
            blueprints = []
            rewards = []

            for _ in range(samples_per_iteration):
                blueprint = macro_policy.sample()
                reward = macro_env_fn(blueprint, micro_policy)
                blueprints.append(blueprint)
                rewards.append(reward)

            # Standardise rewards within the batch before the policy update.
            r = np.array(rewards)
            advantages = (r - r.mean()) / (r.std() + 1e-8)

            macro_policy.update(blueprints, advantages)

            # Track the best blueprint seen across all iterations.
            best_idx = int(np.argmax(rewards))
            if rewards[best_idx] > best_reward:
                best_reward = rewards[best_idx]
                best_blueprint = blueprints[best_idx]

            logger.info(f"Iteration {iteration}: Phase B - Micro Adaptation")
            if best_blueprint is not None:
                micro_policy.update(best_blueprint, micro_env_fn)

        return macro_policy, micro_policy

    def share_experience(self, agent_group: List[Any],
                         traces: List[Dict]) -> List[str]:
        """
        GEA-style experience sharing: agents reflect on group traces
        and generate evolution directives.

        Collects the recognised keys from every trace, produces one
        directive per agent, appends a summary to ``experience_log`` and
        increments ``generation``.
        """
        aggregated = {
            "patches_applied": [],
            "predicted_patches": [],
            "execution_logs": [],
            "outcomes": [],
        }

        for trace in traces:
            for key in aggregated:
                if key in trace:
                    aggregated[key].append(trace[key])

        directives = [
            self._generate_directive(agent, aggregated) for agent in agent_group
        ]

        self.experience_log.append({
            "generation": self.generation,
            "group_size": len(agent_group),
            "traces": len(traces),
            "directives": directives,
        })

        self.generation += 1
        return directives

    def _generate_directive(self, agent, aggregated: Dict) -> str:
        """Pick a directive from the mean of ``aggregated["outcomes"]``.

        With no outcomes the success rate is taken to be 0.5. *agent* is
        not consulted.
        """
        success_rate = (np.mean(aggregated["outcomes"])
                        if aggregated["outcomes"] else 0.5)

        if success_rate < 0.3:
            return "Increase exploration diversity. Decrease learning rate. Add more agents."
        elif success_rate > 0.8:
            return "Consolidate current strategy. Increase exploitation. Optimize inference speed."
        else:
            return "Balance exploration and exploitation. Refine tool definitions."

    def update_archive(self, candidates: List[Any],
                       fitness_scores: List[float]) -> None:
        """Update MAP-Elites archive with evaluated candidates.

        Candidates whose fitness is ``-inf`` or NaN are skipped. The cell is
        chosen from ``(symbolic_bias, fitness clipped to [0, 1])``.
        """
        for cfg, fitness in zip(candidates, fitness_scores):
            # NaN cannot be mapped to a cell and -inf marks a failed run.
            if fitness == -float('inf') or np.isnan(fitness):
                continue

            dna = self._dna_from_config(cfg)
            measures = np.array([
                dna.symbolic_bias,
                np.clip(fitness, 0, 1),
            ])

            improved = self.archive.add(dna, fitness, measures)
            if improved:
                logger.debug(f"Archive improved at cell with fitness {fitness:.4f}")

    def get_diversity_stats(self) -> Dict[str, float]:
        """Return the archive's coverage / QD-score / max-fitness stats."""
        return self.archive.stats()