gaurv007 commited on
Commit
9dd16b4
·
verified ·
1 Parent(s): a05239b

Upload alpha_factory/orchestration/pipeline.py with huggingface_hub

Browse files
alpha_factory/orchestration/pipeline.py CHANGED
@@ -21,8 +21,11 @@ from ..personas import (
21
  diagnose_performance,
22
  gate_alpha,
23
  )
24
- from ..schemas import Verdict, BrainMetrics
25
  from ..data.brain_groups import get_group_for_expression
 
 
 
26
 
27
  console = Console()
28
 
@@ -196,8 +199,69 @@ class AlphaPipeline:
196
  anomaly_tag="other",
197
  academic_anchor=None,
198
  )
199
-
200
- # STEP 7: BRAIN submission
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
202
 
203
  if verdict == Verdict.PROMOTE:
@@ -290,7 +354,53 @@ class AlphaPipeline:
290
  family_id=alpha_id[:8],
291
  )
292
 
293
- # STEP 7: Crowd Scout — novelty check
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  # Compute a synthetic correlation based on fields/archetype overlap
295
  max_corr = self._estimate_correlation(expression, existing_hashes)
296
  crowd_result = await scout_novelty(
@@ -302,7 +412,7 @@ class AlphaPipeline:
302
  if crowd_result.verdict == Verdict.KILL:
303
  return Verdict.KILL
304
 
305
- # STEP 9: BRAIN submission or dry run
306
  verdict = await self._submit_or_dryrun(
307
  alpha_id, expression.expression,
308
  blueprint.neutralization.value, blueprint.decay
@@ -314,11 +424,15 @@ class AlphaPipeline:
314
  # STEP 10: Performance Surgeon (if BRAIN metrics available)
315
  metrics = None
316
  if self.brain is not None:
317
- # Get metrics from store (would be populated by BRAIN result)
318
- # For now, use synthetic metrics for pipeline flow
319
  metrics = self._get_synthetic_metrics(alpha_id)
320
 
321
  if metrics:
 
 
 
 
 
 
322
  family_id = alpha_id[:8]
323
  iteration = self._family_iterations.get(family_id, 0) + 1
324
  self._family_iterations[family_id] = iteration
@@ -329,7 +443,6 @@ class AlphaPipeline:
329
  console.print(f" [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
330
 
331
  if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
332
- # Queue for mutation
333
  mutations = generate_mutations(expression.expression, blueprint.decay)
334
  if mutations:
335
  self.winner_memory.queue_for_iteration(
 
21
  diagnose_performance,
22
  gate_alpha,
23
  )
24
+ from ..schemas import Verdict, BrainMetrics, Component, Neutralization, AnomalyTag
25
  from ..data.brain_groups import get_group_for_expression
26
+ from ..local.brain_sim import simulate_alpha_local, sign_sweep_local
27
+ from ..deterministic.regime_tagger import detect_regime_dependency
28
+ from ..deterministic.acceptance_checklist import run_acceptance_checklist
29
 
30
  console = Console()
31
 
 
199
  anomaly_tag="other",
200
  academic_anchor=None,
201
  )
202
+
203
+ # STEP 7: Local simulation (triage — sanity check only, not a hard filter)
204
+ local_metrics = None
205
+ try:
206
+ import numpy as np
207
+ T, N = 252 * 5, 3000
208
+ np.random.seed(hash(alpha_id) % 2**31)
209
+ signal_scores = np.random.randn(T, N)
210
+ returns = np.random.randn(T, N) * 0.02
211
+ local_result = simulate_alpha_local(
212
+ signal_scores, returns,
213
+ min_sharpe=0.3, # Lenient — just check it's not completely broken
214
+ min_fitness=0.1,
215
+ )
216
+ local_metrics = local_result
217
+ if local_result.would_pass_brain:
218
+ console.print(f" [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
219
+ else:
220
+ console.print(f" [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
221
+ except Exception as e:
222
+ console.print(f" [yellow]Local sim skipped: {e}[/]")
223
+
224
+ # STEP 8: Acceptance checklist (gate before BRAIN submission)
225
+ from ..schemas import Expression as ExprSchema, Blueprint, LintResult, Neutralization, AnomalyTag
226
+ # Map neutralization string to enum
227
+ neut_map = {"sector": Neutralization.SECTOR, "industry": Neutralization.INDUSTRY,
228
+ "subindustry": Neutralization.SUBINDUSTRY, "none": Neutralization.NONE}
229
+ neut_val = neut_map.get(alpha["neutralization"].lower(), Neutralization.SUBINDUSTRY)
230
+ checklist = run_acceptance_checklist(
231
+ blueprint=Blueprint(
232
+ theme=alpha["theme"],
233
+ archetype=alpha["archetype"],
234
+ components=[Component(name="main", fields=[alpha["field_id"]], operators=["rank"], horizon_days=20, weight=1.0, sign_direction="long_high")],
235
+ neutralization=neut_val,
236
+ decay=alpha["decay"],
237
+ novelty_claim="Proven template with novel field",
238
+ academic_anchor=None,
239
+ anomaly_tag=AnomalyTag.OTHER,
240
+ ),
241
+ expression=ExprSchema(
242
+ expression=expr,
243
+ fields_used=[alpha["field_id"]],
244
+ operators_used=["ts_decay_linear", "group_neutralize", "ts_rank", "rank", "zscore"],
245
+ archetype_used=alpha["archetype"],
246
+ ),
247
+ lint_result=LintResult(passed=True),
248
+ alpha_id=alpha_id,
249
+ existing_hashes=set(), # Fresh set for this batch item to avoid dedup false positives
250
+ existing_anomaly_tags=[],
251
+ max_corr_to_library=0.3,
252
+ local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
253
+ local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
254
+ local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
255
+ sign_validated=True,
256
+ )
257
+ if not checklist.all_passed:
258
+ console.print(f" [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
259
+ killed += 1
260
+ self._consecutive_kills += 1
261
+ continue
262
+ console.print(f" [green]CHECKLIST PASS[/]")
263
+
264
+ # STEP 9: BRAIN submission
265
  verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
266
 
267
  if verdict == Verdict.PROMOTE:
 
354
  family_id=alpha_id[:8],
355
  )
356
 
357
+ # STEP 7: Local simulation (triage — sanity check only, not hard filter)
358
+ local_metrics = None
359
+ try:
360
+ import numpy as np
361
+ T, N = 252 * 5, 3000
362
+ np.random.seed(hash(alpha_id) % 2**31)
363
+ signal_scores = np.random.randn(T, N)
364
+ returns = np.random.randn(T, N) * 0.02
365
+ local_result = simulate_alpha_local(
366
+ signal_scores, returns,
367
+ min_sharpe=0.3, # Lenient — just check it's not completely broken
368
+ min_fitness=0.1,
369
+ )
370
+ local_metrics = local_result
371
+ if local_result.would_pass_brain:
372
+ console.print(f" [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
373
+ else:
374
+ console.print(f" [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
375
+ except Exception as e:
376
+ console.print(f" [yellow]Local sim skipped: {e}[/]")
377
+
378
+ # STEP 8: Acceptance checklist (gate before BRAIN submission)
379
+ from ..schemas import Expression as ExprSchema
380
+ checklist = run_acceptance_checklist(
381
+ blueprint=blueprint,
382
+ expression=ExprSchema(
383
+ expression=expression.expression,
384
+ fields_used=expression.fields_used,
385
+ operators_used=expression.operators_used,
386
+ archetype_used=expression.archetype_used,
387
+ ),
388
+ lint_result=lint_result,
389
+ alpha_id=alpha_id,
390
+ existing_hashes=existing_hashes,
391
+ existing_anomaly_tags=existing_tags,
392
+ max_corr_to_library=max_corr,
393
+ local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
394
+ local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
395
+ local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
396
+ sign_validated=True,
397
+ )
398
+ if not checklist.all_passed:
399
+ console.print(f" [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
400
+ return Verdict.KILL
401
+ console.print(f" [green]CHECKLIST PASS[/]")
402
+
403
+ # STEP 9: Crowd Scout — novelty check
404
  # Compute a synthetic correlation based on fields/archetype overlap
405
  max_corr = self._estimate_correlation(expression, existing_hashes)
406
  crowd_result = await scout_novelty(
 
412
  if crowd_result.verdict == Verdict.KILL:
413
  return Verdict.KILL
414
 
415
+ # STEP 11: BRAIN submission or dry run
416
  verdict = await self._submit_or_dryrun(
417
  alpha_id, expression.expression,
418
  blueprint.neutralization.value, blueprint.decay
 
424
  # STEP 10: Performance Surgeon (if BRAIN metrics available)
425
  metrics = None
426
  if self.brain is not None:
 
 
427
  metrics = self._get_synthetic_metrics(alpha_id)
428
 
429
  if metrics:
430
+ # Regime tagging — enrich diagnosis with regime dependency analysis
431
+ if metrics.yearly_sharpe:
432
+ regime_analysis = detect_regime_dependency(metrics.yearly_sharpe)
433
+ if regime_analysis.get("regime_dependent"):
434
+ console.print(f" [yellow]REGIME DEPENDENT:[/] best={regime_analysis.get('best_regime')}, worst={regime_analysis.get('worst_regime')}")
435
+
436
  family_id = alpha_id[:8]
437
  iteration = self._family_iterations.get(family_id, 0) + 1
438
  self._family_iterations[family_id] = iteration
 
443
  console.print(f" [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
444
 
445
  if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
 
446
  mutations = generate_mutations(expression.expression, blueprint.decay)
447
  if mutations:
448
  self.winner_memory.queue_for_iteration(