gaurv007 committed on
Commit
85f8d4c
·
verified ·
1 Parent(s): bc1b21d

fix: run.py v3 — add --proven flag as primary mode, fix sign logic in compiler, add retry to llm_client

Browse files
Files changed (1) hide show
  1. alpha_factory/run.py +82 -39
alpha_factory/run.py CHANGED
@@ -1,81 +1,124 @@
1
  """
2
  Alpha Factory — Entry Point
3
- Run: python -m alpha_factory.run [--dry-run] [--batch-size N] [--interactive]
4
  """
5
  import os
6
  import asyncio
7
  import argparse
8
 
9
- # Load .env file FIRST before anything else reads env vars
10
  try:
11
  from dotenv import load_dotenv
12
- load_dotenv() # Reads .env from current directory
13
  except ImportError:
14
- pass # python-dotenv not installed; rely on system env vars
15
 
16
  from rich.console import Console
17
  from .config import load_config
18
- from .infra import ModelManager, interactive_model_select, LLMClient
19
  from .orchestration import AlphaPipeline
20
 
21
  console = Console()
22
 
23
 
24
  async def setup_models(interactive: bool = False, hf_token: str = None) -> ModelManager:
25
- """Discover models and optionally let user pick interactively."""
26
  manager = ModelManager(hf_token=hf_token)
27
-
28
- console.print("\n[bold]🔍 Discovering available models...[/]")
29
  await manager.discover_all()
30
-
31
- if interactive:
32
- selections = interactive_model_select(manager)
33
- for tier, model in selections.items():
34
- manager.select_model(tier, model)
35
- else:
36
- manager.auto_assign_defaults()
37
-
38
  manager.print_status()
39
  return manager
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def main():
43
- parser = argparse.ArgumentParser(description="Alpha Factory — LLM-Driven Alpha Generation Pipeline")
44
- parser.add_argument("--dry-run", action="store_true", help="Run without BRAIN submissions")
45
- parser.add_argument("--batch-size", type=int, default=10, help="Number of candidates per batch")
46
- parser.add_argument("--interactive", action="store_true", help="Interactively select models")
47
- parser.add_argument("--hf-token", type=str, default=None, help="HuggingFace API token (or set HF_TOKEN env)")
48
- parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama server URL")
 
49
  args = parser.parse_args()
50
 
51
  config = load_config()
52
  config.batch_size = args.batch_size
53
-
54
- # Resolve HF token: CLI arg > env var (loaded from .env)
55
  hf_token = args.hf_token or os.getenv("HF_TOKEN")
56
 
57
  console.print(f"""
58
- [bold green]━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[/]
59
- [bold green] ALPHA FACTORY v0.1.0[/]
60
- [bold green] Open-Source LLM-Driven Pipeline for WorldQuant BRAIN[/]
61
- [bold green]━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[/]
62
 
63
- Mode: {"DRY RUN (no BRAIN submissions)" if args.dry_run else "LIVE"}
64
  Batch size: {args.batch_size}
65
- Ollama: {args.ollama_url}
66
- HF Token: {"✓ Set" if hf_token else "✗ Not set (cloud models unavailable)"}
67
  """)
68
 
69
- # Discover and select models
70
- manager = asyncio.run(setup_models(
71
- interactive=args.interactive,
72
- hf_token=hf_token,
73
- ))
74
 
75
- # Update LLM config with Ollama URL
 
76
  config.llm.base_url = f"{args.ollama_url}/v1"
77
 
78
- # Create pipeline with model manager
79
  pipeline = AlphaPipeline(config)
80
  pipeline.llm = LLMClient(config.llm, model_manager=manager)
81
 
@@ -83,7 +126,7 @@ def main():
83
  result = asyncio.run(pipeline.run_batch(args.batch_size))
84
  console.print(f"\n[bold]Final: {result}[/]")
85
  except KeyboardInterrupt:
86
- console.print("\n[yellow]Interrupted by user[/]")
87
  finally:
88
  pipeline.close()
89
 
 
1
  """
2
  Alpha Factory — Entry Point
3
+ Run: python -m alpha_factory.run [--dry-run] [--proven] [--batch-size N]
4
  """
5
  import os
6
  import asyncio
7
  import argparse
8
 
 
9
  try:
10
  from dotenv import load_dotenv
11
+ load_dotenv()
12
  except ImportError:
13
+ pass
14
 
15
  from rich.console import Console
16
  from .config import load_config
17
+ from .infra import ModelManager, LLMClient
18
  from .orchestration import AlphaPipeline
19
 
20
  console = Console()
21
 
22
 
23
async def setup_models(interactive: bool = False, hf_token: str = None) -> ModelManager:
    """Discover available models and auto-assign default tiers.

    Args:
        interactive: Accepted for backward compatibility only. Interactive
            selection was removed in this revision (``interactive_model_select``
            is no longer imported), but ``main()`` still passes
            ``args.interactive`` here — warn instead of silently ignoring it.
        hf_token: HuggingFace API token, or None to rely on local discovery.

    Returns:
        A ``ModelManager`` with discovered models and defaults assigned.
    """
    if interactive:
        # NOTE(review): interactive selection no longer exists; fall back to
        # defaults rather than dropping the request without a trace.
        console.print("[yellow]Interactive model selection is no longer supported; using defaults.[/]")
    manager = ModelManager(hf_token=hf_token)
    console.print("\n[bold]Discovering available models...[/]")
    await manager.discover_all()
    manager.auto_assign_defaults()
    manager.print_status()
    return manager
30
 
31
 
32
def run_proven_mode(batch_size: int):
    """Generate alphas using proven templates only — no LLM needed.

    Each generated candidate is linted, deduplicated against the factor
    store by expression hash, and persisted if new. Prints a per-candidate
    status line and a final summary.

    Args:
        batch_size: Number of candidate expressions to generate.
    """
    from .deterministic.proven_templates import generate_batch_from_proven_templates
    # quick_dedup_hash is imported once here; the original re-imported it
    # inside the loop for every passing candidate.
    from .deterministic.lint import lint, quick_dedup_hash
    from .infra import FactorStore
    from pathlib import Path

    console.print("\n[bold green]--- Proven Template Mode (no LLM) ---[/]")
    console.print(f"Generating {batch_size} alphas from Alpha 15/6 structures\n")

    store = FactorStore(Path("factor_store/alphas.duckdb"))
    try:
        # try/finally guarantees the DuckDB handle is closed even if
        # linting or insertion raises mid-batch (original leaked it).
        existing_hashes = store.get_expression_hashes()

        batch = generate_batch_from_proven_templates(count=batch_size)
        passed = 0
        stored = 0

        for i, alpha in enumerate(batch, 1):
            expr = alpha["expression"]
            field = alpha["field_id"]
            template = alpha["template"]
            ac = alpha["field_ac"]
            group = alpha["group_key"]

            result = lint(expr)
            status = "[green]PASS[/]" if result.passed else "[red]FAIL[/]"
            console.print(f" {i}. {status} [{template}] field={field} (AC={ac}) group={group}")

            if not result.passed:
                console.print(f" [red]Errors: {result.errors}[/]")
                continue

            passed += 1
            alpha_id = quick_dedup_hash(expr, alpha["neutralization"], alpha["decay"])
            if alpha_id in existing_hashes:
                console.print(" [yellow]DEDUP: already exists[/]")
                continue

            store.insert_alpha(
                alpha_id=alpha_id,
                expression=expr,
                neutralization=alpha["neutralization"],
                decay=alpha["decay"],
                fields_used=[alpha["field_id"]],
                operators_used=["ts_decay_linear", "group_neutralize", "rank", "zscore", "ts_rank"],
                archetype=alpha["archetype"],
                theme=alpha["theme"],
                anomaly_tag="value",
                academic_anchor=None,
            )
            existing_hashes.add(alpha_id)
            stored += 1
            console.print(f" {expr[:90]}...")

        console.print(f"\n[bold]Results: {passed}/{len(batch)} passed lint, {stored} stored[/]")
        console.print("[dim]Copy expressions from UI or DuckDB. Paste into BRAIN with settings: USA/TOP3000/D1/Decay=5[/]")
    finally:
        store.close()
91
def main():
    """CLI entry point: parse arguments, print the banner, and dispatch
    to either proven-template mode (no LLM) or the LLM-assisted pipeline.
    """
    parser = argparse.ArgumentParser(description="Alpha Factory")
    parser.add_argument("--dry-run", action="store_true", help="No BRAIN submissions")
    parser.add_argument("--proven", action="store_true", help="Use proven templates only (no LLM, recommended)")
    parser.add_argument("--batch-size", type=int, default=10, help="Candidates per batch")
    parser.add_argument("--interactive", action="store_true", help="Select models interactively")
    parser.add_argument("--hf-token", type=str, default=None, help="HuggingFace API token")
    parser.add_argument("--ollama-url", type=str, default="http://localhost:11434", help="Ollama URL")
    args = parser.parse_args()

    config = load_config()
    config.batch_size = args.batch_size
    # CLI flag wins over the environment (env may come from .env via dotenv).
    hf_token = args.hf_token or os.getenv("HF_TOKEN")

    if args.proven:
        mode = "PROVEN TEMPLATES"
    elif args.dry_run:
        mode = "LLM-ASSISTED (dry-run)"
    else:
        mode = "LLM-ASSISTED"

    console.print(f"""
 [bold green]Alpha Factory v0.2.0[/]

 Mode: {mode}
 Batch size: {args.batch_size}
 HF Token: {"Set" if hf_token else "Not set"}
 """)

    # Proven mode is deterministic and needs no LLM backend at all.
    if args.proven:
        run_proven_mode(args.batch_size)
        return

    # LLM mode requires an HF token and/or a local Ollama server.
    manager = asyncio.run(setup_models(interactive=args.interactive, hf_token=hf_token))
    config.llm.base_url = f"{args.ollama_url}/v1"

    pipeline = AlphaPipeline(config)
    pipeline.llm = LLMClient(config.llm, model_manager=manager)

    try:
        result = asyncio.run(pipeline.run_batch(args.batch_size))
        console.print(f"\n[bold]Final: {result}[/]")
    except KeyboardInterrupt:
        console.print("\n[yellow]Interrupted[/]")
    finally:
        # Always release pipeline resources, interrupted or not.
        pipeline.close()
132