gaurv007 committed on
Commit
a7a3c87
·
verified ·
1 Parent(s): 1991baa

feat: config v2 — add dataset tier priorities, field selection strategy, and novel group preference

Browse files
Files changed (1) hide show
  1. alpha_factory/config.py +38 -15
alpha_factory/config.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Configuration — all settings in one place.
3
- Environment variables override defaults.
4
  """
5
  from pydantic import BaseModel, Field
6
  from pathlib import Path
@@ -8,12 +8,11 @@ import os
8
 
9
 
10
  class LLMConfig(BaseModel):
11
- """LLM serving configuration."""
12
  microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
13
  tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
14
  mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
15
  bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
16
- base_url: str = Field(default="http://localhost:8000/v1", description="vLLM / Ollama endpoint")
17
  api_key: str = "dummy"
18
  temperature_generation: float = 0.7
19
  temperature_compilation: float = 0.1
@@ -22,7 +21,6 @@ class LLMConfig(BaseModel):
22
 
23
 
24
  class BrainConfig(BaseModel):
25
- """WorldQuant BRAIN API configuration."""
26
  api_url: str = "https://api.worldquantbrain.com"
27
  region: str = "USA"
28
  universe: str = "TOP3000"
@@ -34,8 +32,25 @@ class BrainConfig(BaseModel):
34
  submit_interval_sec: float = 15.0
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  class KillSwitches(BaseModel):
38
- """Hard circuit breakers — non-negotiable."""
39
  daily_brain_submissions_max: int = 200
40
  consecutive_lint_fail_max: int = 10
41
  consecutive_kill_verdict_max: int = 30
@@ -44,7 +59,6 @@ class KillSwitches(BaseModel):
44
 
45
 
46
  class FitnessWeights(BaseModel):
47
- """Fitness function coefficients. Calibrate with hand-rankings after 20+ alphas."""
48
  sharpe_os: float = 1.0
49
  is_os_gap_penalty: float = 0.5
50
  worst_year_penalty: float = 1.0
@@ -54,10 +68,22 @@ class FitnessWeights(BaseModel):
54
  drawdown_penalty: float = 0.1
55
  drawdown_threshold: float = 0.05
56
  novelty_bonus: float = 0.4
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  class Paths(BaseModel):
60
- """All filesystem paths."""
61
  root: Path = Path(os.getenv("AF_ROOT", "."))
62
  data: Path = Field(default=None)
63
  factor_store: Path = Field(default=None)
@@ -73,22 +99,22 @@ class Paths(BaseModel):
73
  self.prompts = self.root / "prompts"
74
  if self.logs is None:
75
  self.logs = self.root / "logs"
76
- # Ensure directories exist
77
  for p in [self.data, self.factor_store, self.factor_store / "alphas",
78
  self.prompts, self.prompts / "templates", self.logs]:
79
  p.mkdir(parents=True, exist_ok=True)
80
 
81
 
82
  class Config(BaseModel):
83
- """Master configuration."""
84
  llm: LLMConfig = LLMConfig()
85
  brain: BrainConfig = BrainConfig()
 
86
  kill: KillSwitches = KillSwitches()
87
  fitness: FitnessWeights = FitnessWeights()
 
88
  paths: Paths = Paths()
89
 
90
  # Pipeline settings
91
- batch_size: int = 10 # alphas per batch
92
  max_iterations_per_family: int = 3
93
  correlation_threshold: float = 0.65
94
  min_sharpe_local_sim: float = 1.0
@@ -96,7 +122,4 @@ class Config(BaseModel):
96
 
97
 
98
  def load_config() -> Config:
99
- """Load config with env var overrides."""
100
- return Config(
101
- paths=Paths(root=Path(os.getenv("AF_ROOT", ".")))
102
- )
 
1
  """
2
+ Configuration v2 — all settings in one place.
3
+ Now includes dataset-tier priorities and field selection strategy.
4
  """
5
  from pydantic import BaseModel, Field
6
  from pathlib import Path
 
8
 
9
 
10
  class LLMConfig(BaseModel):
 
11
  microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
12
  tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
13
  mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
14
  bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
15
+ base_url: str = Field(default="http://localhost:8000/v1")
16
  api_key: str = "dummy"
17
  temperature_generation: float = 0.7
18
  temperature_compilation: float = 0.1
 
21
 
22
 
23
  class BrainConfig(BaseModel):
 
24
  api_url: str = "https://api.worldquantbrain.com"
25
  region: str = "USA"
26
  universe: str = "TOP3000"
 
32
  submit_interval_sec: float = 15.0
33
 
34
 
35
class FieldSelectionStrategy(BaseModel):
    """Strategy for picking fields — key lever for alpha novelty.

    Consumed by the field-selection stage of the pipeline (via
    ``Config.field_strategy``).  "AC" below refers to a field's alpha
    count — presumably how many existing alphas already use the field;
    verify against the selection code.
    """
    # Priority order for dataset tiers (lower index = try first).
    # Mutable default is safe here: pydantic deep-copies model defaults
    # per instance, unlike plain class attributes.
    tier_priority: list[str] = ["tier1", "tier2", "tier3"]
    # Maximum alpha count for a field to still be considered "novel"
    max_alpha_count: int = 50
    # Fields with AC=0 ("goldmine" fields) get this multiplier in
    # selection weight, strongly biasing the sampler toward unused data
    goldmine_weight_multiplier: float = 10.0
    # Minimum coverage threshold for a field to be eligible
    # (assumed to be a fraction in [0, 1] — TODO confirm units)
    min_coverage: float = 0.55
    # Prefer novel group keys for neutralization
    prefer_novel_groups: bool = True
    # Maximum group AC for a group key to qualify as "novel"
    max_group_ac: int = 30
    # How many fields to try per batch before giving up
    fields_per_batch: int = 5
51
+
52
+
53
  class KillSwitches(BaseModel):
 
54
  daily_brain_submissions_max: int = 200
55
  consecutive_lint_fail_max: int = 10
56
  consecutive_kill_verdict_max: int = 30
 
59
 
60
 
61
  class FitnessWeights(BaseModel):
 
62
  sharpe_os: float = 1.0
63
  is_os_gap_penalty: float = 0.5
64
  worst_year_penalty: float = 1.0
 
68
  drawdown_penalty: float = 0.1
69
  drawdown_threshold: float = 0.05
70
  novelty_bonus: float = 0.4
71
+ # New: bonus for using AC=0 fields
72
+ goldmine_field_bonus: float = 0.3
73
+ # New: bonus for using novel group keys
74
+ novel_group_bonus: float = 0.2
75
+
76
+
77
class SubmissionThresholds(BaseModel):
    """BRAIN submission pass/fail cutoffs from IQC 2026.

    Read as hard gates before submitting an alpha to the BRAIN platform
    (exposed as ``Config.submission``).  Values mirror the contest
    rules — update here if the rules change.
    """
    # Minimum Sharpe ratio required to submit
    min_sharpe: float = 1.25
    # Minimum platform fitness score
    min_fitness: float = 1.0
    # Maximum allowed turnover (fraction, so 0.70 == 70%)
    max_turnover: float = 0.70
    # Minimum Sharpe on the sub-universe test — presumably the
    # platform's reduced-universe robustness check; confirm semantics
    min_sub_universe_sharpe: float = 0.78
    # Maximum correlation against one's own previously submitted alphas
    # (note: matches Config.correlation_threshold = 0.65 elsewhere in
    # this file — keep the two in sync)
    max_self_correlation: float = 0.65
84
 
85
 
86
  class Paths(BaseModel):
 
87
  root: Path = Path(os.getenv("AF_ROOT", "."))
88
  data: Path = Field(default=None)
89
  factor_store: Path = Field(default=None)
 
99
  self.prompts = self.root / "prompts"
100
  if self.logs is None:
101
  self.logs = self.root / "logs"
 
102
  for p in [self.data, self.factor_store, self.factor_store / "alphas",
103
  self.prompts, self.prompts / "templates", self.logs]:
104
  p.mkdir(parents=True, exist_ok=True)
105
 
106
 
107
  class Config(BaseModel):
 
108
  llm: LLMConfig = LLMConfig()
109
  brain: BrainConfig = BrainConfig()
110
+ field_strategy: FieldSelectionStrategy = FieldSelectionStrategy()
111
  kill: KillSwitches = KillSwitches()
112
  fitness: FitnessWeights = FitnessWeights()
113
+ submission: SubmissionThresholds = SubmissionThresholds()
114
  paths: Paths = Paths()
115
 
116
  # Pipeline settings
117
+ batch_size: int = 10
118
  max_iterations_per_family: int = 3
119
  correlation_threshold: float = 0.65
120
  min_sharpe_local_sim: float = 1.0
 
122
 
123
 
124
def load_config() -> Config:
    """Build the master ``Config``.

    The ``AF_ROOT`` environment variable (defaulting to the current
    directory) selects the filesystem root handed to ``Paths``; every
    other section keeps its declared defaults.
    """
    root_dir = Path(os.getenv("AF_ROOT", "."))
    return Config(paths=Paths(root=root_dir))