feat: config v2 — add dataset tier priorities, field selection strategy, and novel group preference
Browse files- alpha_factory/config.py +38 -15
alpha_factory/config.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
-
Configuration — all settings in one place.
|
| 3 |
-
|
| 4 |
"""
|
| 5 |
from pydantic import BaseModel, Field
|
| 6 |
from pathlib import Path
|
|
@@ -8,12 +8,11 @@ import os
|
|
| 8 |
|
| 9 |
|
| 10 |
class LLMConfig(BaseModel):
|
| 11 |
-
"""LLM serving configuration."""
|
| 12 |
microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
|
| 13 |
tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
|
| 14 |
mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
|
| 15 |
bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
|
| 16 |
-
base_url: str = Field(default="http://localhost:8000/v1"
|
| 17 |
api_key: str = "dummy"
|
| 18 |
temperature_generation: float = 0.7
|
| 19 |
temperature_compilation: float = 0.1
|
|
@@ -22,7 +21,6 @@ class LLMConfig(BaseModel):
|
|
| 22 |
|
| 23 |
|
| 24 |
class BrainConfig(BaseModel):
|
| 25 |
-
"""WorldQuant BRAIN API configuration."""
|
| 26 |
api_url: str = "https://api.worldquantbrain.com"
|
| 27 |
region: str = "USA"
|
| 28 |
universe: str = "TOP3000"
|
|
@@ -34,8 +32,25 @@ class BrainConfig(BaseModel):
|
|
| 34 |
submit_interval_sec: float = 15.0
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
class KillSwitches(BaseModel):
|
| 38 |
-
"""Hard circuit breakers — non-negotiable."""
|
| 39 |
daily_brain_submissions_max: int = 200
|
| 40 |
consecutive_lint_fail_max: int = 10
|
| 41 |
consecutive_kill_verdict_max: int = 30
|
|
@@ -44,7 +59,6 @@ class KillSwitches(BaseModel):
|
|
| 44 |
|
| 45 |
|
| 46 |
class FitnessWeights(BaseModel):
|
| 47 |
-
"""Fitness function coefficients. Calibrate with hand-rankings after 20+ alphas."""
|
| 48 |
sharpe_os: float = 1.0
|
| 49 |
is_os_gap_penalty: float = 0.5
|
| 50 |
worst_year_penalty: float = 1.0
|
|
@@ -54,10 +68,22 @@ class FitnessWeights(BaseModel):
|
|
| 54 |
drawdown_penalty: float = 0.1
|
| 55 |
drawdown_threshold: float = 0.05
|
| 56 |
novelty_bonus: float = 0.4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
class Paths(BaseModel):
|
| 60 |
-
"""All filesystem paths."""
|
| 61 |
root: Path = Path(os.getenv("AF_ROOT", "."))
|
| 62 |
data: Path = Field(default=None)
|
| 63 |
factor_store: Path = Field(default=None)
|
|
@@ -73,22 +99,22 @@ class Paths(BaseModel):
|
|
| 73 |
self.prompts = self.root / "prompts"
|
| 74 |
if self.logs is None:
|
| 75 |
self.logs = self.root / "logs"
|
| 76 |
-
# Ensure directories exist
|
| 77 |
for p in [self.data, self.factor_store, self.factor_store / "alphas",
|
| 78 |
self.prompts, self.prompts / "templates", self.logs]:
|
| 79 |
p.mkdir(parents=True, exist_ok=True)
|
| 80 |
|
| 81 |
|
| 82 |
class Config(BaseModel):
|
| 83 |
-
"""Master configuration."""
|
| 84 |
llm: LLMConfig = LLMConfig()
|
| 85 |
brain: BrainConfig = BrainConfig()
|
|
|
|
| 86 |
kill: KillSwitches = KillSwitches()
|
| 87 |
fitness: FitnessWeights = FitnessWeights()
|
|
|
|
| 88 |
paths: Paths = Paths()
|
| 89 |
|
| 90 |
# Pipeline settings
|
| 91 |
-
batch_size: int = 10
|
| 92 |
max_iterations_per_family: int = 3
|
| 93 |
correlation_threshold: float = 0.65
|
| 94 |
min_sharpe_local_sim: float = 1.0
|
|
@@ -96,7 +122,4 @@ class Config(BaseModel):
|
|
| 96 |
|
| 97 |
|
| 98 |
def load_config() -> Config:
|
| 99 |
-
|
| 100 |
-
return Config(
|
| 101 |
-
paths=Paths(root=Path(os.getenv("AF_ROOT", ".")))
|
| 102 |
-
)
|
|
|
|
| 1 |
"""
|
| 2 |
+
Configuration v2 — all settings in one place.
|
| 3 |
+
Now includes dataset-tier priorities and field selection strategy.
|
| 4 |
"""
|
| 5 |
from pydantic import BaseModel, Field
|
| 6 |
from pathlib import Path
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class LLMConfig(BaseModel):
|
|
|
|
| 11 |
microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
|
| 12 |
tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
|
| 13 |
mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
|
| 14 |
bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
|
| 15 |
+
base_url: str = Field(default="http://localhost:8000/v1")
|
| 16 |
api_key: str = "dummy"
|
| 17 |
temperature_generation: float = 0.7
|
| 18 |
temperature_compilation: float = 0.1
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
class BrainConfig(BaseModel):
|
|
|
|
| 24 |
api_url: str = "https://api.worldquantbrain.com"
|
| 25 |
region: str = "USA"
|
| 26 |
universe: str = "TOP3000"
|
|
|
|
| 32 |
submit_interval_sec: float = 15.0
|
| 33 |
|
| 34 |
|
| 35 |
+
class FieldSelectionStrategy(BaseModel):
    """Strategy for picking fields — key lever for alpha novelty.

    Controls the order in which dataset tiers are tried, how "novel"
    fields and group keys are identified (by alpha count, AC), and how
    strongly unused (AC=0) "goldmine" fields are favoured.
    """
    # Priority order for dataset tiers (lower index = try first).
    # default_factory ensures each instance gets its own list object.
    tier_priority: list[str] = Field(
        default_factory=lambda: ["tier1", "tier2", "tier3"]
    )
    # Maximum alpha count for a field to be considered "novel".
    max_alpha_count: int = Field(default=50, ge=0)
    # Fields with AC=0 get this multiplier in selection weight.
    goldmine_weight_multiplier: float = Field(default=10.0, gt=0.0)
    # Minimum coverage threshold (validated to the 0..1 range).
    min_coverage: float = Field(default=0.55, ge=0.0, le=1.0)
    # Prefer novel group keys for neutralization.
    prefer_novel_groups: bool = True
    # Maximum group AC to qualify as "novel".
    max_group_ac: int = Field(default=30, ge=0)
    # How many fields to try per batch before giving up.
    fields_per_batch: int = Field(default=5, ge=1)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
class KillSwitches(BaseModel):
|
|
|
|
| 54 |
daily_brain_submissions_max: int = 200
|
| 55 |
consecutive_lint_fail_max: int = 10
|
| 56 |
consecutive_kill_verdict_max: int = 30
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
class FitnessWeights(BaseModel):
|
|
|
|
| 62 |
sharpe_os: float = 1.0
|
| 63 |
is_os_gap_penalty: float = 0.5
|
| 64 |
worst_year_penalty: float = 1.0
|
|
|
|
| 68 |
drawdown_penalty: float = 0.1
|
| 69 |
drawdown_threshold: float = 0.05
|
| 70 |
novelty_bonus: float = 0.4
|
| 71 |
+
# New: bonus for using AC=0 fields
|
| 72 |
+
goldmine_field_bonus: float = 0.3
|
| 73 |
+
# New: bonus for using novel group keys
|
| 74 |
+
novel_group_bonus: float = 0.2
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class SubmissionThresholds(BaseModel):
    """BRAIN submission pass/fail cutoffs from IQC 2026.

    A candidate alpha must clear every threshold here to qualify for
    submission. Fraction-valued cutoffs are validated to the 0..1 range.
    """
    # Minimum Sharpe ratio required to submit.
    min_sharpe: float = 1.25
    # Minimum BRAIN fitness score required to submit.
    min_fitness: float = 1.0
    # Maximum allowed turnover (fraction, 0..1).
    max_turnover: float = Field(default=0.70, ge=0.0, le=1.0)
    # Minimum Sharpe on the sub-universe check.
    min_sub_universe_sharpe: float = 0.78
    # Maximum allowed self-correlation (0..1).
    # NOTE(review): this duplicates Config.correlation_threshold (0.65)
    # elsewhere in the file — confirm the two are meant to track together.
    max_self_correlation: float = Field(default=0.65, ge=0.0, le=1.0)
|
| 84 |
|
| 85 |
|
| 86 |
class Paths(BaseModel):
|
|
|
|
| 87 |
root: Path = Path(os.getenv("AF_ROOT", "."))
|
| 88 |
data: Path = Field(default=None)
|
| 89 |
factor_store: Path = Field(default=None)
|
|
|
|
| 99 |
self.prompts = self.root / "prompts"
|
| 100 |
if self.logs is None:
|
| 101 |
self.logs = self.root / "logs"
|
|
|
|
| 102 |
for p in [self.data, self.factor_store, self.factor_store / "alphas",
|
| 103 |
self.prompts, self.prompts / "templates", self.logs]:
|
| 104 |
p.mkdir(parents=True, exist_ok=True)
|
| 105 |
|
| 106 |
|
| 107 |
class Config(BaseModel):
|
|
|
|
| 108 |
llm: LLMConfig = LLMConfig()
|
| 109 |
brain: BrainConfig = BrainConfig()
|
| 110 |
+
field_strategy: FieldSelectionStrategy = FieldSelectionStrategy()
|
| 111 |
kill: KillSwitches = KillSwitches()
|
| 112 |
fitness: FitnessWeights = FitnessWeights()
|
| 113 |
+
submission: SubmissionThresholds = SubmissionThresholds()
|
| 114 |
paths: Paths = Paths()
|
| 115 |
|
| 116 |
# Pipeline settings
|
| 117 |
+
batch_size: int = 10
|
| 118 |
max_iterations_per_family: int = 3
|
| 119 |
correlation_threshold: float = 0.65
|
| 120 |
min_sharpe_local_sim: float = 1.0
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
def load_config() -> Config:
    """Assemble and return the master :class:`Config`.

    AF_ROOT is read from the environment here, at call time, and passed
    explicitly into ``Paths`` — the ``Paths.root`` class default was
    captured once at import time and may be stale by now.
    """
    root_dir = Path(os.getenv("AF_ROOT", "."))
    return Config(paths=Paths(root=root_dir))
|
|
|
|
|
|
|
|
|