# Uploaded as alpha_factory/config.py (revision 9c2119c, verified)
"""
Configuration v3 — all settings in one place.
Pydantic v2 compatible (removed deprecated model_post_init).
"""
from pydantic import BaseModel, Field, model_validator
from pathlib import Path
import os
class LLMConfig(BaseModel):
    """Model names and sampling settings for the tiered LLM fleet (micro → big)."""
    # Model identifiers per size tier, served through an OpenAI-compatible endpoint.
    microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
    tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
    mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
    bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
    # Plain default for consistency with every other field in this file;
    # Field(default=...) is equivalent in pydantic v2 but was used only here.
    base_url: str = "http://localhost:8000/v1"
    api_key: str = "dummy"  # placeholder — presumably the local server ignores it; confirm
    # Per-task sampling temperatures: creative generation vs. near-deterministic
    # compilation vs. moderate critique.
    temperature_generation: float = 0.7
    temperature_compilation: float = 0.1
    temperature_critique: float = 0.3
    max_tokens: int = 4096  # completion-length cap per request
class BrainConfig(BaseModel):
    """Connection and simulation settings for the WorldQuant BRAIN platform."""
    api_url: str = "https://api.worldquantbrain.com"
    region: str = "USA"  # market region for simulations
    universe: str = "TOP3000"  # instrument universe
    delay: int = 1  # NOTE(review): presumably BRAIN's data-delay setting (delay-1) — confirm
    truncation: float = 0.08  # BRAIN truncation simulation setting
    pasteurization: str = "ON"
    nan_handling: str = "OFF"
    max_concurrent: int = 4  # max simultaneous simulations
    submit_interval_sec: float = 15.0  # minimum seconds between submissions
class FieldSelectionStrategy(BaseModel):
    """Strategy for picking fields — key lever for alpha novelty."""
    # Mutable list default is safe here: pydantic copies defaults per instance.
    tier_priority: list[str] = ["tier1", "tier2", "tier3"]  # order in which tiers are consumed
    max_alpha_count: int = 50
    goldmine_weight_multiplier: float = 10.0  # boost applied to "goldmine" fields
    min_coverage: float = 0.55  # minimum data coverage required for a field
    prefer_novel_groups: bool = True
    max_group_ac: int = 30
    fields_per_batch: int = 5  # fields handed to the generator per batch
class KillSwitches(BaseModel):
    """Hard limits that halt the pipeline when exceeded."""
    daily_brain_submissions_max: int = 200  # cap on BRAIN submissions per day
    consecutive_lint_fail_max: int = 10  # stop after this many lint failures in a row
    consecutive_kill_verdict_max: int = 30  # stop after this many kill verdicts in a row
    daily_llm_token_budget: int = 5_000_000  # total LLM tokens allowed per day
    max_credits_per_family: int = 3  # retry/iteration credits per alpha family
class FitnessWeights(BaseModel):
    """Weights (and paired thresholds) for the composite alpha fitness score."""
    sharpe_os: float = 1.0  # weight on out-of-sample Sharpe
    is_os_gap_penalty: float = 0.5  # penalizes in-sample vs. out-of-sample gap (overfit signal)
    worst_year_penalty: float = 1.0
    crowding_penalty: float = 0.3
    # Turnover is penalized only beyond its threshold below.
    turnover_penalty: float = 0.2
    turnover_threshold: float = 0.40
    # Drawdown is penalized only beyond its threshold below.
    drawdown_penalty: float = 0.1
    drawdown_threshold: float = 0.05
    # Bonuses rewarding novelty and use of high-value fields/groups.
    novelty_bonus: float = 0.4
    goldmine_field_bonus: float = 0.3
    novel_group_bonus: float = 0.2
class SubmissionThresholds(BaseModel):
    """BRAIN submission pass/fail cutoffs from IQC 2026."""
    min_sharpe: float = 1.25  # minimum Sharpe ratio to submit
    min_fitness: float = 1.0  # minimum BRAIN fitness metric
    max_turnover: float = 0.70  # maximum allowed turnover
    min_sub_universe_sharpe: float = 0.78  # Sharpe floor on the sub-universe check
    max_self_correlation: float = 0.65  # max correlation to one's own prior alphas
class Paths(BaseModel):
    """Filesystem layout for the pipeline.

    Unset paths are derived from ``root`` and the full directory tree is
    created (idempotently) when the model is constructed.
    """
    # default_factory reads AF_ROOT at *instantiation* time rather than at
    # import time, so setting the env var after this module is imported (e.g.
    # in tests or deployment scripts) is honored instead of silently ignored.
    root: Path = Field(default_factory=lambda: Path(os.getenv("AF_ROOT", ".")))
    data: Path | None = None
    factor_store: Path | None = None
    prompts: Path | None = None
    logs: Path | None = None

    @model_validator(mode="after")
    def set_defaults(self):
        """Fill unset paths from ``root`` and create all directories."""
        if self.data is None:
            self.data = self.root / "data"
        if self.factor_store is None:
            self.factor_store = self.root / "factor_store"
        if self.prompts is None:
            self.prompts = self.root / "prompts"
        if self.logs is None:
            self.logs = self.root / "logs"
        # Side effect: ensure every directory (plus fixed subdirectories) exists.
        for p in [self.data, self.factor_store, self.factor_store / "alphas",
                  self.prompts, self.prompts / "templates", self.logs]:
            p.mkdir(parents=True, exist_ok=True)
        return self
class Config(BaseModel):
    """Top-level configuration aggregating every sub-config plus pipeline knobs."""
    # default_factory defers construction of the nested models to Config()
    # instantiation. With the previous eager defaults (e.g. ``paths: Paths = Paths()``)
    # merely *importing* this module constructed Paths and created directories on
    # disk as an import-time side effect.
    llm: LLMConfig = Field(default_factory=LLMConfig)
    brain: BrainConfig = Field(default_factory=BrainConfig)
    field_strategy: FieldSelectionStrategy = Field(default_factory=FieldSelectionStrategy)
    kill: KillSwitches = Field(default_factory=KillSwitches)
    fitness: FitnessWeights = Field(default_factory=FitnessWeights)
    submission: SubmissionThresholds = Field(default_factory=SubmissionThresholds)
    paths: Paths = Field(default_factory=Paths)
    # Pipeline settings
    batch_size: int = 10  # candidates processed per batch
    max_iterations_per_family: int = 3  # refinement rounds per alpha family
    correlation_threshold: float = 0.65  # de-duplication cutoff between candidates
    min_sharpe_local_sim: float = 1.0  # local-simulation Sharpe floor before BRAIN
    min_info_value_sign_sweep: float = 0.3
    max_parallel_candidates: int = 3  # Concurrency control for LLM calls
    enable_brain_client: bool = False  # Must be explicitly enabled (needs valid BRAIN token)
    use_proven_templates: bool = False  # Use deterministic templates instead of LLM generation
def load_config() -> Config:
    """Construct and return a :class:`Config` populated with all defaults."""
    cfg = Config()
    return cfg