""" Configuration v3 — all settings in one place. Pydantic v2 compatible (removed deprecated model_post_init). """ from pydantic import BaseModel, Field, model_validator from pathlib import Path import os class LLMConfig(BaseModel): microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct" tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct" mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct" bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct" base_url: str = Field(default="http://localhost:8000/v1") api_key: str = "dummy" temperature_generation: float = 0.7 temperature_compilation: float = 0.1 temperature_critique: float = 0.3 max_tokens: int = 4096 class BrainConfig(BaseModel): api_url: str = "https://api.worldquantbrain.com" region: str = "USA" universe: str = "TOP3000" delay: int = 1 truncation: float = 0.08 pasteurization: str = "ON" nan_handling: str = "OFF" max_concurrent: int = 4 submit_interval_sec: float = 15.0 class FieldSelectionStrategy(BaseModel): """Strategy for picking fields — key lever for alpha novelty.""" tier_priority: list[str] = ["tier1", "tier2", "tier3"] max_alpha_count: int = 50 goldmine_weight_multiplier: float = 10.0 min_coverage: float = 0.55 prefer_novel_groups: bool = True max_group_ac: int = 30 fields_per_batch: int = 5 class KillSwitches(BaseModel): daily_brain_submissions_max: int = 200 consecutive_lint_fail_max: int = 10 consecutive_kill_verdict_max: int = 30 daily_llm_token_budget: int = 5_000_000 max_credits_per_family: int = 3 class FitnessWeights(BaseModel): sharpe_os: float = 1.0 is_os_gap_penalty: float = 0.5 worst_year_penalty: float = 1.0 crowding_penalty: float = 0.3 turnover_penalty: float = 0.2 turnover_threshold: float = 0.40 drawdown_penalty: float = 0.1 drawdown_threshold: float = 0.05 novelty_bonus: float = 0.4 goldmine_field_bonus: float = 0.3 novel_group_bonus: float = 0.2 class SubmissionThresholds(BaseModel): """BRAIN submission pass/fail cutoffs from IQC 2026.""" min_sharpe: float = 1.25 min_fitness: float = 1.0 max_turnover: float = 0.70 min_sub_universe_sharpe: float = 0.78 max_self_correlation: float = 0.65 class Paths(BaseModel): root: Path = Path(os.getenv("AF_ROOT", ".")) data: Path | None = None factor_store: Path | None = None prompts: Path | None = None logs: Path | None = None @model_validator(mode="after") def set_defaults(self): if self.data is None: self.data = self.root / "data" if self.factor_store is None: self.factor_store = self.root / "factor_store" if self.prompts is None: self.prompts = self.root / "prompts" if self.logs is None: self.logs = self.root / "logs" for p in [self.data, self.factor_store, self.factor_store / "alphas", self.prompts, self.prompts / "templates", self.logs]: p.mkdir(parents=True, exist_ok=True) return self class Config(BaseModel): llm: LLMConfig = LLMConfig() brain: BrainConfig = BrainConfig() field_strategy: FieldSelectionStrategy = FieldSelectionStrategy() kill: KillSwitches = KillSwitches() fitness: FitnessWeights = FitnessWeights() submission: SubmissionThresholds = SubmissionThresholds() paths: Paths = Paths() # Pipeline settings batch_size: int = 10 max_iterations_per_family: int = 3 correlation_threshold: float = 0.65 min_sharpe_local_sim: float = 1.0 min_info_value_sign_sweep: float = 0.3 max_parallel_candidates: int = 3 # Concurrency control for LLM calls enable_brain_client: bool = False # Must be explicitly enabled (needs valid BRAIN token) use_proven_templates: bool = False # Use deterministic templates instead of LLM generation def load_config() -> Config: return Config()