File size: 3,979 Bytes
edca7d5
18cab64
 
edca7d5
18cab64
edca7d5
 
 
 
 
 
 
 
 
a7a3c87
edca7d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7a3c87
 
 
 
 
 
 
 
 
 
 
edca7d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7a3c87
 
 
 
 
 
 
 
 
 
 
edca7d5
 
 
 
18cab64
 
 
 
edca7d5
18cab64
 
edca7d5
 
 
 
 
 
 
 
 
 
 
18cab64
edca7d5
 
 
 
 
a7a3c87
edca7d5
 
a7a3c87
edca7d5
 
 
a7a3c87
edca7d5
 
 
 
18cab64
 
 
edca7d5
 
 
18cab64
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Configuration v3 — all settings in one place.
Pydantic v2 compatible (removed deprecated model_post_init).
"""
from pydantic import BaseModel, Field, model_validator
from pathlib import Path
import os


class LLMConfig(BaseModel):
    """Model names and sampling settings for an OpenAI-compatible LLM endpoint."""
    # Model tiers, smallest to largest (HF-style model ids).
    microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
    tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
    mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
    bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
    # Default points at a local OpenAI-compatible server (e.g. vLLM);
    # api_key is a placeholder since local servers usually ignore it.
    base_url: str = Field(default="http://localhost:8000/v1")
    api_key: str = "dummy"
    # Per-task temperatures: creative generation runs hot, compilation
    # nearly deterministic, critique in between.
    temperature_generation: float = 0.7
    temperature_compilation: float = 0.1
    temperature_critique: float = 0.3
    max_tokens: int = 4096


class BrainConfig(BaseModel):
    """Connection and simulation settings for the WorldQuant BRAIN platform."""
    api_url: str = "https://api.worldquantbrain.com"
    # Simulation scope; presumably standard BRAIN region/universe codes —
    # verify against the platform's simulation settings.
    region: str = "USA"
    universe: str = "TOP3000"
    delay: int = 1
    # NOTE(review): truncation/pasteurization/nan_handling look like BRAIN
    # simulation parameters passed through verbatim — confirm against the API.
    truncation: float = 0.08
    pasteurization: str = "ON"
    nan_handling: str = "OFF"
    # Client-side throttling: parallel simulations and spacing between submits.
    max_concurrent: int = 4
    submit_interval_sec: float = 15.0


class FieldSelectionStrategy(BaseModel):
    """Strategy for picking fields — key lever for alpha novelty."""
    # Tiers are tried in this order. (Mutable default is safe: Pydantic v2
    # copies field defaults per instance.)
    tier_priority: list[str] = ["tier1", "tier2", "tier3"]
    # Skip fields already used by too many existing alphas.
    max_alpha_count: int = 50
    # Up-weight "goldmine" fields when sampling candidates.
    goldmine_weight_multiplier: float = 10.0
    # Minimum data coverage fraction a field must have to be eligible.
    min_coverage: float = 0.55
    prefer_novel_groups: bool = True
    # NOTE(review): "ac" appears to mean alpha-count at the group level —
    # confirm against the consumer of this setting.
    max_group_ac: int = 30
    fields_per_batch: int = 5


class KillSwitches(BaseModel):
    """Hard limits that stop the pipeline before it wastes quota or money."""
    # Cap on BRAIN submissions per day.
    daily_brain_submissions_max: int = 200
    # Abort a run after this many lint failures / kill verdicts in a row.
    consecutive_lint_fail_max: int = 10
    consecutive_kill_verdict_max: int = 30
    # Daily ceiling on LLM token spend across all calls.
    daily_llm_token_budget: int = 5_000_000
    # Retry budget per alpha family before giving up on it.
    max_credits_per_family: int = 3


class FitnessWeights(BaseModel):
    """Weights for scoring candidate alphas: rewards minus penalties.

    Each `*_penalty` weight scales a penalty term; the matching
    `*_threshold` fields set where that penalty starts to apply.
    """
    # Primary reward: out-of-sample Sharpe.
    sharpe_os: float = 1.0
    # Penalize overfitting (in-sample vs out-of-sample gap) and bad years.
    is_os_gap_penalty: float = 0.5
    worst_year_penalty: float = 1.0
    crowding_penalty: float = 0.3
    # Turnover above the threshold is penalized.
    turnover_penalty: float = 0.2
    turnover_threshold: float = 0.40
    # Drawdown beyond the threshold is penalized.
    drawdown_penalty: float = 0.1
    drawdown_threshold: float = 0.05
    # Bonuses encouraging novel ideas and under-used data.
    novelty_bonus: float = 0.4
    goldmine_field_bonus: float = 0.3
    novel_group_bonus: float = 0.2


class SubmissionThresholds(BaseModel):
    """BRAIN submission pass/fail cutoffs from IQC 2026."""
    # Minimum performance an alpha must clear before submission.
    min_sharpe: float = 1.25
    min_fitness: float = 1.0
    # Upper bound on portfolio turnover.
    max_turnover: float = 0.70
    # Sharpe required on the sub-universe check.
    min_sub_universe_sharpe: float = 0.78
    # Reject alphas too correlated with our own prior submissions.
    max_self_correlation: float = 0.65


class Paths(BaseModel):
    """Filesystem layout for the pipeline, rooted at ``$AF_ROOT`` (default ".").

    Instantiating this model is side-effecting: the ``set_defaults``
    validator creates the whole directory tree on disk.
    """
    # default_factory defers the env lookup to instantiation time; the old
    # class-level default (`Path(os.getenv(...))`) froze AF_ROOT's value at
    # import, silently ignoring any later change to the environment.
    root: Path = Field(default_factory=lambda: Path(os.getenv("AF_ROOT", ".")))
    # Sub-paths may be set explicitly; any left as None are derived from root.
    data: Path | None = None
    factor_store: Path | None = None
    prompts: Path | None = None
    logs: Path | None = None

    @model_validator(mode="after")
    def set_defaults(self):
        """Derive unset sub-paths from *root* and ensure all directories exist."""
        if self.data is None:
            self.data = self.root / "data"
        if self.factor_store is None:
            self.factor_store = self.root / "factor_store"
        if self.prompts is None:
            self.prompts = self.root / "prompts"
        if self.logs is None:
            self.logs = self.root / "logs"
        # Create the tree up front (including nested alphas/templates dirs)
        # so later writers never hit a missing parent directory.
        for p in [self.data, self.factor_store, self.factor_store / "alphas",
                  self.prompts, self.prompts / "templates", self.logs]:
            p.mkdir(parents=True, exist_ok=True)
        return self


class Config(BaseModel):
    """Root configuration aggregating every sub-config plus pipeline knobs."""
    # default_factory builds each sub-config lazily at Config() time. The
    # previous eager defaults (e.g. `paths: Paths = Paths()`) were evaluated
    # while this class body executed, so merely importing the module ran the
    # Paths validator and created directories on disk as a side effect.
    llm: LLMConfig = Field(default_factory=LLMConfig)
    brain: BrainConfig = Field(default_factory=BrainConfig)
    field_strategy: FieldSelectionStrategy = Field(default_factory=FieldSelectionStrategy)
    kill: KillSwitches = Field(default_factory=KillSwitches)
    fitness: FitnessWeights = Field(default_factory=FitnessWeights)
    submission: SubmissionThresholds = Field(default_factory=SubmissionThresholds)
    paths: Paths = Field(default_factory=Paths)

    # Pipeline settings
    batch_size: int = 10
    max_iterations_per_family: int = 3
    # Drop candidates too correlated with existing alphas.
    correlation_threshold: float = 0.65
    # Local pre-screen gates before anything is sent to BRAIN.
    min_sharpe_local_sim: float = 1.0
    min_info_value_sign_sweep: float = 0.3
    max_parallel_candidates: int = 3  # Concurrency control for LLM calls
    enable_brain_client: bool = False  # Must be explicitly enabled (needs valid BRAIN token)
    use_proven_templates: bool = False  # Use deterministic templates instead of LLM generation


def load_config() -> Config:
    """Build and return a Config populated entirely from field defaults."""
    config = Config()
    return config