File size: 3,979 Bytes
edca7d5
18cab64
 
edca7d5
18cab64
edca7d5
 
 
 
 
 
 
 
 
a7a3c87
edca7d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7a3c87
 
 
 
 
 
 
 
 
 
 
edca7d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7a3c87
 
 
 
 
 
 
 
 
 
 
edca7d5
 
 
 
18cab64
 
 
 
edca7d5
18cab64
 
edca7d5
 
 
 
 
 
 
 
 
 
 
18cab64
edca7d5
 
 
 
 
a7a3c87
edca7d5
 
a7a3c87
edca7d5
 
 
a7a3c87
edca7d5
 
 
 
18cab64
 
 
edca7d5
 
 
18cab64
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Configuration v3 — all settings in one place.
Pydantic v2 compatible (removed deprecated model_post_init).
"""
from pydantic import BaseModel, Field, model_validator
from pathlib import Path
import os


class LLMConfig(BaseModel):
    """Model names and sampling settings for an OpenAI-compatible LLM endpoint."""
    # Model tiers, smallest to largest (HF-style model ids).
    microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
    tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
    mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
    bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
    # Default points at a local OpenAI-compatible server (e.g. vLLM);
    # api_key is a placeholder since local servers usually ignore it.
    base_url: str = Field(default="http://localhost:8000/v1")
    api_key: str = "dummy"
    # Per-task temperatures: creative generation runs hot, compilation
    # nearly deterministic, critique in between.
    temperature_generation: float = 0.7
    temperature_compilation: float = 0.1
    temperature_critique: float = 0.3
    max_tokens: int = 4096


class BrainConfig(BaseModel):
    """Connection and simulation settings for the WorldQuant BRAIN platform."""
    api_url: str = "https://api.worldquantbrain.com"
    # Simulation scope; presumably standard BRAIN region/universe codes —
    # verify against the platform's simulation settings.
    region: str = "USA"
    universe: str = "TOP3000"
    delay: int = 1
    # NOTE(review): truncation/pasteurization/nan_handling look like BRAIN
    # simulation parameters passed through verbatim — confirm against the API.
    truncation: float = 0.08
    pasteurization: str = "ON"
    nan_handling: str = "OFF"
    # Client-side throttling: parallel simulations and spacing between submits.
    max_concurrent: int = 4
    submit_interval_sec: float = 15.0


class FieldSelectionStrategy(BaseModel):
    """Strategy for picking fields — key lever for alpha novelty."""
    # Tiers are tried in this order. (Mutable default is safe: Pydantic v2
    # copies field defaults per instance.)
    tier_priority: list[str] = ["tier1", "tier2", "tier3"]
    # Skip fields already used by too many existing alphas.
    max_alpha_count: int = 50
    # Up-weight "goldmine" fields when sampling candidates.
    goldmine_weight_multiplier: float = 10.0
    # Minimum data coverage fraction a field must have to be eligible.
    min_coverage: float = 0.55
    prefer_novel_groups: bool = True
    # NOTE(review): "ac" appears to mean alpha-count at the group level —
    # confirm against the consumer of this setting.
    max_group_ac: int = 30
    fields_per_batch: int = 5


class KillSwitches(BaseModel):
    """Hard limits that stop the pipeline before it wastes quota or money."""
    # Cap on BRAIN submissions per day.
    daily_brain_submissions_max: int = 200
    # Abort a run after this many lint failures / kill verdicts in a row.
    consecutive_lint_fail_max: int = 10
    consecutive_kill_verdict_max: int = 30
    # Daily ceiling on LLM token spend across all calls.
    daily_llm_token_budget: int = 5_000_000
    # Retry budget per alpha family before giving up on it.
    max_credits_per_family: int = 3


class FitnessWeights(BaseModel):
    """Weights for scoring candidate alphas: rewards minus penalties.

    Each `*_penalty` weight scales a penalty term; the matching
    `*_threshold` fields set where that penalty starts to apply.
    """
    # Primary reward: out-of-sample Sharpe.
    sharpe_os: float = 1.0
    # Penalize overfitting (in-sample vs out-of-sample gap) and bad years.
    is_os_gap_penalty: float = 0.5
    worst_year_penalty: float = 1.0
    crowding_penalty: float = 0.3
    # Turnover above the threshold is penalized.
    turnover_penalty: float = 0.2
    turnover_threshold: float = 0.40
    # Drawdown beyond the threshold is penalized.
    drawdown_penalty: float = 0.1
    drawdown_threshold: float = 0.05
    # Bonuses encouraging novel ideas and under-used data.
    novelty_bonus: float = 0.4
    goldmine_field_bonus: float = 0.3
    novel_group_bonus: float = 0.2


class SubmissionThresholds(BaseModel):
    """BRAIN submission pass/fail cutoffs from IQC 2026."""
    # Minimum performance an alpha must clear before submission.
    min_sharpe: float = 1.25
    min_fitness: float = 1.0
    # Upper bound on portfolio turnover.
    max_turnover: float = 0.70
    # Sharpe required on the sub-universe check.
    min_sub_universe_sharpe: float = 0.78
    # Reject alphas too correlated with our own prior submissions.
    max_self_correlation: float = 0.65


class Paths(BaseModel):
    """Filesystem layout for the pipeline, rooted at ``$AF_ROOT`` (default ".").

    Instantiating this model is side-effecting: the ``set_defaults``
    validator creates the whole directory tree on disk.
    """
    # default_factory defers the env lookup to instantiation time; the old
    # class-level default (`Path(os.getenv(...))`) froze AF_ROOT's value at
    # import, silently ignoring any later change to the environment.
    root: Path = Field(default_factory=lambda: Path(os.getenv("AF_ROOT", ".")))
    # Sub-paths may be set explicitly; any left as None are derived from root.
    data: Path | None = None
    factor_store: Path | None = None
    prompts: Path | None = None
    logs: Path | None = None

    @model_validator(mode="after")
    def set_defaults(self):
        """Derive unset sub-paths from *root* and ensure all directories exist."""
        if self.data is None:
            self.data = self.root / "data"
        if self.factor_store is None:
            self.factor_store = self.root / "factor_store"
        if self.prompts is None:
            self.prompts = self.root / "prompts"
        if self.logs is None:
            self.logs = self.root / "logs"
        # Create the tree up front (including nested alphas/templates dirs)
        # so later writers never hit a missing parent directory.
        for p in [self.data, self.factor_store, self.factor_store / "alphas",
                  self.prompts, self.prompts / "templates", self.logs]:
            p.mkdir(parents=True, exist_ok=True)
        return self


class Config(BaseModel):
    """Root configuration aggregating every sub-config plus pipeline knobs."""
    # default_factory builds each sub-config lazily at Config() time. The
    # previous eager defaults (e.g. `paths: Paths = Paths()`) were evaluated
    # while this class body executed, so merely importing the module ran the
    # Paths validator and created directories on disk as a side effect.
    llm: LLMConfig = Field(default_factory=LLMConfig)
    brain: BrainConfig = Field(default_factory=BrainConfig)
    field_strategy: FieldSelectionStrategy = Field(default_factory=FieldSelectionStrategy)
    kill: KillSwitches = Field(default_factory=KillSwitches)
    fitness: FitnessWeights = Field(default_factory=FitnessWeights)
    submission: SubmissionThresholds = Field(default_factory=SubmissionThresholds)
    paths: Paths = Field(default_factory=Paths)

    # Pipeline settings
    batch_size: int = 10
    max_iterations_per_family: int = 3
    # Drop candidates too correlated with existing alphas.
    correlation_threshold: float = 0.65
    # Local pre-screen gates before anything is sent to BRAIN.
    min_sharpe_local_sim: float = 1.0
    min_info_value_sign_sweep: float = 0.3
    max_parallel_candidates: int = 3  # Concurrency control for LLM calls
    enable_brain_client: bool = False  # Must be explicitly enabled (needs valid BRAIN token)
    use_proven_templates: bool = False  # Use deterministic templates instead of LLM generation


def load_config() -> Config:
    """Build and return a Config populated entirely from field defaults."""
    config = Config()
    return config