# Uploaded as alpha_factory/config.py (revision 9c2119c, verified)
"""
Configuration v3 — all settings in one place.
Pydantic v2 compatible (removed deprecated model_post_init).
"""
from pydantic import BaseModel, Field, model_validator
from pathlib import Path
import os
class LLMConfig(BaseModel):
    """Model names and sampling settings for the tiered LLM fleet (micro → big)."""
    # Model identifiers per size tier, served through an OpenAI-compatible endpoint.
    microfish_model: str = "Qwen/Qwen2.5-1.5B-Instruct"
    tinyfish_model: str = "Qwen/Qwen2.5-3B-Instruct"
    mediumfish_model: str = "Qwen/Qwen2.5-7B-Instruct"
    bigfish_model: str = "Qwen/Qwen2.5-72B-Instruct"
    # Plain default for consistency with every other field in this file;
    # Field(default=...) is equivalent in pydantic v2 but was used only here.
    base_url: str = "http://localhost:8000/v1"
    api_key: str = "dummy"  # placeholder — presumably the local server ignores it; confirm
    # Per-task sampling temperatures: creative generation vs. near-deterministic
    # compilation vs. moderate critique.
    temperature_generation: float = 0.7
    temperature_compilation: float = 0.1
    temperature_critique: float = 0.3
    max_tokens: int = 4096  # completion-length cap per request
class BrainConfig(BaseModel):
    """Connection and simulation settings for the WorldQuant BRAIN platform."""
    api_url: str = "https://api.worldquantbrain.com"
    region: str = "USA"  # market region for simulations
    universe: str = "TOP3000"  # instrument universe
    delay: int = 1  # NOTE(review): presumably BRAIN's data-delay setting (delay-1) — confirm
    truncation: float = 0.08  # BRAIN truncation simulation setting
    pasteurization: str = "ON"
    nan_handling: str = "OFF"
    max_concurrent: int = 4  # max simultaneous simulations
    submit_interval_sec: float = 15.0  # minimum seconds between submissions
class FieldSelectionStrategy(BaseModel):
    """Strategy for picking fields — key lever for alpha novelty."""
    # Mutable list default is safe here: pydantic copies defaults per instance.
    tier_priority: list[str] = ["tier1", "tier2", "tier3"]  # order in which tiers are consumed
    max_alpha_count: int = 50
    goldmine_weight_multiplier: float = 10.0  # boost applied to "goldmine" fields
    min_coverage: float = 0.55  # minimum data coverage required for a field
    prefer_novel_groups: bool = True
    max_group_ac: int = 30
    fields_per_batch: int = 5  # fields handed to the generator per batch
class KillSwitches(BaseModel):
    """Hard limits that halt the pipeline when exceeded."""
    daily_brain_submissions_max: int = 200  # cap on BRAIN submissions per day
    consecutive_lint_fail_max: int = 10  # stop after this many lint failures in a row
    consecutive_kill_verdict_max: int = 30  # stop after this many kill verdicts in a row
    daily_llm_token_budget: int = 5_000_000  # total LLM tokens allowed per day
    max_credits_per_family: int = 3  # retry/iteration credits per alpha family
class FitnessWeights(BaseModel):
    """Weights (and paired thresholds) for the composite alpha fitness score."""
    sharpe_os: float = 1.0  # weight on out-of-sample Sharpe
    is_os_gap_penalty: float = 0.5  # penalizes in-sample vs. out-of-sample gap (overfit signal)
    worst_year_penalty: float = 1.0
    crowding_penalty: float = 0.3
    # Turnover is penalized only beyond its threshold below.
    turnover_penalty: float = 0.2
    turnover_threshold: float = 0.40
    # Drawdown is penalized only beyond its threshold below.
    drawdown_penalty: float = 0.1
    drawdown_threshold: float = 0.05
    # Bonuses rewarding novelty and use of high-value fields/groups.
    novelty_bonus: float = 0.4
    goldmine_field_bonus: float = 0.3
    novel_group_bonus: float = 0.2
class SubmissionThresholds(BaseModel):
    """BRAIN submission pass/fail cutoffs from IQC 2026."""
    min_sharpe: float = 1.25  # minimum Sharpe ratio to submit
    min_fitness: float = 1.0  # minimum BRAIN fitness metric
    max_turnover: float = 0.70  # maximum allowed turnover
    min_sub_universe_sharpe: float = 0.78  # Sharpe floor on the sub-universe check
    max_self_correlation: float = 0.65  # max correlation to one's own prior alphas
class Paths(BaseModel):
    """Filesystem layout for the pipeline.

    Unset paths are derived from ``root`` and the full directory tree is
    created (idempotently) when the model is constructed.
    """
    # default_factory reads AF_ROOT at *instantiation* time rather than at
    # import time, so setting the env var after this module is imported (e.g.
    # in tests or deployment scripts) is honored instead of silently ignored.
    root: Path = Field(default_factory=lambda: Path(os.getenv("AF_ROOT", ".")))
    data: Path | None = None
    factor_store: Path | None = None
    prompts: Path | None = None
    logs: Path | None = None

    @model_validator(mode="after")
    def set_defaults(self):
        """Fill unset paths from ``root`` and create all directories."""
        if self.data is None:
            self.data = self.root / "data"
        if self.factor_store is None:
            self.factor_store = self.root / "factor_store"
        if self.prompts is None:
            self.prompts = self.root / "prompts"
        if self.logs is None:
            self.logs = self.root / "logs"
        # Side effect: ensure every directory (plus fixed subdirectories) exists.
        for p in [self.data, self.factor_store, self.factor_store / "alphas",
                  self.prompts, self.prompts / "templates", self.logs]:
            p.mkdir(parents=True, exist_ok=True)
        return self
class Config(BaseModel):
    """Top-level configuration aggregating every sub-config plus pipeline knobs."""
    # default_factory defers construction of the nested models to Config()
    # instantiation. With the previous eager defaults (e.g. ``paths: Paths = Paths()``)
    # merely *importing* this module constructed Paths and created directories on
    # disk as an import-time side effect.
    llm: LLMConfig = Field(default_factory=LLMConfig)
    brain: BrainConfig = Field(default_factory=BrainConfig)
    field_strategy: FieldSelectionStrategy = Field(default_factory=FieldSelectionStrategy)
    kill: KillSwitches = Field(default_factory=KillSwitches)
    fitness: FitnessWeights = Field(default_factory=FitnessWeights)
    submission: SubmissionThresholds = Field(default_factory=SubmissionThresholds)
    paths: Paths = Field(default_factory=Paths)
    # Pipeline settings
    batch_size: int = 10  # candidates processed per batch
    max_iterations_per_family: int = 3  # refinement rounds per alpha family
    correlation_threshold: float = 0.65  # de-duplication cutoff between candidates
    min_sharpe_local_sim: float = 1.0  # local-simulation Sharpe floor before BRAIN
    min_info_value_sign_sweep: float = 0.3
    max_parallel_candidates: int = 3  # Concurrency control for LLM calls
    enable_brain_client: bool = False  # Must be explicitly enabled (needs valid BRAIN token)
    use_proven_templates: bool = False  # Use deterministic templates instead of LLM generation
def load_config() -> Config:
    """Construct and return a :class:`Config` populated with all defaults."""
    cfg = Config()
    return cfg