Upload alpha_factory/schemas/__init__.py with huggingface_hub

8b6847c verified 19 days ago

4.01 kB

	"""
	Pydantic schemas — typed contracts between every agent.
	No free-text. Every LLM output is schema-constrained.
	"""
	from pydantic import BaseModel, Field
	from typing import Optional
	from enum import Enum


	# Closed set of neutralization choices; used as the type of
	# ``Blueprint.neutralization``. The ``str`` mix-in makes every member
	# compare equal to its lowercase string value.
	class Neutralization(str, Enum):
	    SECTOR = "sector"
	    INDUSTRY = "industry"
	    SUBINDUSTRY = "subindustry"
	    NONE = "none"


	# Closed set of anomaly categories; used as the type of
	# ``Blueprint.anomaly_tag``. The ``str`` mix-in makes every member compare
	# equal to its lowercase string value. ``OTHER`` is the catch-all member.
	class AnomalyTag(str, Enum):
	    PEAD = "pead"  # presumably post-earnings-announcement drift — confirm
	    VALUE = "value"
	    MOMENTUM = "momentum"
	    REVERSAL = "reversal"
	    LOW_VOL = "low_vol"
	    QUALITY = "quality"
	    LIQUIDITY = "liquidity"
	    SENTIMENT = "sentiment"
	    ANALYST = "analyst"
	    OPTION_SURFACE = "option_surface"
	    SOCIAL = "social"
	    FUNDAMENTAL = "fundamental"
	    TECHNICAL = "technical"
	    EVENT = "event"
	    OTHER = "other"


	class Component(BaseModel):
	    """A single component of a multi-factor alpha."""
	    # All six fields are required (none declares a default).
	    name: str = Field(description="Descriptive name for this component")
	    fields: list[str] = Field(description="BRAIN data fields used")
	    operators: list[str] = Field(description="BRAIN operators applied")
	    # Validated to lie in the inclusive range 1..252.
	    horizon_days: int = Field(ge=1, le=252, description="Lookback horizon")
	    # Validated to lie in the inclusive range -2.0..2.0.
	    weight: float = Field(ge=-2.0, le=2.0, description="Weight in composite")
	    # NOTE(review): typed as a plain ``str``; the description names two
	    # values but nothing here rejects other strings.
	    sign_direction: str = Field(
	        description="Expected cross-sectional sign: 'long_high' or 'long_low'"
	    )


	class Blueprint(BaseModel):
	    """Output of the Hypothesis Hunter. A structured factor idea."""
	    theme: str = Field(description="High-level theme/anomaly category")
	    archetype: str = Field(description="Known archetype name or 'novel'")
	    # Must hold between one and five Component entries (inclusive).
	    components: list[Component] = Field(min_length=1, max_length=5)
	    neutralization: Neutralization
	    # Optional; validated to lie in 0..20 and defaults to 5 when omitted.
	    decay: int = Field(default=5, ge=0, le=20)
	    # Must be at least 10 characters long.
	    novelty_claim: str = Field(
	        min_length=10,
	        description="Why this is different from existing alphas",
	    )
	    # Optional; None when no anchor is supplied.
	    academic_anchor: Optional[str] = Field(default=None, description="arXiv ID or DOI")
	    anomaly_tag: AnomalyTag


	class Expression(BaseModel):
	    """Output of the Expression Compiler. A valid BRAIN expression."""
	    # All four fields are required. The expression string must be at least
	    # 10 characters long; no other validation of its content happens here.
	    expression: str = Field(min_length=10, description="The BRAIN expression string")
	    fields_used: list[str]
	    operators_used: list[str]
	    archetype_used: str = Field(description="Which template/archetype was used")


	class LintResult(BaseModel):
	    """Output of the Static Lint layer."""
	    # Required flag; this model does not derive it from ``errors``.
	    passed: bool
	    # Both message lists default to a fresh empty list per instance.
	    errors: list[str] = Field(default_factory=list)
	    warnings: list[str] = Field(default_factory=list)


	class SignSweepResult(BaseModel):
	    """Output of the sign direction sweep."""
	    # All four fields are required.
	    pos_sharpe: float
	    neg_sharpe: float
	    # NOTE(review): typed as a plain ``str``; the description names two
	    # values but nothing here rejects other strings.
	    verdict: str = Field(description="'pos' or 'neg' — which direction works")
	    # Supplied by the caller; this model does not compute or cross-check it
	    # against pos_sharpe / neg_sharpe.
	    info_value: float = Field(description="abs(pos - neg), higher = more signal")


	class BrainMetrics(BaseModel):
	    """Metrics harvested from BRAIN simulation."""
	    # Required fields. No range constraints are declared on any of them.
	    alpha_id: str
	    sharpe_full: float
	    # presumably in-sample / out-of-sample Sharpe — confirm with the producer
	    sharpe_is: float
	    sharpe_os: float
	    fitness: float
	    turnover: float
	    returns: float
	    max_drawdown: float
	    # presumably one entry per simulated year, index-aligned across the two
	    # lists — nothing here enforces equal lengths; confirm with the producer
	    yearly_sharpe: list[float]
	    yearly_returns: list[float]
	    # Optional fields; each is None when not provided.
	    margin_pct: Optional[float] = None
	    long_count: Optional[int] = None
	    short_count: Optional[int] = None


	# Three-way decision shared by ``CrowdScoutResult.verdict`` and
	# ``SurgeonResult.verdict``. The ``str`` mix-in makes every member compare
	# equal to its lowercase string value.
	class Verdict(str, Enum):
	    PROMOTE = "promote"
	    ITERATE = "iterate"
	    KILL = "kill"


	class CrowdScoutResult(BaseModel):
	    """Output of the Crowd Scout agent."""
	    # All five fields are required.
	    # No range constraint is declared on the correlation value.
	    max_corr_to_library: float
	    is_thematic_duplicate: bool
	    anomaly_already_saturated: bool
	    verdict: Verdict
	    # Free-text explanation accompanying the verdict.
	    reason: str


	class SurgeonResult(BaseModel):
	    """Output of the Performance Surgeon agent."""
	    # Required diagnostic flags.
	    regime_dependent: bool
	    decay_detected: bool
	    sign_error_likely: bool
	    # Optional; None when not provided. Nothing here ties it to
	    # ``regime_dependent``.
	    dominant_regime: Optional[str] = None
	    # Required free-text fields plus the overall verdict.
	    iteration_suggestion: str
	    verdict: Verdict
	    reason: str


	class GatekeeperMemo(BaseModel):
	    """Output of the Production Gatekeeper (Bigfish)."""
	    # All seven fields are required.
	    go_no_go: bool
	    # Validated to lie in the inclusive range 0.0..1.0.
	    confidence: float = Field(ge=0.0, le=1.0)
	    # The three lists may be empty; no minimum length is declared.
	    strengths: list[str]
	    weaknesses: list[str]
	    risks: list[str]
	    # Free-text fields.
	    recommendation: str
	    full_memo: str