# Uploaded by gaurv007 with huggingface_hub: alpha_factory/schemas/__init__.py (revision 8b6847c, verified)
"""
Pydantic schemas — typed contracts between every agent.
No free-text. Every LLM output is schema-constrained.
"""
from pydantic import BaseModel, Field
from typing import Optional
from enum import Enum
class Neutralization(str, Enum):
    """Closed set of neutralization settings a blueprint may request.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (e.g. ``Neutralization.SECTOR == "sector"``).
    """

    SECTOR = "sector"
    INDUSTRY = "industry"
    SUBINDUSTRY = "subindustry"
    NONE = "none"
class AnomalyTag(str, Enum):
    """Closed set of anomaly category labels attached to a blueprint.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value. ``OTHER`` is the catch-all member.
    """

    PEAD = "pead"
    VALUE = "value"
    MOMENTUM = "momentum"
    REVERSAL = "reversal"
    LOW_VOL = "low_vol"
    QUALITY = "quality"
    LIQUIDITY = "liquidity"
    SENTIMENT = "sentiment"
    ANALYST = "analyst"
    OPTION_SURFACE = "option_surface"
    SOCIAL = "social"
    FUNDAMENTAL = "fundamental"
    TECHNICAL = "technical"
    EVENT = "event"
    OTHER = "other"
class Component(BaseModel):
    """A single component of a multi-factor alpha.

    All fields are required. ``horizon_days`` and ``weight`` are
    range-validated by their ``Field`` constraints.
    """

    name: str = Field(description="Descriptive name for this component")
    fields: list[str] = Field(description="BRAIN data fields used")
    operators: list[str] = Field(description="BRAIN operators applied")
    # Validated to lie in [1, 252].
    horizon_days: int = Field(ge=1, le=252, description="Lookback horizon")
    # Validated to lie in [-2.0, 2.0]; negative weights are allowed.
    weight: float = Field(ge=-2.0, le=2.0, description="Weight in composite")
    # NOTE(review): typed as a plain str, so the two values named in the
    # description are documented but not enforced by validation.
    sign_direction: str = Field(description="Expected cross-sectional sign: 'long_high' or 'long_low'")
class Blueprint(BaseModel):
    """Output of the Hypothesis Hunter. A structured factor idea.

    Only ``decay`` and ``academic_anchor`` have defaults; every other field
    must be supplied.
    """

    theme: str = Field(description="High-level theme/anomaly category")
    archetype: str = Field(description="Known archetype name or 'novel'")
    # Between one and five components are accepted.
    components: list[Component] = Field(min_length=1, max_length=5)
    neutralization: Neutralization
    # Validated to lie in [0, 20]; defaults to 5 when omitted.
    decay: int = Field(ge=0, le=20, default=5)
    # Must be at least 10 characters long.
    novelty_claim: str = Field(min_length=10, description="Why this is different from existing alphas")
    # Optional; None when no reference is given.
    academic_anchor: Optional[str] = Field(default=None, description="arXiv ID or DOI")
    anomaly_tag: AnomalyTag
class Expression(BaseModel):
    """Output of the Expression Compiler. A valid BRAIN expression.

    All fields are required. The model only checks the expression's minimum
    length; it does not parse or otherwise validate the expression itself.
    """

    # Must be at least 10 characters long.
    expression: str = Field(min_length=10, description="The BRAIN expression string")
    fields_used: list[str]
    operators_used: list[str]
    archetype_used: str = Field(description="Which template/archetype was used")
class LintResult(BaseModel):
    """Output of the Static Lint layer.

    ``passed`` is required; ``errors`` and ``warnings`` each default to a
    fresh empty list per instance.
    """

    passed: bool
    # default_factory gives every instance its own list (no shared state).
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
class SignSweepResult(BaseModel):
    """Output of the sign direction sweep.

    All fields are required and supplied by the caller; the model does not
    derive ``verdict`` or ``info_value`` from the two Sharpe values.
    """

    pos_sharpe: float
    neg_sharpe: float
    # NOTE(review): typed as a plain str, so 'pos'/'neg' is documented but
    # not enforced by validation.
    verdict: str = Field(description="'pos' or 'neg' — which direction works")
    info_value: float = Field(description="abs(pos - neg), higher = more signal")
class BrainMetrics(BaseModel):
    """Metrics harvested from BRAIN simulation.

    Everything except ``margin_pct``, ``long_count`` and ``short_count`` is
    required; those three default to ``None``. No range constraints are
    applied to any value.
    """

    alpha_id: str
    sharpe_full: float
    # NOTE(review): presumably in-sample / out-of-sample Sharpe — confirm
    # against the code that populates this model.
    sharpe_is: float
    sharpe_os: float
    fitness: float
    turnover: float
    returns: float
    max_drawdown: float
    # NOTE(review): the two yearly lists are presumably aligned by year;
    # neither their length nor their ordering is validated here.
    yearly_sharpe: list[float]
    yearly_returns: list[float]
    margin_pct: Optional[float] = None
    long_count: Optional[int] = None
    short_count: Optional[int] = None
class Verdict(str, Enum):
    """Three-way decision label: promote, iterate, or kill.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (e.g. ``Verdict.KILL == "kill"``).
    """

    PROMOTE = "promote"
    ITERATE = "iterate"
    KILL = "kill"
class CrowdScoutResult(BaseModel):
    """Output of the Crowd Scout agent.

    All fields are required. ``verdict`` is constrained to the ``Verdict``
    enum; ``reason`` is an unconstrained string.
    """

    # NOTE(review): no bounds are enforced here even though a correlation
    # would normally lie in [-1, 1] — confirm whether that is intended.
    max_corr_to_library: float
    is_thematic_duplicate: bool
    anomaly_already_saturated: bool
    verdict: Verdict
    reason: str
class SurgeonResult(BaseModel):
    """Output of the Performance Surgeon agent.

    All fields are required except ``dominant_regime``, which defaults to
    ``None``. ``verdict`` is constrained to the ``Verdict`` enum.
    """

    regime_dependent: bool
    decay_detected: bool
    sign_error_likely: bool
    # Optional; None when no regime is reported.
    dominant_regime: Optional[str] = None
    iteration_suggestion: str
    verdict: Verdict
    reason: str
class GatekeeperMemo(BaseModel):
    """Output of the Production Gatekeeper (Bigfish).

    All fields are required. Only ``confidence`` carries a range constraint.
    """

    go_no_go: bool
    # Validated to lie in [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    strengths: list[str]
    weaknesses: list[str]
    risks: list[str]
    recommendation: str
    full_memo: str