"""Hidden ground-truth target state for the drug-target-validation POMDP.

The agent never directly observes any of these models; it must infer them
through investigation. The simulator uses ``FullLatentState`` to generate
all simulated outputs and to compute terminal rewards.
"""

from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel, Field


class TargetProfile(BaseModel):
    """Ground-truth properties of the drug target, hidden from the agent.

    Every attribute has a default, so ``TargetProfile()`` builds a complete
    profile. Fields whose description says "One of ..." are plain ``str``
    values: the permitted choices are documented, not enforced by the model.
    Only the fields declared with ``ge``/``le`` are range-checked.
    """

    # --- Expression ------------------------------------------------------
    expression_level: str = Field(
        default="moderate",
        description="One of 'high_specific', 'high_nonspecific', 'moderate', 'low'.",
    )
    # Constrained to the closed interval [0, 1].
    tissue_specificity: float = Field(default=0.5, ge=0.0, le=1.0)
    disease_overexpression: float = Field(
        default=1.0,
        description="Fold change vs. matched normal tissue.",
    )

    # --- Druggability ----------------------------------------------------
    # Constrained to the closed interval [0, 1].
    druggability_score: float = Field(default=0.5, ge=0.0, le=1.0)
    binding_pocket_quality: str = Field(
        default="good",
        description="One of 'excellent', 'good', 'poor', 'undruggable'.",
    )
    has_known_ligands: bool = False
    allosteric_site_available: bool = Field(
        default=False,
        description=(
            "Whether a non-classical (allosteric) druggable site exists. "
            "Only revealed by binding-site analyses with the appropriate "
            "parameters."
        ),
    )

    # --- Selectivity -----------------------------------------------------
    selectivity_ratio: float = Field(
        default=5.0,
        description="On-target vs off-target activity ratio.",
    )
    # NOTE(review): nothing here ties off_target_count to
    # len(off_target_genes); presumably the scenario builder keeps them
    # consistent - confirm.
    off_target_count: int = 0
    off_target_genes: List[str] = Field(default_factory=list)

    # --- Safety ----------------------------------------------------------
    toxicity_profile: str = Field(
        default="mild",
        description="One of 'clean', 'mild', 'moderate', 'severe'.",
    )
    toxicity_tissues: List[str] = Field(default_factory=list)

    # --- Clinical history ------------------------------------------------
    clinical_precedent: str = Field(
        default="none",
        description="One of 'positive', 'mixed', 'negative', 'none'.",
    )
    clinical_stage_reached: Optional[str] = Field(
        default=None,
        description=(
            "Highest clinical stage previously reached: 'phase1' / 'phase2' "
            "/ 'phase3' / None."
        ),
    )
    competitor_programs: List[str] = Field(default_factory=list)

    # --- Patient stratification / biomarker context ----------------------
    requires_patient_stratification: bool = False
    responder_biomarker: Optional[str] = None

    # --- In-vitro / in-vivo expectations ---------------------------------
    in_vitro_ic50_nM: float = Field(
        default=100.0,
        description="Expected on-target IC50 (nM).",
    )
    in_vivo_efficacy: str = Field(
        default="moderate",
        description=(
            "Expected pharmacological efficacy in disease-relevant models: "
            "'strong', 'moderate', 'weak', 'none'."
        ),
    )
    crispr_essentiality: float = Field(
        default=-0.3,
        description=(
            "DepMap-style essentiality score (more negative = more "
            "essential)."
        ),
    )

    # --- Hidden truth consumed by terminal reward computation ------------
    # Constrained to the closed interval [0, 1].
    true_viability_score: float = Field(default=0.5, ge=0.0, le=1.0)
    correct_decision: str = Field(
        default="no_go",
        description="Either 'go' or 'no_go'.",
    )
    misleading_signals: List[str] = Field(default_factory=list)
    key_evidence_dimensions: List[str] = Field(
        default_factory=list,
        description=(
            "Evidence categories the agent must touch to score well, e.g. "
            "'expression', 'druggability', 'off_target', 'toxicity', "
            "'clinical', 'literature', 'in_vitro', 'in_vivo', "
            "'patient_stratification'."
        ),
    )


class DataQualityState(BaseModel):
    """Noise parameters applied to simulated experimental outputs.

    All four values are floats validated to lie in the closed interval
    [0, 1] when the model is constructed.
    """

    noise_level: float = Field(default=0.1, ge=0.0, le=1.0)
    false_positive_rate: float = Field(default=0.05, ge=0.0, le=1.0)
    false_negative_rate: float = Field(default=0.05, ge=0.0, le=1.0)
    # NOTE(review): presumably the fraction of database look-ups that return
    # a record - confirm against the simulator code that reads it.
    database_coverage: float = Field(default=0.85, ge=0.0, le=1.0)


class CreditState(BaseModel):
    """Bookkeeping for the single, shared experimental-credit budget.

    ``credits_total`` is the size of the budget and ``credits_used`` is the
    amount spent so far. The two properties are derived from those stored
    integers on every access; they are not model fields themselves.
    """

    credits_total: int = 50
    credits_used: int = 0

    @property
    def credits_remaining(self) -> int:
        """Return the number of unspent credits, never less than zero."""
        unspent = self.credits_total - self.credits_used
        # Overspending is reported as 0 remaining, not as a negative balance.
        return unspent if unspent > 0 else 0

    @property
    def exhausted(self) -> bool:
        """Return ``True`` once spending has reached or passed the budget."""
        return self.credits_total <= self.credits_used


class ValidationProgress(BaseModel):
    """Flags tracking which evidence dimensions have been investigated.

    Every flag starts as ``False``. This class only stores the flags; it
    contains no logic that sets them, so whatever code performs each step is
    responsible for flipping the matching flag.
    """

    # NOTE(review): these nine names resemble the example categories listed
    # in ``TargetProfile.key_evidence_dimensions``; the exact mapping (e.g.
    # ``selectivity_checked`` vs. 'off_target') is not defined in this file -
    # confirm against the rules / reward code.
    expression_queried: bool = False
    druggability_assessed: bool = False
    selectivity_checked: bool = False
    toxicity_assessed: bool = False
    clinical_checked: bool = False
    literature_reviewed: bool = False
    in_vitro_done: bool = False
    in_vivo_done: bool = False
    patient_stratification_done: bool = False

    # Further analysis steps tracked alongside the ones above.
    pathway_analysed: bool = False
    structure_resolved: bool = False
    interactions_mapped: bool = False
    crispr_done: bool = False
    biomarker_correlated: bool = False

    # Presumably end-of-episode workflow steps (synthesis, review, final
    # report) - TODO confirm the intended ordering with the environment code.
    evidence_synthesised: bool = False
    expert_reviewed: bool = False
    report_submitted: bool = False


class FullLatentState(BaseModel):
    """Complete hidden state of the simulated drug-target world.

    Bundles the ground-truth target, the data-quality (noise) parameters,
    the credit budget, and the investigation-progress flags into one model.
    Each nested model is created through ``default_factory``, so every
    ``FullLatentState()`` gets its own fresh sub-model instances rather than
    sharing them.
    """

    # Hidden ground-truth properties of the target under investigation.
    target: TargetProfile = Field(default_factory=TargetProfile)
    # Noise / error-rate parameters for simulated outputs.
    data_quality: DataQualityState = Field(default_factory=DataQualityState)
    # Unified experimental-credit budget.
    credits: CreditState = Field(default_factory=CreditState)
    # Which investigation steps have been carried out so far.
    progress: ValidationProgress = Field(default_factory=ValidationProgress)

    # Tracking which action types have been executed (used by rules / rewards)
    # NOTE(review): the annotation is a bare ``dict``, so key and value types
    # are not validated; presumably maps action-type name -> call count -
    # confirm against the code that updates it.
    action_call_counts: dict = Field(default_factory=dict)
    # Presumably the seed for the simulator's random number generator; it is
    # only stored here, not consumed in this file.
    rng_seed: int = 0