# Spaces: anugrahteesdollar / drugenv (Sleeping)
# File size: 34,203 Bytes
# 77e1e28

"""
Data models for the Drug Target Validation RL Environment.

Defines the POMDP action and observation contracts for an agent that acts
as a computational pharma scientist. Given a proposed drug target and a
disease context, the agent issues bioinformatics / clinical / experimental
queries one at a time and finally submits a go / no-go validation report.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field

from openenv.core.env_server.types import Action, Observation


# ── Action vocabulary ───────────────────────────────────────────────────────


class ActionType(str, Enum):
    # Closed vocabulary of steps the agent may take. Members also subclass
    # ``str``, so each member compares equal to its lowercase wire value.
    # Declaration order is meaningful: code that iterates ``ActionType``
    # (the system-prompt builder and the default
    # ``ValidationTaskSpec.available_actions``) emits actions in this order.

    # Expression & Omics
    QUERY_EXPRESSION = "query_expression"
    DIFFERENTIAL_EXPRESSION = "differential_expression"
    PATHWAY_ENRICHMENT = "pathway_enrichment"
    COEXPRESSION_NETWORK = "coexpression_network"

    # Protein & Structure
    PROTEIN_STRUCTURE_LOOKUP = "protein_structure_lookup"
    BINDING_SITE_ANALYSIS = "binding_site_analysis"
    PROTEIN_INTERACTION_NETWORK = "protein_interaction_network"
    DRUGGABILITY_SCREEN = "druggability_screen"

    # Clinical & Safety
    CLINICAL_TRIAL_LOOKUP = "clinical_trial_lookup"
    TOXICITY_PANEL = "toxicity_panel"
    OFF_TARGET_SCREEN = "off_target_screen"
    PATIENT_STRATIFICATION = "patient_stratification"

    # Literature & Evidence
    LITERATURE_SEARCH = "literature_search"
    EVIDENCE_SYNTHESIS = "evidence_synthesis"
    COMPETITOR_LANDSCAPE = "competitor_landscape"

    # Experimental (expensive, consume more credits)
    IN_VITRO_ASSAY = "in_vitro_assay"
    IN_VIVO_MODEL = "in_vivo_model"
    CRISPR_KNOCKOUT = "crispr_knockout"
    BIOMARKER_CORRELATION = "biomarker_correlation"

    # Meta (bookkeeping actions plus the single episode-ending action)
    FLAG_RED_FLAG = "flag_red_flag"
    REQUEST_EXPERT_REVIEW = "request_expert_review"
    SUBMIT_VALIDATION_REPORT = "submit_validation_report"  # terminal action


# Immutable groupings of ``ActionType`` members by investigation family.
# Together the six groups cover every ``ActionType`` member exactly once, so
# they can be used for membership tests such as ``action in OMICS_ACTIONS``.

# Expression and omics queries.
OMICS_ACTIONS = frozenset((
    ActionType.QUERY_EXPRESSION,
    ActionType.DIFFERENTIAL_EXPRESSION,
    ActionType.PATHWAY_ENRICHMENT,
    ActionType.COEXPRESSION_NETWORK,
))

# Protein structure, binding-site, interaction and druggability queries.
PROTEIN_ACTIONS = frozenset((
    ActionType.PROTEIN_STRUCTURE_LOOKUP,
    ActionType.BINDING_SITE_ANALYSIS,
    ActionType.PROTEIN_INTERACTION_NETWORK,
    ActionType.DRUGGABILITY_SCREEN,
))

# Clinical precedent and safety queries.
CLINICAL_ACTIONS = frozenset((
    ActionType.CLINICAL_TRIAL_LOOKUP,
    ActionType.TOXICITY_PANEL,
    ActionType.OFF_TARGET_SCREEN,
    ActionType.PATIENT_STRATIFICATION,
))

# Literature and evidence-aggregation queries.
LITERATURE_ACTIONS = frozenset((
    ActionType.LITERATURE_SEARCH,
    ActionType.EVIDENCE_SYNTHESIS,
    ActionType.COMPETITOR_LANDSCAPE,
))

# Experimental actions (the ones flagged as expensive in ``ActionType``).
EXPERIMENTAL_ACTIONS = frozenset((
    ActionType.IN_VITRO_ASSAY,
    ActionType.IN_VIVO_MODEL,
    ActionType.CRISPR_KNOCKOUT,
    ActionType.BIOMARKER_CORRELATION,
))

# Bookkeeping actions and the terminal report submission.
META_ACTIONS = frozenset((
    ActionType.FLAG_RED_FLAG,
    ActionType.REQUEST_EXPERT_REVIEW,
    ActionType.SUBMIT_VALIDATION_REPORT,
))


# ── Tool registry (pharma / bioinformatics) ─────────────────────────────────


class ToolCategory(str, Enum):
    # Coarse grouping for entries in ``TOOL_REGISTRY``. Each ``ToolSpec``
    # carries exactly one category; ``tools_by_category`` filters on it and
    # ``_TOOL_CATEGORY_AGENT_NOTES`` attaches one usage hint per member.
    # Members subclass ``str`` and compare equal to their lowercase value.
    EXPRESSION_DB = "expression_db"
    OMICS_ANALYSIS = "omics_analysis"
    PATHWAY_DB = "pathway_db"
    PROTEIN_STRUCTURE = "protein_structure"
    BINDING_SITE = "binding_site"
    INTERACTION_NETWORK = "interaction_network"
    DRUGGABILITY = "druggability"
    CLINICAL_DB = "clinical_db"
    SAFETY_DB = "safety_db"
    OFF_TARGET = "off_target"
    LITERATURE = "literature"
    PATIENT_GENOMICS = "patient_genomics"
    IN_VITRO = "in_vitro"
    IN_VIVO = "in_vivo"
    CRISPR = "crispr"
    BIOMARKER = "biomarker"


class ToolSpec(BaseModel):
    """Registry entry describing a pharma / bioinformatics tool or database.

    Instances are stored in ``TOOL_REGISTRY`` and rendered into prompt text
    by ``describe_tool_for_agent``. Only ``name`` and ``category`` are
    required; every other field has a default.
    """

    # Display name; registry entries use the same string as their dict key.
    name: str
    # Grouping used by ``tools_by_category`` and the per-category prompt notes.
    category: ToolCategory
    # Action types this tool can serve; consulted by ``tools_for_action``.
    relevant_actions: List[ActionType] = Field(default_factory=list)
    # One-line human-readable summary (registry entries omit a final period).
    description: str = ""
    # Free-form labels for what the tool consumes / produces; plain strings,
    # not validated against any enum in this file.
    input_types: List[str] = Field(default_factory=list)
    output_types: List[str] = Field(default_factory=list)
    # Nominal runtime in hours (default 0.1) and nominal credit cost
    # (default 1). Only the credit cost is surfaced in prompt text.
    typical_runtime_hours: float = 0.1
    typical_credit_cost: int = 1
    # Descriptive flags; nothing in this file branches on them.
    requires_compute: bool = False
    open_source: bool = True


# Static catalogue of the tools / databases known to this module, keyed by
# tool name (each key equals the entry's ``ToolSpec.name``). Fields omitted
# from an entry take the ``ToolSpec`` defaults (e.g. credit cost 1, runtime
# 0.1 h, open_source=True). Insertion order is observable:
# ``build_agent_observation_context`` shows the first tool registered for
# each category, in the order categories first appear here.
TOOL_REGISTRY: Dict[str, ToolSpec] = {
    # ── Expression & omics databases ──
    "GTEx": ToolSpec(
        name="GTEx",
        category=ToolCategory.EXPRESSION_DB,
        relevant_actions=[ActionType.QUERY_EXPRESSION],
        description="Tissue-level expression atlas across normal human tissues",
        input_types=["gene_symbol"],
        output_types=["tissue_expression"],
        typical_credit_cost=2,
    ),
    "TCGA": ToolSpec(
        name="TCGA",
        category=ToolCategory.EXPRESSION_DB,
        relevant_actions=[
            ActionType.QUERY_EXPRESSION,
            ActionType.DIFFERENTIAL_EXPRESSION,
            ActionType.BIOMARKER_CORRELATION,
        ],
        description="The Cancer Genome Atlas tumor vs normal expression / mutation",
        input_types=["gene_symbol", "indication"],
        output_types=["tumor_expression", "mutation_frequency"],
        typical_credit_cost=2,
    ),
    "Human_Protein_Atlas": ToolSpec(
        name="Human_Protein_Atlas",
        category=ToolCategory.EXPRESSION_DB,
        relevant_actions=[ActionType.QUERY_EXPRESSION],
        description="Antibody-based protein expression across normal and cancer tissues",
        input_types=["gene_symbol"],
        output_types=["protein_expression", "tissue_specificity"],
    ),
    "DepMap": ToolSpec(
        name="DepMap",
        category=ToolCategory.OMICS_ANALYSIS,
        relevant_actions=[
            ActionType.CRISPR_KNOCKOUT,
            ActionType.COEXPRESSION_NETWORK,
        ],
        description="Cancer Dependency Map: genome-scale CRISPR essentiality scores",
        input_types=["gene_symbol", "cell_line_panel"],
        output_types=["essentiality_score", "synthetic_lethality"],
        typical_credit_cost=4,
    ),
    "ARCHS4": ToolSpec(
        name="ARCHS4",
        category=ToolCategory.OMICS_ANALYSIS,
        relevant_actions=[
            ActionType.COEXPRESSION_NETWORK,
            ActionType.QUERY_EXPRESSION,
        ],
        description="Massive RNA-seq compendium for coexpression and tissue queries",
        input_types=["gene_symbol"],
        output_types=["coexpression_partners"],
    ),
    "GEO": ToolSpec(
        name="GEO",
        category=ToolCategory.OMICS_ANALYSIS,
        relevant_actions=[
            ActionType.DIFFERENTIAL_EXPRESSION,
            ActionType.QUERY_EXPRESSION,
        ],
        description="Gene Expression Omnibus: curated bulk and single-cell datasets",
        input_types=["gene_symbol", "indication"],
        output_types=["de_result"],
    ),
    # ── Pathway / annotation databases ──
    "Reactome": ToolSpec(
        name="Reactome",
        category=ToolCategory.PATHWAY_DB,
        relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
        description="Curated human pathway and reaction database",
        input_types=["gene_list"],
        output_types=["pathway_enrichment"],
    ),
    "KEGG": ToolSpec(
        name="KEGG",
        category=ToolCategory.PATHWAY_DB,
        relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
        description="KEGG metabolic and signalling pathways",
        input_types=["gene_list"],
        output_types=["pathway_enrichment"],
    ),
    "MSigDB": ToolSpec(
        name="MSigDB",
        category=ToolCategory.PATHWAY_DB,
        relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
        description="Molecular Signatures Database for GSEA",
        input_types=["ranked_gene_list"],
        output_types=["pathway_enrichment"],
    ),
    # ── Protein structure / binding-site tools ──
    "AlphaFold": ToolSpec(
        name="AlphaFold",
        category=ToolCategory.PROTEIN_STRUCTURE,
        relevant_actions=[
            ActionType.PROTEIN_STRUCTURE_LOOKUP,
            ActionType.BINDING_SITE_ANALYSIS,
        ],
        description="Predicted full-length 3D protein structures",
        input_types=["uniprot_id", "gene_symbol"],
        output_types=["pdb_structure", "plddt_confidence"],
        typical_credit_cost=3,
    ),
    "PDB": ToolSpec(
        name="PDB",
        category=ToolCategory.PROTEIN_STRUCTURE,
        relevant_actions=[ActionType.PROTEIN_STRUCTURE_LOOKUP],
        description="Experimentally determined protein structures",
        input_types=["uniprot_id"],
        output_types=["pdb_structure"],
    ),
    "UniProt": ToolSpec(
        name="UniProt",
        category=ToolCategory.PROTEIN_STRUCTURE,
        relevant_actions=[
            ActionType.PROTEIN_STRUCTURE_LOOKUP,
            ActionType.PROTEIN_INTERACTION_NETWORK,
        ],
        description="Curated protein sequence and functional annotation",
        input_types=["gene_symbol"],
        output_types=["uniprot_entry", "domain_annotation"],
    ),
    "fpocket": ToolSpec(
        name="fpocket",
        category=ToolCategory.BINDING_SITE,
        relevant_actions=[ActionType.BINDING_SITE_ANALYSIS],
        description="Geometric pocket detection on protein structures",
        input_types=["pdb_structure"],
        output_types=["pocket_list", "druggability_score"],
        requires_compute=True,
    ),
    "SiteMap": ToolSpec(
        name="SiteMap",
        category=ToolCategory.BINDING_SITE,
        relevant_actions=[ActionType.BINDING_SITE_ANALYSIS],
        description="Schrödinger binding-site detection and scoring",
        input_types=["pdb_structure"],
        output_types=["pocket_list", "site_score"],
        open_source=False,
        typical_credit_cost=3,
    ),
    # ── Druggability / chemistry ──
    "ChEMBL": ToolSpec(
        name="ChEMBL",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[
            ActionType.DRUGGABILITY_SCREEN,
            ActionType.COMPETITOR_LANDSCAPE,
        ],
        description="Bioactivity database of drug-like molecules vs targets",
        input_types=["gene_symbol", "uniprot_id"],
        output_types=["bioactivity", "known_ligands"],
        typical_credit_cost=3,
    ),
    "DrugBank": ToolSpec(
        name="DrugBank",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[
            ActionType.DRUGGABILITY_SCREEN,
            ActionType.COMPETITOR_LANDSCAPE,
        ],
        description="Comprehensive drug and target reference",
        input_types=["gene_symbol"],
        output_types=["approved_drugs", "drug_target_pairs"],
    ),
    "OpenTargets": ToolSpec(
        name="OpenTargets",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[
            ActionType.DRUGGABILITY_SCREEN,
            ActionType.EVIDENCE_SYNTHESIS,
        ],
        description="Integrated target-disease evidence platform",
        input_types=["gene_symbol", "indication"],
        output_types=["target_score", "evidence_summary"],
    ),
    "canSAR": ToolSpec(
        name="canSAR",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[ActionType.DRUGGABILITY_SCREEN],
        description="Cancer translational research and drug discovery knowledgebase",
        input_types=["gene_symbol"],
        output_types=["druggability_score", "ligandability"],
    ),
    # ── Interaction networks ──
    "STRING": ToolSpec(
        name="STRING",
        category=ToolCategory.INTERACTION_NETWORK,
        relevant_actions=[
            ActionType.PROTEIN_INTERACTION_NETWORK,
            ActionType.COEXPRESSION_NETWORK,
        ],
        description="Protein-protein interaction database with confidence scores",
        input_types=["gene_symbol"],
        output_types=["ppi_network"],
    ),
    "BioGRID": ToolSpec(
        name="BioGRID",
        category=ToolCategory.INTERACTION_NETWORK,
        relevant_actions=[ActionType.PROTEIN_INTERACTION_NETWORK],
        description="Curated genetic and protein-protein interactions",
        input_types=["gene_symbol"],
        output_types=["ppi_network", "genetic_interactions"],
    ),
    # ── Clinical & safety ──
    "ClinicalTrials_gov": ToolSpec(
        name="ClinicalTrials_gov",
        category=ToolCategory.CLINICAL_DB,
        relevant_actions=[
            ActionType.CLINICAL_TRIAL_LOOKUP,
            ActionType.COMPETITOR_LANDSCAPE,
        ],
        description="Registry of human clinical trials worldwide",
        input_types=["gene_symbol", "indication"],
        output_types=["trial_list", "phase_status"],
    ),
    "FAERS": ToolSpec(
        name="FAERS",
        category=ToolCategory.SAFETY_DB,
        relevant_actions=[ActionType.TOXICITY_PANEL],
        description="FDA Adverse Event Reporting System",
        input_types=["drug_name", "gene_symbol"],
        output_types=["adverse_events"],
    ),
    "ToxCast": ToolSpec(
        name="ToxCast",
        category=ToolCategory.SAFETY_DB,
        relevant_actions=[ActionType.TOXICITY_PANEL],
        description="EPA high-throughput toxicology assays",
        input_types=["compound", "gene_symbol"],
        output_types=["toxicity_assays"],
        typical_credit_cost=3,
    ),
    "gnomAD": ToolSpec(
        name="gnomAD",
        category=ToolCategory.PATIENT_GENOMICS,
        relevant_actions=[
            ActionType.PATIENT_STRATIFICATION,
            ActionType.OFF_TARGET_SCREEN,
        ],
        description="Population variant frequencies and constraint metrics",
        input_types=["gene_symbol"],
        output_types=["pLI_score", "loftool_score"],
    ),
    "ClinVar": ToolSpec(
        name="ClinVar",
        category=ToolCategory.PATIENT_GENOMICS,
        relevant_actions=[ActionType.PATIENT_STRATIFICATION],
        description="Clinically interpreted germline and somatic variants",
        input_types=["gene_symbol"],
        output_types=["pathogenic_variants"],
    ),
    # ── Off-target / selectivity ──
    "Eurofins_DiscoverX": ToolSpec(
        name="Eurofins_DiscoverX",
        category=ToolCategory.OFF_TARGET,
        relevant_actions=[ActionType.OFF_TARGET_SCREEN],
        description="Kinome-wide selectivity profiling panels",
        input_types=["compound"],
        output_types=["kinase_selectivity"],
        open_source=False,
        typical_credit_cost=3,
    ),
    "SafetyPanel": ToolSpec(
        name="SafetyPanel",
        category=ToolCategory.OFF_TARGET,
        relevant_actions=[
            ActionType.OFF_TARGET_SCREEN,
            ActionType.TOXICITY_PANEL,
        ],
        description="Standard secondary pharmacology / off-target assay panel",
        input_types=["compound"],
        output_types=["off_target_hits"],
        typical_credit_cost=3,
    ),
    # ── Literature ──
    "PubMed": ToolSpec(
        name="PubMed",
        category=ToolCategory.LITERATURE,
        relevant_actions=[
            ActionType.LITERATURE_SEARCH,
            ActionType.EVIDENCE_SYNTHESIS,
        ],
        description="Biomedical literature database",
        input_types=["query"],
        output_types=["abstract_list"],
        typical_credit_cost=1,
    ),
    "Europe_PMC": ToolSpec(
        name="Europe_PMC",
        category=ToolCategory.LITERATURE,
        relevant_actions=[ActionType.LITERATURE_SEARCH],
        description="Open biomedical literature search with full-text mining",
        input_types=["query"],
        output_types=["abstract_list", "fulltext_excerpts"],
    ),
    # ── Experimental wet-lab ──
    # These entries carry the highest credit costs in the registry; the
    # figures are repeated in the prose of ``AGENT_ACTION_GUIDANCE``.
    "InVitroPanel": ToolSpec(
        name="InVitroPanel",
        category=ToolCategory.IN_VITRO,
        relevant_actions=[
            ActionType.IN_VITRO_ASSAY,
            ActionType.BIOMARKER_CORRELATION,
        ],
        description="Cell-line viability / IC50 panel against the proposed target",
        input_types=["compound", "cell_line_panel"],
        output_types=["IC50", "selectivity_window"],
        typical_runtime_hours=72.0,
        typical_credit_cost=5,
        requires_compute=False,
    ),
    "MouseModel": ToolSpec(
        name="MouseModel",
        category=ToolCategory.IN_VIVO,
        relevant_actions=[ActionType.IN_VIVO_MODEL],
        description="In-vivo efficacy + tolerability in disease-relevant mouse models",
        input_types=["compound", "indication"],
        output_types=["efficacy_endpoint", "tolerability", "PK_PD"],
        typical_runtime_hours=720.0,
        typical_credit_cost=8,
    ),
    "CRISPR_screen": ToolSpec(
        name="CRISPR_screen",
        category=ToolCategory.CRISPR,
        relevant_actions=[ActionType.CRISPR_KNOCKOUT],
        description="Genome- or focused-library CRISPR knockout / dependency screen",
        input_types=["gene_symbol", "cell_line_panel"],
        output_types=["essentiality_score", "synthetic_lethality"],
        typical_credit_cost=4,
    ),
    "BiomarkerPanel": ToolSpec(
        name="BiomarkerPanel",
        category=ToolCategory.BIOMARKER,
        relevant_actions=[
            ActionType.BIOMARKER_CORRELATION,
            ActionType.PATIENT_STRATIFICATION,
        ],
        description="Patient-derived biomarker correlation with target activity",
        input_types=["gene_symbol", "patient_cohort"],
        output_types=["biomarker_correlation"],
        typical_credit_cost=3,
    ),
}


# ── Registry helper functions ──────────────────────────────────────────────


def tools_by_category(category: ToolCategory) -> List[ToolSpec]:
    """List the registry entries whose ``category`` equals *category*.

    Results follow ``TOOL_REGISTRY`` insertion order; an unused category
    yields an empty list.
    """
    registered = TOOL_REGISTRY.values()
    return [spec for spec in registered if spec.category == category]


def tools_for_action(action_type: ActionType) -> List[ToolSpec]:
    """List the registry entries that name *action_type* as relevant.

    A tool matches when *action_type* appears in its ``relevant_actions``.
    Results follow ``TOOL_REGISTRY`` insertion order; an action with no
    registered tool yields an empty list.
    """
    registered = TOOL_REGISTRY.values()
    return [spec for spec in registered if action_type in spec.relevant_actions]


# ── Action schema ───────────────────────────────────────────────────────────


class DrugTargetAction(Action):
    """Structured action for one drug-target-validation step.

    Hybrid representation: a discrete ``action_type`` plus typed
    ``parameters``, an optional free-text ``reasoning`` string, and the
    terminal-only ``final_decision`` / ``confidence`` fields used when the
    agent submits its validation report.

    Only ``action_type`` is required. This schema does not itself check that
    ``final_decision`` / ``confidence`` are present on a terminal action or
    absent on other actions.
    """

    # Required (``...`` marks the field as having no default).
    action_type: ActionType = Field(
        ...,
        description=(
            "Discrete simulator step type. Each action type maps to a "
            "specific class of pharma / bioinformatics query, in-vitro / "
            "in-vivo experiment, or terminal report submission."
        ),
    )
    # Free-form keyword arguments; keys and value types are not validated here.
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Action-specific arguments such as the database to query, the "
            "compound to profile, or include_allosteric flags. Use only "
            "parameters that materially change the simulated output."
        ),
    )
    reasoning: str = Field(
        "",
        description=(
            "Short scientific rationale explaining why this is the right "
            "next step in the current investigation."
        ),
    )
    # Typed as a plain optional string: the 'go' / 'no_go' vocabulary in the
    # description is not enforced by this field.
    final_decision: Optional[str] = Field(
        None,
        description=(
            "'go' or 'no_go' recommendation. Only set on a "
            "SUBMIT_VALIDATION_REPORT action."
        ),
    )
    # When provided, validation rejects values outside [0.0, 1.0].
    confidence: Optional[float] = Field(
        None,
        ge=0.0,
        le=1.0,
        description=(
            "Calibrated confidence in the final decision in [0, 1]. Only "
            "set on a SUBMIT_VALIDATION_REPORT action."
        ),
    )


# ── Intermediate outputs ────────────────────────────────────────────────────


class OutputType(str, Enum):
    # Tag identifying what kind of payload a step produced; stored on
    # ``IntermediateOutput.output_type`` and ``ValidationStepRecord.output_type``.
    # Members up to VALIDATION_REPORT line up by name and order with the
    # ``ActionType`` members; the action -> output mapping itself is not
    # defined in this file.
    EXPRESSION_RESULT = "expression_result"
    DE_RESULT = "de_result"
    PATHWAY_RESULT = "pathway_result"
    COEXPRESSION_RESULT = "coexpression_result"
    STRUCTURE_RESULT = "structure_result"
    BINDING_SITE_RESULT = "binding_site_result"
    INTERACTION_RESULT = "interaction_result"
    DRUGGABILITY_RESULT = "druggability_result"
    CLINICAL_RESULT = "clinical_result"
    TOXICITY_RESULT = "toxicity_result"
    OFF_TARGET_RESULT = "off_target_result"
    PATIENT_STRATIFICATION_RESULT = "patient_stratification_result"
    LITERATURE_RESULT = "literature_result"
    EVIDENCE_SYNTHESIS_RESULT = "evidence_synthesis_result"
    COMPETITOR_LANDSCAPE_RESULT = "competitor_landscape_result"
    IN_VITRO_RESULT = "in_vitro_result"
    IN_VIVO_RESULT = "in_vivo_result"
    CRISPR_RESULT = "crispr_result"
    BIOMARKER_RESULT = "biomarker_result"
    RED_FLAG_NOTE = "red_flag_note"
    EXPERT_REVIEW = "expert_review"
    VALIDATION_REPORT = "validation_report"
    # No ``ActionType`` counterpart; presumably emitted for failed steps —
    # TODO confirm against the simulator.
    FAILURE_REPORT = "failure_report"


class IntermediateOutput(BaseModel):
    """A single simulated output from one validation step.

    ``output_type`` and ``step_index`` are required; the remaining fields
    default to a successful, full-quality, zero-uncertainty result with no
    payload.
    """

    output_type: OutputType
    # Presumably the index of the step that produced this output — confirm
    # against the environment code that constructs it.
    step_index: int
    success: bool = True
    # Validation rejects values outside [0.0, 1.0]; defaults to 1.0.
    quality_score: float = Field(1.0, ge=0.0, le=1.0)
    summary: str = ""
    # Free-form payload; its keys are not constrained by this model.
    data: Dict[str, Any] = Field(default_factory=dict)
    # Validation rejects values outside [0.0, 1.0]; defaults to 0.0.
    uncertainty: float = Field(0.0, ge=0.0, le=1.0)
    warnings: List[str] = Field(default_factory=list)
    artifacts_available: List[str] = Field(default_factory=list)


# ── Observable state components ─────────────────────────────────────────────


class CreditUsage(BaseModel):
    """Agent-visible view of the experimental credit budget.

    The three counters are independent fields: this model does not enforce
    ``credits_used + credits_remaining == credits_total``.
    """

    credits_used: int = 0
    # The default of 50 matches ``ValidationTaskSpec.credits_limit`` and the
    # credit defaults on ``ValidationObservation``.
    credits_remaining: int = 50
    credits_total: int = 50


class ValidationStepRecord(BaseModel):
    """One row of the agent's pipeline history.

    ``step_index``, ``action_type`` and ``output_type`` are required; the
    other fields have defaults.
    """

    step_index: int
    action_type: ActionType
    # Presumably the parameters the action was issued with — confirm.
    parameters: Dict[str, Any] = Field(default_factory=dict)
    output_summary: str = ""
    output_type: OutputType
    success: bool = True
    # Unlike ``IntermediateOutput.quality_score`` this field carries no
    # [0, 1] range constraint.
    quality_score: float = 1.0
    credit_cost: int = 0


class EvidenceDossier(BaseModel):
    """Structured running dossier of everything the agent has discovered.

    Maintained on the environment side and surfaced verbatim inside each
    ``ValidationObservation``. It is the primary state the agent should
    consult when deciding what to investigate next.

    Every field defaults to empty (or zero), so ``EvidenceDossier()`` is a
    blank dossier.
    """

    # Free-form findings buckets; the keys inside each dict are not
    # constrained by this model.
    expression_findings: Dict[str, Any] = Field(default_factory=dict)
    protein_findings: Dict[str, Any] = Field(default_factory=dict)
    clinical_findings: Dict[str, Any] = Field(default_factory=dict)
    safety_findings: Dict[str, Any] = Field(default_factory=dict)
    literature_findings: Dict[str, Any] = Field(default_factory=dict)
    # A list of result dicts rather than a single dict, unlike the buckets above.
    experimental_results: List[Dict[str, Any]] = Field(default_factory=list)
    flagged_red_flags: List[str] = Field(default_factory=list)
    credits_used: int = 0


class ValidationTaskSpec(BaseModel):
    """Specification of the drug-target-validation problem to solve.

    Every field has a default, so ``ValidationTaskSpec()`` yields a
    placeholder task with a 50-credit limit and all actions available.
    """

    problem_statement: str = "Unspecified drug target validation problem"
    target_gene: str = "UNKNOWN"
    disease_context: str = "unspecified disease"
    indication: str = "unspecified indication"
    credits_limit: int = 50
    success_criteria: List[str] = Field(default_factory=list)
    prior_observations: List[str] = Field(default_factory=list)
    # Defaults to every ``ActionType`` value, as strings, in declaration order.
    available_actions: List[str] = Field(
        default_factory=lambda: [a.value for a in ActionType],
    )
    # NOTE(review): element type is unconstrained; the name suggests
    # grading targets — confirm whether this is meant to be agent-visible.
    expected_findings: List[Any] = Field(default_factory=list)
    dataset_metadata: Dict[str, Any] = Field(default_factory=dict)


# ── Observation schema ──────────────────────────────────────────────────────


class ValidationObservation(Observation):
    """Full observable state returned to the agent at each timestep.

    Deliberately excludes the hidden ``TargetProfile``, which the agent
    must infer through investigation.

    Every field declared here has a default, so none of them is required
    at construction time.
    """

    # Task identity; the placeholder defaults match ``ValidationTaskSpec``.
    target_gene: str = "UNKNOWN"
    disease_context: str = "unspecified disease"
    indication: str = "unspecified indication"
    # Budget counters; the default of 50 matches ``ValidationTaskSpec.credits_limit``.
    credits_remaining: int = 50
    credits_total: int = 50
    dossier: EvidenceDossier = Field(default_factory=EvidenceDossier)
    # Typed as plain dicts rather than ``ValidationStepRecord`` instances;
    # presumably serialized step records — confirm against the producer.
    pipeline_history: List[Dict[str, Any]] = Field(default_factory=list)
    # Defaults to empty here, whereas ``ValidationTaskSpec.available_actions``
    # defaults to every action.
    available_actions: List[str] = Field(default_factory=list)
    step_index: int = 0
    # NOTE(review): ``done``, ``reward`` and ``metadata`` may redeclare fields
    # of the openenv ``Observation`` base class — confirm the intended defaults.
    done: bool = False
    reward: float = 0.0
    step_reward_breakdown: Dict[str, float] = Field(default_factory=dict)
    rule_violations: List[str] = Field(default_factory=list)
    # ``None`` when there is no latest output to report.
    latest_output: Optional[IntermediateOutput] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)


# ── Agent prompt scaffolding ────────────────────────────────────────────────


# Per-action usage hints rendered into the system prompt. There is one entry
# for every ``ActionType`` member, and ``build_agent_system_prompt`` indexes
# this dict directly for each member, so a missing key raises ``KeyError``
# there. The credit figures quoted in the prose (5 / 8 / 4 / 3) repeat the
# ``typical_credit_cost`` values of the matching ``TOOL_REGISTRY`` entries and
# must be kept in sync by hand.
AGENT_ACTION_GUIDANCE: Dict[ActionType, str] = {
    ActionType.QUERY_EXPRESSION: (
        "Cheap expression lookup across normal and disease tissues. Run "
        "early to gauge tissue specificity and disease over-expression."
    ),
    ActionType.DIFFERENTIAL_EXPRESSION: (
        "Disease-vs-normal differential expression. Useful to confirm "
        "disease-driven dysregulation of the target."
    ),
    ActionType.PATHWAY_ENRICHMENT: (
        "Find pathways the target participates in. Best after expression / "
        "DE so you have an informative gene context."
    ),
    ActionType.COEXPRESSION_NETWORK: (
        "Identify functionally related genes. Useful for mechanism "
        "hypotheses and synthetic-lethality candidates."
    ),
    ActionType.PROTEIN_STRUCTURE_LOOKUP: (
        "Pull experimental or AlphaFold structures of the target."
    ),
    ActionType.BINDING_SITE_ANALYSIS: (
        "Detect ligandable pockets. Pass include_allosteric=true for "
        "non-classical sites."
    ),
    ActionType.PROTEIN_INTERACTION_NETWORK: (
        "Map first-degree PPI partners. Useful for off-target reasoning."
    ),
    ActionType.DRUGGABILITY_SCREEN: (
        "High-level druggability assessment. Critical for any go/no_go."
    ),
    ActionType.CLINICAL_TRIAL_LOOKUP: (
        "Look up clinical precedent for this target / indication. Often "
        "decisive for borderline scenarios."
    ),
    ActionType.TOXICITY_PANEL: (
        "Probe target-mediated toxicity. Best after expression so on-target "
        "tissue toxicity can be interpreted."
    ),
    ActionType.OFF_TARGET_SCREEN: (
        "Quantify off-target / paralog selectivity. Always run when "
        "selectivity is plausibly limiting."
    ),
    ActionType.PATIENT_STRATIFICATION: (
        "Identify responder subpopulations and biomarker hypotheses."
    ),
    ActionType.LITERATURE_SEARCH: (
        "Cheap PubMed / Europe-PMC scan. Cheap to run and often surfaces "
        "recent precedent that overrides historical priors."
    ),
    ActionType.EVIDENCE_SYNTHESIS: (
        "Aggregate prior findings into a coherent picture. Best run after "
        "several queries have populated the dossier."
    ),
    ActionType.COMPETITOR_LANDSCAPE: (
        "Survey other programs against the same target. Useful for "
        "differentiation strategy."
    ),
    ActionType.IN_VITRO_ASSAY: (
        "Expensive cell-line assay (5 credits). Run after computational "
        "evidence justifies wet-lab spend."
    ),
    ActionType.IN_VIVO_MODEL: (
        "Most expensive action (8 credits). Should only follow positive "
        "in-vitro signal."
    ),
    ActionType.CRISPR_KNOCKOUT: (
        "Functional knockout / dependency check (4 credits)."
    ),
    ActionType.BIOMARKER_CORRELATION: (
        "Correlate target activity with patient biomarkers (3 credits)."
    ),
    ActionType.FLAG_RED_FLAG: (
        "Free annotation that records a concern in the dossier without "
        "spending credits."
    ),
    ActionType.REQUEST_EXPERT_REVIEW: (
        "Lightweight critique by a simulated reviewer. Use sparingly."
    ),
    ActionType.SUBMIT_VALIDATION_REPORT: (
        "Terminal action. Must include final_decision ('go' / 'no_go') and "
        "a calibrated confidence score; the episode ends immediately."
    ),
}


# Environment-level rules shown to the agent. ``build_agent_system_prompt``
# renders each string as one "  - " bullet, in list order. Each rule is a
# single string built from adjacent literals inside parentheses.
AGENT_ENVIRONMENT_RULES: List[str] = [
    (
        "You start with a fixed pool of experimental credits; every action "
        "deducts a known credit cost and credit-exhaustion ends the episode."
    ),
    (
        "Each successful action returns concrete pharma evidence, so "
        "repeated queries of the same type are usually wasteful."
    ),
    (
        "Some prerequisites apply: e.g. interpret toxicity in light of "
        "expression, and run in-vitro work before in-vivo."
    ),
    (
        "Always finish the episode by submitting a calibrated "
        "submit_validation_report — exhausting credits without a report "
        "yields the worst possible reward."
    ),
]


# One usage hint per ``ToolCategory`` member, appended to tool descriptions by
# ``describe_tool_for_agent``. That function looks categories up with
# ``.get``, so a category missing from this dict simply omits the hint.
_TOOL_CATEGORY_AGENT_NOTES: Dict[ToolCategory, str] = {
    ToolCategory.EXPRESSION_DB: (
        "Use early to characterise expression in normal vs disease tissue."
    ),
    ToolCategory.OMICS_ANALYSIS: (
        "Use to mine bulk / single-cell expression compendia for context."
    ),
    ToolCategory.PATHWAY_DB: (
        "Use after gathering a gene list for enrichment / mechanism."
    ),
    ToolCategory.PROTEIN_STRUCTURE: (
        "Use when reasoning about binding pockets or structure-based design."
    ),
    ToolCategory.BINDING_SITE: (
        "Use to score pocket druggability and detect allosteric sites."
    ),
    ToolCategory.INTERACTION_NETWORK: (
        "Use to reason about partners, paralogs, and pathway context."
    ),
    ToolCategory.DRUGGABILITY: (
        "Use to assess overall ligandability and known chemical matter."
    ),
    ToolCategory.CLINICAL_DB: (
        "Use to gather clinical precedent and competitor activity."
    ),
    ToolCategory.SAFETY_DB: (
        "Use after expression / off-target queries to interpret risk."
    ),
    ToolCategory.OFF_TARGET: (
        "Use whenever paralogs or kinase selectivity could limit the program."
    ),
    ToolCategory.LITERATURE: (
        "Cheap and often decisive — recent literature can flip historical "
        "priors."
    ),
    ToolCategory.PATIENT_GENOMICS: (
        "Use for stratification and human genetics-based de-risking."
    ),
    ToolCategory.IN_VITRO: (
        "Expensive; run only after computational evidence justifies it."
    ),
    ToolCategory.IN_VIVO: (
        "Most expensive; only run after in-vitro / target-engagement data."
    ),
    ToolCategory.CRISPR: (
        "Use to test functional dependency or synthetic lethality."
    ),
    ToolCategory.BIOMARKER: (
        "Use to correlate target activity with patient-level biomarkers."
    ),
}


def describe_tool_for_agent(tool_name: str) -> str:
    """Return a compact environment-aware tool description for prompts.

    The result is a single line of space-joined sentences, in this order:
    name and description, consumed / produced types, the per-category usage
    note, up to three relevant action names, and the approximate credit cost.

    Args:
        tool_name: Key to look up in ``TOOL_REGISTRY``.

    Returns:
        The assembled description, or ``tool_name`` unchanged when the tool
        is not registered.
    """
    tool = TOOL_REGISTRY.get(tool_name)
    if tool is None:
        # Unknown tools are echoed back rather than raising.
        return tool_name

    # ``ToolSpec.description`` defaults to "": skip the colon clause in that
    # case instead of emitting "Name: .". A trailing period is stripped so a
    # description that already ends a sentence does not render as "..".
    description = tool.description.strip().rstrip(".")
    headline = f"{tool.name}: {description}." if description else f"{tool.name}."
    parts = [headline]

    if tool.input_types or tool.output_types:
        # Generic placeholders keep the sentence readable when one side is empty.
        inputs = ", ".join(tool.input_types) or "context"
        outputs = ", ".join(tool.output_types) or "evidence"
        parts.append(f"Consumes {inputs}; yields {outputs}.")

    category_note = _TOOL_CATEGORY_AGENT_NOTES.get(tool.category)
    if category_note:
        parts.append(category_note)

    if tool.relevant_actions:
        # Cap at three action names to keep the prompt line short.
        action_names = ", ".join(a.value for a in tool.relevant_actions[:3])
        parts.append(f"Relevant for: {action_names}.")

    if tool.typical_credit_cost > 0:
        parts.append(f"Approx cost: {tool.typical_credit_cost} credits.")

    return " ".join(parts)


def build_agent_system_prompt() -> str:
    """Assemble the system prompt shared by training and inference.

    The prompt is a newline-joined sequence: a role statement, the task
    framing, one bullet per entry of ``AGENT_ENVIRONMENT_RULES``, one bullet
    per ``ActionType`` member (in declaration order) with its
    ``AGENT_ACTION_GUIDANCE`` text, and the two JSON response templates.

    Raises:
        KeyError: If an ``ActionType`` member has no guidance entry.
    """
    role_statement = (
        "You are a computational drug discovery scientist evaluating a "
        "proposed drug target."
    )
    task_framing = (
        "Each turn, you observe the running evidence dossier and remaining "
        "credits, and you must pick the next investigation step. Your goal "
        "is to gather sufficient evidence to submit a calibrated go / no_go "
        "validation report before credits run out."
    )
    rule_bullets = [f"  - {rule}" for rule in AGENT_ENVIRONMENT_RULES]
    guidance_bullets = [
        f"  - {kind.value}: {AGENT_ACTION_GUIDANCE[kind]}" for kind in ActionType
    ]
    step_template = '{"action_type": "...", "parameters": {}, "reasoning": "..."}'
    report_template = (
        '{"action_type": "submit_validation_report", "parameters": {}, '
        '"reasoning": "...", "final_decision": "go", "confidence": 0.8}'
    )

    prompt_lines = [
        role_statement,
        "",
        task_framing,
        "",
        "Environment-specific reasoning rules:",
        *rule_bullets,
        "",
        "Action guidance:",
        *guidance_bullets,
        "",
        "Respond with ONLY valid JSON, nothing else:",
        step_template,
        "",
        "When you submit the final report, use this exact shape:",
        report_template,
    ]
    return "\n".join(prompt_lines)


def build_agent_observation_context(
    obs: ValidationObservation,
    *,
    max_tools: int = 6,
) -> str:
    """Render a short text block describing the task, budget and sample tools.

    The block contains, one per line: the target / indication / disease
    header, the remaining-credit line, a tools heading, and then one
    description for the first tool registered under each category, for at
    most ``max_tools`` categories in ``TOOL_REGISTRY`` order.

    Only the target, indication, disease and credit fields of ``obs`` are
    read; the tool list is not filtered by ``obs.available_actions``.

    Args:
        obs: Current observation.
        max_tools: Upper bound on listed tools; zero or a negative value
            lists none.

    Returns:
        The newline-joined context string.
    """
    header = (
        f"Target: {obs.target_gene} | Indication: {obs.indication} | "
        f"Disease: {obs.disease_context}"
    )
    budget = f"Credits: {obs.credits_remaining}/{obs.credits_total} remaining"

    # Remember only the first tool seen for each category; dict insertion
    # order preserves the order in which categories first appear.
    representative: Dict[ToolCategory, ToolSpec] = {}
    for spec in TOOL_REGISTRY.values():
        representative.setdefault(spec.category, spec)

    context_lines: List[str] = [
        header,
        budget,
        "Representative tools available (already filtered):",
    ]
    for position, spec in enumerate(representative.values()):
        if position >= max_tools:
            break
        context_lines.append(f"  - {describe_tool_for_agent(spec.name)}")

    return "\n".join(context_lines)