"""
Data models for the Drug Target Validation RL Environment.

Defines the POMDP action and observation contracts for an agent that acts as
a computational pharma scientist. Given a proposed drug target and a disease
context, the agent issues bioinformatics / clinical / experimental queries
one at a time and finally submits a go / no-go validation report.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field

from openenv.core.env_server.types import Action, Observation


# ── Action vocabulary ───────────────────────────────────────────────────────


class ActionType(str, Enum):
    """Closed set of step types the agent may issue (string-valued)."""

    # Expression & Omics
    QUERY_EXPRESSION = "query_expression"
    DIFFERENTIAL_EXPRESSION = "differential_expression"
    PATHWAY_ENRICHMENT = "pathway_enrichment"
    COEXPRESSION_NETWORK = "coexpression_network"

    # Protein & Structure
    PROTEIN_STRUCTURE_LOOKUP = "protein_structure_lookup"
    BINDING_SITE_ANALYSIS = "binding_site_analysis"
    PROTEIN_INTERACTION_NETWORK = "protein_interaction_network"
    DRUGGABILITY_SCREEN = "druggability_screen"

    # Clinical & Safety
    CLINICAL_TRIAL_LOOKUP = "clinical_trial_lookup"
    TOXICITY_PANEL = "toxicity_panel"
    OFF_TARGET_SCREEN = "off_target_screen"
    PATIENT_STRATIFICATION = "patient_stratification"

    # Literature & Evidence
    LITERATURE_SEARCH = "literature_search"
    EVIDENCE_SYNTHESIS = "evidence_synthesis"
    COMPETITOR_LANDSCAPE = "competitor_landscape"

    # Experimental (expensive, consume more credits)
    IN_VITRO_ASSAY = "in_vitro_assay"
    IN_VIVO_MODEL = "in_vivo_model"
    CRISPR_KNOCKOUT = "crispr_knockout"
    BIOMARKER_CORRELATION = "biomarker_correlation"

    # Meta
    FLAG_RED_FLAG = "flag_red_flag"
    REQUEST_EXPERT_REVIEW = "request_expert_review"
    SUBMIT_VALIDATION_REPORT = "submit_validation_report"  # terminal action


# Immutable groupings of ActionType members, mirroring the comment sections
# inside the enum above.
OMICS_ACTIONS = frozenset({
    ActionType.QUERY_EXPRESSION,
    ActionType.DIFFERENTIAL_EXPRESSION,
    ActionType.PATHWAY_ENRICHMENT,
    ActionType.COEXPRESSION_NETWORK,
})

PROTEIN_ACTIONS = frozenset({
    ActionType.PROTEIN_STRUCTURE_LOOKUP,
    ActionType.BINDING_SITE_ANALYSIS,
    ActionType.PROTEIN_INTERACTION_NETWORK,
    ActionType.DRUGGABILITY_SCREEN,
})

CLINICAL_ACTIONS = frozenset({
    ActionType.CLINICAL_TRIAL_LOOKUP,
    ActionType.TOXICITY_PANEL,
    ActionType.OFF_TARGET_SCREEN,
    ActionType.PATIENT_STRATIFICATION,
})

LITERATURE_ACTIONS = frozenset({
    ActionType.LITERATURE_SEARCH,
    ActionType.EVIDENCE_SYNTHESIS,
    ActionType.COMPETITOR_LANDSCAPE,
})

EXPERIMENTAL_ACTIONS = frozenset({
    ActionType.IN_VITRO_ASSAY,
    ActionType.IN_VIVO_MODEL,
    ActionType.CRISPR_KNOCKOUT,
    ActionType.BIOMARKER_CORRELATION,
})

META_ACTIONS = frozenset({
    ActionType.FLAG_RED_FLAG,
    ActionType.REQUEST_EXPERT_REVIEW,
    ActionType.SUBMIT_VALIDATION_REPORT,
})


# ── Tool registry (pharma / bioinformatics) ─────────────────────────────────


class ToolCategory(str, Enum):
    """Closed set of categories used to classify registry tools."""

    EXPRESSION_DB = "expression_db"
    OMICS_ANALYSIS = "omics_analysis"
    PATHWAY_DB = "pathway_db"
    PROTEIN_STRUCTURE = "protein_structure"
    BINDING_SITE = "binding_site"
    INTERACTION_NETWORK = "interaction_network"
    DRUGGABILITY = "druggability"
    CLINICAL_DB = "clinical_db"
    SAFETY_DB = "safety_db"
    OFF_TARGET = "off_target"
    LITERATURE = "literature"
    PATIENT_GENOMICS = "patient_genomics"
    IN_VITRO = "in_vitro"
    IN_VIVO = "in_vivo"
    CRISPR = "crispr"
    BIOMARKER = "biomarker"


class ToolSpec(BaseModel):
    """Registry entry describing a pharma / bioinformatics tool or database."""

    name: str
    category: ToolCategory
    # Action types for which this tool is considered relevant.
    relevant_actions: List[ActionType] = Field(default_factory=list)
    description: str = ""
    input_types: List[str] = Field(default_factory=list)
    output_types: List[str] = Field(default_factory=list)
    typical_runtime_hours: float = 0.1
    typical_credit_cost: int = 1
    requires_compute: bool = False
    open_source: bool = True


# Static catalogue keyed by tool name; each key equals the entry's ``name``.
TOOL_REGISTRY: Dict[str, ToolSpec] = {
    # ── Expression & omics databases ──
    "GTEx": ToolSpec(
        name="GTEx",
        category=ToolCategory.EXPRESSION_DB,
        relevant_actions=[ActionType.QUERY_EXPRESSION],
        description="Tissue-level expression atlas across normal human tissues",
        input_types=["gene_symbol"],
        output_types=["tissue_expression"],
        typical_credit_cost=2,
    ),
    "TCGA": ToolSpec(
        name="TCGA",
        category=ToolCategory.EXPRESSION_DB,
        relevant_actions=[
            ActionType.QUERY_EXPRESSION,
            ActionType.DIFFERENTIAL_EXPRESSION,
            ActionType.BIOMARKER_CORRELATION,
        ],
        description="The Cancer Genome Atlas tumor vs normal expression / mutation",
        input_types=["gene_symbol", "indication"],
        output_types=["tumor_expression", "mutation_frequency"],
        typical_credit_cost=2,
    ),
    "Human_Protein_Atlas": ToolSpec(
        name="Human_Protein_Atlas",
        category=ToolCategory.EXPRESSION_DB,
        relevant_actions=[ActionType.QUERY_EXPRESSION],
        description="Antibody-based protein expression across normal and cancer tissues",
        input_types=["gene_symbol"],
        output_types=["protein_expression", "tissue_specificity"],
    ),
    "DepMap": ToolSpec(
        name="DepMap",
        category=ToolCategory.OMICS_ANALYSIS,
        relevant_actions=[
            ActionType.CRISPR_KNOCKOUT,
            ActionType.COEXPRESSION_NETWORK,
        ],
        description="Cancer Dependency Map: genome-scale CRISPR essentiality scores",
        input_types=["gene_symbol", "cell_line_panel"],
        output_types=["essentiality_score", "synthetic_lethality"],
        typical_credit_cost=4,
    ),
    "ARCHS4": ToolSpec(
        name="ARCHS4",
        category=ToolCategory.OMICS_ANALYSIS,
        relevant_actions=[
            ActionType.COEXPRESSION_NETWORK,
            ActionType.QUERY_EXPRESSION,
        ],
        description="Massive RNA-seq compendium for coexpression and tissue queries",
        input_types=["gene_symbol"],
        output_types=["coexpression_partners"],
    ),
    "GEO": ToolSpec(
        name="GEO",
        category=ToolCategory.OMICS_ANALYSIS,
        relevant_actions=[
            ActionType.DIFFERENTIAL_EXPRESSION,
            ActionType.QUERY_EXPRESSION,
        ],
        description="Gene Expression Omnibus: curated bulk and single-cell datasets",
        input_types=["gene_symbol", "indication"],
        output_types=["de_result"],
    ),
    # ── Pathway / annotation databases ──
    "Reactome": ToolSpec(
        name="Reactome",
        category=ToolCategory.PATHWAY_DB,
        relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
        description="Curated human pathway and reaction database",
        input_types=["gene_list"],
        output_types=["pathway_enrichment"],
    ),
    "KEGG": ToolSpec(
        name="KEGG",
        category=ToolCategory.PATHWAY_DB,
        relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
        description="KEGG metabolic and signalling pathways",
        input_types=["gene_list"],
        output_types=["pathway_enrichment"],
    ),
    "MSigDB": ToolSpec(
        name="MSigDB",
        category=ToolCategory.PATHWAY_DB,
        relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
        description="Molecular Signatures Database for GSEA",
        input_types=["ranked_gene_list"],
        output_types=["pathway_enrichment"],
    ),
    # ── Protein structure / binding-site tools ──
    "AlphaFold": ToolSpec(
        name="AlphaFold",
        category=ToolCategory.PROTEIN_STRUCTURE,
        relevant_actions=[
            ActionType.PROTEIN_STRUCTURE_LOOKUP,
            ActionType.BINDING_SITE_ANALYSIS,
        ],
        description="Predicted full-length 3D protein structures",
        input_types=["uniprot_id", "gene_symbol"],
        output_types=["pdb_structure", "plddt_confidence"],
        typical_credit_cost=3,
    ),
    "PDB": ToolSpec(
        name="PDB",
        category=ToolCategory.PROTEIN_STRUCTURE,
        relevant_actions=[ActionType.PROTEIN_STRUCTURE_LOOKUP],
        description="Experimentally determined protein structures",
        input_types=["uniprot_id"],
        output_types=["pdb_structure"],
    ),
    "UniProt": ToolSpec(
        name="UniProt",
        category=ToolCategory.PROTEIN_STRUCTURE,
        relevant_actions=[
            ActionType.PROTEIN_STRUCTURE_LOOKUP,
            ActionType.PROTEIN_INTERACTION_NETWORK,
        ],
        description="Curated protein sequence and functional annotation",
        input_types=["gene_symbol"],
        output_types=["uniprot_entry", "domain_annotation"],
    ),
    "fpocket": ToolSpec(
        name="fpocket",
        category=ToolCategory.BINDING_SITE,
        relevant_actions=[ActionType.BINDING_SITE_ANALYSIS],
        description="Geometric pocket detection on protein structures",
        input_types=["pdb_structure"],
        output_types=["pocket_list", "druggability_score"],
        requires_compute=True,
    ),
    "SiteMap": ToolSpec(
        name="SiteMap",
        category=ToolCategory.BINDING_SITE,
        relevant_actions=[ActionType.BINDING_SITE_ANALYSIS],
        description="Schrödinger binding-site detection and scoring",
        input_types=["pdb_structure"],
        output_types=["pocket_list", "site_score"],
        open_source=False,
        typical_credit_cost=3,
    ),
    # ── Druggability / chemistry ──
    "ChEMBL": ToolSpec(
        name="ChEMBL",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[
            ActionType.DRUGGABILITY_SCREEN,
            ActionType.COMPETITOR_LANDSCAPE,
        ],
        description="Bioactivity database of drug-like molecules vs targets",
        input_types=["gene_symbol", "uniprot_id"],
        output_types=["bioactivity", "known_ligands"],
        typical_credit_cost=3,
    ),
    "DrugBank": ToolSpec(
        name="DrugBank",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[
            ActionType.DRUGGABILITY_SCREEN,
            ActionType.COMPETITOR_LANDSCAPE,
        ],
        description="Comprehensive drug and target reference",
        input_types=["gene_symbol"],
        output_types=["approved_drugs", "drug_target_pairs"],
    ),
    "OpenTargets": ToolSpec(
        name="OpenTargets",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[
            ActionType.DRUGGABILITY_SCREEN,
            ActionType.EVIDENCE_SYNTHESIS,
        ],
        description="Integrated target-disease evidence platform",
        input_types=["gene_symbol", "indication"],
        output_types=["target_score", "evidence_summary"],
    ),
    "canSAR": ToolSpec(
        name="canSAR",
        category=ToolCategory.DRUGGABILITY,
        relevant_actions=[ActionType.DRUGGABILITY_SCREEN],
        description="Cancer translational research and drug discovery knowledgebase",
        input_types=["gene_symbol"],
        output_types=["druggability_score", "ligandability"],
    ),
    # ── Interaction networks ──
    "STRING": ToolSpec(
        name="STRING",
        category=ToolCategory.INTERACTION_NETWORK,
        relevant_actions=[
            ActionType.PROTEIN_INTERACTION_NETWORK,
            ActionType.COEXPRESSION_NETWORK,
        ],
        description="Protein-protein interaction database with confidence scores",
        input_types=["gene_symbol"],
        output_types=["ppi_network"],
    ),
    "BioGRID": ToolSpec(
        name="BioGRID",
        category=ToolCategory.INTERACTION_NETWORK,
        relevant_actions=[ActionType.PROTEIN_INTERACTION_NETWORK],
        description="Curated genetic and protein-protein interactions",
        input_types=["gene_symbol"],
        output_types=["ppi_network", "genetic_interactions"],
    ),
    # ── Clinical & safety ──
    "ClinicalTrials_gov": ToolSpec(
        name="ClinicalTrials_gov",
        category=ToolCategory.CLINICAL_DB,
        relevant_actions=[
            ActionType.CLINICAL_TRIAL_LOOKUP,
            ActionType.COMPETITOR_LANDSCAPE,
        ],
        description="Registry of human clinical trials worldwide",
        input_types=["gene_symbol", "indication"],
        output_types=["trial_list", "phase_status"],
    ),
    "FAERS": ToolSpec(
        name="FAERS",
        category=ToolCategory.SAFETY_DB,
        relevant_actions=[ActionType.TOXICITY_PANEL],
        description="FDA Adverse Event Reporting System",
        input_types=["drug_name", "gene_symbol"],
        output_types=["adverse_events"],
    ),
    "ToxCast": ToolSpec(
        name="ToxCast",
        category=ToolCategory.SAFETY_DB,
        relevant_actions=[ActionType.TOXICITY_PANEL],
        description="EPA high-throughput toxicology assays",
        input_types=["compound", "gene_symbol"],
        output_types=["toxicity_assays"],
        typical_credit_cost=3,
    ),
    "gnomAD": ToolSpec(
        name="gnomAD",
        category=ToolCategory.PATIENT_GENOMICS,
        relevant_actions=[
            ActionType.PATIENT_STRATIFICATION,
            ActionType.OFF_TARGET_SCREEN,
        ],
        description="Population variant frequencies and constraint metrics",
        input_types=["gene_symbol"],
        output_types=["pLI_score", "loftool_score"],
    ),
    "ClinVar": ToolSpec(
        name="ClinVar",
        category=ToolCategory.PATIENT_GENOMICS,
        relevant_actions=[ActionType.PATIENT_STRATIFICATION],
        description="Clinically interpreted germline and somatic variants",
        input_types=["gene_symbol"],
        output_types=["pathogenic_variants"],
    ),
    # ── Off-target / selectivity ──
    "Eurofins_DiscoverX": ToolSpec(
        name="Eurofins_DiscoverX",
        category=ToolCategory.OFF_TARGET,
        relevant_actions=[ActionType.OFF_TARGET_SCREEN],
        description="Kinome-wide selectivity profiling panels",
        input_types=["compound"],
        output_types=["kinase_selectivity"],
        open_source=False,
        typical_credit_cost=3,
    ),
    "SafetyPanel": ToolSpec(
        name="SafetyPanel",
        category=ToolCategory.OFF_TARGET,
        relevant_actions=[
            ActionType.OFF_TARGET_SCREEN,
            ActionType.TOXICITY_PANEL,
        ],
        description="Standard secondary pharmacology / off-target assay panel",
        input_types=["compound"],
        output_types=["off_target_hits"],
        typical_credit_cost=3,
    ),
    # ── Literature ──
    "PubMed": ToolSpec(
        name="PubMed",
        category=ToolCategory.LITERATURE,
        relevant_actions=[
            ActionType.LITERATURE_SEARCH,
            ActionType.EVIDENCE_SYNTHESIS,
        ],
        description="Biomedical literature database",
        input_types=["query"],
        output_types=["abstract_list"],
        typical_credit_cost=1,
    ),
    "Europe_PMC": ToolSpec(
        name="Europe_PMC",
        category=ToolCategory.LITERATURE,
        relevant_actions=[ActionType.LITERATURE_SEARCH],
        description="Open biomedical literature search with full-text mining",
        input_types=["query"],
        output_types=["abstract_list", "fulltext_excerpts"],
    ),
    # ── Experimental wet-lab ──
    "InVitroPanel": ToolSpec(
        name="InVitroPanel",
        category=ToolCategory.IN_VITRO,
        relevant_actions=[
            ActionType.IN_VITRO_ASSAY,
            ActionType.BIOMARKER_CORRELATION,
        ],
        description="Cell-line viability / IC50 panel against the proposed target",
        input_types=["compound", "cell_line_panel"],
        output_types=["IC50", "selectivity_window"],
        typical_runtime_hours=72.0,
        typical_credit_cost=5,
        requires_compute=False,
    ),
    "MouseModel": ToolSpec(
        name="MouseModel",
        category=ToolCategory.IN_VIVO,
        relevant_actions=[ActionType.IN_VIVO_MODEL],
        description="In-vivo efficacy + tolerability in disease-relevant mouse models",
        input_types=["compound", "indication"],
        output_types=["efficacy_endpoint", "tolerability", "PK_PD"],
        typical_runtime_hours=720.0,
        typical_credit_cost=8,
    ),
    "CRISPR_screen": ToolSpec(
        name="CRISPR_screen",
        category=ToolCategory.CRISPR,
        relevant_actions=[ActionType.CRISPR_KNOCKOUT],
        description="Genome- or focused-library CRISPR knockout / dependency screen",
        input_types=["gene_symbol", "cell_line_panel"],
        output_types=["essentiality_score", "synthetic_lethality"],
        typical_credit_cost=4,
    ),
    "BiomarkerPanel": ToolSpec(
        name="BiomarkerPanel",
        category=ToolCategory.BIOMARKER,
        relevant_actions=[
            ActionType.BIOMARKER_CORRELATION,
            ActionType.PATIENT_STRATIFICATION,
        ],
        description="Patient-derived biomarker correlation with target activity",
        input_types=["gene_symbol", "patient_cohort"],
        output_types=["biomarker_correlation"],
        typical_credit_cost=3,
    ),
}


# ── Registry helper functions ──────────────────────────────────────────────


def tools_by_category(category: ToolCategory) -> List[ToolSpec]:
    """Return all registered tools in a given category."""
    return [t for t in TOOL_REGISTRY.values() if t.category == category]


def tools_for_action(action_type: ActionType) -> List[ToolSpec]:
    """Return all registered tools that are relevant for a given action type."""
    return [t for t in TOOL_REGISTRY.values() if action_type in t.relevant_actions]


# ── Action schema ───────────────────────────────────────────────────────────


class DrugTargetAction(Action):
    """Structured action for one drug-target-validation step.

    Hybrid representation: a discrete ``action_type`` plus typed
    ``parameters``, an optional free-text ``reasoning`` string, and the
    terminal-only ``final_decision`` / ``confidence`` fields used when the
    agent submits its validation report.
    """

    action_type: ActionType = Field(
        ...,
        description=(
            "Discrete simulator step type. Each action type maps to a "
            "specific class of pharma / bioinformatics query, in-vitro / "
            "in-vivo experiment, or terminal report submission."
        ),
    )
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Action-specific arguments such as the database to query, the "
            "compound to profile, or include_allosteric flags. Use only "
            "parameters that materially change the simulated output."
        ),
    )
    reasoning: str = Field(
        "",
        description=(
            "Short scientific rationale explaining why this is the right "
            "next step in the current investigation."
        ),
    )
    # NOTE: typed as a free string; the model itself does not restrict the
    # value to 'go' / 'no_go'.
    final_decision: Optional[str] = Field(
        None,
        description=(
            "'go' or 'no_go' recommendation. Only set on a "
            "SUBMIT_VALIDATION_REPORT action."
        ),
    )
    confidence: Optional[float] = Field(
        None,
        ge=0.0,
        le=1.0,
        description=(
            "Calibrated confidence in the final decision in [0, 1]. Only "
            "set on a SUBMIT_VALIDATION_REPORT action."
        ),
    )


# ── Intermediate outputs ────────────────────────────────────────────────────


class OutputType(str, Enum):
    """Closed set of result kinds an ``IntermediateOutput`` can carry."""

    EXPRESSION_RESULT = "expression_result"
    DE_RESULT = "de_result"
    PATHWAY_RESULT = "pathway_result"
    COEXPRESSION_RESULT = "coexpression_result"
    STRUCTURE_RESULT = "structure_result"
    BINDING_SITE_RESULT = "binding_site_result"
    INTERACTION_RESULT = "interaction_result"
    DRUGGABILITY_RESULT = "druggability_result"
    CLINICAL_RESULT = "clinical_result"
    TOXICITY_RESULT = "toxicity_result"
    OFF_TARGET_RESULT = "off_target_result"
    PATIENT_STRATIFICATION_RESULT = "patient_stratification_result"
    LITERATURE_RESULT = "literature_result"
    EVIDENCE_SYNTHESIS_RESULT = "evidence_synthesis_result"
    COMPETITOR_LANDSCAPE_RESULT = "competitor_landscape_result"
    IN_VITRO_RESULT = "in_vitro_result"
    IN_VIVO_RESULT = "in_vivo_result"
    CRISPR_RESULT = "crispr_result"
    BIOMARKER_RESULT = "biomarker_result"
    RED_FLAG_NOTE = "red_flag_note"
    EXPERT_REVIEW = "expert_review"
    VALIDATION_REPORT = "validation_report"
    FAILURE_REPORT = "failure_report"


class IntermediateOutput(BaseModel):
    """A single simulated output from one validation step."""

    output_type: OutputType
    step_index: int
    success: bool = True
    # Validated to lie in [0, 1].
    quality_score: float = Field(1.0, ge=0.0, le=1.0)
    summary: str = ""
    data: Dict[str, Any] = Field(default_factory=dict)
    # Validated to lie in [0, 1].
    uncertainty: float = Field(0.0, ge=0.0, le=1.0)
    warnings: List[str] = Field(default_factory=list)
    artifacts_available: List[str] = Field(default_factory=list)


# ── Observable state components ─────────────────────────────────────────────


class CreditUsage(BaseModel):
    """Agent-visible view of the experimental credit budget."""

    credits_used: int = 0
    credits_remaining: int = 50
    credits_total: int = 50


class ValidationStepRecord(BaseModel):
    """One row of the agent's pipeline history."""

    step_index: int
    action_type: ActionType
    parameters: Dict[str, Any] = Field(default_factory=dict)
    output_summary: str = ""
    output_type: OutputType
    success: bool = True
    # Unlike IntermediateOutput.quality_score, no range constraint is
    # declared on this field.
    quality_score: float = 1.0
    credit_cost: int = 0


class EvidenceDossier(BaseModel):
    """Structured running dossier of everything the agent has discovered.

    Maintained on the environment side and surfaced verbatim inside each
    ``ValidationObservation``. It is the primary state the agent should
    consult when deciding what to investigate next.
    """

    expression_findings: Dict[str, Any] = Field(default_factory=dict)
    protein_findings: Dict[str, Any] = Field(default_factory=dict)
    clinical_findings: Dict[str, Any] = Field(default_factory=dict)
    safety_findings: Dict[str, Any] = Field(default_factory=dict)
    literature_findings: Dict[str, Any] = Field(default_factory=dict)
    experimental_results: List[Dict[str, Any]] = Field(default_factory=list)
    flagged_red_flags: List[str] = Field(default_factory=list)
    credits_used: int = 0


class ValidationTaskSpec(BaseModel):
    """Specification of the drug-target-validation problem to solve."""

    problem_statement: str = "Unspecified drug target validation problem"
    target_gene: str = "UNKNOWN"
    disease_context: str = "unspecified disease"
    indication: str = "unspecified indication"
    credits_limit: int = 50
    success_criteria: List[str] = Field(default_factory=list)
    prior_observations: List[str] = Field(default_factory=list)
    # Defaults to the string value of every ActionType member.
    available_actions: List[str] = Field(
        default_factory=lambda: [a.value for a in ActionType],
    )
    expected_findings: List[Any] = Field(default_factory=list)
    dataset_metadata: Dict[str, Any] = Field(default_factory=dict)


# ── Observation schema ──────────────────────────────────────────────────────


class ValidationObservation(Observation):
    """Full observable state returned to the agent at each timestep.

    Deliberately excludes the hidden ``TargetProfile``, which the agent must
    infer through investigation.
    """

    target_gene: str = "UNKNOWN"
    disease_context: str = "unspecified disease"
    indication: str = "unspecified indication"
    credits_remaining: int = 50
    credits_total: int = 50
    dossier: EvidenceDossier = Field(default_factory=EvidenceDossier)
    pipeline_history: List[Dict[str, Any]] = Field(default_factory=list)
    available_actions: List[str] = Field(default_factory=list)
    step_index: int = 0
    done: bool = False
    reward: float = 0.0
    step_reward_breakdown: Dict[str, float] = Field(default_factory=dict)
    rule_violations: List[str] = Field(default_factory=list)
    latest_output: Optional[IntermediateOutput] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)


# ── Agent prompt scaffolding ────────────────────────────────────────────────

# One guidance sentence per ActionType; build_agent_system_prompt indexes this
# mapping for every enum member, so it must stay exhaustive.
AGENT_ACTION_GUIDANCE: Dict[ActionType, str] = {
    ActionType.QUERY_EXPRESSION: (
        "Cheap expression lookup across normal and disease tissues. Run "
        "early to gauge tissue specificity and disease over-expression."
    ),
    ActionType.DIFFERENTIAL_EXPRESSION: (
        "Disease-vs-normal differential expression. Useful to confirm "
        "disease-driven dysregulation of the target."
    ),
    ActionType.PATHWAY_ENRICHMENT: (
        "Find pathways the target participates in. Best after expression / "
        "DE so you have an informative gene context."
    ),
    ActionType.COEXPRESSION_NETWORK: (
        "Identify functionally related genes. Useful for mechanism "
        "hypotheses and synthetic-lethality candidates."
    ),
    ActionType.PROTEIN_STRUCTURE_LOOKUP: (
        "Pull experimental or AlphaFold structures of the target."
    ),
    ActionType.BINDING_SITE_ANALYSIS: (
        "Detect ligandable pockets. Pass include_allosteric=true for "
        "non-classical sites."
    ),
    ActionType.PROTEIN_INTERACTION_NETWORK: (
        "Map first-degree PPI partners. Useful for off-target reasoning."
    ),
    ActionType.DRUGGABILITY_SCREEN: (
        "High-level druggability assessment. Critical for any go/no_go."
    ),
    ActionType.CLINICAL_TRIAL_LOOKUP: (
        "Look up clinical precedent for this target / indication. Often "
        "decisive for borderline scenarios."
    ),
    ActionType.TOXICITY_PANEL: (
        "Probe target-mediated toxicity. Best after expression so on-target "
        "tissue toxicity can be interpreted."
    ),
    ActionType.OFF_TARGET_SCREEN: (
        "Quantify off-target / paralog selectivity. Always run when "
        "selectivity is plausibly limiting."
    ),
    ActionType.PATIENT_STRATIFICATION: (
        "Identify responder subpopulations and biomarker hypotheses."
    ),
    ActionType.LITERATURE_SEARCH: (
        "Cheap PubMed / Europe-PMC scan. Cheap to run and often surfaces "
        "recent precedent that overrides historical priors."
    ),
    ActionType.EVIDENCE_SYNTHESIS: (
        "Aggregate prior findings into a coherent picture. Best run after "
        "several queries have populated the dossier."
    ),
    ActionType.COMPETITOR_LANDSCAPE: (
        "Survey other programs against the same target. Useful for "
        "differentiation strategy."
    ),
    ActionType.IN_VITRO_ASSAY: (
        "Expensive cell-line assay (5 credits). Run after computational "
        "evidence justifies wet-lab spend."
    ),
    ActionType.IN_VIVO_MODEL: (
        "Most expensive action (8 credits). Should only follow positive "
        "in-vitro signal."
    ),
    ActionType.CRISPR_KNOCKOUT: (
        "Functional knockout / dependency check (4 credits)."
    ),
    ActionType.BIOMARKER_CORRELATION: (
        "Correlate target activity with patient biomarkers (3 credits)."
    ),
    ActionType.FLAG_RED_FLAG: (
        "Free annotation that records a concern in the dossier without "
        "spending credits."
    ),
    ActionType.REQUEST_EXPERT_REVIEW: (
        "Lightweight critique by a simulated reviewer. Use sparingly."
    ),
    ActionType.SUBMIT_VALIDATION_REPORT: (
        "Terminal action. Must include final_decision ('go' / 'no_go') and "
        "a calibrated confidence score; the episode ends immediately."
    ),
}

# Bullet points rendered verbatim into the system prompt.
AGENT_ENVIRONMENT_RULES: List[str] = [
    (
        "You start with a fixed pool of experimental credits; every action "
        "deducts a known credit cost and credit-exhaustion ends the episode."
    ),
    (
        "Each successful action returns concrete pharma evidence, so "
        "repeated queries of the same type are usually wasteful."
    ),
    (
        "Some prerequisites apply: e.g. interpret toxicity in light of "
        "expression, and run in-vitro work before in-vivo."
    ),
    (
        "Always finish the episode by submitting a calibrated "
        "submit_validation_report — exhausting credits without a report "
        "yields the worst possible reward."
    ),
]

# Per-category usage hint appended to tool descriptions shown to the agent.
_TOOL_CATEGORY_AGENT_NOTES: Dict[ToolCategory, str] = {
    ToolCategory.EXPRESSION_DB: (
        "Use early to characterise expression in normal vs disease tissue."
    ),
    ToolCategory.OMICS_ANALYSIS: (
        "Use to mine bulk / single-cell expression compendia for context."
    ),
    ToolCategory.PATHWAY_DB: (
        "Use after gathering a gene list for enrichment / mechanism."
    ),
    ToolCategory.PROTEIN_STRUCTURE: (
        "Use when reasoning about binding pockets or structure-based design."
    ),
    ToolCategory.BINDING_SITE: (
        "Use to score pocket druggability and detect allosteric sites."
    ),
    ToolCategory.INTERACTION_NETWORK: (
        "Use to reason about partners, paralogs, and pathway context."
    ),
    ToolCategory.DRUGGABILITY: (
        "Use to assess overall ligandability and known chemical matter."
    ),
    ToolCategory.CLINICAL_DB: (
        "Use to gather clinical precedent and competitor activity."
    ),
    ToolCategory.SAFETY_DB: (
        "Use after expression / off-target queries to interpret risk."
    ),
    ToolCategory.OFF_TARGET: (
        "Use whenever paralogs or kinase selectivity could limit the program."
    ),
    ToolCategory.LITERATURE: (
        "Cheap and often decisive — recent literature can flip historical "
        "priors."
    ),
    ToolCategory.PATIENT_GENOMICS: (
        "Use for stratification and human genetics-based de-risking."
    ),
    ToolCategory.IN_VITRO: (
        "Expensive; run only after computational evidence justifies it."
    ),
    ToolCategory.IN_VIVO: (
        "Most expensive; only run after in-vitro / target-engagement data."
    ),
    ToolCategory.CRISPR: (
        "Use to test functional dependency or synthetic lethality."
    ),
    ToolCategory.BIOMARKER: (
        "Use to correlate target activity with patient-level biomarkers."
    ),
}


def describe_tool_for_agent(tool_name: str) -> str:
    """Return a compact environment-aware tool description for prompts.

    Args:
        tool_name: Key into ``TOOL_REGISTRY``.

    Returns:
        A single-line description made of the tool name and description, its
        inputs / outputs, the category note, up to three relevant action
        names and the approximate credit cost. If ``tool_name`` is not
        registered, the name itself is returned unchanged.
    """
    tool = TOOL_REGISTRY.get(tool_name)
    if tool is None:
        return tool_name

    # Normalise the description so the sentence always ends in exactly one
    # period, and an empty description does not render as "Name: .".
    description = tool.description.strip().rstrip(".")
    headline = f"{tool.name}: {description}." if description else f"{tool.name}."
    parts = [headline]

    if tool.input_types or tool.output_types:
        inputs = ", ".join(tool.input_types) or "context"
        outputs = ", ".join(tool.output_types) or "evidence"
        parts.append(f"Consumes {inputs}; yields {outputs}.")

    category_note = _TOOL_CATEGORY_AGENT_NOTES.get(tool.category)
    if category_note:
        parts.append(category_note)

    if tool.relevant_actions:
        # Cap at three action names to keep the prompt line short.
        action_names = ", ".join(a.value for a in tool.relevant_actions[:3])
        parts.append(f"Relevant for: {action_names}.")

    if tool.typical_credit_cost > 0:
        parts.append(f"Approx cost: {tool.typical_credit_cost} credits.")

    return " ".join(parts)


def build_agent_system_prompt() -> str:
    """Build the shared agent system prompt for training and inference.

    Returns:
        A newline-joined prompt containing the role statement, every entry of
        ``AGENT_ENVIRONMENT_RULES``, one guidance bullet per ``ActionType``
        member and the expected JSON response shapes.

    Raises:
        KeyError: If an ``ActionType`` member has no entry in
            ``AGENT_ACTION_GUIDANCE``.
    """
    lines = [
        "You are a computational drug discovery scientist evaluating a "
        "proposed drug target.",
        "",
        "Each turn, you observe the running evidence dossier and remaining "
        "credits, and you must pick the next investigation step. Your goal "
        "is to gather sufficient evidence to submit a calibrated go / no_go "
        "validation report before credits run out.",
        "",
        "Environment-specific reasoning rules:",
    ]
    lines.extend(f" - {rule}" for rule in AGENT_ENVIRONMENT_RULES)
    lines.append("")
    lines.append("Action guidance:")
    lines.extend(
        f" - {action_type.value}: {AGENT_ACTION_GUIDANCE[action_type]}"
        for action_type in ActionType
    )
    lines.extend([
        "",
        "Respond with ONLY valid JSON, nothing else:",
        '{"action_type": "...", "parameters": {}, "reasoning": "..."}',
        "",
        "When you submit the final report, use this exact shape:",
        '{"action_type": "submit_validation_report", "parameters": {}, '
        '"reasoning": "...", "final_decision": "go", "confidence": 0.8}',
    ])
    return "\n".join(lines)


def build_agent_observation_context(
    obs: ValidationObservation,
    *,
    max_tools: int = 6,
) -> str:
    """Summarize action / tool context for the agent's prompt.

    Args:
        obs: Current observation; only the target, indication, disease and
            credit fields are read.
        max_tools: Maximum number of representative tools to list. Values
            of zero or below list none.

    Returns:
        A newline-joined block with the target line, the credit line, a tool
        header and one described tool per category (the first registered tool
        of each category, in registry order), truncated to ``max_tools``.
    """
    sections: List[str] = [
        (
            f"Target: {obs.target_gene} | Indication: {obs.indication} | "
            f"Disease: {obs.disease_context}"
        ),
        f"Credits: {obs.credits_remaining}/{obs.credits_total} remaining",
    ]

    # Keep only the first tool seen for each category; dict insertion order
    # preserves the order in which categories first appear in the registry.
    representatives: Dict[ToolCategory, ToolSpec] = {}
    for tool in TOOL_REGISTRY.values():
        representatives.setdefault(tool.category, tool)

    sections.append("Representative tools available (already filtered):")
    # Clamp so a negative limit lists nothing rather than slicing from the end.
    limit = max(max_tools, 0)
    for tool in list(representatives.values())[:limit]:
        sections.append(f" - {describe_tool_for_agent(tool.name)}")

    return "\n".join(sections)