Spaces:
Sleeping
Sleeping
| """ | |
| Data models for CERNenv: an LHC (Large Hadron Collider) style particle | |
| physics discovery POMDP (Partially Observable Markov Decision Process). | |
| The agent is a Large Language Model (LLM) acting as a high-energy physicist. | |
| Each step it picks one structured action (configure beams, allocate | |
| luminosity, run a trigger, fit a spectrum, request systematics, submit a | |
| discovery claim, etc.) and receives a noisy detector-style observation. | |
| The latent particle and detector parameters are the hidden ground truth. | |
| """ | |
| from __future__ import annotations | |
| from enum import Enum | |
| from typing import Any, Dict, List, Optional | |
| from pydantic import BaseModel, Field | |
| from openenv.core.env_server.types import Action, Observation | |
| # ── Action vocabulary ─────────────────────────────────────────────────────── | |
class ActionType(str, Enum):
    """Discrete pipeline step an agent may request in a single turn.

    Members are grouped below by pipeline stage. Because the class mixes in
    ``str``, every member compares equal to its string value, which is the
    form used in JSON actions. Declaration order is also the iteration order,
    and the system-prompt builder iterates this enum to list the guidance.
    """

    # ── Beam & data acquisition (DAQ) ─────────────────────────────────
    CONFIGURE_BEAM = "configure_beam"
    ALLOCATE_LUMINOSITY = "allocate_luminosity"
    SET_TRIGGER = "set_trigger"
    COLLECT_COLLISIONS = "collect_collisions"
    # ── Reconstruction & calibration ─────────────────────────────────
    CALIBRATE_DETECTOR = "calibrate_detector"
    RECONSTRUCT_TRACKS = "reconstruct_tracks"
    SELECT_CHANNEL = "select_channel"
    # ── Analysis ──────────────────────────────────────────────────────
    BUILD_INVARIANT_MASS = "build_invariant_mass"
    SUBTRACT_BACKGROUND = "subtract_background"
    FIT_RESONANCE = "fit_resonance"
    SCAN_BUMP = "scan_bump"
    MEASURE_ANGULAR = "measure_angular"
    ESTIMATE_SIGNIFICANCE = "estimate_significance"
    # ── Systematics & meta ───────────────────────────────────────────
    REQUEST_SYSTEMATICS = "request_systematics"
    REQUEST_THEORY_REVIEW = "request_theory_review"
    # ── Final ─────────────────────────────────────────────────────────
    SUBMIT_DISCOVERY_CLAIM = "submit_discovery_claim"
# Immutable groupings of ActionType members by pipeline stage. Together the
# four sets cover every ActionType member exactly once.

# Beam setup and data taking.
DAQ_ACTIONS = frozenset((
    ActionType.CONFIGURE_BEAM,
    ActionType.ALLOCATE_LUMINOSITY,
    ActionType.SET_TRIGGER,
    ActionType.COLLECT_COLLISIONS,
))

# Detector calibration, object reconstruction and channel choice.
RECO_ACTIONS = frozenset((
    ActionType.CALIBRATE_DETECTOR,
    ActionType.RECONSTRUCT_TRACKS,
    ActionType.SELECT_CHANNEL,
))

# Spectrum building, fitting and significance estimation.
ANALYSIS_ACTIONS = frozenset((
    ActionType.BUILD_INVARIANT_MASS,
    ActionType.SUBTRACT_BACKGROUND,
    ActionType.FIT_RESONANCE,
    ActionType.SCAN_BUMP,
    ActionType.MEASURE_ANGULAR,
    ActionType.ESTIMATE_SIGNIFICANCE,
))

# Review requests plus the final discovery claim.
META_ACTIONS = frozenset((
    ActionType.REQUEST_SYSTEMATICS,
    ActionType.REQUEST_THEORY_REVIEW,
    ActionType.SUBMIT_DISCOVERY_CLAIM,
))
| # ── Detector channels & physics primitives ──────────────────────────────── | |
class DetectorChannel(str, Enum):
    """Final-state decay channel the agent reconstructs in.

    Channels affect signal acceptance and background composition. Picking a
    channel where the true particle does not decay yields low signal yield
    no matter how much luminosity is collected — this is intentional.

    The string values are what ``TaskSpec.available_channels`` lists by
    default.
    """

    DIPHOTON = "diphoton"  # γγ
    DILEPTON_EE = "dilepton_ee"  # e+ e-
    DILEPTON_MUMU = "dilepton_mumu"  # μ+ μ-
    DIJET = "dijet"  # jj
    FOUR_LEPTON = "four_lepton"  # 4ℓ
    BB = "bb"  # b b-bar
class TriggerType(str, Enum):
    """Hardware-level event selection.

    The string values are what ``TaskSpec.available_triggers`` lists by
    default.
    """

    LOW_PT = "low_pt"  # broad acceptance, lots of background
    HIGH_PT = "high_pt"  # high-mass focus, lower QCD
    # The *_HLT members are presumably High-Level Trigger paths aimed at the
    # like-named final state — confirm against the simulator.
    DIPHOTON_HLT = "diphoton_hlt"
    DILEPTON_HLT = "dilepton_hlt"
    JET_HLT = "jet_hlt"
class BeamEnergy(str, Enum):
    """LHC-style center-of-mass energies (TeV).

    Each value is a string that carries its own ``TeV`` suffix; these strings
    are what ``TaskSpec.beam_energy_options`` lists by default.
    """

    E_7 = "7TeV"
    E_8 = "8TeV"
    E_13 = "13TeV"
    E_14 = "14TeV"
| # ── Tool / instrument registry (for prompts and tool-fit reward) ────────── | |
class ToolCategory(str, Enum):
    """Coarse functional grouping assigned to each ``ToolSpec``."""

    DAQ = "daq"
    RECONSTRUCTION = "reconstruction"
    CALIBRATION = "calibration"
    ANALYSIS = "analysis"
    STATISTICS = "statistics"
    SYSTEMATICS = "systematics"
class ToolSpec(BaseModel):
    """Static description of one named instrument or software framework.

    Only ``name`` and ``category`` are required; every other field has a
    default.
    """

    name: str  # identifier; TOOL_REGISTRY keys each spec by this same string
    category: ToolCategory
    description: str = ""
    typical_runtime_hours: float = 0.5
    typical_cost_musd: float = 0.0  # in millions of USD (compute / beam time proxy)
    requires_gpu: bool = False
    # Channel names the tool applies to; registry entries use DetectorChannel
    # string values. An empty list presumably means "no restriction" — TODO confirm.
    channels: List[str] = Field(default_factory=list)
# Catalogue of every known tool, in declaration order. Each spec is written
# once and keyed by its own ``name``, so the dictionary key and
# ``ToolSpec.name`` always agree.
TOOL_REGISTRY: Dict[str, ToolSpec] = {
    spec.name: spec
    for spec in (
        # ── DAQ / trigger ────────────────────────────────────────────
        ToolSpec(
            name="ATLAS_HLT",
            category=ToolCategory.DAQ,
            description="ATLAS High-Level Trigger system for online event selection",
            typical_runtime_hours=0.0,
            channels=["diphoton", "dilepton_ee", "dilepton_mumu", "four_lepton", "dijet", "bb"],
        ),
        ToolSpec(
            name="CMS_HLT",
            category=ToolCategory.DAQ,
            description="CMS High-Level Trigger system",
            typical_runtime_hours=0.0,
            channels=["diphoton", "dilepton_ee", "dilepton_mumu", "four_lepton", "dijet", "bb"],
        ),
        # ── Reconstruction ───────────────────────────────────────────
        ToolSpec(
            name="GEANT4",
            category=ToolCategory.RECONSTRUCTION,
            description="Detector simulation toolkit for full event reconstruction",
            typical_runtime_hours=1.0,
            typical_cost_musd=0.05,
            requires_gpu=False,
        ),
        ToolSpec(
            name="Athena",
            category=ToolCategory.RECONSTRUCTION,
            description="ATLAS reconstruction framework",
            typical_runtime_hours=0.8,
        ),
        ToolSpec(
            name="CMSSW",
            category=ToolCategory.RECONSTRUCTION,
            description="CMS reconstruction software",
            typical_runtime_hours=0.8,
        ),
        # ── Calibration ──────────────────────────────────────────────
        ToolSpec(
            name="ECAL_calibration",
            category=ToolCategory.CALIBRATION,
            description="Electromagnetic calorimeter energy-scale calibration",
            typical_runtime_hours=0.3,
        ),
        ToolSpec(
            name="Tracker_alignment",
            category=ToolCategory.CALIBRATION,
            description="Inner tracker alignment for momentum precision",
            typical_runtime_hours=0.4,
        ),
        # ── Analysis ─────────────────────────────────────────────────
        ToolSpec(
            name="ROOT_RooFit",
            category=ToolCategory.ANALYSIS,
            description="Maximum-likelihood spectrum fitting toolkit",
            typical_runtime_hours=0.2,
        ),
        ToolSpec(
            name="MadGraph",
            category=ToolCategory.ANALYSIS,
            description="Matrix-element generator for signal+background templates",
            typical_runtime_hours=1.5,
            typical_cost_musd=0.02,
        ),
        ToolSpec(
            name="Pythia8",
            category=ToolCategory.ANALYSIS,
            description="Parton-shower and hadronisation generator",
            typical_runtime_hours=0.5,
        ),
        # ── Statistics ───────────────────────────────────────────────
        ToolSpec(
            name="BumpHunter",
            category=ToolCategory.STATISTICS,
            description="Sliding-window local-significance bump-hunting algorithm",
            typical_runtime_hours=0.1,
        ),
        ToolSpec(
            name="CLs_fit",
            category=ToolCategory.STATISTICS,
            description="Modified-frequentist CLs limits and significance",
            typical_runtime_hours=0.1,
        ),
        ToolSpec(
            name="Asimov_significance",
            category=ToolCategory.STATISTICS,
            description="Asymptotic significance from Asimov dataset",
            typical_runtime_hours=0.05,
        ),
        # ── Systematics ──────────────────────────────────────────────
        ToolSpec(
            name="JES_systematics",
            category=ToolCategory.SYSTEMATICS,
            description="Jet energy-scale systematic study",
            typical_runtime_hours=0.4,
        ),
        ToolSpec(
            name="Luminosity_calibration",
            category=ToolCategory.SYSTEMATICS,
            description="Van der Meer scan luminosity calibration",
            typical_runtime_hours=0.3,
        ),
    )
}
| # ── Action schema ────────────────────────────────────────────────────────── | |
class ExperimentAction(Action):
    """A single structured experimental step requested by the agent.

    ``action_type`` is the only required field. ``confidence`` is validated
    to lie in the closed interval [0, 1].
    """

    action_type: ActionType = Field(
        default=...,
        description=(
            "Discrete LHC pipeline step. "
            "The environment enforces physics prerequisites: "
            "you cannot fit a spectrum before collecting data, "
            "or claim a discovery before estimating significance."
        ),
    )
    method: Optional[str] = Field(
        default=None,
        description=(
            "Optional named instrument or framework "
            "(e.g. 'ROOT_RooFit', 'BumpHunter', 'Pythia8'). "
            "Affects cost, runtime, and tool-fit reward."
        ),
    )
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Action-specific settings such as beam energy, "
            "integrated luminosity (fb^-1), trigger selection, "
            "decay channel, mass window, fit model."
        ),
    )
    justification: Optional[str] = Field(
        default=None,
        description="Short scientific rationale for picking this step now.",
    )
    confidence: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Agent confidence in the chosen step.",
    )
| # ── Outputs ──────────────────────────────────────────────────────────────── | |
class OutputType(str, Enum):
    """Kind tag carried by ``IntermediateOutput`` and ``PipelineStepRecord``.

    The members other than ``FAILURE_REPORT`` appear to correspond one-to-one
    with ``ActionType`` members — the mapping itself is not defined in this
    module; confirm against the environment implementation.
    """

    BEAM_CONFIG = "beam_config"
    LUMINOSITY_LOG = "luminosity_log"
    TRIGGER_REPORT = "trigger_report"
    COLLISION_BATCH = "collision_batch"
    CALIBRATION_REPORT = "calibration_report"
    RECONSTRUCTION = "reconstruction"
    CHANNEL_SELECTION = "channel_selection"
    INVARIANT_MASS_HIST = "invariant_mass_hist"
    BACKGROUND_SUBTRACTION = "background_subtraction"
    FIT_RESULT = "fit_result"
    BUMP_SCAN = "bump_scan"
    ANGULAR_RESULT = "angular_result"
    SIGNIFICANCE = "significance"
    SYSTEMATICS_REPORT = "systematics_report"
    THEORY_REVIEW = "theory_review"
    DISCOVERY_CLAIM = "discovery_claim"
    FAILURE_REPORT = "failure_report"
class IntermediateOutput(BaseModel):
    """A single noisy detector or analysis artifact.

    ``output_type`` and ``step_index`` are required; the remaining fields
    default to a successful, full-quality, empty result.
    """

    output_type: OutputType
    step_index: int  # presumably the episode step that produced this output — confirm
    success: bool = True
    quality_score: float = Field(1.0, ge=0.0, le=1.0)  # validated to [0, 1]
    summary: str = ""
    data: Dict[str, Any] = Field(default_factory=dict)  # free-form payload; schema not fixed here
    uncertainty: float = Field(0.0, ge=0.0, le=1.0)  # validated to [0, 1]
    warnings: List[str] = Field(default_factory=list)
    artifacts_available: List[str] = Field(default_factory=list)
| # ── Observable state components ─────────────────────────────────────────── | |
class ResourceUsage(BaseModel):
    """Agent-visible resource counters.

    The ``*_remaining_*`` defaults (100, 300, 365) equal the default limits
    declared on ``TaskSpec``. Nothing in this model keeps the used and
    remaining values consistent; whoever updates it must do so.
    """

    budget_used_musd: float = 0.0  # "musd" = millions of USD, as on ToolSpec
    budget_remaining_musd: float = 100.0
    luminosity_used_fb: float = 0.0  # presumably integrated luminosity in fb^-1 — confirm
    luminosity_remaining_fb: float = 300.0
    time_used_days: float = 0.0
    time_remaining_days: float = 365.0
    compute_hours_used: float = 0.0  # has no matching "remaining" counter
class PipelineStepRecord(BaseModel):
    """History entry pairing the action taken with a summary of its outcome.

    ``action_type``, ``method`` and ``parameters`` repeat the fields of
    ``ExperimentAction``. ``output_type``, ``success`` and ``quality_score``
    repeat those of ``IntermediateOutput``. ``step_index``, ``action_type``
    and ``output_type`` are required.
    """

    step_index: int
    action_type: ActionType
    method: Optional[str] = None
    parameters: Dict[str, Any] = Field(default_factory=dict)
    output_summary: str = ""
    output_type: OutputType
    success: bool = True
    quality_score: float = 1.0  # NOTE: not range-validated here, unlike IntermediateOutput
    cost_musd: float = 0.0  # millions of USD
    luminosity_cost_fb: float = 0.0
    time_cost_days: float = 0.0
class PaperReference(BaseModel):
    """Bibliographic pointer attached to a task; only ``title`` is required."""

    title: str
    citation: Optional[str] = None
    doi: Optional[str] = None
    arxiv_id: Optional[str] = None
    url: Optional[str] = None
class ExpectedFinding(BaseModel):
    """One finding a task lists under ``TaskSpec.expected_findings``.

    How findings are matched against the agent's output is not defined in
    this module.
    """

    finding: str
    category: str = "claim"  # free-form label; "claim" is the only value seen in this module
    keywords: List[str] = Field(default_factory=list)
class TaskSpec(BaseModel):
    """The physics question the agent is given for this episode.

    Every field has a default, so ``TaskSpec()`` is a valid, fully permissive
    task: all beam energies, channels, triggers and registered tools are
    offered.
    """

    problem_statement: str = "Discover and characterise an unknown resonance."
    target_collider: str = "LHC"
    # The four option lists default to the complete enum / registry contents.
    beam_energy_options: List[str] = Field(
        default_factory=lambda: [e.value for e in BeamEnergy],
    )
    available_channels: List[str] = Field(
        default_factory=lambda: [c.value for c in DetectorChannel],
    )
    available_triggers: List[str] = Field(
        default_factory=lambda: [t.value for t in TriggerType],
    )
    available_tools: List[str] = Field(
        default_factory=lambda: list(TOOL_REGISTRY.keys()),
    )
    # The observation-context helper renders elements 0 and 1 as the window
    # bounds in GeV. The length is not validated here.
    mass_search_window_gev: List[float] = Field(default_factory=lambda: [50.0, 1000.0])
    budget_limit_musd: float = 100.0  # millions of USD
    luminosity_budget_fb: float = 300.0
    time_limit_days: float = 365.0
    prior_observations: List[str] = Field(default_factory=list)
    success_criteria: List[str] = Field(default_factory=list)
    paper_references: List[PaperReference] = Field(default_factory=list)
    expected_findings: List[ExpectedFinding] = Field(default_factory=list)
    difficulty: str = "medium"  # free-form label; not restricted to a fixed set here
class DiscoveryClaim(BaseModel):
    """Structured final claim graded against hidden truth.

    All physics quantities are optional, so a partial claim validates. Only
    ``confidence`` is range-checked (to [0, 1]). The allowed values noted
    beside ``spin_hypothesis`` and ``parity`` are not enforced by this model.
    """

    claim: str = ""
    mass_estimate_gev: Optional[float] = None
    mass_uncertainty_gev: Optional[float] = None
    width_estimate_gev: Optional[float] = None
    significance_sigma: Optional[float] = None
    decay_channel: Optional[str] = None  # presumably a DetectorChannel value — confirm
    spin_hypothesis: Optional[int] = None  # 0, 1, 2
    parity: Optional[str] = None  # "+", "-"
    cross_section_fb: Optional[float] = None
    confidence: float = Field(0.5, ge=0.0, le=1.0)
    evidence_steps: List[int] = Field(default_factory=list)  # presumably step indices — confirm
class CollisionObservation(Observation):
    """Full observable state returned to the agent each step.

    Excludes the hidden particle truth and hidden detector systematics.
    Every field declared here has a default, so an empty observation is
    valid.
    """

    task: TaskSpec = Field(default_factory=TaskSpec)
    step_index: int = 0
    pipeline_history: List[PipelineStepRecord] = Field(default_factory=list)
    # These three default to empty. The observation-context helper falls back
    # to the same-named lists on ``task`` when they are empty.
    available_channels: List[str] = Field(default_factory=list)
    available_triggers: List[str] = Field(default_factory=list)
    available_tools: List[str] = Field(default_factory=list)
    resource_usage: ResourceUsage = Field(default_factory=ResourceUsage)
    latest_output: Optional[IntermediateOutput] = None
    all_outputs: List[IntermediateOutput] = Field(default_factory=list)
    # Parallel lists: the observation-context helper pairs them index by
    # index. Equal length is not enforced by this model.
    candidate_masses_gev: List[float] = Field(default_factory=list)
    candidate_significances: List[float] = Field(default_factory=list)
    selected_channel: Optional[str] = None
    selected_beam_energy: Optional[str] = None
    cumulative_significance: float = 0.0
    uncertainty_summary: Dict[str, float] = Field(default_factory=dict)
    rule_violations: List[str] = Field(default_factory=list)
    step_reward_breakdown: Dict[str, float] = Field(default_factory=dict)
| # ── Agent-facing prompt helpers ─────────────────────────────────────────── | |
# One-paragraph usage hint per ActionType, embedded verbatim in the system
# prompt. The prompt builder indexes this mapping with every ActionType
# member, so each member needs an entry here.
AGENT_ACTION_GUIDANCE: Dict[ActionType, str] = {
    ActionType.CONFIGURE_BEAM: (
        "Pick the LHC center-of-mass energy. Higher energy reaches heavier "
        "resonances but costs more per fb^-1. Required before collecting data."
    ),
    ActionType.ALLOCATE_LUMINOSITY: (
        "Schedule a chunk of integrated luminosity (fb^-1). More luminosity "
        "means more events but uses budget and time. Required before collecting."
    ),
    ActionType.SET_TRIGGER: (
        "Choose a hardware/HLT trigger. Match the trigger to the channel of "
        "interest; mismatched triggers throw away signal."
    ),
    ActionType.COLLECT_COLLISIONS: (
        "Run the experiment. Returns a noisy raw event count plus background "
        "estimate, conditioned on beam, luminosity, trigger, and channel."
    ),
    ActionType.CALIBRATE_DETECTOR: (
        "Apply ECAL/tracker calibration. Reduces systematic uncertainty; "
        "neglecting it inflates fit uncertainty later."
    ),
    ActionType.RECONSTRUCT_TRACKS: (
        "Reconstruct charged-particle tracks and physics objects. Required "
        "before any analysis-level step."
    ),
    ActionType.SELECT_CHANNEL: (
        "Pick the decay channel to study (γγ, ℓℓ, jj, 4ℓ, bb). Wrong channel "
        "= small signal acceptance regardless of luminosity."
    ),
    ActionType.BUILD_INVARIANT_MASS: (
        "Construct the invariant-mass histogram in the chosen channel and "
        "mass window."
    ),
    ActionType.SUBTRACT_BACKGROUND: (
        "Fit a smooth background model and subtract it to expose any peak."
    ),
    ActionType.FIT_RESONANCE: (
        "Fit a Breit-Wigner / Crystal Ball line shape. Returns mass, width, "
        "and statistical uncertainty."
    ),
    ActionType.SCAN_BUMP: (
        "Run a sliding-window bump hunt over the mass window. Reports the "
        "most-significant candidate region."
    ),
    ActionType.MEASURE_ANGULAR: (
        "Measure decay angular distribution to constrain spin/parity. "
        "Useful only after a peak is identified."
    ),
    ActionType.ESTIMATE_SIGNIFICANCE: (
        "Compute the statistical significance of a candidate signal in σ. "
        "Required before claiming a discovery."
    ),
    ActionType.REQUEST_SYSTEMATICS: (
        "Run a systematics study (JES, luminosity, calibration). Improves "
        "uncertainty estimates and reduces overconfidence penalty."
    ),
    ActionType.REQUEST_THEORY_REVIEW: (
        "Ask a theorist sub-agent to review the evidence; small extra signal "
        "but not a substitute for missing data."
    ),
    ActionType.SUBMIT_DISCOVERY_CLAIM: (
        "Submit a structured discovery claim. Graded on mass calibration, "
        "significance, channel, spin hypothesis, and overconfidence."
    ),
}
# Global rules rendered as a bullet list in the system prompt. There are five
# entries; the second is one sentence split across two literals, wrapped in
# parentheses so it cannot be mistaken for two list items.
AGENT_ENVIRONMENT_RULES: List[str] = [
    "Each successful action returns summarized evidence; do not repeat steps.",
    (
        "Hard prerequisites are enforced: "
        "data collection requires beam+luminosity+trigger; "
        "analysis requires reconstruction and a chosen channel."
    ),
    "A discovery claim requires a fitted resonance and an estimated significance.",
    "Tools listed in available_tools are pre-filtered for this episode; prefer them.",
    "Submitting an overconfident wrong claim is heavily penalised.",
]
def build_agent_system_prompt() -> str:
    """Return the static system prompt for the LLM agent.

    The prompt consists of a role preamble, one bullet per entry in
    ``AGENT_ENVIRONMENT_RULES``, one bullet per ``ActionType`` member (in
    declaration order) with its ``AGENT_ACTION_GUIDANCE`` text, and the JSON
    response format with an example discovery-claim payload.

    Returns:
        The prompt as a single newline-joined string.

    Raises:
        KeyError: If an ``ActionType`` member has no guidance entry.
    """
    preamble = (
        "You are an expert high-energy physicist running an analysis at the LHC.",
        "",
        "At each turn you observe the experiment state and pick one structured next step",
        "to maximise the probability of correctly characterising a hidden resonance.",
        "",
        "Environment rules:",
    )
    rule_bullets = [f" - {rule}" for rule in AGENT_ENVIRONMENT_RULES]
    guidance_bullets = [
        f" - {action.value}: {AGENT_ACTION_GUIDANCE[action]}"
        for action in ActionType
    ]
    # The example claim is a single prompt line assembled from three literals.
    claim_example = (
        '{"mass_estimate_gev": 125.0, "mass_uncertainty_gev": 0.5, "width_estimate_gev": 0.004,'
        ' "significance_sigma": 5.2, "decay_channel": "diphoton", "spin_hypothesis": 0,'
        ' "parity": "+", "cross_section_fb": 50.0, "confidence": 0.9}'
    )
    response_format = (
        "",
        "Respond with ONLY a single valid JSON object, no extra prose:",
        '{"action_type": "...", "method": null, "parameters": {}, "justification": "...", "confidence": 0.8}',
        "",
        "For submit_discovery_claim, structure parameters['claim'] as:",
        claim_example,
    )
    sections = [
        *preamble,
        *rule_bullets,
        "",
        "Action guidance:",
        *guidance_bullets,
        *response_format,
    ]
    return "\n".join(sections)
def build_agent_observation_context(
    obs: CollisionObservation,
    *,
    max_tools: int = 6,
    max_channels: int = 4,
    max_candidates: int = 3,
) -> str:
    """Render a compact, newline-separated text summary of ``obs`` for a prompt.

    Lines are emitted in a fixed order: mass window and difficulty (always),
    then available channels, available tools, selected channel, beam energy
    and candidate peaks, each only when there is something to show.

    Args:
        obs: Observation to summarise. Channel and tool lists fall back to
            the ones on ``obs.task`` when the observation's own are empty.
        max_tools: Maximum number of tool names to list.
        max_channels: Maximum number of channel names to list.
        max_candidates: Maximum number of candidate peaks to list.

    Returns:
        The summary lines joined with ``"\\n"``.
    """
    # The window length is not validated by the model, so guard the indexing
    # rather than let a malformed task crash prompt construction.
    window = obs.task.mass_search_window_gev or []
    if len(window) >= 2:
        window_text = f"[{window[0]:.0f}, {window[1]:.0f}] GeV"
    else:
        window_text = "unspecified"
    parts: List[str] = [
        f"Mass search window: {window_text}; difficulty={obs.task.difficulty}."
    ]

    # dict.fromkeys drops duplicates while keeping first-seen order.
    channels = list(dict.fromkeys(obs.available_channels or obs.task.available_channels))
    if channels:
        parts.append("Available channels: " + ", ".join(channels[:max_channels]))

    tools = list(dict.fromkeys(obs.available_tools or obs.task.available_tools))
    if tools:
        parts.append("Available tools: " + ", ".join(tools[:max_tools]))

    if obs.selected_channel:
        parts.append(f"Selected channel: {obs.selected_channel}")
    if obs.selected_beam_energy:
        parts.append(f"Beam energy: {obs.selected_beam_energy}")

    masses = obs.candidate_masses_gev[:max_candidates]
    if masses:
        sigmas = obs.candidate_significances[:max_candidates]
        peaks = []
        for index, mass in enumerate(masses):
            # The two lists are not guaranteed to have equal length. Keep the
            # mass and mark the missing significance instead of dropping it.
            sigma_text = f"{sigmas[index]:.1f}" if index < len(sigmas) else "?"
            peaks.append(f"{mass:.1f}/{sigma_text}")
        parts.append("Candidate peaks (GeV / σ): " + ", ".join(peaks))

    return "\n".join(parts)
# Public API of this module: the names exported by ``from ... import *``.
__all__ = [
    # Action vocabulary and stage groupings
    "ActionType",
    "DAQ_ACTIONS",
    "RECO_ACTIONS",
    "ANALYSIS_ACTIONS",
    "META_ACTIONS",
    # Physics primitives
    "DetectorChannel",
    "TriggerType",
    "BeamEnergy",
    # Tool registry
    "ToolCategory",
    "ToolSpec",
    "TOOL_REGISTRY",
    # Action, output and observation models
    "ExperimentAction",
    "OutputType",
    "IntermediateOutput",
    "ResourceUsage",
    "PipelineStepRecord",
    "PaperReference",
    "ExpectedFinding",
    "TaskSpec",
    "DiscoveryClaim",
    "CollisionObservation",
    # Prompt helpers
    "AGENT_ACTION_GUIDANCE",
    "AGENT_ENVIRONMENT_RULES",
    "build_agent_system_prompt",
    "build_agent_observation_context",
]