"""Data models for the metric tracker RL environment.""" from __future__ import annotations from typing import Any, Literal from pydantic import BaseModel, Field from openenv.core.env_server.types import Action, Observation class MetricRecord(BaseModel): """Hourly or daily aggregate metrics for the app funnel.""" date: str = Field(..., description="ISO date in YYYY-MM-DD format.") hour: int | None = Field( default=None, description="Hour bucket in 24h format. Null for daily aggregates.", ) app_opens: int = Field(default=0, description="Count of app_open events.") menu_opens: int = Field(default=0, description="Count of menu_open events.") product_added_to_cart: int = Field( default=0, description="Count of product_added_to_cart events.", ) orders_placed: int = Field(default=0, description="Count of order_placed events.") payment_successful: int = Field( default=0, description="Count of payment_successful events.", ) class ConversionMetricDefinition(BaseModel): """Definition for a conversion metric that the agent can cite.""" name: str = Field(..., description="Stable conversion metric identifier.") numerator: str = Field(..., description="Numerator event.") denominator: str = Field(..., description="Denominator event.") description: str = Field(..., description="Human-readable formula.") class MethodSpec(BaseModel): """Description of a shared safe analysis method.""" name: str = Field(..., description="Method name.") description: str = Field(..., description="What the method does.") parameters: list[str] = Field( default_factory=list, description="Ordered parameter names for the method.", ) class MetricSubmissionRow(BaseModel): """Submitted anomaly row.""" date: str = Field(..., description="ISO date in YYYY-MM-DD format.") entity_type: str = Field( ..., description=( "Stable entity family such as conversion_rate, event_count, funnel_step, " "hourly_mix, or data_quality." ), ) entity_name: str = Field(..., description="Stable entity identifier.") anomaly_type: str = Field(..., description="Stable anomaly type identifier.") detection_method: str = Field(..., description="Shared analysis method used.") baseline_value: float = Field(..., description="Reference baseline value.") observed_value: float = Field(..., description="Observed anomalous value.") delta_value: float = Field(..., description="Observed minus baseline.") severity: Literal["low", "medium", "high", "critical"] = Field( ..., description="Severity label.", ) class PayloadGeneratorMethod(BaseModel): """A declarative payload generation method.""" method_name: str = Field( ..., description="Generator method name, for example get_median_filter_rows.", ) metric_name: str | None = Field( default=None, description="Single count metric or conversion metric name. Optional.", ) metric_names: list[str] = Field( default_factory=list, description="Optional list of metrics to run. Empty means all metrics.", ) threshold_multiplier: float = Field( ..., description="Multiplier applied to the metric std-from-median value.", ) class SyntheticAnomalyGenerator(BaseModel): """A declarative reset-time synthetic anomaly generator.""" method_name: str = Field( default="metric_stddev_shift", description="Synthetic generator method name.", ) metric_name: str | None = Field( default=None, description="Single count metric or conversion metric name. Optional.", ) metric_names: list[str] = Field( default_factory=list, description="Optional list of metrics to generate on. Empty means use metric_name.", ) date: str | None = Field( default=None, description="Single ISO date to inject on. Optional.", ) dates: list[str] = Field( default_factory=list, description="Optional list of ISO dates to inject on.", ) stddev_factor: float = Field( default=2.0, description="Multiplier applied to std_dev_from_median when creating the target value.", ) direction: Literal["up", "down", "auto"] = Field( default="auto", description="Whether to shift the metric upward or downward.", ) class SyntheticGeneratorApplication(BaseModel): """Resolved synthetic generator application used for the active episode.""" method_name: str = Field(..., description="Synthetic generator method used.") date: str = Field(..., description="ISO date the generator was applied to.") metric_name: str = Field(..., description="Metric name used by the generator.") metric_type: Literal["event_count", "conversion_rate"] = Field( ..., description="Resolved metric family.", ) direction: Literal["up", "down"] = Field(..., description="Resolved direction.") anomaly_type: str = Field(..., description="Expected anomaly type generated.") detection_method: str = Field(..., description="Shared analysis method that should detect it.") baseline_value: float = Field(..., description="Median baseline used during generation.") pre_applied_value: float = Field(..., description="Metric value before generation.") std_dev_from_median: float = Field(..., description="Std-from-median used during generation.") stddev_factor: float = Field(..., description="Configured stddev factor.") threshold_value: float = Field(..., description="stddev_factor * std_dev_from_median.") target_value: float = Field(..., description="Requested target value before rebalancing.") actual_value: float = Field(..., description="Observed value after generation.") formula: str = Field(..., description="Human-readable formula used for generation.") class SubmissionIssue(BaseModel): """Feedback about a submitted row or missing expectation.""" row_key: str = Field(..., description="Stable key in date|entity_type|entity_name form.") issue_type: str = Field(..., description="Issue class.") message: str = Field(..., description="Human-readable explanation.") submitted_row: dict[str, Any] | None = Field( default=None, description="Submitted row fragment when relevant.", ) expected_row: dict[str, Any] | None = Field( default=None, description="Expected row fragment when debug is enabled.", ) class RewardBreakdown(BaseModel): """Deterministic grading components.""" precision: float = 0.0 recall: float = 0.0 anomaly_type_accuracy: float = 0.0 detection_method_accuracy: float = 0.0 value_accuracy: float = 0.0 severity_accuracy: float = 0.0 extra_row_penalty: float = 0.0 duplicate_penalty: float = 0.0 invalid_row_penalty: float = 0.0 exploit_penalty: float = 0.0 total_score: float = 0.0 matched_rows: int = 0 expected_rows: int = 0 submitted_rows: int = 0 valid_submitted_rows: int = 0 extra_rows: int = 0 duplicate_rows: int = 0 invalid_rows: int = 0 missing_rows: int = 0 class SubmissionPreview(BaseModel): """Safe preview of a candidate submission before grading.""" valid_rows: int = 0 invalid_rows: int = 0 duplicate_rows: int = 0 unique_keys: int = 0 issues: list[SubmissionIssue] = Field(default_factory=list) normalized_rows: list[MetricSubmissionRow] = Field(default_factory=list) class BenchmarkTaskSpec(BaseModel): """Public metadata for a benchmark task.""" task_id: str = Field(..., description="Stable benchmark task identifier.") difficulty: Literal["easy", "medium", "hard"] = Field( ..., description="Canonical task difficulty.", ) instruction: str = Field(..., description="Task instruction shown to the agent.") objective: str = Field(..., description="Concrete success objective.") scenario_family: str = Field(..., description="Scenario family used to generate the task episode.") anomaly_density: str = Field(..., description="Relative anomaly density for the task episode.") anomaly_count: int = Field(..., description="Number of anomalous rows expected for the task.") grader_name: str = Field(..., description="Programmatic grader used for the task.") class MetricTrackerRlAction(Action): """Submitted anomaly payload for the current episode.""" classifications: list[MetricSubmissionRow] = Field( default_factory=list, description="Submitted anomaly rows for the dataset.", ) analysis_method: str | None = Field( default=None, description="Optional shared analysis method to call instead of grading a submission.", ) analysis_args: dict[str, Any] = Field( default_factory=dict, description="Arguments for the selected analysis method.", ) payload_generators: list[PayloadGeneratorMethod] = Field( default_factory=list, description="Declarative payload generation methods to run inside the environment.", ) class MetricTrackerRlObservation(Observation): """Observation containing the dataset and analysis surface.""" task_id: str = Field( default="", description="Stable identifier for the active benchmark task.", ) status: str = Field( default="ready", description="Episode status: ready, in_progress, evaluated, or completed.", ) message: str = Field(default="", description="Human-readable environment feedback.") instruction: str = Field( default="", description="Task presented to the model for the current episode.", ) conversion_metric_definitions: list[ConversionMetricDefinition] = Field( default_factory=list, description="Conversion formulas the model may cite.", ) available_synthetic_generator_methods: list[MethodSpec] = Field( default_factory=list, description="Reset-time synthetic generator methods available for seeded data creation.", ) applied_synthetic_generators: list[SyntheticGeneratorApplication] = Field( default_factory=list, description="Resolved synthetic generator applications used for the active episode.", ) available_methods: list[MethodSpec] = Field( default_factory=list, description="Safe shared analysis methods available to agents and humans.", ) available_tasks: list[BenchmarkTaskSpec] = Field( default_factory=list, description="Catalog of benchmark tasks available in this environment.", ) daily_metrics: list[MetricRecord] = Field( default_factory=list, description="Deprecated raw daily data field. Kept empty in standard mode.", ) hourly_metrics: list[MetricRecord] = Field( default_factory=list, description="Deprecated raw hourly data field. Kept empty in standard mode.", ) analysis_result: dict[str, Any] | None = Field( default=None, description="Result of the latest analysis-method call.", ) generated_rows: list[MetricSubmissionRow] = Field( default_factory=list, description="Rows generated from payload generator methods, if used.", ) submitted_rows: list[MetricSubmissionRow] = Field( default_factory=list, description="Most recent submitted anomaly rows.", ) submission_preview: SubmissionPreview | None = Field( default=None, description="Safe preview information for the latest submitted payload.", ) submission_issues: list[SubmissionIssue] = Field( default_factory=list, description="Feedback for the latest submitted payload.", ) reward_breakdown: RewardBreakdown | None = Field( default=None, description="Deterministic reward components for the latest step.", ) expected_row_count: int = Field( default=0, description="Number of expected anomaly rows in the current episode.", ) correct_row_count: int = Field( default=0, description="Number of matched anomaly rows in the latest step.", ) config: dict[str, Any] = Field( default_factory=dict, description="Episode configuration visible in standard mode.", ) debug: dict[str, Any] | None = Field( default=None, description="Developer-only debug payload. Hidden in standard mode.", )