"""Pydantic models for the OpenSleuth API and core state.

Backwards-compat note: any field added to ``Observation`` /
``StepResponse`` / ``State`` after v0.2 carries a default value so the
in-flight trainer (which only inspects a small subset of fields) keeps working.
"""

from __future__ import annotations

from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel, ConfigDict, Field


class ProbeAction(BaseModel):
    """Agent action that carries a single probe input for the target."""

    action_type: Literal["probe"] = "probe"
    # ``input_repr`` travels as the text of a Python literal (for example
    # "5", "'abc'" or "[1, 2, 3]") and is turned back into a value on the
    # server with ast.literal_eval. Shipping a string sidesteps a class of
    # FastAPI auto-coercion bugs and is what an LLM naturally emits.
    input_repr: str = Field(
        description="Python literal repr of the probe input",
    )


class SubmitAction(BaseModel):
    """Agent action that carries Python source for the target function."""

    action_type: Literal["submit"] = "submit"
    # Required: there is no default, so a payload without ``code`` is rejected.
    code: str = Field(
        description="Python source defining the target function",
    )


# Either kind of action an agent can send. The ``action_type`` literal on
# each member ("probe" vs "submit") is what tells the two shapes apart.
Action = Union[ProbeAction, SubmitAction]


class ProbeRecord(BaseModel):
    """A single entry of the probe history.

    ``output_repr`` holds the Pythonic repr of the function's return value,
    or an error string when the call raised instead of returning.
    """

    input_repr: str
    output_repr: str
    is_error: bool = Field(default=False)
    error_type: Optional[str] = Field(default=None)
    # Label of the coverage bucket the env assigned when it recorded this
    # probe. Stays ``None`` for parse-error probes, because the target was
    # never executed for those.
    bucket: Optional[str] = Field(default=None)


class Observation(BaseModel):
    """Episode snapshot carried in the ``observation`` field of ``StepResponse``.

    Only ``episode_id`` and ``target_function_name`` are required; every
    other field has a default.
    """

    episode_id: str
    target_function_name: str
    target_function_signature: str = Field(
        default="",
        description="Human readable signature + docstring shown to the agent",
    )
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    last_error: str = Field(default="")
    steps_taken: int = Field(default=0)
    max_steps: int = Field(default=25)
    # --- Optional metadata added later. Each field is defaulted, and the
    # --- trainer ignores them, so older consumers are unaffected.
    difficulty: Optional[str] = Field(
        default=None,
        description="Curriculum difficulty: easy / medium / hard.",
    )
    coverage_buckets_seen: int = Field(
        default=0,
        description="How many distinct input-domain buckets the agent has probed so far.",
    )
    seen_outputs_count: int = Field(
        default=0,
        description="How many distinct outputs the target function has produced so far.",
    )
    seen_error_types_count: int = Field(
        default=0,
        description="How many distinct error types the target function has raised so far.",
    )


class StepResponse(BaseModel):
    """Result of one step: an observation plus reward, done flag and info."""

    # All three of these are required; none has a default.
    observation: Observation
    reward: float
    done: bool
    # Free-form extras. ``default_factory`` gives each instance its own
    # empty dict when the caller supplies none.
    info: dict = Field(default_factory=dict)


class State(BaseModel):
    """Mutable, internal state of a single episode.

    It is not exposed in full in /step responses, but can be fetched via
    /state/{eid} for debugging.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    episode_id: str
    target_function_name: str
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    # Unparameterised sets, each starting empty per instance.
    # NOTE(review): presumably these back the ``*_count`` /
    # ``coverage_buckets_seen`` numbers on ``Observation`` -- confirm
    # against the environment code.
    seen_outputs: set = Field(default_factory=set)
    seen_error_types: set = Field(default_factory=set)
    seen_buckets: set = Field(default_factory=set)
    steps_taken: int = Field(default=0)
    done: bool = Field(default=False)
    seed: int = Field(default=0)


class ResetRequest(BaseModel):
    """Payload for resetting (starting) an episode.

    The original (v0.3) shape ``{"target_name": "fibonacci", "seed": 0,
    "max_steps": 25}`` keeps working exactly as before: the four newer
    fields are optional and purely additive, so the in-flight trainer does
    not have to change.

    An open-ended (Level 2) target is specified by passing ``target_code``
    together with ``target_function_name`` (and optionally ``edge_cases``
    and ``fuzz_spec``). These are then resolved via the TaskCatalog using
    the same hardened sandbox the verifier uses for agent submissions.
    """

    target_name: Optional[str] = Field(default=None)
    seed: int = Field(default=0)
    max_steps: int = Field(default=25)
    # --- Level 2 open-ended fields: all additive, all defaulting to None ---
    target_code: Optional[str] = Field(
        default=None,
        description=(
            "Python source defining a black-box callable. "
            "When set, overrides target_name "
            "(caller-supplied beats Hub beats builtin)."
        ),
    )
    target_function_name: Optional[str] = Field(
        default=None,
        description=(
            "Name of the callable inside target_code to use as the oracle. "
            "Required when target_code is set."
        ),
    )
    edge_cases: Optional[List[str]] = Field(
        default=None,
        description=(
            "Optional list of must-pass probe inputs as Python literal "
            "strings (e.g. ['0', '\"\"', '([1,2,3], 2)'])."
        ),
    )
    fuzz_spec: Optional[dict] = Field(
        default=None,
        description=(
            "Optional auto-fuzzer override map keyed by parameter name, "
            "e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}."
        ),
    )


class StepRequest(BaseModel):
    """Payload for advancing an episode by one action."""

    # Identifies the episode the action applies to. Required.
    episode_id: str
    # A ``ProbeAction`` or a ``SubmitAction`` (see the ``Action`` alias).
    action: Action