# Pydantic models for tasks, actions, observations, rewards, environment
# state, and the reset/step payloads.
#
# NOTE: classes are documented with comments rather than docstrings because
# pydantic copies a class docstring into the generated JSON schema as its
# "description", which would change the schema these models emit.

from __future__ import annotations

from enum import Enum
from typing import Any, Literal

from pydantic import BaseModel, Field, field_validator


# Identifiers of the available tasks. Because of the ``str`` mixin every
# member is itself a string equal to its value.
class TaskType(str, Enum):
    EASY = "easy_docker"
    MEDIUM = "medium_k8s"
    HARD = "hard_ml_config"


# A single operation addressed by a dot-separated path.
class ConfigAction(BaseModel):
    # Only these three operation names are accepted.
    operation: Literal["edit", "add", "delete"] = Field(description="Operation type")

    # Required. Stored with surrounding whitespace removed (see the
    # validator below); a blank path is rejected.
    path: str = Field(
        description="Dot path, list indexes allowed (example: a.b.0.c)",
    )

    # Optional payload; omitted values become None.
    value: Any | None = Field(
        default=None,
        description="Value used for edit/add",
    )

    @field_validator("path")
    @classmethod
    def _validate_path(cls, value: str) -> str:
        """Trim surrounding whitespace from ``path`` and reject blank input.

        Returns:
            The path with leading/trailing whitespace stripped.

        Raises:
            ValueError: If nothing remains after stripping.
        """
        stripped = value.strip()
        if stripped:
            return stripped
        raise ValueError("path cannot be empty")


# Snapshot reported for a task: the config text, validity information,
# scores, and step counters.
class ConfigObservation(BaseModel):
    task_id: TaskType
    task_description: str
    # The configuration as a string (presumably its serialized text --
    # the format is not fixed by this model).
    current_config: str
    syntax_valid: bool
    # Each instance gets its own fresh empty list when none is supplied.
    validation_errors: list[str] = Field(default_factory=list)

    # All three scores are required and constrained to 0.0 <= x <= 1.0.
    schema_score: float = Field(ge=0.0, le=1.0)
    logic_score: float = Field(ge=0.0, le=1.0)
    overall_score: float = Field(ge=0.0, le=1.0)

    # step_count may be zero; max_steps must be at least one.
    step_count: int = Field(ge=0)
    max_steps: int = Field(ge=1)


# Reward details for a step.
class ConfigReward(BaseModel):
    # value and both scores are required and constrained to 0.0 <= x <= 1.0.
    value: float = Field(ge=0.0, le=1.0)
    previous_score: float = Field(ge=0.0, le=1.0)
    current_score: float = Field(ge=0.0, le=1.0)
    # Unconstrained, so it may be negative. Presumably
    # current_score - previous_score; that relation is not enforced here.
    delta: float
    # Each instance gets its own fresh empty list when none is supplied.
    penalties: list[str] = Field(default_factory=list)


# Environment state. ``done``, ``step_count`` and ``max_steps`` are required;
# the remaining fields default to None.
class EnvState(BaseModel):
    task_id: TaskType | None = None
    done: bool
    step_count: int = Field(ge=0)
    max_steps: int = Field(ge=1)
    observation: ConfigObservation | None = None
    last_reward: ConfigReward | None = None


# Payload of a reset request. Both fields are optional and share one type.
# NOTE(review): ``task_id`` and ``task`` look like two spellings of the same
# selector; how they are reconciled is up to the consumer -- confirm.
class ResetRequest(BaseModel):
    task_id: TaskType | None = None
    task: TaskType | None = None


# Result of a step; every field is required.
class StepResponse(BaseModel):
    observation: ConfigObservation
    reward: ConfigReward
    done: bool
    # Free-form mapping with string keys.
    info: dict[str, Any]