Spaces:
Build error
Build error
File size: 5,214 Bytes
c29f1fd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | """Typed models for the python_code_review_env environment."""
from __future__ import annotations
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from openenv.core.env_server.types import Action, Observation, State
Difficulty = Literal["easy", "medium", "hard"]
TaskKind = Literal["syntax_fix", "bug_fix", "optimization"]
ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"]
class HistoryEntry(BaseModel):
"""One environment transition recorded for the agent."""
step: int = Field(..., ge=0)
action_type: ActionType
status: str = Field(..., description="Short outcome summary.")
reward: float = Field(..., gt=0.0, lt=1.0, description="Reward returned for the step.")
class RewardDetails(BaseModel):
"""Transparent reward decomposition for debugging and training."""
value: float = Field(..., gt=0.0, lt=1.0, description="Clamped net reward in (0.0, 1.0).")
syntax_reward: float = Field(default=0.0)
test_reward: float = Field(default=0.0)
correctness_bonus: float = Field(default=0.0)
quality_bonus: float = Field(default=0.0)
progress_delta: float = Field(default=0.0)
invalid_action_penalty: float = Field(default=0.0)
timeout_penalty: float = Field(default=0.0)
regression_penalty: float = Field(default=0.0)
stagnation_penalty: float = Field(default=0.0)
reason: str = Field(..., description="Human-readable reward explanation.")
prev_score: float = Field(default=0.01, gt=0.0, lt=1.0)
curr_score: float = Field(default=0.01, gt=0.0, lt=1.0)
code_changed: bool = Field(default=False)
class PythonCodeReviewAction(Action):
"""Action schema exposed to the agent."""
action_type: ActionType = Field(..., description="Environment action to take.")
code: Optional[str] = Field(
default=None,
description="Updated Python source for edit_code or submit_solution actions.",
)
class PythonCodeReviewObservation(Observation):
"""Observation returned by reset and step."""
task_id: str = Field(..., description="Stable task identifier.")
title: str = Field(..., description="Human-readable task title.")
difficulty: Difficulty
task_kind: TaskKind
task_description: str = Field(..., description="Task instructions shown to the agent.")
current_code: str = Field(..., description="Latest code under review.")
errors: str = Field(default="", description="Syntax or execution errors.")
test_results: str = Field(default="", description="Public test and benchmark feedback.")
visible_tests: List[str] = Field(default_factory=list)
history: List[HistoryEntry] = Field(default_factory=list)
attempts_remaining: int = Field(..., ge=0)
last_action_status: str = Field(default="")
score: float = Field(..., gt=0.0, lt=1.0)
reward_details: RewardDetails = Field(
default_factory=lambda: RewardDetails(value=0.1, reason="Environment reset.")
)
class PythonCodeReviewState(State):
"""Internal environment state exposed through /state."""
task_id: Optional[str] = Field(default=None)
difficulty: Optional[Difficulty] = Field(default=None)
task_kind: Optional[TaskKind] = Field(default=None)
attempts_remaining: int = Field(default=0, ge=0)
current_code: str = Field(default="")
errors: str = Field(default="")
test_results: str = Field(default="")
history: List[HistoryEntry] = Field(default_factory=list)
score: float = Field(default=0.01, gt=0.0, lt=1.0)
done: bool = Field(default=False)
class TaskDescriptor(BaseModel):
"""Static task metadata."""
task_id: str
title: str
difficulty: Difficulty
task_kind: TaskKind
task_description: str
starter_code: str
visible_tests: List[str] = Field(default_factory=list)
repo_summary: str = Field(default="")
changed_files: List[str] = Field(default_factory=list)
available_files: List[str] = Field(default_factory=list)
goal: str = Field(default="")
max_steps: int = Field(..., ge=1)
class TaskSummary(BaseModel):
"""Compact task listing entry."""
task_id: str
difficulty: Difficulty
title: str
goal: str = Field(default="")
class TaskGrade(BaseModel):
"""Deterministic grader output."""
score: float = Field(..., gt=0.0, lt=1.0)
syntax_score: float = Field(default=0.01, gt=0.0, lt=1.0)
tests_passed: int = Field(default=0, ge=0)
tests_total: int = Field(default=0, ge=0)
quality_score: float = Field(default=0.01, gt=0.0, lt=1.0)
runtime_score: float = Field(default=0.01, gt=0.0, lt=1.0)
timed_out: bool = Field(default=False)
details: Dict[str, Any] = Field(default_factory=dict)
class HealthResponse(BaseModel):
"""Health payload for smoke tests."""
status: Literal["ok"] = "ok"
environment: str = "python_code_review_env"
task_count: int = Field(default=0, ge=0)
PythonAction = PythonCodeReviewAction
PythonObservation = PythonCodeReviewObservation
PythonState = PythonCodeReviewState
|