File size: 5,214 Bytes
c29f1fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""Typed models for the python_code_review_env environment."""

from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

from openenv.core.env_server.types import Action, Observation, State


# Closed vocabularies shared by the action, observation, state, and task models.
# Task difficulty tier used for task selection and reporting.
Difficulty = Literal["easy", "medium", "hard"]
# Category of work a task asks the agent to perform.
TaskKind = Literal["syntax_fix", "bug_fix", "optimization"]
# Every action the agent may send via PythonCodeReviewAction.action_type.
ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"]


class HistoryEntry(BaseModel):
    """One environment transition recorded for the agent.

    Entries accumulate in the ``history`` lists of the observation and
    state models so the agent can inspect its own trajectory.
    """

    # 0-based index of the step within the episode.
    step: int = Field(..., ge=0)
    # Which action produced this transition.
    action_type: ActionType
    status: str = Field(..., description="Short outcome summary.")
    # Inclusive bounds: a step may legitimately earn exactly 0.0 (no progress)
    # or 1.0 (task solved). The previous gt/lt bounds rejected both endpoints.
    reward: float = Field(..., ge=0.0, le=1.0, description="Reward returned for the step.")


class RewardDetails(BaseModel):
    """Transparent reward decomposition for debugging and training.

    ``value`` is the clamped net reward; the remaining numeric fields are
    the individual additive components and penalties that produced it.
    """

    # Clamped net reward. Inclusive bounds: a fully failed step (0.0) or a
    # fully solved one (1.0) must validate — "clamped" implies a closed
    # interval, which the previous exclusive gt/lt bounds contradicted.
    value: float = Field(..., ge=0.0, le=1.0, description="Clamped net reward in [0.0, 1.0].")
    # Positive reward components.
    syntax_reward: float = Field(default=0.0)
    test_reward: float = Field(default=0.0)
    correctness_bonus: float = Field(default=0.0)
    quality_bonus: float = Field(default=0.0)
    progress_delta: float = Field(default=0.0)
    # Negative components (penalties).
    invalid_action_penalty: float = Field(default=0.0)
    timeout_penalty: float = Field(default=0.0)
    regression_penalty: float = Field(default=0.0)
    stagnation_penalty: float = Field(default=0.0)
    reason: str = Field(..., description="Human-readable reward explanation.")
    # Scores before and after the action, in [0.0, 1.0]. The old 0.01
    # defaults existed only to satisfy the broken exclusive lower bound;
    # 0.0 is the natural "no score yet" value.
    prev_score: float = Field(default=0.0, ge=0.0, le=1.0)
    curr_score: float = Field(default=0.0, ge=0.0, le=1.0)
    code_changed: bool = Field(default=False)


class PythonCodeReviewAction(Action):
    """Action schema exposed to the agent.

    Subclasses the core ``Action`` type so requests travel through the
    standard env-server transport.
    """

    # One of: analyze_code, edit_code, run_tests, submit_solution.
    action_type: ActionType = Field(..., description="Environment action to take.")
    # Presumably required for edit_code/submit_solution and ignored for the
    # other actions — the enforcement lives in the environment, not shown here.
    code: Optional[str] = Field(
        default=None,
        description="Updated Python source for edit_code or submit_solution actions.",
    )


class PythonCodeReviewObservation(Observation):
    """Observation returned by reset and step.

    Carries the full task context plus the agent's running trajectory so
    each observation is self-contained.
    """

    task_id: str = Field(..., description="Stable task identifier.")
    title: str = Field(..., description="Human-readable task title.")
    difficulty: Difficulty
    task_kind: TaskKind
    task_description: str = Field(..., description="Task instructions shown to the agent.")
    current_code: str = Field(..., description="Latest code under review.")
    errors: str = Field(default="", description="Syntax or execution errors.")
    test_results: str = Field(default="", description="Public test and benchmark feedback.")
    visible_tests: List[str] = Field(default_factory=list)
    # One entry per completed step, oldest first.
    history: List[HistoryEntry] = Field(default_factory=list)
    attempts_remaining: int = Field(..., ge=0)
    last_action_status: str = Field(default="")
    # Inclusive bounds: 0.0 (nothing passing) and 1.0 (fully solved) are
    # legitimate scores; the previous exclusive gt/lt bounds rejected both.
    score: float = Field(..., ge=0.0, le=1.0)
    # Small positive placeholder value so the default satisfies
    # RewardDetails' declared lower bound on ``value``.
    reward_details: RewardDetails = Field(
        default_factory=lambda: RewardDetails(value=0.1, reason="Environment reset.")
    )


class PythonCodeReviewState(State):
    """Internal environment state exposed through /state.

    All fields default to "no episode yet" values (None / empty / zero) so
    a freshly constructed state validates before reset.
    """

    task_id: Optional[str] = Field(default=None)
    difficulty: Optional[Difficulty] = Field(default=None)
    task_kind: Optional[TaskKind] = Field(default=None)
    attempts_remaining: int = Field(default=0, ge=0)
    current_code: str = Field(default="")
    errors: str = Field(default="")
    test_results: str = Field(default="")
    history: List[HistoryEntry] = Field(default_factory=list)
    # 0.0 is the natural initial score. Inclusive bounds replace the old
    # exclusive gt/lt pair, which both forced an artificial 0.01 default
    # and made a perfect 1.0 score unrepresentable.
    score: float = Field(default=0.0, ge=0.0, le=1.0)
    done: bool = Field(default=False)


class TaskDescriptor(BaseModel):
    """Static task metadata.

    Describes one task as loaded from the task catalog; immutable during
    an episode, unlike the per-episode observation/state models.
    """

    task_id: str
    title: str
    difficulty: Difficulty
    task_kind: TaskKind
    # Instructions shown to the agent verbatim.
    task_description: str
    # Initial code the agent starts editing from.
    starter_code: str
    visible_tests: List[str] = Field(default_factory=list)
    repo_summary: str = Field(default="")
    changed_files: List[str] = Field(default_factory=list)
    available_files: List[str] = Field(default_factory=list)
    goal: str = Field(default="")
    # Episode step budget; every task allows at least one step.
    max_steps: int = Field(..., ge=1)


class TaskSummary(BaseModel):
    """Compact task listing entry.

    A lightweight projection of :class:`TaskDescriptor` suitable for
    enumerating available tasks without their full payloads.
    """

    task_id: str
    difficulty: Difficulty
    title: str
    goal: str = Field(default="")


class TaskGrade(BaseModel):
    """Deterministic grader output.

    All score fields live in the inclusive range [0.0, 1.0]. The previous
    exclusive gt/lt bounds made both a zero score and a perfect 1.0
    unrepresentable and forced artificial 0.01 defaults.
    """

    # Overall grade for the submission.
    score: float = Field(..., ge=0.0, le=1.0)
    syntax_score: float = Field(default=0.0, ge=0.0, le=1.0)
    # Raw public-test counts backing ``score``.
    tests_passed: int = Field(default=0, ge=0)
    tests_total: int = Field(default=0, ge=0)
    quality_score: float = Field(default=0.0, ge=0.0, le=1.0)
    runtime_score: float = Field(default=0.0, ge=0.0, le=1.0)
    timed_out: bool = Field(default=False)
    # Free-form grader diagnostics for debugging.
    details: Dict[str, Any] = Field(default_factory=dict)


class HealthResponse(BaseModel):
    """Health payload for smoke tests.

    Returned by the environment's health endpoint; ``status`` is pinned to
    the literal "ok" so an unhealthy server simply fails to respond.
    """

    status: Literal["ok"] = "ok"
    environment: str = "python_code_review_env"
    # Number of tasks loaded into the catalog.
    task_count: int = Field(default=0, ge=0)


# Short convenience aliases for the fully qualified model names
# (presumably kept for external importers — confirm usage before removing).
PythonAction = PythonCodeReviewAction
PythonObservation = PythonCodeReviewObservation
PythonState = PythonCodeReviewState