SimranShaikh committed on
Commit
fd78303
·
verified ·
1 Parent(s): 9553eba
Files changed (1) hide show
  1. environment/env.py +47 -121
environment/env.py CHANGED
@@ -1,135 +1,61 @@
1
- """
2
- CodeReviewEnv — main environment logic.
3
- Manages state, episode flow, reward accumulation, and per-step grading.
4
- """
5
- from typing import Optional, List, Dict, Any
6
-
7
- from environment.models import (
8
- CodeReviewAction,
9
- CodeReviewObservation,
10
- StepResult,
11
- ResetResult,
12
- StateResult,
13
- )
14
- from environment.tasks import get_task, list_tasks as _list_tasks
15
  from environment.graders import grade
16
 
17
 
18
- class CodeReviewEnv:
19
- """
20
- OpenEnv-compliant code-review environment.
21
 
22
- Episode flow
23
- ────────────
24
- reset(task_id) → observation
25
- step(action) → (observation, reward, done, info) [repeated ≤ max_steps]
26
- state() β†’ current state snapshot
27
- """
28
 
29
- def __init__(self) -> None:
30
- self._task: Optional[dict] = None
31
- self._step_number: int = 0
32
- self._total_reward: float = 0.0
33
- self._actions_history: List[Dict[str, Any]] = []
34
- self._done: bool = False
35
- self._initialized: bool = False
36
  self._last_feedback: Optional[str] = None
37
 
38
- # ── public properties ────────────────────────────────────────────────────
39
-
40
- @property
41
- def is_initialized(self) -> bool:
42
- return self._initialized
43
-
44
- # ── core API ─────────────────────────────────────────────────────────────
45
-
46
- def reset(self, task_id: Optional[str] = None) -> ResetResult:
47
- """Start a new episode. Defaults to the easy task."""
48
- if task_id is None:
49
- task_id = "easy_syntax"
50
-
51
- self._task = get_task(task_id)
52
- self._step_number = 0
53
- self._total_reward = 0.0
54
- self._actions_history = []
55
- self._done = False
56
- self._initialized = True
57
  self._last_feedback = None
58
-
59
- obs = self._make_observation()
60
- return ResetResult(observation=obs)
61
-
62
- def step(self, action: CodeReviewAction) -> StepResult:
63
- """Process one agent action and return (observation, reward, done, info)."""
64
- if not self._initialized or self._done:
65
- raise RuntimeError("Call reset() before stepping, or episode is over.")
66
-
67
- self._step_number += 1
68
- task_id = self._task["task_id"]
69
- ground_truth = self._task["ground_truth"]
70
-
71
- # Grade the action
72
- reward, feedback = grade(task_id, action, ground_truth)
73
- self._last_feedback = feedback
74
- self._total_reward += reward
75
-
76
- # Record history
77
- self._actions_history.append(
78
- {
79
- "step": self._step_number,
80
- "num_issues_reported": len(action.identified_issues),
81
- "has_fix": action.suggested_fix is not None,
82
- "reward": reward,
83
- }
84
  )
85
 
86
- # Episode ends when: agent says done, reward is perfect, or max steps reached
 
 
 
 
 
87
  max_steps = self._task["max_steps"]
88
- done = action.done or reward >= 0.95 or self._step_number >= max_steps
89
- self._done = done
90
-
91
- obs = self._make_observation()
92
-
93
- return StepResult(
94
- observation=obs,
95
- reward=reward,
 
 
 
96
  done=done,
97
- info={
98
- "feedback": feedback,
99
- "step": self._step_number,
100
- "total_reward": round(self._total_reward, 4),
101
- "cumulative_score": round(
102
- self._total_reward / max(self._step_number, 1), 4
103
- ),
104
- },
105
- )
106
-
107
- def get_state(self) -> StateResult:
108
- """Return a snapshot of the current episode state."""
109
- return StateResult(
110
- task_id=self._task["task_id"] if self._task else "",
111
- step_number=self._step_number,
112
- total_reward=round(self._total_reward, 4),
113
- actions_history=self._actions_history,
114
- done=self._done,
115
- initialized=self._initialized,
116
  )
117
 
118
- def list_tasks(self) -> list:
119
- return _list_tasks()
120
-
121
- # ── internal helpers ─────────────────────────────────────────────────────
122
-
123
- def _make_observation(self) -> CodeReviewObservation:
124
- t = self._task
125
- return CodeReviewObservation(
126
- task_id=t["task_id"],
127
- task_name=t["task_name"],
128
- difficulty=t["difficulty"],
129
- language=t["language"],
130
- code_snippet=t["code_snippet"],
131
- context=t["context"],
132
- step_number=self._step_number,
133
- max_steps=t["max_steps"],
134
- previous_feedback=self._last_feedback,
135
- )
 
1
+ from uuid import uuid4
2
+ from typing import Optional
3
+ from openenv.core.env_server.interfaces import Environment
4
+ from openenv.core.env_server.types import State
5
+ from environment.models import CodeReviewAction, CodeReviewObservation
6
+ from environment.tasks import get_task
 
 
 
 
 
 
 
 
7
  from environment.graders import grade
8
 
9
 
10
+ class CodeReviewEnv(Environment):
 
 
11
 
12
+ SUPPORTS_CONCURRENT_SESSIONS: bool = False
 
 
 
 
 
13
 
14
+ def __init__(self):
15
+ self._state = State(episode_id=str(uuid4()), step_count=0)
16
+ self._task = get_task("easy_syntax")
 
 
 
 
17
  self._last_feedback: Optional[str] = None
18
 
19
+ def reset(self) -> CodeReviewObservation:
20
+ self._task = get_task("easy_syntax")
21
+ self._state = State(episode_id=str(uuid4()), step_count=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  self._last_feedback = None
23
+ return CodeReviewObservation(
24
+ task_id=self._task["task_id"],
25
+ task_name=self._task["task_name"],
26
+ difficulty=self._task["difficulty"],
27
+ language=self._task["language"],
28
+ code_snippet=self._task["code_snippet"],
29
+ context=self._task["context"],
30
+ step_number=0,
31
+ max_steps=self._task["max_steps"],
32
+ previous_feedback=None,
33
+ done=False,
34
+ reward=0.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  )
36
 
37
+ def step(self, action: CodeReviewAction) -> CodeReviewObservation:
38
+ self._state.step_count += 1
39
+ reward, feedback = grade(
40
+ self._task["task_id"], action, self._task["ground_truth"]
41
+ )
42
+ self._last_feedback = feedback
43
  max_steps = self._task["max_steps"]
44
+ done = action.submit or reward >= 0.95 or self._state.step_count >= max_steps
45
+ return CodeReviewObservation(
46
+ task_id=self._task["task_id"],
47
+ task_name=self._task["task_name"],
48
+ difficulty=self._task["difficulty"],
49
+ language=self._task["language"],
50
+ code_snippet=self._task["code_snippet"],
51
+ context=self._task["context"],
52
+ step_number=self._state.step_count,
53
+ max_steps=max_steps,
54
+ previous_feedback=feedback,
55
  done=done,
56
+ reward=reward,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  )
58
 
59
+ @property
60
+ def state(self) -> State:
61
+ return self._state