SimranShaikh's picture
commit
fd78303 verified
from uuid import uuid4
from typing import Optional
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from environment.models import CodeReviewAction, CodeReviewObservation
from environment.tasks import get_task
from environment.graders import grade
class CodeReviewEnv(Environment):
    """Code-review environment that serves one fixed task per episode.

    Every episode loads the task identified by ``_DEFAULT_TASK_ID``.  Each
    call to :meth:`step` passes the agent's action to ``grade`` together with
    the task's ground truth and reports the resulting reward and feedback in
    a ``CodeReviewObservation``.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = False

    # Task loaded at construction time and again on every reset.
    _DEFAULT_TASK_ID = "easy_syntax"
    # Reward at or above which an episode ends even without an explicit submit.
    _SUCCESS_REWARD = 0.95

    def __init__(self):
        """Create the environment with a fresh episode on the default task."""
        # Let the base class set up whatever state it maintains before the
        # environment-specific attributes are assigned.
        super().__init__()
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._task = get_task(self._DEFAULT_TASK_ID)
        self._last_feedback: Optional[str] = None

    def _make_observation(
        self, *, feedback: Optional[str], done, reward
    ) -> CodeReviewObservation:
        """Build an observation from the current task and step counter.

        Args:
            feedback: Grader feedback for the previous action, or ``None``
                when no action has been graded in this episode yet.
            done: Episode-termination value to report.
            reward: Reward to report for the step that just happened.

        Returns:
            A ``CodeReviewObservation`` carrying the task description fields,
            the current step number and the supplied outcome fields.
        """
        task = self._task
        return CodeReviewObservation(
            task_id=task["task_id"],
            task_name=task["task_name"],
            difficulty=task["difficulty"],
            language=task["language"],
            code_snippet=task["code_snippet"],
            context=task["context"],
            step_number=self._state.step_count,
            max_steps=task["max_steps"],
            previous_feedback=feedback,
            done=done,
            reward=reward,
        )

    def reset(self) -> CodeReviewObservation:
        """Start a new episode and return its initial observation.

        Reloads the default task, replaces the state with a new episode id
        and a zero step count, and clears the stored feedback.

        Returns:
            The first observation: step 0, no feedback, ``done=False`` and a
            reward of ``0.0``.
        """
        self._task = get_task(self._DEFAULT_TASK_ID)
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._last_feedback = None
        return self._make_observation(feedback=None, done=False, reward=0.0)

    def step(self, action: CodeReviewAction) -> CodeReviewObservation:
        """Grade ``action`` and advance the episode by one step.

        Args:
            action: The agent's review action; its ``submit`` attribute ends
                the episode when truthy.

        Returns:
            An observation with the grader's reward and feedback.  The
            episode is reported as done when the action submits, the reward
            reaches ``_SUCCESS_REWARD``, or the task's ``max_steps`` budget
            is used up.
        """
        self._state.step_count += 1
        reward, feedback = grade(
            self._task["task_id"], action, self._task["ground_truth"]
        )
        self._last_feedback = feedback
        done = (
            action.submit
            or reward >= self._SUCCESS_REWARD
            or self._state.step_count >= self._task["max_steps"]
        )
        return self._make_observation(feedback=feedback, done=done, reward=reward)

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state