Spaces:

ColdHearted
/

CodeReviewEnv

Sleeping

App Files Files Community

CodeReviewEnv / models.py

janakb

comit

ced8fd0 7 days ago

raw

history blame contribute delete

3.48 kB

	"""
	OpenEnv-compliant Pydantic models for the Code Review Environment.
	"""
	from __future__ import annotations
	from typing import Any, Dict, List, Literal, Optional
	from pydantic import BaseModel, Field


	# ─── Action Space ────────────────────────────────────────────────────────────

	class ReviewAction(BaseModel):
	    """One action submitted by the reviewing agent.

	    Only ``action_type`` is required. Every other field is optional and
	    defaults to ``None``. The fields are grouped below by the action type
	    they are intended to accompany; the model itself does not enforce
	    that a given group is populated for a given ``action_type``.
	    """

	    action_type: Literal["review", "patch", "comment", "submit"] = Field(
	        ...,
	        description="Type of action the agent takes.",
	    )

	    # --- 'review': structured analysis of a single finding ---------------
	    severity: Optional[Literal["critical", "major", "minor", "info"]] = Field(
	        default=None
	    )
	    # Free-form string; the description lists the expected categories but
	    # they are not validated.
	    issue_type: Optional[str] = Field(
	        None,
	        description="Category: bug, security, performance, style, logic",
	    )
	    # When given, must be >= 1.
	    line_number: Optional[int] = Field(None, ge=1)
	    # Capped at 500 characters.
	    description: Optional[str] = Field(None, max_length=500)

	    # --- 'patch': replacement code ---------------------------------------
	    patched_code: Optional[str] = Field(
	        None,
	        description="Full corrected code (for patch actions).",
	    )

	    # --- 'comment': free-form annotation (capped at 1000 characters) -----
	    comment: Optional[str] = Field(None, max_length=1000)

	    # --- 'submit': final verdict -----------------------------------------
	    verdict: Optional[Literal["approve", "request_changes", "reject"]] = Field(
	        default=None
	    )
	    # When given, must lie in the closed interval [0.0, 1.0].
	    confidence: Optional[float] = Field(None, ge=0.0, le=1.0)


	# ─── Observation Space ───────────────────────────────────────────────────────

	class CodeFile(BaseModel):
	    """A single source file included in a review.

	    All four fields are required; no cross-field validation is performed
	    (for example, ``line_count`` is not checked against ``content``).
	    """

	    filename: str
	    language: str
	    # Text of the file.
	    content: str
	    # Supplied by the caller; presumably the number of lines in
	    # ``content`` -- not verified here.
	    line_count: int


	class ReviewContext(BaseModel):
	    """Pull-request metadata and changed files presented for review.

	    ``test_results`` and ``linter_output`` are optional text fields that
	    default to ``None``; everything else is required.
	    """

	    pull_request_title: str
	    author: str
	    description: str
	    files_changed: List[CodeFile]

	    # Optional tool output attached to the pull request.
	    test_results: Optional[str] = Field(default=None)
	    linter_output: Optional[str] = Field(default=None)


	class Observation(BaseModel):
	    """Snapshot of the environment as seen by the agent at one step.

	    ``task_id``, ``step``, ``max_steps`` and ``review_context`` are
	    required. The remaining fields have defaults: empty lists for the
	    histories, ``None`` for feedback, ``0.0`` for the running score and
	    ``False`` for ``done``.
	    """

	    task_id: str
	    step: int
	    max_steps: int
	    review_context: ReviewContext

	    # A fresh list is created per instance via ``default_factory``.
	    previous_actions: List[ReviewAction] = Field(default_factory=list)
	    feedback: Optional[str] = Field(default=None)
	    # Untyped dicts; their schema is not defined in this module.
	    issues_found_so_far: List[Dict[str, Any]] = Field(default_factory=list)
	    score_so_far: float = Field(default=0.0)
	    done: bool = Field(default=False)


	# ─── Reward Model ────────────────────────────────────────────────────────────

	class StepReward(BaseModel):
	    """Reward signal produced for a single step.

	    ``value`` is required and validated to lie in ``[-1.0, 1.0]``.
	    ``breakdown`` maps component names to float contributions and
	    defaults to an empty dict; ``explanation`` defaults to an empty
	    string. The model does not check that the breakdown sums to
	    ``value``.
	    """

	    value: float = Field(..., ge=-1.0, le=1.0)
	    breakdown: Dict[str, float] = Field(default_factory=dict)
	    explanation: str = Field(default="")


	# ─── State ───────────────────────────────────────────────────────────────────

	class EnvironmentState(BaseModel):
	    """Full state record for one review task.

	    Shares ``task_id``, ``step``, ``max_steps``, ``review_context`` and
	    ``done`` with ``Observation`` and additionally tracks the submitted
	    patch, the submitted verdict, the accumulated reward and a
	    termination reason. Only the first four fields are required.
	    """

	    task_id: str
	    step: int
	    max_steps: int
	    review_context: ReviewContext

	    # A fresh list is created per instance via ``default_factory``.
	    actions_taken: List[ReviewAction] = Field(default_factory=list)
	    # Untyped dicts; their schema is not defined in this module.
	    issues_identified: List[Dict[str, Any]] = Field(default_factory=list)

	    patch_submitted: Optional[str] = Field(default=None)
	    # NOTE(review): plain ``str`` here, whereas ``ReviewAction.verdict`` is
	    # restricted to "approve" / "request_changes" / "reject" -- confirm
	    # whether this should share that constraint.
	    verdict_submitted: Optional[str] = Field(default=None)

	    total_reward: float = Field(default=0.0)
	    done: bool = Field(default=False)
	    terminated_reason: Optional[str] = Field(default=None)