Spaces:

uvpatel7271
/

python-code-review-env

Runtime error

App Files Files Community

python-code-review-env / tests /test_inference_runner.py

uvpatel7271

added a modularity and updations

cd5c208 5 days ago

raw

history blame contribute delete

2.37 kB

	"""Smoke tests for the strict inference output contract."""

	from __future__ import annotations

	from dataclasses import dataclass, field

	from app.env.runner import InferenceRunner
	from app.models.inference import AgentDecision, InferenceConfig


	@dataclass
	class _FakeObservation:
	    """Lightweight observation double returned by ``_FakeEnv``.

	    The first four fields are required; the remaining ones have defaults so
	    the fake environment only has to spell out what changes between steps.
	    """

	    task_id: str
	    attempts_remaining: int
	    score: float
	    done: bool
	    # default_factory gives every instance its own list rather than a shared one.
	    history: list[object] = field(default_factory=list)
	    current_code: str = "print('broken')"
	    # None means no error was recorded for the last action.
	    last_action_error: str | None = None


	class _FakeEnv:
	    """Scripted environment double that plays out a fixed two-step episode.

	    The first ``step_result`` call reports reward 0.45 with ``done=False``;
	    the second reports reward 0.97 with ``done=True``. Any further call
	    raises ``AssertionError``.
	    """

	    def __init__(self) -> None:
	        # Number of ``step_result`` calls made so far.
	        self._step = 0

	    def reset(self, *, task_id: str) -> _FakeObservation:
	        """Return the initial observation for ``task_id``.

	        The step counter is not touched here.
	        """
	        return _FakeObservation(
	            task_id=task_id,
	            attempts_remaining=4,
	            score=0.2,
	            done=False,
	        )

	    def step_result(self, action: object) -> tuple[_FakeObservation, float, bool, dict[str, object]]:
	        """Advance the scripted episode by one step.

	        Args:
	            action: Ignored; the outcome depends only on the call count.

	        Returns:
	            A tuple whose first item is a ``_FakeObservation``, followed by
	            a float, a bool and a dict with a ``"last_action_error"`` key,
	            in that order.

	        Raises:
	            AssertionError: If called more than twice.
	        """
	        self._step += 1
	        if self._step == 1:
	            return (
	                _FakeObservation(
	                    task_id="demo_task",
	                    attempts_remaining=3,
	                    score=0.45,
	                    done=False,
	                    current_code="candidate",
	                ),
	                0.45,
	                False,
	                {"last_action_error": None},
	            )
	        if self._step == 2:
	            return (
	                _FakeObservation(
	                    task_id="demo_task",
	                    attempts_remaining=2,
	                    score=0.97,
	                    done=True,
	                    current_code="reference",
	                ),
	                0.97,
	                True,
	                {"last_action_error": None},
	            )
	        # A third call means the caller kept stepping after ``done=True``.
	        raise AssertionError("runner stepped too many times")


	class _FakeAgent:
	    """Scripted agent double: ``run_tests`` first, then ``submit_solution``."""

	    def __init__(self) -> None:
	        # Number of ``act`` calls made so far.
	        self._step = 0

	    def act(self, observation: object) -> AgentDecision:
	        """Return the next scripted decision.

	        Args:
	            observation: Ignored; the decision depends only on the call count.

	        Returns:
	            An ``AgentDecision`` with ``action_type="run_tests"`` on the first
	            call and ``action_type="submit_solution"`` on every later call.
	        """
	        self._step += 1
	        if self._step == 1:
	            return AgentDecision(action_type="run_tests")
	        return AgentDecision(action_type="submit_solution")


	def test_inference_runner_emits_strict_lines(capsys) -> None:
	    """Run one scripted episode and check the exact lines written to stdout.

	    The expected output is one ``[START]`` line, one ``[STEP]`` line per
	    scripted step, and a closing ``[END]`` line, compared verbatim.
	    """
	    runner = InferenceRunner(InferenceConfig.from_env())
	    # Swap in the scripted doubles so the run is driven by fixed decisions
	    # and fixed step results.
	    runner.agent = _FakeAgent()
	    runner._create_env = lambda: _FakeEnv()  # type: ignore[method-assign]
	    runner.run_task("demo_task")

	    captured = capsys.readouterr().out.strip().splitlines()
	    # Benchmark and model names are read from the runner's own config, so the
	    # comparison follows whatever ``from_env`` produced.
	    assert captured == [
	        f"[START] task=demo_task env={runner.config.benchmark_name} model={runner.config.model_name}",
	        "[STEP] step=1 action=run_tests reward=0.45 done=false error=null",
	        "[STEP] step=2 action=submit_solution reward=0.97 done=true error=null",
	        "[END] success=true steps=2 rewards=0.45,0.97",
	    ]