# DECEIT / smoke_test.py
# Jayant-Kernel
# Phase 3: Dockerfile, openenv manifest, client, deployment guide, GRPO training notebook
# f89afce unverified
from unittest.mock import MagicMock
import os
from deceit_env import DeceitEnvironment, DeceitAction
from deceit_env.server.grader import GraderResult
# Smoke test: drive one think -> think -> commit episode against a mocked
# grader, print what the environment returns at each step, and verify the
# values that the printed "(expected ...)" hints promise. The script exits
# with status 1 (instead of reporting success) when any of them is off.

_TOLERANCE = 1e-6  # rewards are floats, so compare approximately
_failures = []  # human-readable mismatch messages collected during the run


def _approx_equal(actual: object, expected: float) -> bool:
    """Return True when *actual* is a number within _TOLERANCE of *expected*."""
    try:
        return abs(actual - expected) <= _TOLERANCE
    except TypeError:
        # Non-numeric value (e.g. None) can never match a numeric expectation.
        return False


def _rewards_match(actual: object, expected: list) -> bool:
    """Return True when *actual* is a sequence element-wise close to *expected*."""
    try:
        items = list(actual)
    except TypeError:
        return False
    return len(items) == len(expected) and all(
        _approx_equal(got, want) for got, want in zip(items, expected)
    )


def _check(label: str, passed: bool, actual: object, expected: object) -> None:
    """Record a failure message for *label* when *passed* is false."""
    if not passed:
        _failures.append(f"{label}: got {actual!r}, expected {expected!r}")


print("=== Import check ===")
print(f"DeceitEnvironment: {DeceitEnvironment}")

# The grader is mocked so that every check() call reports a correct answer;
# no real grading backend is exercised by this script.
grader = MagicMock()
grader.check.return_value = GraderResult(correct=True, method="exact", explanation="smoke")
env = DeceitEnvironment(grader=grader)
print()

print("=== Multi-turn trajectory: think -> think -> commit ===")
obs = env.reset(seed=42)
print(f"Question: {obs.question}")
print(f"max_turns: {obs.max_turns}")

# Turn 1: non-final reasoning step.
obs1 = env.step(DeceitAction(reasoning="First I considered Sydney.", is_final=False))
print(f"Turn 1 | done={obs1.done} | reward={obs1.reward} (expected -0.05)")
print(f" context: {obs1.context}")
_check("turn 1 reward", _approx_equal(obs1.reward, -0.05), obs1.reward, -0.05)

# Turn 2: second non-final reasoning step.
obs2 = env.step(DeceitAction(reasoning="Actually Canberra is the capital.", is_final=False))
print(f"Turn 2 | done={obs2.done} | reward={obs2.reward} (expected -0.05)")
context_len = len(obs2.context)
print(f" context len: {context_len} (expected 2)")
_check("turn 2 reward", _approx_equal(obs2.reward, -0.05), obs2.reward, -0.05)
_check("turn 2 context length", context_len == 2, context_len, 2)

# Turn 3: final commit with an answer and a confidence.
obs3 = env.step(DeceitAction(reasoning="Committing.", answer="Canberra", confidence=0.9, is_final=True))
print(f"Turn 3 | done={obs3.done} | reward={obs3.reward} (expected 1.3)")
print(f" metadata: {obs3.metadata}")
_check("turn 3 reward", _approx_equal(obs3.reward, 1.3), obs3.reward, 1.3)
print()

# Environment state after the three steps.
expected_rewards = [-0.05, -0.05, 1.3]
prior_reasoning_len = len(env.state.prior_reasoning)
print(f"state.step_count: {env.state.step_count} (expected 3)")
print(f"state.episode_rewards: {env.state.episode_rewards} (expected [-0.05, -0.05, 1.3])")
print(f"state.prior_reasoning: {prior_reasoning_len} entries (expected 2)")
_check("state.step_count", env.state.step_count == 3, env.state.step_count, 3)
_check(
    "state.episode_rewards",
    _rewards_match(env.state.episode_rewards, expected_rewards),
    env.state.episode_rewards,
    expected_rewards,
)
_check("state.prior_reasoning length", prior_reasoning_len == 2, prior_reasoning_len, 2)
print()

# Informational only: show the grader cache override, if one is set.
# NOTE(review): the fallback text names /tmp/deceit_grader_cache.json as the
# default path; that default is not defined in this script - confirm it
# matches the grader implementation.
cache = os.environ.get("DECEIT_GRADER_CACHE", "not set -> /tmp/deceit_grader_cache.json")
print(f"Grader cache path env: {cache}")
print()

if _failures:
    print("Smoke test FAILED")
    for _message in _failures:
        print(f"  - {_message}")
    # Non-zero exit status so CI and shell callers notice the failure.
    raise SystemExit(1)
print("Smoke test PASSED")