"""End-to-end smoke test: scripted episode, in-process, no server.

Runs:
    run_baseline(adam) -> draft(Adam-ish) -> inspect -> draft(SGD+momentum) -> commit,
and verifies the env threads state correctly and produces a finite reward.
"""

from __future__ import annotations

import math
import sys
from pathlib import Path

# Allow running directly: `python tests/test_episode.py`
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

from landscapeforge.models import LandscapeforgeAction  # type: ignore
from landscapeforge.server.landscapeforge_environment import (  # type: ignore
    LandscapeforgeEnvironment,
)


# Optimizer source submitted to the env as a "draft" action: an Adam-style
# update with bias-corrected first and second moment estimates.
ADAM_CODE = """
import numpy as np

class Optimizer:
    def __init__(self, dim):
        self.lr = 1e-3
        self.b1 = 0.9
        self.b2 = 0.999
        self.eps = 1e-8
        self.m = np.zeros(dim)
        self.v = np.zeros(dim)
        self.t = 0

    def step(self, x, f_val, grad):
        self.t += 1
        self.m = self.b1 * self.m + (1 - self.b1) * grad
        self.v = self.b2 * self.v + (1 - self.b2) * grad * grad
        m_hat = self.m / (1 - self.b1 ** self.t)
        v_hat = self.v / (1 - self.b2 ** self.t)
        return x - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
"""

# Optimizer source submitted to the env as a "draft" action: SGD with a
# momentum (velocity) term.
SGDM_CODE = """
import numpy as np

class Optimizer:
    def __init__(self, dim):
        self.lr = 0.05
        self.beta = 0.9
        self.v = np.zeros(dim)

    def step(self, x, f_val, grad):
        self.v = self.beta * self.v - self.lr * grad
        return x + self.v
"""


def scripted_episode() -> None:
    """Drive one full scripted episode (tier "T0", seed 42) and check its outcome.

    Steps: reset, run the "adam" baseline, submit an Adam draft, inspect
    draft 0 over steps 10-20, submit an SGD+momentum draft, then commit.
    Each observation is printed for manual inspection.

    Raises:
        AssertionError: if the final observation is not done, lacks a
            reward / final_regret / breakdown, or the reward is not finite.
    """
    env = LandscapeforgeEnvironment(tier="T0", seed=42)
    obs = env.reset()
    print(f"[reset] landscape: {obs.landscape_description}")
    print(f"  dim={obs.dim}, hints={obs.structural_hints}")
    print(f"  budget={obs.budget_remaining}")

    # 1. Run Adam baseline to see what it does.
    obs = env.step(LandscapeforgeAction(
        kind="run_baseline",
        baseline_name="adam",
    ))
    print(f"\n[run_baseline adam] result={obs.last_action_result}")
    print(f"  budget_remaining={obs.budget_remaining}")

    # 2. Submit an Adam draft.
    obs = env.step(LandscapeforgeAction(kind="draft", code=ADAM_CODE))
    print(f"\n[draft adam] compile_error={obs.last_action_result.get('compile_error')}")
    print(f"  summary={obs.last_action_result.get('summary')}")
    print(f"  budget_remaining={obs.budget_remaining}")

    # 3. Inspect the first draft.
    obs = env.step(LandscapeforgeAction(
        kind="inspect",
        draft_idx=0,
        step_range_start=10,
        step_range_end=20,
    ))
    print(f"\n[inspect 0 steps 10-20] result={obs.last_action_result}")
    print(f"  budget_remaining={obs.budget_remaining}")

    # 4. Submit an SGD+momentum alternative.
    obs = env.step(LandscapeforgeAction(kind="draft", code=SGDM_CODE))
    print(f"\n[draft sgdm] compile_error={obs.last_action_result.get('compile_error')}")
    print(f"  summary={obs.last_action_result.get('summary')}")
    print(f"  budget_remaining={obs.budget_remaining}")

    # 5. Commit.
    obs = env.step(LandscapeforgeAction(kind="commit"))
    print("\n[commit]")
    print(f"  done={obs.done}")
    print(f"  reward={obs.reward}")
    print(f"  final_regret={obs.final_regret}")
    print(f"  r_optcoder_breakdown={obs.r_optcoder_breakdown}")
    print(f"  last_action_result={obs.last_action_result}")

    # Sanity checks
    assert obs.done is True, "should be done after commit"
    assert obs.reward is not None, "reward must be produced"
    # The module docstring promises a *finite* reward; `is not None` alone
    # would let NaN / +-inf slip through.
    assert math.isfinite(obs.reward), f"reward must be finite, got {obs.reward!r}"
    assert obs.final_regret is not None, "final_regret must be produced"
    assert obs.r_optcoder_breakdown, "breakdown must be populated"
    print("\n✓ scripted_episode PASSED")


def episode_with_broken_code() -> None:
    """Submitting code that fails to compile should not crash the env.

    Submits a non-Python draft (tier "T0", seed 7), expects a reported
    ``compile_error`` with the episode still running, then commits and
    expects a finished episode with a reward.

    Raises:
        AssertionError: if any of those expectations does not hold.
    """
    env = LandscapeforgeEnvironment(tier="T0", seed=7)
    env.reset()

    # Intentional syntax error
    obs = env.step(LandscapeforgeAction(
        kind="draft",
        code="this is not python",
    ))
    print(f"\n[broken draft] compile_error={obs.last_action_result.get('compile_error')}")
    assert obs.last_action_result.get("compile_error") is not None
    assert obs.done is False

    # Commit with bad code — should produce worst-case regret, not crash
    obs = env.step(LandscapeforgeAction(kind="commit"))
    print(f"[broken commit] reward={obs.reward}, final_regret={obs.final_regret}")
    assert obs.done is True
    assert obs.reward is not None
    print("\n✓ episode_with_broken_code PASSED")


def budget_exhaustion() -> None:
    """Spamming drafts until budget runs out should auto-commit.

    Submits up to 10 Adam drafts (tier "T0", seed 3) and returns as soon as
    an observation reports ``done``, checking that the reported reason is
    ``"budget_exhausted"``.

    Raises:
        AssertionError: if the reason differs, or if the episode is still
            running after all 10 drafts.
    """
    env = LandscapeforgeEnvironment(tier="T0", seed=3)
    env.reset()

    for i in range(10):
        obs = env.step(LandscapeforgeAction(kind="draft", code=ADAM_CODE))
        if obs.done:
            print(f"\n[budget_exhaustion] auto-committed after {i+1} drafts")
            print(f"  reason={obs.last_action_result.get('reason')}")
            assert obs.last_action_result.get("reason") == "budget_exhausted"
            print("\n✓ budget_exhaustion PASSED")
            return

    # NOTE(review): the cost/budget figures in this message are not visible
    # in this file — confirm they still match the environment's settings.
    raise AssertionError("Budget never exhausted — shouldn't happen with draft cost 2, budget 12")


if __name__ == "__main__":
    scripted_episode()
    episode_with_broken_code()
    budget_exhaustion()
    print("\nAll tests passed.")