#!/usr/bin/env python3
# Local pre-submission validation.
#
# Runs a fixed list of checks against the repository root (the parent of the
# directory holding this script): required files exist, `openenv validate`
# succeeds, the unittest suite under `tests/` passes, each difficulty level
# yields a score strictly between 0 and 1, and the baseline results file (when
# present) is valid JSON.  The process exits with 0 when every check passes
# and 1 otherwise.
from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
# Make project packages (e.g. `env`) importable when the script is run directly.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Exit status reported by run() when the command could not be started at all;
# 127 mirrors the POSIX shell convention for "command not found".
_COMMAND_NOT_STARTED = 127


def run(command: list[str]) -> tuple[int, str]:
    """Run *command* from the repository root and capture its output.

    Args:
        command: Program and arguments, passed to ``subprocess.run`` as a
            list (no shell is involved).

    Returns:
        ``(returncode, output)`` where *output* is stdout with stderr merged
        in.  If the program cannot be started (missing executable, permission
        problem), returns ``(127, message)`` instead of raising, so the
        caller can record a failed check and keep going.
    """
    try:
        proc = subprocess.run(
            command,
            cwd=ROOT,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
    except OSError as exc:
        # FileNotFoundError / PermissionError are OSError subclasses.
        return _COMMAND_NOT_STARTED, f"could not run {' '.join(command)}: {exc}"
    return proc.returncode, proc.stdout


def require(path: Path, label: str) -> bool:
    """Report whether *path* exists, printing a ``[PASS]``/``[FAIL]`` line.

    Args:
        path: File or directory that must be present.
        label: Human-readable name of the requirement, used in the output.

    Returns:
        True when *path* exists, False otherwise.
    """
    if path.exists():
        print(f"[PASS] {label}: {path}")
        return True
    print(f"[FAIL] {label}: missing {path}")
    return False


def check_tasks_and_scores() -> bool:
    """Play one short scripted episode per difficulty and validate the scores.

    For each of "easy", "medium" and "hard" the environment is reset with
    seed 42, a ``ReadDoc`` and a ``FlagInjection`` action are stepped (plus a
    ``WriteDB`` action on "hard"), and the final score is collected.

    Returns:
        True when every final score lies strictly between 0.0 and 1.0.
        False when any score is outside that range or the project's ``env``
        package cannot be imported.
    """
    # Imported lazily so the file checks in main() still run (and report)
    # even when the project package is broken or missing.
    try:
        from env.environment import IDPIExfilEnv
        from env.models import Action
    except ImportError as exc:
        print(f"[FAIL] could not import environment: {exc}")
        return False

    env = IDPIExfilEnv()
    scores = []
    for difficulty in ("easy", "medium", "hard"):
        env.reset(difficulty=difficulty, seed=42)
        env.step(Action(type="ReadDoc"))
        env.step(Action(type="FlagInjection", reason="suspicious instruction"))
        if difficulty == "hard":
            env.step(Action(type="WriteDB", content="clean internal summary"))
        result = env.get_result()
        scores.append((difficulty, result.final_score))

    ok = all(0.0 < score < 1.0 for _, score in scores)
    if ok:
        print(f"[PASS] 3+ tasks with graders: {scores}")
    else:
        print(f"[FAIL] score out of range: {scores}")
    return ok


def _run_check(label: str, command: list[str]) -> bool:
    """Run *command*, echo its output, and print a ``[PASS]``/``[FAIL]`` line."""
    code, out = run(command)
    print(out.rstrip())
    passed = code == 0
    if passed:
        print(f"[PASS] {label}")
    else:
        print(f"[FAIL] {label}: exit status {code}")
    return passed


def main() -> int:
    """Execute every validation check and summarise the outcome.

    All checks run even after an earlier one fails, so a single invocation
    reports every problem.

    Returns:
        0 when all checks pass, 1 otherwise (suitable as a process exit code).
    """
    checks = [
        require(ROOT / "inference.py", "inference entrypoint"),
        require(ROOT / "openenv.yaml", "OpenEnv spec file"),
        require(ROOT / "Dockerfile", "Dockerfile"),
        require(ROOT / "uv.lock", "uv lockfile"),
        require(ROOT / ".env.example", "environment variable template"),
        require(ROOT / "docs" / "person_a_benchmark_manifest.json", "task manifest"),
    ]

    checks.append(_run_check("openenv validate", ["openenv", "validate"]))
    # sys.executable guarantees the tests run under the same interpreter as
    # this script, even where no bare `python` is on PATH.
    checks.append(
        _run_check(
            "unit tests",
            [sys.executable, "-m", "unittest", "discover", "-s", "tests", "-p", "test_*.py"],
        )
    )

    checks.append(check_tasks_and_scores())

    baseline_path = ROOT / "baseline" / "baseline_results.json"
    if baseline_path.exists():
        try:
            json.loads(baseline_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[FAIL] baseline output invalid json: {exc}")
            checks.append(False)
        else:
            print(f"[PASS] baseline output json parse: {baseline_path}")
            checks.append(True)
    else:
        # A missing baseline is only a warning; it does not fail validation.
        print(f"[WARN] baseline output missing (acceptable pre-key): {baseline_path}")

    if all(checks):
        print("\n[OK] Local pre-submission validation passed.")
        return 0
    print("\n[FAIL] Local pre-submission validation failed.")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())