"""Run 10 real SupportDesk episodes and stream inference logs. This does not fabricate output. It simply invokes `inference.py` repeatedly with real task ids from the registry so stdout contains valid `[START]`, `[STEP]`, and `[END]` lines for each completed run. """ from __future__ import annotations import os import subprocess import sys from itertools import cycle, islice from tasks import list_task_ids TOTAL_RUNS = 10 def main() -> int: base_env = os.environ.copy() task_ids = list(list_task_ids()) if not task_ids: print("No tasks registered.", file=sys.stderr) return 1 # Repeat the real task ids until we have 10 actual runs. run_plan = list(islice(cycle(task_ids), TOTAL_RUNS)) for idx, task_id in enumerate(run_plan, start=1): env = base_env.copy() env["SUPPORTDESK_TASK_ID"] = task_id env.setdefault("PYTHONUTF8", "1") print( f"# run {idx}/{TOTAL_RUNS} task={task_id}", file=sys.stderr, flush=True, ) completed = subprocess.run( [sys.executable, "inference.py"], env=env, check=False, ) if completed.returncode != 0: print( f"Run {idx} failed for task {task_id} with exit code {completed.returncode}.", file=sys.stderr, ) return completed.returncode return 0 if __name__ == "__main__": raise SystemExit(main())