# HyperBrickCaseOps / run_10_real_episodes.py
# Uploaded by modelbuilderhq ("Upload folder using huggingface_hub")
# Commit 1995f0f (verified)
"""Run 10 real SupportDesk episodes and stream inference logs.
This does not fabricate output. It simply invokes `inference.py` repeatedly
with real task ids from the registry so stdout contains valid `[START]`,
`[STEP]`, and `[END]` lines for each completed run.
"""
from __future__ import annotations
import os
import subprocess
import sys
from itertools import cycle, islice
from tasks import list_task_ids
# Number of inference runs main() performs; the registered task ids are
# cycled to fill this many slots.
TOTAL_RUNS = 10
def main() -> int:
    """Run ``inference.py`` once per planned task, stopping at the first failure.

    Task ids come from ``list_task_ids()`` and are cycled until ``TOTAL_RUNS``
    runs are planned. Each run starts a child interpreter with
    ``SUPPORTDESK_TASK_ID`` set to the task id. No stdout/stderr redirection is
    requested, so the child's output goes to this process's streams.

    Returns:
        ``0`` when every run exits with code 0, ``1`` when no tasks are
        registered, otherwise the exit code of the first failing run (the
        remaining runs are skipped).
    """
    base_env = os.environ.copy()
    task_ids = list(list_task_ids())
    if not task_ids:
        print("No tasks registered.", file=sys.stderr)
        return 1

    # Resolve inference.py next to this script rather than relative to the
    # current working directory, so the runner also works when launched from
    # another directory. The child's working directory is left unchanged.
    inference_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "inference.py"
    )

    # Repeat the real task ids until TOTAL_RUNS runs are planned.
    run_plan = list(islice(cycle(task_ids), TOTAL_RUNS))
    for idx, task_id in enumerate(run_plan, start=1):
        env = base_env.copy()
        env["SUPPORTDESK_TASK_ID"] = task_id
        # Respect a caller-provided PYTHONUTF8; otherwise turn on UTF-8 mode
        # for the child interpreter.
        env.setdefault("PYTHONUTF8", "1")
        # Progress marker goes to stderr so stdout carries only child output.
        print(
            f"# run {idx}/{TOTAL_RUNS} task={task_id}",
            file=sys.stderr,
            flush=True,
        )
        completed = subprocess.run(
            [sys.executable, inference_script],
            env=env,
            check=False,
        )
        if completed.returncode != 0:
            print(
                f"Run {idx} failed for task {task_id} with exit code {completed.returncode}.",
                file=sys.stderr,
            )
            # Propagate the child's exit code and skip the remaining runs.
            return completed.returncode
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())