""" Upload teacher trajectory JSONL files to a HF Hub dataset repo so the SFT-on-HF-Jobs orchestrator can download them. HF Jobs containers don't have access to local files — the teacher data has to live on HF Hub first. Usage: python scripts/upload_teacher_data.py \\ --files data/teacher_30ep_validation.jsonl \\ data/teacher_indist_30_99.jsonl \\ data/teacher_ood_10000_10049.jsonl \\ --repo InosLihka/rhythm-env-teacher-trajectories Requires HF_TOKEN env var (or `hf auth login` already done). """ import argparse import os import sys from pathlib import Path from huggingface_hub import HfApi, login def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--files", nargs="+", required=True, help="Local JSONL files to upload") parser.add_argument("--repo", type=str, required=True, help="HF Hub dataset repo (e.g. InosLihka/rhythm-env-teacher-trajectories)") parser.add_argument("--commit_message", type=str, default="Add teacher trajectories from gpt-5.4 + grader v2") args = parser.parse_args() token = os.environ.get("HF_TOKEN") if token: login(token=token) api = HfApi() api.create_repo(args.repo, exist_ok=True, repo_type="dataset", private=False) print(f"Repo: https://huggingface.co/datasets/{args.repo}") for path in args.files: p = Path(path) if not p.exists(): print(f"SKIP missing: {path}") continue print(f"Uploading {p.name} ({p.stat().st_size / 1024:.1f} KB)...") api.upload_file( path_or_fileobj=str(p), path_in_repo=p.name, repo_id=args.repo, repo_type="dataset", commit_message=args.commit_message, ) # Add a small README documenting the dataset format readme = """# RhythmEnv teacher trajectories Per-step (state, prompt, teacher_response, action, belief, reward) tuples collected by replaying RhythmEnv with gpt-5.4 (Azure AI Foundry) as the acting agent. Used as the SFT corpus for Algorithm Distillation. ## Files Each JSONL row is one step. Schema: ``` { "seed": int, # episode seed (also determines hidden profile) "step": int, # step index 0..27 "profile_name": str, # 'sampled_' for continuous-mode profiles "user_prompt": str, # observation prompt the student will see at inference "teacher_response": str, # full teacher output: "...\\nS M W ACTION_NAME" "parsed_action": str, # action name (e.g. "deep_work") "parsed_belief": [s, m, w], # 3-dim belief in [0, 1] "answer_match": str, # raw matched substring of the answer line "env_reward": float, # per-step env reward "parse_failed": bool, # True if response couldn't be parsed into action+belief "true_belief": [s, m, w] # ground-truth belief vector for the active profile } ``` ## Generation Generated using `scripts/generate_teacher_trajectories.py` from the [InosLihka/rhythm_env Space](https://huggingface.co/spaces/InosLihka/rhythm_env). Teacher: `gpt-5.4` (Azure AI Foundry, version 2026-03-05). Sampling temperature 0.5. ~840 (state, response) pairs per 30-episode batch. """ api.upload_file( path_or_fileobj=readme.encode("utf-8"), path_in_repo="README.md", repo_id=args.repo, repo_type="dataset", commit_message="Add dataset README", ) print() print(f"Done. Dataset: https://huggingface.co/datasets/{args.repo}") if __name__ == "__main__": main()