# (Removed scrape artifact: "Spaces: Sleeping" — Hugging Face Space UI status
#  header captured by the page scrape; not part of this script.)
"""
Upload teacher trajectory JSONL files to a HF Hub dataset repo so the
SFT-on-HF-Jobs orchestrator can download them.

HF Jobs containers don't have access to local files — the teacher data
has to live on HF Hub first.

Usage:
    python scripts/upload_teacher_data.py \\
        --files data/teacher_30ep_validation.jsonl \\
                data/teacher_indist_30_99.jsonl \\
                data/teacher_ood_10000_10049.jsonl \\
        --repo InosLihka/rhythm-env-teacher-trajectories

Requires HF_TOKEN env var (or `hf auth login` already done).
"""
import argparse
import os
import sys
from pathlib import Path

from huggingface_hub import HfApi, login
def main() -> None:
    """Upload the given JSONL files (plus a dataset README) to a HF Hub dataset repo.

    Reads CLI args from sys.argv. Exits non-zero if none of the requested
    files existed locally, so CI callers don't see a false success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--files", nargs="+", required=True,
                        help="Local JSONL files to upload")
    parser.add_argument("--repo", type=str, required=True,
                        help="HF Hub dataset repo (e.g. InosLihka/rhythm-env-teacher-trajectories)")
    parser.add_argument("--commit_message", type=str,
                        default="Add teacher trajectories from gpt-5.4 + grader v2")
    args = parser.parse_args()

    # Authenticate explicitly when HF_TOKEN is set; otherwise rely on
    # credentials cached by a previous `hf auth login`.
    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)

    api = HfApi()
    # Idempotent: exist_ok=True makes re-runs against an existing repo safe.
    api.create_repo(args.repo, exist_ok=True, repo_type="dataset", private=False)
    print(f"Repo: https://huggingface.co/datasets/{args.repo}")

    uploaded = 0
    for path in args.files:
        p = Path(path)
        if not p.exists():
            print(f"SKIP missing: {path}")
            continue
        print(f"Uploading {p.name} ({p.stat().st_size / 1024:.1f} KB)...")
        api.upload_file(
            path_or_fileobj=str(p),
            path_in_repo=p.name,  # flat layout: file lands at the repo root
            repo_id=args.repo,
            repo_type="dataset",
            commit_message=args.commit_message,
        )
        uploaded += 1

    # Previously the script published a README and printed "Done." even when
    # every input file was missing — a silent false success. Fail loudly instead.
    if uploaded == 0:
        sys.exit("ERROR: none of the given --files exist locally; nothing was uploaded.")

    # Add a small README documenting the dataset format
    readme = """# RhythmEnv teacher trajectories
Per-step (state, prompt, teacher_response, action, belief, reward) tuples
collected by replaying RhythmEnv with gpt-5.4 (Azure AI Foundry) as the
acting agent. Used as the SFT corpus for Algorithm Distillation.
## Files
Each JSONL row is one step. Schema:
```
{
  "seed": int,                 # episode seed (also determines hidden profile)
  "step": int,                 # step index 0..27
  "profile_name": str,         # 'sampled_<seed>' for continuous-mode profiles
  "user_prompt": str,          # observation prompt the student will see at inference
  "teacher_response": str,     # full teacher output: "<reasoning>...</reasoning>\\nS M W ACTION_NAME"
  "parsed_action": str,        # action name (e.g. "deep_work")
  "parsed_belief": [s, m, w],  # 3-dim belief in [0, 1]
  "answer_match": str,         # raw matched substring of the answer line
  "env_reward": float,         # per-step env reward
  "parse_failed": bool,        # True if response couldn't be parsed into action+belief
  "true_belief": [s, m, w]     # ground-truth belief vector for the active profile
}
```
## Generation
Generated using `scripts/generate_teacher_trajectories.py` from the
[InosLihka/rhythm_env Space](https://huggingface.co/spaces/InosLihka/rhythm_env).
Teacher: `gpt-5.4` (Azure AI Foundry, version 2026-03-05). Sampling
temperature 0.5. ~840 (state, response) pairs per 30-episode batch.
"""
    api.upload_file(
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=args.repo,
        repo_type="dataset",
        commit_message="Add dataset README",
    )
    print()
    print(f"Done. Dataset: https://huggingface.co/datasets/{args.repo}")


if __name__ == "__main__":
    main()