"""
Upload teacher trajectory JSONL files to a HF Hub dataset repo so the
SFT-on-HF-Jobs orchestrator can download them.
HF Jobs containers don't have access to local files — the teacher data
has to live on HF Hub first.
Usage:
python scripts/upload_teacher_data.py \\
--files data/teacher_30ep_validation.jsonl \\
data/teacher_indist_30_99.jsonl \\
data/teacher_ood_10000_10049.jsonl \\
--repo InosLihka/rhythm-env-teacher-trajectories
Requires HF_TOKEN env var (or `hf auth login` already done).
"""
import argparse
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi, login
def main() -> None:
    """Upload teacher-trajectory JSONL files and a README to an HF dataset repo.

    Authenticates with the HF_TOKEN env var when set (otherwise falls back to
    any cached `hf auth login` credentials), creates the target dataset repo
    if it doesn't exist, uploads each input file that exists locally, then
    publishes a README documenting the JSONL schema. Missing input files are
    skipped with a warning rather than aborting, so partial batches upload.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--files", nargs="+", required=True,
                        help="Local JSONL files to upload")
    parser.add_argument("--repo", type=str, required=True,
                        help="HF Hub dataset repo (e.g. InosLihka/rhythm-env-teacher-trajectories)")
    parser.add_argument("--commit_message", type=str,
                        default="Add teacher trajectories from gpt-5.4 + grader v2")
    args = parser.parse_args()

    # Pass the token directly to HfApi instead of calling login(): login()
    # persists credentials to disk as a side effect, which is unnecessary in
    # CI / HF Jobs containers. A None token makes HfApi fall back to cached
    # credentials, so the "`hf auth login` already done" path still works.
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    api.create_repo(args.repo, exist_ok=True, repo_type="dataset", private=False)
    print(f"Repo: https://huggingface.co/datasets/{args.repo}")

    uploaded = 0
    for path in args.files:
        p = Path(path)
        if not p.exists():
            # Deliberate best-effort: skip missing files so a partial batch
            # (e.g. only the validation split) can still be published.
            print(f"SKIP missing: {path}")
            continue
        print(f"Uploading {p.name} ({p.stat().st_size / 1024:.1f} KB)...")
        api.upload_file(
            path_or_fileobj=str(p),
            path_in_repo=p.name,  # flat layout: files land at the repo root
            repo_id=args.repo,
            repo_type="dataset",
            commit_message=args.commit_message,
        )
        uploaded += 1

    if uploaded == 0:
        # Surface the empty upload loudly; the README is still published so
        # the repo at least documents its expected contents.
        print("WARNING: no input files were uploaded (all paths missing).")

    # Add a small README documenting the dataset format
    readme = """# RhythmEnv teacher trajectories
Per-step (state, prompt, teacher_response, action, belief, reward) tuples
collected by replaying RhythmEnv with gpt-5.4 (Azure AI Foundry) as the
acting agent. Used as the SFT corpus for Algorithm Distillation.
## Files
Each JSONL row is one step. Schema:
```
{
"seed": int, # episode seed (also determines hidden profile)
"step": int, # step index 0..27
"profile_name": str, # 'sampled_<seed>' for continuous-mode profiles
"user_prompt": str, # observation prompt the student will see at inference
"teacher_response": str, # full teacher output: "<reasoning>...</reasoning>\\nS M W ACTION_NAME"
"parsed_action": str, # action name (e.g. "deep_work")
"parsed_belief": [s, m, w], # 3-dim belief in [0, 1]
"answer_match": str, # raw matched substring of the answer line
"env_reward": float, # per-step env reward
"parse_failed": bool, # True if response couldn't be parsed into action+belief
"true_belief": [s, m, w] # ground-truth belief vector for the active profile
}
```
## Generation
Generated using `scripts/generate_teacher_trajectories.py` from the
[InosLihka/rhythm_env Space](https://huggingface.co/spaces/InosLihka/rhythm_env).
Teacher: `gpt-5.4` (Azure AI Foundry, version 2026-03-05). Sampling
temperature 0.5. ~840 (state, response) pairs per 30-episode batch.
"""
    api.upload_file(
        path_or_fileobj=readme.encode("utf-8"),  # in-memory bytes, no temp file
        path_in_repo="README.md",
        repo_id=args.repo,
        repo_type="dataset",
        commit_message="Add dataset README",
    )
    print()
    print(f"Done. Dataset: https://huggingface.co/datasets/{args.repo}")


if __name__ == "__main__":
    main()