"""
Upload teacher trajectory JSONL files to a HF Hub dataset repo so the
SFT-on-HF-Jobs orchestrator can download them.
HF Jobs containers don't have access to local files — the teacher data
has to live on HF Hub first.
Usage:
python scripts/upload_teacher_data.py \\
--files data/teacher_30ep_validation.jsonl \\
data/teacher_indist_30_99.jsonl \\
data/teacher_ood_10000_10049.jsonl \\
--repo InosLihka/rhythm-env-teacher-trajectories
Requires HF_TOKEN env var (or `hf auth login` already done).
"""
import argparse
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi, login
def main() -> None:
    """Upload teacher-trajectory JSONL files and a README to an HF dataset repo.

    Authenticates with the HF_TOKEN env var when set (otherwise falls back to
    any cached `hf auth login` credentials), creates the target dataset repo
    if it doesn't exist, uploads each input file that exists locally, then
    publishes a README documenting the JSONL schema. Missing input files are
    skipped with a warning rather than aborting, so partial batches upload.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--files", nargs="+", required=True,
                        help="Local JSONL files to upload")
    parser.add_argument("--repo", type=str, required=True,
                        help="HF Hub dataset repo (e.g. InosLihka/rhythm-env-teacher-trajectories)")
    parser.add_argument("--commit_message", type=str,
                        default="Add teacher trajectories from gpt-5.4 + grader v2")
    args = parser.parse_args()

    # Pass the token directly to HfApi instead of calling login(): login()
    # persists credentials to disk as a side effect, which is unnecessary in
    # CI / HF Jobs containers. A None token makes HfApi fall back to cached
    # credentials, so the "`hf auth login` already done" path still works.
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    api.create_repo(args.repo, exist_ok=True, repo_type="dataset", private=False)
    print(f"Repo: https://huggingface.co/datasets/{args.repo}")

    uploaded = 0
    for path in args.files:
        p = Path(path)
        if not p.exists():
            # Deliberate best-effort: skip missing files so a partial batch
            # (e.g. only the validation split) can still be published.
            print(f"SKIP missing: {path}")
            continue
        print(f"Uploading {p.name} ({p.stat().st_size / 1024:.1f} KB)...")
        api.upload_file(
            path_or_fileobj=str(p),
            path_in_repo=p.name,  # flat layout: files land at the repo root
            repo_id=args.repo,
            repo_type="dataset",
            commit_message=args.commit_message,
        )
        uploaded += 1

    if uploaded == 0:
        # Surface the empty upload loudly; the README is still published so
        # the repo at least documents its expected contents.
        print("WARNING: no input files were uploaded (all paths missing).")

    # Add a small README documenting the dataset format
    readme = """# RhythmEnv teacher trajectories
Per-step (state, prompt, teacher_response, action, belief, reward) tuples
collected by replaying RhythmEnv with gpt-5.4 (Azure AI Foundry) as the
acting agent. Used as the SFT corpus for Algorithm Distillation.
## Files
Each JSONL row is one step. Schema:
```
{
"seed": int, # episode seed (also determines hidden profile)
"step": int, # step index 0..27
"profile_name": str, # 'sampled_<seed>' for continuous-mode profiles
"user_prompt": str, # observation prompt the student will see at inference
"teacher_response": str, # full teacher output: "<reasoning>...</reasoning>\\nS M W ACTION_NAME"
"parsed_action": str, # action name (e.g. "deep_work")
"parsed_belief": [s, m, w], # 3-dim belief in [0, 1]
"answer_match": str, # raw matched substring of the answer line
"env_reward": float, # per-step env reward
"parse_failed": bool, # True if response couldn't be parsed into action+belief
"true_belief": [s, m, w] # ground-truth belief vector for the active profile
}
```
## Generation
Generated using `scripts/generate_teacher_trajectories.py` from the
[InosLihka/rhythm_env Space](https://huggingface.co/spaces/InosLihka/rhythm_env).
Teacher: `gpt-5.4` (Azure AI Foundry, version 2026-03-05). Sampling
temperature 0.5. ~840 (state, response) pairs per 30-episode batch.
"""
    api.upload_file(
        path_or_fileobj=readme.encode("utf-8"),  # in-memory bytes, no temp file
        path_in_repo="README.md",
        repo_id=args.repo,
        repo_type="dataset",
        commit_message="Add dataset README",
    )
    print()
    print(f"Done. Dataset: https://huggingface.co/datasets/{args.repo}")


if __name__ == "__main__":
    main()