# (Removed scrape artifact: "Spaces: Sleeping" — Hugging Face Space UI status
#  header captured by the page scrape; not part of this script.)
"""
Upload teacher trajectory JSONL files to a HF Hub dataset repo so the
SFT-on-HF-Jobs orchestrator can download them.

HF Jobs containers don't have access to local files — the teacher data
has to live on HF Hub first.

Usage:
    python scripts/upload_teacher_data.py \\
        --files data/teacher_30ep_validation.jsonl \\
                data/teacher_indist_30_99.jsonl \\
                data/teacher_ood_10000_10049.jsonl \\
        --repo InosLihka/rhythm-env-teacher-trajectories

Requires HF_TOKEN env var (or `hf auth login` already done).
"""
import argparse
import os
import sys
from pathlib import Path

from huggingface_hub import HfApi, login
def main() -> None:
    """Upload the given JSONL files (plus a dataset README) to a HF Hub dataset repo.

    Reads CLI args from sys.argv. Exits non-zero if none of the requested
    files existed locally, so CI callers don't see a false success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--files", nargs="+", required=True,
                        help="Local JSONL files to upload")
    parser.add_argument("--repo", type=str, required=True,
                        help="HF Hub dataset repo (e.g. InosLihka/rhythm-env-teacher-trajectories)")
    parser.add_argument("--commit_message", type=str,
                        default="Add teacher trajectories from gpt-5.4 + grader v2")
    args = parser.parse_args()

    # Authenticate explicitly when HF_TOKEN is set; otherwise rely on
    # credentials cached by a previous `hf auth login`.
    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)

    api = HfApi()
    # Idempotent: exist_ok=True makes re-runs against an existing repo safe.
    api.create_repo(args.repo, exist_ok=True, repo_type="dataset", private=False)
    print(f"Repo: https://huggingface.co/datasets/{args.repo}")

    uploaded = 0
    for path in args.files:
        p = Path(path)
        if not p.exists():
            print(f"SKIP missing: {path}")
            continue
        print(f"Uploading {p.name} ({p.stat().st_size / 1024:.1f} KB)...")
        api.upload_file(
            path_or_fileobj=str(p),
            path_in_repo=p.name,  # flat layout: file lands at the repo root
            repo_id=args.repo,
            repo_type="dataset",
            commit_message=args.commit_message,
        )
        uploaded += 1

    # Previously the script published a README and printed "Done." even when
    # every input file was missing — a silent false success. Fail loudly instead.
    if uploaded == 0:
        sys.exit("ERROR: none of the given --files exist locally; nothing was uploaded.")

    # Add a small README documenting the dataset format
    readme = """# RhythmEnv teacher trajectories
Per-step (state, prompt, teacher_response, action, belief, reward) tuples
collected by replaying RhythmEnv with gpt-5.4 (Azure AI Foundry) as the
acting agent. Used as the SFT corpus for Algorithm Distillation.
## Files
Each JSONL row is one step. Schema:
```
{
  "seed": int,                 # episode seed (also determines hidden profile)
  "step": int,                 # step index 0..27
  "profile_name": str,         # 'sampled_<seed>' for continuous-mode profiles
  "user_prompt": str,          # observation prompt the student will see at inference
  "teacher_response": str,     # full teacher output: "<reasoning>...</reasoning>\\nS M W ACTION_NAME"
  "parsed_action": str,        # action name (e.g. "deep_work")
  "parsed_belief": [s, m, w],  # 3-dim belief in [0, 1]
  "answer_match": str,         # raw matched substring of the answer line
  "env_reward": float,         # per-step env reward
  "parse_failed": bool,        # True if response couldn't be parsed into action+belief
  "true_belief": [s, m, w]     # ground-truth belief vector for the active profile
}
```
## Generation
Generated using `scripts/generate_teacher_trajectories.py` from the
[InosLihka/rhythm_env Space](https://huggingface.co/spaces/InosLihka/rhythm_env).
Teacher: `gpt-5.4` (Azure AI Foundry, version 2026-03-05). Sampling
temperature 0.5. ~840 (state, response) pairs per 30-episode batch.
"""
    api.upload_file(
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=args.repo,
        repo_type="dataset",
        commit_message="Add dataset README",
    )
    print()
    print(f"Done. Dataset: https://huggingface.co/datasets/{args.repo}")


if __name__ == "__main__":
    main()