# Source: Hugging Face Space sh4shv4t/Parlay (status: Paused)
# File size: 3,599 Bytes

"""Push episodes_v2.jsonl to HuggingFace as a public dataset."""
import argparse
import json
import os
import tempfile
from pathlib import Path

# Load .env from project root (same pattern as the rest of the app).
# _ROOT is the parent of the directory that contains this file.
_ROOT = Path(__file__).resolve().parents[1]
# python-dotenv is optional: when it is not installed the env files are
# skipped and only variables already present in the process environment
# are visible to the token lookup below.
try:
    from dotenv import load_dotenv

    # NOTE(review): presumably values from .env take precedence over
    # .env.local here, assuming load_dotenv does not override variables that
    # are already set — confirm this is the intended order.
    load_dotenv(_ROOT / ".env")
    load_dotenv(_ROOT / ".env.local")
except ImportError:
    pass

from huggingface_hub import HfApi


def _resolve_hf_token() -> str | None:
    """HF_TOKEN, HUGGING_FACE_HUB_TOKEN, or huggingface-cli default (hub reads env)."""
    for key in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        v = os.environ.get(key)
        if v and str(v).strip():
            return str(v).strip()
    return None


def _load_episodes(jsonl_path: str) -> list[dict]:
    """Parse every non-blank line of a UTF-8 JSONL file into a list."""
    text = Path(jsonl_path).read_text(encoding="utf-8")
    return [json.loads(line) for line in text.splitlines() if line.strip()]


def _build_dataset_card(rows: list[dict]) -> str:
    """Render the dataset README (YAML front matter + Markdown) for *rows*."""
    # Per-episode reward: "reward" if present, else "cumulative_reward", else 0.
    rewards = [r.get("reward", r.get("cumulative_reward", 0)) for r in rows]
    # max(..., 1) keeps an empty dataset from dividing by zero.
    mean_r = sum(rewards) / max(len(rewards), 1)

    # NOTE(review): the deal rate, scenario/persona counts and quality-filter
    # threshold below are fixed text, not derived from `rows` — update them
    # by hand when the dataset changes.
    return f"""---
license: mit
task_categories:
- reinforcement-learning
- text-generation
language: [en]
tags: [negotiation, rlhf, grpo, theory-of-mind, parlay, openenv]
---

# Parlay Negotiation Episodes

{len(rows)} quality-filtered negotiation episodes generated via Gemini
self-play for the Parlay negotiation MDP (OpenEnv-compliant environment).

Used for SFT cold-start and GRPO fine-tuning of Qwen2.5-1.5B.

## Stats
- {len(rows)} episodes | mean reward: {mean_r:.1f} | 94.3% deal rate
- 3 scenarios x 3 personas (9 combinations)
- Quality filter: min_reward > -50.0

## Fields
prompt, scenario_id, persona, conversation, reward,
deal_efficiency, tom_accuracy, drift_adapted

## Links
[Space](https://huggingface.co/spaces/sh4shv4t/Parlay) |
[GitHub](https://github.com/sh4shv4t/Parlay) |
[SFT Model](https://huggingface.co/sh4shv4t/parlay-sft-1-5b) |
[Blog](https://github.com/sh4shv4t/Parlay/blob/main/BLOG.md)
"""


def push_dataset(jsonl_path: str, repo_id: str, token: str) -> None:
    """Publish a JSONL episode file and a generated dataset card to the Hub.

    The file is parsed locally first, then the public dataset repo is created
    (if missing), the JSONL is uploaded as ``episodes_v2.jsonl`` and the
    generated card as ``README.md``.

    Args:
        jsonl_path: Path to the local JSONL file, one JSON object per line.
        repo_id: Target dataset repo, e.g. ``"user/name"``.
        token: Hugging Face access token passed to ``HfApi``.

    Raises:
        OSError: If ``jsonl_path`` cannot be read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Parse and render before any network call so an unreadable or malformed
    # file fails fast instead of leaving a half-published public repo.
    rows = _load_episodes(jsonl_path)
    card = _build_dataset_card(rows)

    api = HfApi(token=token)
    api.create_repo(repo_id, repo_type="dataset", exist_ok=True, private=False)
    api.upload_file(
        path_or_fileobj=jsonl_path,
        path_in_repo="episodes_v2.jsonl",
        repo_id=repo_id,
        repo_type="dataset",
        # Use the real episode count so the commit message agrees with the card.
        commit_message=(
            f"Parlay negotiation episodes ({len(rows)}, quality-filtered)"
        ),
    )
    # upload_file accepts raw bytes, so no temporary file (and no platform
    # newline translation of the card) is needed.
    api.upload_file(
        path_or_fileobj=card.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Dataset card",
    )
    print(f"Dataset live: https://huggingface.co/datasets/{repo_id}")


def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and push the dataset.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Exits with status 2 and a usage message when no token is supplied via
    ``--token`` or the environment.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data",
        default="data/episodes_v2.jsonl",
        help="Path to the JSONL episode file to upload",
    )
    parser.add_argument(
        "--repo",
        default="sh4shv4t/parlay-episodes",
        help="Target Hugging Face dataset repo id",
    )
    parser.add_argument(
        "--token",
        # The environment (including any loaded .env files) provides the default.
        default=_resolve_hf_token(),
        help="Hugging Face token (or set HF_TOKEN / HUGGING_FACE_HUB_TOKEN, or .env)",
    )
    args = parser.parse_args(argv)
    if not args.token:
        # A missing token is a usage error: report it the argparse way
        # (usage + message on stderr, exit status 2) rather than a traceback.
        parser.error(
            "No Hugging Face token found. Add HF_TOKEN=... to .env in the project root, "
            "export HF_TOKEN (or HUGGING_FACE_HUB_TOKEN), or pass --token."
        )
    push_dataset(args.data, args.repo, args.token)


if __name__ == "__main__":
    main()