""" EchoBench Publisher Converts ECHO task bank to HuggingFace Dataset and publishes to the Hub. Usage: python scripts/publish_echobench.py --token YOUR_HF_TOKEN python scripts/publish_echobench.py --token YOUR_HF_TOKEN --repo your-username/echobench """ import argparse import sys import os sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) def load_tasks_from_bank(): """Load all tasks from ECHO's task bank.""" from env.task_bank import TaskBank from config import cfg bank = TaskBank() print("Loading task bank (downloads datasets if not cached)…") bank.ensure_loaded() all_tasks = [] for domain in cfg.DOMAINS: for difficulty in cfg.DIFFICULTIES: bucket = bank._tasks.get(domain, {}).get(difficulty, []) all_tasks.extend(bucket) print(f" {domain}/{difficulty}: {len(bucket)} tasks") print(f"\nTotal tasks: {len(all_tasks)}") return all_tasks def tasks_to_hf_dataset(tasks): """Convert task dicts to HuggingFace DatasetDict split by domain.""" from datasets import Dataset, DatasetDict records = [] for task in tasks: records.append({ "id": str(task.get("id", "")), "domain": str(task.get("domain", "")), "difficulty": str(task.get("difficulty", "")), "difficulty_score": float(task.get("difficulty_score", 0.5)), "question": str(task.get("question", "")), "answer": str(task.get("answer", "")), "answer_aliases": [str(a) for a in task.get("answer_aliases", [])], "source_dataset": str(task.get("source_dataset", "")), }) splits = {} domains = sorted({r["domain"] for r in records}) for domain in domains: subset = [r for r in records if r["domain"] == domain] splits[domain] = Dataset.from_list(subset) print(f" Split '{domain}': {len(subset)} rows") splits["all"] = Dataset.from_list(records) print(f" Split 'all': {len(records)} rows") return DatasetDict(splits) _DATASET_CARD = """\ --- license: apache-2.0 task_categories: - question-answering - text-classification language: - en tags: - calibration - metacognition - llm-evaluation - grpo - openenv size_categories: - 10K