cmpatino
/

math500-bon-exercise

Model card Files Files and versions

xet

Community

cmpatino HF Staff committed on 29 days ago

Commit

962718c

verified ·

1 Parent(s): a98a175

Upload code/step5_push_dataset.py with huggingface_hub

Browse files

Files changed (1) hide show

code/step5_push_dataset.py +77 -0

code/step5_push_dataset.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""
+Step 5: Create a HuggingFace dataset from the results and push to Hub.
+This creates a dataset with:
+- The original problem and ground truth answer
+- The greedy (N=1) solution and whether it was correct
+- The Best-of-N (N=16) weighted, standard, and majority-vote answers and whether each was correct
+- All 16 sampled solutions with their PRM scores
+- The PRM score breakdown per answer group
+Co-authored with Claude (Anthropic). I can explain all code logic.
+"""
+import json
+from datasets import Dataset, Features, Value, Sequence
+from huggingface_hub import HfApi
+# ──────────────────────────────────────────────────────────────────────────────
+# Load all results
+# ──────────────────────────────────────────────────────────────────────────────
+with open("/Users/cmpatino/Projects/ml-intern/exercise/outputs/greedy_results.json") as f:
+    greedy_results = json.load(f)
+with open("/Users/cmpatino/Projects/ml-intern/exercise/outputs/scored_results.json") as f:
+    scored_results = json.load(f)
+with open("/Users/cmpatino/Projects/ml-intern/exercise/outputs/bon_results.json") as f:
+    bon_results = json.load(f)
+# ──────────────────────────────────────────────────────────────────────────────
+# Build dataset rows
+# ──────────────────────────────────────────────────────────────────────────────
+rows = []
+for greedy, scored, bon in zip(greedy_results, scored_results, bon_results):
+    row = {
+        # Original problem info
+        "problem": greedy["problem"],
+        "ground_truth_solution": greedy["solution"],
+        "ground_truth_answer": greedy["answer"],
+        "subject": greedy["subject"],
+        "level": greedy["level"],
+        "unique_id": greedy["unique_id"],
+        # Greedy solution
+        "greedy_solution": greedy["generated_solutions"][0],
+        "greedy_extracted_answer": greedy["greedy_extracted_answer"],
+        "greedy_correct": greedy["greedy_correct"],
+        # Best-of-N results
+        "bon_weighted_answer": bon["weighted_bon_answer"],
+        "bon_weighted_correct": bon["weighted_bon_correct"],
+        "bon_standard_answer": bon["standard_bon_answer"],
+        "bon_standard_correct": bon["standard_bon_correct"],
+        "bon_majority_answer": bon["majority_vote_answer"],
+        "bon_majority_correct": bon["majority_vote_correct"],
+        # All N=16 sampled solutions
+        "sampled_solutions": scored["sampled_solutions"],
+        "sampled_extracted_answers": scored["extracted_answers"],
+        "sampled_prm_scores": scored["prm_scores"],
+        # Summary stats
+        "n_correct_in_16": bon["n_correct_in_16"],
+        "answer_score_breakdown": json.dumps(bon["answer_score_breakdown"]),
+    }
+    rows.append(row)
+# ──────────────────────────────────────────────────────────────────────────────
+# Create and push dataset
+# ──────────────────────────────────────────────────────────────────────────────
+dataset = Dataset.from_list(rows)
+print(f"Created dataset with {len(dataset)} rows")
+print(f"Columns: {dataset.column_names}")
+print(f"\nSample row:")
+for col in ["unique_id", "level", "subject", "ground_truth_answer", "greedy_correct", "bon_weighted_correct"]:
+    print(f"  {col}: {dataset[0][col]}")
+DATASET_ID = "cmpatino/math500-bon-weighted-results"
+dataset.push_to_hub(DATASET_ID, split="test")
+print(f"\nDataset pushed to: https://huggingface.co/datasets/{DATASET_ID}")