cmpatino HF Staff committed on
Commit
962718c
·
verified ·
1 Parent(s): a98a175

Upload code/step5_push_dataset.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. code/step5_push_dataset.py +77 -0
code/step5_push_dataset.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Step 5: Create a HuggingFace dataset from the results and push to Hub.
3
+
4
+ This creates a dataset with:
5
+ - The original problem and ground truth answer
6
+ - The greedy (N=1) solution and whether it was correct
7
+ - The Best-of-N (N=16) weighted answer and whether it was correct
8
+ - All 16 sampled solutions with their PRM scores
9
+ - The PRM score breakdown per answer group
10
+
11
+ Co-authored with Claude (Anthropic). I can explain all code logic.
12
+ """
13
+
14
+ import json
15
+ from datasets import Dataset, Features, Value, Sequence
16
+ from huggingface_hub import HfApi
17
+
18
+
# ──────────────────────────────────────────────────────────────────────────────
# Configuration
# ──────────────────────────────────────────────────────────────────────────────
# Directory holding the JSON files written by the earlier pipeline steps.
OUTPUTS_DIR = "/Users/cmpatino/Projects/ml-intern/exercise/outputs"

# Hub repository the dataset is pushed to.
DATASET_ID = "cmpatino/math500-bon-weighted-results"

# Columns echoed for the first row as a quick sanity check before pushing.
PREVIEW_COLUMNS = [
    "unique_id",
    "level",
    "subject",
    "ground_truth_answer",
    "greedy_correct",
    "bon_weighted_correct",
]


# ──────────────────────────────────────────────────────────────────────────────
# Load all results
# ──────────────────────────────────────────────────────────────────────────────
def load_results(filename, outputs_dir=OUTPUTS_DIR):
    """Load and return the parsed JSON content of one results file.

    Args:
        filename: Name of the JSON file inside ``outputs_dir``.
        outputs_dir: Directory containing the results files.

    Returns:
        Whatever ``json.load`` produces for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(f"{outputs_dir}/{filename}", encoding="utf-8") as f:
        return json.load(f)


# ──────────────────────────────────────────────────────────────────────────────
# Build dataset rows
# ──────────────────────────────────────────────────────────────────────────────
def build_rows(greedy_results, scored_results, bon_results):
    """Merge the three per-problem result lists into flat dataset rows.

    The lists are combined positionally: entry ``i`` of each list is used to
    build row ``i``.

    Args:
        greedy_results: Entries providing the problem fields and the greedy
            solution fields.
        scored_results: Entries providing the sampled solutions, their
            extracted answers and their PRM scores.
        bon_results: Entries providing the Best-of-N answers, correctness
            flags and summary statistics.

    Returns:
        A list with one dict per problem, in input order.

    Raises:
        ValueError: If the three lists do not have the same length.
        KeyError: If an entry lacks one of the fields read below.
    """
    # A bare zip() would silently truncate to the shortest list and publish
    # an incomplete dataset; fail loudly instead.
    if not (len(greedy_results) == len(scored_results) == len(bon_results)):
        raise ValueError(
            "Result files have different lengths: "
            f"greedy={len(greedy_results)}, "
            f"scored={len(scored_results)}, "
            f"bon={len(bon_results)}"
        )

    rows = []
    for greedy, scored, bon in zip(greedy_results, scored_results, bon_results):
        rows.append(
            {
                # Original problem info
                "problem": greedy["problem"],
                "ground_truth_solution": greedy["solution"],
                "ground_truth_answer": greedy["answer"],
                "subject": greedy["subject"],
                "level": greedy["level"],
                "unique_id": greedy["unique_id"],
                # Greedy solution (first generated solution of the entry)
                "greedy_solution": greedy["generated_solutions"][0],
                "greedy_extracted_answer": greedy["greedy_extracted_answer"],
                "greedy_correct": greedy["greedy_correct"],
                # Best-of-N results
                "bon_weighted_answer": bon["weighted_bon_answer"],
                "bon_weighted_correct": bon["weighted_bon_correct"],
                "bon_standard_answer": bon["standard_bon_answer"],
                "bon_standard_correct": bon["standard_bon_correct"],
                "bon_majority_answer": bon["majority_vote_answer"],
                "bon_majority_correct": bon["majority_vote_correct"],
                # All N=16 sampled solutions
                "sampled_solutions": scored["sampled_solutions"],
                "sampled_extracted_answers": scored["extracted_answers"],
                "sampled_prm_scores": scored["prm_scores"],
                # Summary stats
                "n_correct_in_16": bon["n_correct_in_16"],
                # Stored as a JSON string rather than a nested structure.
                "answer_score_breakdown": json.dumps(bon["answer_score_breakdown"]),
            }
        )
    return rows


# ──────────────────────────────────────────────────────────────────────────────
# Create and push dataset
# ──────────────────────────────────────────────────────────────────────────────
def main():
    """Load the results, build the dataset, print a preview and push it.

    Raises:
        ValueError: If the result files disagree in length or contain no
            entries (nothing is pushed in either case).
    """
    greedy_results = load_results("greedy_results.json")
    scored_results = load_results("scored_results.json")
    bon_results = load_results("bon_results.json")

    rows = build_rows(greedy_results, scored_results, bon_results)
    if not rows:
        # The preview below indexes row 0; stop with a clear message instead
        # of an IndexError, and never push an empty dataset.
        raise ValueError("No result rows were loaded; refusing to push an empty dataset")

    dataset = Dataset.from_list(rows)
    print(f"Created dataset with {len(dataset)} rows")
    print(f"Columns: {dataset.column_names}")
    print("\nSample row:")
    first_row = dataset[0]
    for col in PREVIEW_COLUMNS:
        print(f"  {col}: {first_row[col]}")

    dataset.push_to_hub(DATASET_ID, split="test")
    print(f"\nDataset pushed to: https://huggingface.co/datasets/{DATASET_ID}")


if __name__ == "__main__":
    main()