josephmayo committed on
Commit
db84f1e
·
verified ·
1 Parent(s): cc07ae8

Add evaluation scope proof note

Browse files
Files changed (1) hide show
  1. evaluation_scope.json +13 -0
evaluation_scope.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "benchmark": "HumanEval executable subset",
3
+ "evaluated_tasks": 8,
4
+ "task_selection": "first 8 HumanEval tasks",
5
+ "before_pass": 5,
6
+ "after_pass": 7,
7
+ "absolute_pass_rate_before": 0.625,
8
+ "absolute_pass_rate_after": 0.875,
9
+ "absolute_percentage_point_delta": 25.0,
10
+ "relative_pass_count_increase_percent": 40.0,
11
+ "scope_reason": "Kaggle GPU-hour budget was exhausted during training, merge preparation, and upload validation, so the public executable proof was kept to a small reproducible subset.",
12
+ "artifact_note": "eval_before_after.csv preserves scored output previews, not full generated code. executable_eval.json is the preserved pass/fail proof artifact. Future runs should save full generated completions in eval_before_after_full.jsonl."
13
+ }