File size: 1,007 Bytes
e81353d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | {
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"train_task": "task_karnataka",
"train_time_minutes": 159.6,
"num_prompts": 600,
"num_epochs": 3,
"num_steps": 449,
"gpu": "NVIDIA A10G (23.9 GB)",
"lora_rank": 16,
"framework": "TRL GRPOTrainer + bitsandbytes 4-bit",
"reward_start": -0.2308,
"reward_end": 0.6638,
"reward_peak": 0.6883,
"note": "Post-training eval OOM'd during model save; reward values from training log",
"baseline": {
"task_easy": {
"avg": 31.99,
"std": 0.0
},
"task_medium": {
"avg": 46.69,
"std": 0.36
},
"karnataka_easy": {
"avg": 56.33,
"std": 0.25
},
"karnataka_medium": {
"avg": 49.57,
"std": 0.21
},
"karnataka_hard": {
"avg": -417.15,
"std": 63.02
},
"task_karnataka": {
"avg": 49.43,
"std": 0.21
}
},
"training_reward": {
"initial_avg_5steps": -0.2308,
"mid_avg_steps100_150": 0.6266,
"final_avg_last50steps": 0.6634
}
} |