Opengrid / training /outputs /summary.json
K446's picture
Polish for hackathon submission: training evidence, two pipelines, UI, docs
e81353d
{
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"train_task": "task_karnataka",
"train_time_minutes": 159.6,
"num_prompts": 600,
"num_epochs": 3,
"num_steps": 449,
"gpu": "NVIDIA A10G (23.9 GB)",
"lora_rank": 16,
"framework": "TRL GRPOTrainer + bitsandbytes 4-bit",
"reward_start": -0.2308,
"reward_end": 0.6638,
"reward_peak": 0.6883,
"note": "Post-training eval OOM'd during model save; reward values from training log",
"baseline": {
"task_easy": {
"avg": 31.99,
"std": 0.0
},
"task_medium": {
"avg": 46.69,
"std": 0.36
},
"karnataka_easy": {
"avg": 56.33,
"std": 0.25
},
"karnataka_medium": {
"avg": 49.57,
"std": 0.21
},
"karnataka_hard": {
"avg": -417.15,
"std": 63.02
},
"task_karnataka": {
"avg": 49.43,
"std": 0.21
}
},
"training_reward": {
"initial_avg_5steps": -0.2308,
"mid_avg_steps100_150": 0.6266,
"final_avg_last50steps": 0.6634
}
}