final-iteration/run-output/plots/training_summary.json
vaibhavkhandare's picture
Upload folder using huggingface_hub
e52d302 verified
raw
history blame
1.91 kB
{
"model": "Qwen/Qwen2.5-3B-Instruct",
"training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
"phases": [
"phase1_timing",
"phase2_content"
],
"rounds_per_phase": 3,
"episodes_per_round": 6,
"before": {
"monthly_engage": 0.0,
"monthly_strategic": 0.175,
"monthly_competitive": 0.035
},
"after": {
"monthly_engage": 0.0,
"monthly_strategic": 0.175,
"monthly_competitive": 0.035
},
"smart_heuristic": {
"monthly_engage": 0.7519,
"monthly_strategic": 0.9101,
"monthly_competitive": 0.9141
},
"improvement": {
"monthly_engage": 0.0,
"monthly_strategic": 0.0,
"monthly_competitive": 0.0
},
"training_log": {
"phase": [
"phase1_timing",
"phase1_timing",
"phase1_timing",
"phase2_content",
"phase2_content",
"phase2_content"
],
"round": [
1,
2,
3,
1,
2,
3
],
"global_step": [
1,
2,
3,
4,
5,
6
],
"use_hint": [
true,
false,
false,
true,
false,
false
],
"avg_episode_reward": [
5.127,
3.04,
2.867,
3.538,
2.15,
1.924
],
"max_episode_reward": [
5.315,
3.303,
3.016,
3.837,
2.807,
2.609
],
"min_episode_reward": [
4.96,
2.6,
2.555,
3.338,
1.587,
1.375
],
"avg_grader": [
0.9498,
0.259,
0.2083,
0.8697,
0.3763,
0.2855
],
"max_grader": [
1.0,
0.3614,
0.3042,
1.0,
0.5979,
0.5027
],
"n_training_samples": [
81,
96,
102,
77,
90,
76
],
"train_loss": [
2.833,
3.1413,
3.1255,
2.8381,
2.9281,
2.9184
]
}
}