final-iteration / run-output /plots /training_summary.json
vaibhavkhandare's picture
Upload folder using huggingface_hub
3419724 verified
raw
history blame
1.09 kB
{
"model": "Qwen/Qwen2.5-3B-Instruct",
"training": "LoRA SFT (real weight updates)",
"rounds": 2,
"episodes_per_round": 6,
"before": {
"monthly_engage": 1.0,
"monthly_strategic": 0.8426,
"monthly_competitive": 0.9521
},
"after": {
"monthly_engage": 1.0,
"monthly_strategic": 0.8416,
"monthly_competitive": 0.964
},
"smart_heuristic": {
"monthly_engage": 0.7352,
"monthly_strategic": 0.9043,
"monthly_competitive": 0.9066
},
"improvement": {
"monthly_engage": 0.0,
"monthly_strategic": -0.0010000000000000009,
"monthly_competitive": 0.011900000000000022
},
"training_log": {
"round": [
1,
2
],
"avg_episode_reward": [
3.904,
4.215
],
"max_episode_reward": [
4.514,
4.658
],
"min_episode_reward": [
3.287,
3.566
],
"avg_grader": [
0.6202,
0.7325
],
"max_grader": [
0.8268,
0.8703
],
"n_training_samples": [
101,
102
],
"train_loss": [
2.6723,
2.5934
]
}
}