File size: 997 Bytes
0813516 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | {
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"training": "LoRA SFT (real weight updates)",
"rounds": 1,
"episodes_per_round": 1,
"before": {
"monthly_engage": 0.3048,
"monthly_strategic": 0.3456,
"monthly_competitive": 0.4808
},
"after": {
"monthly_engage": 0.0162,
"monthly_strategic": 0.1749,
"monthly_competitive": 0.3621
},
"smart_heuristic": {
"monthly_engage": 0.6342,
"monthly_strategic": 0.7218,
"monthly_competitive": 0.8315
},
"improvement": {
"monthly_engage": -0.2886,
"monthly_strategic": -0.17070000000000002,
"monthly_competitive": -0.11870000000000003
},
"training_log": {
"round": [
1
],
"avg_episode_reward": [
1.593
],
"max_episode_reward": [
1.593
],
"min_episode_reward": [
1.593
],
"avg_grader": [
0.0268
],
"max_grader": [
0.0268
],
"n_training_samples": [
4
],
"train_loss": [
2.3314
]
}
} |