vaibhav12332112312's picture
update
c3e9b69
raw
history blame contribute delete
997 Bytes
{
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"training": "LoRA SFT (real weight updates)",
"rounds": 1,
"episodes_per_round": 1,
"before": {
"monthly_engage": 0.3048,
"monthly_strategic": 0.3456,
"monthly_competitive": 0.4808
},
"after": {
"monthly_engage": 0.0162,
"monthly_strategic": 0.1749,
"monthly_competitive": 0.3621
},
"smart_heuristic": {
"monthly_engage": 0.6342,
"monthly_strategic": 0.7218,
"monthly_competitive": 0.8315
},
"improvement": {
"monthly_engage": -0.2886,
"monthly_strategic": -0.1707,
"monthly_competitive": -0.1187
},
"training_log": {
"round": [
1
],
"avg_episode_reward": [
1.593
],
"max_episode_reward": [
1.593
],
"min_episode_reward": [
1.593
],
"avg_grader": [
0.0268
],
"max_grader": [
0.0268
],
"n_training_samples": [
4
],
"train_loss": [
2.3314
]
}
}