{
"model": "Qwen/Qwen2.5-3B-Instruct",
"training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
"phases": [],
"rounds_per_phase": 3,
"episodes_per_round": 6,
"before": {
"monthly_engage": 1.0,
"monthly_strategic": 0.8357,
"monthly_competitive": 0.9414
},
"after": {
"monthly_engage": 0.999,
"monthly_strategic": 0.9321439559505211,
"monthly_competitive": 0.999
},
"smart_heuristic": {
"monthly_engage": 0.7519,
"monthly_strategic": 0.9101,
"monthly_competitive": 0.9141
},
"improvement": {
"monthly_engage": -0.0010000000000000009,
"monthly_strategic": 0.09644395595052113,
"monthly_competitive": 0.057599999999999985
},
"training_log": {
"phase": [],
"round": [],
"global_step": [],
"use_hint": [],
"avg_episode_reward": [],
"max_episode_reward": [],
"min_episode_reward": [],
"avg_grader": [],
"max_grader": [],
"n_training_samples": [],
"train_loss": []
}
}