Spaces:
Paused
Paused
| { | |
| "model": "Qwen/Qwen2.5-3B-Instruct", | |
| "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase", | |
| "phases": [], | |
| "rounds_per_phase": 3, | |
| "episodes_per_round": 6, | |
| "before": { | |
| "monthly_engage": 1.0, | |
| "monthly_strategic": 0.8357, | |
| "monthly_competitive": 0.9414 | |
| }, | |
| "after": { | |
| "monthly_engage": 0.999, | |
| "monthly_strategic": 0.9321439559505211, | |
| "monthly_competitive": 0.999 | |
| }, | |
| "smart_heuristic": { | |
| "monthly_engage": 0.7519, | |
| "monthly_strategic": 0.9101, | |
| "monthly_competitive": 0.9141 | |
| }, | |
| "improvement": { | |
| "monthly_engage": -0.0010000000000000009, | |
| "monthly_strategic": 0.09644395595052113, | |
| "monthly_competitive": 0.057599999999999985 | |
| }, | |
| "training_log": { | |
| "phase": [], | |
| "round": [], | |
| "global_step": [], | |
| "use_hint": [], | |
| "avg_episode_reward": [], | |
| "max_episode_reward": [], | |
| "min_episode_reward": [], | |
| "avg_grader": [], | |
| "max_grader": [], | |
| "n_training_samples": [], | |
| "train_loss": [] | |
| } | |
| } |