{ "model": "Qwen/Qwen2.5-3B-Instruct", "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase", "phases": [], "rounds_per_phase": 3, "episodes_per_round": 6, "before": { "monthly_engage": 1.0, "monthly_strategic": 0.8357, "monthly_competitive": 0.9414 }, "after": { "monthly_engage": 0.999, "monthly_strategic": 0.9321439559505211, "monthly_competitive": 0.999 }, "smart_heuristic": { "monthly_engage": 0.7519, "monthly_strategic": 0.9101, "monthly_competitive": 0.9141 }, "improvement": { "monthly_engage": -0.0010000000000000009, "monthly_strategic": 0.09644395595052113, "monthly_competitive": 0.057599999999999985 }, "training_log": { "phase": [], "round": [], "global_step": [], "use_hint": [], "avg_episode_reward": [], "max_episode_reward": [], "min_episode_reward": [], "avg_grader": [], "max_grader": [], "n_training_samples": [], "train_loss": [] } }