{
  "model": "Qwen/Qwen2.5-3B-Instruct",
  "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
  "phases": [],
  "rounds_per_phase": 3,
  "episodes_per_round": 6,
  "before": {
    "monthly_engage": 1.0,
    "monthly_strategic": 0.8357,
    "monthly_competitive": 0.9414
  },
  "after": {
    "monthly_engage": 0.999,
    "monthly_strategic": 0.9321439559505211,
    "monthly_competitive": 0.999
  },
  "smart_heuristic": {
    "monthly_engage": 0.7519,
    "monthly_strategic": 0.9101,
    "monthly_competitive": 0.9141
  },
  "improvement": {
    "monthly_engage": -0.0010000000000000009,
    "monthly_strategic": 0.09644395595052113,
    "monthly_competitive": 0.057599999999999985
  },
  "training_log": {
    "phase": [],
    "round": [],
    "global_step": [],
    "use_hint": [],
    "avg_episode_reward": [],
    "max_episode_reward": [],
    "min_episode_reward": [],
    "avg_grader": [],
    "max_grader": [],
    "n_training_samples": [],
    "train_loss": []
  }
}