{
  "model": "Qwen/Qwen2.5-3B-Instruct",
  "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
  "phases": [
    "phase1_timing",
    "phase2_content"
  ],
  "rounds_per_phase": 3,
  "episodes_per_round": 6,
  "before": {
    "monthly_engage": 0.0,
    "monthly_strategic": 0.175,
    "monthly_competitive": 0.035
  },
  "after": {
    "monthly_engage": 0.0,
    "monthly_strategic": 0.175,
    "monthly_competitive": 0.035
  },
  "smart_heuristic": {
    "monthly_engage": 0.7519,
    "monthly_strategic": 0.9101,
    "monthly_competitive": 0.9141
  },
  "improvement": {
    "monthly_engage": 0.0,
    "monthly_strategic": 0.0,
    "monthly_competitive": 0.0
  },
  "training_log": {
    "phase": [
      "phase1_timing",
      "phase1_timing",
      "phase1_timing",
      "phase2_content",
      "phase2_content",
      "phase2_content"
    ],
    "round": [
      1,
      2,
      3,
      1,
      2,
      3
    ],
    "global_step": [
      1,
      2,
      3,
      4,
      5,
      6
    ],
    "use_hint": [
      true,
      false,
      false,
      true,
      false,
      false
    ],
    "avg_episode_reward": [
      5.127,
      3.04,
      2.867,
      3.538,
      2.15,
      1.924
    ],
    "max_episode_reward": [
      5.315,
      3.303,
      3.016,
      3.837,
      2.807,
      2.609
    ],
    "min_episode_reward": [
      4.96,
      2.6,
      2.555,
      3.338,
      1.587,
      1.375
    ],
    "avg_grader": [
      0.9498,
      0.259,
      0.2083,
      0.8697,
      0.3763,
      0.2855
    ],
    "max_grader": [
      1.0,
      0.3614,
      0.3042,
      1.0,
      0.5979,
      0.5027
    ],
    "n_training_samples": [
      81,
      96,
      102,
      77,
      90,
      76
    ],
    "train_loss": [
      2.833,
      3.1413,
      3.1255,
      2.8381,
      2.9281,
      2.9184
    ]
  }
}