{
  "model": "Qwen/Qwen2.5-3B-Instruct",
  "training": "LoRA SFT (real weight updates)",
  "rounds": 2,
  "episodes_per_round": 6,
  "before": {
    "monthly_engage": 1.0,
    "monthly_strategic": 0.8426,
    "monthly_competitive": 0.9521
  },
  "after": {
    "monthly_engage": 1.0,
    "monthly_strategic": 0.8416,
    "monthly_competitive": 0.964
  },
  "smart_heuristic": {
    "monthly_engage": 0.7352,
    "monthly_strategic": 0.9043,
    "monthly_competitive": 0.9066
  },
  "improvement": {
    "monthly_engage": 0.0,
    "monthly_strategic": -0.0010000000000000009,
    "monthly_competitive": 0.011900000000000022
  },
  "training_log": {
    "round": [
      1,
      2
    ],
    "avg_episode_reward": [
      3.904,
      4.215
    ],
    "max_episode_reward": [
      4.514,
      4.658
    ],
    "min_episode_reward": [
      3.287,
      3.566
    ],
    "avg_grader": [
      0.6202,
      0.7325
    ],
    "max_grader": [
      0.8268,
      0.8703
    ],
    "n_training_samples": [
      101,
      102
    ],
    "train_loss": [
      2.6723,
      2.5934
    ]
  }
}