{
  "model": "Qwen/Qwen2.5-1.5B-Instruct",
  "training": "LoRA SFT (real weight updates)",
  "rounds": 1,
  "episodes_per_round": 1,
  "before": {
    "monthly_engage": 0.3048,
    "monthly_strategic": 0.3456,
    "monthly_competitive": 0.4808
  },
  "after": {
    "monthly_engage": 0.0162,
    "monthly_strategic": 0.1749,
    "monthly_competitive": 0.3621
  },
  "smart_heuristic": {
    "monthly_engage": 0.6342,
    "monthly_strategic": 0.7218,
    "monthly_competitive": 0.8315
  },
  "improvement": {
    "monthly_engage": -0.2886,
    "monthly_strategic": -0.17070000000000002,
    "monthly_competitive": -0.11870000000000003
  },
  "training_log": {
    "round": [
      1
    ],
    "avg_episode_reward": [
      1.593
    ],
    "max_episode_reward": [
      1.593
    ],
    "min_episode_reward": [
      1.593
    ],
    "avg_grader": [
      0.0268
    ],
    "max_grader": [
      0.0268
    ],
    "n_training_samples": [
      4
    ],
    "train_loss": [
      2.3314
    ]
  }
}