{ "model": "Qwen/Qwen2.5-3B-Instruct", "training": "LoRA SFT (real weight updates)", "rounds": 2, "episodes_per_round": 6, "before": { "monthly_engage": 1.0, "monthly_strategic": 0.8426, "monthly_competitive": 0.9521 }, "after": { "monthly_engage": 1.0, "monthly_strategic": 0.8416, "monthly_competitive": 0.964 }, "smart_heuristic": { "monthly_engage": 0.7352, "monthly_strategic": 0.9043, "monthly_competitive": 0.9066 }, "improvement": { "monthly_engage": 0.0, "monthly_strategic": -0.0010000000000000009, "monthly_competitive": 0.011900000000000022 }, "training_log": { "round": [ 1, 2 ], "avg_episode_reward": [ 3.904, 4.215 ], "max_episode_reward": [ 4.514, 4.658 ], "min_episode_reward": [ 3.287, 3.566 ], "avg_grader": [ 0.6202, 0.7325 ], "max_grader": [ 0.8268, 0.8703 ], "n_training_samples": [ 101, 102 ], "train_loss": [ 2.6723, 2.5934 ] } }