{ "model": "Qwen/Qwen2.5-1.5B-Instruct", "training": "LoRA SFT (real weight updates)", "rounds": 1, "episodes_per_round": 1, "before": { "monthly_engage": 0.3048, "monthly_strategic": 0.3456, "monthly_competitive": 0.4808 }, "after": { "monthly_engage": 0.0162, "monthly_strategic": 0.1749, "monthly_competitive": 0.3621 }, "smart_heuristic": { "monthly_engage": 0.6342, "monthly_strategic": 0.7218, "monthly_competitive": 0.8315 }, "improvement": { "monthly_engage": -0.2886, "monthly_strategic": -0.17070000000000002, "monthly_competitive": -0.11870000000000003 }, "training_log": { "round": [ 1 ], "avg_episode_reward": [ 1.593 ], "max_episode_reward": [ 1.593 ], "min_episode_reward": [ 1.593 ], "avg_grader": [ 0.0268 ], "max_grader": [ 0.0268 ], "n_training_samples": [ 4 ], "train_loss": [ 2.3314 ] } }