| { | |
| "model": "Qwen/Qwen2.5-1.5B-Instruct", | |
| "training": "LoRA SFT (real weight updates)", | |
| "rounds": 1, | |
| "episodes_per_round": 1, | |
| "before": { | |
| "monthly_engage": 0.3048, | |
| "monthly_strategic": 0.3456, | |
| "monthly_competitive": 0.4808 | |
| }, | |
| "after": { | |
| "monthly_engage": 0.0162, | |
| "monthly_strategic": 0.1749, | |
| "monthly_competitive": 0.3621 | |
| }, | |
| "smart_heuristic": { | |
| "monthly_engage": 0.6342, | |
| "monthly_strategic": 0.7218, | |
| "monthly_competitive": 0.8315 | |
| }, | |
| "improvement": { | |
| "monthly_engage": -0.2886, | |
| "monthly_strategic": -0.17070000000000002, | |
| "monthly_competitive": -0.11870000000000003 | |
| }, | |
| "training_log": { | |
| "round": [ | |
| 1 | |
| ], | |
| "avg_episode_reward": [ | |
| 1.593 | |
| ], | |
| "max_episode_reward": [ | |
| 1.593 | |
| ], | |
| "min_episode_reward": [ | |
| 1.593 | |
| ], | |
| "avg_grader": [ | |
| 0.0268 | |
| ], | |
| "max_grader": [ | |
| 0.0268 | |
| ], | |
| "n_training_samples": [ | |
| 4 | |
| ], | |
| "train_loss": [ | |
| 2.3314 | |
| ] | |
| } | |
| } |