Spaces:
Paused
Paused
| { | |
| "model": "Qwen/Qwen2.5-3B-Instruct", | |
| "training": "LoRA SFT (real weight updates)", | |
| "rounds": 2, | |
| "episodes_per_round": 6, | |
| "before": { | |
| "monthly_engage": 1.0, | |
| "monthly_strategic": 0.8426, | |
| "monthly_competitive": 0.9521 | |
| }, | |
| "after": { | |
| "monthly_engage": 1.0, | |
| "monthly_strategic": 0.8416, | |
| "monthly_competitive": 0.964 | |
| }, | |
| "smart_heuristic": { | |
| "monthly_engage": 0.7352, | |
| "monthly_strategic": 0.9043, | |
| "monthly_competitive": 0.9066 | |
| }, | |
| "improvement": { | |
| "monthly_engage": 0.0, | |
| "monthly_strategic": -0.0010000000000000009, | |
| "monthly_competitive": 0.011900000000000022 | |
| }, | |
| "training_log": { | |
| "round": [ | |
| 1, | |
| 2 | |
| ], | |
| "avg_episode_reward": [ | |
| 3.904, | |
| 4.215 | |
| ], | |
| "max_episode_reward": [ | |
| 4.514, | |
| 4.658 | |
| ], | |
| "min_episode_reward": [ | |
| 3.287, | |
| 3.566 | |
| ], | |
| "avg_grader": [ | |
| 0.6202, | |
| 0.7325 | |
| ], | |
| "max_grader": [ | |
| 0.8268, | |
| 0.8703 | |
| ], | |
| "n_training_samples": [ | |
| 101, | |
| 102 | |
| ], | |
| "train_loss": [ | |
| 2.6723, | |
| 2.5934 | |
| ] | |
| } | |
| } |