Spaces:
Paused
Paused
| { | |
| "model": "Qwen/Qwen2.5-3B-Instruct", | |
| "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase", | |
| "phases": [ | |
| "phase1_timing", | |
| "phase2_content" | |
| ], | |
| "rounds_per_phase": 3, | |
| "episodes_per_round": 6, | |
| "before": { | |
| "monthly_engage": 0.0, | |
| "monthly_strategic": 0.175, | |
| "monthly_competitive": 0.035 | |
| }, | |
| "after": { | |
| "monthly_engage": 0.0, | |
| "monthly_strategic": 0.175, | |
| "monthly_competitive": 0.035 | |
| }, | |
| "smart_heuristic": { | |
| "monthly_engage": 0.7519, | |
| "monthly_strategic": 0.9101, | |
| "monthly_competitive": 0.9141 | |
| }, | |
| "improvement": { | |
| "monthly_engage": 0.0, | |
| "monthly_strategic": 0.0, | |
| "monthly_competitive": 0.0 | |
| }, | |
| "training_log": { | |
| "phase": [ | |
| "phase1_timing", | |
| "phase1_timing", | |
| "phase1_timing", | |
| "phase2_content", | |
| "phase2_content", | |
| "phase2_content" | |
| ], | |
| "round": [ | |
| 1, | |
| 2, | |
| 3, | |
| 1, | |
| 2, | |
| 3 | |
| ], | |
| "global_step": [ | |
| 1, | |
| 2, | |
| 3, | |
| 4, | |
| 5, | |
| 6 | |
| ], | |
| "use_hint": [ | |
| true, | |
| false, | |
| false, | |
| true, | |
| false, | |
| false | |
| ], | |
| "avg_episode_reward": [ | |
| 5.127, | |
| 3.04, | |
| 2.867, | |
| 3.538, | |
| 2.15, | |
| 1.924 | |
| ], | |
| "max_episode_reward": [ | |
| 5.315, | |
| 3.303, | |
| 3.016, | |
| 3.837, | |
| 2.807, | |
| 2.609 | |
| ], | |
| "min_episode_reward": [ | |
| 4.96, | |
| 2.6, | |
| 2.555, | |
| 3.338, | |
| 1.587, | |
| 1.375 | |
| ], | |
| "avg_grader": [ | |
| 0.9498, | |
| 0.259, | |
| 0.2083, | |
| 0.8697, | |
| 0.3763, | |
| 0.2855 | |
| ], | |
| "max_grader": [ | |
| 1.0, | |
| 0.3614, | |
| 0.3042, | |
| 1.0, | |
| 0.5979, | |
| 0.5027 | |
| ], | |
| "n_training_samples": [ | |
| 81, | |
| 96, | |
| 102, | |
| 77, | |
| 90, | |
| 76 | |
| ], | |
| "train_loss": [ | |
| 2.833, | |
| 3.1413, | |
| 3.1255, | |
| 2.8381, | |
| 2.9281, | |
| 2.9184 | |
| ] | |
| } | |
| } |