Spaces:
Paused
Paused
| { | |
| "model": "qwen2.5:3b-instruct-q4_K_M", | |
| "device": "M4 Mac (Ollama local)", | |
| "training_rounds": 4, | |
| "episodes_per_round": 6, | |
| "before": { | |
| "monthly_engage": 0.3548, | |
| "monthly_strategic": 0.6795, | |
| "monthly_competitive": 0.3738 | |
| }, | |
| "after": { | |
| "monthly_engage": 0.4086, | |
| "monthly_strategic": 0.6273, | |
| "monthly_competitive": 0.5101 | |
| }, | |
| "smart_heuristic": { | |
| "monthly_engage": 0.4312, | |
| "monthly_strategic": 0.7682, | |
| "monthly_competitive": 0.8094 | |
| }, | |
| "improvement": { | |
| "monthly_engage": 0.053800000000000014, | |
| "monthly_strategic": -0.052200000000000024, | |
| "monthly_competitive": 0.13629999999999998 | |
| }, | |
| "training_log": { | |
| "round": [ | |
| 1, | |
| 2, | |
| 3, | |
| 4 | |
| ], | |
| "avg_grader": [ | |
| 0.4958, | |
| 0.4912, | |
| 0.6015, | |
| 0.5548 | |
| ], | |
| "max_grader": [ | |
| 0.7391, | |
| 0.7236, | |
| 0.7529, | |
| 0.7705 | |
| ], | |
| "min_grader": [ | |
| 0.3698, | |
| 0.2527, | |
| 0.382, | |
| 0.3764 | |
| ], | |
| "avg_reward": [ | |
| 6.07, | |
| 6.093, | |
| 6.418, | |
| 6.467 | |
| ], | |
| "max_reward": [ | |
| 6.104, | |
| 6.1, | |
| 6.481, | |
| 6.527 | |
| ], | |
| "min_reward": [ | |
| 6.037, | |
| 6.076, | |
| 6.343, | |
| 6.366 | |
| ], | |
| "best_temperature": [ | |
| 1.4, | |
| 1.0, | |
| 0.7, | |
| 0.7 | |
| ] | |
| }, | |
| "all_episodes": [ | |
| { | |
| "round": 1, | |
| "task": "monthly_engage", | |
| "seed": 42, | |
| "grader_score": 0.4395, | |
| "total_reward": 6.1044, | |
| "temperature": 1.4 | |
| }, | |
| { | |
| "round": 1, | |
| "task": "monthly_strategic", | |
| "seed": 43, | |
| "grader_score": 0.6758, | |
| "total_reward": 6.0373, | |
| "temperature": 1.4 | |
| }, | |
| { | |
| "round": 1, | |
| "task": "monthly_competitive", | |
| "seed": 44, | |
| "grader_score": 0.3698, | |
| "total_reward": 6.0686, | |
| "temperature": 1.4 | |
| }, | |
| { | |
| "round": 1, | |
| "task": "monthly_engage", | |
| "seed": 45, | |
| "grader_score": 0.3806, | |
| "total_reward": 6.0643, | |
| "temperature": 1.4 | |
| }, | |
| { | |
| "round": 1, | |
| "task": "monthly_strategic", | |
| "seed": 46, | |
| "grader_score": 0.7391, | |
| "total_reward": 6.096, | |
| "temperature": 1.4 | |
| }, | |
| { | |
| "round": 1, | |
| "task": "monthly_competitive", | |
| "seed": 47, | |
| "grader_score": 0.3699, | |
| "total_reward": 6.0489999999999995, | |
| "temperature": 1.4 | |
| }, | |
| { | |
| "round": 2, | |
| "task": "monthly_engage", | |
| "seed": 142, | |
| "grader_score": 0.4335, | |
| "total_reward": 6.0995, | |
| "temperature": 1.0 | |
| }, | |
| { | |
| "round": 2, | |
| "task": "monthly_strategic", | |
| "seed": 143, | |
| "grader_score": 0.7236, | |
| "total_reward": 6.0992, | |
| "temperature": 1.0 | |
| }, | |
| { | |
| "round": 2, | |
| "task": "monthly_competitive", | |
| "seed": 144, | |
| "grader_score": 0.3789, | |
| "total_reward": 6.0943, | |
| "temperature": 1.0 | |
| }, | |
| { | |
| "round": 2, | |
| "task": "monthly_engage", | |
| "seed": 145, | |
| "grader_score": 0.4356, | |
| "total_reward": 6.0999, | |
| "temperature": 1.0 | |
| }, | |
| { | |
| "round": 2, | |
| "task": "monthly_strategic", | |
| "seed": 146, | |
| "grader_score": 0.7232, | |
| "total_reward": 6.0882, | |
| "temperature": 1.0 | |
| }, | |
| { | |
| "round": 2, | |
| "task": "monthly_competitive", | |
| "seed": 147, | |
| "grader_score": 0.2527, | |
| "total_reward": 6.0764, | |
| "temperature": 1.0 | |
| }, | |
| { | |
| "round": 3, | |
| "task": "monthly_engage", | |
| "seed": 242, | |
| "grader_score": 0.382, | |
| "total_reward": 6.4364, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 3, | |
| "task": "monthly_strategic", | |
| "seed": 243, | |
| "grader_score": 0.6426, | |
| "total_reward": 6.4364, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 3, | |
| "task": "monthly_competitive", | |
| "seed": 244, | |
| "grader_score": 0.7529, | |
| "total_reward": 6.3849, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 3, | |
| "task": "monthly_engage", | |
| "seed": 245, | |
| "grader_score": 0.3935, | |
| "total_reward": 6.4805, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 3, | |
| "task": "monthly_strategic", | |
| "seed": 246, | |
| "grader_score": 0.724, | |
| "total_reward": 6.4286, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 3, | |
| "task": "monthly_competitive", | |
| "seed": 247, | |
| "grader_score": 0.7138, | |
| "total_reward": 6.3425, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 4, | |
| "task": "monthly_engage", | |
| "seed": 342, | |
| "grader_score": 0.3764, | |
| "total_reward": 6.4858, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 4, | |
| "task": "monthly_strategic", | |
| "seed": 343, | |
| "grader_score": 0.6314, | |
| "total_reward": 6.4636, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 4, | |
| "task": "monthly_competitive", | |
| "seed": 344, | |
| "grader_score": 0.7705, | |
| "total_reward": 6.4934, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 4, | |
| "task": "monthly_engage", | |
| "seed": 345, | |
| "grader_score": 0.3851, | |
| "total_reward": 6.4661, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 4, | |
| "task": "monthly_strategic", | |
| "seed": 346, | |
| "grader_score": 0.6755, | |
| "total_reward": 6.5269, | |
| "temperature": 0.7 | |
| }, | |
| { | |
| "round": 4, | |
| "task": "monthly_competitive", | |
| "seed": 347, | |
| "grader_score": 0.4897, | |
| "total_reward": 6.3657, | |
| "temperature": 0.7 | |
| } | |
| ], | |
| "elapsed_seconds": 6034.9 | |
| } |