{ "model": "qwen2.5:3b-instruct-q4_K_M", "device": "M4 Mac (Ollama local)", "training_rounds": 4, "episodes_per_round": 6, "before": { "monthly_engage": 0.3548, "monthly_strategic": 0.6795, "monthly_competitive": 0.3738 }, "after": { "monthly_engage": 0.4086, "monthly_strategic": 0.6273, "monthly_competitive": 0.5101 }, "smart_heuristic": { "monthly_engage": 0.4312, "monthly_strategic": 0.7682, "monthly_competitive": 0.8094 }, "improvement": { "monthly_engage": 0.053800000000000014, "monthly_strategic": -0.052200000000000024, "monthly_competitive": 0.13629999999999998 }, "training_log": { "round": [ 1, 2, 3, 4 ], "avg_grader": [ 0.4958, 0.4912, 0.6015, 0.5548 ], "max_grader": [ 0.7391, 0.7236, 0.7529, 0.7705 ], "min_grader": [ 0.3698, 0.2527, 0.382, 0.3764 ], "avg_reward": [ 6.07, 6.093, 6.418, 6.467 ], "max_reward": [ 6.104, 6.1, 6.481, 6.527 ], "min_reward": [ 6.037, 6.076, 6.343, 6.366 ], "best_temperature": [ 1.4, 1.0, 0.7, 0.7 ] }, "all_episodes": [ { "round": 1, "task": "monthly_engage", "seed": 42, "grader_score": 0.4395, "total_reward": 6.1044, "temperature": 1.4 }, { "round": 1, "task": "monthly_strategic", "seed": 43, "grader_score": 0.6758, "total_reward": 6.0373, "temperature": 1.4 }, { "round": 1, "task": "monthly_competitive", "seed": 44, "grader_score": 0.3698, "total_reward": 6.0686, "temperature": 1.4 }, { "round": 1, "task": "monthly_engage", "seed": 45, "grader_score": 0.3806, "total_reward": 6.0643, "temperature": 1.4 }, { "round": 1, "task": "monthly_strategic", "seed": 46, "grader_score": 0.7391, "total_reward": 6.096, "temperature": 1.4 }, { "round": 1, "task": "monthly_competitive", "seed": 47, "grader_score": 0.3699, "total_reward": 6.0489999999999995, "temperature": 1.4 }, { "round": 2, "task": "monthly_engage", "seed": 142, "grader_score": 0.4335, "total_reward": 6.0995, "temperature": 1.0 }, { "round": 2, "task": "monthly_strategic", "seed": 143, "grader_score": 0.7236, "total_reward": 6.0992, "temperature": 1.0 }, { "round": 2, "task": "monthly_competitive", "seed": 144, "grader_score": 0.3789, "total_reward": 6.0943, "temperature": 1.0 }, { "round": 2, "task": "monthly_engage", "seed": 145, "grader_score": 0.4356, "total_reward": 6.0999, "temperature": 1.0 }, { "round": 2, "task": "monthly_strategic", "seed": 146, "grader_score": 0.7232, "total_reward": 6.0882, "temperature": 1.0 }, { "round": 2, "task": "monthly_competitive", "seed": 147, "grader_score": 0.2527, "total_reward": 6.0764, "temperature": 1.0 }, { "round": 3, "task": "monthly_engage", "seed": 242, "grader_score": 0.382, "total_reward": 6.4364, "temperature": 0.7 }, { "round": 3, "task": "monthly_strategic", "seed": 243, "grader_score": 0.6426, "total_reward": 6.4364, "temperature": 0.7 }, { "round": 3, "task": "monthly_competitive", "seed": 244, "grader_score": 0.7529, "total_reward": 6.3849, "temperature": 0.7 }, { "round": 3, "task": "monthly_engage", "seed": 245, "grader_score": 0.3935, "total_reward": 6.4805, "temperature": 0.7 }, { "round": 3, "task": "monthly_strategic", "seed": 246, "grader_score": 0.724, "total_reward": 6.4286, "temperature": 0.7 }, { "round": 3, "task": "monthly_competitive", "seed": 247, "grader_score": 0.7138, "total_reward": 6.3425, "temperature": 0.7 }, { "round": 4, "task": "monthly_engage", "seed": 342, "grader_score": 0.3764, "total_reward": 6.4858, "temperature": 0.7 }, { "round": 4, "task": "monthly_strategic", "seed": 343, "grader_score": 0.6314, "total_reward": 6.4636, "temperature": 0.7 }, { "round": 4, "task": "monthly_competitive", "seed": 344, "grader_score": 0.7705, "total_reward": 6.4934, "temperature": 0.7 }, { "round": 4, "task": "monthly_engage", "seed": 345, "grader_score": 0.3851, "total_reward": 6.4661, "temperature": 0.7 }, { "round": 4, "task": "monthly_strategic", "seed": 346, "grader_score": 0.6755, "total_reward": 6.5269, "temperature": 0.7 }, { "round": 4, "task": "monthly_competitive", "seed": 347, "grader_score": 0.4897, "total_reward": 6.3657, "temperature": 0.7 } ], "elapsed_seconds": 6034.9 }