| { |
| "base_label": "./sft_checkpoint (PEFT on unsloth/Qwen2.5-1.5B-Instruct)", |
| "trained_label": "./grpo_checkpoint (PEFT on unsloth/Qwen2.5-1.5B-Instruct)", |
| "seed": 1234, |
| "episodes_per_level": 4, |
| "base": { |
| "1": { |
| "violations_per_episode": 0.25, |
| "ordering_rate": 0.9166666666666666, |
| "close_rate": 0.75, |
| "correct_disqual_rate": 0.0, |
| "mean_final_reward": 0.24625000000000002, |
| "mean_cum_reward": 0.9545833333333333, |
| "mean_turns": 6.75, |
| "n_episodes": 4, |
| "n_disqual_cases": 0 |
| }, |
| "2": { |
| "violations_per_episode": 1.0, |
| "ordering_rate": 0.8, |
| "close_rate": 0.0, |
| "correct_disqual_rate": 0.0, |
| "mean_final_reward": -0.08000000000000002, |
| "mean_cum_reward": 0.5800000000000001, |
| "mean_turns": 6.0, |
| "n_episodes": 4, |
| "n_disqual_cases": 0 |
| }, |
| "3": { |
| "violations_per_episode": 1.0, |
| "ordering_rate": 0.5714285714285714, |
| "close_rate": 0.0, |
| "correct_disqual_rate": 0.0, |
| "mean_final_reward": -0.08000000000000002, |
| "mean_cum_reward": 0.5342857142857143, |
| "mean_turns": 6.0, |
| "n_episodes": 4, |
| "n_disqual_cases": 0 |
| }, |
| "4": { |
| "violations_per_episode": 0.0, |
| "ordering_rate": 0.75, |
| "close_rate": 0.0, |
| "correct_disqual_rate": 0.75, |
| "mean_final_reward": 0.07500000000000001, |
| "mean_cum_reward": -0.6749999999999996, |
| "mean_turns": 9.5, |
| "n_episodes": 4, |
| "n_disqual_cases": 4 |
| } |
| }, |
| "trained": { |
| "1": { |
| "violations_per_episode": 0.0, |
| "ordering_rate": 1.0, |
| "close_rate": 1.0, |
| "correct_disqual_rate": 0.0, |
| "mean_final_reward": 0.3566666666666667, |
| "mean_cum_reward": 1.09, |
| "mean_turns": 7.0, |
| "n_episodes": 4, |
| "n_disqual_cases": 0 |
| }, |
| "2": { |
| "violations_per_episode": 0.75, |
| "ordering_rate": 0.8500000000000001, |
| "close_rate": 0.25, |
| "correct_disqual_rate": 0.0, |
| "mean_final_reward": 0.024999999999999994, |
| "mean_cum_reward": 0.7100000000000001, |
| "mean_turns": 6.25, |
| "n_episodes": 4, |
| "n_disqual_cases": 0 |
| }, |
| "3": { |
| "violations_per_episode": 1.25, |
| "ordering_rate": 0.5714285714285714, |
| "close_rate": 0.0, |
| "correct_disqual_rate": 0.0, |
| "mean_final_reward": -0.08000000000000002, |
| "mean_cum_reward": 0.5392857142857143, |
| "mean_turns": 6.25, |
| "n_episodes": 4, |
| "n_disqual_cases": 0 |
| }, |
| "4": { |
| "violations_per_episode": 0.25, |
| "ordering_rate": 1.0, |
| "close_rate": 0.0, |
| "correct_disqual_rate": 1.0, |
| "mean_final_reward": 0.2, |
| "mean_cum_reward": 0.7050000000000001, |
| "mean_turns": 6.25, |
| "n_episodes": 4, |
| "n_disqual_cases": 4 |
| } |
| } |
| } |