salespath-grpo / eval_results.json
Lomesh7777's picture
Upload eval_results.json with huggingface_hub
97a0169 verified
{
"base_label": "./sft_checkpoint (PEFT on unsloth/Qwen2.5-1.5B-Instruct)",
"trained_label": "./grpo_checkpoint (PEFT on unsloth/Qwen2.5-1.5B-Instruct)",
"seed": 1234,
"episodes_per_level": 4,
"base": {
"1": {
"violations_per_episode": 0.25,
"ordering_rate": 0.9166666666666666,
"close_rate": 0.75,
"correct_disqual_rate": 0.0,
"mean_final_reward": 0.24625000000000002,
"mean_cum_reward": 0.9545833333333333,
"mean_turns": 6.75,
"n_episodes": 4,
"n_disqual_cases": 0
},
"2": {
"violations_per_episode": 1.0,
"ordering_rate": 0.8,
"close_rate": 0.0,
"correct_disqual_rate": 0.0,
"mean_final_reward": -0.08000000000000002,
"mean_cum_reward": 0.5800000000000001,
"mean_turns": 6.0,
"n_episodes": 4,
"n_disqual_cases": 0
},
"3": {
"violations_per_episode": 1.0,
"ordering_rate": 0.5714285714285714,
"close_rate": 0.0,
"correct_disqual_rate": 0.0,
"mean_final_reward": -0.08000000000000002,
"mean_cum_reward": 0.5342857142857143,
"mean_turns": 6.0,
"n_episodes": 4,
"n_disqual_cases": 0
},
"4": {
"violations_per_episode": 0.0,
"ordering_rate": 0.75,
"close_rate": 0.0,
"correct_disqual_rate": 0.75,
"mean_final_reward": 0.07500000000000001,
"mean_cum_reward": -0.6749999999999996,
"mean_turns": 9.5,
"n_episodes": 4,
"n_disqual_cases": 4
}
},
"trained": {
"1": {
"violations_per_episode": 0.0,
"ordering_rate": 1.0,
"close_rate": 1.0,
"correct_disqual_rate": 0.0,
"mean_final_reward": 0.3566666666666667,
"mean_cum_reward": 1.09,
"mean_turns": 7.0,
"n_episodes": 4,
"n_disqual_cases": 0
},
"2": {
"violations_per_episode": 0.75,
"ordering_rate": 0.8500000000000001,
"close_rate": 0.25,
"correct_disqual_rate": 0.0,
"mean_final_reward": 0.024999999999999994,
"mean_cum_reward": 0.7100000000000001,
"mean_turns": 6.25,
"n_episodes": 4,
"n_disqual_cases": 0
},
"3": {
"violations_per_episode": 1.25,
"ordering_rate": 0.5714285714285714,
"close_rate": 0.0,
"correct_disqual_rate": 0.0,
"mean_final_reward": -0.08000000000000002,
"mean_cum_reward": 0.5392857142857143,
"mean_turns": 6.25,
"n_episodes": 4,
"n_disqual_cases": 0
},
"4": {
"violations_per_episode": 0.25,
"ordering_rate": 1.0,
"close_rate": 0.0,
"correct_disqual_rate": 1.0,
"mean_final_reward": 0.2,
"mean_cum_reward": 0.7050000000000001,
"mean_turns": 6.25,
"n_episodes": 4,
"n_disqual_cases": 4
}
}
}