# Store the plot images below via Git LFS instead of as regular blobs.
# Each rule must be on its own line: the first whitespace-separated token is
# the path pattern and every following token on that line is an attribute.
# `-text` unsets the text attribute for these paths.
plots/grpo_iter2_reward_curve.png filter=lfs diff=lfs merge=lfs -text
plots/grpo_iter2_reward_components.png filter=lfs diff=lfs merge=lfs -text
plots/grpo_iter2_belief_accuracy.png filter=lfs diff=lfs merge=lfs -text