Ctrl+K
- GRPO-Qwen2.5-7B
- dpo_Qwen2.5-14B-Instruct-RM-Gemma-2
- dpo_Qwen2.5-14B-Instruct
- llama3_sft_dpo_v15_8_new_rw_URM
- llama3_sft_mappo_sft_v15
- llama3_sft_mappo_v15_500_new_rw
- llama3_sft_mappo_v15_new_rw
- llama_sft_dpo_new_rw
- llama_sft_dpo_sft
- llama_sft_mappo_v15_350_new_rw
- llama_sft_mappo_v15_50_new_rw_URM
- qwen2.5_32B_DPO
- qwen2.5_32B_Instruct-MaPPO_v15
- qwen2.5_32B_Instruct-MaPPO_v15_iter2
- qwen2.5_3B_rapo
- qwen2.5_7B_IPO_beta_0_1
- qwen2.5_7B_LR_ablation_DPO
- qwen2.5_7B_cpo_beta_0_001_alpha_0_01
- qwen2.5_7B_cpo_beta_0_001_alpha_0_2
- qwen2.5_7B_cpo_beta_0_001_alpha_0_2_lr_5e_8_iter3
- qwen2.5_7B_cpo_beta_0_01_alpha_0_1
- qwen2.5_7B_cpo_beta_0_01_alpha_0_2
- qwen2.5_7B_cpo_beta_0_01_alpha_0_2_lr_5e_8_iter3
- qwen2.5_7B_cpo_beta_0_01_alpha_0_4
- qwen2.5_7B_cpo_mappo_beta_0_001_alpha_0_1
- qwen2.5_7B_cpo_mappo_beta_0_001_alpha_0_1_lr_5e_8_iter3
- qwen2.5_7B_cpo_mappo_beta_0_001_alpha_0_2
- qwen2.5_7B_cpo_mappo_beta_0_001_alpha_0_2_lr_5e_8_iter3
- qwen2.5_7B_cpo_mappo_beta_0_01_alpha_0_1
- qwen2.5_7B_cpo_mappo_beta_0_01_alpha_0_4
- qwen2.5_7B_cpo_mappo_beta_0_01_alpha_0_4_lr_5e_8_iter3
- qwen2.5_7B_dpo_new_rw
- qwen2.5_7B_dpo_sft
- qwen2.5_7B_dpo_v15_8_new_rw_URM
- qwen2.5_7B_ipo_mappo_beta_0_01_lr_1e_7_iter3
- qwen2.5_7B_ipo_mappo_beta_0_01_lr_5e_8_iter3
- qwen2.5_7B_ipo_mappo_beta_0_1_lr_1e_7_iter3
- qwen2.5_7B_ipo_mappo_beta_0_1_lr_5e_8_iter3
- qwen2.5_7B_ipo_mappo_beta_1
- qwen2.5_7B_mappo_sft
- qwen2.5_7B_mappo_v15_350_new_rw
- qwen2.5_7B_mappo_v15_500_new_rw
- qwen2.5_7B_mappo_v15_50_new_rw_URM
- qwen2.5_7B_mappo_v15_new_rw
- qwen2.5_7B_simpo_beta_2_gamma_0.1
- qwen2.5_7B_simpo_beta_2_gamma_0_01
- rapo_Qwen2.5-14B-Instruct-MaPPO_v15_max_min
- rapo_Qwen2.5-14B-Instruct-RAPO-RM-Gemma-2B
- rapo_Qwen2.5-7B-Instruct-RAPO-LR-Abalation
- 1.8 kB