diff --git "a/data/benchmarks.json" "b/data/benchmarks.json" new file mode 100644--- /dev/null +++ "b/data/benchmarks.json" @@ -0,0 +1,39453 @@ +[ + { + "slug": "ai21/jamba-1.5-large", + "name": "Jamba 1.5 Large", + "mmlu": 0.812, + "mmlu_pro": 0.535, + "gpqa": 0.369, + "arc": 0.93, + "gsm8k": 0.87, + "arena_elo": 1288.66, + "arena_rank": 193, + "arena_votes": 8659 + }, + { + "slug": "ai21/jamba-1.5-mini", + "name": "Jamba 1.5 Mini", + "mmlu": 0.697, + "mmlu_pro": 0.425, + "gpqa": 0.323, + "arc": 0.857, + "gsm8k": 0.758, + "arena_elo": 1238.91, + "arena_rank": 225, + "arena_votes": 8854 + }, + { + "slug": "amazon/nova-lite", + "name": "Nova Lite", + "mmlu": 0.805, + "arc": 0.924, + "human_eval": 0.854, + "mmmu": 0.562, + "drop": 0.802, + "gpqa": 0.42, + "math": 0.733, + "gsm8k": 0.945, + "ifeval": 0.897, + "bbh": 0.824 + }, + { + "slug": "amazon/nova-micro", + "name": "Nova Micro", + "mmlu": 0.776, + "arc": 0.902, + "drop": 0.793, + "gpqa": 0.4, + "math": 0.693, + "gsm8k": 0.923, + "ifeval": 0.872, + "bbh": 0.795, + "human_eval": 0.811 + }, + { + "slug": "amazon/nova-pro", + "name": "Nova Pro", + "mmlu": 0.859, + "gsm8k": 0.948, + "mmmu": 0.617, + "arc": 0.948, + "drop": 0.854, + "gpqa": 0.469, + "math": 0.766, + "ifeval": 0.921, + "bbh": 0.869, + "human_eval": 0.89, + "aider_pass_rate": 0.444 + }, + { + "slug": "anthropic/claude-3-5-haiku-20241022", + "name": "Claude 3.5 Haiku", + "gpqa": 0.416, + "mmlu_pro": 0.65, + "human_eval": 0.881, + "math": 0.694, + "mgsm": 0.856, + "drop": 0.831, + "lb_name": "claude-3-5-haiku-20241022", + "lb_global": 0.44959999999999994, + "lb_reasoning": 0.31125, + "lb_coding": 0.53169, + "lb_math": 0.3484066666666667, + "lb_language": 0.39707333333333333, + "lb_if": 0.6187925, + "lb_data_analysis": 0.5411900000000001, + "arena_elo": 1323.39, + "arena_rank": 157, + "arena_votes": 70972, + "aider_pass_rate": 0.617 + }, + { + "slug": "anthropic/claude-3-5-sonnet-20240620", + "name": "Claude 3.5 Sonnet", + "gpqa": 0.594, + "mmlu": 0.904, + "human_eval": 0.92, + "math": 0.711, + "mgsm": 0.916, + "drop": 0.871, + "bbh": 0.931, + "gsm8k": 0.964, + "mmlu_pro": 0.761, + "lb_name": "claude-3-5-sonnet-20240620", + "lb_global": 0.6043855555555556, + "lb_reasoning": 0.48, + "lb_math": 0.5332366666666667, + "lb_language": 0.56937, + "lb_if": 0.7229999999999999, + "lb_data_analysis": 0.5411, + "arena_elo": 1342.44, + "arena_rank": 135, + "arena_votes": 82417, + "aider_pass_rate": 0.5710000000000001 + }, + { + "slug": "anthropic/claude-3-5-sonnet-20241022", + "name": "Claude 3.5 Sonnet", + "gpqa": 0.672, + "mmlu": 0.904, + "mmlu_pro": 0.776, + "human_eval": 0.937, + "math": 0.783, + "mgsm": 0.916, + "drop": 0.871, + "bbh": 0.931, + "gsm8k": 0.964, + "mmmu": 0.683, + "lb_name": "claude-3-5-sonnet-20241022", + "lb_global": 0.5776941176470588, + "lb_reasoning": 0.51, + "lb_coding": 0.738975, + "lb_math": 0.5054333333333334, + "lb_language": 0.54477, + "lb_if": 0.69296, + "lb_data_analysis": 0.5618650000000001, + "arena_elo": 1372.29, + "arena_rank": 103, + "arena_votes": 89293, + "aider_pass_rate": 0.6920000000000001 + }, + { + "slug": "anthropic/claude-3-haiku-20240307", + "name": "Claude 3 Haiku", + "mmlu": 0.752, + "gpqa": 0.333, + "gsm8k": 0.889, + "math": 0.389, + "mgsm": 0.751, + "human_eval": 0.759, + "drop": 0.784, + "bbh": 0.737, + "arc": 0.892, + "hellaswag": 0.859, + "lb_name": "claude-3-haiku-20240307", + "lb_global": 0.37593888888888893, + "lb_reasoning": 0.33999999999999997, + "lb_math": 0.22936666666666664, + "lb_language": 0.30073333333333335, + "lb_if": 0.6402924999999999, + "lb_data_analysis": 0.3731, + "arena_elo": 1261.08, + "arena_rank": 217, + "arena_votes": 117705, + "aider_pass_rate": 0.406 + }, + { + "slug": "anthropic/claude-3-opus-20240229", + "name": "Claude 3 Opus", + "mmlu": 0.868, + "gpqa": 0.504, + "gsm8k": 0.95, + "math": 0.601, + "mgsm": 0.907, + "human_eval": 0.849, + "drop": 0.831, + "bbh": 0.868, + "arc": 0.964, + "hellaswag": 0.954, + "mmlu_pro": 0.685, + "lb_name": "claude-3-opus-20240229", + "lb_global": 0.47859833333333335, + "lb_reasoning": 0.35875, + "lb_math": 0.42924999999999996, + "lb_language": 0.53574, + "lb_if": 0.6388750000000001, + "lb_data_analysis": 0.5784, + "arena_elo": 1321.93, + "arena_rank": 162, + "arena_votes": 194904, + "aider_pass_rate": 0.534 + }, + { + "slug": "anthropic/claude-3-sonnet-20240229", + "name": "Claude 3 Sonnet", + "mmlu": 0.79, + "gpqa": 0.404, + "gsm8k": 0.923, + "math": 0.431, + "mgsm": 0.835, + "human_eval": 0.73, + "drop": 0.789, + "bbh": 0.829, + "arc": 0.932, + "hellaswag": 0.89, + "mmlu_pro": 0.568, + "lb_name": "claude-3-sonnet-20240229", + "lb_global": 0.39621777777777784, + "lb_reasoning": 0.35, + "lb_math": 0.22158666666666668, + "lb_language": 0.38083333333333336, + "lb_if": 0.6500425, + "lb_data_analysis": 0.38839999999999997, + "arena_elo": 1280.97, + "arena_rank": 202, + "arena_votes": 109289, + "aider_pass_rate": 0.436 + }, + { + "slug": "cohere/command-r-plus-04-2024", + "name": "Command R+", + "arc": 0.7099, + "hellaswag": 0.886, + "mmlu": 0.757, + "gsm8k": 0.707, + "lb_name": "command-r", + "lb_global": 0.2825211111111111, + "lb_reasoning": 0.29000000000000004, + "lb_math": 0.11454000000000002, + "lb_language": 0.14644333333333334, + "lb_if": 0.571625, + "lb_data_analysis": 0.2354, + "arena_elo": 1226.94, + "arena_rank": 233, + "arena_votes": 54038 + }, + { + "slug": "deepseek/deepseek-r1", + "name": "DeepSeek-R1", + "mmlu": 0.908, + "mmlu_pro": 0.84, + "drop": 0.922, + "ifeval": 0.833, + "gpqa": 0.715, + "math": 0.973, + "lb_name": "deepseek-r1", + "lb_global": 0.7311576470588236, + "lb_reasoning": 0.7575000000000001, + "lb_coding": 0.760715, + "lb_math": 0.7791, + "lb_language": 0.5477066666666667, + "lb_if": 0.8050825, + "lb_data_analysis": 0.69625, + "arena_elo": 1397.65, + "arena_rank": 76, + "arena_votes": 18537 + }, + { + "slug": "deepseek/deepseek-v2.5", + "name": "DeepSeek-V2.5", + "human_eval": 0.89, + "mmlu": 0.804, + "gsm8k": 0.951, + "math": 0.747, + "bbh": 0.843, + "lb_name": "deepseek-v2.5", + "lb_global": 0.4862783333333333, + "lb_reasoning": 0.30000000000000004, + "lb_math": 0.4794766666666667, + "lb_language": 0.3518266666666667, + "lb_if": 0.6915024999999999, + "lb_data_analysis": 0.4417, + "arena_elo": 1306.99, + "arena_rank": 181, + "arena_votes": 24574, + "aider_pass_rate": 0.5489999999999999 + }, + { + "slug": "deepseek/deepseek-v3", + "name": "DeepSeek-V3", + "mmlu": 0.885, + "mmlu_pro": 0.759, + "drop": 0.916, + "ifeval": 0.861, + "gpqa": 0.591, + "math": 0.902, + "lb_name": "deepseek-v3", + "lb_global": 0.6120366666666667, + "lb_reasoning": 0.42125, + "lb_math": 0.6054, + "lb_language": 0.47484666666666664, + "lb_if": 0.75246, + "lb_data_analysis": 0.6241, + "arena_elo": 1358.47, + "arena_rank": 114, + "arena_votes": 21788 + }, + { + "slug": "google/gemini-1.0-pro", + "name": "Gemini 1.0 Pro", + "mmlu": 0.718, + "math": 0.326, + "gpqa": 0.279, + "mmmu": 0.479 + }, + { + "slug": "google/gemini-1.5-flash-8b", + "name": "Gemini 1.5 Flash 8B", + "mmlu_pro": 0.587, + "math": 0.587, + "gpqa": 0.384, + "mmmu": 0.537 + }, + { + "slug": "google/gemini-1.5-flash", + "name": "Gemini 1.5 Flash", + "mmlu_pro": 0.673, + "mmlu": 0.789, + "math": 0.779, + "gpqa": 0.51, + "mmmu": 0.623, + "gsm8k": 0.862, + "bbh": 0.855, + "hellaswag": 0.865, + "human_eval": 0.743, + "mgsm": 0.826 + }, + { + "slug": "google/gemini-1.5-pro", + "name": "Gemini 1.5 Pro", + "mmlu": 0.859, + "mmlu_pro": 0.758, + "math": 0.646, + "gsm8k": 0.908, + "bbh": 0.892, + "drop": 0.749, + "hellaswag": 0.933, + "human_eval": 0.841, + "mgsm": 0.875, + "gpqa": 0.591, + "mmmu": 0.659 + }, + { + "slug": "google/gemini-2.0-flash-thinking", + "name": "Gemini 2.0 Flash Thinking", + "gpqa": 0.742, + "mmmu": 0.754 + }, + { + "slug": "google/gemini-2.0-flash", + "name": "Gemini 2.0 Flash", + "mmlu_pro": 0.764, + "math": 0.897, + "gpqa": 0.621, + "mmmu": 0.707, + "lb_name": "gemini-2.0-flash", + "lb_global": 0.6323894444444444, + "lb_reasoning": 0.45875, + "lb_math": 0.6561933333333333, + "lb_language": 0.40685666666666664, + "lb_if": 0.8578749999999999, + "lb_data_analysis": 0.7332000000000001 + }, + { + "slug": "google/gemma-2-27b-it", + "name": "Gemma 2 27B", + "mmlu": 0.752, + "hellaswag": 0.864, + "arc": 0.886, + "human_eval": 0.518, + "mbpp": 0.626, + "gsm8k": 0.74, + "math": 0.423, + "hf_id": "google/gemma-2-27b", + "params_b": 27.227, + "ifeval": 0.24752213017017072, + "bbh": 0.5642908317482057, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.4370844414893617, + "hf_math_lvl5": 0.23867069486404835, + "hf_musr": 0.40330208333333334, + "hf_avg": 36.17428251510342, + "lb_name": "gemma-2-27b-it", + "lb_global": 0.3941138888888889, + "lb_reasoning": 0.23125, + "lb_math": 0.2645733333333334, + "lb_language": 0.32621666666666665, + "lb_if": 0.5810025000000001, + "lb_data_analysis": 0.4481, + "arena_elo": 1288.04, + "arena_rank": 194, + "arena_votes": 75764 + }, + { + "slug": "google/gemma-2-9b-it", + "name": "Gemma 2 9B", + "mmlu": 0.713, + "hellaswag": 0.819, + "arc": 0.88, + "human_eval": 0.402, + "mbpp": 0.524, + "gsm8k": 0.686, + "math": 0.366, + "hf_id": "BlackBeenie/Neos-Gemma-2-9b", + "params_b": 9.242, + "ifeval": 0.5875665456544192, + "bbh": 0.5502975126048852, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.39810505319148937, + "hf_math_lvl5": 0.2054380664652568, + "hf_musr": 0.4321979166666667, + "hf_avg": 34.20907135232025, + "lb_name": "gemma-2-9b-it", + "lb_global": 0.303385, + "lb_reasoning": 0.1275, + "lb_math": 0.19804333333333335, + "lb_language": 0.25531, + "lb_if": 0.52621, + "lb_data_analysis": 0.2959, + "arena_elo": 1265.46, + "arena_rank": 213, + "arena_votes": 54615 + }, + { + "slug": "meta/llama-3.1-405b-instruct", + "name": "Llama 3.1 405B Instruct", + "mmlu": 0.873, + "mmlu_pro": 0.733, + "ifeval": 0.886, + "arc": 0.969, + "gpqa": 0.507, + "human_eval": 0.89, + "gsm8k": 0.968, + "math": 0.738, + "mgsm": 0.916, + "drop": 0.848 + }, + { + "slug": "meta/llama-3.1-70b-instruct", + "name": "Llama 3.1 70B Instruct", + "mmlu": 0.836, + "mmlu_pro": 0.664, + "ifeval": 0.875, + "arc": 0.948, + "gpqa": 0.417, + "human_eval": 0.805, + "gsm8k": 0.951, + "math": 0.68, + "mgsm": 0.869, + "drop": 0.796, + "hf_id": "meta-llama/Llama-3.1-70B-Instruct", + "params_b": 70.554, + "bbh": 0.6917287453663654, + "hf_math_lvl5": 0.3806646525679758, + "hf_musr": 0.45806250000000004, + "hf_avg": 43.409948245645786, + "arena_elo": 1293.43, + "arena_rank": 191, + "arena_votes": 55234, + "aider_pass_rate": 0.436 + }, + { + "slug": "meta/llama-3.1-8b-instruct", + "name": "Llama 3.1 8B Instruct", + "mmlu": 0.694, + "mmlu_pro": 0.483, + "ifeval": 0.804, + "arc": 0.834, + "gpqa": 0.304, + "human_eval": 0.726, + "mbpp": 0.728, + "gsm8k": 0.845, + "math": 0.519, + "mgsm": 0.689, + "drop": 0.595, + "hf_id": "BAAI/OPI-Llama-3.1-8B-Instruct", + "params_b": 8.03, + "bbh": 0.3551224419497605, + "hf_math_lvl5": 0.1729607250755287, + "hf_musr": 0.3845416666666666, + "hf_avg": 28.01011138792457, + "arena_elo": 1211.47, + "arena_rank": 243, + "arena_votes": 49605, + "aider_pass_rate": 0.263 + }, + { + "slug": "meta/llama-3.2-11b-instruct", + "name": "Llama 3.2 11B Instruct", + "mmlu": 0.73, + "mmmu": 0.507, + "math": 0.519, + "gpqa": 0.328, + "mgsm": 0.689 + }, + { + "slug": "meta/llama-3.2-3b-instruct", + "name": "Llama 3.2 3B Instruct", + "mmlu": 0.634, + "ifeval": 0.774, + "gsm8k": 0.777, + "math": 0.48, + "arc": 0.786, + "gpqa": 0.328, + "hellaswag": 0.698, + "mgsm": 0.582, + "hf_id": "meta-llama/Llama-3.2-3B-Instruct", + "params_b": 3.213, + "bbh": 0.4610070239466069, + "mmlu_pro": 0.3194813829787234, + "hf_math_lvl5": 0.17673716012084592, + "hf_musr": 0.3528541666666667, + "hf_avg": 24.204650807793456, + "arena_elo": 1166.61, + "arena_rank": 269, + "arena_votes": 7936 + }, + { + "slug": "meta/llama-3.2-90b-instruct", + "name": "Llama 3.2 90B Instruct", + "mmmu": 0.603, + "mmlu": 0.86, + "math": 0.68, + "gpqa": 0.467, + "mgsm": 0.869 + }, + { + "slug": "meta/llama-3.3-70b-instruct", + "name": "Llama 3.3 70B Instruct", + "mmlu": 0.86, + "mmlu_pro": 0.689, + "ifeval": 0.921, + "gpqa": 0.505, + "human_eval": 0.884, + "math": 0.77, + "mgsm": 0.911, + "hf_id": "meta-llama/Llama-3.3-70B-Instruct", + "params_b": 70.554, + "bbh": 0.6919312828325811, + "hf_math_lvl5": 0.48338368580060426, + "hf_musr": 0.44612500000000005, + "hf_avg": 44.84747145129876, + "arena_elo": 1319.32, + "arena_rank": 166, + "arena_votes": 55454, + "aider_pass_rate": 0.42100000000000004 + }, + { + "slug": "microsoft/phi-3.5-mini-instruct", + "name": "Phi-3.5-mini-instruct", + "mmlu": 0.554, + "mmlu_pro": 0.309, + "mgsm": 0.479, + "bbh": 0.69, + "arc": 0.846, + "gpqa": 0.304, + "hellaswag": 0.694, + "gsm8k": 0.862, + "math": 0.485, + "human_eval": 0.628, + "mbpp": 0.696, + "hf_id": "microsoft/Phi-3.5-mini-instruct", + "params_b": 3.821, + "ifeval": 0.5774500547436359, + "hf_math_lvl5": 0.19637462235649547, + "hf_musr": 0.402125, + "hf_avg": 28.184391192864627, + "lb_name": "phi-3.5-mini-instruct", + "lb_global": 0.297565, + "lb_reasoning": 0.36, + "lb_math": 0.16781000000000001, + "lb_language": 0.09666333333333332, + "lb_if": 0.583, + "lb_data_analysis": 0.19640000000000002 + }, + { + "slug": "microsoft/phi-3.5-moe-instruct", + "name": "Phi-3.5-MoE-instruct", + "bbh": 0.791, + "mmlu": 0.789, + "mmlu_pro": 0.543, + "arc": 0.91, + "gpqa": 0.368, + "hellaswag": 0.838, + "mgsm": 0.587, + "gsm8k": 0.887, + "math": 0.595, + "human_eval": 0.707, + "mbpp": 0.808, + "hf_id": "microsoft/Phi-3.5-MoE-instruct", + "params_b": 42, + "ifeval": 0.692454908531585, + "hf_math_lvl5": 0.3119335347432024, + "hf_musr": 0.4564791666666667, + "hf_avg": 36.8789647220093, + "lb_name": "phi-3.5-moe-instruct", + "lb_global": 0.3618783333333333, + "lb_reasoning": 0.34, + "lb_math": 0.26788666666666666, + "lb_language": 0.17071666666666666, + "lb_if": 0.5973325, + "lb_data_analysis": 0.30689999999999995 + }, + { + "slug": "microsoft/phi-3.5-vision-instruct", + "name": "Phi-3.5-vision-instruct", + "mmmu": 0.43 + }, + { + "slug": "microsoft/phi-4", + "name": "Phi-4", + "mmlu": 0.848, + "gpqa": 0.561, + "math": 0.804, + "human_eval": 0.826, + "mgsm": 0.806, + "drop": 0.755, + "mmlu_pro": 0.704, + "ifeval": 0.63, + "hf_id": "NyxKrage/Microsoft_Phi-4", + "params_b": 14.66, + "bbh": 0.6690562305322874, + "hf_math_lvl5": 0.5, + "hf_musr": 0.41142708333333333, + "hf_avg": 40.728304291060965, + "lb_name": "phi-4", + "lb_global": 0.4230505555555555, + "lb_reasoning": 0.39749999999999996, + "lb_math": 0.4302633333333333, + "lb_language": 0.29333666666666663, + "lb_if": 0.5838349999999999, + "lb_data_analysis": 0.4376, + "arena_elo": 1256.07, + "arena_rank": 220, + "arena_votes": 24126 + }, + { + "slug": "mistral/codestral-22b", + "name": "Codestral-22B", + "human_eval": 0.811, + "mbpp": 0.782 + }, + { + "slug": "mistral/ministral-8b-instruct-2410", + "name": "Ministral 8B Instruct", + "mmlu": 0.65, + "human_eval": 0.348, + "math": 0.545, + "mbpp": 0.7, + "arc": 0.719, + "hf_id": "mistralai/Ministral-8B-Instruct-2410", + "params_b": 8.02, + "ifeval": 0.5896399331551394, + "bbh": 0.47616402016891385, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3291223404255319, + "hf_math_lvl5": 0.19561933534743203, + "hf_musr": 0.41375, + "hf_avg": 24.185603139774845 + }, + { + "slug": "mistral/mistral-large-2-2407", + "name": "Mistral Large 2", + "mmlu": 0.84, + "human_eval": 0.92, + "gsm8k": 0.93, + "aider_pass_rate": 0.39799999999999996 + }, + { + "slug": "mistral/mistral-nemo-instruct-2407", + "name": "Mistral NeMo Instruct", + "hellaswag": 0.835, + "mmlu": 0.68, + "hf_id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", + "params_b": 12.248, + "ifeval": 0.5451269298793867, + "bbh": 0.5261780772532613, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.35206117021276595, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.38999999999999996, + "hf_avg": 24.665599941304677 + }, + { + "slug": "mistral/mistral-small-2409", + "name": "Mistral Small" + }, + { + "slug": "mistral/mistral-small-3", + "name": "Mistral Small 3", + "mmlu_pro": 0.663, + "human_eval": 0.848, + "gpqa": 0.453, + "math": 0.706, + "ifeval": 0.829 + }, + { + "slug": "mistral/pixtral-12b-2409", + "name": "Pixtral-12B", + "mmmu": 0.525, + "ifeval": 0.613, + "mmlu": 0.692, + "math": 0.481, + "human_eval": 0.72 + }, + { + "slug": "mistral/pixtral-large", + "name": "Pixtral Large", + "mmmu": 0.64 + }, + { + "slug": "moonshotai/kimi-k1.5", + "name": "Kimi-k1.5", + "mmlu": 0.874, + "math": 0.962, + "mmmu": 0.7, + "ifeval": 0.872 + }, + { + "slug": "nvidia/llama-3.1-nemotron-70b-instruct", + "name": "Llama 3.1 Nemotron 70B Instruct", + "arc": 0.692, + "mmlu": 0.802, + "hellaswag": 0.8558, + "gsm8k": 0.9143, + "human_eval": 0.7384, + "lb_name": "llama-3.1-nemotron-70b-instruct", + "lb_global": 0.45162166666666675, + "lb_reasoning": 0.45, + "lb_math": 0.37562333333333325, + "lb_language": 0.31805, + "lb_if": 0.6957099999999999, + "lb_data_analysis": 0.33599999999999997, + "arena_elo": 1298.63, + "arena_rank": 189, + "arena_votes": 7136 + }, + { + "slug": "openai/gpt-3.5-turbo-0125", + "name": "GPT-3.5 Turbo", + "mmlu": 0.698, + "gpqa": 0.308, + "drop": 0.702, + "mgsm": 0.563, + "math": 0.431, + "human_eval": 0.68, + "mmmu": 0, + "lb_name": "gpt-3.5-turbo-0125", + "lb_global": 0.35024277777777774, + "lb_reasoning": 0.31000000000000005, + "lb_math": 0.1893166666666667, + "lb_language": 0.2421633333333333, + "lb_if": 0.604665, + "lb_data_analysis": 0.3682, + "arena_elo": 1223.96, + "arena_rank": 235, + "arena_votes": 66191, + "aider_pass_rate": 0.414 + }, + { + "slug": "openai/gpt-4-0613", + "name": "GPT-4", + "mmlu": 0.864, + "hellaswag": 0.953, + "arc": 0.963, + "human_eval": 0.67, + "drop": 0.809, + "gpqa": 0.357, + "math": 0.42, + "mgsm": 0.745, + "lb_name": "gpt-4-0613", + "lb_global": 0.4705527777777777, + "lb_reasoning": 0.42, + "lb_math": 0.3348166666666667, + "lb_language": 0.4956833333333333, + "lb_if": 0.717875, + "lb_data_analysis": 0.39039999999999997, + "arena_elo": 1275.06, + "arena_rank": 207, + "arena_votes": 88721, + "aider_pass_rate": 0.466 + }, + { + "slug": "openai/gpt-4-turbo-2024-04-09", + "name": "GPT-4 Turbo", + "mmlu": 0.865, + "gpqa": 0.48, + "math": 0.726, + "human_eval": 0.871, + "mgsm": 0.885, + "drop": 0.86, + "lb_name": "gpt-4-turbo-2024-04-09", + "lb_global": 0.5307516666666667, + "lb_reasoning": 0.42000000000000004, + "lb_math": 0.42685666666666666, + "lb_language": 0.45262, + "lb_if": 0.713875, + "lb_data_analysis": 0.4998, + "arena_elo": 1324.13, + "arena_rank": 156, + "arena_votes": 98130 + }, + { + "slug": "openai/gpt-4o-2024-05-13", + "name": "GPT-4o", + "mmlu": 0.887, + "gpqa": 0.536, + "math": 0.766, + "human_eval": 0.902, + "mgsm": 0.905, + "drop": 0.834, + "mmlu_pro": 0.726, + "lb_name": "gpt-4o-2024-05-13", + "lb_global": 0.5524327777777779, + "lb_reasoning": 0.4, + "lb_math": 0.45983666666666667, + "lb_language": 0.5393533333333334, + "lb_if": 0.7217100000000001, + "lb_data_analysis": 0.4661, + "arena_elo": 1345.71, + "arena_rank": 133, + "arena_votes": 112863, + "aider_pass_rate": 0.602 + }, + { + "slug": "openai/gpt-4o-2024-08-06", + "name": "GPT-4o", + "gpqa": 0.536, + "mmlu": 0.88, + "mmmu": 0.691, + "mmlu_pro": 0.747, + "lb_name": "gpt-4o-2024-08-06", + "lb_global": 0.5125105555555556, + "lb_reasoning": 0.47875, + "lb_math": 0.45724666666666663, + "lb_language": 0.4563466666666667, + "lb_if": 0.6858299999999999, + "lb_data_analysis": 0.6236999999999999, + "arena_elo": 1334.94, + "arena_rank": 145, + "arena_votes": 45498, + "aider_pass_rate": 0.5710000000000001 + }, + { + "slug": "openai/gpt-4o-mini-2024-07-18", + "name": "GPT-4o mini", + "mmlu": 0.82, + "human_eval": 0.872, + "mmmu": 0.594, + "gpqa": 0.402, + "drop": 0.797, + "mgsm": 0.87, + "math": 0.702, + "lb_name": "gpt-4o-mini-2024-07-18", + "lb_global": 0.428314705882353, + "lb_reasoning": 0.30125, + "lb_coding": 0.5502149999999999, + "lb_math": 0.3804666666666667, + "lb_language": 0.29879333333333336, + "lb_if": 0.5679974999999999, + "lb_data_analysis": 0.55099, + "arena_elo": 1317.68, + "arena_rank": 169, + "arena_votes": 68794, + "aider_pass_rate": 0.406 + }, + { + "slug": "openai/o1-2024-12-17", + "name": "o1", + "math": 0.964, + "mmlu": 0.918, + "gsm8k": 0.971, + "human_eval": 0.881, + "gpqa": 0.757, + "mmmu": 0.773, + "mgsm": 0.893, + "hf_id": "AIDC-AI/Marco-o1", + "params_b": 7.616, + "ifeval": 0.477083028586373, + "bbh": 0.5364362696398749, + "mmlu_pro": 0.41165226063829785, + "hf_math_lvl5": 0.37462235649546827, + "hf_musr": 0.41384375, + "hf_avg": 27.639223265636087, + "arena_elo": 1401.92, + "arena_rank": 70, + "arena_votes": 27822, + "aider_pass_rate": 0.654 + }, + { + "slug": "openai/o1-mini", + "name": "o1-mini", + "human_eval": 0.924, + "mmlu": 0.852, + "gpqa": 0.6, + "math": 0.9, + "arena_elo": 1336.76, + "arena_rank": 141, + "arena_votes": 51986, + "aider_pass_rate": 0.5 + }, + { + "slug": "openai/o1-preview", + "name": "o1-preview", + "math": 0.855, + "mmlu": 0.908, + "gpqa": 0.733, + "mgsm": 0.908, + "arena_elo": 1388.16, + "arena_rank": 88, + "arena_votes": 31120, + "aider_pass_rate": 0.579 + }, + { + "slug": "openai/o1-pro-mode", + "name": "o1-pro", + "gpqa": 0.79 + }, + { + "slug": "openai/o3-mini", + "name": "o3-mini", + "gpqa": 0.797, + "mmlu": 0.869, + "math": 0.979, + "mgsm": 0.92, + "arena_elo": 1348.1, + "arena_rank": 125, + "arena_votes": 58451 + }, + { + "slug": "openai/o3", + "name": "o3", + "gpqa": 0.877 + }, + { + "slug": "qwen/qvq-72b-preview", + "name": "QvQ-72B-Preview", + "mmmu": 0.703 + }, + { + "slug": "qwen/qwen-2.5-14b-instruct", + "name": "Qwen2.5 14B Instruct", + "mmlu": 0.797, + "mmlu_pro": 0.637, + "bbh": 0.782, + "arc": 0.673, + "gpqa": 0.455, + "math": 0.8, + "gsm8k": 0.948, + "human_eval": 0.835, + "mbpp": 0.82, + "hf_id": "Qwen/Qwen2.5-14B-Instruct", + "params_b": 14.77, + "ifeval": 0.8157776920792386, + "hf_math_lvl5": 0.552870090634441, + "hf_musr": 0.414, + "hf_avg": 41.60549009199536 + }, + { + "slug": "qwen/qwen-2.5-32b-instruct", + "name": "Qwen2.5 32B Instruct", + "mmlu": 0.833, + "mmlu_pro": 0.69, + "bbh": 0.845, + "arc": 0.704, + "hellaswag": 0.852, + "gpqa": 0.495, + "math": 0.831, + "gsm8k": 0.959, + "human_eval": 0.884, + "mbpp": 0.84, + "hf_id": "Qwen/Qwen2.5-32B-Instruct", + "params_b": 32.764, + "ifeval": 0.8346121623957765, + "hf_math_lvl5": 0.6253776435045317, + "hf_musr": 0.42612500000000003, + "hf_avg": 46.59714569921449 + }, + { + "slug": "qwen/qwen-2.5-72b-instruct", + "name": "Qwen2.5 72B Instruct", + "mmlu_pro": 0.711, + "gpqa": 0.49, + "math": 0.831, + "gsm8k": 0.958, + "human_eval": 0.866, + "mbpp": 0.882, + "ifeval": 0.841, + "hf_id": "Qwen/Qwen2.5-72B-Instruct", + "params_b": 72.706, + "bbh": 0.7272747321744824, + "hf_math_lvl5": 0.5981873111782477, + "hf_musr": 0.42060416666666667, + "hf_avg": 47.98045991216864, + "lb_name": "qwen2.5-72b-instruct", + "lb_global": 0.5393961111111111, + "lb_reasoning": 0.39, + "lb_math": 0.5235966666666667, + "lb_language": 0.38114999999999993, + "lb_if": 0.7550025, + "lb_data_analysis": 0.4718, + "arena_elo": 1302.64, + "arena_rank": 188, + "arena_votes": 39409 + }, + { + "slug": "qwen/qwen-2.5-7b-instruct", + "name": "Qwen2.5 7B Instruct", + "mmlu_pro": 0.563, + "math": 0.755, + "gsm8k": 0.916, + "human_eval": 0.848, + "mbpp": 0.792, + "ifeval": 0.712, + "gpqa": 0.364, + "hf_id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", + "params_b": 7.616, + "bbh": 0.5363681457807072, + "hf_math_lvl5": 0.5, + "hf_musr": 0.40203125, + "hf_avg": 35.200108659947965 + }, + { + "slug": "qwen/qwen-2.5-coder-32b-instruct", + "name": "Qwen2.5-Coder 32B Instruct", + "human_eval": 0.927, + "mbpp": 0.902, + "arc": 0.705, + "hellaswag": 0.83, + "math": 0.572, + "gsm8k": 0.911, + "mmlu": 0.751, + "mmlu_pro": 0.504, + "hf_id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "params_b": 32.764, + "ifeval": 0.7265267268625026, + "bbh": 0.6625222222405129, + "gpqa": 0.348993288590604, + "hf_math_lvl5": 0.4954682779456193, + "hf_musr": 0.4385833333333333, + "hf_avg": 39.885471774422804, + "lb_name": "qwen2.5-coder-32b-instruct", + "lb_global": 0.46328222222222215, + "lb_reasoning": 0.28125, + "lb_math": 0.4661433333333333, + "lb_language": 0.23245333333333332, + "lb_if": 0.5869175, + "lb_data_analysis": 0.48810000000000003, + "arena_elo": 1270.57, + "arena_rank": 211, + "arena_votes": 5430, + "aider_pass_rate": 0.594 + }, + { + "slug": "qwen/qwen-2.5-coder-7b-instruct", + "name": "Qwen2.5-Coder 7B Instruct", + "math": 0.466, + "gsm8k": 0.839, + "mmlu": 0.676, + "mmlu_pro": 0.401, + "arc": 0.609, + "hellaswag": 0.768, + "human_eval": 0.884, + "mbpp": 0.835, + "hf_id": "Qwen/Qwen2.5-Coder-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.6101477413263474, + "bbh": 0.5007976986224548, + "gpqa": 0.29194630872483224, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.4099375, + "hf_avg": 22.52451581645211, + "aider_pass_rate": 0.519 + }, + { + "slug": "qwen/qwen2-72b-instruct", + "name": "Qwen2 72B Instruct", + "mmlu": 0.823, + "mmlu_pro": 0.644, + "gpqa": 0.424, + "bbh": 0.824, + "hellaswag": 0.876, + "arc": 0.689, + "human_eval": 0.86, + "mbpp": 0.802, + "gsm8k": 0.911, + "math": 0.597, + "hf_id": "Qwen/Qwen2-72B-Instruct", + "params_b": 72.706, + "ifeval": 0.7989168738945996, + "hf_math_lvl5": 0.4131419939577039, + "hf_musr": 0.44007291666666665, + "hf_avg": 42.07422014151536, + "lb_name": "qwen2-72b-instruct", + "lb_global": 0.42140222222222223, + "lb_reasoning": 0.4, + "lb_math": 0.43436666666666673, + "lb_language": 0.29213666666666666, + "lb_if": 0.68271, + "lb_data_analysis": 0.1636, + "arena_elo": 1261.79, + "arena_rank": 216, + "arena_votes": 37325, + "aider_pass_rate": 0.444 + }, + { + "slug": "qwen/qwen2-7b-instruct", + "name": "Qwen2 7B Instruct", + "mmlu": 0.705, + "mmlu_pro": 0.441, + "gpqa": 0.253, + "human_eval": 0.799, + "mbpp": 0.672, + "gsm8k": 0.823, + "math": 0.496, + "hf_id": "Alibaba-NLP/gte-Qwen2-7B-instruct", + "params_b": 7.613, + "ifeval": 0.22554045488193547, + "bbh": 0.4495144990818469, + "hf_math_lvl5": 0.2764350453172205, + "hf_musr": 0.39279166666666665, + "hf_avg": 27.93668778218485, + "lb_name": "qwen2-7b-instruct", + "lb_global": 0.2746205555555556, + "lb_reasoning": 0.22, + "lb_math": 0.26868000000000003, + "lb_language": 0.10208333333333335, + "lb_if": 0.4473775, + "lb_data_analysis": 0.19119999999999998 + }, + { + "slug": "qwen/qwen2-vl-72b", + "name": "Qwen2-VL-72B-Instruct", + "mmmu": 0.645, + "hf_id": "Qwen/Qwen2-VL-72B-Instruct", + "params_b": 73.406, + "ifeval": 0.5982326892644849, + "bbh": 0.6946287292338682, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.5717253989361702, + "hf_math_lvl5": 0.34441087613293053, + "hf_musr": 0.44921875, + "hf_avg": 39.53661995549832 + }, + { + "slug": "qwen/qwq-32b-preview", + "name": "QwQ-32B-Preview", + "gpqa": 0.652, + "math": 0.906, + "hf_id": "Qwen/QwQ-32B-Preview", + "params_b": 32.764, + "ifeval": 0.4035437084713006, + "bbh": 0.6691381482252744, + "mmlu_pro": 0.5678191489361702, + "hf_math_lvl5": 0.44939577039274925, + "hf_musr": 0.4109895833333333, + "hf_avg": 34.11985336826446, + "lb_name": "qwq-32b-preview", + "lb_global": 0.4015572222222222, + "lb_reasoning": 0.405625, + "lb_math": 0.58265, + "lb_language": 0.21091000000000001, + "lb_if": 0.35587499999999994, + "lb_data_analysis": 0.25925, + "arena_elo": 1157, + "arena_rank": 271, + "arena_votes": 3233 + }, + { + "slug": "xai/grok-1.5", + "name": "Grok-1.5", + "gpqa": 0.359, + "mmlu": 0.813, + "mmlu_pro": 0.51, + "math": 0.506, + "gsm8k": 0.9, + "human_eval": 0.741, + "mmmu": 0.536 + }, + { + "slug": "xai/grok-1.5v", + "name": "Grok-1.5V", + "mmmu": 0.536 + }, + { + "slug": "xai/grok-2-mini", + "name": "Grok-2 mini", + "gpqa": 0.51, + "mmlu": 0.862, + "mmlu_pro": 0.72, + "math": 0.73, + "human_eval": 0.857, + "mmmu": 0.632, + "lb_name": "grok-2-mini", + "lb_global": 0.4732861111111112, + "lb_reasoning": 0.42000000000000004, + "lb_math": 0.40276333333333336, + "lb_language": 0.395, + "lb_if": 0.6970000000000001, + "lb_data_analysis": 0.3938, + "aider_pass_rate": 0.406 + }, + { + "slug": "xai/grok-2", + "name": "Grok-2", + "gpqa": 0.56, + "mmlu": 0.875, + "mmlu_pro": 0.755, + "math": 0.761, + "human_eval": 0.884, + "mmmu": 0.661, + "lb_name": "grok-2", + "lb_global": 0.48686722222222223, + "lb_reasoning": 0.38, + "lb_math": 0.42738666666666675, + "lb_language": 0.34836333333333336, + "lb_if": 0.747585, + "lb_data_analysis": 0.5207, + "aider_pass_rate": 0.45899999999999996 + }, + { + "hf_id": "0-hero/Matter-0.2-7B-DPO", + "name": "Matter-0.2-7B-DPO", + "params_b": 7.242, + "ifeval": 0.3302792147058693, + "bbh": 0.3596254301656297, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.1163563829787234, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.381375, + "hf_avg": 8.90636130175029 + }, + { + "hf_id": "01-ai/Yi-1.5-34B", + "name": "Yi-1.5-34B", + "params_b": 34.389, + "ifeval": 0.2841172533322695, + "bbh": 0.5976391706360018, + "gpqa": 0.36577181208053694, + "mmlu_pro": 0.4665890957446808, + "hf_math_lvl5": 0.15332326283987915, + "hf_musr": 0.4236041666666667, + "hf_avg": 25.64649419429311 + }, + { + "hf_id": "01-ai/Yi-1.5-34B-32K", + "name": "Yi-1.5-34B-32K", + "params_b": 34.389, + "ifeval": 0.3118691737922047, + "bbh": 0.6015685776542417, + "gpqa": 0.36325503355704697, + "mmlu_pro": 0.4709109042553192, + "hf_math_lvl5": 0.1540785498489426, + "hf_musr": 0.4398229166666667, + "hf_avg": 26.727912908508134 + }, + { + "hf_id": "01-ai/Yi-1.5-34B-Chat", + "name": "Yi-1.5-34B-Chat", + "params_b": 34.389, + "ifeval": 0.6066758423205982, + "bbh": 0.6083748310271819, + "gpqa": 0.3649328859060403, + "mmlu_pro": 0.45204454787234044, + "hf_math_lvl5": 0.277190332326284, + "hf_musr": 0.4281979166666667, + "hf_avg": 33.35799367075618, + "arena_elo": 1213.33, + "arena_rank": 241, + "arena_votes": 24142 + }, + { + "hf_id": "01-ai/Yi-1.5-34B-Chat-16K", + "name": "Yi-1.5-34B-Chat-16K", + "params_b": 34.389, + "ifeval": 0.456449997118756, + "bbh": 0.6100218256499571, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.45445478723404253, + "hf_math_lvl5": 0.21374622356495468, + "hf_musr": 0.43976041666666665, + "hf_avg": 29.403554842710225 + }, + { + "hf_id": "01-ai/Yi-1.5-6B", + "name": "Yi-1.5-6B", + "params_b": 6.061, + "ifeval": 0.26166017278598563, + "bbh": 0.44925820198929056, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.31441156914893614, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.43740625, + "hf_avg": 16.745698054972127 + }, + { + "hf_id": "01-ai/Yi-1.5-6B-Chat", + "name": "Yi-1.5-6B-Chat", + "params_b": 6.061, + "ifeval": 0.5145270105542183, + "bbh": 0.4571311331954389, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.3193151595744681, + "hf_math_lvl5": 0.1623867069486405, + "hf_musr": 0.43917708333333333, + "hf_avg": 22.784006289829847 + }, + { + "hf_id": "01-ai/Yi-1.5-9B", + "name": "Yi-1.5-9B", + "params_b": 8.829, + "ifeval": 0.29358435617494916, + "bbh": 0.514294179104191, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.3916223404255319, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.43278124999999995, + "hf_avg": 22.153901514184795 + }, + { + "hf_id": "01-ai/Yi-1.5-9B-32K", + "name": "Yi-1.5-9B-32K", + "params_b": 8.829, + "ifeval": 0.23031113002389217, + "bbh": 0.496332115988265, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.37649601063829785, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.4186145833333333, + "hf_avg": 19.809786285875365 + }, + { + "hf_id": "01-ai/Yi-1.5-9B-Chat", + "name": "Yi-1.5-9B-Chat", + "params_b": 8.829, + "ifeval": 0.6045525871354672, + "bbh": 0.555906430281685, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.39752327127659576, + "hf_math_lvl5": 0.2258308157099698, + "hf_musr": 0.42590625, + "hf_avg": 29.530872220260978 + }, + { + "hf_id": "01-ai/Yi-1.5-9B-Chat-16K", + "name": "Yi-1.5-9B-Chat-16K", + "params_b": 8.829, + "ifeval": 0.4214040966856829, + "bbh": 0.5153383364651778, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.39935172872340424, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.40990624999999997, + "hf_avg": 23.76539234993476 + }, + { + "hf_id": "01-ai/Yi-34B", + "name": "Yi-34B", + "params_b": 34.389, + "ifeval": 0.3045751938190667, + "bbh": 0.5457099951794562, + "gpqa": 0.36661073825503354, + "mmlu_pro": 0.441156914893617, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.4118541666666667, + "hf_avg": 22.373127018936653 + }, + { + "hf_id": "01-ai/Yi-34B-200K", + "name": "Yi-34B-200K", + "params_b": 34.389, + "ifeval": 0.15424850507763843, + "bbh": 0.5441817925289527, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.45345744680851063, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.38171874999999994, + "hf_avg": 20.01347533597433 + }, + { + "hf_id": "01-ai/Yi-34B-Chat", + "name": "Yi-34B-Chat", + "params_b": 34.389, + "ifeval": 0.4698887839820565, + "bbh": 0.5560872910766164, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.4093251329787234, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.39784375, + "hf_avg": 24.226662652803373, + "arena_elo": 1183.75, + "arena_rank": 255, + "arena_votes": 15483 + }, + { + "hf_id": "01-ai/Yi-6B", + "name": "Yi-6B", + "params_b": 6.061, + "ifeval": 0.28933784580468713, + "bbh": 0.4309230591000865, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.29911901595744683, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.39368749999999997, + "hf_avg": 13.611617485376058 + }, + { + "hf_id": "01-ai/Yi-6B-200K", + "name": "Yi-6B-200K", + "params_b": 6.061, + "ifeval": 0.08433068702154728, + "bbh": 0.42892948109603307, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2844082446808511, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.45873958333333337, + "hf_avg": 11.996098298832841 + }, + { + "hf_id": "01-ai/Yi-6B-Chat", + "name": "Yi-6B-Chat", + "params_b": 6.061, + "ifeval": 0.33952135888331847, + "bbh": 0.41326019207548687, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3061003989361702, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.36879166666666663, + "hf_avg": 14.11765000523676, + "lb_name": "yi-6b-chat", + "lb_global": 0.1098216666666667, + "lb_reasoning": 0.14, + "lb_math": 0.08529666666666667, + "lb_language": 0.046889999999999994, + "lb_if": 0.27220750000000005, + "lb_data_analysis": 0.0657 + }, + { + "hf_id": "01-ai/Yi-9B", + "name": "Yi-9B", + "params_b": 8.829, + "ifeval": 0.2708779372066118, + "bbh": 0.49396075125308075, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.35738031914893614, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.40540624999999997, + "hf_avg": 17.811867367746064 + }, + { + "hf_id": "01-ai/Yi-9B-200K", + "name": "Yi-9B-200K", + "params_b": 8.829, + "ifeval": 0.23270921155866434, + "bbh": 0.4793302602023641, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.36220079787234044, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.42940625, + "hf_avg": 17.72955178611439 + }, + { + "hf_id": "01-ai/Yi-Coder-9B-Chat", + "name": "Yi-Coder-9B-Chat", + "params_b": 8.829, + "ifeval": 0.4817041006750976, + "bbh": 0.48142000339111674, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.24251994680851063, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.3991770833333333, + "hf_avg": 16.985989314863886, + "aider_pass_rate": 0.466 + }, + { + "hf_id": "1-800-LLMs/Qwen-2.5-14B-Hindi", + "name": "Qwen-2.5-14B-Hindi", + "params_b": 14.77, + "ifeval": 0.582570911847232, + "bbh": 0.6523901531956199, + "gpqa": 0.3624161073825503, + "mmlu_pro": 0.5262632978723404, + "hf_math_lvl5": 0.3330815709969788, + "hf_musr": 0.4489375, + "hf_avg": 36.266177006668904 + }, + { + "hf_id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", + "name": "Qwen-2.5-14B-Hindi-Custom-Instruct", + "params_b": 14.77, + "ifeval": 0.30774677854758703, + "bbh": 0.6284322714967584, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.516373005319149, + "hf_math_lvl5": 0.311178247734139, + "hf_musr": 0.4490625, + "hf_avg": 31.020777275634742 + }, + { + "hf_id": "1024m/PHI-4-Hindi", + "name": "PHI-4-Hindi", + "params_b": 14.66, + "ifeval": 0.00816832670647216, + "bbh": 0.6710015642760666, + "gpqa": 0.3976510067114094, + "mmlu_pro": 0.523936170212766, + "hf_math_lvl5": 0.23338368580060423, + "hf_musr": 0.4913541666666667, + "hf_avg": 27.48785095027712 + }, + { + "hf_id": "1024m/QWEN-14B-B100", + "name": "QWEN-14B-B100", + "params_b": 14.77, + "ifeval": 0.7762104549262623, + "bbh": 0.653271132679638, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.5178690159574468, + "hf_math_lvl5": 0.5438066465256798, + "hf_musr": 0.41, + "hf_avg": 41.91906679224076 + }, + { + "hf_id": "1TuanPham/T-VisStar-7B-v0.1", + "name": "T-VisStar-7B-v0.1", + "params_b": 7.294, + "ifeval": 0.36070404305021786, + "bbh": 0.5052203113352468, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3210605053191489, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4375, + "hf_avg": 19.144808610895357 + }, + { + "hf_id": "1TuanPham/T-VisStar-v0.1", + "name": "T-VisStar-v0.1", + "params_b": 7.294, + "ifeval": 0.36070404305021786, + "bbh": 0.5052203113352468, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3210605053191489, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4375, + "hf_avg": 19.144808610895357 + }, + { + "hf_id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", + "name": "Llama-3.1-8B-Squareroot", + "params_b": 8.03, + "ifeval": 0.22134381219608418, + "bbh": 0.34609423326328875, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.17495013297872342, + "hf_math_lvl5": 0.26586102719033233, + "hf_musr": 0.3089166666666667, + "hf_avg": 11.223740793316347 + }, + { + "hf_id": "4season/final_model_test_v2", + "name": "final_model_test_v2", + "params_b": 21.421, + "ifeval": 0.3191132860809319, + "bbh": 0.6342049783295018, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.3528091755319149, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.4314479166666667, + "hf_avg": 23.086235043644056 + }, + { + "hf_id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", + "name": "FuseChat-Llama-3.1-8B-Instruct-preview", + "params_b": 8.03, + "ifeval": 0.7189579205397235, + "bbh": 0.5119887898349903, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3732546542553192, + "hf_math_lvl5": 0.24773413897280966, + "hf_musr": 0.38200000000000006, + "hf_avg": 28.56857495894073 + }, + { + "hf_id": "AALF/gemma-2-27b-it-SimPO-37K", + "name": "gemma-2-27b-it-SimPO-37K", + "params_b": 27.227, + "ifeval": 0.24065257959990605, + "bbh": 0.3911343917952534, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.1971409574468085, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3487604166666667, + "hf_avg": 9.512077380763676 + }, + { + "hf_id": "AALF/gemma-2-27b-it-SimPO-37K-100steps", + "name": "gemma-2-27b-it-SimPO-37K-100steps", + "params_b": 27.227, + "ifeval": 0.2567642743476199, + "bbh": 0.39308230769885016, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.21251662234042554, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3329166666666667, + "hf_avg": 10.246803363324455 + }, + { + "hf_id": "AGI-0/Art-v0-3B", + "name": "Art-v0-3B", + "params_b": 3.086, + "ifeval": 0.319238509377341, + "bbh": 0.3400959483013824, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11785239361702128, + "hf_math_lvl5": 0.24622356495468278, + "hf_musr": 0.3768229166666666, + "hf_avg": 12.132145545921874 + }, + { + "hf_id": "AGI-0/Artificium-llama3.1-8B-001", + "name": "Artificium-llama3.1-8B-001", + "params_b": 8.03, + "ifeval": 0.5247687247614108, + "bbh": 0.42562150225923556, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.3181515957446808, + "hf_math_lvl5": 0.13595166163141995, + "hf_musr": 0.3794583333333333, + "hf_avg": 19.491817924739056 + }, + { + "hf_id": "AGI-0/smartllama3.1-8B-001", + "name": "smartllama3.1-8B-001", + "params_b": 8.03, + "ifeval": 0.35178659290682057, + "bbh": 0.46701787510868176, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.3486535904255319, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.43864583333333335, + "hf_avg": 20.424552077662117 + }, + { + "hf_id": "AI-MO/NuminaMath-7B-CoT", + "name": "NuminaMath-7B-CoT", + "params_b": 6.91, + "ifeval": 0.2688544173903022, + "bbh": 0.4314193495860012, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.28681848404255317, + "hf_math_lvl5": 0.26963746223564955, + "hf_musr": 0.33034375, + "hf_avg": 16.118457218023075 + }, + { + "hf_id": "AI-MO/NuminaMath-7B-TIR", + "name": "NuminaMath-7B-TIR", + "params_b": 6.91, + "ifeval": 0.27562423259174545, + "bbh": 0.41436913375897894, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.2732712765957447, + "hf_math_lvl5": 0.1608761329305136, + "hf_musr": 0.35092708333333333, + "hf_avg": 14.182289143173433 + }, + { + "hf_id": "AI-Sweden-Models/Llama-3-8B-instruct", + "name": "Llama-3-8B-instruct", + "params_b": 8.03, + "ifeval": 0.24012841482821137, + "bbh": 0.4173460154515302, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.25972406914893614, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.47709375000000004, + "hf_avg": 14.343669671742774, + "arena_elo": 1223.12, + "arena_rank": 236, + "arena_votes": 104636 + }, + { + "hf_id": "AI-Sweden-Models/gpt-sw3-40b", + "name": "gpt-sw3-40b", + "params_b": 39.927, + "ifeval": 0.1470298807164989, + "bbh": 0.3267744702957652, + "gpqa": 0.2348993288590604, + "mmlu_pro": 0.12757646276595744, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.36323958333333334, + "hf_avg": 4.872902485288683 + }, + { + "hf_id": "AI4free/Dhanishtha", + "name": "Dhanishtha", + "params_b": 1.777, + "ifeval": 0.2451240486353985, + "bbh": 0.34039444943326375, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.16431183510638298, + "hf_math_lvl5": 0.25604229607250756, + "hf_musr": 0.35694791666666664, + "hf_avg": 11.247711953182252 + }, + { + "hf_id": "Aashraf995/Creative-7B-nerd", + "name": "Creative-7B-nerd", + "params_b": 7.616, + "ifeval": 0.4721871301480073, + "bbh": 0.5606785565640195, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.44921875, + "hf_math_lvl5": 0.3164652567975831, + "hf_musr": 0.4515416666666667, + "hf_avg": 29.97819251596781 + }, + { + "hf_id": "Aashraf995/Gemma-Evo-10B", + "name": "Gemma-Evo-10B", + "params_b": 10.159, + "ifeval": 0.7332211864519476, + "bbh": 0.6044352897552882, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.4275265957446808, + "hf_math_lvl5": 0.22280966767371602, + "hf_musr": 0.45947916666666666, + "hf_avg": 34.32632733409121 + }, + { + "hf_id": "Aashraf995/Qwen-Evo-7B", + "name": "Qwen-Evo-7B", + "params_b": 7.616, + "ifeval": 0.4757343847657549, + "bbh": 0.5709361538590277, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.44622672872340424, + "hf_math_lvl5": 0.31419939577039274, + "hf_musr": 0.4541458333333333, + "hf_avg": 30.275058582706105 + }, + { + "hf_id": "Aashraf995/QwenStock-14B", + "name": "QwenStock-14B", + "params_b": 14.766, + "ifeval": 0.5008632650256873, + "bbh": 0.6550130348108012, + "gpqa": 0.38926174496644295, + "mmlu_pro": 0.5382313829787234, + "hf_math_lvl5": 0.35725075528700906, + "hf_musr": 0.4792604166666667, + "hf_avg": 37.13002133952593 + }, + { + "hf_id": "AbacusResearch/Jallabi-34B", + "name": "Jallabi-34B", + "params_b": 34.389, + "ifeval": 0.3528604103777976, + "bbh": 0.6023380603196266, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.4681682180851064, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.48217708333333337, + "hf_avg": 26.186081920716532 + }, + { + "hf_id": "Ahdoot/Test_StealthThinker", + "name": "Test_StealthThinker", + "params_b": 3.086, + "ifeval": 0.42200361706937595, + "bbh": 0.46466398134666304, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.35970744680851063, + "hf_math_lvl5": 0.17900302114803626, + "hf_musr": 0.42804166666666665, + "hf_avg": 22.06904758264274 + }, + { + "hf_id": "Alepach/notHumpback-M0", + "name": "notHumpback-M0", + "params_b": 3.213, + "ifeval": 0.23500755772461512, + "bbh": 0.27849287879199425, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.1118683510638298, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.35523958333333333, + "hf_avg": 5.137220097878128 + }, + { + "hf_id": "Alepach/notHumpback-M1", + "name": "notHumpback-M1", + "params_b": 3.213, + "ifeval": 0.2206944241279804, + "bbh": 0.28824720129981835, + "gpqa": 0.23741610738255034, + "mmlu_pro": 0.10912566489361702, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.342, + "hf_avg": 4.779297887614574 + }, + { + "hf_id": "Alsebay/Qwen2.5-7B-test-novelist", + "name": "Qwen2.5-7B-test-novelist", + "params_b": 7.616, + "ifeval": 0.5351600420218354, + "bbh": 0.515121518446605, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3865525265957447, + "hf_math_lvl5": 0.2348942598187311, + "hf_musr": 0.47488541666666667, + "hf_avg": 27.172849099811746 + }, + { + "hf_id": "Amu/t1-1.5B", + "name": "t1-1.5B", + "params_b": 1.777, + "ifeval": 0.3393717558300864, + "bbh": 0.4007606984109216, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.2566489361702128, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.3517083333333333, + "hf_avg": 12.141383312461109 + }, + { + "hf_id": "Amu/t1-3B", + "name": "t1-3B", + "params_b": 3.397, + "ifeval": 0.33277703160946287, + "bbh": 0.39989750143834385, + "gpqa": 0.2407718120805369, + "mmlu_pro": 0.12840757978723405, + "hf_math_lvl5": 0.13746223564954682, + "hf_musr": 0.34348958333333335, + "hf_avg": 11.160895171582572 + }, + { + "hf_id": "ArliAI/ArliAI-RPMax-12B-v1.1", + "name": "ArliAI-RPMax-12B-v1.1", + "params_b": 12.248, + "ifeval": 0.5348852156721942, + "bbh": 0.475181760840119, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.3384308510638298, + "hf_math_lvl5": 0.11253776435045318, + "hf_musr": 0.36184375, + "hf_avg": 20.976339912086193 + }, + { + "hf_id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", + "name": "Llama-3.1-8B-ArliAI-RPMax-v1.1", + "params_b": 8.03, + "ifeval": 0.6359016298975606, + "bbh": 0.5015613456039083, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.35513630319148937, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.3576875, + "hf_avg": 23.942143268323218 + }, + { + "hf_id": "Arthur-LAGACHERIE/Precis-1B-Instruct", + "name": "Precis-1B-Instruct", + "params_b": 1.236, + "ifeval": 0.3670738086056109, + "bbh": 0.3223614510687368, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.14261968085106383, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.34355208333333337, + "hf_avg": 8.848710962428209 + }, + { + "hf_id": "Artples/L-MChat-7b", + "name": "L-MChat-7b", + "params_b": 7.242, + "ifeval": 0.5296646231997766, + "bbh": 0.46003301674679414, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3298703457446808, + "hf_math_lvl5": 0.09214501510574018, + "hf_musr": 0.4028645833333333, + "hf_avg": 21.238493444242586 + }, + { + "hf_id": "Artples/L-MChat-Small", + "name": "L-MChat-Small", + "params_b": 2.78, + "ifeval": 0.32870561222002065, + "bbh": 0.48225627665257265, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.24642619680851063, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.36959375, + "hf_avg": 15.23132798262929 + }, + { + "hf_id": "Aryanne/QwentileSwap", + "name": "QwentileSwap", + "params_b": 32.764, + "ifeval": 0.7378422585406721, + "bbh": 0.7008370136278447, + "gpqa": 0.3674496644295302, + "mmlu_pro": 0.5945811170212766, + "hf_math_lvl5": 0.42220543806646527, + "hf_musr": 0.4640416666666667, + "hf_avg": 43.91650770857709 + }, + { + "hf_id": "Aryanne/SuperHeart", + "name": "SuperHeart", + "params_b": 8.03, + "ifeval": 0.5192234382549413, + "bbh": 0.5215375046264326, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3912067819148936, + "hf_math_lvl5": 0.15634441087613293, + "hf_musr": 0.44357291666666665, + "hf_avg": 25.55719928277682 + }, + { + "hf_id": "AuraIndustries/Aura-4B", + "name": "Aura-4B", + "params_b": 4.513, + "ifeval": 0.38156203318306536, + "bbh": 0.4490409465001946, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.27061170212765956, + "hf_math_lvl5": 0.04229607250755287, + "hf_musr": 0.39384375, + "hf_avg": 16.063480369846303 + }, + { + "hf_id": "AuraIndustries/Aura-8B", + "name": "Aura-8B", + "params_b": 8.03, + "ifeval": 0.7205315230255722, + "bbh": 0.5131231419849063, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.38738364361702127, + "hf_math_lvl5": 0.15181268882175228, + "hf_musr": 0.4004479166666666, + "hf_avg": 27.363297533889394 + }, + { + "hf_id": "Azure99/Blossom-V6-14B", + "name": "Blossom-V6-14B", + "params_b": 14.77, + "ifeval": 0.6395486198841297, + "bbh": 0.5068726694646123, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.4543716755319149, + "hf_math_lvl5": 0.525679758308157, + "hf_musr": 0.40352083333333333, + "hf_avg": 32.80581546858162 + }, + { + "hf_id": "Azure99/Blossom-V6-7B", + "name": "Blossom-V6-7B", + "params_b": 7.616, + "ifeval": 0.5538194213575536, + "bbh": 0.49736683240887, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.41439494680851063, + "hf_math_lvl5": 0.45845921450151056, + "hf_musr": 0.43009375, + "hf_avg": 31.04565038906736 + }, + { + "hf_id": "Azure99/blossom-v5-32b", + "name": "blossom-v5-32b", + "params_b": 32.512, + "ifeval": 0.5235441960664371, + "bbh": 0.5954545257004673, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.4234541223404255, + "hf_math_lvl5": 0.1865558912386707, + "hf_musr": 0.40199999999999997, + "hf_avg": 27.724659940062114 + }, + { + "hf_id": "Azure99/blossom-v5-llama3-8b", + "name": "blossom-v5-llama3-8b", + "params_b": 8.03, + "ifeval": 0.434293230849701, + "bbh": 0.4184909197087261, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.2205784574468085, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.36702083333333335, + "hf_avg": 14.598962808998438 + }, + { + "hf_id": "Azure99/blossom-v5.1-34b", + "name": "blossom-v5.1-34b", + "params_b": 34.389, + "ifeval": 0.5696562897556262, + "bbh": 0.6109110096611161, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.4557845744680851, + "hf_math_lvl5": 0.2590634441087613, + "hf_musr": 0.39279166666666665, + "hf_avg": 30.298681690900178 + }, + { + "hf_id": "Azure99/blossom-v5.1-9b", + "name": "blossom-v5.1-9b", + "params_b": 8.829, + "ifeval": 0.5085816744016985, + "bbh": 0.5343292377916368, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.39793882978723405, + "hf_math_lvl5": 0.2122356495468278, + "hf_musr": 0.39939583333333334, + "hf_avg": 26.470194407631528 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", + "name": "Infinity-Instruct-3M-0613-Llama3-70B", + "params_b": 70.554, + "ifeval": 0.6821134589555713, + "bbh": 0.6641614484348598, + "gpqa": 0.35822147651006714, + "mmlu_pro": 0.47298869680851063, + "hf_math_lvl5": 0.21525679758308158, + "hf_musr": 0.45226041666666666, + "hf_avg": 35.578242909223654 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", + "name": "Infinity-Instruct-3M-0613-Mistral-7B", + "params_b": 7.242, + "ifeval": 0.5319873491225504, + "bbh": 0.49582333763258896, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.31607380319148937, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.4350833333333333, + "hf_avg": 22.29353000800769 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", + "name": "Infinity-Instruct-3M-0625-Llama3-70B", + "params_b": 70.554, + "ifeval": 0.7442120240960651, + "bbh": 0.6670337872930245, + "gpqa": 0.3573825503355705, + "mmlu_pro": 0.4586103723404255, + "hf_math_lvl5": 0.22507552870090636, + "hf_musr": 0.46165625000000005, + "hf_avg": 36.91009196843762 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", + "name": "Infinity-Instruct-3M-0625-Llama3-8B", + "params_b": 8.03, + "ifeval": 0.6050268842227512, + "bbh": 0.4954985723563075, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.3252160904255319, + "hf_math_lvl5": 0.08836858006042296, + "hf_musr": 0.37120833333333336, + "hf_avg": 22.062531958779175 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", + "name": "Infinity-Instruct-3M-0625-Mistral-7B", + "params_b": 7.242, + "ifeval": 0.5867420666054957, + "bbh": 0.4939670574681802, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3229720744680851, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.42723958333333334, + "hf_avg": 22.843425216048672 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", + "name": "Infinity-Instruct-3M-0625-Qwen2-7B", + "params_b": 7.616, + "ifeval": 0.5553930238434022, + "bbh": 0.5345911997776569, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.39602726063829785, + "hf_math_lvl5": 0.19259818731117825, + "hf_musr": 0.38876041666666666, + "hf_avg": 26.199808011295485 + }, + { + "hf_id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", + "name": "Infinity-Instruct-3M-0625-Yi-1.5-9B", + "params_b": 8.829, + "ifeval": 0.5185984299436606, + "bbh": 0.5509115146247398, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.41181848404255317, + "hf_math_lvl5": 0.16389728096676737, + "hf_musr": 0.45753125, + "hf_avg": 28.14496041142372 + }, + { + "hf_id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", + "name": "Infinity-Instruct-7M-0729-Llama3_1-8B", + "params_b": 8.03, + "ifeval": 0.6131952109292234, + "bbh": 0.5077335431381055, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3223902925531915, + "hf_math_lvl5": 0.12764350453172205, + "hf_musr": 0.35784375, + "hf_avg": 23.447423677135685 + }, + { + "hf_id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", + "name": "Infinity-Instruct-7M-0729-mistral-7B", + "params_b": 7.242, + "ifeval": 0.6161928128476886, + "bbh": 0.4963813586525743, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3273769946808511, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.4061875, + "hf_avg": 23.21644934658775 + }, + { + "hf_id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", + "name": "Infinity-Instruct-7M-Gen-Llama3_1-70B", + "params_b": 70.554, + "ifeval": 0.7335458804859993, + "bbh": 0.6695200461367471, + "gpqa": 0.37583892617449666, + "mmlu_pro": 0.460688164893617, + "hf_math_lvl5": 0.25226586102719034, + "hf_musr": 0.45390625, + "hf_avg": 37.48445364047765 + }, + { + "hf_id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", + "name": "Infinity-Instruct-7M-Gen-Llama3_1-8B", + "params_b": 8.03, + "ifeval": 0.6131952109292234, + "bbh": 0.5077335431381055, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3223902925531915, + "hf_math_lvl5": 0.12764350453172205, + "hf_musr": 0.35784375, + "hf_avg": 23.447423677135685 + }, + { + "hf_id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", + "name": "Infinity-Instruct-7M-Gen-mistral-7B", + "params_b": 7.242, + "ifeval": 0.6146690780462506, + "bbh": 0.4963813586525743, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3273769946808511, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.4061875, + "hf_avg": 23.191053766563783 + }, + { + "hf_id": "BEE-spoke-data/Meta-Llama-3-8Bee", + "name": "Meta-Llama-3-8Bee", + "params_b": 8.03, + "ifeval": 0.19506575885317623, + "bbh": 0.46263641905752745, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.32197473404255317, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.36540625, + "hf_avg": 14.657811877680324 + }, + { + "hf_id": "BEE-spoke-data/smol_llama-101M-GQA", + "name": "smol_llama-101M-GQA", + "params_b": 0.101, + "ifeval": 0.13843712460715346, + "bbh": 0.3017560771912554, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11070478723404255, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.3712708333333334, + "hf_avg": 4.019599868138451 + }, + { + "hf_id": "BEE-spoke-data/smol_llama-220M-GQA", + "name": "smol_llama-220M-GQA", + "params_b": 0.218, + "ifeval": 0.23860468002677343, + "bbh": 0.30316731388708956, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.1149434840425532, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.405875, + "hf_avg": 6.577800964187134 + }, + { + "hf_id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", + "name": "smol_llama-220M-GQA-fineweb_edu", + "params_b": 0.218, + "ifeval": 0.19881248420856662, + "bbh": 0.29290517164510593, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11269946808510638, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.4367604166666667, + "hf_avg": 6.629850832257909 + }, + { + "hf_id": "BEE-spoke-data/smol_llama-220M-openhermes", + "name": "smol_llama-220M-openhermes", + "params_b": 0.218, + "ifeval": 0.1555229014570229, + "bbh": 0.30275191401927726, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.11203457446808511, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.3847291666666666, + "hf_avg": 4.938005238605682 + }, + { + "hf_id": "BEE-spoke-data/tFINE-900m-e16-d32-flan", + "name": "tFINE-900m-e16-d32-flan", + "params_b": 0.887, + "ifeval": 0.15057713533424646, + "bbh": 0.30280434847620613, + "gpqa": 0.2332214765100671, + "mmlu_pro": 0.1307347074468085, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.3724166666666667, + "hf_avg": 4.597533011984676 + }, + { + "hf_id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", + "name": "tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", + "params_b": 0.887, + "ifeval": 0.13206735905176042, + "bbh": 0.3137786304497592, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.12367021276595745, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.43927083333333333, + "hf_avg": 5.999886320692095 + }, + { + "hf_id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", + "name": "tFINE-900m-e16-d32-instruct_2e", + "params_b": 0.887, + "ifeval": 0.1402855534426433, + "bbh": 0.31345674638809023, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.12367021276595745, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.42069791666666667, + "hf_avg": 5.908138429401099 + }, + { + "hf_id": "BEE-spoke-data/tFINE-900m-instruct-orpo", + "name": "tFINE-900m-instruct-orpo", + "params_b": 0.887, + "ifeval": 0.13299157346950535, + "bbh": 0.30220933767045094, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11519281914893617, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.3408541666666667, + "hf_avg": 3.69630792495429 + }, + { + "hf_id": "BSC-LT/salamandra-7b", + "name": "salamandra-7b", + "params_b": 7.768, + "ifeval": 0.13673829882489574, + "bbh": 0.3516612209885983, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.14926861702127658, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.35009375000000004, + "hf_avg": 5.704911444345451 + }, + { + "hf_id": "BSC-LT/salamandra-7b-instruct", + "name": "salamandra-7b-instruct", + "params_b": 7.768, + "ifeval": 0.24507418095098782, + "bbh": 0.3851324290080956, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.18051861702127658, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.41343749999999996, + "hf_avg": 10.181243697003833 + }, + { + "hf_id": "Ba2han/Llama-Phi-3_DoRA", + "name": "Llama-Phi-3_DoRA", + "params_b": 3.821, + "ifeval": 0.5130531434371911, + "bbh": 0.5514558259029191, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.39153922872340424, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.40692708333333333, + "hf_avg": 25.469895001154402 + }, + { + "hf_id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", + "name": "LeTriomphant2.2_ECE_iLAB", + "params_b": 72.706, + "ifeval": 0.5076330802271307, + "bbh": 0.7256319952414622, + "gpqa": 0.39932885906040266, + "mmlu_pro": 0.5851063829787234, + "hf_math_lvl5": 0.44486404833836857, + "hf_musr": 0.46255208333333336, + "hf_avg": 41.30426754456703 + }, + { + "hf_id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", + "name": "Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", + "params_b": 28.309, + "ifeval": 0.3011531624977283, + "bbh": 0.4908666248538678, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.26803523936170215, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.4079791666666666, + "hf_avg": 15.07109165755061 + }, + { + "hf_id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", + "name": "Llama-3.1-8B-OpenO1-SFT-v0.1", + "params_b": 8.03, + "ifeval": 0.5124037553690873, + "bbh": 0.4787448361604986, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.34915226063829785, + "hf_math_lvl5": 0.15256797583081572, + "hf_musr": 0.36181250000000004, + "hf_avg": 21.4004099666704 + }, + { + "hf_id": "BlackBeenie/Neos-Llama-3.1-8B", + "name": "Neos-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.49439376410147295, + "bbh": 0.4424998411442879, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.32621343085106386, + "hf_math_lvl5": 0.13217522658610273, + "hf_musr": 0.3749895833333334, + "hf_avg": 19.51217705998031 + }, + { + "hf_id": "BlackBeenie/Neos-Phi-3-14B-v0.1", + "name": "Neos-Phi-3-14B-v0.1", + "params_b": 13.96, + "ifeval": 0.4022449323350931, + "bbh": 0.6211931530444463, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.45636635638297873, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.41254166666666664, + "hf_avg": 27.032306783654633 + }, + { + "hf_id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", + "name": "llama-3.2-Korean-Bllossom-AICA-5B", + "params_b": 5.199, + "ifeval": 0.5172497861230424, + "bbh": 0.42930745041520607, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.27102726063829785, + "hf_math_lvl5": 0.12386706948640483, + "hf_musr": 0.3833958333333333, + "hf_avg": 19.012851991491477 + }, + { + "hf_id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "name": "NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "params_b": 8.03, + "ifeval": 0.7998909559967553, + "bbh": 0.5151987922850448, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.37333776595744683, + "hf_math_lvl5": 0.11933534743202417, + "hf_musr": 0.401875, + "hf_avg": 27.776634015071924 + }, + { + "hf_id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "name": "NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", + "params_b": 8.03, + "ifeval": 0.45902316963434797, + "bbh": 0.5185441912447182, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.3631150265957447, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.4082604166666666, + "hf_avg": 21.345510590269537 + }, + { + "hf_id": "BrainWave-ML/llama3.2-3B-maths-orpo", + "name": "llama3.2-3B-maths-orpo", + "params_b": 3, + "ifeval": 0.20490742341431845, + "bbh": 0.2911778102988436, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11677194148936171, + "hf_musr": 0.35753125, + "hf_avg": 5.07608283209792 + }, + { + "hf_id": "BramVanroy/GEITje-7B-ultra", + "name": "GEITje-7B-ultra", + "params_b": 7.242, + "ifeval": 0.3723442687624392, + "bbh": 0.37761612997305494, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.20113031914893617, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.32897916666666666, + "hf_avg": 11.022898819216541 + }, + { + "hf_id": "BramVanroy/fietje-2", + "name": "fietje-2", + "params_b": 2.78, + "ifeval": 0.20980332185268422, + "bbh": 0.40356695178386187, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.19855385638297873, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.3695625, + "hf_avg": 9.140300477810824 + }, + { + "hf_id": "BramVanroy/fietje-2-chat", + "name": "fietje-2-chat", + "params_b": 2.775, + "ifeval": 0.2917359273394593, + "bbh": 0.4149753717401999, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.20545212765957446, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.3527604166666667, + "hf_avg": 10.615455210257904 + }, + { + "hf_id": "BramVanroy/fietje-2-instruct", + "name": "fietje-2-instruct", + "params_b": 2.775, + "ifeval": 0.2789963962286732, + "bbh": 0.41360714173029806, + "gpqa": 0.2332214765100671, + "mmlu_pro": 0.2103557180851064, + "hf_math_lvl5": 0.022658610271903322, + "hf_musr": 0.3369166666666667, + "hf_avg": 10.48571837435655 + }, + { + "hf_id": "CYFRAGOVPL/Llama-PLLuM-8B-base", + "name": "Llama-PLLuM-8B-base", + "params_b": 8.03, + "ifeval": 0.28988749850396944, + "bbh": 0.43204480458140976, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.27568151595744683, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.39703125, + "hf_avg": 14.51887667865397 + }, + { + "hf_id": "CYFRAGOVPL/Llama-PLLuM-8B-chat", + "name": "Llama-PLLuM-8B-chat", + "params_b": 8.03, + "ifeval": 0.3514862786295917, + "bbh": 0.40770722535589576, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.27194148936170215, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.41991666666666666, + "hf_avg": 14.61481701543444 + }, + { + "hf_id": "CYFRAGOVPL/PLLuM-12B-base", + "name": "PLLuM-12B-base", + "params_b": 12.248, + "ifeval": 0.2820937335159599, + "bbh": 0.4390596143784447, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.2740192819148936, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.4142395833333334, + "hf_avg": 14.667469455106152 + }, + { + "hf_id": "CYFRAGOVPL/PLLuM-12B-chat", + "name": "PLLuM-12B-chat", + "params_b": 12.248, + "ifeval": 0.32143601200370575, + "bbh": 0.44458000333075703, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.2872340425531915, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.4114791666666666, + "hf_avg": 15.348386882498241 + }, + { + "hf_id": "CYFRAGOVPL/PLLuM-12B-nc-base", + "name": "PLLuM-12B-nc-base", + "params_b": 12.248, + "ifeval": 0.24045310886226323, + "bbh": 0.42767589675970014, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.25590093085106386, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.36451041666666667, + "hf_avg": 11.421222164941392 + }, + { + "hf_id": "CYFRAGOVPL/PLLuM-12B-nc-chat", + "name": "PLLuM-12B-nc-chat", + "params_b": 12.248, + "ifeval": 0.28344237733657807, + "bbh": 0.45764328318815456, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.25972406914893614, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.4353541666666667, + "hf_avg": 14.598342751082507 + }, + { + "hf_id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", + "name": "Llama-3.2-Rabbit-Ko-3B-Instruct", + "params_b": 3.213, + "ifeval": 0.7198821349574684, + "bbh": 0.4426719080820793, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.2822473404255319, + "hf_math_lvl5": 0.2054380664652568, + "hf_musr": 0.3649166666666667, + "hf_avg": 23.50945092474288 + }, + { + "hf_id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", + "name": "Llama-3.2-Rabbit-Ko-3B-Instruct-2412", + "params_b": 3.213, + "ifeval": 0.47818233398493776, + "bbh": 0.43577246498246686, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.31341422872340424, + "hf_math_lvl5": 0.17598187311178248, + "hf_musr": 0.3872083333333334, + "hf_avg": 20.301755342918195 + }, + { + "hf_id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", + "name": "L3-Umbral-Mind-RP-v2.0-8B", + "params_b": 8.03, + "ifeval": 0.7122634609502786, + "bbh": 0.5262406145493724, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3723404255319149, + "hf_math_lvl5": 0.1095166163141994, + "hf_musr": 0.3686666666666667, + "hf_avg": 25.899338809216697 + }, + { + "hf_id": "CausalLM/14B", + "name": "14B", + "params_b": 14, + "ifeval": 0.2788213052478535, + "bbh": 0.4700462397700626, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3221409574468085, + "hf_math_lvl5": 0.0755287009063444, + "hf_musr": 0.4154791666666667, + "hf_avg": 17.235580154596317 + }, + { + "hf_id": "CausalLM/34b-beta", + "name": "34b-beta", + "params_b": 34.389, + "ifeval": 0.3043247472262486, + "bbh": 0.5590996102136266, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.5324966755319149, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.37486458333333333, + "hf_avg": 23.29783304542188 + }, + { + "hf_id": "CausalLM/preview-1-hf", + "name": "preview-1-hf", + "params_b": 9.543, + "ifeval": 0.5558928088582737, + "bbh": 0.3614567463880903, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.35970744680851063, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.34218750000000003, + "hf_avg": 16.706753161023286 + }, + { + "hf_id": "Changgil/K2S3-14b-v0.2", + "name": "K2S3-14b-v0.2", + "params_b": 14.352, + "ifeval": 0.3242840108689389, + "bbh": 0.4613311786298187, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.2643783244680851, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.3922604166666666, + "hf_avg": 15.275784761251337 + }, + { + "hf_id": "Changgil/K2S3-v0.1", + "name": "K2S3-v0.1", + "params_b": 14.352, + "ifeval": 0.32765617450586665, + "bbh": 0.46554920672286154, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.2562333776595745, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.40140624999999996, + "hf_avg": 14.839283995827836 + }, + { + "hf_id": "ClaudioItaly/Albacus", + "name": "Albacus", + "params_b": 8.987, + "ifeval": 0.4667415790103592, + "bbh": 0.5113043406568835, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.31648936170212766, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.41353124999999996, + "hf_avg": 20.505574062384014 + }, + { + "hf_id": "ClaudioItaly/Book-Gut12B", + "name": "Book-Gut12B", + "params_b": 12.248, + "ifeval": 0.39984685080032095, + "bbh": 0.5417370194443233, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3670212765957447, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.4635416666666667, + "hf_avg": 23.39409811229197 + }, + { + "hf_id": "ClaudioItaly/Evolutionstory-7B-v2.2", + "name": "Evolutionstory-7B-v2.2", + "params_b": 7.242, + "ifeval": 0.4813794066410457, + "bbh": 0.5108043406568835, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.31590757978723405, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.41353124999999996, + "hf_avg": 20.810835332574907 + }, + { + "hf_id": "ClaudioItaly/intelligence-cod-rag-7b-v3", + "name": "intelligence-cod-rag-7b-v3", + "params_b": 7.616, + "ifeval": 0.6897820006471718, + "bbh": 0.5366339718839108, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.4195478723404255, + "hf_math_lvl5": 0.3806646525679758, + "hf_musr": 0.4152708333333333, + "hf_avg": 31.836966345244434 + }, + { + "hf_id": "CohereForAI/aya-23-35B", + "name": "aya-23-35B", + "params_b": 34.981, + "ifeval": 0.6461932117891638, + "bbh": 0.5399551450731271, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.33560505319148937, + "hf_math_lvl5": 0.03474320241691843, + "hf_musr": 0.4309895833333333, + "hf_avg": 24.755408446939658 + }, + { + "hf_id": "CohereForAI/aya-23-8B", + "name": "aya-23-8B", + "params_b": 8.028, + "ifeval": 0.4698887839820565, + "bbh": 0.4296161519220307, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.2278091755319149, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.3940625, + "hf_avg": 16.010983148168723 + }, + { + "hf_id": "CohereForAI/aya-expanse-32b", + "name": "aya-expanse-32b", + "params_b": 32.296, + "ifeval": 0.7301737168490716, + "bbh": 0.5648670099212114, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.41298204787234044, + "hf_math_lvl5": 0.15332326283987915, + "hf_musr": 0.3872708333333333, + "hf_avg": 29.718510126577055 + }, + { + "hf_id": "CohereForAI/aya-expanse-8b", + "name": "aya-expanse-8b", + "params_b": 8.028, + "ifeval": 0.6358517622131501, + "bbh": 0.4977203055736406, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3003656914893617, + "hf_math_lvl5": 0.08610271903323263, + "hf_musr": 0.37288541666666664, + "hf_avg": 22.406573697993498 + }, + { + "hf_id": "CohereForAI/c4ai-command-r-plus", + "name": "c4ai-command-r-plus", + "params_b": 103.811, + "ifeval": 0.7664186580495308, + "bbh": 0.581542357407793, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3991855053191489, + "hf_math_lvl5": 0.08006042296072508, + "hf_musr": 0.48071875000000003, + "hf_avg": 30.936070612618508 + }, + { + "hf_id": "CohereForAI/c4ai-command-r-plus-08-2024", + "name": "c4ai-command-r-plus-08-2024", + "params_b": 103.811, + "ifeval": 0.7539539532883859, + "bbh": 0.5995999913027185, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.44207114361702127, + "hf_math_lvl5": 0.12386706948640483, + "hf_musr": 0.48294791666666664, + "hf_avg": 33.647474595578004 + }, + { + "hf_id": "CohereForAI/c4ai-command-r-v01", + "name": "c4ai-command-r-v01", + "params_b": 34.981, + "ifeval": 0.6748194789824333, + "bbh": 0.5406415512767856, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3369348404255319, + "hf_math_lvl5": 0.03474320241691843, + "hf_musr": 0.45169791666666664, + "hf_avg": 25.929031834951832 + }, + { + "hf_id": "CohereForAI/c4ai-command-r7b-12-2024", + "name": "c4ai-command-r7b-12-2024", + "params_b": 8.028, + "ifeval": 0.7713145564878965, + "bbh": 0.5502642151855635, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3572140957446808, + "hf_math_lvl5": 0.2990936555891239, + "hf_musr": 0.41251041666666666, + "hf_avg": 31.61752928799648 + }, + { + "hf_id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", + "name": "Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", + "params_b": 14.77, + "ifeval": 0.8239958864701216, + "bbh": 0.6370093752306357, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.4979222074468085, + "hf_math_lvl5": 0.5317220543806647, + "hf_musr": 0.42603125000000003, + "hf_avg": 41.765081012881176 + }, + { + "hf_id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", + "name": "Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", + "params_b": 7.616, + "ifeval": 0.7564019025075688, + "bbh": 0.5402085849577634, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.4341755319148936, + "hf_math_lvl5": 0.493202416918429, + "hf_musr": 0.40330208333333334, + "hf_avg": 35.36673047341537 + }, + { + "hf_id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", + "name": "YiSM-blossom5.1-34B-SLERP", + "params_b": 34.389, + "ifeval": 0.5033112142448702, + "bbh": 0.6207548093635428, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.4740691489361702, + "hf_math_lvl5": 0.21525679758308158, + "hf_musr": 0.44134375, + "hf_avg": 31.379930205236757 + }, + { + "hf_id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", + "name": "huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", + "params_b": 32.764, + "ifeval": 0.8206237228331937, + "bbh": 0.692924708291253, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.5720578457446809, + "hf_math_lvl5": 0.5944108761329305, + "hf_musr": 0.42072916666666665, + "hf_avg": 45.6578470977455 + }, + { + "hf_id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", + "name": "huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", + "params_b": 14.77, + "ifeval": 0.8175762532303177, + "bbh": 0.6335891556421077, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.4910239361702128, + "hf_math_lvl5": 0.547583081570997, + "hf_musr": 0.42603125000000003, + "hf_avg": 41.46621085774212 + }, + { + "hf_id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", + "name": "zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", + "params_b": 32.764, + "ifeval": 0.8328136012446974, + "bbh": 0.6955174427138592, + "gpqa": 0.3674496644295302, + "mmlu_pro": 0.5684840425531915, + "hf_math_lvl5": 0.5853474320241692, + "hf_musr": 0.43139583333333337, + "hf_avg": 46.76362120574333 + }, + { + "hf_id": "ContactDoctor/Bio-Medical-3B-CoT-012025", + "name": "Bio-Medical-3B-CoT-012025", + "params_b": 3.085, + "ifeval": 0.360379349016166, + "bbh": 0.438315337642466, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.2933843085106383, + "hf_math_lvl5": 0.2212990936555891, + "hf_musr": 0.3367604166666667, + "hf_avg": 18.73071055946323 + }, + { + "hf_id": "ContactDoctor/Bio-Medical-Llama-3-8B", + "name": "Bio-Medical-Llama-3-8B", + "params_b": 4.015, + "ifeval": 0.4422365988909427, + "bbh": 0.486311802622738, + "gpqa": 0.3338926174496644, + "mmlu_pro": 0.36477726063829785, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.35139583333333335, + "hf_avg": 19.91745266981443 + }, + { + "hf_id": "CoolSpring/Qwen2-0.5B-Abyme", + "name": "Qwen2-0.5B-Abyme", + "params_b": 0.494, + "ifeval": 0.19151850423542865, + "bbh": 0.2861834296481826, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.13331117021276595, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.35421875, + "hf_avg": 4.999994251217356 + }, + { + "hf_id": "CoolSpring/Qwen2-0.5B-Abyme-merge2", + "name": "Qwen2-0.5B-Abyme-merge2", + "params_b": 0.63, + "ifeval": 0.2021846478454944, + "bbh": 0.29942723009138733, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.14893617021276595, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.3687291666666667, + "hf_avg": 6.320258162859158 + }, + { + "hf_id": "CoolSpring/Qwen2-0.5B-Abyme-merge3", + "name": "Qwen2-0.5B-Abyme-merge3", + "params_b": 0.63, + "ifeval": 0.23860468002677343, + "bbh": 0.30031404525933675, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.15001662234042554, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.35009375000000004, + "hf_avg": 6.8201960830112 + }, + { + "hf_id": "Corianas/Neural-Mistral-7B", + "name": "Neural-Mistral-7B", + "params_b": 7.242, + "ifeval": 0.5489235229191878, + "bbh": 0.4428023404192858, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.27376994680851063, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.3872708333333333, + "hf_avg": 18.200438567718393 + }, + { + "hf_id": "Corianas/Quokka_2.7b", + "name": "Quokka_2.7b", + "params_b": 2.786, + "ifeval": 0.17490702447284318, + "bbh": 0.3055474937424842, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.11452792553191489, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.3908333333333333, + "hf_avg": 4.995249580966088 + }, + { + "hf_id": "Corianas/llama-3-reactor", + "name": "llama-3-reactor", + "params_b": -1, + "ifeval": 0.23001192391742797, + "bbh": 0.4457148560545015, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2800864361702128, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.39771874999999995, + "hf_avg": 13.995469727045661 + }, + { + "hf_id": "CortexLM/btlm-7b-base-v0.2", + "name": "btlm-7b-base-v0.2", + "params_b": 6.885, + "ifeval": 0.14832865685270635, + "bbh": 0.4006411985841813, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.2349567819148936, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.38460416666666664, + "hf_avg": 8.920254930681883 + }, + { + "hf_id": "Cran-May/T.E-8.1", + "name": "T.E-8.1", + "params_b": 7.616, + "ifeval": 0.7076922565459647, + "bbh": 0.5581754708123893, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.4432347074468085, + "hf_math_lvl5": 0.44561933534743203, + "hf_musr": 0.4505208333333333, + "hf_avg": 35.699515240521414 + }, + { + "hf_id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", + "name": "Llama-3.1-8B-R1-v0.1", + "params_b": 8.03, + "ifeval": 0.323485019747603, + "bbh": 0.3057485865545513, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.12516622340425532, + "hf_math_lvl5": 0.18126888217522658, + "hf_musr": 0.36215624999999996, + "hf_avg": 10.034841772545372 + }, + { + "hf_id": "CultriX/Qwen2.5-14B-HyperMarck-dl", + "name": "Qwen2.5-14B-HyperMarck-dl", + "params_b": 14.766, + "ifeval": 0.6650276821057017, + "bbh": 0.6096480033153927, + "gpqa": 0.3674496644295302, + "mmlu_pro": 0.5090591755319149, + "hf_math_lvl5": 0.5287009063444109, + "hf_musr": 0.4415625, + "hf_avg": 39.89416756441815 + }, + { + "hf_id": "CultriX/Qwen2.5-14B-ReasoningMerge", + "name": "Qwen2.5-14B-ReasoningMerge", + "params_b": 14.766, + "ifeval": 0.46054690443578594, + "bbh": 0.6578226399295218, + "gpqa": 0.4077181208053691, + "mmlu_pro": 0.5344913563829787, + "hf_math_lvl5": 0.520392749244713, + "hf_musr": 0.5165937500000001, + "hf_avg": 40.645885900911715 + }, + { + "hf_id": "CultriX/Qwen2.5-14B-Ultimav2", + "name": "Qwen2.5-14B-Ultimav2", + "params_b": 14.766, + "ifeval": 0.5500228283177524, + "bbh": 0.6555027486976712, + "gpqa": 0.3850671140939597, + "mmlu_pro": 0.5417220744680851, + "hf_math_lvl5": 0.38444108761329304, + "hf_musr": 0.4965625, + "hf_avg": 38.835600416037856 + }, + { + "hf_id": "CultriX/Qwen2.5-14B-Wernicke", + "name": "Qwen2.5-14B-Wernicke", + "params_b": 14.77, + "ifeval": 0.5234699486252034, + "bbh": 0.6568359662501574, + "gpqa": 0.3934563758389262, + "mmlu_pro": 0.5423869680851063, + "hf_math_lvl5": 0.3814199395770393, + "hf_musr": 0.46890625, + "hf_avg": 37.94335063991926 + }, + { + "hf_id": "CultriX/Qwen2.5-14B-Wernicke-SFT", + "name": "Qwen2.5-14B-Wernicke-SFT", + "params_b": 14.77, + "ifeval": 0.4937443760333692, + "bbh": 0.6460586236565512, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.5069813829787234, + "hf_math_lvl5": 0.3595166163141994, + "hf_musr": 0.38999999999999996, + "hf_avg": 33.549511791224056 + }, + { + "hf_id": "CultriX/SeQwence-14B", + "name": "SeQwence-14B", + "params_b": 14.766, + "ifeval": 0.5351600420218354, + "bbh": 0.6505665291288972, + "gpqa": 0.36073825503355705, + "mmlu_pro": 0.5418882978723404, + "hf_math_lvl5": 0.35347432024169184, + "hf_musr": 0.46661458333333333, + "hf_avg": 36.886272868734814 + }, + { + "hf_id": "CultriX/SeQwence-14B-EvolMerge", + "name": "SeQwence-14B-EvolMerge", + "params_b": 14.766, + "ifeval": 0.5381576439403006, + "bbh": 0.6572183434723883, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5418882978723404, + "hf_math_lvl5": 0.36706948640483383, + "hf_musr": 0.48208333333333336, + "hf_avg": 38.018640895247245 + }, + { + "hf_id": "CultriX/SeQwence-14Bv1", + "name": "SeQwence-14Bv1", + "params_b": 14.766, + "ifeval": 0.6678003253589365, + "bbh": 0.6344673727103446, + "gpqa": 0.3615771812080537, + "mmlu_pro": 0.531998005319149, + "hf_math_lvl5": 0.3610271903323263, + "hf_musr": 0.47042708333333333, + "hf_avg": 38.625628460880606 + }, + { + "hf_id": "DRXD1000/Atlas-7B", + "name": "Atlas-7B", + "params_b": 7.768, + "ifeval": 0.3704459722425387, + "bbh": 0.3302176697760134, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.14012632978723405, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.33425, + "hf_avg": 8.78657746420255 + }, + { + "hf_id": "DRXD1000/Phoenix-7B", + "name": "Phoenix-7B", + "params_b": 7.242, + "ifeval": 0.3209617149164218, + "bbh": 0.3931566034728218, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.23429188829787234, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.38494791666666667, + "hf_avg": 12.420154178714037 + }, + { + "hf_id": "Daemontatox/AetherDrake-SFT", + "name": "AetherDrake-SFT", + "params_b": 8.03, + "ifeval": 0.4812796712722244, + "bbh": 0.48720075507220245, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.34990026595744683, + "hf_math_lvl5": 0.1510574018126888, + "hf_musr": 0.40884375, + "hf_avg": 22.917960736440758 + }, + { + "hf_id": "Daemontatox/AetherSett", + "name": "AetherSett", + "params_b": 7.616, + "ifeval": 0.5369586031729146, + "bbh": 0.5451624435465484, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.4278590425531915, + "hf_math_lvl5": 0.3972809667673716, + "hf_musr": 0.46031249999999996, + "hf_avg": 31.420122512312307 + }, + { + "hf_id": "Daemontatox/AetherTOT", + "name": "AetherTOT", + "params_b": 10.67, + "ifeval": 0.4397642699149368, + "bbh": 0.5066056342472064, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.38040226063829785, + "hf_math_lvl5": 0.1487915407854985, + "hf_musr": 0.4078541666666667, + "hf_avg": 23.178825097337906 + }, + { + "hf_id": "Daemontatox/AetherTOT", + "name": "AetherTOT", + "params_b": 10.67, + "ifeval": 0.43829040279790954, + "bbh": 0.5034307630533988, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.37782579787234044, + "hf_math_lvl5": 0.14425981873111782, + "hf_musr": 0.40518750000000003, + "hf_avg": 22.874708418571885 + }, + { + "hf_id": "Daemontatox/CogitoZ", + "name": "CogitoZ", + "params_b": 32.764, + "ifeval": 0.3967240255854466, + "bbh": 0.6734487392645502, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5592586436170213, + "hf_math_lvl5": 0.5241691842900302, + "hf_musr": 0.4792604166666667, + "hf_avg": 39.383291042826805 + }, + { + "hf_id": "Daemontatox/DocumentCogito", + "name": "DocumentCogito", + "params_b": 10.67, + "ifeval": 0.5064340394597445, + "bbh": 0.5111563719111275, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.38023603723404253, + "hf_math_lvl5": 0.16314199395770393, + "hf_musr": 0.3973125, + "hf_avg": 24.220439046588428 + }, + { + "hf_id": "Daemontatox/DocumentCogito", + "name": "DocumentCogito", + "params_b": 10.67, + "ifeval": 0.7770349339751859, + "bbh": 0.5186726621665779, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3737533244680851, + "hf_math_lvl5": 0.21978851963746224, + "hf_musr": 0.39105208333333336, + "hf_avg": 29.10815605844729 + }, + { + "hf_id": "Daemontatox/Llama3.3-70B-CogniLink", + "name": "Llama3.3-70B-CogniLink", + "params_b": 70.554, + "ifeval": 0.6931042965996888, + "bbh": 0.666832775829349, + "gpqa": 0.44546979865771813, + "mmlu_pro": 0.5172872340425532, + "hf_math_lvl5": 0.41389728096676737, + "hf_musr": 0.4876979166666667, + "hf_avg": 42.77471354959223 + }, + { + "hf_id": "Daemontatox/MawaredT1", + "name": "MawaredT1", + "params_b": 7.616, + "ifeval": 0.41988036188424493, + "bbh": 0.5214815439293661, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4718251329787234, + "hf_math_lvl5": 0.3021148036253776, + "hf_musr": 0.47020833333333334, + "hf_avg": 29.231298221572775 + }, + { + "hf_id": "Daemontatox/PathFinderAI2.0", + "name": "PathFinderAI2.0", + "params_b": 32.764, + "ifeval": 0.45410178326839457, + "bbh": 0.665823006477417, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.5546875, + "hf_math_lvl5": 0.5075528700906344, + "hf_musr": 0.4215625, + "hf_avg": 36.25665231066442 + }, + { + "hf_id": "Daemontatox/PathFinderAi3.0", + "name": "PathFinderAi3.0", + "params_b": 32.764, + "ifeval": 0.42709898624538445, + "bbh": 0.6884221416328996, + "gpqa": 0.4085570469798658, + "mmlu_pro": 0.5757147606382979, + "hf_math_lvl5": 0.5045317220543807, + "hf_musr": 0.4806875, + "hf_avg": 40.45869427281298 + }, + { + "hf_id": "Daemontatox/PathfinderAI", + "name": "PathfinderAI", + "params_b": 32.764, + "ifeval": 0.37451739163198094, + "bbh": 0.6667854331232542, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.559341755319149, + "hf_math_lvl5": 0.47583081570996977, + "hf_musr": 0.48583333333333334, + "hf_avg": 38.13131352504058 + }, + { + "hf_id": "Daemontatox/PathfinderAI", + "name": "PathfinderAI", + "params_b": 32.764, + "ifeval": 0.4855006937148987, + "bbh": 0.6627335380624046, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.554188829787234, + "hf_math_lvl5": 0.48413897280966767, + "hf_musr": 0.42559375, + "hf_avg": 36.54876805856116 + }, + { + "hf_id": "Daemontatox/PixelParse_AI", + "name": "PixelParse_AI", + "params_b": 10.67, + "ifeval": 0.43829040279790954, + "bbh": 0.5034307630533988, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.37782579787234044, + "hf_math_lvl5": 0.1472809667673716, + "hf_musr": 0.40518750000000003, + "hf_avg": 22.92506088584278 + }, + { + "hf_id": "Daemontatox/RA_Reasoner", + "name": "RA_Reasoner", + "params_b": 10.306, + "ifeval": 0.559215104810791, + "bbh": 0.6053692417205033, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.43001994680851063, + "hf_math_lvl5": 0.2122356495468278, + "hf_musr": 0.3963541666666666, + "hf_avg": 29.208002590797037 + }, + { + "hf_id": "Daemontatox/RA_Reasoner2.0", + "name": "RA_Reasoner2.0", + "params_b": 10.306, + "ifeval": 0.5366339091388627, + "bbh": 0.6062469551969276, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.4353390957446808, + "hf_math_lvl5": 0.2311178247734139, + "hf_musr": 0.3883541666666667, + "hf_avg": 29.039667218867848 + }, + { + "hf_id": "Daemontatox/SphinX", + "name": "SphinX", + "params_b": 7.616, + "ifeval": 0.5725042886208593, + "bbh": 0.5440583486084486, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.43658577127659576, + "hf_math_lvl5": 0.3081570996978852, + "hf_musr": 0.44049999999999995, + "hf_avg": 29.87478015820759 + }, + { + "hf_id": "Daemontatox/Sphinx2.0", + "name": "Sphinx2.0", + "params_b": 14.77, + "ifeval": 0.7123133286346892, + "bbh": 0.647283976671531, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.5183676861702128, + "hf_math_lvl5": 0.40181268882175225, + "hf_musr": 0.42603125000000003, + "hf_avg": 37.69418546772203 + }, + { + "hf_id": "Daemontatox/Zirel-7B-Math", + "name": "Zirel-7B-Math", + "params_b": 7.616, + "ifeval": 0.6638785090227264, + "bbh": 0.5447698777469486, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.4237034574468085, + "hf_math_lvl5": 0.19788519637462235, + "hf_musr": 0.47891666666666666, + "hf_avg": 30.976625000031117 + }, + { + "hf_id": "Daemontatox/Zirel_1.5", + "name": "Zirel_1.5", + "params_b": 1.544, + "ifeval": 0.4167575366693706, + "bbh": 0.3984669254999634, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.21434507978723405, + "hf_math_lvl5": 0.11329305135951662, + "hf_musr": 0.36581250000000004, + "hf_avg": 14.243506396749902 + }, + { + "hf_id": "Daemontatox/mini-Cogito-R1", + "name": "mini-Cogito-R1", + "params_b": 1.777, + "ifeval": 0.2298368329366082, + "bbh": 0.3280491875175077, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.14818816489361702, + "hf_math_lvl5": 0.27492447129909364, + "hf_musr": 0.34469791666666666, + "hf_avg": 11.629717650498435 + }, + { + "hf_id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", + "name": "Llama-3.1-8B-Ultra-Instruct", + "params_b": 8.03, + "ifeval": 0.8081091503876381, + "bbh": 0.5257532452246574, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.382563164893617, + "hf_math_lvl5": 0.22054380664652568, + "hf_musr": 0.40032291666666664, + "hf_avg": 30.159277021889153 + }, + { + "hf_id": "Danielbrdz/Barcenas-10b", + "name": "Barcenas-10b", + "params_b": 10.306, + "ifeval": 0.6607811717354397, + "bbh": 0.6120828494270083, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.4360871010638298, + "hf_math_lvl5": 0.21525679758308158, + "hf_musr": 0.41346875, + "hf_avg": 31.870970597687858 + }, + { + "hf_id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", + "name": "Barcenas-14b-Phi-3-medium-ORPO", + "params_b": 13.96, + "ifeval": 0.4799055395240185, + "bbh": 0.6536184886648629, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.47232380319148937, + "hf_math_lvl5": 0.20241691842900303, + "hf_musr": 0.48075, + "hf_avg": 31.88950498581046 + }, + { + "hf_id": "Danielbrdz/Barcenas-14b-phi-4", + "name": "Barcenas-14b-phi-4", + "params_b": 14.66, + "ifeval": 0.0497590836757581, + "bbh": 0.6769303819643072, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.5174534574468085, + "hf_math_lvl5": 0.2583081570996979, + "hf_musr": 0.5096770833333334, + "hf_avg": 28.746056207730017 + }, + { + "hf_id": "Danielbrdz/Barcenas-14b-phi-4-v2", + "name": "Barcenas-14b-phi-4-v2", + "params_b": 14.66, + "ifeval": 0.27747266142723526, + "bbh": 0.6573002324945257, + "gpqa": 0.3783557046979866, + "mmlu_pro": 0.5243517287234043, + "hf_math_lvl5": 0.3217522658610272, + "hf_musr": 0.43994791666666666, + "hf_avg": 31.447866315089907 + }, + { + "hf_id": "Danielbrdz/Barcenas-3b-GRPO", + "name": "Barcenas-3b-GRPO", + "params_b": 3.213, + "ifeval": 0.5444276741268723, + "bbh": 0.44143515175110304, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3036901595744681, + "hf_math_lvl5": 0.13746223564954682, + "hf_musr": 0.35759375, + "hf_avg": 20.5654768779582 + }, + { + "hf_id": "Danielbrdz/Barcenas-Llama3-8b-ORPO", + "name": "Barcenas-Llama3-8b-ORPO", + "params_b": 8.03, + "ifeval": 0.737242738156979, + "bbh": 0.49865578559911244, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3829787234042553, + "hf_math_lvl5": 0.06570996978851963, + "hf_musr": 0.4189583333333333, + "hf_avg": 26.51900505359198 + }, + { + "hf_id": "Danielbrdz/Barcenas-R1-Qwen-1.5b", + "name": "Barcenas-R1-Qwen-1.5b", + "params_b": 1.777, + "ifeval": 0.24280132271262472, + "bbh": 0.35872011187392944, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.19090757978723405, + "hf_math_lvl5": 0.3496978851963746, + "hf_musr": 0.354125, + "hf_avg": 15.138858570183295 + }, + { + "hf_id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", + "name": "12b-mn-dans-reasoning-test-2", + "params_b": 12.248, + "ifeval": 0.3710953603106424, + "bbh": 0.48070333147041405, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.2507480053191489, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.37021875, + "hf_avg": 15.564777531713789 + }, + { + "hf_id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", + "name": "12b-mn-dans-reasoning-test-3", + "params_b": 12.248, + "ifeval": 0.5052593784491815, + "bbh": 0.48388753289945696, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.2515791223404255, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.4167604166666667, + "hf_avg": 19.13127211265294 + }, + { + "hf_id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", + "name": "Dans-Instruct-CoreCurriculum-12b-ChatML", + "params_b": 12.248, + "ifeval": 0.21110209798889168, + "bbh": 0.4791864789096407, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.2805019946808511, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.3606354166666667, + "hf_avg": 13.542857676063695 + }, + { + "hf_id": "Dans-DiscountModels/mistral-7b-test-merged", + "name": "mistral-7b-test-merged", + "params_b": 7, + "ifeval": 0.6678003253589365, + "bbh": 0.48981661658184755, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.29778922872340424, + "hf_math_lvl5": 0.0445619335347432, + "hf_musr": 0.3753958333333333, + "hf_avg": 22.073339331094914 + }, + { + "hf_id": "DavieLion/Llama-3.2-1B-SPIN-iter0", + "name": "Llama-3.2-1B-SPIN-iter0", + "params_b": 1.236, + "ifeval": 0.15067687070306784, + "bbh": 0.29300816789978756, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.11253324468085106, + "hf_musr": 0.3565416666666667, + "hf_avg": 3.6238167467938993 + }, + { + "hf_id": "DavieLion/Llama-3.2-1B-SPIN-iter0", + "name": "Llama-3.2-1B-SPIN-iter0", + "params_b": 1.236, + "ifeval": 0.15492338107332987, + "bbh": 0.29372614029730437, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11278257978723404, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.3564791666666667, + "hf_avg": 3.985688359677067 + }, + { + "hf_id": "DavieLion/Llama-3.2-1B-SPIN-iter1", + "name": "Llama-3.2-1B-SPIN-iter1", + "params_b": 1.236, + "ifeval": 0.15754642127333254, + "bbh": 0.29402546232087917, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.11178523936170212, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.3646041666666667, + "hf_avg": 3.7519748574168585 + }, + { + "hf_id": "DavieLion/Llama-3.2-1B-SPIN-iter2", + "name": "Llama-3.2-1B-SPIN-iter2", + "params_b": 1.236, + "ifeval": 0.13761264555822994, + "bbh": 0.2980340303779312, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.11286569148936171, + "hf_math_lvl5": 0.005287009063444108, + "hf_musr": 0.35530208333333335, + "hf_avg": 3.658145708347315 + }, + { + "hf_id": "DavieLion/Llama-3.2-1B-SPIN-iter3", + "name": "Llama-3.2-1B-SPIN-iter3", + "params_b": 1.236, + "ifeval": 0.1335910938531984, + "bbh": 0.29752276438021447, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.11278257978723404, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.34996875, + "hf_avg": 3.593140598286666 + }, + { + "hf_id": "DavieLion/Llama-3.2-1B-SPIN-iter3", + "name": "Llama-3.2-1B-SPIN-iter3", + "params_b": 1.236, + "ifeval": 0.1323920530858123, + "bbh": 0.29722352809482616, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.11286569148936171, + "hf_musr": 0.3526666666666667, + "hf_avg": 3.6136899611836006 + }, + { + "hf_id": "DavieLion/Lllma-3.2-1B", + "name": "Lllma-3.2-1B", + "params_b": 1.236, + "ifeval": 0.1601439735457475, + "bbh": 0.2964692268500723, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.11261635638297872, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.35781250000000003, + "hf_avg": 3.932331961262434 + }, + { + "hf_id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", + "name": "Llama-3.1-Argunaut-1-8B-SFT", + "params_b": 8.03, + "ifeval": 0.551921124837653, + "bbh": 0.48238301936695316, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.3472406914893617, + "hf_math_lvl5": 0.14501510574018128, + "hf_musr": 0.4503020833333333, + "hf_avg": 24.113556306971883 + }, + { + "hf_id": "Deci/DeciLM-7B", + "name": "DeciLM-7B", + "params_b": 7.044, + "ifeval": 0.28129474239462404, + "bbh": 0.44228566674266495, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.26919880319148937, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.43585416666666665, + "hf_avg": 15.023477940437223 + }, + { + "hf_id": "Deci/DeciLM-7B-instruct", + "name": "DeciLM-7B-instruct", + "params_b": 7.044, + "ifeval": 0.4880239985460799, + "bbh": 0.4589748654047652, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.26080452127659576, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.38841666666666663, + "hf_avg": 17.470092220993035 + }, + { + "hf_id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", + "name": "Explore_Llama-3.1-8B-Inst", + "params_b": 8.03, + "ifeval": 0.7794828831943688, + "bbh": 0.511742159482904, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.379155585106383, + "hf_math_lvl5": 0.20090634441087613, + "hf_musr": 0.3909583333333333, + "hf_avg": 28.926700693431915 + }, + { + "hf_id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", + "name": "Explore_Llama-3.2-1B-Inst", + "params_b": 1.236, + "ifeval": 0.5648856146136695, + "bbh": 0.35048085637770016, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.18085106382978725, + "hf_math_lvl5": 0.07477341389728097, + "hf_musr": 0.31834375, + "hf_avg": 13.897376870733368 + }, + { + "hf_id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", + "name": "Explore_Llama-3.2-1B-Inst_v1", + "params_b": 1.236, + "ifeval": 0.4998891829235318, + "bbh": 0.3141475230443668, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.12691156914893617, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.37809374999999995, + "hf_avg": 10.921433609301348 + }, + { + "hf_id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", + "name": "Explore_Llama-3.2-1B-Inst_v1.1", + "params_b": 1.236, + "ifeval": 0.5844193406827218, + "bbh": 0.3512662445055541, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.18184840425531915, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.3117083333333333, + "hf_avg": 14.311829325717667 + }, + { + "hf_id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", + "name": "ldm_soup_Llama-3.1-8B-Inst", + "params_b": 8.03, + "ifeval": 0.803263119633683, + "bbh": 0.512116784464076, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.38863031914893614, + "hf_math_lvl5": 0.18882175226586104, + "hf_musr": 0.41613541666666665, + "hf_avg": 29.85905810866196 + }, + { + "hf_id": "DeepMount00/Lexora-Medium-7B", + "name": "Lexora-Medium-7B", + "params_b": 7.616, + "ifeval": 0.4103379034295669, + "bbh": 0.5144844494250328, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.43251329787234044, + "hf_math_lvl5": 0.22205438066465258, + "hf_musr": 0.44394791666666666, + "hf_avg": 25.83719802694387 + }, + { + "hf_id": "DeepMount00/Llama-3-8b-Ita", + "name": "Llama-3-8b-Ita", + "params_b": 8.03, + "ifeval": 0.7530297388706411, + "bbh": 0.493576505761469, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.38522273936170215, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.4267708333333333, + "hf_avg": 26.7968164089867 + }, + { + "hf_id": "DeepMount00/Llama-3.1-Distilled", + "name": "Llama-3.1-Distilled", + "params_b": 8.03, + "ifeval": 0.7843787816327346, + "bbh": 0.5100875314179011, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3781582446808511, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.40581249999999996, + "hf_avg": 29.631398468956544 + }, + { + "hf_id": "DeepMount00/Qwen2-1.5B-Ita", + "name": "Qwen2-1.5B-Ita", + "params_b": 1.544, + "ifeval": 0.5173495214918638, + "bbh": 0.39805765159128703, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.2771775265957447, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.35037500000000005, + "hf_avg": 16.83176132950784 + }, + { + "hf_id": "DeepMount00/Qwen2-1.5B-Ita_v2", + "name": "Qwen2-1.5B-Ita_v2", + "params_b": 1.544, + "ifeval": 0.49998891829235315, + "bbh": 0.3953827803974795, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.30319148936170215, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.37018749999999995, + "hf_avg": 17.07000871129109 + }, + { + "hf_id": "DeepMount00/Qwen2-1.5B-Ita_v3", + "name": "Qwen2-1.5B-Ita_v3", + "params_b": 1.544, + "ifeval": 0.4890479483326463, + "bbh": 0.3948478837209111, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.3017785904255319, + "hf_math_lvl5": 0.1042296072507553, + "hf_musr": 0.37415624999999997, + "hf_avg": 16.948513223691304 + }, + { + "hf_id": "DeepMount00/Qwen2-1.5B-Ita_v5", + "name": "Qwen2-1.5B-Ita_v5", + "params_b": 1.544, + "ifeval": 0.4987400098405564, + "bbh": 0.40320443289745417, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.29429853723404253, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.34225, + "hf_avg": 17.023240512569227 + }, + { + "hf_id": "Delta-Vector/Henbane-7b-attempt2", + "name": "Henbane-7b-attempt2", + "params_b": 7, + "ifeval": 0.4157335868828043, + "bbh": 0.5061177974093075, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.4027593085106383, + "hf_math_lvl5": 0.22734138972809667, + "hf_musr": 0.39734375000000005, + "hf_avg": 23.813949980028713 + }, + { + "hf_id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", + "name": "DevQuasar-R1-Uncensored-Llama-8B", + "params_b": 8.03, + "ifeval": 0.38488432913558246, + "bbh": 0.5117943836412089, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.3614527925531915, + "hf_math_lvl5": 0.33081570996978854, + "hf_musr": 0.44357291666666665, + "hf_avg": 26.432648640760963 + }, + { + "hf_id": "DoppelReflEx/L3-8B-R1-WolfCore", + "name": "L3-8B-R1-WolfCore", + "params_b": 8.03, + "ifeval": 0.3775404814780339, + "bbh": 0.531794652653343, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.3716755319148936, + "hf_math_lvl5": 0.16314199395770393, + "hf_musr": 0.42766666666666664, + "hf_avg": 23.481232873269377 + }, + { + "hf_id": "DoppelReflEx/MN-12B-FoxFrame2-test", + "name": "MN-12B-FoxFrame2-test", + "params_b": 12.248, + "ifeval": 0.43189514931492884, + "bbh": 0.5484795753806021, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.3568816489361702, + "hf_math_lvl5": 0.1404833836858006, + "hf_musr": 0.4251875, + "hf_avg": 23.639728527998752 + }, + { + "hf_id": "DoppelReflEx/MN-12B-FoxFrame3-test", + "name": "MN-12B-FoxFrame3-test", + "params_b": 12.248, + "ifeval": 0.43231957871780213, + "bbh": 0.5394764281718397, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.35289228723404253, + "hf_math_lvl5": 0.13217522658610273, + "hf_musr": 0.45976041666666667, + "hf_avg": 23.94718793396009 + }, + { + "hf_id": "DoppelReflEx/MN-12B-Kakigori", + "name": "MN-12B-Kakigori", + "params_b": 12.248, + "ifeval": 0.359329911302012, + "bbh": 0.5415529337961275, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.3581283244680851, + "hf_math_lvl5": 0.11933534743202417, + "hf_musr": 0.40521875, + "hf_avg": 21.69773317205639 + }, + { + "hf_id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", + "name": "MN-12B-LilithFrame-Experiment-4", + "params_b": 12.248, + "ifeval": 0.3981480250180632, + "bbh": 0.5534370722864824, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.3648603723404255, + "hf_math_lvl5": 0.12235649546827794, + "hf_musr": 0.43706249999999996, + "hf_avg": 23.52862068050267 + }, + { + "hf_id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", + "name": "MN-12B-Mimicore-GreenSnake", + "params_b": 12.248, + "ifeval": 0.47800724300411795, + "bbh": 0.5480509710089697, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.3651097074468085, + "hf_math_lvl5": 0.13897280966767372, + "hf_musr": 0.4305833333333333, + "hf_avg": 25.0150134402288 + }, + { + "hf_id": "DoppelReflEx/MN-12B-Mimicore-Nocturne", + "name": "MN-12B-Mimicore-Nocturne", + "params_b": 12.248, + "ifeval": 0.3956502081144696, + "bbh": 0.5703329773483826, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.36336436170212766, + "hf_math_lvl5": 0.10574018126888217, + "hf_musr": 0.45690625, + "hf_avg": 24.06630184974314 + }, + { + "hf_id": "DoppelReflEx/MN-12B-Mimicore-Orochi", + "name": "MN-12B-Mimicore-Orochi", + "params_b": 12.248, + "ifeval": 0.4620451513096362, + "bbh": 0.54977394640115, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.34466422872340424, + "hf_math_lvl5": 0.13595166163141995, + "hf_musr": 0.45458333333333334, + "hf_avg": 24.652222849504522 + }, + { + "hf_id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", + "name": "MN-12B-Mimicore-WhiteSnake", + "params_b": 12.248, + "ifeval": 0.44376033369238066, + "bbh": 0.5604605871844869, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.3657746010638298, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.456875, + "hf_avg": 25.058560008551854 + }, + { + "hf_id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", + "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", + "params_b": 12.248, + "ifeval": 0.42405151664250856, + "bbh": 0.5184748714407336, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3341921542553192, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.40019791666666665, + "hf_avg": 21.794341198490546 + }, + { + "hf_id": "DoppelReflEx/MN-12B-WolFrame", + "name": "MN-12B-WolFrame", + "params_b": 12.248, + "ifeval": 0.4397387819873491, + "bbh": 0.511681287565329, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.33934507978723405, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.40146875, + "hf_avg": 22.078719684057372 + }, + { + "hf_id": "DoppelReflEx/MiniusLight-24B", + "name": "MiniusLight-24B", + "params_b": 23.572, + "ifeval": 0.25766410900854175, + "bbh": 0.6256461050033514, + "gpqa": 0.35822147651006714, + "mmlu_pro": 0.5091422872340425, + "hf_math_lvl5": 0.12613293051359517, + "hf_musr": 0.43191666666666667, + "hf_avg": 26.210340083042343 + }, + { + "hf_id": "DoppelReflEx/MiniusLight-24B-v1c-test", + "name": "MiniusLight-24B-v1c-test", + "params_b": 23.572, + "ifeval": 0.37858881102142317, + "bbh": 0.6752681657268389, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5487034574468085, + "hf_math_lvl5": 0.29682779456193353, + "hf_musr": 0.46341666666666664, + "hf_avg": 34.40831757695809 + }, + { + "hf_id": "DoppelReflEx/MiniusLight-24B-v1d-test", + "name": "MiniusLight-24B-v1d-test", + "params_b": 23.572, + "ifeval": 0.40324339419407174, + "bbh": 0.6712025325276962, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5488696808510638, + "hf_math_lvl5": 0.2945619335347432, + "hf_musr": 0.46208333333333335, + "hf_avg": 34.681949426054764 + }, + { + "hf_id": "DreadPoor/Alita99-8B-LINEAR", + "name": "Alita99-8B-LINEAR", + "params_b": 8.03, + "ifeval": 0.7190077882241341, + "bbh": 0.5441767095577089, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.38090093085106386, + "hf_math_lvl5": 0.1646525679758308, + "hf_musr": 0.42664583333333334, + "hf_avg": 29.392264436250034 + }, + { + "hf_id": "DreadPoor/Aspire-8B-model_stock", + "name": "Aspire-8B-model_stock", + "params_b": 8.03, + "ifeval": 0.7140620221013578, + "bbh": 0.5278251846388996, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.37632978723404253, + "hf_math_lvl5": 0.14954682779456194, + "hf_musr": 0.42124999999999996, + "hf_avg": 28.611281691142512 + }, + { + "hf_id": "DreadPoor/Aurora_faustus-8B-LINEAR", + "name": "Aurora_faustus-8B-LINEAR", + "params_b": 8.03, + "ifeval": 0.7281003293483512, + "bbh": 0.5515538279425277, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3842253989361702, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.4145833333333333, + "hf_avg": 29.619908170064097 + }, + { + "hf_id": "DreadPoor/BaeZel-8B-LINEAR", + "name": "BaeZel-8B-LINEAR", + "params_b": 8.03, + "ifeval": 0.7377923908562614, + "bbh": 0.5463800554321383, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3861369680851064, + "hf_math_lvl5": 0.18126888217522658, + "hf_musr": 0.4227083333333333, + "hf_avg": 30.34681191186813 + }, + { + "hf_id": "DreadPoor/Casuar-9B-Model_Stock", + "name": "Casuar-9B-Model_Stock", + "params_b": 9.242, + "ifeval": 0.7764852812759035, + "bbh": 0.6106681877306871, + "gpqa": 0.3447986577181208, + "mmlu_pro": 0.4156416223404255, + "hf_math_lvl5": 0.21299093655589124, + "hf_musr": 0.41654166666666664, + "hf_avg": 33.631173922201846 + }, + { + "hf_id": "DreadPoor/Derivative-8B-Model_Stock", + "name": "Derivative-8B-Model_Stock", + "params_b": 8.03, + "ifeval": 0.7667433520835827, + "bbh": 0.5395493987763994, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.3810671542553192, + "hf_math_lvl5": 0.17900302114803626, + "hf_musr": 0.42004166666666665, + "hf_avg": 30.10114939087056 + }, + { + "hf_id": "DreadPoor/Heart_Stolen-8B-Model_Stock", + "name": "Heart_Stolen-8B-Model_Stock", + "params_b": 8.03, + "ifeval": 0.7244533393617822, + "bbh": 0.5395443745186658, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.37940492021276595, + "hf_math_lvl5": 0.17220543806646527, + "hf_musr": 0.41622916666666665, + "hf_avg": 29.411035925290932 + }, + { + "hf_id": "DreadPoor/LemonP-8B-Model_Stock", + "name": "LemonP-8B-Model_Stock", + "params_b": 8.03, + "ifeval": 0.7676176988169169, + "bbh": 0.5439348074265458, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.40043218085106386, + "hf_math_lvl5": 0.17673716012084592, + "hf_musr": 0.40810416666666666, + "hf_avg": 30.052497237216315 + }, + { + "hf_id": "DreadPoor/ONeil-model_stock-8B", + "name": "ONeil-model_stock-8B", + "params_b": 8.03, + "ifeval": 0.6785662043378236, + "bbh": 0.5548337982400763, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.35987367021276595, + "hf_math_lvl5": 0.10120845921450151, + "hf_musr": 0.41734374999999996, + "hf_avg": 26.935908369361425 + }, + { + "hf_id": "DreadPoor/Trinas_Nectar-8B-model_stock", + "name": "Trinas_Nectar-8B-model_stock", + "params_b": 8.03, + "ifeval": 0.7259272064788096, + "bbh": 0.5256123853406084, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.36178523936170215, + "hf_math_lvl5": 0.15256797583081572, + "hf_musr": 0.4067708333333333, + "hf_avg": 27.522454161375844 + }, + { + "hf_id": "DreadPoor/Zelus-8B-Model_Stock", + "name": "Zelus-8B-Model_Stock", + "params_b": 8.03, + "ifeval": 0.778833495126265, + "bbh": 0.5307011398651839, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.38414228723404253, + "hf_math_lvl5": 0.1646525679758308, + "hf_musr": 0.42140625, + "hf_avg": 29.7413869053032 + }, + { + "hf_id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", + "name": "ILAB-Merging-3B-V2", + "params_b": 3.821, + "ifeval": 0.40289432040319684, + "bbh": 0.5401935891431586, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.38605385638297873, + "hf_math_lvl5": 0.15181268882175228, + "hf_musr": 0.43321875, + "hf_avg": 24.06556562824206 + }, + { + "hf_id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", + "name": "EVA-Qwen2.5-14B-v0.2", + "params_b": 14.77, + "ifeval": 0.4038429145777648, + "bbh": 0.6090237540046592, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.5135472074468085, + "hf_math_lvl5": 0.3406344410876133, + "hf_musr": 0.4794479166666667, + "hf_avg": 33.81260628765727 + }, + { + "hf_id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", + "name": "EVA-Qwen2.5-72B-v0.2", + "params_b": 72.706, + "ifeval": 0.6878837041272712, + "bbh": 0.7088012228048761, + "gpqa": 0.4085570469798658, + "mmlu_pro": 0.581283244680851, + "hf_math_lvl5": 0.4312688821752266, + "hf_musr": 0.47197916666666667, + "hf_avg": 44.221596025457025 + }, + { + "hf_id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", + "name": "meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", + "params_b": 8.03, + "ifeval": 0.7147114101694614, + "bbh": 0.4979908369885237, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.36361369680851063, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.33415625, + "hf_avg": 23.448866811862917 + }, + { + "hf_id": "EleutherAI/gpt-j-6b", + "name": "gpt-j-6b", + "params_b": 6, + "ifeval": 0.2522185578708937, + "bbh": 0.3191044431037278, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.12408577127659574, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.36575, + "hf_avg": 6.570411768928537 + }, + { + "hf_id": "EleutherAI/gpt-neo-1.3B", + "name": "gpt-neo-1.3B", + "params_b": 1.366, + "ifeval": 0.20790502533278366, + "bbh": 0.30392315869356407, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.1163563829787234, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.38165625, + "hf_avg": 5.391090848825532 + }, + { + "hf_id": "EleutherAI/gpt-neo-125m", + "name": "gpt-neo-125m", + "params_b": 0.15, + "ifeval": 0.19054442213327305, + "bbh": 0.3115156885791523, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.10255984042553191, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.3593333333333333, + "hf_avg": 4.407321907614049 + }, + { + "hf_id": "EleutherAI/gpt-neo-2.7B", + "name": "gpt-neo-2.7B", + "params_b": 2.718, + "ifeval": 0.2589628851447493, + "bbh": 0.3139516033315253, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.11627327127659574, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.3553645833333334, + "hf_avg": 6.4310478009874465 + }, + { + "hf_id": "EleutherAI/gpt-neox-20b", + "name": "gpt-neox-20b", + "params_b": 20.739, + "ifeval": 0.2586880587951081, + "bbh": 0.31650380320877564, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.1155252659574468, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.36466666666666664, + "hf_avg": 6.1165221524743325 + }, + { + "hf_id": "EleutherAI/pythia-1.4b", + "name": "pythia-1.4b", + "params_b": 1.515, + "ifeval": 0.23708094522533543, + "bbh": 0.315042649740714, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.11228390957446809, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.35378125, + "hf_avg": 6.008531439497028 + }, + { + "hf_id": "EleutherAI/pythia-12b", + "name": "pythia-12b", + "params_b": 12, + "ifeval": 0.24714756845170813, + "bbh": 0.3179653957935337, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.11087101063829788, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.3646979166666667, + "hf_avg": 6.059841492942702 + }, + { + "hf_id": "EleutherAI/pythia-160m", + "name": "pythia-160m", + "params_b": 0.213, + "ifeval": 0.18155161637787737, + "bbh": 0.2970437484241321, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11195146276595745, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.4179375, + "hf_avg": 5.730394616916023 + }, + { + "hf_id": "EleutherAI/pythia-1b", + "name": "pythia-1b", + "params_b": 1.079, + "ifeval": 0.2207941594968018, + "bbh": 0.3004093017564394, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.11361369680851063, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.35520833333333335, + "hf_avg": 5.07026822083096 + }, + { + "hf_id": "EleutherAI/pythia-2.8b", + "name": "pythia-2.8b", + "params_b": 2.909, + "ifeval": 0.21732226049105263, + "bbh": 0.3224085936276087, + "gpqa": 0.25, + "mmlu_pro": 0.11369680851063829, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3485729166666667, + "hf_avg": 5.554946281603011 + }, + { + "hf_id": "EleutherAI/pythia-410m", + "name": "pythia-410m", + "params_b": 0.506, + "ifeval": 0.21954525104500505, + "bbh": 0.302813387064426, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11278257978723404, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.35781250000000003, + "hf_avg": 5.227072311484412 + }, + { + "hf_id": "EleutherAI/pythia-6.9b", + "name": "pythia-6.9b", + "params_b": 6.9, + "ifeval": 0.22811362739752744, + "bbh": 0.3232287869322383, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.1146941489361702, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3590520833333333, + "hf_avg": 5.966546774741993 + }, + { + "hf_id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", + "name": "EnnoAi-Pro-French-Llama-3-8B-v0.4", + "params_b": 8.031, + "ifeval": 0.4188807918545016, + "bbh": 0.4074954889367559, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.2634640957446808, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.41700000000000004, + "hf_avg": 15.68446940398294 + }, + { + "hf_id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", + "name": "EnnoAi-Pro-Llama-3-8B", + "params_b": 8.031, + "ifeval": 0.31953771548380516, + "bbh": 0.4151575806137866, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.21509308510638298, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.4070520833333333, + "hf_avg": 12.51454591188142 + }, + { + "hf_id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", + "name": "EnnoAi-Pro-Llama-3-8B-v0.3", + "params_b": 8.03, + "ifeval": 0.5082569803676467, + "bbh": 0.4100577461090639, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.2990359042553192, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.42357291666666663, + "hf_avg": 18.128287498482546 + }, + { + "hf_id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", + "name": "EnnoAi-Pro-Llama-3.1-8B-v0.9", + "params_b": 8.03, + "ifeval": 0.4689147018799009, + "bbh": 0.41602720836190127, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.2595578457446808, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.3831770833333333, + "hf_avg": 15.575099921155847 + }, + { + "hf_id": "EnnoAi/EnnoAi-7B-French-Instruct-202502", + "name": "EnnoAi-7B-French-Instruct-202502", + "params_b": 7.456, + "ifeval": 0.5564424615575562, + "bbh": 0.5574545199388612, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.4013464095744681, + "hf_math_lvl5": 0.3723564954682779, + "hf_musr": 0.45997916666666666, + "hf_avg": 31.28743755248046 + }, + { + "hf_id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", + "name": "EnnoAi-Pro-Llama-3.1-8B-v1.0", + "params_b": 8.03, + "ifeval": 0.4704384366813389, + "bbh": 0.41602720836190127, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.2595578457446808, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.3831770833333333, + "hf_avg": 15.600495501179813 + }, + { + "hf_id": "Epiculous/Azure_Dusk-v0.2", + "name": "Azure_Dusk-v0.2", + "params_b": 12.248, + "ifeval": 0.346715603487635, + "bbh": 0.4119721873553597, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.3034408244680851, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3834583333333333, + "hf_avg": 14.239648971826766 + }, + { + "hf_id": "Epiculous/Crimson_Dawn-v0.2", + "name": "Crimson_Dawn-v0.2", + "params_b": 12.248, + "ifeval": 0.3103454389907667, + "bbh": 0.44823796489645434, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.27210771276595747, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.4151770833333333, + "hf_avg": 15.085950749805088 + }, + { + "hf_id": "Epiculous/NovaSpark", + "name": "NovaSpark", + "params_b": 8.03, + "ifeval": 0.6408473960203371, + "bbh": 0.5063958663768304, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3648603723404255, + "hf_math_lvl5": 0.15181268882175228, + "hf_musr": 0.3881979166666667, + "hf_avg": 25.253737990368 + }, + { + "hf_id": "Epiculous/Violet_Twilight-v0.2", + "name": "Violet_Twilight-v0.2", + "params_b": 12.248, + "ifeval": 0.45317756885064964, + "bbh": 0.4614552476845888, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.3110871010638298, + "hf_math_lvl5": 0.02870090634441088, + "hf_musr": 0.42993750000000003, + "hf_avg": 18.55277348742638 + }, + { + "hf_id": "EpistemeAI/Alpaca-Llama3.1-8B", + "name": "Alpaca-Llama3.1-8B", + "params_b": 8, + "ifeval": 0.15986914719610634, + "bbh": 0.47552608539742874, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3246343085106383, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.3402604166666667, + "hf_avg": 13.985046352420847 + }, + { + "hf_id": "EpistemeAI/Athena-gemma-2-2b-it", + "name": "Athena-gemma-2-2b-it", + "params_b": 2, + "ifeval": 0.3134172883504657, + "bbh": 0.42642293591146, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.2421875, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.43505208333333334, + "hf_avg": 14.54609172912029 + }, + { + "hf_id": "EpistemeAI/Athena-gemma-2-2b-it-Philos", + "name": "Athena-gemma-2-2b-it-Philos", + "params_b": 2, + "ifeval": 0.4620950189940469, + "bbh": 0.37947768790586744, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.22481715425531915, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.43136458333333333, + "hf_avg": 15.663946300399497 + }, + { + "hf_id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", + "name": "Athene-codegemma-2-7b-it-alpaca-v1.3", + "params_b": 7, + "ifeval": 0.40299405577201824, + "bbh": 0.4331916189482215, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.25872672872340424, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.4503020833333333, + "hf_avg": 17.314021588433324 + }, + { + "hf_id": "EpistemeAI/DeepPhi-3.5-mini-instruct", + "name": "DeepPhi-3.5-mini-instruct", + "params_b": 3.821, + "ifeval": 0.1325915238234551, + "bbh": 0.28822860667627487, + "gpqa": 0.2332214765100671, + "mmlu_pro": 0.11028922872340426, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.36562500000000003, + "hf_avg": 3.46432918593773 + }, + { + "hf_id": "EpistemeAI/DeepThinkers-Phi4", + "name": "DeepThinkers-Phi4", + "params_b": 14.66, + "ifeval": 0.6939786433330231, + "bbh": 0.6790415739665393, + "gpqa": 0.34060402684563756, + "mmlu_pro": 0.5257646276595744, + "hf_math_lvl5": 0.45845921450151056, + "hf_musr": 0.3980625, + "hf_avg": 39.40710857221479 + }, + { + "hf_id": "EpistemeAI/Fireball-12B", + "name": "Fireball-12B", + "params_b": 12.248, + "ifeval": 0.1833501775289565, + "bbh": 0.5110893652548262, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.3343583776595745, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.42363541666666665, + "hf_avg": 15.534531353358348 + }, + { + "hf_id": "EpistemeAI/Fireball-12B-v1.13a-philosophers", + "name": "Fireball-12B-v1.13a-philosophers", + "params_b": 12, + "ifeval": 0.08755324760524298, + "bbh": 0.5102697700597862, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3366855053191489, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.4080729166666666, + "hf_avg": 14.466040850876615 + }, + { + "hf_id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", + "name": "Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", + "params_b": 8, + "ifeval": 0.4577243934981405, + "bbh": 0.4838398624677178, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.35829454787234044, + "hf_math_lvl5": 0.12311178247734139, + "hf_musr": 0.39445833333333336, + "hf_avg": 21.129914482629534 + }, + { + "hf_id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", + "name": "Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", + "params_b": 8, + "ifeval": 0.7274010735958367, + "bbh": 0.48648902139668476, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.3543051861702128, + "hf_math_lvl5": 0.15256797583081572, + "hf_musr": 0.3619375, + "hf_avg": 25.242227896928764 + }, + { + "hf_id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", + "name": "Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", + "params_b": 8, + "ifeval": 0.46731561146646455, + "bbh": 0.4932027479020209, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3351894946808511, + "hf_math_lvl5": 0.12386706948640483, + "hf_musr": 0.46236458333333336, + "hf_avg": 22.550476627240737 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", + "params_b": 8, + "ifeval": 0.44318630123627534, + "bbh": 0.4823644760491404, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3515625, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.4066458333333333, + "hf_avg": 21.567361907712467 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", + "params_b": 8, + "ifeval": 0.4457339858242796, + "bbh": 0.48973199216860547, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3543051861702128, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.37622916666666667, + "hf_avg": 20.62716790652169 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", + "params_b": 8, + "ifeval": 0.5975334335119704, + "bbh": 0.4904191122627008, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.34225398936170215, + "hf_math_lvl5": 0.1336858006042296, + "hf_musr": 0.40103125, + "hf_avg": 23.934713958118284 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", + "params_b": 8, + "ifeval": 0.669099101495144, + "bbh": 0.4668070143164938, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.33892952127659576, + "hf_math_lvl5": 0.1336858006042296, + "hf_musr": 0.34178125, + "hf_avg": 23.14416546279052 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "params_b": 8.03, + "ifeval": 0.7304984108831234, + "bbh": 0.46492466713692354, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.34798869680851063, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.32088541666666665, + "hf_avg": 23.74994070118673 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", + "params_b": 8.03, + "ifeval": 0.7207066140063919, + "bbh": 0.4610092915501656, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.3353557180851064, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.3432395833333333, + "hf_avg": 23.627287074905905 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", + "params_b": 8, + "ifeval": 0.4578241288669619, + "bbh": 0.4760520079608936, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3470744680851064, + "hf_math_lvl5": 0.13821752265861026, + "hf_musr": 0.3881354166666667, + "hf_avg": 20.857427405175983 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", + "params_b": 8, + "ifeval": 0.7204816553411615, + "bbh": 0.4817795525811035, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.35480385638297873, + "hf_math_lvl5": 0.14350453172205438, + "hf_musr": 0.33, + "hf_avg": 23.874258540372427 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Math", + "params_b": 8.03, + "ifeval": 0.46229559790245434, + "bbh": 0.49829504320793055, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.33311170212765956, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.3640729166666667, + "hf_avg": 20.557928990030614 + }, + { + "hf_id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", + "name": "Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", + "params_b": 8, + "ifeval": 0.46109655713506825, + "bbh": 0.48010141537970213, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.35206117021276595, + "hf_math_lvl5": 0.12537764350453173, + "hf_musr": 0.3998229166666667, + "hf_avg": 21.293561415203758 + }, + { + "hf_id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", + "name": "Fireball-Mistral-Nemo-Base-2407-v1-DPO2", + "params_b": 12.248, + "ifeval": 0.18607295309778055, + "bbh": 0.49677687590350894, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.33527260638297873, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.4040104166666667, + "hf_avg": 15.339340300308171 + }, + { + "hf_id": "EpistemeAI/Fireball-R1-Llama-3.1-8B", + "name": "Fireball-R1-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.4427363839058143, + "bbh": 0.36434977901496834, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.11153590425531915, + "hf_math_lvl5": 0.311178247734139, + "hf_musr": 0.32879166666666665, + "hf_avg": 14.729862999540122 + }, + { + "hf_id": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", + "name": "Fireball-R1-Llama-3.1-8B-Medical-COT", + "params_b": 8.03, + "ifeval": 0.3216111029845255, + "bbh": 0.37162741490176326, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.1402094414893617, + "hf_math_lvl5": 0.3270392749244713, + "hf_musr": 0.31136458333333333, + "hf_avg": 14.486213316641551 + }, + { + "hf_id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", + "name": "Fireball-R1.1-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.3676234613048932, + "bbh": 0.33260007841271594, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.11153590425531915, + "hf_math_lvl5": 0.13821752265861026, + "hf_musr": 0.3419375, + "hf_avg": 10.130881566678475 + }, + { + "hf_id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", + "name": "Llama-3.2-3B-Agent007-Coder", + "params_b": 3, + "ifeval": 0.5399562050913798, + "bbh": 0.4303758760727905, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.28515625, + "hf_math_lvl5": 0.11102719033232629, + "hf_musr": 0.36680208333333336, + "hf_avg": 18.91456180890705 + }, + { + "hf_id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", + "name": "Mistral-Nemo-Instruct-12B-Philosophy-Math", + "params_b": 12.248, + "ifeval": 0.06946790072563022, + "bbh": 0.5364928342081372, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.32962101063829785, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.42921875, + "hf_avg": 16.60399677564458 + }, + { + "hf_id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", + "name": "OpenReasoner-Llama-3.2-3B-rs1.0", + "params_b": 3.213, + "ifeval": 0.7274010735958367, + "bbh": 0.45185934849403964, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.31341422872340424, + "hf_math_lvl5": 0.13444108761329304, + "hf_musr": 0.3460625, + "hf_avg": 22.939578822051278 + }, + { + "hf_id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", + "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", + "params_b": 8, + "ifeval": 0.7100903380807368, + "bbh": 0.46279874531423665, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.33111702127659576, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.3194895833333333, + "hf_avg": 23.321646038971995 + }, + { + "hf_id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", + "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", + "params_b": 8.03, + "ifeval": 0.712213593265868, + "bbh": 0.45659361690861294, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.33502327127659576, + "hf_math_lvl5": 0.12462235649546828, + "hf_musr": 0.32348958333333333, + "hf_avg": 23.204900560269166 + }, + { + "hf_id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", + "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", + "params_b": 8.03, + "ifeval": 0.6915306941138402, + "bbh": 0.4524732961901791, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.32903922872340424, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.35775, + "hf_avg": 23.02725584049078 + }, + { + "hf_id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", + "name": "Reasoning-Llama-3.1-CoT-RE1-NMT", + "params_b": 8.03, + "ifeval": 0.4828532737580731, + "bbh": 0.47357563863974517, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.33427526595744683, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.31821875, + "hf_avg": 19.208175433660585 + }, + { + "hf_id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", + "name": "Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", + "params_b": 8.03, + "ifeval": 0.4553263119633683, + "bbh": 0.4804219047211424, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3597905585106383, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.393125, + "hf_avg": 21.331579969605 + }, + { + "hf_id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", + "name": "Reasoning-Llama-3.2-1B-Instruct-v1.2", + "params_b": 1.236, + "ifeval": 0.40871443325930756, + "bbh": 0.3324495305251265, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.11785239361702128, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.3221875, + "hf_avg": 9.508931658156023 + }, + { + "hf_id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", + "name": "Reasoning-Llama-3.2-1B-Instruct-v1.3", + "params_b": 1.236, + "ifeval": 0.3272816127874041, + "bbh": 0.3262818751942827, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11727061170212766, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.326, + "hf_avg": 8.170098865797046 + }, + { + "hf_id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", + "name": "Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", + "params_b": 3.213, + "ifeval": 0.7289746760816855, + "bbh": 0.45181862491313, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.3100066489361702, + "hf_math_lvl5": 0.15332326283987915, + "hf_musr": 0.3486666666666667, + "hf_avg": 23.430640115437427 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", + "name": "ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", + "params_b": 3.213, + "ifeval": 0.5902893212232432, + "bbh": 0.436379591348482, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.28233045212765956, + "hf_math_lvl5": 0.14803625377643503, + "hf_musr": 0.3314270833333333, + "hf_avg": 19.475016650589627 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-0", + "name": "ReasoningCore-3B-0", + "params_b": 3.213, + "ifeval": 0.7341454008696924, + "bbh": 0.44460707451155984, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3172373670212766, + "hf_math_lvl5": 0.15861027190332325, + "hf_musr": 0.35539583333333336, + "hf_avg": 23.526303672038257 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", + "name": "ReasoningCore-3B-Instruct-r01-Reflect", + "params_b": 3.213, + "ifeval": 0.7334960128015887, + "bbh": 0.44496323889512146, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.31441156914893614, + "hf_math_lvl5": 0.1540785498489426, + "hf_musr": 0.3527291666666667, + "hf_avg": 23.51162994918955 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-R01", + "name": "ReasoningCore-3B-R01", + "params_b": 3.213, + "ifeval": 0.29760590787998065, + "bbh": 0.43725189001258497, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.25914228723404253, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.31945833333333334, + "hf_avg": 14.035244688635869 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-RE1-V2", + "name": "ReasoningCore-3B-RE1-V2", + "params_b": 3.213, + "ifeval": 0.7393161256576994, + "bbh": 0.44623884450165807, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.31806848404255317, + "hf_math_lvl5": 0.15634441087613293, + "hf_musr": 0.3540625, + "hf_avg": 23.57087320558054 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-RE1-V2A", + "name": "ReasoningCore-3B-RE1-V2A", + "params_b": 3.213, + "ifeval": 0.5732534120577845, + "bbh": 0.4189899823502799, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.2736037234042553, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.33520833333333333, + "hf_avg": 18.398292233746382 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-RE1-V2B", + "name": "ReasoningCore-3B-RE1-V2B", + "params_b": 3.213, + "ifeval": 0.5051097753959495, + "bbh": 0.41678877951897175, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.26728723404255317, + "hf_math_lvl5": 0.10725075528700906, + "hf_musr": 0.3448229166666667, + "hf_avg": 16.80359555586663 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-RE1-V2C", + "name": "ReasoningCore-3B-RE1-V2C", + "params_b": 3.213, + "ifeval": 0.5057092957796425, + "bbh": 0.41774567831526244, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.2691156914893617, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.34215625, + "hf_avg": 16.648061999116766 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-T1-V1", + "name": "ReasoningCore-3B-T1-V1", + "params_b": 3.213, + "ifeval": 0.7207564816908026, + "bbh": 0.4516908992961786, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.31200132978723405, + "hf_math_lvl5": 0.14577039274924472, + "hf_musr": 0.35403125, + "hf_avg": 23.243643018070443 + }, + { + "hf_id": "EpistemeAI/ReasoningCore-3B-T1_1", + "name": "ReasoningCore-3B-T1_1", + "params_b": 3.213, + "ifeval": 0.7274509412802475, + "bbh": 0.45239424517060806, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.3116688829787234, + "hf_math_lvl5": 0.1540785498489426, + "hf_musr": 0.3553645833333334, + "hf_avg": 23.492474520020917 + }, + { + "hf_id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", + "name": "Athene-codegemma-2-7b-it-alpaca-v1.2", + "params_b": 7, + "ifeval": 0.4351177098986245, + "bbh": 0.41754154460978427, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.22972074468085107, + "hf_math_lvl5": 0.04229607250755287, + "hf_musr": 0.41696875000000005, + "hf_avg": 15.718390848250493 + }, + { + "hf_id": "EpistemeAI2/Fireball-12B-v1.2", + "name": "Fireball-12B-v1.2", + "params_b": 12, + "ifeval": 0.13553925805750963, + "bbh": 0.5018583230653281, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.33369348404255317, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.4173125, + "hf_avg": 15.200286583835771 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", + "name": "Fireball-Alpaca-Llama3.1-8B-Philos", + "params_b": 8, + "ifeval": 0.498640274471735, + "bbh": 0.4977581192690881, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3405917553191489, + "hf_math_lvl5": 0.11858006042296072, + "hf_musr": 0.42766666666666664, + "hf_avg": 22.551673578610224 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", + "name": "Fireball-Alpaca-Llama3.1.01-8B-Philos", + "params_b": 8, + "ifeval": 0.42117913802045237, + "bbh": 0.49561092312727917, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.33834773936170215, + "hf_math_lvl5": 0.13595166163141995, + "hf_musr": 0.43706249999999996, + "hf_avg": 21.567143968773337 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", + "name": "Fireball-Alpaca-Llama3.1.03-8B-Philos", + "params_b": 8, + "ifeval": 0.3880814017916905, + "bbh": 0.49508699339363266, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.3355219414893617, + "hf_math_lvl5": 0.1283987915407855, + "hf_musr": 0.42801041666666667, + "hf_avg": 20.274573357917856 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", + "name": "Fireball-Alpaca-Llama3.1.04-8B-Philos", + "params_b": 8, + "ifeval": 0.40843960690966635, + "bbh": 0.4930009712421776, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3402593085106383, + "hf_math_lvl5": 0.12009063444108761, + "hf_musr": 0.43721875, + "hf_avg": 21.094517497325704 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", + "name": "Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", + "params_b": 8, + "ifeval": 0.4865756193566404, + "bbh": 0.48807730539009225, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3614527925531915, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.3931875, + "hf_avg": 21.867631752899076 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", + "name": "Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", + "params_b": 8, + "ifeval": 0.5079079065767719, + "bbh": 0.4847020640542447, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.35305851063829785, + "hf_math_lvl5": 0.12009063444108761, + "hf_musr": 0.40630208333333334, + "hf_avg": 21.97087003498899 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", + "name": "Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", + "params_b": 8, + "ifeval": 0.39522577871159636, + "bbh": 0.49553052334314723, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.35929188829787234, + "hf_math_lvl5": 0.12462235649546828, + "hf_musr": 0.4048125, + "hf_avg": 20.894624934810878 + }, + { + "hf_id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", + "name": "Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", + "params_b": 8, + "ifeval": 0.5316382753316755, + "bbh": 0.4827931104634334, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3523105053191489, + "hf_math_lvl5": 0.12386706948640483, + "hf_musr": 0.4103020833333333, + "hf_avg": 22.511188245481957 + }, + { + "hf_id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", + "name": "Fireball-Llama-3.1-8B-Philos-Reflection", + "params_b": 8, + "ifeval": 0.3596047376516532, + "bbh": 0.4897693552241443, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3550531914893617, + "hf_math_lvl5": 0.1283987915407855, + "hf_musr": 0.3957291666666667, + "hf_avg": 20.37672090429901 + }, + { + "hf_id": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", + "name": "Fireball-MathMistral-Nemo-Base-2407-v2dpo", + "params_b": 11.58, + "ifeval": 0.30972043067948596, + "bbh": 0.43276373285682107, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.11477726063829788, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.4029583333333333, + "hf_avg": 11.369982746356044 + }, + { + "hf_id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", + "params_b": 8, + "ifeval": 0.5515465631191904, + "bbh": 0.48075580310342053, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3420046542553192, + "hf_math_lvl5": 0.1351963746223565, + "hf_musr": 0.36925, + "hf_avg": 22.727957302291042 + }, + { + "hf_id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", + "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", + "params_b": 8, + "ifeval": 0.4633195476890207, + "bbh": 0.4790834283312441, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3564660904255319, + "hf_math_lvl5": 0.11706948640483383, + "hf_musr": 0.37743750000000004, + "hf_avg": 21.037758831912942 + }, + { + "hf_id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", + "name": "Fireball-Phi-3-medium-4k-inst-Philos", + "params_b": 13.96, + "ifeval": 0.5312880933700359, + "bbh": 0.6177842639287514, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.45985704787234044, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.41390625, + "hf_avg": 29.67636693004503 + }, + { + "hf_id": "Eric111/CatunaMayo", + "name": "CatunaMayo", + "params_b": 7.242, + "ifeval": 0.4074156571231, + "bbh": 0.5243635518600797, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3178191489361702, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.45398958333333334, + "hf_avg": 21.27397881040676 + }, + { + "hf_id": "Eric111/CatunaMayo-DPO", + "name": "CatunaMayo-DPO", + "params_b": 7.242, + "ifeval": 0.4214539643700936, + "bbh": 0.5223991323844243, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3169880319148936, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.44503125, + "hf_avg": 21.292884967568227 + }, + { + "hf_id": "Eurdem/Defne-llama3.1-8B", + "name": "Defne-llama3.1-8B", + "params_b": 8.03, + "ifeval": 0.5036115285220991, + "bbh": 0.5320979090308238, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3865525265957447, + "hf_math_lvl5": 0.16012084592145015, + "hf_musr": 0.43309375, + "hf_avg": 25.120605411030215 + }, + { + "hf_id": "FINGU-AI/Chocolatine-Fusion-14B", + "name": "Chocolatine-Fusion-14B", + "params_b": 8.367, + "ifeval": 0.6949028577507679, + "bbh": 0.64132285324613, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5261801861702128, + "hf_math_lvl5": 0.3851963746223565, + "hf_musr": 0.49402083333333335, + "hf_avg": 40.36155927006158 + }, + { + "hf_id": "FINGU-AI/L3-8B", + "name": "L3-8B", + "params_b": 8.03, + "ifeval": 0.7517309627344335, + "bbh": 0.4985585187130108, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.36394614361702127, + "hf_math_lvl5": 0.2545317220543807, + "hf_musr": 0.38283333333333336, + "hf_avg": 28.91453457774598 + }, + { + "hf_id": "FINGU-AI/Phi-4-RRStock", + "name": "Phi-4-RRStock", + "params_b": 6.652, + "ifeval": 0.28554125276488607, + "bbh": 0.6443442865581455, + "gpqa": 0.3800335570469799, + "mmlu_pro": 0.48828125, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.44794791666666667, + "hf_avg": 26.415394122718638 + }, + { + "hf_id": "FINGU-AI/Q-Small-3B", + "name": "Q-Small-3B", + "params_b": 3.086, + "ifeval": 0.4145345461154182, + "bbh": 0.43185314557630744, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.27900598404255317, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.40054166666666663, + "hf_avg": 16.89041454501282 + }, + { + "hf_id": "FINGU-AI/QwQ-Buddy-32B-Alpha", + "name": "QwQ-Buddy-32B-Alpha", + "params_b": 19.662, + "ifeval": 0.34464221598691475, + "bbh": 0.642442234274039, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5294215425531915, + "hf_math_lvl5": 0.3851963746223565, + "hf_musr": 0.5059895833333333, + "hf_avg": 35.1782720586928 + }, + { + "hf_id": "FINGU-AI/RomboUltima-32B", + "name": "RomboUltima-32B", + "params_b": 17.645, + "ifeval": 0.6671509372908327, + "bbh": 0.6938448333620042, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.578873005319149, + "hf_math_lvl5": 0.5385196374622356, + "hf_musr": 0.4836354166666667, + "hf_avg": 44.731545333278405 + }, + { + "hf_id": "FINGU-AI/Ultimos-32B", + "name": "Ultimos-32B", + "params_b": 9.604, + "ifeval": 0.1592197591280026, + "bbh": 0.2905531373728777, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11112034574468085, + "hf_musr": 0.32860416666666664, + "hf_avg": 3.640726542069556 + }, + { + "hf_id": "FallenMerick/Chewy-Lemon-Cookie-11B", + "name": "Chewy-Lemon-Cookie-11B", + "params_b": 10.732, + "ifeval": 0.4875242135312083, + "bbh": 0.5251122307375103, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3267121010638298, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.45455208333333336, + "hf_avg": 22.043725653783593 + }, + { + "hf_id": "Felladrin/Llama-160M-Chat-v1", + "name": "Llama-160M-Chat-v1", + "params_b": 0.162, + "ifeval": 0.15754642127333254, + "bbh": 0.30360811146348365, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11361369680851063, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.366125, + "hf_avg": 4.201766115349323 + }, + { + "hf_id": "Felladrin/Minueza-32M-UltraChat", + "name": "Minueza-32M-UltraChat", + "params_b": 0.033, + "ifeval": 0.13756277787381924, + "bbh": 0.2941478734048925, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.11328125, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.37418749999999995, + "hf_avg": 3.9242559881806987 + }, + { + "hf_id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "name": "100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "params_b": 0.5, + "ifeval": 0.30832191917445706, + "bbh": 0.3323387445789459, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.14976728723404256, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.33021875, + "hf_avg": 8.550830451075553 + }, + { + "hf_id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "name": "10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "params_b": 16, + "ifeval": 0.509730847484674, + "bbh": 0.5214989784123593, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.37691156914893614, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.43095833333333333, + "hf_avg": 24.04356389387962 + }, + { + "hf_id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "name": "10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "params_b": 0.5, + "ifeval": 0.28154408081667753, + "bbh": 0.3305518729746925, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.15408909574468085, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.33021875, + "hf_avg": 8.363924265648496 + }, + { + "hf_id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "name": "40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "params_b": 0.5, + "ifeval": 0.3015775919006015, + "bbh": 0.33246082656550385, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.14852061170212766, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.3408229166666667, + "hf_avg": 8.381580312764262 + }, + { + "hf_id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "name": "83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", + "params_b": 0.5, + "ifeval": 0.28693976426991497, + "bbh": 0.33465340701604496, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.15550199468085107, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.3289479166666667, + "hf_avg": 8.427461121141809 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", + "name": "smollm2-135M_pretrained_1000k_fineweb", + "params_b": 0.135, + "ifeval": 0.14845388014911545, + "bbh": 0.2917939408206228, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.1163563829787234, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.35806249999999995, + "hf_avg": 4.20780841481479 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.15537329840379083, + "bbh": 0.3066426145674803, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.11427859042553191, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.35803125, + "hf_avg": 4.06134991308063 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.14678054229444543, + "bbh": 0.29317781029884354, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.11569148936170212, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.4047604166666667, + "hf_avg": 5.0558427645833435 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", + "name": "smollm2-135M_pretrained_1200k_fineweb", + "params_b": 0.135, + "ifeval": 0.15809607397261488, + "bbh": 0.29409841468035297, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.10762965425531915, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.3713645833333333, + "hf_avg": 4.188312239003598 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.157771379938563, + "bbh": 0.29496212100634955, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.11394614361702128, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.36999999999999994, + "hf_avg": 4.280291147523802 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.15847063569107744, + "bbh": 0.29604672415652145, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.11643949468085106, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.3567291666666667, + "hf_avg": 4.03050468089069 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", + "name": "smollm2-135M_pretrained_1400k_fineweb", + "params_b": 0.135, + "ifeval": 0.17638089158987041, + "bbh": 0.2921781950918249, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.1079621010638298, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.3873333333333333, + "hf_avg": 4.992957353487976 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.17066051410258115, + "bbh": 0.2992388897714206, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.11045545212765957, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.3939375, + "hf_avg": 5.063032045008573 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.15384956360235286, + "bbh": 0.291672957517483, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.11369680851063829, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.37406249999999996, + "hf_avg": 4.624639622707802 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.14747979804695985, + "bbh": 0.30287372123209483, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11195146276595745, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.35784375, + "hf_avg": 3.8819679158721776 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_200k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.13451530827094332, + "bbh": 0.2927186496606003, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.11311502659574468, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.36603125, + "hf_avg": 3.4920262751647098 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_400k_fineweb", + "name": "smollm2-135M_pretrained_400k_fineweb", + "params_b": 0.135, + "ifeval": 0.1511267880335288, + "bbh": 0.29723404576965046, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11627327127659574, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3794270833333333, + "hf_avg": 4.224944649553536 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.155648124753432, + "bbh": 0.3048804422828362, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.11377992021276596, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.38599999999999995, + "hf_avg": 4.710926533341115 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_400k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.15842076800666677, + "bbh": 0.2925171720555518, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.1157746010638298, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.38199999999999995, + "hf_avg": 4.38733089603933 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_600k_fineweb", + "name": "smollm2-135M_pretrained_600k_fineweb", + "params_b": 0.135, + "ifeval": 0.16391618682872555, + "bbh": 0.3013718229200533, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.11261635638297872, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.38085416666666666, + "hf_avg": 4.886739290130319 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.16414114549395603, + "bbh": 0.30001678726257036, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.1146941489361702, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.3793333333333333, + "hf_avg": 4.644401741390064 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_600k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.16059389087620846, + "bbh": 0.2983444769655102, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.11619015957446809, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.3846354166666666, + "hf_avg": 4.657606376854525 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_800k_fineweb", + "name": "smollm2-135M_pretrained_800k_fineweb", + "params_b": 0.135, + "ifeval": 0.16414114549395603, + "bbh": 0.29594449748780255, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11519281914893617, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.370125, + "hf_avg": 4.17450578103981 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", + "name": "smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", + "params_b": 0.135, + "ifeval": 0.1622927166584662, + "bbh": 0.3038096660271284, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11377992021276596, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.3992708333333333, + "hf_avg": 5.03254333671229 + }, + { + "hf_id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", + "name": "smollm2-135M_pretrained_800k_fineweb_uncovai_selected", + "params_b": 0.135, + "ifeval": 0.14742993036254914, + "bbh": 0.2942808065535252, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.11303191489361702, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.3766354166666666, + "hf_avg": 4.118739390912178 + }, + { + "hf_id": "FlofloB/smollm2_pretrained_200k_fineweb", + "name": "smollm2_pretrained_200k_fineweb", + "params_b": 0.135, + "ifeval": 0.15270039051937748, + "bbh": 0.299468427221449, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.11594082446808511, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.3699375, + "hf_avg": 4.00559929596115 + }, + { + "hf_id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "name": "test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", + "params_b": 16, + "ifeval": 0.521546164177715, + "bbh": 0.5240829189778252, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.3720910904255319, + "hf_math_lvl5": 0.11027190332326284, + "hf_musr": 0.42441666666666666, + "hf_avg": 24.48570245546706 + }, + { + "hf_id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", + "name": "ft-openhermes-25-mistral-7b-irca-dpo-pairs", + "params_b": 14.483, + "ifeval": 0.5420041046645123, + "bbh": 0.47730323895548116, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.2956283244680851, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.417375, + "hf_avg": 20.395988185954646 + }, + { + "hf_id": "FuJhen/mistral-instruct-7B-DPO", + "name": "mistral-instruct-7B-DPO", + "params_b": 14.496, + "ifeval": 0.49684171332065585, + "bbh": 0.46239050561386214, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.30335771276595747, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.4015625, + "hf_avg": 19.029530782130646 + }, + { + "hf_id": "FuJhen/mistral_7b_v0.1_structedData_e2e", + "name": "mistral_7b_v0.1_structedData_e2e", + "params_b": 7, + "ifeval": 0.17268403391889076, + "bbh": 0.4113914854984489, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2810837765957447, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.3722916666666667, + "hf_avg": 10.909311048443838 + }, + { + "hf_id": "FuJhen/mistral_7b_v0.1_structedData_viggo", + "name": "mistral_7b_v0.1_structedData_viggo", + "params_b": 14.483, + "ifeval": 0.17832905579418165, + "bbh": 0.45238634545986817, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2942154255319149, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.37381250000000005, + "hf_avg": 12.440582510781873 + }, + { + "hf_id": "FuseAI/FuseChat-7B-v2.0", + "name": "FuseChat-7B-v2.0", + "params_b": 7.242, + "ifeval": 0.3423194900641409, + "bbh": 0.4954212795868764, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.3162400265957447, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.4796666666666667, + "hf_avg": 20.14636743747765 + }, + { + "hf_id": "GalrionSoftworks/MagnusIntellectus-12B-v1", + "name": "MagnusIntellectus-12B-v1", + "params_b": 12.248, + "ifeval": 0.4421368635221213, + "bbh": 0.5323010476246133, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.34208776595744683, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.4428020833333333, + "hf_avg": 21.773295532025813 + }, + { + "hf_id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", + "name": "AryaBhatta-GemmaOrca-2-Merged", + "params_b": 8.538, + "ifeval": 0.30637375497014585, + "bbh": 0.3887493166323577, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.23844747340425532, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.4550208333333334, + "hf_avg": 14.006450277637631 + }, + { + "hf_id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", + "name": "AryaBhatta-GemmaOrca-Merged", + "params_b": 8.538, + "ifeval": 0.30637375497014585, + "bbh": 0.4130633897394575, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.22282247340425532, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.3523854166666667, + "hf_avg": 11.99472750549792 + }, + { + "hf_id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", + "name": "AryaBhatta-GemmaUltra-Merged", + "params_b": 8.538, + "ifeval": 0.30207737691547315, + "bbh": 0.4141445378464817, + "gpqa": 0.25335570469798663, + "mmlu_pro": 0.2265625, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.42785416666666665, + "hf_avg": 13.28281509458921 + }, + { + "hf_id": "GenVRadmin/llama38bGenZ_Vikas-Merged", + "name": "llama38bGenZ_Vikas-Merged", + "params_b": 8.03, + "ifeval": 0.30002947734234053, + "bbh": 0.4535981003984562, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.26221742021276595, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.44016666666666665, + "hf_avg": 16.09338316937527 + }, + { + "hf_id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", + "name": "gemma2-9b-cpt-sahabatai-v1-instruct", + "params_b": 9.242, + "ifeval": 0.6550607942481504, + "bbh": 0.5954551751157878, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4263630319148936, + "hf_math_lvl5": 0.2054380664652568, + "hf_musr": 0.4778645833333333, + "hf_avg": 32.46826016178984 + }, + { + "hf_id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", + "name": "llama3-8b-cpt-sahabatai-v1-instruct", + "params_b": 8.03, + "ifeval": 0.523844510343666, + "bbh": 0.4951292004509417, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.3453291223404255, + "hf_math_lvl5": 0.12764350453172205, + "hf_musr": 0.44884375, + "hf_avg": 23.05939902243402 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "params_b": 0.63, + "ifeval": 0.347189900574919, + "bbh": 0.32683063456958195, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.16414561170212766, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.32625, + "hf_avg": 9.768236092545305 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", + "params_b": 0.63, + "ifeval": 0.3416944817528602, + "bbh": 0.32921013057720044, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.16381316489361702, + "hf_math_lvl5": 0.002265861027190332, + "hf_musr": 0.3249166666666667, + "hf_avg": 8.415919404386559 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", + "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", + "params_b": 1.777, + "ifeval": 0.47685806992114255, + "bbh": 0.418600731531926, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.27825797872340424, + "hf_math_lvl5": 0.2084592145015106, + "hf_musr": 0.3674895833333333, + "hf_avg": 18.44117473256343 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", + "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", + "params_b": 1.544, + "ifeval": 0.421553699738915, + "bbh": 0.40418921704436744, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.25615026595744683, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.37685416666666666, + "hf_avg": 15.56674922027016 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", + "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", + "params_b": 1.544, + "ifeval": 0.42525055740989465, + "bbh": 0.4053446177133173, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.25556848404255317, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.37018749999999995, + "hf_avg": 15.592787360948462 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", + "name": "Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", + "params_b": 14.77, + "ifeval": 0.8291666112581284, + "bbh": 0.6355637424320617, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.5018284574468085, + "hf_math_lvl5": 0.5422960725075529, + "hf_musr": 0.4286666666666667, + "hf_avg": 42.550065874753976 + }, + { + "hf_id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "params_b": 7.616, + "ifeval": 0.7813811797142693, + "bbh": 0.5309672164610734, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.4119847074468085, + "hf_math_lvl5": 0.45317220543806647, + "hf_musr": 0.43539583333333337, + "hf_avg": 35.31663337889917 + }, + { + "hf_id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", + "name": "j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", + "params_b": 1.544, + "ifeval": 0.41883092417009093, + "bbh": 0.41242101633634826, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.2554853723404255, + "hf_math_lvl5": 0.12009063444108761, + "hf_musr": 0.3528541666666667, + "hf_avg": 15.07804770111701 + }, + { + "hf_id": "Goekdeniz-Guelmez/josie-3b-v6.0", + "name": "josie-3b-v6.0", + "params_b": 3.086, + "ifeval": 0.6009554648333089, + "bbh": 0.4496147842264783, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.32197473404255317, + "hf_math_lvl5": 0.2938066465256798, + "hf_musr": 0.386125, + "hf_avg": 24.746540779866937 + }, + { + "hf_id": "Goekdeniz-Guelmez/josie-7b-v6.0", + "name": "josie-7b-v6.0", + "params_b": 7.616, + "ifeval": 0.7411645544931892, + "bbh": 0.5104855208094123, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3806515957446808, + "hf_math_lvl5": 0.43580060422960726, + "hf_musr": 0.41539583333333335, + "hf_avg": 32.374168071296964 + }, + { + "hf_id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", + "name": "josie-7b-v6.0-step2000", + "params_b": 7.616, + "ifeval": 0.7627716680629618, + "bbh": 0.5097811950503962, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.40325797872340424, + "hf_musr": 0.45793750000000005, + "hf_avg": 26.97043751712353 + }, + { + "hf_id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", + "name": "josie-7b-v6.0-step2000", + "params_b": 7.616, + "ifeval": 0.7597740661444966, + "bbh": 0.510712680636641, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.4011801861702128, + "hf_math_lvl5": 0.42371601208459214, + "hf_musr": 0.45393750000000005, + "hf_avg": 33.83292639490552 + }, + { + "hf_id": "GritLM/GritLM-7B-KTO", + "name": "GritLM-7B-KTO", + "params_b": 7.242, + "ifeval": 0.5310132670203948, + "bbh": 0.485293719684692, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.26803523936170215, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.37102083333333336, + "hf_avg": 19.2358949114183 + }, + { + "hf_id": "GritLM/GritLM-8x7B-KTO", + "name": "GritLM-8x7B-KTO", + "params_b": 46.703, + "ifeval": 0.5714049832222946, + "bbh": 0.5820304362331497, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.36477726063829785, + "hf_math_lvl5": 0.12235649546827794, + "hf_musr": 0.42165625, + "hf_avg": 26.2413047085506 + }, + { + "hf_id": "Groq/Llama-3-Groq-8B-Tool-Use", + "name": "Llama-3-Groq-8B-Tool-Use", + "params_b": 8.03, + "ifeval": 0.6098230472922956, + "bbh": 0.4863384977901497, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.33992686170212766, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.36603125000000003, + "hf_avg": 21.44560137489326 + }, + { + "hf_id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", + "name": "Pantheon-RP-1.0-8b-Llama-3", + "params_b": 8.03, + "ifeval": 0.39325212657969744, + "bbh": 0.4539075127777334, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.30668218085106386, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.3832395833333333, + "hf_avg": 16.87312240356046 + }, + { + "hf_id": "Gryphe/Pantheon-RP-1.5-12b-Nemo", + "name": "Pantheon-RP-1.5-12b-Nemo", + "params_b": 12.248, + "ifeval": 0.47630841722186024, + "bbh": 0.519582216884963, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3302027925531915, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.44203125000000004, + "hf_avg": 21.32374664039768 + }, + { + "hf_id": "Gryphe/Pantheon-RP-1.6-12b-Nemo", + "name": "Pantheon-RP-1.6-12b-Nemo", + "params_b": 12.248, + "ifeval": 0.44805671174705336, + "bbh": 0.5204007434392454, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.33111702127659576, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.4287604166666667, + "hf_avg": 20.56659922855925 + }, + { + "hf_id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", + "name": "Pantheon-RP-1.6-12b-Nemo-KTO", + "params_b": 12.248, + "ifeval": 0.4636187537954849, + "bbh": 0.5276980814125921, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.33818151595744683, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.4247916666666667, + "hf_avg": 21.558598525492474 + }, + { + "hf_id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", + "name": "Pantheon-RP-Pure-1.6.2-22b-Small", + "params_b": 22.247, + "ifeval": 0.6931042965996888, + "bbh": 0.5304537230538597, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.39419880319148937, + "hf_math_lvl5": 0.20241691842900303, + "hf_musr": 0.37647916666666664, + "hf_avg": 28.138635196099433 + }, + { + "hf_id": "HPAI-BSC/Llama3-Aloe-8B-Alpha", + "name": "Llama3-Aloe-8B-Alpha", + "params_b": 8.03, + "ifeval": 0.5081073773144147, + "bbh": 0.48308532966126966, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3295378989361702, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.3672708333333334, + "hf_avg": 20.23044725696465 + }, + { + "hf_id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", + "name": "Llama3.1-Aloe-Beta-8B", + "params_b": 8.03, + "ifeval": 0.7253276860951166, + "bbh": 0.5092760762748857, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.35804521276595747, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.3834583333333333, + "hf_avg": 26.524195182673978 + }, + { + "hf_id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", + "name": "Qwen2.5-Aloe-Beta-7B", + "params_b": 7.616, + "ifeval": 0.4553506917201914, + "bbh": 0.5048995904321122, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.4354222074468085, + "hf_math_lvl5": 0.3542296072507553, + "hf_musr": 0.42603125000000003, + "hf_avg": 27.826720985708487 + }, + { + "hf_id": "Hastagaras/L3.2-JametMini-3B-MK.III", + "name": "L3.2-JametMini-3B-MK.III", + "params_b": 3.213, + "ifeval": 0.6182662003484088, + "bbh": 0.45385245294894094, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.2982878989361702, + "hf_math_lvl5": 0.14577039274924472, + "hf_musr": 0.3686041666666667, + "hf_avg": 21.75038505136374 + }, + { + "hf_id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", + "name": "Llama-3.1-Jamet-8B-MK.I", + "params_b": 8.03, + "ifeval": 0.7338207068356406, + "bbh": 0.5048666433733161, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.3482380319148936, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.3726041666666667, + "hf_avg": 25.42380601749484 + }, + { + "hf_id": "Hastagaras/Zabuza-8B-Llama-3.1", + "name": "Zabuza-8B-Llama-3.1", + "params_b": 8.03, + "ifeval": 0.6265342624237025, + "bbh": 0.4538915742546196, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.29230385638297873, + "hf_math_lvl5": 0.05513595166163142, + "hf_musr": 0.3567916666666667, + "hf_avg": 19.92582695678787 + }, + { + "hf_id": "HelpingAI/Cipher-20B", + "name": "Cipher-20B", + "params_b": 20.551, + "ifeval": 0.5377575942942504, + "bbh": 0.6032432743536918, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.3744182180851064, + "hf_math_lvl5": 0.19939577039274925, + "hf_musr": 0.40029166666666666, + "hf_avg": 26.976007616737775 + }, + { + "hf_id": "HelpingAI/Dhanishtha-Large", + "name": "Dhanishtha-Large", + "params_b": 7.613, + "ifeval": 0.24567370133468086, + "bbh": 0.46036539145861094, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.2755152925531915, + "hf_math_lvl5": 0.3851963746223565, + "hf_musr": 0.38451041666666663, + "hf_avg": 19.889172686673326 + }, + { + "hf_id": "HelpingAI/Priya-10B", + "name": "Priya-10B", + "params_b": 10.211, + "ifeval": 0.40429283190822574, + "bbh": 0.4441457310476767, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.24925199468085107, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.3792708333333333, + "hf_avg": 14.14328407509064 + }, + { + "hf_id": "HelpingAI/Priya-3B", + "name": "Priya-3B", + "params_b": 2.81, + "ifeval": 0.4525780484669566, + "bbh": 0.3961184863327844, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.23387632978723405, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3713020833333333, + "hf_avg": 13.429592166190526 + }, + { + "hf_id": "HiroseKoichi/Llama-Salad-4x8B-V3", + "name": "Llama-Salad-4x8B-V3", + "params_b": 24.942, + "ifeval": 0.6653523761397536, + "bbh": 0.5244649789001753, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.351811835106383, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.37403125, + "hf_avg": 24.922701724199346 + }, + { + "hf_id": "HoangHa/Pensez-Llama3.1-8B", + "name": "Pensez-Llama3.1-8B", + "params_b": 8.03, + "ifeval": 0.3886809221753835, + "bbh": 0.46691313514505667, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.31258311170212766, + "hf_math_lvl5": 0.1148036253776435, + "hf_musr": 0.3596979166666667, + "hf_avg": 19.04804643436893 + }, + { + "hf_id": "HuggingFaceH4/zephyr-7b-alpha", + "name": "zephyr-7b-alpha", + "params_b": 7.242, + "ifeval": 0.5191480826429429, + "bbh": 0.45828635059044115, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2795046542553192, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3949583333333333, + "hf_avg": 18.598795237504422, + "lb_name": "zephyr-7b-alpha", + "lb_global": 0.2085211111111111, + "lb_reasoning": 0.11, + "lb_math": 0.09956, + "lb_language": 0.07196666666666666, + "lb_if": 0.5279175, + "lb_data_analysis": 0.081, + "arena_elo": 1126.87, + "arena_rank": 292, + "arena_votes": 1785 + }, + { + "hf_id": "HuggingFaceH4/zephyr-7b-beta", + "name": "zephyr-7b-beta", + "params_b": 7.242, + "ifeval": 0.49504315216957673, + "bbh": 0.431582191918003, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.2780917553191489, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.3925416666666666, + "hf_avg": 17.792237227370464, + "lb_name": "zephyr-7b-beta", + "lb_global": 0.18953166666666668, + "lb_reasoning": 0.11, + "lb_math": 0.11230666666666667, + "lb_language": 0.042846666666666665, + "lb_if": 0.48317, + "lb_data_analysis": 0.0762, + "arena_elo": 1131.04, + "arena_rank": 288, + "arena_votes": 11116 + }, + { + "hf_id": "HuggingFaceH4/zephyr-7b-gemma-v0.1", + "name": "zephyr-7b-gemma-v0.1", + "params_b": 8.538, + "ifeval": 0.3363741539116212, + "bbh": 0.4623735014679749, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.2847406914893617, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.37396874999999996, + "hf_avg": 16.030043342251584 + }, + { + "hf_id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + "name": "zephyr-orpo-141b-A35b-v0.1", + "params_b": 140.621, + "ifeval": 0.6510891102275296, + "bbh": 0.6290439728524093, + "gpqa": 0.3783557046979866, + "mmlu_pro": 0.4586103723404255, + "hf_math_lvl5": 0.20468277945619334, + "hf_musr": 0.4465208333333333, + "hf_avg": 34.125963384670946, + "arena_elo": 1212.74, + "arena_rank": 242, + "arena_votes": 4653 + }, + { + "hf_id": "HuggingFaceTB/SmolLM-1.7B", + "name": "SmolLM-1.7B", + "params_b": 1.71, + "ifeval": 0.23615673080759053, + "bbh": 0.3180516538964782, + "gpqa": 0.24161073825503357, + "mmlu_pro": 0.11477726063829788, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.34209375000000003, + "hf_avg": 5.576455936269043 + }, + { + "hf_id": "HuggingFaceTB/SmolLM-1.7B-Instruct", + "name": "SmolLM-1.7B-Instruct", + "params_b": 1.71, + "ifeval": 0.23478259905938464, + "bbh": 0.28851114363217695, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11660571808510638, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3486666666666667, + "hf_avg": 5.490688803655309 + }, + { + "hf_id": "HuggingFaceTB/SmolLM-135M", + "name": "SmolLM-135M", + "params_b": 0.13, + "ifeval": 0.21247622973709757, + "bbh": 0.3046054260062988, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11220079787234043, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.4366041666666667, + "hf_avg": 6.951489892134872 + }, + { + "hf_id": "HuggingFaceTB/SmolLM-135M-Instruct", + "name": "SmolLM-135M-Instruct", + "params_b": 0.135, + "ifeval": 0.12140121544169469, + "bbh": 0.30150816789978757, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11760305851063829, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.36345833333333327, + "hf_avg": 3.652287650202965 + }, + { + "hf_id": "HuggingFaceTB/SmolLM-360M", + "name": "SmolLM-360M", + "params_b": 0.36, + "ifeval": 0.2133505764704318, + "bbh": 0.30645160333152527, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.11236702127659574, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.40178125, + "hf_avg": 6.260888857386585 + }, + { + "hf_id": "HuggingFaceTB/SmolLM-360M-Instruct", + "name": "SmolLM-360M-Instruct", + "params_b": 0.362, + "ifeval": 0.19516549422199764, + "bbh": 0.28851114363217695, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.11660571808510638, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.34717708333333336, + "hf_avg": 5.0088989557053685 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-1.7B", + "name": "SmolLM2-1.7B", + "params_b": 1.71, + "ifeval": 0.2440003634800108, + "bbh": 0.3452594377166261, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2137632978723404, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.3485416666666667, + "hf_avg": 9.583621041753199 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct", + "name": "SmolLM2-1.7B-Instruct", + "params_b": 1.711, + "ifeval": 0.5367835121920947, + "bbh": 0.3598617531415158, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2053690159574468, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.342125, + "hf_avg": 15.02227766709556, + "arena_elo": 1114.26, + "arena_rank": 296, + "arena_votes": 2201 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-135M", + "name": "SmolLM2-135M", + "params_b": 0.135, + "ifeval": 0.18177657504310785, + "bbh": 0.3044234246877141, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.10945811170212766, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.4111770833333333, + "hf_avg": 5.695927392648112 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-135M-Instruct", + "name": "SmolLM2-135M-Instruct", + "params_b": 0.135, + "ifeval": 0.2883138960181208, + "bbh": 0.3124321328066677, + "gpqa": 0.23573825503355705, + "mmlu_pro": 0.11145279255319149, + "hf_math_lvl5": 0.0030211480362537764, + "hf_musr": 0.36621875000000004, + "hf_avg": 6.467364720358819 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-135M-Instruct", + "name": "SmolLM2-135M-Instruct", + "params_b": 0.135, + "ifeval": 0.05925167444602544, + "bbh": 0.31347502947335903, + "gpqa": 0.23406040268456377, + "mmlu_pro": 0.10920877659574468, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3871458333333333, + "hf_avg": 3.2065969565576755 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-360M", + "name": "SmolLM2-360M", + "params_b": 0.36, + "ifeval": 0.21145227995053123, + "bbh": 0.3233478044302361, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.11693816489361702, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3954270833333333, + "hf_avg": 6.251282350517303 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-360M-Instruct", + "name": "SmolLM2-360M-Instruct", + "params_b": 0.362, + "ifeval": 0.08303191088533979, + "bbh": 0.3052703401844317, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.11261635638297872, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.34228125000000004, + "hf_avg": 3.1000195398620374 + }, + { + "hf_id": "HuggingFaceTB/SmolLM2-360M-Instruct", + "name": "SmolLM2-360M-Instruct", + "params_b": 0.36, + "ifeval": 0.38415958545548035, + "bbh": 0.31435050538888504, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.11170212765957446, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.346125, + "hf_avg": 8.139566424375877 + }, + { + "hf_id": "HumanLLMs/Humanish-LLama3-8B-Instruct", + "name": "Humanish-LLama3-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.6497903340913221, + "bbh": 0.49677096627896544, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.37017952127659576, + "hf_math_lvl5": 0.1027190332326284, + "hf_musr": 0.35815624999999995, + "hf_avg": 22.678203747779094 + }, + { + "hf_id": "IDEA-CCNL/Ziya-LLaMA-13B-v1", + "name": "Ziya-LLaMA-13B-v1", + "params_b": 13, + "ifeval": 0.16968643200042555, + "bbh": 0.28770292445409473, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11012300531914894, + "hf_musr": 0.37505208333333334, + "hf_avg": 3.9064248386004103 + }, + { + "hf_id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", + "name": "BgGPT-Gemma-2-27B-IT-v1.0", + "params_b": 27.227, + "bbh": 0.2911778102988436, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11668882978723404, + "hf_musr": 0.35753125, + "hf_avg": 1.6780628068086922 + }, + { + "hf_id": "IlyaGusev/gemma-2-2b-it-abliterated", + "name": "gemma-2-2b-it-abliterated", + "params_b": 2.614, + "ifeval": 0.533086654521115, + "bbh": 0.4118601326211988, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.25382313829787234, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.37818749999999995, + "hf_avg": 16.705746365531194 + }, + { + "hf_id": "IlyaGusev/gemma-2-9b-it-abliterated", + "name": "gemma-2-9b-it-abliterated", + "params_b": 9.242, + "ifeval": 0.747259493698941, + "bbh": 0.59063299776093, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.39153922872340424, + "hf_math_lvl5": 0.17749244712990936, + "hf_musr": 0.4033645833333333, + "hf_avg": 31.294229585782904 + }, + { + "hf_id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", + "name": "Infinirc-Llama3-8B-2G-Release-v1.0", + "params_b": 8.03, + "ifeval": 0.20243398626754788, + "bbh": 0.43507435668237937, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.21600731382978725, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.4609375, + "hf_avg": 13.16266165602399 + }, + { + "hf_id": "Intel/neural-chat-7b-v3", + "name": "neural-chat-7b-v3", + "params_b": 7, + "ifeval": 0.27779735546128714, + "bbh": 0.5048316221363103, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.26986369680851063, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.5054895833333334, + "hf_avg": 18.069527284193285 + }, + { + "hf_id": "Intel/neural-chat-7b-v3-1", + "name": "neural-chat-7b-v3-1", + "params_b": 7.242, + "ifeval": 0.4686897432146704, + "bbh": 0.5051565464054848, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.2677859042553192, + "hf_math_lvl5": 0.035498489425981876, + "hf_musr": 0.49789583333333337, + "hf_avg": 21.06792676095294 + }, + { + "hf_id": "Intel/neural-chat-7b-v3-2", + "name": "neural-chat-7b-v3-2", + "params_b": 7, + "ifeval": 0.4988397452093778, + "bbh": 0.5032226831964403, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.26670545212765956, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.48952083333333335, + "hf_avg": 21.471411168582193 + }, + { + "hf_id": "Intel/neural-chat-7b-v3-3", + "name": "neural-chat-7b-v3-3", + "params_b": 7, + "ifeval": 0.4762585495374495, + "bbh": 0.48766180524289693, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.2624667553191489, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.4859583333333333, + "hf_avg": 20.557585514141866 + }, + { + "hf_id": "IntervitensInc/internlm2_5-20b-llamafied", + "name": "internlm2_5-20b-llamafied", + "params_b": 19.861, + "ifeval": 0.3409952260003457, + "bbh": 0.7478466526577329, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.4050864361702128, + "hf_math_lvl5": 0.1714501510574018, + "hf_musr": 0.44754166666666667, + "hf_avg": 29.216880819026148 + }, + { + "hf_id": "Invalid-Null/PeiYangMe-0.5", + "name": "PeiYangMe-0.5", + "params_b": 6.061, + "ifeval": 0.14088507382633633, + "bbh": 0.27907748194216614, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.11087101063829788, + "hf_musr": 0.37381249999999994, + "hf_avg": 3.4273672940351996 + }, + { + "hf_id": "Invalid-Null/PeiYangMe-0.7", + "name": "PeiYangMe-0.7", + "params_b": 6.061, + "ifeval": 0.1491032682172192, + "bbh": 0.30275310145886614, + "gpqa": 0.2332214765100671, + "mmlu_pro": 0.11012300531914894, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.38571874999999994, + "hf_avg": 4.397279528582866 + }, + { + "hf_id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", + "name": "JOSIEv4o-8b-stage1-v4", + "params_b": 8.03, + "ifeval": 0.2552660274737696, + "bbh": 0.4724973116620121, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3316156914893617, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.3654375, + "hf_avg": 15.66808216767364 + }, + { + "hf_id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", + "name": "JOSIEv4o-8b-stage1-v4", + "params_b": 8.03, + "ifeval": 0.2476972211509905, + "bbh": 0.4758066295235124, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.32920545212765956, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.3641041666666667, + "hf_avg": 15.419272249038903 + }, + { + "hf_id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", + "params_b": 7.616, + "ifeval": 0.7841039552830933, + "bbh": 0.5310923599182072, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.4128158244680851, + "hf_math_lvl5": 0.47205438066465255, + "hf_musr": 0.43539583333333337, + "hf_avg": 35.685532582369234 + }, + { + "hf_id": "JackFram/llama-160m", + "name": "llama-160m", + "params_b": 0.162, + "ifeval": 0.1791036671586945, + "bbh": 0.28880217539042424, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.11278257978723404, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.3792083333333333, + "hf_avg": 4.7381298467963955 + }, + { + "hf_id": "JackFram/llama-68m", + "name": "llama-68m", + "params_b": 0.068, + "ifeval": 0.17263416623448008, + "bbh": 0.29362986509336414, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11436170212765957, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.3909895833333333, + "hf_avg": 4.963339700174584 + }, + { + "hf_id": "Jacoby746/Casual-Magnum-34B", + "name": "Casual-Magnum-34B", + "params_b": 34.389, + "ifeval": 0.19301675110927893, + "bbh": 0.6032046880542974, + "gpqa": 0.3724832214765101, + "mmlu_pro": 0.5183676861702128, + "hf_math_lvl5": 0.09214501510574018, + "hf_musr": 0.4077604166666666, + "hf_avg": 23.79792072114355 + }, + { + "hf_id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", + "name": "Inf-Silent-Kunoichi-v0.1-2x7B", + "params_b": 12.879, + "ifeval": 0.38798166642286913, + "bbh": 0.518546209727402, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3271276595744681, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.42804166666666665, + "hf_avg": 20.186181285193417 + }, + { + "hf_id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", + "name": "Inf-Silent-Kunoichi-v0.2-2x7B", + "params_b": 12.879, + "ifeval": 0.3636019095998617, + "bbh": 0.5209417299963208, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.32721077127659576, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.43197916666666664, + "hf_avg": 20.01822829916165 + }, + { + "hf_id": "Jacoby746/Proto-Athena-4x7B", + "name": "Proto-Athena-4x7B", + "params_b": 24.154, + "ifeval": 0.37029636918930664, + "bbh": 0.5106547638742905, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.32064494680851063, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.43477083333333333, + "hf_avg": 19.77557762348212 + }, + { + "hf_id": "Jacoby746/Proto-Athena-v0.2-4x7B", + "name": "Proto-Athena-v0.2-4x7B", + "params_b": 24.154, + "ifeval": 0.37524213531208306, + "bbh": 0.5067731005424964, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3197307180851064, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.42128125, + "hf_avg": 19.345307494794017 + }, + { + "hf_id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", + "name": "Proto-Harpy-Spark-v0.1-7B", + "params_b": 7.242, + "ifeval": 0.43326928106313467, + "bbh": 0.4735771808296548, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.30693151595744683, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.43166666666666664, + "hf_avg": 19.84999990070571 + }, + { + "hf_id": "JayHyeon/Qwen-0.5B-DPO-1epoch", + "name": "Qwen-0.5B-DPO-1epoch", + "params_b": 0.494, + "ifeval": 0.26473313031644924, + "bbh": 0.31907502434278595, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.15575132978723405, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.33517708333333335, + "hf_avg": 7.385732750247261 + }, + { + "hf_id": "JayHyeon/Qwen-0.5B-DPO-5epoch", + "name": "Qwen-0.5B-DPO-5epoch", + "params_b": 0.494, + "ifeval": 0.25701472094043804, + "bbh": 0.3112109544868782, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.15325797872340424, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.33796875, + "hf_avg": 7.198583184761694 + }, + { + "hf_id": "JayHyeon/Qwen-0.5B-IRPO-1epoch", + "name": "Qwen-0.5B-IRPO-1epoch", + "params_b": 0.494, + "ifeval": 0.25891301746033857, + "bbh": 0.31638216610052033, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.15001662234042554, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.3286354166666667, + "hf_avg": 7.031892544703613 + }, + { + "hf_id": "JayHyeon/Qwen-0.5B-IRPO-5epoch", + "name": "Qwen-0.5B-IRPO-5epoch", + "params_b": 0.494, + "ifeval": 0.24867130325314607, + "bbh": 0.31891656220326015, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.1506815159574468, + "hf_math_lvl5": 0.0324773413897281, + "hf_musr": 0.32866666666666666, + "hf_avg": 6.9234686409529 + }, + { + "hf_id": "JayHyeon/Qwen-0.5B-eDPO-1epoch", + "name": "Qwen-0.5B-eDPO-1epoch", + "params_b": 0.494, + "ifeval": 0.26233504878167707, + "bbh": 0.3180637583450692, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.15525265957446807, + "hf_math_lvl5": 0.03474320241691843, + "hf_musr": 0.33269791666666665, + "hf_avg": 7.280997177732158 + }, + { + "hf_id": "JayHyeon/Qwen-0.5B-eDPO-5epoch", + "name": "Qwen-0.5B-eDPO-5epoch", + "params_b": 0.494, + "ifeval": 0.24774708883540117, + "bbh": 0.3096491823869347, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.15226063829787234, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.3326354166666667, + "hf_avg": 6.72779460682018 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", + "name": "Qwen2.5-0.5B-Instruct-SFT", + "params_b": 0.63, + "ifeval": 0.27677340567472086, + "bbh": 0.3253697801563151, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.15201130319148937, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.33415625, + "hf_avg": 8.158039000338244 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", + "name": "Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", + "params_b": 0.494, + "ifeval": 0.24687274210206694, + "bbh": 0.3260313037664168, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.1574966755319149, + "hf_math_lvl5": 0.06495468277945618, + "hf_musr": 0.34336458333333336, + "hf_avg": 8.15078478555964 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", + "name": "Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", + "params_b": 0.494, + "ifeval": 0.2605863553150086, + "bbh": 0.3308028437367363, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.16256648936170212, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.3288229166666667, + "hf_avg": 8.319017359475316 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", + "name": "Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", + "params_b": 0.494, + "ifeval": 0.2529178136234081, + "bbh": 0.3261949089625076, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.15757978723404256, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.330125, + "hf_avg": 7.896219502136105 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT", + "name": "Qwen2.5-0.5B-SFT", + "params_b": 0.63, + "ifeval": 0.19636453498938372, + "bbh": 0.31207478976310743, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.16730385638297873, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.3394270833333333, + "hf_avg": 6.6110587340398945 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", + "name": "Qwen2.5-0.5B-SFT-1e-4", + "params_b": 0.63, + "ifeval": 0.2019596891802639, + "bbh": 0.3017092819749249, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.16190159574468085, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.3446354166666667, + "hf_avg": 5.941930910286053 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", + "name": "Qwen2.5-0.5B-SFT-1e-4-3ep", + "params_b": 0.63, + "ifeval": 0.22573992561957826, + "bbh": 0.3064261556890236, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.15317486702127658, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.36606249999999996, + "hf_avg": 6.469032763498576 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", + "name": "Qwen2.5-0.5B-SFT-1e-5", + "params_b": 0.63, + "ifeval": 0.1985875255433361, + "bbh": 0.3139860294769257, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.1697972074468085, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.34603125, + "hf_avg": 6.664256620538701 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", + "name": "Qwen2.5-0.5B-SFT-1e-5-3ep", + "params_b": 0.63, + "ifeval": 0.2241164554493189, + "bbh": 0.32468117082421427, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.16888297872340424, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.3353333333333333, + "hf_avg": 7.740340198303553 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", + "name": "Qwen2.5-0.5B-SFT-2e-4", + "params_b": 0.63, + "ifeval": 0.2034335562972912, + "bbh": 0.2935549587263229, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.14128989361702127, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.3434270833333333, + "hf_avg": 5.555176381236881 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", + "name": "Qwen2.5-0.5B-SFT-2e-4-3ep", + "params_b": 0.63, + "ifeval": 0.1989620872617987, + "bbh": 0.3109875129533253, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.14162234042553193, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.3449479166666667, + "hf_avg": 5.465283047679917 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", + "name": "Qwen2.5-0.5B-SFT-2e-5", + "params_b": 0.63, + "ifeval": 0.2067558522498083, + "bbh": 0.3203968601167082, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.16780252659574468, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.3486666666666667, + "hf_avg": 6.9691100972452 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", + "name": "Qwen2.5-0.5B-SFT-2e-5-3ep", + "params_b": 0.63, + "ifeval": 0.22808813946993975, + "bbh": 0.3239538094779519, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.17461768617021275, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.330125, + "hf_avg": 7.452521242968808 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", + "name": "Qwen2.5-0.5B-SFT-5e-5", + "params_b": 0.63, + "ifeval": 0.2009856070781083, + "bbh": 0.31093810553451656, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.16722074468085107, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.33809375, + "hf_avg": 6.609556986596828 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", + "name": "Qwen2.5-0.5B-SFT-5e-5-3ep", + "params_b": 0.63, + "ifeval": 0.2198699450790569, + "bbh": 0.32974820176156994, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.1651429521276596, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.35933333333333334, + "hf_avg": 7.074425558693155 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", + "name": "Qwen2.5-0.5B-SFT-7e-5", + "params_b": 0.63, + "ifeval": 0.20925366915340185, + "bbh": 0.3158179005969299, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.1622340425531915, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.33669791666666665, + "hf_avg": 6.761335194180454 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", + "name": "Qwen2.5-0.5B-SFT-7e-5-3ep", + "params_b": 0.63, + "ifeval": 0.23805502732749106, + "bbh": 0.3199313632207049, + "gpqa": 0.23657718120805368, + "mmlu_pro": 0.15217752659574468, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.3553645833333334, + "hf_avg": 6.894032740125938 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", + "name": "Qwen2.5-0.5B-SFT-DPO-1epoch_v1", + "params_b": 0.494, + "ifeval": 0.20245947419513555, + "bbh": 0.326814314271471, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.13297872340425532, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.3209166666666667, + "hf_avg": 6.245597623653178 + }, + { + "hf_id": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", + "name": "Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", + "params_b": 0.494, + "ifeval": 0.1964144026737944, + "bbh": 0.32925816453885065, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.13372672872340424, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.32615625, + "hf_avg": 6.644463688191298 + }, + { + "hf_id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", + "name": "llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", + "params_b": 8.03, + "ifeval": 0.6717221416951467, + "bbh": 0.48797965672899357, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.36336436170212766, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.4040729166666667, + "hf_avg": 24.144995316075697 + }, + { + "hf_id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", + "name": "llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", + "params_b": 8.03, + "ifeval": 0.6555605792630221, + "bbh": 0.49345840367294164, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3657746010638298, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.4000104166666667, + "hf_avg": 24.09614909345055 + }, + { + "hf_id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", + "name": "llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", + "params_b": 8.03, + "ifeval": 0.6315055164740666, + "bbh": 0.4916414793938901, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3611203457446808, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.3935, + "hf_avg": 23.03715310569549 + }, + { + "hf_id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", + "name": "llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", + "params_b": 8.03, + "ifeval": 0.6677504576745258, + "bbh": 0.4940463886115545, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.3657746010638298, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.3987083333333334, + "hf_avg": 24.385612249734205 + }, + { + "hf_id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", + "name": "llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", + "params_b": 8.03, + "ifeval": 0.6605063453857986, + "bbh": 0.49160075581298046, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3664394946808511, + "hf_math_lvl5": 0.06570996978851963, + "hf_musr": 0.4000416666666667, + "hf_avg": 24.210850058799164 + }, + { + "hf_id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", + "name": "llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", + "params_b": 8.03, + "ifeval": 0.649190813707629, + "bbh": 0.4952489348573605, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.37109375, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.3961354166666667, + "hf_avg": 24.056972059408523 + }, + { + "hf_id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", + "name": "Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", + "params_b": 8.03, + "ifeval": 0.6185410266980501, + "bbh": 0.5177452540141246, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.31441156914893614, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.4369375, + "hf_avg": 23.25287703936368 + }, + { + "hf_id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", + "name": "Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", + "params_b": 8.03, + "ifeval": 0.8096328851890761, + "bbh": 0.5147423127141911, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.38804853723404253, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.4109895833333333, + "hf_avg": 30.245028049214653 + }, + { + "hf_id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", + "name": "Cinder-Phi-2-V1-F16-gguf", + "params_b": 2.78, + "ifeval": 0.23565694579271884, + "bbh": 0.4396616219689493, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2160904255319149, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.34345833333333337, + "hf_avg": 11.258522615146035 + }, + { + "hf_id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", + "name": "Differential-Attention-Liquid-Metal-Tinyllama", + "params_b": 1.1, + "ifeval": 0.22269245601670234, + "bbh": 0.292556113105267, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.12142619680851063, + "hf_math_lvl5": 0.0324773413897281, + "hf_musr": 0.33555208333333336, + "hf_avg": 5.250959952944556 + }, + { + "hf_id": "Josephgflowers/TinyLlama-Cinder-Agent-v1", + "name": "TinyLlama-Cinder-Agent-v1", + "params_b": 1.1, + "ifeval": 0.26695612087040166, + "bbh": 0.31160367351776513, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.11610704787234043, + "hf_math_lvl5": 0.03474320241691843, + "hf_musr": 0.33945833333333336, + "hf_avg": 6.332677189374276 + }, + { + "hf_id": "Josephgflowers/TinyLlama-v1.1-Cinders-World", + "name": "TinyLlama-v1.1-Cinders-World", + "params_b": 1.1, + "ifeval": 0.24692260978647768, + "bbh": 0.29979653176003074, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.11984707446808511, + "hf_math_lvl5": 0.03474320241691843, + "hf_musr": 0.3356145833333333, + "hf_avg": 5.683002628938165 + }, + { + "hf_id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", + "name": "TinyLlama_v1.1_math_code-world-test-1", + "params_b": 1.1, + "ifeval": 0.00784363267242029, + "bbh": 0.31463497508928434, + "gpqa": 0.23406040268456377, + "mmlu_pro": 0.11319813829787234, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.34990625000000003, + "hf_avg": 2.0028112750677205 + }, + { + "hf_id": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", + "name": "Tinyllama-STEM-Cinder-Agent-v1", + "params_b": 1.1, + "ifeval": 0.21257596510591897, + "bbh": 0.30843808427144626, + "gpqa": 0.2348993288590604, + "mmlu_pro": 0.10862699468085106, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.334125, + "hf_avg": 5.683635033220809 + }, + { + "hf_id": "Josephgflowers/Tinyllama-r1", + "name": "Tinyllama-r1", + "params_b": 1.1, + "ifeval": 0.2119265770378152, + "bbh": 0.3014631984266974, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.11344747340425532, + "hf_math_lvl5": 0.0324773413897281, + "hf_musr": 0.33148958333333334, + "hf_avg": 5.217266718069099 + }, + { + "hf_id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", + "name": "T3Q-Qwen2.5-14B-Instruct-1M-e3", + "ifeval": 0.732396707403024, + "bbh": 0.7585971930826706, + "gpqa": 0.41694630872483224, + "mmlu_pro": 0.5884308510638298, + "hf_math_lvl5": 0.2862537764350453, + "hf_musr": 0.5911041666666667, + "hf_avg": 47.091544713804204 + }, + { + "hf_id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", + "name": "T3Q-qwen2.5-14b-v1.0-e3", + "params_b": 14.77, + "ifeval": 0.732396707403024, + "bbh": 0.7585971930826706, + "gpqa": 0.41694630872483224, + "mmlu_pro": 0.5884308510638298, + "hf_math_lvl5": 0.2862537764350453, + "hf_musr": 0.5911041666666667, + "hf_avg": 47.091544713804204 + }, + { + "hf_id": "Kimargin/GPT-NEO-1.3B-wiki", + "name": "GPT-NEO-1.3B-wiki", + "params_b": 1.316, + "ifeval": 0.19206815693471102, + "bbh": 0.3026339952046975, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.10987367021276596, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3882604166666666, + "hf_avg": 5.349183189126563 + }, + { + "hf_id": "KingNish/Qwen2.5-0.5b-Test-ft", + "name": "Qwen2.5-0.5b-Test-ft", + "params_b": 0.494, + "ifeval": 0.26708134416681073, + "bbh": 0.3231533857529747, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.16888297872340424, + "hf_math_lvl5": 0.035498489425981876, + "hf_musr": 0.342125, + "hf_avg": 7.865415598812703 + }, + { + "hf_id": "KingNish/Reasoning-0.5b", + "name": "Reasoning-0.5b", + "params_b": 0.494, + "ifeval": 0.217421995859874, + "bbh": 0.33536255853174524, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.16414561170212766, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.35133333333333333, + "hf_avg": 7.163893227645488 + }, + { + "hf_id": "KingNish/Reasoning-Llama-3b-v0.1", + "name": "Reasoning-Llama-3b-v0.1", + "params_b": 3.213, + "ifeval": 0.6224628430342602, + "bbh": 0.43433592509582786, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.3029421542553192, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.31676041666666666, + "hf_avg": 20.212379567792407 + }, + { + "hf_id": "KingNish/qwen-1b-continued", + "name": "qwen-1b-continued", + "params_b": 1.277, + "ifeval": 0.12547263483113694, + "bbh": 0.29909543894796364, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.1260804521276596, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.38587499999999997, + "hf_avg": 4.792600271519794 + }, + { + "hf_id": "KingNish/qwen-1b-continued-v2", + "name": "qwen-1b-continued-v2", + "params_b": 1.277, + "ifeval": 0.1578711153073844, + "bbh": 0.31194932022650246, + "gpqa": 0.25, + "mmlu_pro": 0.11926529255319149, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.33927083333333335, + "hf_avg": 4.4416424249765365 + }, + { + "hf_id": "KingNish/qwen-1b-continued-v2.1", + "name": "qwen-1b-continued-v2.1", + "params_b": 1.277, + "ifeval": 0.11268323603594019, + "bbh": 0.30416583041069006, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.1278257978723404, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.41539583333333335, + "hf_avg": 5.461814579103264 + }, + { + "hf_id": "KingNish/qwen-1b-continued-v2.2", + "name": "qwen-1b-continued-v2.2", + "params_b": 1.277, + "ifeval": 0.14125963554479892, + "bbh": 0.30586579449667844, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.1262466755319149, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.35130208333333335, + "hf_avg": 4.441641963318088 + }, + { + "hf_id": "Kquant03/CognitiveFusion2-4x7B-BF16", + "name": "CognitiveFusion2-4x7B-BF16", + "params_b": 24.154, + "ifeval": 0.35665700341759865, + "bbh": 0.41078286111483786, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.27925531914893614, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4145520833333333, + "hf_avg": 15.629054867362484 + }, + { + "hf_id": "Kquant03/L3-Pneuma-8B", + "name": "L3-Pneuma-8B", + "params_b": 8.03, + "ifeval": 0.2374056392593873, + "bbh": 0.49550433176754827, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.31840093085106386, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.41715624999999995, + "hf_avg": 16.617570099377616 + }, + { + "hf_id": "Krystalan/DRT-o1-14B", + "name": "DRT-o1-14B", + "params_b": 14.77, + "ifeval": 0.4067662690549963, + "bbh": 0.637927537514229, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.5178690159574468, + "hf_math_lvl5": 0.4826283987915408, + "hf_musr": 0.47951041666666666, + "hf_avg": 36.16608563724584 + }, + { + "hf_id": "Krystalan/DRT-o1-7B", + "name": "DRT-o1-7B", + "params_b": 7.616, + "ifeval": 0.3928276971768242, + "bbh": 0.5467693339610741, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.41514295212765956, + "hf_math_lvl5": 0.4478851963746224, + "hf_musr": 0.50865625, + "hf_avg": 31.402661796813607 + }, + { + "hf_id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", + "name": "NeuralExperiment-7b-MagicCoder-v7.5", + "params_b": 7.242, + "ifeval": 0.4552509563513699, + "bbh": 0.3988446544778517, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.2824135638297872, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.4281979166666667, + "hf_avg": 18.006004625989025 + }, + { + "hf_id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", + "name": "NeuralLLaMa-3-8b-DT-v0.1", + "params_b": 8.03, + "ifeval": 0.4371412297149342, + "bbh": 0.4986771544360115, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.379155585106383, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.40711458333333334, + "hf_avg": 21.259598634719698 + }, + { + "hf_id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", + "name": "NeuralLLaMa-3-8b-ORPO-v0.3", + "params_b": 8.03, + "ifeval": 0.5275912356990563, + "bbh": 0.4557141539616392, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.3056848404255319, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.37003125, + "hf_avg": 17.74874162408916 + }, + { + "hf_id": "Kukedlc/NeuralSynthesis-7B-v0.1", + "name": "NeuralSynthesis-7B-v0.1", + "params_b": 7.242, + "ifeval": 0.4184563624516283, + "bbh": 0.5144745481048844, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.304936835106383, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.43328125, + "hf_avg": 20.015676781413074 + }, + { + "hf_id": "Kukedlc/NeuralSynthesis-7B-v0.3", + "name": "NeuralSynthesis-7B-v0.3", + "params_b": 7.242, + "ifeval": 0.4078400865259733, + "bbh": 0.5138078814382175, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.30501994680851063, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.4345833333333333, + "hf_avg": 20.095272707991136 + }, + { + "hf_id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", + "name": "NeuralSynthesis-7b-v0.4-slerp", + "params_b": 7.242, + "ifeval": 0.3947259936967247, + "bbh": 0.5142932549151301, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.3042719414893617, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.43324999999999997, + "hf_avg": 19.530512651958592 + }, + { + "hf_id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", + "name": "Qwen-2.5-7b-Spanish-o1-CoT", + "params_b": 7.616, + "ifeval": 0.4210295349672203, + "bbh": 0.5601947823443537, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.4363364361702128, + "hf_math_lvl5": 0.2726586102719033, + "hf_musr": 0.4776770833333333, + "hf_avg": 28.573639430598366 + }, + { + "hf_id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", + "name": "3_PRYMMAL-ECE-7B-SLERP-V1", + "params_b": 1.777, + "ifeval": 0.27422572108671656, + "bbh": 0.422793974567173, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.29247007978723405, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.3841354166666667, + "hf_avg": 14.854317212289837 + }, + { + "hf_id": "LEESM/llama-2-7b-hf-lora-oki100p", + "name": "llama-2-7b-hf-lora-oki100p", + "params_b": 6.738, + "ifeval": 0.25129434345314877, + "bbh": 0.34916752720369776, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.18558843085106383, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.3687291666666666, + "hf_avg": 8.782858998727155 + }, + { + "hf_id": "LEESM/llama-2-7b-hf-lora-oki10p", + "name": "llama-2-7b-hf-lora-oki10p", + "params_b": 6.738, + "ifeval": 0.22701432199896276, + "bbh": 0.3530929513059229, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.16788563829787234, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.34752083333333333, + "hf_avg": 7.168375740980447 + }, + { + "hf_id": "LEESM/llama-3-8b-bnb-4b-kowiki231101", + "name": "llama-3-8b-bnb-4b-kowiki231101", + "params_b": 8.03, + "ifeval": 0.16848739123303944, + "bbh": 0.4130805653617178, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.24251994680851063, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3551458333333333, + "hf_avg": 9.47249832488312 + }, + { + "hf_id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", + "name": "llama-3-Korean-Bllossom-8B-trexlab-oki10p", + "params_b": 8.03, + "ifeval": 0.21372513818889433, + "bbh": 0.43430121169320707, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.3176529255319149, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.38692708333333337, + "hf_avg": 13.509662923847069 + }, + { + "hf_id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", + "name": "EXAONE-3.0-7.8B-Instruct", + "params_b": 7.8, + "ifeval": 0.7192826145737754, + "bbh": 0.4174432647784512, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.35771276595744683, + "hf_math_lvl5": 0.30438066465256797, + "hf_musr": 0.366125, + "hf_avg": 25.733775511042666 + }, + { + "hf_id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", + "name": "EXAONE-3.5-2.4B-Instruct", + "params_b": 2.405, + "ifeval": 0.7950449252428002, + "bbh": 0.4092347113723405, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.32804188829787234, + "hf_math_lvl5": 0.3678247734138973, + "hf_musr": 0.366125, + "hf_avg": 27.143883211239544 + }, + { + "hf_id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", + "name": "EXAONE-3.5-32B-Instruct", + "params_b": 32.003, + "ifeval": 0.8391833668000904, + "bbh": 0.5760913742720142, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.4636801861702128, + "hf_math_lvl5": 0.5128398791540786, + "hf_musr": 0.38066666666666665, + "hf_avg": 37.603165755662836 + }, + { + "hf_id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", + "name": "EXAONE-3.5-7.8B-Instruct", + "params_b": 7.818, + "ifeval": 0.8136045692096969, + "bbh": 0.4727592304359862, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.4133144946808511, + "hf_math_lvl5": 0.47507552870090636, + "hf_musr": 0.3779375, + "hf_avg": 32.547229958748495 + }, + { + "hf_id": "LLM360/K2", + "name": "K2", + "params_b": 65.286, + "ifeval": 0.2252157608478836, + "bbh": 0.4971835676523677, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.30044880319148937, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.39799999999999996, + "hf_avg": 14.643753289939289 + }, + { + "hf_id": "LLM360/K2-Chat", + "name": "K2-Chat", + "params_b": 65.286, + "ifeval": 0.5151763986223221, + "bbh": 0.5358099630242067, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.3371010638297872, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.457, + "hf_avg": 24.387145154830876 + }, + { + "hf_id": "LLM4Binary/llm4decompile-1.3b-v2", + "name": "llm4decompile-1.3b-v2", + "params_b": 1.346, + "ifeval": 0.22678936333373229, + "bbh": 0.3271808417267589, + "gpqa": 0.23573825503355705, + "mmlu_pro": 0.12092752659574468, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.4071770833333333, + "hf_avg": 6.93902486034668 + }, + { + "hf_id": "Langboat/Mengzi3-8B-Chat", + "name": "Mengzi3-8B-Chat", + "params_b": 8.03, + "ifeval": 0.513977357854936, + "bbh": 0.4683725003203179, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.31416223404255317, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.4077916666666667, + "hf_avg": 20.28829250015946 + }, + { + "hf_id": "LenguajeNaturalAI/leniachat-gemma-2b-v0", + "name": "leniachat-gemma-2b-v0", + "params_b": 2.506, + "ifeval": 0.21497404664069114, + "bbh": 0.30740211895412034, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.11702127659574468, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.36590625000000004, + "hf_avg": 5.737240998088876 + }, + { + "hf_id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", + "name": "leniachat-qwen2-1.5B-v0", + "params_b": 1.543, + "ifeval": 0.22211842356059697, + "bbh": 0.36835590195612017, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.18799867021276595, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3749895833333334, + "hf_avg": 8.580803348718375 + }, + { + "hf_id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", + "name": "LCARS_AI_1x4_003_SuperAI", + "params_b": 24.154, + "ifeval": 0.41111251479407973, + "bbh": 0.49198503573704794, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.29720744680851063, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4506145833333333, + "hf_avg": 19.518229049313643 + }, + { + "hf_id": "LeroyDyer/LCARS_AI_StarTrek_Computer", + "name": "LCARS_AI_StarTrek_Computer", + "params_b": 7.242, + "ifeval": 0.35825609383103496, + "bbh": 0.4446191188748297, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.24584441489361702, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.3950208333333333, + "hf_avg": 14.613893572975764 + }, + { + "hf_id": "LeroyDyer/LCARS_TOP_SCORE", + "name": "LCARS_TOP_SCORE", + "params_b": 7.242, + "ifeval": 0.43706587410293574, + "bbh": 0.5127371051825098, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3031083776595745, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.42928125, + "hf_avg": 20.322005404213282 + }, + { + "hf_id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", + "name": "Mixtral_AI_SwahiliTron_7b", + "params_b": 7.242, + "ifeval": 0.1533996462718919, + "bbh": 0.3055092453201354, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.12076130319148937, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.34203125, + "hf_avg": 4.358661915319342 + }, + { + "hf_id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", + "name": "SpydazWeb_AI_CyberTron_Ultra_7b", + "params_b": 7.242, + "ifeval": 0.15557276914143361, + "bbh": 0.48107736108561827, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.2865691489361702, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.41362499999999996, + "hf_avg": 13.566675630986595 + }, + { + "hf_id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", + "name": "SpydazWeb_AI_HumanAGI_002", + "params_b": 7.242, + "ifeval": 0.40876430094371824, + "bbh": 0.5043871825389313, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3058510638297872, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.48648958333333336, + "hf_avg": 21.01474000615888 + }, + { + "hf_id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", + "name": "SpydazWeb_AI_HumanAI_RP", + "params_b": 7.242, + "ifeval": 0.2541168543907942, + "bbh": 0.33230179059744286, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.1323969414893617, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3882604166666666, + "hf_avg": 7.78223636896281 + }, + { + "hf_id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", + "name": "_Spydaz_Web_AI_AGI_R1_001", + "params_b": 7.242, + "ifeval": 0.4505046609662362, + "bbh": 0.4609124425176902, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.2734375, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.42559375, + "hf_avg": 18.185695306409894 + }, + { + "hf_id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", + "name": "_Spydaz_Web_AI_AGI_R1_002", + "params_b": 7.242, + "ifeval": 0.5306885729863429, + "bbh": 0.4682582050072746, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.28939494680851063, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.42546875, + "hf_avg": 19.869138876185648 + }, + { + "hf_id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", + "name": "_Spydaz_Web_AI_AGI_R1_OmG_001", + "params_b": 7.242, + "ifeval": 0.5817963004827191, + "bbh": 0.4907982146977475, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.29055851063829785, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.4486041666666667, + "hf_avg": 22.49357945577393 + }, + { + "hf_id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", + "name": "_Spydaz_Web_AI_AGI_R1_OmG_Coder", + "params_b": 7.242, + "ifeval": 0.4923702442851634, + "bbh": 0.46376531085099754, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.28897938829787234, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.5624583333333334, + "hf_avg": 22.644667648469394 + }, + { + "hf_id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", + "name": "_Spydaz_Web_AI_AGI_RP_R1", + "params_b": 7.242, + "ifeval": 0.5426036250482054, + "bbh": 0.4701061648636955, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.28939494680851063, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.42013541666666665, + "hf_avg": 20.102174437145493 + }, + { + "hf_id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", + "name": "_Spydaz_Web_AI_BIBLE_002", + "params_b": 7.242, + "ifeval": 0.21949538336059432, + "bbh": 0.3289070186514165, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.13680186170212766, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.34069791666666666, + "hf_avg": 6.86090172693433 + }, + { + "hf_id": "LightningRodLabs/Flashlight-v1.0", + "name": "Flashlight-v1.0", + "params_b": 14.66, + "ifeval": 0.6745446526327921, + "bbh": 0.6876833310149727, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.5402260638297872, + "hf_math_lvl5": 0.49697885196374625, + "hf_musr": 0.41009375, + "hf_avg": 40.5748695700393 + }, + { + "hf_id": "LightningRodLabs/Flashlight-v1.1", + "name": "Flashlight-v1.1", + "params_b": 14.66, + "ifeval": 0.6720967034136092, + "bbh": 0.6901141327534415, + "gpqa": 0.33976510067114096, + "mmlu_pro": 0.5415558510638298, + "hf_math_lvl5": 0.5324773413897281, + "hf_musr": 0.4047604166666667, + "hf_avg": 40.98595152297397 + }, + { + "hf_id": "LightningRodLabs/Flashlight-v1.2", + "name": "Flashlight-v1.2", + "params_b": 14.766, + "ifeval": 0.4359920566319587, + "bbh": 0.3264526807518731, + "gpqa": 0.23573825503355705, + "mmlu_pro": 0.24850398936170212, + "hf_math_lvl5": 0.1555891238670695, + "hf_musr": 0.45536458333333335, + "hf_avg": 16.37861284160058 + }, + { + "hf_id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", + "name": "2_PRYMMAL-ECE-2B-SLERP-V1", + "params_b": 2.614, + "ifeval": 0.5823459531820016, + "bbh": 0.4287069505821554, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.2677859042553192, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.43746875, + "hf_avg": 21.160448654695188 + }, + { + "hf_id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", + "name": "2_PRYMMAL-ECE-2B-SLERP-V2", + "params_b": 2.614, + "ifeval": 0.5542693386880144, + "bbh": 0.43764741906109417, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2744348404255319, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.44816666666666666, + "hf_avg": 21.073953219725933 + }, + { + "hf_id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", + "name": "2_PRYMMAL-ECE-7B-SLERP", + "params_b": 7.616, + "ifeval": 0.5577412376937636, + "bbh": 0.5556642048146725, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.45071476063829785, + "hf_math_lvl5": 0.3632930513595166, + "hf_musr": 0.43960416666666663, + "hf_avg": 31.51530852235753 + }, + { + "hf_id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", + "name": "2_PRYMMAL-ECE-7B-SLERP-V1", + "params_b": 7.616, + "ifeval": 0.10733742026711349, + "bbh": 0.30525797550329686, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.11236702127659574, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3910833333333333, + "hf_avg": 3.733001542831197 + }, + { + "hf_id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", + "name": "2_PRYMMAL-ECE-7B-SLERP-V2", + "params_b": 7.616, + "ifeval": 0.10733742026711349, + "bbh": 0.30525797550329686, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.11236702127659574, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3910833333333333, + "hf_avg": 3.733001542831197 + }, + { + "hf_id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", + "name": "PRYMMAL-ECE-1B-SLERP-V1", + "params_b": 1.777, + "ifeval": 0.2874395492847866, + "bbh": 0.41904526564708194, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2925531914893617, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.39743749999999994, + "hf_avg": 14.88329630475093 + }, + { + "hf_id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", + "name": "PRYMMAL-ECE-7B-SLERP-V8", + "params_b": 7.616, + "ifeval": 0.1258471965495995, + "bbh": 0.2955092966258663, + "gpqa": 0.25, + "mmlu_pro": 0.11278257978723404, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.36314583333333333, + "hf_avg": 3.386229392038714 + }, + { + "hf_id": "LilRg/10PRYMMAL-3B-slerp", + "name": "10PRYMMAL-3B-slerp", + "params_b": 3.821, + "ifeval": 0.1945903535951276, + "bbh": 0.5320377091634505, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3881316489361702, + "hf_math_lvl5": 0.14954682779456194, + "hf_musr": 0.45290625, + "hf_avg": 21.087895754438843 + }, + { + "hf_id": "LilRg/ECE-1B-merge-PRYMMAL", + "name": "ECE-1B-merge-PRYMMAL", + "params_b": 1.777, + "ifeval": 0.27122811916825135, + "bbh": 0.42345600176908743, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.2906416223404255, + "hf_math_lvl5": 0.10120845921450151, + "hf_musr": 0.3801041666666667, + "hf_avg": 14.497652546900952 + }, + { + "hf_id": "LilRg/ECE_Finetunning", + "name": "ECE_Finetunning", + "params_b": 16.061, + "ifeval": 0.04453849120334047, + "bbh": 0.47321596790730514, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3191489361702128, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.38394791666666667, + "hf_avg": 11.987032259556209 + }, + { + "hf_id": "LilRg/PRYMMAL-6B-slerp", + "name": "PRYMMAL-6B-slerp", + "params_b": 3.293, + "ifeval": 0.11533065599276586, + "bbh": 0.28676215692036117, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.1107878989361702, + "hf_musr": 0.36975, + "hf_avg": 3.23270592407536 + }, + { + "hf_id": "LilRg/PRYMMAL-slerp-Merge", + "name": "PRYMMAL-slerp-Merge", + "params_b": 3.821, + "ifeval": 0.304400102838247, + "bbh": 0.5364156271768925, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3863031914893617, + "hf_math_lvl5": 0.16163141993957703, + "hf_musr": 0.46347916666666666, + "hf_avg": 23.42765486276747 + }, + { + "hf_id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", + "name": "CodeMind-Llama3-8B-unsloth_v2-merged", + "params_b": 8.03, + "ifeval": 0.6946280314011268, + "bbh": 0.48600920882996324, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.3505651595744681, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.3316145833333333, + "hf_avg": 22.472907319238214 + }, + { + "hf_id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", + "name": "CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", + "params_b": 8.03, + "ifeval": 0.6492406813920397, + "bbh": 0.48526582322240047, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.3353557180851064, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.3607916666666667, + "hf_avg": 21.711489727026887 + }, + { + "hf_id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", + "name": "CodeMind-Llama3-8B-unsloth_v4-one-merged", + "params_b": 8.03, + "ifeval": 0.32108693821283085, + "bbh": 0.47387586084568856, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.33527260638297873, + "hf_math_lvl5": 0.05513595166163142, + "hf_musr": 0.40692708333333333, + "hf_avg": 17.61324636978146 + }, + { + "hf_id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", + "name": "CodeMind-Llama3.1-8B-unsloth-merged", + "params_b": 8.03, + "ifeval": 0.6490157227268093, + "bbh": 0.4694777854416285, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.33402593085106386, + "hf_math_lvl5": 0.10876132930513595, + "hf_musr": 0.37523958333333335, + "hf_avg": 22.317695413728828 + }, + { + "hf_id": "Locutusque/Hercules-6.0-Llama-3.1-8B", + "name": "Hercules-6.0-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.6630041622893922, + "bbh": 0.48133037900119535, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.3614527925531915, + "hf_math_lvl5": 0.16691842900302115, + "hf_musr": 0.362125, + "hf_avg": 23.836013153195413 + }, + { + "hf_id": "Locutusque/Hercules-6.1-Llama-3.1-8B", + "name": "Hercules-6.1-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.6006806384836678, + "bbh": 0.46562423765034017, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.36685505319148937, + "hf_math_lvl5": 0.17598187311178248, + "hf_musr": 0.35533333333333333, + "hf_avg": 22.723248561634563 + }, + { + "hf_id": "Locutusque/Llama-3-NeuralHercules-5.0-8B", + "name": "Llama-3-NeuralHercules-5.0-8B", + "params_b": 8.03, + "ifeval": 0.4489310584803876, + "bbh": 0.3940474241916672, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.29330119680851063, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.3880729166666667, + "hf_avg": 16.042475909115613 + }, + { + "hf_id": "Locutusque/TinyMistral-248M-v2.5", + "name": "TinyMistral-248M-v2.5", + "params_b": 0.248, + "ifeval": 0.1336409615376091, + "bbh": 0.30385761123260785, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.11353058510638298, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.37815624999999997, + "hf_avg": 4.035439468453137 + }, + { + "hf_id": "Luni/StarDust-12b-v1", + "name": "StarDust-12b-v1", + "params_b": 12.248, + "ifeval": 0.5459259210007226, + "bbh": 0.5366139363101082, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.34117353723404253, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.43244791666666665, + "hf_avg": 23.447979554409255 + }, + { + "hf_id": "Luni/StarDust-12b-v2", + "name": "StarDust-12b-v2", + "params_b": 12.248, + "ifeval": 0.5628620947973599, + "bbh": 0.5419479534912178, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3439162234042553, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.4338125, + "hf_avg": 24.215076436734588 + }, + { + "hf_id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v4", + "params_b": 14.766, + "ifeval": 0.6943033373670748, + "bbh": 0.6419880364363972, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5251828457446809, + "hf_math_lvl5": 0.3466767371601209, + "hf_musr": 0.476875, + "hf_avg": 39.28165983782925 + }, + { + "hf_id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v5", + "params_b": 14.766, + "ifeval": 0.7485084021507378, + "bbh": 0.6466679318879384, + "gpqa": 0.3624161073825503, + "mmlu_pro": 0.5140458776595744, + "hf_math_lvl5": 0.43580060422960726, + "hf_musr": 0.4473020833333334, + "hf_avg": 40.96266321754947 + }, + { + "hf_id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8", + "params_b": 14.766, + "ifeval": 0.7874761189200211, + "bbh": 0.6419472828128271, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.5206117021276596, + "hf_math_lvl5": 0.5558912386706949, + "hf_musr": 0.43936458333333334, + "hf_avg": 42.7836905135922 + }, + { + "hf_id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.7", + "params_b": 14.766, + "ifeval": 0.7874761189200211, + "bbh": 0.6482757721443902, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.524185505319149, + "hf_math_lvl5": 0.540785498489426, + "hf_musr": 0.4380625, + "hf_avg": 43.093410973736184 + }, + { + "hf_id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.9", + "params_b": 14.766, + "ifeval": 0.7993413032974729, + "bbh": 0.6483097746745584, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.5199468085106383, + "hf_math_lvl5": 0.5370090634441088, + "hf_musr": 0.43282291666666667, + "hf_avg": 42.523098714975845 + }, + { + "hf_id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", + "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9.2", + "params_b": 14.766, + "ifeval": 0.7862272104682243, + "bbh": 0.6537693501484436, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.5283410904255319, + "hf_math_lvl5": 0.5332326283987915, + "hf_musr": 0.43809375, + "hf_avg": 43.22504860283835 + }, + { + "hf_id": "Lyte/Llama-3.2-3B-Overthinker", + "name": "Llama-3.2-3B-Overthinker", + "params_b": 3.213, + "ifeval": 0.6407975283359264, + "bbh": 0.4320093097952517, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.29853723404255317, + "hf_math_lvl5": 0.15634441087613293, + "hf_musr": 0.34190625, + "hf_avg": 21.167473631942162 + }, + { + "hf_id": "M4-ai/TinyMistral-248M-v3", + "name": "TinyMistral-248M-v3", + "params_b": 0.248, + "ifeval": 0.16386631914431488, + "bbh": 0.2884549938995566, + "gpqa": 0.2407718120805369, + "mmlu_pro": 0.11319813829787234, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.3793333333333333, + "hf_avg": 4.20563631018843 + }, + { + "hf_id": "MLP-KTLim/llama-3-Korean-Bllossom-8B", + "name": "llama-3-Korean-Bllossom-8B", + "params_b": 8.03, + "ifeval": 0.5112800702136997, + "bbh": 0.49004556470187666, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.359375, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.3674583333333334, + "hf_avg": 20.39691634614766 + }, + { + "hf_id": "MTSAIR/Cotype-Nano", + "name": "Cotype-Nano", + "params_b": 1.544, + "ifeval": 0.3747922179816221, + "bbh": 0.3864940969601492, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.24767287234042554, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.3289166666666667, + "hf_avg": 13.812756376126401 + }, + { + "hf_id": "MTSAIR/MultiVerse_70B", + "name": "MultiVerse_70B", + "params_b": 72.289, + "ifeval": 0.5249183278146429, + "bbh": 0.6183134284931178, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.48603723404255317, + "hf_math_lvl5": 0.19259818731117825, + "hf_musr": 0.47398958333333335, + "hf_avg": 32.24436452801163 + }, + { + "hf_id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", + "name": "Llama-3-8B-Magpie-Align-SFT-v0.1", + "params_b": 8.03, + "ifeval": 0.4361416596851908, + "bbh": 0.4615102744527366, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.2863198138297872, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.32773958333333336, + "hf_avg": 15.954087586908154 + }, + { + "hf_id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", + "name": "Llama-3-8B-Magpie-Align-SFT-v0.3", + "params_b": 8.03, + "ifeval": 0.5063586838477463, + "bbh": 0.45715808996720547, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.2902260638297872, + "hf_math_lvl5": 0.07326283987915408, + "hf_musr": 0.34237500000000004, + "hf_avg": 17.55322537025943 + }, + { + "hf_id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", + "name": "Llama-3-8B-Magpie-Align-v0.1", + "params_b": 8.03, + "ifeval": 0.4118117705465941, + "bbh": 0.4811441560714845, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.3006150265957447, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.3046979166666667, + "hf_avg": 16.473094269110153 + }, + { + "hf_id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", + "name": "Llama-3-8B-Magpie-Align-v0.1", + "params_b": 8.03, + "ifeval": 0.4027192294223771, + "bbh": 0.47894081019705514, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.30011635638297873, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.3086979166666666, + "hf_avg": 16.484004626097455 + }, + { + "hf_id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", + "name": "Llama-3-8B-Magpie-Align-v0.3", + "params_b": 8.03, + "ifeval": 0.44970566984490046, + "bbh": 0.456960506522001, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.31341422872340424, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.34060416666666665, + "hf_avg": 17.40249479320385 + }, + { + "hf_id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", + "name": "Llama-3.1-8B-Magpie-Align-SFT-v0.1", + "params_b": 8.03, + "ifeval": 0.47820671374176077, + "bbh": 0.4764157817799906, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.29429853723404253, + "hf_math_lvl5": 0.08987915407854985, + "hf_musr": 0.3397395833333334, + "hf_avg": 17.975799245688165 + }, + { + "hf_id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", + "name": "Llama-3.1-8B-Magpie-Align-v0.1", + "params_b": 8.03, + "ifeval": 0.4457838535086903, + "bbh": 0.46223963164680143, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.32621343085106386, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.31406249999999997, + "hf_avg": 17.54685483954604 + }, + { + "hf_id": "Magpie-Align/MagpieLM-8B-Chat-v0.1", + "name": "MagpieLM-8B-Chat-v0.1", + "params_b": 8.03, + "ifeval": 0.3700714105240761, + "bbh": 0.4172338260055306, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.3194813829787234, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.3500625, + "hf_avg": 15.026344312964895 + }, + { + "hf_id": "Magpie-Align/MagpieLM-8B-SFT-v0.1", + "name": "MagpieLM-8B-SFT-v0.1", + "params_b": 8.03, + "ifeval": 0.4720619068515982, + "bbh": 0.45528501595553356, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.2989527925531915, + "hf_math_lvl5": 0.0755287009063444, + "hf_musr": 0.3648854166666667, + "hf_avg": 17.78392950063439 + }, + { + "hf_id": "MagusCorp/grpo_lora_enem_llama3_7b", + "name": "grpo_lora_enem_llama3_7b", + "params_b": 8.03, + "ifeval": 0.4723622211288271, + "bbh": 0.48014581980384746, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.35738031914893614, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.397125, + "hf_avg": 21.634388075131113 + }, + { + "hf_id": "ManoloPueblo/LLM_MERGE_CC2", + "name": "LLM_MERGE_CC2", + "params_b": 7.242, + "ifeval": 0.3853087585384557, + "bbh": 0.5209367401710429, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.30319148936170215, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.45929166666666665, + "hf_avg": 20.747367714260108 + }, + { + "hf_id": "ManoloPueblo/LLM_MERGE_CC3", + "name": "LLM_MERGE_CC3", + "params_b": 7.242, + "ifeval": 0.3958751667797001, + "bbh": 0.5246290546274339, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.3155751329787234, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.4671666666666667, + "hf_avg": 21.67864029208779 + }, + { + "hf_id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", + "name": "MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", + "params_b": 2.614, + "ifeval": 0.25484159807089635, + "bbh": 0.3952730330493959, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.22739361702127658, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.40832291666666665, + "hf_avg": 12.494620122547147 + }, + { + "hf_id": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", + "name": "MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", + "params_b": 24.16, + "ifeval": 0.16973629968483622, + "bbh": 0.3464368053320647, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.13788231382978725, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3990833333333333, + "hf_avg": 6.761759514131008 + }, + { + "hf_id": "Marsouuu/general3B-ECE-PRYMMAL-Martial", + "name": "general3B-ECE-PRYMMAL-Martial", + "params_b": 3.821, + "ifeval": 0.27222658102722996, + "bbh": 0.5394350977017502, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.38763297872340424, + "hf_math_lvl5": 0.15483383685800603, + "hf_musr": 0.4700520833333333, + "hf_avg": 22.978904737174783 + }, + { + "hf_id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", + "name": "general3Bv2-ECE-PRYMMAL-Martial", + "params_b": 7.616, + "ifeval": 0.5692817280371636, + "bbh": 0.5636569831901026, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.4498005319148936, + "hf_math_lvl5": 0.36706948640483383, + "hf_musr": 0.43960416666666663, + "hf_avg": 31.917859287211872 + }, + { + "hf_id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", + "name": "lareneg1_78B-ECE-PRYMMAL-Martial", + "params_b": 1.777, + "ifeval": 0.2794961812435449, + "bbh": 0.42301343044108936, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2922207446808511, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.38673958333333336, + "hf_avg": 15.081989145779028 + }, + { + "hf_id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", + "name": "lareneg3B-ECE-PRYMMAL-Martial", + "params_b": 3.821, + "ifeval": 0.33032908239028, + "bbh": 0.5453325807578268, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.37666223404255317, + "hf_math_lvl5": 0.15181268882175228, + "hf_musr": 0.47246875, + "hf_avg": 23.942055269453448 + }, + { + "hf_id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", + "name": "lareneg3Bv2-ECE-PRYMMAL-Martial", + "params_b": 7.616, + "ifeval": 0.5753267995585047, + "bbh": 0.562336014537904, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.45113031914893614, + "hf_math_lvl5": 0.36555891238670696, + "hf_musr": 0.4369375, + "hf_avg": 32.112665840964375 + }, + { + "hf_id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", + "name": "Calme-4x7B-MoE-v0.1", + "params_b": 24.154, + "ifeval": 0.4315205875964663, + "bbh": 0.5102819889174134, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.3056848404255319, + "hf_math_lvl5": 0.08006042296072508, + "hf_musr": 0.4198854166666666, + "hf_avg": 20.023903328526014 + }, + { + "hf_id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", + "name": "Calme-4x7B-MoE-v0.2", + "params_b": 24.154, + "ifeval": 0.429447200095746, + "bbh": 0.5110766802558263, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.30576795212765956, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.43176041666666665, + "hf_avg": 20.176360946073107 + }, + { + "hf_id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", + "name": "Llama-3-70B-Instruct-v0.1", + "params_b": 70.554, + "ifeval": 0.47143800671108216, + "bbh": 0.5366257615951637, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.4617686170212766, + "hf_math_lvl5": 0.18051359516616314, + "hf_musr": 0.4433020833333334, + "hf_avg": 26.33391343739024 + }, + { + "hf_id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", + "name": "Llama-3-8B-Instruct-v0.10", + "params_b": 8.03, + "ifeval": 0.7667433520835827, + "bbh": 0.4924311866686311, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.38622007978723405, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.42143749999999996, + "hf_avg": 26.797403388133414 + }, + { + "hf_id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", + "name": "Llama-3-8B-Instruct-v0.8", + "params_b": 8.03, + "ifeval": 0.7527549125209998, + "bbh": 0.49627836815949883, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3853058510638298, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.42019791666666667, + "hf_avg": 26.888884373724007 + }, + { + "hf_id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", + "name": "Llama-3-8B-Instruct-v0.9", + "params_b": 8.03, + "ifeval": 0.763046494412603, + "bbh": 0.4936132794870085, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3845578457446808, + "hf_math_lvl5": 0.07326283987915408, + "hf_musr": 0.4148020833333333, + "hf_avg": 26.786644392116216 + }, + { + "hf_id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", + "name": "Qwen1.5-MoE-A2.7B-Wikihow", + "params_b": 14.316, + "ifeval": 0.29543278501043896, + "bbh": 0.3920071454890602, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.23803191489361702, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.35021875, + "hf_avg": 12.325434589632001 + }, + { + "hf_id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", + "name": "Qwen2-7B-Instruct-v0.1", + "params_b": 7.616, + "ifeval": 0.33522498082864577, + "bbh": 0.5123061019250074, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3857214095744681, + "hf_math_lvl5": 0.2212990936555891, + "hf_musr": 0.44347916666666665, + "hf_avg": 22.98150895877193 + }, + { + "hf_id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", + "name": "Qwen2-7B-Instruct-v0.8", + "params_b": 7.616, + "ifeval": 0.27747266142723526, + "bbh": 0.4637108491317945, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3566323138297872, + "hf_math_lvl5": 0.17673716012084592, + "hf_musr": 0.4293125, + "hf_avg": 19.558137588276004 + }, + { + "hf_id": "MaziyarPanahi/calme-2.1-phi3-4b", + "name": "calme-2.1-phi3-4b", + "params_b": 3.821, + "ifeval": 0.552520645221346, + "bbh": 0.5595320442699866, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.3745844414893617, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.40153124999999995, + "hf_avg": 25.985365252885945 + }, + { + "hf_id": "MaziyarPanahi/calme-2.1-phi3.5-4b", + "name": "calme-2.1-phi3.5-4b", + "params_b": 3.821, + "ifeval": 0.5659095644002359, + "bbh": 0.5483695590203843, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.3935339095744681, + "hf_math_lvl5": 0.2039274924471299, + "hf_musr": 0.3994583333333333, + "hf_avg": 28.000378443622285 + }, + { + "hf_id": "MaziyarPanahi/calme-2.1-qwen2-72b", + "name": "calme-2.1-qwen2-72b", + "params_b": 72.699, + "ifeval": 0.8162774770941104, + "bbh": 0.6965560971922596, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5414727393617021, + "hf_math_lvl5": 0.4078549848942598, + "hf_musr": 0.47321875, + "hf_avg": 44.39894388647036 + }, + { + "hf_id": "MaziyarPanahi/calme-2.1-qwen2-7b", + "name": "calme-2.1-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.3816119008674761, + "bbh": 0.5045925887362795, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3692652925531915, + "hf_math_lvl5": 0.2311178247734139, + "hf_musr": 0.44369791666666664, + "hf_avg": 23.5418799100886 + }, + { + "hf_id": "MaziyarPanahi/calme-2.1-qwen2.5-72b", + "name": "calme-2.1-qwen2.5-72b", + "params_b": 72.7, + "ifeval": 0.8662360315075112, + "bbh": 0.7261624327092416, + "gpqa": 0.36325503355704697, + "mmlu_pro": 0.5619182180851063, + "hf_math_lvl5": 0.5913897280966768, + "hf_musr": 0.42984375, + "hf_avg": 47.85672202584548 + }, + { + "hf_id": "MaziyarPanahi/calme-2.1-rys-78b", + "name": "calme-2.1-rys-78b", + "params_b": 77.965, + "ifeval": 0.8135547015252862, + "bbh": 0.7097861139530462, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.5443816489361702, + "hf_math_lvl5": 0.3942598187311178, + "hf_musr": 0.4693125, + "hf_avg": 44.64399850290042 + }, + { + "hf_id": "MaziyarPanahi/calme-2.2-llama3-70b", + "name": "calme-2.2-llama3-70b", + "params_b": 70.554, + "ifeval": 0.8208486814984242, + "bbh": 0.6435431762417703, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.5206948138297872, + "hf_math_lvl5": 0.2394259818731118, + "hf_musr": 0.4445729166666667, + "hf_avg": 38.14006369052438 + }, + { + "hf_id": "MaziyarPanahi/calme-2.2-phi3-4b", + "name": "calme-2.2-phi3-4b", + "params_b": 3.821, + "ifeval": 0.5069083365470286, + "bbh": 0.5529604896487258, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3813996010638298, + "hf_math_lvl5": 0.14501510574018128, + "hf_musr": 0.3975625, + "hf_avg": 25.232640533631113 + }, + { + "hf_id": "MaziyarPanahi/calme-2.2-qwen2-72b", + "name": "calme-2.2-qwen2-72b", + "params_b": 72.706, + "ifeval": 0.8008151704145002, + "bbh": 0.6939595229335245, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.543467420212766, + "hf_math_lvl5": 0.45317220543806647, + "hf_musr": 0.4508020833333333, + "hf_avg": 44.09009576064759 + }, + { + "hf_id": "MaziyarPanahi/calme-2.2-qwen2-7b", + "name": "calme-2.2-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.35972996094806226, + "bbh": 0.5214913750127922, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3898769946808511, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.43582291666666667, + "hf_avg": 23.58331943928233 + }, + { + "hf_id": "MaziyarPanahi/calme-2.2-qwen2.5-72b", + "name": "calme-2.2-qwen2.5-72b", + "params_b": 72.7, + "ifeval": 0.8476763875406145, + "bbh": 0.7276399007138082, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.561751994680851, + "hf_math_lvl5": 0.5891238670694864, + "hf_musr": 0.4206666666666667, + "hf_avg": 47.22457675508573 + }, + { + "hf_id": "MaziyarPanahi/calme-2.2-rys-78b", + "name": "calme-2.2-rys-78b", + "params_b": 77.965, + "ifeval": 0.7986420475449585, + "bbh": 0.7081014602379213, + "gpqa": 0.40687919463087246, + "mmlu_pro": 0.538563829787234, + "hf_math_lvl5": 0.4070996978851964, + "hf_musr": 0.45356250000000004, + "hf_avg": 44.38633440385336 + }, + { + "hf_id": "MaziyarPanahi/calme-2.3-llama3-70b", + "name": "calme-2.3-llama3-70b", + "params_b": 70.554, + "ifeval": 0.8010401290797307, + "bbh": 0.6399173489368603, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.5204454787234043, + "hf_math_lvl5": 0.2326283987915408, + "hf_musr": 0.42612500000000003, + "hf_avg": 37.067032265745496 + }, + { + "hf_id": "MaziyarPanahi/calme-2.3-phi3-4b", + "name": "calme-2.3-phi3-4b", + "params_b": 3.821, + "ifeval": 0.49264507063480456, + "bbh": 0.5537867816134527, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.3828125, + "hf_math_lvl5": 0.1472809667673716, + "hf_musr": 0.3988333333333333, + "hf_avg": 24.981612770364112 + }, + { + "hf_id": "MaziyarPanahi/calme-2.3-qwen2-72b", + "name": "calme-2.3-qwen2-72b", + "params_b": 72.706, + "ifeval": 0.3849840645044039, + "bbh": 0.6576306700720502, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5418882978723404, + "hf_math_lvl5": 0.31722054380664655, + "hf_musr": 0.4112395833333333, + "hf_avg": 33.00083092091415 + }, + { + "hf_id": "MaziyarPanahi/calme-2.3-qwen2-7b", + "name": "calme-2.3-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.3824862476008103, + "bbh": 0.5064049035932394, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3611203457446808, + "hf_math_lvl5": 0.20694864048338368, + "hf_musr": 0.4422395833333333, + "hf_avg": 23.08170100435127 + }, + { + "hf_id": "MaziyarPanahi/calme-2.3-rys-78b", + "name": "calme-2.3-rys-78b", + "params_b": 77.965, + "ifeval": 0.8065854155862002, + "bbh": 0.7107763314317289, + "gpqa": 0.40436241610738255, + "mmlu_pro": 0.5475398936170213, + "hf_math_lvl5": 0.39803625377643503, + "hf_musr": 0.45492708333333337, + "hf_avg": 44.55737382877077 + }, + { + "hf_id": "MaziyarPanahi/calme-2.4-llama3-70b", + "name": "calme-2.4-llama3-70b", + "params_b": 70.554, + "ifeval": 0.5027371817887649, + "bbh": 0.6418191966839487, + "gpqa": 0.33976510067114096, + "mmlu_pro": 0.5203623670212766, + "hf_math_lvl5": 0.24471299093655588, + "hf_musr": 0.4287916666666667, + "hf_avg": 32.486225248764676 + }, + { + "hf_id": "MaziyarPanahi/calme-2.4-qwen2-7b", + "name": "calme-2.4-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.32995452067181746, + "bbh": 0.5101416326251771, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.3976894946808511, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.44528125, + "hf_avg": 22.85144077010553 + }, + { + "hf_id": "MaziyarPanahi/calme-2.4-rys-78b", + "name": "calme-2.4-rys-78b", + "params_b": 77.965, + "ifeval": 0.8010899967641414, + "bbh": 0.7279510956242796, + "gpqa": 0.40268456375838924, + "mmlu_pro": 0.7002160904255319, + "hf_math_lvl5": 0.4070996978851964, + "hf_musr": 0.5770624999999999, + "hf_avg": 50.76504719022304 + }, + { + "hf_id": "MaziyarPanahi/calme-2.5-qwen2-7b", + "name": "calme-2.5-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.31449221399220734, + "bbh": 0.4886561146965678, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3681848404255319, + "hf_math_lvl5": 0.2258308157099698, + "hf_musr": 0.45646875, + "hf_avg": 22.659538685051796 + }, + { + "hf_id": "MaziyarPanahi/calme-2.6-qwen2-7b", + "name": "calme-2.6-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.3442676542684522, + "bbh": 0.4930243946403894, + "gpqa": 0.2843959731543625, + "mmlu_pro": 0.3731715425531915, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.4586145833333333, + "hf_avg": 21.23233085446529 + }, + { + "hf_id": "MaziyarPanahi/calme-2.7-qwen2-7b", + "name": "calme-2.7-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.3592301759331906, + "bbh": 0.4883170901309997, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3705119680851064, + "hf_math_lvl5": 0.13821752265861026, + "hf_musr": 0.48242708333333334, + "hf_avg": 22.355267151114735 + }, + { + "hf_id": "MaziyarPanahi/calme-3.1-baguette-3b", + "name": "calme-3.1-baguette-3b", + "params_b": 3.085, + "ifeval": 0.6234369251364158, + "bbh": 0.46833341042911075, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.33992686170212766, + "hf_math_lvl5": 0.25604229607250756, + "hf_musr": 0.40079166666666666, + "hf_avg": 25.581601911367752 + }, + { + "hf_id": "MaziyarPanahi/calme-3.1-instruct-3b", + "name": "calme-3.1-instruct-3b", + "params_b": 3.085, + "ifeval": 0.43359397509718656, + "bbh": 0.4812730148043098, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.355718085106383, + "hf_math_lvl5": 0.17749244712990936, + "hf_musr": 0.39520833333333333, + "hf_avg": 21.507090636089913 + }, + { + "hf_id": "MaziyarPanahi/calme-3.1-instruct-78b", + "name": "calme-3.1-instruct-78b", + "params_b": 77.965, + "ifeval": 0.8135547015252862, + "bbh": 0.7305154498840408, + "gpqa": 0.3959731543624161, + "mmlu_pro": 0.718500664893617, + "hf_math_lvl5": 0.39274924471299094, + "hf_musr": 0.5890624999999999, + "hf_avg": 51.28748997363473 + }, + { + "hf_id": "MaziyarPanahi/calme-3.1-llamaloi-3b", + "name": "calme-3.1-llamaloi-3b", + "params_b": 3.213, + "ifeval": 0.7375175645066203, + "bbh": 0.4587340004998879, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.3204787234042553, + "hf_math_lvl5": 0.1729607250755287, + "hf_musr": 0.35152083333333334, + "hf_avg": 24.0933506270088 + }, + { + "hf_id": "MaziyarPanahi/calme-3.2-baguette-3b", + "name": "calme-3.2-baguette-3b", + "params_b": 3.085, + "ifeval": 0.6338282423968404, + "bbh": 0.470862269902714, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3337765957446808, + "hf_math_lvl5": 0.2824773413897281, + "hf_musr": 0.40209374999999997, + "hf_avg": 26.332491008465908 + }, + { + "hf_id": "MaziyarPanahi/calme-3.2-instruct-3b", + "name": "calme-3.2-instruct-3b", + "params_b": 3.086, + "ifeval": 0.5533196363426819, + "bbh": 0.4865641110376735, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.36527593085106386, + "hf_math_lvl5": 0.21676737160120846, + "hf_musr": 0.40469791666666666, + "hf_avg": 24.62035249644337 + }, + { + "hf_id": "MaziyarPanahi/calme-3.2-instruct-78b", + "name": "calme-3.2-instruct-78b", + "params_b": 77.965, + "ifeval": 0.8062607215521482, + "bbh": 0.7318616272092674, + "gpqa": 0.40268456375838924, + "mmlu_pro": 0.7303025265957447, + "hf_math_lvl5": 0.4033232628398791, + "hf_musr": 0.6023645833333333, + "hf_avg": 52.08138397879168 + }, + { + "hf_id": "MaziyarPanahi/calme-3.3-baguette-3b", + "name": "calme-3.3-baguette-3b", + "params_b": 3.086, + "ifeval": 0.6359514975819713, + "bbh": 0.4678217295957521, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.3341921542553192, + "hf_math_lvl5": 0.3806646525679758, + "hf_musr": 0.39282291666666663, + "hf_avg": 27.407100524692495 + }, + { + "hf_id": "MaziyarPanahi/calme-3.3-instruct-3b", + "name": "calme-3.3-instruct-3b", + "params_b": 3.086, + "ifeval": 0.6423212631373645, + "bbh": 0.46933409427688694, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.33053523936170215, + "hf_math_lvl5": 0.37386706948640486, + "hf_musr": 0.40742708333333333, + "hf_avg": 27.77891097790277 + }, + { + "hf_id": "Minami-su/Amara-o1-7B-Qwen", + "name": "Amara-o1-7B-Qwen", + "params_b": 7.616, + "ifeval": 0.7389914316236474, + "bbh": 0.5199420077880453, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.4083277925531915, + "hf_math_lvl5": 0.5181268882175226, + "hf_musr": 0.40066666666666667, + "hf_avg": 34.488976713679584 + }, + { + "hf_id": "Minami-su/Amara-o2-7B-Qwen", + "name": "Amara-o2-7B-Qwen", + "params_b": 7.616, + "ifeval": 0.7146615424850509, + "bbh": 0.5173432604435285, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.41647273936170215, + "hf_math_lvl5": 0.4086102719033233, + "hf_musr": 0.37809374999999995, + "hf_avg": 31.034506834602485 + }, + { + "hf_id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", + "name": "Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", + "params_b": 5.453, + "ifeval": 0.5268919799465418, + "bbh": 0.3252726665015006, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.17644614361702127, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.3249166666666667, + "hf_avg": 12.430004377968435 + }, + { + "hf_id": "ModelSpace/GemmaX2-28-9B-v0.1", + "name": "GemmaX2-28-9B-v0.1", + "params_b": 10.159, + "ifeval": 0.003921816336210145, + "bbh": 0.3687226427280163, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.2230718085106383, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.35365625, + "hf_avg": 5.991108905848541 + }, + { + "hf_id": "MoonRide/Llama-3.2-3B-Khelavaster", + "name": "Llama-3.2-3B-Khelavaster", + "params_b": 3.607, + "ifeval": 0.4924954675815725, + "bbh": 0.45156712929620335, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.31216755319148937, + "hf_math_lvl5": 0.16163141993957703, + "hf_musr": 0.36990625000000005, + "hf_avg": 20.144904682630184 + }, + { + "hf_id": "MultivexAI/Gladiator-Mini-Exp-1211-3B", + "name": "Gladiator-Mini-Exp-1211-3B", + "params_b": 3.213, + "ifeval": 0.68760887777763, + "bbh": 0.44843752663028075, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3151595744680851, + "hf_math_lvl5": 0.13746223564954682, + "hf_musr": 0.326, + "hf_avg": 22.27221013046616 + }, + { + "hf_id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", + "name": "Gladiator-Mini-Exp-1221-3B-Instruct", + "params_b": 3.213, + "ifeval": 0.6078748830879843, + "bbh": 0.4369766992416903, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.3048537234042553, + "hf_math_lvl5": 0.1351963746223565, + "hf_musr": 0.31145833333333334, + "hf_avg": 20.114352261478917 + }, + { + "hf_id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", + "name": "Gladiator-Mini-Exp-1221-3B-Instruct-V2", + "params_b": 3.213, + "ifeval": 0.6215386286165153, + "bbh": 0.438883390990549, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.3025265957446808, + "hf_math_lvl5": 0.14123867069486404, + "hf_musr": 0.30082291666666666, + "hf_avg": 20.415195595809834 + }, + { + "hf_id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", + "name": "Gladiator-Mini-Exp-1222-3B-Instruct", + "params_b": 3.213, + "ifeval": 0.6163180361440976, + "bbh": 0.4373182371021645, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.30169547872340424, + "hf_math_lvl5": 0.14123867069486404, + "hf_musr": 0.31276041666666665, + "hf_avg": 20.353088592931517 + }, + { + "hf_id": "Mxode/NanoLM-0.3B-Instruct-v1", + "name": "NanoLM-0.3B-Instruct-v1", + "params_b": 0.315, + "ifeval": 0.1536744726215331, + "bbh": 0.30282462164767127, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.11053856382978723, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.41552083333333334, + "hf_avg": 5.737739264323998 + }, + { + "hf_id": "Mxode/NanoLM-0.3B-Instruct-v1.1", + "name": "NanoLM-0.3B-Instruct-v1.1", + "params_b": 0.315, + "ifeval": 0.17827918810977095, + "bbh": 0.3014403673764691, + "gpqa": 0.25, + "mmlu_pro": 0.11211768617021277, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.42733333333333334, + "hf_avg": 5.974298608437977 + }, + { + "hf_id": "Mxode/NanoLM-0.3B-Instruct-v2", + "name": "NanoLM-0.3B-Instruct-v2", + "params_b": 0.315, + "ifeval": 0.1667885654507817, + "bbh": 0.29211039456850646, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.11344747340425532, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.3954583333333333, + "hf_avg": 5.01367055199107 + }, + { + "hf_id": "Mxode/NanoLM-1B-Instruct-v1.1", + "name": "NanoLM-1B-Instruct-v1.1", + "params_b": 1.076, + "ifeval": 0.23952889444451833, + "bbh": 0.31835012059590373, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.12150930851063829, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.34327083333333336, + "hf_avg": 6.756723242219516 + }, + { + "hf_id": "Mxode/NanoLM-1B-Instruct-v2", + "name": "NanoLM-1B-Instruct-v2", + "params_b": 1.076, + "ifeval": 0.2629844368497808, + "bbh": 0.3123145400715591, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.12375332446808511, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.35520833333333335, + "hf_avg": 7.35441459615685 + }, + { + "hf_id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", + "name": "naps-gemma-2-27b-v-0.1.0", + "params_b": 27.227, + "bbh": 0.2911778102988436, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11677194148936171, + "hf_musr": 0.35753125, + "hf_avg": 1.6796019124036488 + }, + { + "hf_id": "NAPS-ai/naps-gemma-2-27b-v0.1.0", + "name": "naps-gemma-2-27b-v0.1.0", + "params_b": 27.227, + "bbh": 0.2911778102988436, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11677194148936171, + "hf_musr": 0.35753125, + "hf_avg": 1.6796019124036488 + }, + { + "hf_id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", + "name": "naps-llama-3_1-8b-instruct-v0.3", + "params_b": 8.03, + "ifeval": 0.5390818583580456, + "bbh": 0.4900525115527062, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.33984375, + "hf_math_lvl5": 0.1903323262839879, + "hf_musr": 0.37870833333333337, + "hf_avg": 23.27833752065767 + }, + { + "hf_id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", + "name": "naps-llama-3_1-8b-instruct-v0.4", + "params_b": 8.03, + "ifeval": 0.7344202272193336, + "bbh": 0.4861833360906734, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3474900265957447, + "hf_math_lvl5": 0.19637462235649547, + "hf_musr": 0.4421145833333333, + "hf_avg": 27.71508453774743 + }, + { + "hf_id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", + "name": "naps-llama-3_1-instruct-v0.5.0", + "params_b": 8.03, + "ifeval": 0.5020124381086628, + "bbh": 0.4147584365689691, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.26138630319148937, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.37127083333333327, + "hf_avg": 16.000822862572466 + }, + { + "hf_id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", + "name": "naps-llama-3_1_instruct-v0.6.0", + "params_b": 8.03, + "ifeval": 0.3280063564675062, + "bbh": 0.45284530156109354, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.3240525265957447, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.37390624999999994, + "hf_avg": 15.779501356421212 + }, + { + "hf_id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", + "name": "naps-llama3.1-70B-v0.2-fp16", + "params_b": 70.761, + "ifeval": 0.1844993506119319, + "bbh": 0.3040736853180832, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.10987367021276596, + "hf_musr": 0.34860416666666666, + "hf_avg": 4.215465224433852 + }, + { + "hf_id": "NCSOFT/Llama-VARCO-8B-Instruct", + "name": "Llama-VARCO-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.4470327619604871, + "bbh": 0.5022879316026018, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.31898271276595747, + "hf_math_lvl5": 0.10649546827794562, + "hf_musr": 0.3840729166666666, + "hf_avg": 20.983509272665156 + }, + { + "hf_id": "NLPark/AnFeng_v3.1-Avocet", + "name": "AnFeng_v3.1-Avocet", + "params_b": 34.393, + "ifeval": 0.5096311121158525, + "bbh": 0.582852329074409, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.44381648936170215, + "hf_math_lvl5": 0.1593655589123867, + "hf_musr": 0.44757291666666665, + "hf_avg": 28.39095634882293 + }, + { + "hf_id": "NLPark/B-and-W_Flycatcher-3AD1E", + "name": "B-and-W_Flycatcher-3AD1E", + "params_b": 14.77, + "ifeval": 0.49084650948372543, + "bbh": 0.6065117528534355, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.4740691489361702, + "hf_math_lvl5": 0.23791540785498488, + "hf_musr": 0.44227083333333334, + "hf_avg": 30.467332952617294 + }, + { + "hf_id": "NLPark/Shi-Ci-Robin-Test_3AD80", + "name": "Shi-Ci-Robin-Test_3AD80", + "params_b": 70.554, + "ifeval": 0.7226547782107031, + "bbh": 0.6704805157570325, + "gpqa": 0.3598993288590604, + "mmlu_pro": 0.5120511968085106, + "hf_math_lvl5": 0.3157099697885196, + "hf_musr": 0.46959375000000003, + "hf_avg": 39.234122021270416 + }, + { + "hf_id": "NTQAI/NxMobileLM-1.5B-SFT", + "name": "NxMobileLM-1.5B-SFT", + "params_b": 1.544, + "ifeval": 0.6392239258500778, + "bbh": 0.39571778048116, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.28174867021276595, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.35552083333333334, + "hf_avg": 18.734829287552035 + }, + { + "hf_id": "NTQAI/Nxcode-CQ-7B-orpo", + "name": "Nxcode-CQ-7B-orpo", + "params_b": 7.25, + "ifeval": 0.40072119753365515, + "bbh": 0.4143023249178217, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.16115359042553193, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.39396875, + "hf_avg": 12.373779699446112 + }, + { + "hf_id": "NYTK/PULI-GPTrio", + "name": "PULI-GPTrio", + "params_b": 7.673, + "ifeval": 0.21797164855915638, + "bbh": 0.30600290906237543, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.11369680851063829, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.38187499999999996, + "hf_avg": 5.833727911056492 + }, + { + "hf_id": "NYTK/PULI-LlumiX-32K", + "name": "PULI-LlumiX-32K", + "params_b": 6.738, + "ifeval": 0.1699612583500667, + "bbh": 0.31893582242949375, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.16805186170212766, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.39641666666666664, + "hf_avg": 6.519109356715034 + }, + { + "hf_id": "NbAiLab/nb-llama-3.1-8B-sft", + "name": "nb-llama-3.1-8B-sft", + "params_b": 8.03, + "ifeval": 0.36157838978355206, + "bbh": 0.3281509048328078, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.12217420212765957, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.3287291666666667, + "hf_avg": 8.180260967339196 + }, + { + "hf_id": "Nekochu/Llama-3.1-8B-German-ORPO", + "name": "Llama-3.1-8B-German-ORPO", + "params_b": 8.03, + "ifeval": 0.4610710692074806, + "bbh": 0.4982577044334462, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.33934507978723405, + "hf_math_lvl5": 0.11706948640483383, + "hf_musr": 0.46475, + "hf_avg": 23.254052215833834 + }, + { + "hf_id": "Nekochu/Llama-3.1-8B-french-DPO", + "name": "Llama-3.1-8B-french-DPO", + "params_b": 8.03, + "ifeval": 0.46564227361179444, + "bbh": 0.5110888403999433, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3414228723404255, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.4215625, + "hf_avg": 21.701292312420733 + }, + { + "hf_id": "Nekochu/Luminia-13B-v3", + "name": "Luminia-13B-v3", + "params_b": 13.016, + "ifeval": 0.25231829323971505, + "bbh": 0.41121515510929624, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.22149268617021275, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.3983333333333334, + "hf_avg": 11.635076640566737 + }, + { + "hf_id": "Nekochu/Luminia-8B-RP", + "name": "Luminia-8B-RP", + "params_b": 8.03, + "ifeval": 0.5574165436597118, + "bbh": 0.5218151030627874, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3631150265957447, + "hf_math_lvl5": 0.13595166163141995, + "hf_musr": 0.3997604166666666, + "hf_avg": 24.61809279716832 + }, + { + "hf_id": "NeverSleep/Lumimaid-v0.2-12B", + "name": "Lumimaid-v0.2-12B", + "params_b": 12.248, + "ifeval": 0.10993497253952846, + "bbh": 0.5395610525850818, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.3511469414893617, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.48211458333333335, + "hf_avg": 18.147314436553692 + }, + { + "hf_id": "NeverSleep/Lumimaid-v0.2-8B", + "name": "Lumimaid-v0.2-8B", + "params_b": 8.03, + "ifeval": 0.5038109992597419, + "bbh": 0.5237767601226618, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.36361369680851063, + "hf_math_lvl5": 0.14350453172205438, + "hf_musr": 0.4303020833333333, + "hf_avg": 24.41199658071584 + }, + { + "hf_id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", + "name": "Dolphin3.0-Llama3.1-1B-abliterated", + "params_b": 1.236, + "ifeval": 0.5311883580012146, + "bbh": 0.3240787338568713, + "gpqa": 0.2407718120805369, + "mmlu_pro": 0.1373005319148936, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.32367708333333334, + "hf_avg": 11.378391787254499 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", + "name": "Llama_3.1_8b_DeepDive_3_Prev_v1.0", + "params_b": 8.03, + "ifeval": 0.6809144181881852, + "bbh": 0.5155095936229447, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.34375, + "hf_math_lvl5": 0.1865558912386707, + "hf_musr": 0.3665833333333333, + "hf_avg": 26.648390871227136 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", + "name": "Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", + "params_b": 8.03, + "ifeval": 0.7100903380807368, + "bbh": 0.51203649030939, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.34408244680851063, + "hf_math_lvl5": 0.19259818731117825, + "hf_musr": 0.37576041666666665, + "hf_avg": 27.472783833453928 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", + "name": "Llama_3.1_8b_DobHerWild_R1_v1.1R", + "params_b": 8.03, + "ifeval": 0.759999024809727, + "bbh": 0.525696414662245, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.36884973404255317, + "hf_math_lvl5": 0.23187311178247735, + "hf_musr": 0.38521875, + "hf_avg": 29.728397716509164 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", + "name": "Llama_3.1_8b_DoberWild_v2.01", + "params_b": 8.031, + "ifeval": 0.7995662619627034, + "bbh": 0.5250767747736031, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3790724734042553, + "hf_math_lvl5": 0.2001510574018127, + "hf_musr": 0.4011875, + "hf_avg": 30.16438239831213 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", + "name": "Llama_3.1_8b_DoberWild_v2.02", + "params_b": 8.03, + "ifeval": 0.7746368524404137, + "bbh": 0.531273698652086, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3764128989361702, + "hf_math_lvl5": 0.19939577039274925, + "hf_musr": 0.39458333333333334, + "hf_avg": 29.47563015963895 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", + "name": "Llama_3.1_8b_DoberWild_v2.03", + "params_b": 8.03, + "ifeval": 0.7764354135914928, + "bbh": 0.5294434267893284, + "gpqa": 0.3045302013422819, + "mmlu_pro": 0.37217420212765956, + "hf_math_lvl5": 0.20770392749244712, + "hf_musr": 0.39058333333333334, + "hf_avg": 29.82476968118114 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", + "name": "Llama_3.1_8b_DodoWild_v2.01", + "params_b": 8.031, + "ifeval": 0.7977677008116243, + "bbh": 0.5252760762748857, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3738364361702128, + "hf_math_lvl5": 0.1986404833836858, + "hf_musr": 0.40896874999999994, + "hf_avg": 30.309648600363573 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", + "name": "Llama_3.1_8b_DodoWild_v2.02", + "params_b": 8.03, + "ifeval": 0.8016895171478344, + "bbh": 0.5261737638679802, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.37608045212765956, + "hf_math_lvl5": 0.22734138972809667, + "hf_musr": 0.39706249999999993, + "hf_avg": 30.733559169624357 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", + "name": "Llama_3.1_8b_DodoWild_v2.03", + "params_b": 8.03, + "ifeval": 0.7941207108250552, + "bbh": 0.530825004382936, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.37857380319148937, + "hf_math_lvl5": 0.22205438066465258, + "hf_musr": 0.3958541666666667, + "hf_avg": 30.60438973869286 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", + "name": "Llama_3.1_8b_DodoWild_v2.10", + "params_b": 8.03, + "ifeval": 0.8053863748188141, + "bbh": 0.5278362703806528, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3854720744680851, + "hf_math_lvl5": 0.1971299093655589, + "hf_musr": 0.41566666666666663, + "hf_avg": 30.406547342023725 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", + "name": "Llama_3.1_8b_Dolermed_R1_V1.01", + "params_b": 8.03, + "ifeval": 0.7533544329046928, + "bbh": 0.5312389177563648, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3732546542553192, + "hf_math_lvl5": 0.20166163141993956, + "hf_musr": 0.37470833333333337, + "hf_avg": 29.312043044222406 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", + "name": "Llama_3.1_8b_Dolermed_R1_V1.03", + "params_b": 8.03, + "ifeval": 0.7564019025075688, + "bbh": 0.5316448098766001, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.37200797872340424, + "hf_math_lvl5": 0.20921450151057402, + "hf_musr": 0.3800416666666666, + "hf_avg": 29.78932292513281 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", + "name": "Llama_3.1_8b_Dolermed_V1.01", + "params_b": 8.031, + "ifeval": 0.508657030013697, + "bbh": 0.5193615033347353, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3570478723404255, + "hf_math_lvl5": 0.13444108761329304, + "hf_musr": 0.39448958333333334, + "hf_avg": 23.45268314376055 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", + "name": "Llama_3.1_8b_Dolerstormed_V1.04", + "params_b": 8.03, + "ifeval": 0.7889001183526376, + "bbh": 0.5195180641442355, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.3888796542553192, + "hf_math_lvl5": 0.19259818731117825, + "hf_musr": 0.4029583333333333, + "hf_avg": 30.113033386800094 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", + "name": "Llama_3.1_8b_Hermedash_R1_V1.04", + "params_b": 8.03, + "ifeval": 0.7871514248859692, + "bbh": 0.5191641616026265, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.38821476063829785, + "hf_math_lvl5": 0.1865558912386707, + "hf_musr": 0.4110520833333333, + "hf_avg": 30.277769930189056 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", + "name": "Llama_3.1_8b_Hermedive_R1_V1.01", + "params_b": 8.03, + "ifeval": 0.5001141415887622, + "bbh": 0.5170855986734039, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.34266954787234044, + "hf_math_lvl5": 0.17749244712990936, + "hf_musr": 0.40084374999999994, + "hf_avg": 23.809186985379387 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", + "name": "Llama_3.1_8b_Hermedive_R1_V1.03", + "params_b": 8.03, + "ifeval": 0.6647528557560606, + "bbh": 0.5140787918844759, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3488198138297872, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.3613125, + "hf_avg": 26.50163295870381 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", + "name": "Llama_3.1_8b_Hermedive_V1.01", + "params_b": 8.031, + "ifeval": 0.5061592131101034, + "bbh": 0.4918197968512548, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3550531914893617, + "hf_math_lvl5": 0.1646525679758308, + "hf_musr": 0.36965624999999996, + "hf_avg": 22.78885888839048 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", + "name": "Llama_3.1_8b_Mediver_V1.01", + "params_b": 8.031, + "ifeval": 0.18847103463255274, + "bbh": 0.44148325896745977, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.2993683510638298, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.38978124999999997, + "hf_avg": 11.984272459370358 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", + "name": "Llama_3.1_8b_Medusa_v1.01", + "params_b": 8.031, + "ifeval": 0.7685419132346618, + "bbh": 0.5017727187674992, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3531416223404255, + "hf_math_lvl5": 0.14652567975830816, + "hf_musr": 0.40667708333333336, + "hf_avg": 27.38168287785029 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", + "name": "Llama_3.1_8b_Smarteaz_0.2_R1", + "params_b": 8.03, + "ifeval": 0.6345529860769425, + "bbh": 0.5112504828088763, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.3645279255319149, + "hf_math_lvl5": 0.26057401812688824, + "hf_musr": 0.4188020833333333, + "hf_avg": 28.105107642426063 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", + "name": "Llama_3.1_8b_Smarteaz_V1.01", + "params_b": 8.03, + "ifeval": 0.8151283040111349, + "bbh": 0.5241273021389002, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.3735871010638298, + "hf_math_lvl5": 0.23413897280966767, + "hf_musr": 0.37892708333333336, + "hf_avg": 30.623634123187415 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", + "name": "Llama_3.1_8b_Stormeder_v1.04", + "params_b": 8.03, + "ifeval": 0.7852531283660686, + "bbh": 0.5207086605445487, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.38522273936170215, + "hf_math_lvl5": 0.18504531722054382, + "hf_musr": 0.3948958333333334, + "hf_avg": 29.709951883012703 + }, + { + "hf_id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", + "name": "Llama_3.1_8b_Typhoon_v1.03", + "params_b": 8.03, + "ifeval": 0.8078343240379969, + "bbh": 0.5313965802672672, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3842253989361702, + "hf_math_lvl5": 0.22734138972809667, + "hf_musr": 0.38146875, + "hf_avg": 30.634801511067575 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", + "name": "Llama_3.2_1b_AquaSyn_0.1", + "params_b": 1.498, + "ifeval": 0.2741004977903075, + "bbh": 0.3284363786988483, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.1377992021276596, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.34603125, + "hf_avg": 6.988906174606645 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_Dolto_0.1", + "name": "Llama_3.2_1b_Dolto_0.1", + "params_b": 1.498, + "ifeval": 0.5433782364127182, + "bbh": 0.3350056502150862, + "gpqa": 0.23741610738255034, + "mmlu_pro": 0.13638630319148937, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.342125, + "hf_avg": 11.865611272101537 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_Odyssea_V1", + "name": "Llama_3.2_1b_Odyssea_V1", + "params_b": 1.498, + "ifeval": 0.2552660274737696, + "bbh": 0.3009715832098017, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11527593085106383, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.33936458333333336, + "hf_avg": 5.724136311067048 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", + "name": "Llama_3.2_1b_OpenTree_R1_0.1", + "params_b": 1.498, + "ifeval": 0.5366339091388627, + "bbh": 0.3279521771600605, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.16747007978723405, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.31307291666666665, + "hf_avg": 12.343255865705624 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1", + "name": "Llama_3.2_1b_OrcaSun_V1", + "params_b": 1.498, + "ifeval": 0.5948605256275571, + "bbh": 0.355031362479927, + "gpqa": 0.23657718120805368, + "mmlu_pro": 0.19040890957446807, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.33803125, + "hf_avg": 14.801537525133925 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1", + "name": "Llama_3.2_1b_RandomLego_RP_R1_0.1", + "params_b": 1.498, + "ifeval": 0.5542693386880144, + "bbh": 0.34277067367168224, + "gpqa": 0.25, + "mmlu_pro": 0.15633311170212766, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.3249166666666667, + "hf_avg": 12.811633255227909 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_SunOrca_V1", + "name": "Llama_3.2_1b_SunOrca_V1", + "params_b": 1.498, + "ifeval": 0.542953807009845, + "bbh": 0.34306447662530104, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.18841422872340424, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.32625, + "hf_avg": 14.008724276561884 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1", + "name": "Llama_3.2_1b_Sydonia_0.1", + "params_b": 1.498, + "ifeval": 0.21967047434141412, + "bbh": 0.31210928710549807, + "gpqa": 0.22818791946308725, + "mmlu_pro": 0.12242353723404255, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.33818750000000003, + "hf_avg": 5.524505982897409 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2", + "name": "Llama_3.2_1b_Syneridol_0.2", + "params_b": 1.498, + "ifeval": 0.21574865800520399, + "bbh": 0.3138849872298115, + "gpqa": 0.2348993288590604, + "mmlu_pro": 0.12267287234042554, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.33428125000000003, + "hf_avg": 5.41209307631663 + }, + { + "hf_id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1", + "name": "Llama_3.2_1b_Synopsys_0.1", + "params_b": 1.498, + "ifeval": 0.17638089158987041, + "bbh": 0.31619439082949846, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.12308843085106383, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.34609375000000003, + "hf_avg": 4.959888823517752 + }, + { + "hf_id": "Nexesenex/Llama_3.2_3b_Kermes_v1", + "name": "Llama_3.2_3b_Kermes_v1", + "params_b": 3.213, + "ifeval": 0.4851759996808468, + "bbh": 0.4409910297279671, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.2547373670212766, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.40702083333333333, + "hf_avg": 17.069896218578325 + }, + { + "hf_id": "Nexesenex/Llama_3.2_3b_Kermes_v2", + "name": "Llama_3.2_3b_Kermes_v2", + "params_b": 3.213, + "ifeval": 0.5753766672429155, + "bbh": 0.44554539692939316, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.2734375, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.37781249999999994, + "hf_avg": 18.49497166284455 + }, + { + "hf_id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1", + "name": "Llama_3.2_3b_Kermes_v2.1", + "params_b": 3.213, + "ifeval": 0.5583906257618674, + "bbh": 0.44638999626044323, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.26919880319148937, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.3963541666666666, + "hf_avg": 18.90711773153537 + }, + { + "hf_id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", + "name": "pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", + "params_b": 1.236, + "ifeval": 0.5889905450870357, + "bbh": 0.3562492190965966, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.1802692819148936, + "hf_math_lvl5": 0.07477341389728095, + "hf_musr": 0.33955208333333337, + "hf_avg": 15.011891964697455 + }, + { + "hf_id": "Nexusflow/NexusRaven-V2-13B", + "name": "NexusRaven-V2-13B", + "params_b": 13, + "ifeval": 0.1790781792311068, + "bbh": 0.39488604640507335, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.18716755319148937, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3736875, + "hf_avg": 8.488064786804268 + }, + { + "hf_id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", + "name": "AceMath-1.5B-Instruct-dolphin-r1-200", + "params_b": 0.928, + "ifeval": 0.18080249294095221, + "bbh": 0.28148007801214714, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.11427859042553191, + "hf_musr": 0.37495833333333334, + "hf_avg": 4.389863637734279 + }, + { + "hf_id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", + "name": "DeepSeek-R1-Distill-Qwen-1.5B-500", + "params_b": 1.157, + "ifeval": 0.17485715678843247, + "bbh": 0.2601595454586609, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.1124501329787234, + "hf_musr": 0.33796875, + "hf_avg": 3.5788564701687924 + }, + { + "hf_id": "NikolaSigmoid/acemath-200", + "name": "acemath-200", + "params_b": 1.791, + "ifeval": 0.2848918646967823, + "bbh": 0.426284784119477, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.23761635638297873, + "hf_math_lvl5": 0.30513595166163143, + "hf_musr": 0.39251041666666664, + "hf_avg": 17.507477680245945 + }, + { + "hf_id": "NikolaSigmoid/phi-4-14b", + "name": "phi-4-14b", + "params_b": 14.704, + "ifeval": 0.05607898154674043, + "bbh": 0.669500080799667, + "gpqa": 0.4035234899328859, + "mmlu_pro": 0.527842420212766, + "hf_math_lvl5": 0.2938066465256798, + "hf_musr": 0.5046875000000001, + "hf_avg": 29.913839706837063 + }, + { + "hf_id": "NikolaSigmoid/phi-4-1steps", + "name": "phi-4-1steps", + "params_b": 14.704, + "ifeval": 0.05275668559422333, + "bbh": 0.6707359457278651, + "gpqa": 0.40184563758389263, + "mmlu_pro": 0.52734375, + "hf_math_lvl5": 0.2983383685800604, + "hf_musr": 0.5020520833333334, + "hf_avg": 29.87038792129772 + }, + { + "hf_id": "NikolaSigmoid/phi-4-300steps", + "name": "phi-4-300steps", + "params_b": 14.704, + "ifeval": 0.05607898154674043, + "bbh": 0.6701123802649077, + "gpqa": 0.4052013422818792, + "mmlu_pro": 0.5287566489361702, + "hf_math_lvl5": 0.2945619335347432, + "hf_musr": 0.5033541666666667, + "hf_avg": 29.960260183698754 + }, + { + "hf_id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", + "name": "Captain-Eris-BMO_Violent-GRPO-v0.420", + "params_b": 12.248, + "ifeval": 0.6312805578088361, + "bbh": 0.5078530730075063, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.359624335106383, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.4228020833333333, + "hf_avg": 25.923807450212067 + }, + { + "hf_id": "Nitral-AI/Captain-Eris_BMO-Violent-12B", + "name": "Captain-Eris_BMO-Violent-12B", + "params_b": 12.248, + "ifeval": 0.615218730745533, + "bbh": 0.5104372825851065, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.35713098404255317, + "hf_math_lvl5": 0.13670694864048338, + "hf_musr": 0.42553124999999997, + "hf_avg": 25.922969151836924 + }, + { + "hf_id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", + "name": "Captain-Eris_Violet-GRPO-v0.420", + "params_b": 12.248, + "ifeval": 0.6261597007052399, + "bbh": 0.515921407165298, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.35347406914893614, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.42791666666666667, + "hf_avg": 25.272146530177896 + }, + { + "hf_id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", + "name": "Captain-Eris_Violet-V0.420-12B", + "params_b": 12.248, + "ifeval": 0.43391866913123844, + "bbh": 0.5478099417611365, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.3722573138297872, + "hf_math_lvl5": 0.10725075528700906, + "hf_musr": 0.43306249999999996, + "hf_avg": 23.626620517485538 + }, + { + "hf_id": "Nitral-AI/Captain_BMO-12B", + "name": "Captain_BMO-12B", + "params_b": 12.248, + "ifeval": 0.4750595087700634, + "bbh": 0.5285960650424973, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.3568816489361702, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.37480208333333337, + "hf_avg": 23.21048454065836 + }, + { + "hf_id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", + "name": "Hathor_Stable-v0.2-L3-8B", + "params_b": 8.03, + "ifeval": 0.7174840534226963, + "bbh": 0.5285819178301682, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.36959773936170215, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.3780625, + "hf_avg": 25.917956738512263 + }, + { + "hf_id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", + "name": "Hathor_Tahsin-L3-8B-v0.85", + "params_b": 8.03, + "ifeval": 0.7110145524984818, + "bbh": 0.5279036861109899, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.37200797872340424, + "hf_math_lvl5": 0.10045317220543806, + "hf_musr": 0.3646666666666667, + "hf_avg": 25.499607921720123 + }, + { + "hf_id": "Nitral-AI/Nera_Noctis-12B", + "name": "Nera_Noctis-12B", + "params_b": 12.248, + "ifeval": 0.45617517076911485, + "bbh": 0.5193675192746302, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.3468251329787234, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.39790624999999996, + "hf_avg": 20.661351605876916 + }, + { + "hf_id": "Nohobby/MS-Schisandra-22B-v0.1", + "name": "MS-Schisandra-22B-v0.1", + "params_b": 22.247, + "ifeval": 0.6331289866443259, + "bbh": 0.5789949714896523, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.4095744680851064, + "hf_math_lvl5": 0.22280966767371602, + "hf_musr": 0.39284375, + "hf_avg": 30.1116442664472 + }, + { + "hf_id": "Nohobby/MS-Schisandra-22B-v0.2", + "name": "MS-Schisandra-22B-v0.2", + "params_b": 22.247, + "ifeval": 0.6382997114323329, + "bbh": 0.5841215984231857, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.4136469414893617, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.40747916666666667, + "hf_avg": 30.28148918014972 + }, + { + "hf_id": "NotASI/FineTome-Llama3.2-1B-0929", + "name": "FineTome-Llama3.2-1B-0929", + "params_b": 1.236, + "ifeval": 0.39907223943580805, + "bbh": 0.3246274874705644, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.1428690159574468, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.3487604166666667, + "hf_avg": 9.953180599897282 + }, + { + "hf_id": "NotASI/FineTome-Llama3.2-3B-1002", + "name": "FineTome-Llama3.2-3B-1002", + "params_b": 3, + "ifeval": 0.5474496558021605, + "bbh": 0.4319470614025341, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.24368351063829788, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.3685104166666667, + "hf_avg": 16.7624003049829 + }, + { + "hf_id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007", + "name": "FineTome-v1.5-Llama3.2-1B-1007", + "params_b": 1.236, + "ifeval": 0.39237777984636324, + "bbh": 0.32405671121485663, + "gpqa": 0.25, + "mmlu_pro": 0.1427027925531915, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.34745833333333337, + "hf_avg": 9.24257019295171 + }, + { + "hf_id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007", + "name": "FineTome-v1.5-Llama3.2-3B-1007", + "params_b": 3.213, + "ifeval": 0.5507719517546776, + "bbh": 0.4312372935321582, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.2448470744680851, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.3645416666666667, + "hf_avg": 17.113696212626593 + }, + { + "hf_id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "name": "DeepHermes-3-Mistral-24B-Preview", + "params_b": 23.572, + "ifeval": 0.45357761849669986, + "bbh": 0.6488196385442672, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.45902593085106386, + "hf_math_lvl5": 0.25755287009063443, + "hf_musr": 0.4503333333333333, + "hf_avg": 31.98701372074271 + }, + { + "hf_id": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "name": "Hermes-2-Pro-Llama-3-8B", + "params_b": 8.031, + "ifeval": 0.5361839918084017, + "bbh": 0.507112624310082, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.30518617021276595, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.4262395833333333, + "hf_avg": 22.06997572152565 + }, + { + "hf_id": "NousResearch/Hermes-2-Pro-Mistral-7B", + "name": "Hermes-2-Pro-Mistral-7B", + "params_b": 7.242, + "ifeval": 0.5668337788179807, + "bbh": 0.4995435330498075, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.29463098404255317, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.43759375, + "hf_avg": 21.840576807207686 + }, + { + "hf_id": "NousResearch/Hermes-2-Theta-Llama-3-8B", + "name": "Hermes-2-Theta-Llama-3-8B", + "params_b": 8.03, + "ifeval": 0.6517883659800441, + "bbh": 0.5206672260911865, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.33685172872340424, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.3948958333333334, + "hf_avg": 24.78837646080699 + }, + { + "hf_id": "NousResearch/Hermes-3-Llama-3.1-70B", + "name": "Hermes-3-Llama-3.1-70B", + "params_b": 70.554, + "ifeval": 0.7661438316998896, + "bbh": 0.6755780641387483, + "gpqa": 0.3615771812080537, + "mmlu_pro": 0.47265625, + "hf_math_lvl5": 0.20996978851963746, + "hf_musr": 0.4948958333333333, + "hf_avg": 38.51477067349896, + "lb_name": "hermes-3-llama-3.1-70b", + "lb_global": 0.4138016666666667, + "lb_reasoning": 0.33999999999999997, + "lb_math": 0.2831666666666666, + "lb_language": 0.4377233333333333, + "lb_if": 0.5536675, + "lb_data_analysis": 0.4617 + }, + { + "hf_id": "NousResearch/Hermes-3-Llama-3.1-8B", + "name": "Hermes-3-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.6170172918966121, + "bbh": 0.5177452540141246, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3139128989361702, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.4369375, + "hf_avg": 23.49087671148001 + }, + { + "hf_id": "NousResearch/Hermes-3-Llama-3.2-3B", + "name": "Hermes-3-Llama-3.2-3B", + "params_b": 3.213, + "ifeval": 0.3824862476008103, + "bbh": 0.43519901506714875, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.25440492021276595, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.40302083333333333, + "hf_avg": 15.242119392530277 + }, + { + "hf_id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "name": "Nous-Hermes-2-Mistral-7B-DPO", + "params_b": 7.242, + "ifeval": 0.5762510139762497, + "bbh": 0.48526536654652347, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3015292553191489, + "hf_math_lvl5": 0.04758308157099697, + "hf_musr": 0.3999791666666667, + "hf_avg": 21.10058697437334 + }, + { + "hf_id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "name": "Nous-Hermes-2-Mixtral-8x7B-DPO", + "params_b": 46.703, + "ifeval": 0.5896898008395501, + "bbh": 0.5538851384033822, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3666057180851064, + "hf_math_lvl5": 0.12235649546827794, + "hf_musr": 0.4595416666666667, + "hf_avg": 27.353190438571634, + "arena_elo": 1164.66, + "arena_rank": 270, + "arena_votes": 3776 + }, + { + "hf_id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "name": "Nous-Hermes-2-Mixtral-8x7B-SFT", + "params_b": 46.703, + "ifeval": 0.5730783210769648, + "bbh": 0.5057868454026635, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.30659906914893614, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.421375, + "hf_avg": 21.841010891461725 + }, + { + "hf_id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", + "name": "Nous-Hermes-2-SOLAR-10.7B", + "params_b": 10.732, + "ifeval": 0.5278660620486975, + "bbh": 0.5414294841140173, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3458277925531915, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.43728125, + "hf_avg": 23.412543159550665 + }, + { + "hf_id": "NousResearch/Nous-Hermes-llama-2-7b", + "name": "Nous-Hermes-llama-2-7b", + "params_b": 6.738, + "ifeval": 0.17290788441335658, + "bbh": 0.3823937686034717, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.19398271276595744, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.42571875, + "hf_avg": 9.316715938919105 + }, + { + "hf_id": "NousResearch/Yarn-Llama-2-13b-128k", + "name": "Yarn-Llama-2-13b-128k", + "params_b": 13, + "ifeval": 0.16546430138698653, + "bbh": 0.3826816443733663, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.23204787234042554, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.34575, + "hf_avg": 8.494146676691704 + }, + { + "hf_id": "NousResearch/Yarn-Llama-2-7b-128k", + "name": "Yarn-Llama-2-7b-128k", + "params_b": 7, + "ifeval": 0.14847825990593846, + "bbh": 0.32480295375597734, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.1791057180851064, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.39669791666666665, + "hf_avg": 6.814800680431223 + }, + { + "hf_id": "NousResearch/Yarn-Llama-2-7b-64k", + "name": "Yarn-Llama-2-7b-64k", + "params_b": 7, + "ifeval": 0.1699856381068897, + "bbh": 0.3326277865253592, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.17985372340425532, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.393875, + "hf_avg": 7.222883145587748 + }, + { + "hf_id": "NousResearch/Yarn-Mistral-7b-128k", + "name": "Yarn-Mistral-7b-128k", + "params_b": 7, + "ifeval": 0.19336693307091848, + "bbh": 0.4314467711273296, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.289311835106383, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.4070520833333333, + "hf_avg": 13.268755393260783 + }, + { + "hf_id": "NousResearch/Yarn-Mistral-7b-64k", + "name": "Yarn-Mistral-7b-64k", + "params_b": 7, + "ifeval": 0.2079548930171944, + "bbh": 0.42931904551037814, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.2913896276595745, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.41238541666666667, + "hf_avg": 13.540457995525927 + }, + { + "hf_id": "NousResearch/Yarn-Solar-10b-32k", + "name": "Yarn-Solar-10b-32k", + "params_b": 10, + "ifeval": 0.19421579187666504, + "bbh": 0.4986859152325069, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.32721077127659576, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.4146458333333333, + "hf_avg": 15.721261422706204 + }, + { + "hf_id": "NousResearch/Yarn-Solar-10b-64k", + "name": "Yarn-Solar-10b-64k", + "params_b": 10, + "ifeval": 0.1988867316498003, + "bbh": 0.49219907954226505, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.3148271276595745, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.40143750000000006, + "hf_avg": 15.162050446653282 + }, + { + "hf_id": "Novaciano/La_Mejor_Mezcla-3.2-1B", + "name": "La_Mejor_Mezcla-3.2-1B", + "params_b": 1.498, + "ifeval": 0.5509969104199081, + "bbh": 0.34879364478381225, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.18292885638297873, + "hf_math_lvl5": 0.08987915407854985, + "hf_musr": 0.3196145833333333, + "hf_avg": 14.056697294328709 + }, + { + "hf_id": "NucleusAI/nucleus-22B-token-500B", + "name": "nucleus-22B-token-500B", + "params_b": 21.828, + "ifeval": 0.025654153202391873, + "bbh": 0.29198007801214715, + "gpqa": 0.25, + "mmlu_pro": 0.11619015957446809, + "hf_musr": 0.3510520833333333, + "hf_avg": 1.6334163485881146 + }, + { + "hf_id": "OEvortex/Emotional-llama-8B", + "name": "Emotional-llama-8B", + "params_b": 8.03, + "ifeval": 0.3516369898535885, + "bbh": 0.4838573702054177, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.35347406914893614, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.365875, + "hf_avg": 17.789126430453027 + }, + { + "hf_id": "OEvortex/HelpingAI-15B", + "name": "HelpingAI-15B", + "params_b": 15.323, + "ifeval": 0.2030091268944179, + "bbh": 0.2936006977853758, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11112034574468085, + "hf_musr": 0.361875, + "hf_avg": 4.515495603660534 + }, + { + "hf_id": "OEvortex/HelpingAI-3B-reloaded", + "name": "HelpingAI-3B-reloaded", + "params_b": 2.81, + "ifeval": 0.46466819150963884, + "bbh": 0.4128512897904065, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.25947473404255317, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3524479166666667, + "hf_avg": 14.768420533149957 + }, + { + "hf_id": "OEvortex/HelpingAI2-9B", + "name": "HelpingAI2-9B", + "params_b": 8.903, + "ifeval": 0.44131238447319776, + "bbh": 0.4844617641983123, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.28997672872340424, + "hf_math_lvl5": 0.05891238670694864, + "hf_musr": 0.3710833333333334, + "hf_avg": 17.606927062260286 + }, + { + "hf_id": "OEvortex/HelpingAI2.5-10B", + "name": "HelpingAI2.5-10B", + "params_b": 10.211, + "ifeval": 0.32765617450586665, + "bbh": 0.4495657491171711, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.25748005319148937, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.37381250000000005, + "hf_avg": 13.711774162686112 + }, + { + "hf_id": "OliveiraJLT/Sagui-7B-Instruct-v0.1", + "name": "Sagui-7B-Instruct-v0.1", + "params_b": 6.738, + "ifeval": 0.28916275482386733, + "bbh": 0.3110678914743868, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.14852061170212766, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.4190520833333333, + "hf_avg": 8.579407330639999 + }, + { + "hf_id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", + "name": "NeuralStar_FusionWriter_4x7b", + "params_b": 24.154, + "ifeval": 0.5963842604289951, + "bbh": 0.47762434766958123, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.2605551861702128, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.401875, + "hf_avg": 20.071645037035577 + }, + { + "hf_id": "OnlyCheeini/greesychat-turbo", + "name": "greesychat-turbo", + "params_b": 8.03, + "ifeval": 0.023256071667619692, + "bbh": 0.30921339082318816, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11377992021276596, + "hf_musr": 0.3314270833333333, + "hf_avg": 1.8020688847846804 + }, + { + "hf_id": "Open-Orca/Mistral-7B-OpenOrca", + "name": "Mistral-7B-OpenOrca", + "params_b": 7, + "ifeval": 0.4977659277384008, + "bbh": 0.4768173517353546, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.26529255319148937, + "hf_math_lvl5": 0.035498489425981876, + "hf_musr": 0.38578124999999996, + "hf_avg": 17.72165111279921 + }, + { + "hf_id": "OpenAssistant/oasst-sft-1-pythia-12b", + "name": "oasst-sft-1-pythia-12b", + "params_b": 12, + "ifeval": 0.10553885911603435, + "bbh": 0.314662875941371, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11128656914893617, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.33269791666666665, + "hf_avg": 3.6818304933986314 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", + "name": "openbuddy-llama3-70b-v21.2-32k", + "params_b": 70.554, + "ifeval": 0.7010476646409305, + "bbh": 0.6507443429944494, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.4832114361702128, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.45796875000000004, + "hf_avg": 35.5534575981615 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", + "name": "openbuddy-llama3-8b-v21.1-8k", + "params_b": 8.03, + "ifeval": 0.5569666263292509, + "bbh": 0.47875007373484046, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.2954621010638298, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.3987708333333333, + "hf_avg": 20.162938316635703 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", + "name": "openbuddy-llama3-8b-v21.2-32k", + "params_b": 8.03, + "ifeval": 0.6191904147661538, + "bbh": 0.4856219845879779, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3298703457446808, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.377875, + "hf_avg": 22.069479074823068 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", + "name": "openbuddy-llama3.1-70b-v22.1-131k", + "params_b": 70.554, + "ifeval": 0.7332710541363582, + "bbh": 0.6698491606025763, + "gpqa": 0.375, + "mmlu_pro": 0.5304188829787234, + "hf_math_lvl5": 0.3950151057401813, + "hf_musr": 0.46295833333333336, + "hf_avg": 41.24947292836925 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", + "name": "openbuddy-llama3.1-8b-v22.2-131k", + "params_b": 8.03, + "ifeval": 0.6657269378582162, + "bbh": 0.5006515954024578, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3310339095744681, + "hf_math_lvl5": 0.1148036253776435, + "hf_musr": 0.40810416666666666, + "hf_avg": 24.4181731020086 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", + "name": "openbuddy-llama3.1-8b-v22.3-131k", + "params_b": 8.03, + "ifeval": 0.5997065563815123, + "bbh": 0.5065914870348772, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3277094414893617, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.40146875, + "hf_avg": 23.317953936657844 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", + "name": "openbuddy-llama3.2-1b-v23.1-131k", + "params_b": 1.498, + "ifeval": 0.3590052172679601, + "bbh": 0.3266563226631131, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.1840093085106383, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.33421875, + "hf_avg": 9.350033548941015 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", + "name": "openbuddy-llama3.2-3b-v23.2-131k", + "params_b": 3.607, + "ifeval": 0.4319450169993395, + "bbh": 0.4072660342069299, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2479222074468085, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.3263125, + "hf_avg": 13.797653889948387 + }, + { + "hf_id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", + "name": "openbuddy-llama3.3-70b-v24.1-131k", + "params_b": 70.554, + "ifeval": 0.812080834408259, + "bbh": 0.6858038620320306, + "gpqa": 0.43456375838926176, + "mmlu_pro": 0.5327460106382979, + "hf_math_lvl5": 0.44108761329305135, + "hf_musr": 0.4869270833333334, + "hf_avg": 45.73679489100153 + }, + { + "hf_id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", + "name": "openbuddy-mixtral-7bx8-v18.1-32k", + "params_b": 46.741, + "ifeval": 0.549347952322061, + "bbh": 0.46561770563515265, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.38040226063829785, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.3830520833333333, + "hf_avg": 22.329808849468282 + }, + { + "hf_id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", + "name": "openbuddy-nemotron-70b-v23.1-131k", + "params_b": 70.554, + "ifeval": 0.7555275557742346, + "bbh": 0.6749472828128272, + "gpqa": 0.36325503355704697, + "mmlu_pro": 0.5174534574468085, + "hf_math_lvl5": 0.32099697885196377, + "hf_musr": 0.45375000000000004, + "hf_avg": 39.78505127265852 + }, + { + "hf_id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", + "name": "openbuddy-nemotron-70b-v23.2-131k", + "params_b": 70.554, + "ifeval": 0.7226547782107031, + "bbh": 0.6704805157570325, + "gpqa": 0.3598993288590604, + "mmlu_pro": 0.5120511968085106, + "hf_math_lvl5": 0.3157099697885196, + "hf_musr": 0.46959375000000003, + "hf_avg": 39.234122021270416 + }, + { + "hf_id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", + "name": "openbuddy-qwen2.5llamaify-14b-v23.1-200k", + "params_b": 14.77, + "ifeval": 0.630880508162786, + "bbh": 0.601319898776811, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.4673371010638298, + "hf_math_lvl5": 0.2537764350453172, + "hf_musr": 0.42404166666666665, + "hf_avg": 32.52829892906403 + }, + { + "hf_id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", + "name": "openbuddy-qwen2.5llamaify-14b-v23.3-200k", + "params_b": 14.77, + "ifeval": 0.6131453432448126, + "bbh": 0.6080855261046028, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.4794714095744681, + "hf_math_lvl5": 0.2311178247734139, + "hf_musr": 0.4345833333333333, + "hf_avg": 32.29791522872073 + }, + { + "hf_id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", + "name": "openbuddy-qwen2.5llamaify-7b-v23.1-200k", + "params_b": 7.615, + "ifeval": 0.5672582082208539, + "bbh": 0.5509381466888461, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.394780585106383, + "hf_math_lvl5": 0.18882175226586104, + "hf_musr": 0.43632291666666667, + "hf_avg": 27.863254758763023 + }, + { + "hf_id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", + "name": "openbuddy-qwq-32b-v24.1-200k", + "params_b": 32.764, + "ifeval": 0.593661484860171, + "bbh": 0.6798496773637743, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5490359042553191, + "hf_math_lvl5": 0.37386706948640486, + "hf_musr": 0.484875, + "hf_avg": 39.96232491540321 + }, + { + "hf_id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", + "name": "openbuddy-qwq-32b-v24.2-200k", + "params_b": 32.764, + "ifeval": 0.5969837808126881, + "bbh": 0.6771537576509328, + "gpqa": 0.3766778523489933, + "mmlu_pro": 0.5446309840425532, + "hf_math_lvl5": 0.3776435045317221, + "hf_musr": 0.47179166666666666, + "hf_avg": 39.56011201711714 + }, + { + "hf_id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", + "name": "openbuddy-yi1.5-34b-v21.3-32k", + "params_b": 34.407, + "ifeval": 0.5420041046645123, + "bbh": 0.6162574860411373, + "gpqa": 0.348993288590604, + "mmlu_pro": 0.4599401595744681, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.44394791666666666, + "hf_avg": 30.92470371318825 + }, + { + "hf_id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", + "name": "openbuddy-zero-14b-v22.3-32k", + "params_b": 14.022, + "ifeval": 0.37529200299649373, + "bbh": 0.4859759816473639, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3187333776595745, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.41660416666666666, + "hf_avg": 19.406071028276457 + }, + { + "hf_id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", + "name": "openbuddy-zero-3b-v21.2-32k", + "params_b": 4.769, + "ifeval": 0.3802377691192702, + "bbh": 0.3934791831798414, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.20337433510638298, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.3566354166666667, + "hf_avg": 11.713302286048615 + }, + { + "hf_id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", + "name": "openbuddy-zero-56b-v21.2-32k", + "params_b": 56.707, + "ifeval": 0.5057092957796425, + "bbh": 0.6128345897750148, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.43991023936170215, + "hf_math_lvl5": 0.16238670694864046, + "hf_musr": 0.4305208333333333, + "hf_avg": 28.536019963329736 + }, + { + "hf_id": "OpenGenerativeAI/Bifrost", + "name": "Bifrost", + "params_b": 14.66, + "ifeval": 0.6347524568145853, + "bbh": 0.6849273974523276, + "gpqa": 0.36828859060402686, + "mmlu_pro": 0.5159574468085106, + "hf_math_lvl5": 0.2545317220543807, + "hf_musr": 0.45976041666666667, + "hf_avg": 37.14746413152554 + }, + { + "hf_id": "OpenGenerativeAI/Bifrost-14B", + "name": "Bifrost-14B", + "params_b": 14.66, + "ifeval": 0.6615302951723648, + "bbh": 0.6844897889249308, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5073969414893617, + "hf_math_lvl5": 0.23564954682779457, + "hf_musr": 0.46239583333333334, + "hf_avg": 37.39963457924099 + }, + { + "hf_id": "OpenLLM-France/Lucie-7B", + "name": "Lucie-7B", + "params_b": 6.707, + "ifeval": 0.24964538535530173, + "bbh": 0.3492469872973046, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.14976728723404256, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.39232291666666663, + "hf_avg": 8.611731815760395 + }, + { + "hf_id": "OpenLLM-France/Lucie-7B-Instruct-human-data", + "name": "Lucie-7B-Instruct-human-data", + "params_b": 6.707, + "ifeval": 0.29460830596151544, + "bbh": 0.32842533479733, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.14295212765957446, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.37285416666666665, + "hf_avg": 8.574866924872492 + }, + { + "hf_id": "OpenLLM-France/Lucie-7B-Instruct-v1.1", + "name": "Lucie-7B-Instruct-v1.1", + "params_b": 6.707, + "ifeval": 0.3038759380665523, + "bbh": 0.38158765227444885, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.1864195478723404, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.37502083333333336, + "hf_avg": 10.999352587987943 + }, + { + "hf_id": "OpenScholar/Llama-3.1_OpenScholar-8B", + "name": "Llama-3.1_OpenScholar-8B", + "params_b": 8, + "ifeval": 0.6064010159709571, + "bbh": 0.5207740834450674, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.370844414893617, + "hf_math_lvl5": 0.16540785498489427, + "hf_musr": 0.4275104166666667, + "hf_avg": 25.961332257431867 + }, + { + "hf_id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", + "name": "Llama-3.1-8B-Lexi-Uncensored", + "params_b": 8.03, + "ifeval": 0.7776843220432896, + "bbh": 0.5057261652642643, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.37898936170212766, + "hf_math_lvl5": 0.15709969788519637, + "hf_musr": 0.3871145833333333, + "hf_avg": 27.175116142740332 + }, + { + "hf_id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", + "name": "Llama-3.1-8B-Lexi-Uncensored-V2", + "params_b": 8.03, + "ifeval": 0.7791581891603169, + "bbh": 0.5084008018783934, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3780751329787234, + "hf_math_lvl5": 0.1971299093655589, + "hf_musr": 0.3842916666666667, + "hf_avg": 28.390881228317323 + }, + { + "hf_id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", + "name": "Qwen2.5-7B-Instruct-Uncensored", + "params_b": 7.616, + "ifeval": 0.7204317876567508, + "bbh": 0.5473918652157296, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.4426529255319149, + "hf_math_lvl5": 0.4773413897280967, + "hf_musr": 0.43613541666666666, + "hf_avg": 35.71881524964477 + }, + { + "hf_id": "Orion-zhen/phi-4-abliterated", + "name": "phi-4-abliterated", + "params_b": 14.66, + "ifeval": 0.05760271634817839, + "bbh": 0.6698239306664778, + "gpqa": 0.40436241610738255, + "mmlu_pro": 0.5291722074468085, + "hf_math_lvl5": 0.3021148036253776, + "hf_musr": 0.500625, + "hf_avg": 29.97907708859114 + }, + { + "hf_id": "P0x0/Astra-v1-12B", + "name": "Astra-v1-12B", + "params_b": 12.248, + "ifeval": 0.28059437847134494, + "bbh": 0.5214506484138984, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3460771276595745, + "hf_math_lvl5": 0.11329305135951662, + "hf_musr": 0.4051875, + "hf_avg": 19.737240466519605 + }, + { + "hf_id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", + "name": "LLaMa-3-CursedStock-v2.0-8B", + "params_b": 8.03, + "ifeval": 0.6330791189599152, + "bbh": 0.527115950402997, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.3556349734042553, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.38562500000000005, + "hf_avg": 24.166133893343382 + }, + { + "hf_id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", + "name": "LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", + "params_b": 13.047, + "ifeval": 0.7871015572015585, + "bbh": 0.5073267838961463, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3767453457446808, + "hf_math_lvl5": 0.2001510574018127, + "hf_musr": 0.3869895833333333, + "hf_avg": 28.894562181103485 + }, + { + "hf_id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", + "name": "LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", + "params_b": 3.213, + "ifeval": 0.693054428915278, + "bbh": 0.4556166737589294, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.312749335106383, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.37003125000000003, + "hf_avg": 22.701739941091272 + }, + { + "hf_id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", + "name": "LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", + "params_b": 3.213, + "ifeval": 0.6291573026237051, + "bbh": 0.45814952191015346, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3115026595744681, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.365875, + "hf_avg": 21.772674131965854 + }, + { + "hf_id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", + "name": "LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", + "params_b": 3.213, + "ifeval": 0.6503898544750152, + "bbh": 0.45107942950222196, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.3107546542553192, + "hf_math_lvl5": 0.12613293051359517, + "hf_musr": 0.3687291666666667, + "hf_avg": 21.826266808747885 + }, + { + "hf_id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", + "name": "LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", + "params_b": 3.213, + "ifeval": 0.5040858256093831, + "bbh": 0.4483158594793648, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.308344414893617, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.3515520833333334, + "hf_avg": 19.737295916446044 + }, + { + "hf_id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", + "name": "PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", + "params_b": 3.086, + "ifeval": 0.5084819390328772, + "bbh": 0.47105662040096935, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.35106382978723405, + "hf_math_lvl5": 0.1691842900302115, + "hf_musr": 0.44785416666666666, + "hf_avg": 23.92876945667705 + }, + { + "hf_id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", + "name": "SuperThoughts-CoT-14B-16k-o1-QwQ", + "params_b": 14.66, + "ifeval": 0.051457909458015844, + "bbh": 0.6719989821162488, + "gpqa": 0.3926174496644295, + "mmlu_pro": 0.526845079787234, + "hf_math_lvl5": 0.4199395770392749, + "hf_musr": 0.4913541666666667, + "hf_avg": 31.36951233941903 + }, + { + "hf_id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", + "name": "Superthoughts-lite-1.8B-experimental-o1", + "params_b": 1.812, + "ifeval": 0.0375193375798437, + "bbh": 0.3434736647957908, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.18508976063829788, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.33539583333333334, + "hf_avg": 5.104091175053128 + }, + { + "hf_id": "Pinkstack/Superthoughts-lite-v1", + "name": "Superthoughts-lite-v1", + "params_b": 1.711, + "ifeval": 0.1658643510330368, + "bbh": 0.3465571905256149, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.17553191489361702, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3671770833333334, + "hf_avg": 7.399417613485791 + }, + { + "hf_id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", + "name": "Dans-PersonalityEngine-V1.1.0-12b", + "params_b": 12.248, + "ifeval": 0.7074672978807343, + "bbh": 0.5361046243199591, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.32621343085106386, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.45867708333333335, + "hf_avg": 27.044944332447233 + }, + { + "hf_id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", + "name": "Dans-PersonalityEngine-V1.2.0-24b", + "params_b": 23.572, + "ifeval": 0.7886252920029965, + "bbh": 0.6421213844206719, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.5025764627659575, + "hf_math_lvl5": 0.24546827794561935, + "hf_musr": 0.42996875, + "hf_avg": 36.3754641013882 + }, + { + "hf_id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", + "name": "Dans-PersonalityEngine-v1.0.0-8b", + "params_b": 8.03, + "ifeval": 0.498190357141274, + "bbh": 0.47325544259149366, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3065159574468085, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.35415625, + "hf_avg": 19.207416194838743 + }, + { + "hf_id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", + "name": "Dans-SakuraKaze-V1.0.0-12b", + "params_b": 12.248, + "ifeval": 0.6520133246452745, + "bbh": 0.5405357251132225, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.35596742021276595, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.47452083333333334, + "hf_avg": 27.119777597401505 + }, + { + "hf_id": "PranavHarshan/LaMistral-V4", + "name": "LaMistral-V4", + "params_b": 8.03, + "ifeval": 0.623861354539289, + "bbh": 0.5184255342586473, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.35987367021276595, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.3642916666666667, + "hf_avg": 24.21076517685928 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", + "name": "OpenChat-3.5-0106_10.7B_48Layers-Appended", + "params_b": 10.732, + "ifeval": 0.5960595663949432, + "bbh": 0.4619637884426022, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3289561170212766, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.42540625, + "hf_avg": 22.735876089495267 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", + "name": "OpenChat-3.5-0106_10.7B_48Layers-Interleaved", + "params_b": 10.732, + "ifeval": 0.5960595663949432, + "bbh": 0.4619637884426022, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3298703457446808, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.42540625, + "hf_avg": 22.671701605771233 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", + "name": "OpenChat-3.5-0106_32K-PoSE", + "params_b": 7.242, + "ifeval": 0.3968991165662664, + "bbh": 0.3471309425137119, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.203125, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.42054166666666665, + "hf_avg": 12.903680149442671 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", + "name": "OpenChat-3.5-0106_8.11B_36Layers-Appended", + "params_b": 8.114, + "ifeval": 0.5975833011963811, + "bbh": 0.4619637884426022, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3289561170212766, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.42540625, + "hf_avg": 22.761271669519232 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", + "name": "OpenChat-3.5-0106_8.11B_36Layers-Interleaved", + "params_b": 8.114, + "ifeval": 0.5960595663949432, + "bbh": 0.46213045510926887, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3298703457446808, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.42407291666666663, + "hf_avg": 22.630312716882344 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", + "name": "OpenChat-3.5-0106_8.99B_40Layers-Appended", + "params_b": 8.987, + "ifeval": 0.5960595663949432, + "bbh": 0.4619637884426022, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3289561170212766, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.42540625, + "hf_avg": 22.735876089495267 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", + "name": "OpenChat-3.5-0106_8.99B_40Layers-Interleaved", + "params_b": 8.987, + "ifeval": 0.5975833011963811, + "bbh": 0.46213045510926887, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3298703457446808, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.42407291666666663, + "hf_avg": 22.65570829690631 + }, + { + "hf_id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", + "name": "OpenChat-3.5-0106_9.86B_44Layers-Appended", + "params_b": 9.859, + "ifeval": 0.5960595663949432, + "bbh": 0.4619637884426022, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3289561170212766, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.42540625, + "hf_avg": 22.735876089495267 + }, + { + "hf_id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", + "name": "openchat-3.5-0106_Rebased_Mistral-7B-v0.2", + "params_b": 7.242, + "ifeval": 0.37062106322335847, + "bbh": 0.36271140677296004, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.2829953457446808, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.4840104166666667, + "hf_avg": 16.052276387715246 + }, + { + "hf_id": "PrimeIntellect/INTELLECT-1", + "name": "INTELLECT-1", + "params_b": 10.211, + "ifeval": 0.1757315035217667, + "bbh": 0.27598007801214713, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.11228390957446809, + "hf_musr": 0.3339375, + "hf_avg": 3.8063018019018693 + }, + { + "hf_id": "PrimeIntellect/INTELLECT-1", + "name": "INTELLECT-1", + "params_b": 10.211, + "ifeval": 0.1757315035217667, + "bbh": 0.27398007801214713, + "gpqa": 0.25, + "mmlu_pro": 0.11203457446808511, + "hf_musr": 0.3752708333333333, + "hf_avg": 4.016002158495076 + }, + { + "hf_id": "PrimeIntellect/INTELLECT-1-Instruct", + "name": "INTELLECT-1-Instruct", + "params_b": 10.211, + "bbh": 0.28698007801214714, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.10638297872340426, + "hf_math_lvl5": 0.022658610271903322, + "hf_musr": 0.3576875, + "hf_avg": 1.4059110426100174 + }, + { + "hf_id": "PygmalionAI/pygmalion-6b", + "name": "pygmalion-6b", + "params_b": 6, + "ifeval": 0.20910406610016974, + "bbh": 0.31988944643860034, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11835106382978723, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.3683541666666667, + "hf_avg": 5.430124009362374 + }, + { + "hf_id": "Quazim0t0/Imagine-v0.5-16bit", + "name": "Imagine-v0.5-16bit", + "params_b": 14.66, + "ifeval": 0.2758990589413866, + "bbh": 0.6769135492947932, + "gpqa": 0.3649328859060403, + "mmlu_pro": 0.535405585106383, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.43492708333333335, + "hf_avg": 28.789019815038216 + }, + { + "hf_id": "Quazim0t0/Phi4.Turn.R1Distill.16bit", + "name": "Phi4.Turn.R1Distill.16bit", + "params_b": 14.66, + "ifeval": 0.31264378515671754, + "bbh": 0.6563340892011863, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.5256815159574468, + "hf_math_lvl5": 0.2311178247734139, + "hf_musr": 0.39021875, + "hf_avg": 27.502616984496523 + }, + { + "hf_id": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", + "name": "Phi4.Turn.R1Distill_v1.5.1-Tensors", + "params_b": 14.66, + "ifeval": 0.2995296923274689, + "bbh": 0.645570250166195, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.51171875, + "hf_math_lvl5": 0.2190332326283988, + "hf_musr": 0.39285416666666667, + "hf_avg": 26.053651878084434 + }, + { + "hf_id": "Quazim0t0/ThinkPhi1.1-Tensors", + "name": "ThinkPhi1.1-Tensors", + "params_b": 14.66, + "ifeval": 0.3907543096761038, + "bbh": 0.6449416604455037, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.4907746010638298, + "hf_math_lvl5": 0.18202416918429004, + "hf_musr": 0.418, + "hf_avg": 27.934992706320916 + }, + { + "hf_id": "Qwen/QwQ-32B", + "name": "QwQ-32B", + "params_b": 32.764, + "ifeval": 0.39767372793077926, + "bbh": 0.29829653176003074, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11959773936170212, + "hf_math_lvl5": 0.1608761329305136, + "hf_musr": 0.42063541666666665, + "hf_avg": 12.21487061660057, + "lb_name": "qwq-32b", + "lb_global": 0.7070323529411765, + "lb_reasoning": 0.8025, + "lb_coding": 0.613595, + "lb_math": 0.7608366666666667, + "lb_language": 0.5148133333333332, + "lb_if": 0.81829, + "lb_data_analysis": 0.69529, + "arena_elo": 1335.78, + "arena_rank": 142, + "arena_votes": 26000 + }, + { + "hf_id": "Qwen/Qwen1.5-0.5B", + "name": "Qwen1.5-0.5B", + "params_b": 0.62, + "ifeval": 0.17056077873375977, + "bbh": 0.3153538659142558, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.1307347074468085, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.36162500000000003, + "hf_avg": 5.3510150735736985 + }, + { + "hf_id": "Qwen/Qwen1.5-0.5B-Chat", + "name": "Qwen1.5-0.5B-Chat", + "params_b": 0.62, + "ifeval": 0.18072713732895385, + "bbh": 0.3166662152036714, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.12125997340425532, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.3837083333333333, + "hf_avg": 5.67816209115329, + "lb_name": "qwen1.5-0.5b-chat", + "lb_global": 0.0639461111111111, + "lb_reasoning": 0.04, + "lb_math": 0.044289999999999996, + "lb_language": 0.028776666666666662, + "lb_if": 0.21295750000000002, + "lb_data_analysis": 0 + }, + { + "hf_id": "Qwen/Qwen1.5-1.8B", + "name": "Qwen1.5-1.8B", + "params_b": 1.837, + "ifeval": 0.2154239639711521, + "bbh": 0.3476121558366305, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.18816489361702127, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.36051041666666667, + "hf_avg": 9.269492522098927 + }, + { + "hf_id": "Qwen/Qwen1.5-1.8B-Chat", + "name": "Qwen1.5-1.8B-Chat", + "params_b": 1.837, + "ifeval": 0.20190982149585324, + "bbh": 0.3255912875735599, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.18035239361702127, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.42596875, + "hf_avg": 9.257783499275524, + "lb_name": "qwen1.5-1.8b-chat", + "lb_global": 0.0731488888888889, + "lb_reasoning": 0.05, + "lb_math": 0.03528, + "lb_language": 0.03155666666666667, + "lb_if": 0.2290425, + "lb_data_analysis": 0.02 + }, + { + "hf_id": "Qwen/Qwen1.5-110B", + "name": "Qwen1.5-110B", + "params_b": 111.21, + "ifeval": 0.3421942667677318, + "bbh": 0.6099964981780978, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.5360704787234043, + "hf_math_lvl5": 0.24697885196374622, + "hf_musr": 0.44084375, + "hf_avg": 29.83367750486893 + }, + { + "hf_id": "Qwen/Qwen1.5-110B-Chat", + "name": "Qwen1.5-110B-Chat", + "params_b": 111.21, + "ifeval": 0.5938864435254014, + "bbh": 0.6183800385588633, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.48246343085106386, + "hf_math_lvl5": 0.23413897280966767, + "hf_musr": 0.45216666666666666, + "hf_avg": 33.12715289782008, + "lb_name": "qwen1.5-110b-chat", + "lb_global": 0.31641833333333336, + "lb_reasoning": 0.34, + "lb_math": 0.26276000000000005, + "lb_language": 0.13224333333333332, + "lb_if": 0.5526275, + "lb_data_analysis": 0.20179999999999998, + "arena_elo": 1233.96, + "arena_rank": 228, + "arena_votes": 26191, + "aider_pass_rate": 0.308 + }, + { + "hf_id": "Qwen/Qwen1.5-14B", + "name": "Qwen1.5-14B", + "params_b": 14.167, + "ifeval": 0.2905368865720732, + "bbh": 0.5080327493808331, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.36436170212765956, + "hf_math_lvl5": 0.20241691842900303, + "hf_musr": 0.41864583333333333, + "hf_avg": 20.854080062460586 + }, + { + "hf_id": "Qwen/Qwen1.5-14B-Chat", + "name": "Qwen1.5-14B-Chat", + "params_b": 14.167, + "ifeval": 0.47680820223673187, + "bbh": 0.5228587510703555, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.36178523936170215, + "hf_math_lvl5": 0.15256797583081572, + "hf_musr": 0.43997916666666664, + "hf_avg": 23.566106475051374, + "arena_elo": 1190.83, + "arena_rank": 252, + "arena_votes": 17841 + }, + { + "hf_id": "Qwen/Qwen1.5-32B", + "name": "Qwen1.5-32B", + "params_b": 32.512, + "ifeval": 0.329729562006587, + "bbh": 0.5715390555959325, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.4499667553191489, + "hf_math_lvl5": 0.3028700906344411, + "hf_musr": 0.4277916666666666, + "hf_avg": 27.2987558571606 + }, + { + "hf_id": "Qwen/Qwen1.5-32B-Chat", + "name": "Qwen1.5-32B-Chat", + "params_b": 32.512, + "ifeval": 0.5532199009738605, + "bbh": 0.6066899757930234, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.4457280585106383, + "hf_math_lvl5": 0.19561933534743203, + "hf_musr": 0.4159791666666666, + "hf_avg": 29.25746822860332, + "arena_elo": 1203.87, + "arena_rank": 245, + "arena_votes": 21744 + }, + { + "hf_id": "Qwen/Qwen1.5-4B", + "name": "Qwen1.5-4B", + "params_b": 3.95, + "ifeval": 0.24447466056729478, + "bbh": 0.40538970296725463, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.24601063829787234, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.3604479166666667, + "hf_avg": 11.76818275851784 + }, + { + "hf_id": "Qwen/Qwen1.5-4B-Chat", + "name": "Qwen1.5-4B-Chat", + "params_b": 3.95, + "ifeval": 0.31566576683200576, + "bbh": 0.40055485611486114, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.23961103723404256, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.39778125, + "hf_avg": 12.627280110791753, + "lb_name": "qwen1.5-4b-chat", + "lb_global": 0.12573166666666666, + "lb_reasoning": 0.16, + "lb_math": 0.09856, + "lb_language": 0.05798333333333333, + "lb_if": 0.27749999999999997, + "lb_data_analysis": 0.0469, + "arena_elo": 1090.06, + "arena_rank": 302, + "arena_votes": 7598 + }, + { + "hf_id": "Qwen/Qwen1.5-7B", + "name": "Qwen1.5-7B", + "params_b": 7.721, + "ifeval": 0.2684299879874289, + "bbh": 0.4559896407693445, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.29163896276595747, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.4103333333333334, + "hf_avg": 16.024674155407357 + }, + { + "hf_id": "Qwen/Qwen1.5-7B-Chat", + "name": "Qwen1.5-7B-Chat", + "params_b": 7.721, + "ifeval": 0.43711574178734647, + "bbh": 0.4510053116521351, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.2951296542553192, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.37790624999999994, + "hf_avg": 17.62098662745355, + "lb_name": "qwen1.5-7b-chat", + "lb_global": 0.19912000000000005, + "lb_reasoning": 0.22, + "lb_math": 0.15294333333333335, + "lb_language": 0.061816666666666666, + "lb_if": 0.4411675, + "lb_data_analysis": 0.0435, + "arena_elo": 1143.86, + "arena_rank": 280, + "arena_votes": 4735 + }, + { + "hf_id": "Qwen/Qwen1.5-MoE-A2.7B", + "name": "Qwen1.5-MoE-A2.7B", + "params_b": 14.316, + "ifeval": 0.265982038768246, + "bbh": 0.4113515433010766, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.2777593085106383, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.40134375000000005, + "hf_avg": 13.945920112290063 + }, + { + "hf_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat", + "name": "Qwen1.5-MoE-A2.7B-Chat", + "params_b": 14.316, + "ifeval": 0.37953851336675576, + "bbh": 0.4272088620635824, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.29230385638297873, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.38987499999999997, + "hf_avg": 15.880899856122355 + }, + { + "hf_id": "Qwen/Qwen2-0.5B", + "name": "Qwen2-0.5B", + "params_b": 0.494, + "ifeval": 0.18732186154957736, + "bbh": 0.3239117424825444, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.17195811170212766, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.37520833333333337, + "hf_avg": 7.224121473565234 + }, + { + "hf_id": "Qwen/Qwen2-0.5B-Instruct", + "name": "Qwen2-0.5B-Instruct", + "params_b": 0.494, + "ifeval": 0.22466610814860127, + "bbh": 0.31725179384863494, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.15309175531914893, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.33527083333333335, + "hf_avg": 6.586780633287707, + "lb_name": "qwen2-0.5b-instruct", + "lb_global": 0.09085166666666668, + "lb_reasoning": 0.09, + "lb_math": 0.07345666666666667, + "lb_language": 0.028049999999999995, + "lb_if": 0.2662925, + "lb_data_analysis": 0 + }, + { + "hf_id": "Qwen/Qwen2-1.5B", + "name": "Qwen2-1.5B", + "params_b": 1.544, + "ifeval": 0.21132705665412216, + "bbh": 0.35747931720577464, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.2551529255319149, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.36581250000000004, + "hf_avg": 10.445452935561454 + }, + { + "hf_id": "Qwen/Qwen2-1.5B-Instruct", + "name": "Qwen2-1.5B-Instruct", + "params_b": 1.544, + "ifeval": 0.3371232773485463, + "bbh": 0.3852232408376059, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.25008311170212766, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.42928125, + "hf_avg": 14.141936815181689, + "lb_name": "qwen2-1.5b-instruct", + "lb_global": 0.11499, + "lb_reasoning": 0.12, + "lb_math": 0.09941, + "lb_language": 0.03048333333333333, + "lb_if": 0.25896, + "lb_data_analysis": 0.0501 + }, + { + "hf_id": "Qwen/Qwen2-57B-A14B", + "name": "Qwen2-57B-A14B", + "params_b": 57.409, + "ifeval": 0.31126965340851165, + "bbh": 0.5618204938684165, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.4916057180851064, + "hf_math_lvl5": 0.1865558912386707, + "hf_musr": 0.417375, + "hf_avg": 25.0338731324107 + }, + { + "hf_id": "Qwen/Qwen2-57B-A14B-Instruct", + "name": "Qwen2-57B-A14B-Instruct", + "params_b": 57.409, + "ifeval": 0.6337783747124297, + "bbh": 0.5887606963532052, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.45752992021276595, + "hf_math_lvl5": 0.28172205438066467, + "hf_musr": 0.43613541666666666, + "hf_avg": 33.015868823547095 + }, + { + "hf_id": "Qwen/Qwen2-72B", + "name": "Qwen2-72B", + "params_b": 72.706, + "ifeval": 0.3823610243044012, + "bbh": 0.661734029856643, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.5730551861702128, + "hf_math_lvl5": 0.311178247734139, + "hf_musr": 0.47036458333333336, + "hf_avg": 35.45667093247413 + }, + { + "hf_id": "Qwen/Qwen2-7B", + "name": "Qwen2-7B", + "params_b": 7.616, + "ifeval": 0.3148667757106699, + "bbh": 0.531531595001889, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.41830119680851063, + "hf_math_lvl5": 0.2039274924471299, + "hf_musr": 0.4439166666666667, + "hf_avg": 23.9251621404824 + }, + { + "hf_id": "Qwen/Qwen2-Math-72B-Instruct", + "name": "Qwen2-Math-72B-Instruct", + "params_b": 72.706, + "ifeval": 0.569381463405985, + "bbh": 0.634337660025181, + "gpqa": 0.36828859060402686, + "mmlu_pro": 0.42727726063829785, + "hf_math_lvl5": 0.5536253776435045, + "hf_musr": 0.45169791666666664, + "hf_avg": 38.020957002292825 + }, + { + "hf_id": "Qwen/Qwen2-Math-7B", + "name": "Qwen2-Math-7B", + "params_b": 7.616, + "ifeval": 0.2687048143370701, + "bbh": 0.386954741074792, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.1196808510638298, + "hf_math_lvl5": 0.24773413897280966, + "hf_musr": 0.35933333333333334, + "hf_avg": 12.016921148016648 + }, + { + "hf_id": "Qwen/Qwen2-VL-7B-Instruct", + "name": "Qwen2-VL-7B-Instruct", + "params_b": 8.291, + "ifeval": 0.4599218961245052, + "bbh": 0.5464507159069989, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.40949135638297873, + "hf_math_lvl5": 0.1986404833836858, + "hf_musr": 0.4375, + "hf_avg": 26.493258763428177 + }, + { + "hf_id": "Qwen/Qwen2.5-0.5B", + "name": "Qwen2.5-0.5B", + "params_b": 0.5, + "ifeval": 0.16271714606133947, + "bbh": 0.32748148151196615, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.19057513297872342, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.3433333333333333, + "hf_avg": 6.550067614297009 + }, + { + "hf_id": "Qwen/Qwen2.5-0.5B-Instruct", + "name": "Qwen2.5-0.5B-Instruct", + "params_b": 0.5, + "ifeval": 0.307122878407071, + "bbh": 0.3340729214937266, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.16971409574468085, + "hf_musr": 0.33288541666666666, + "hf_avg": 8.140647319276075 + }, + { + "hf_id": "Qwen/Qwen2.5-0.5B-Instruct", + "name": "Qwen2.5-0.5B-Instruct", + "params_b": 0.494, + "ifeval": 0.31529120511354314, + "bbh": 0.3321916429549138, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.17195811170212766, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.3341875, + "hf_avg": 10.107543850719255 + }, + { + "hf_id": "Qwen/Qwen2.5-1.5B", + "name": "Qwen2.5-1.5B", + "params_b": 1.5, + "ifeval": 0.26743041795768563, + "bbh": 0.40779509451366147, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.28548869680851063, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.35759375, + "hf_avg": 13.852701161320264 + }, + { + "hf_id": "Qwen/Qwen2.5-1.5B-Instruct", + "name": "Qwen2.5-1.5B-Instruct", + "params_b": 1.5, + "ifeval": 0.4475569267321817, + "bbh": 0.4288982740422907, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.27992021276595747, + "hf_math_lvl5": 0.22054380664652568, + "hf_musr": 0.3663125, + "hf_avg": 18.430509141644382 + }, + { + "hf_id": "Qwen/Qwen2.5-14B", + "name": "Qwen2.5-14B", + "params_b": 14.77, + "ifeval": 0.3694464022127954, + "bbh": 0.616051493531774, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.5248503989361702, + "hf_math_lvl5": 0.29003021148036257, + "hf_musr": 0.4502395833333333, + "hf_avg": 31.951062693148973 + }, + { + "hf_id": "Qwen/Qwen2.5-14B-Instruct-1M", + "name": "Qwen2.5-14B-Instruct-1M", + "params_b": 14.77, + "ifeval": 0.8413564896696322, + "bbh": 0.6198222551365405, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.4849567819148936, + "hf_math_lvl5": 0.5302114803625377, + "hf_musr": 0.418, + "hf_avg": 41.559026792386994 + }, + { + "hf_id": "Qwen/Qwen2.5-32B", + "name": "Qwen2.5-32B", + "params_b": 32.764, + "ifeval": 0.40766499554515356, + "bbh": 0.6770522448726507, + "gpqa": 0.41191275167785235, + "mmlu_pro": 0.5805352393617021, + "hf_math_lvl5": 0.3564954682779456, + "hf_musr": 0.49783333333333335, + "hf_avg": 38.00796730514634 + }, + { + "hf_id": "Qwen/Qwen2.5-3B", + "name": "Qwen2.5-3B", + "params_b": 3.086, + "ifeval": 0.2689541527591236, + "bbh": 0.4612475341011634, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3203125, + "hf_math_lvl5": 0.14803625377643503, + "hf_musr": 0.4303333333333333, + "hf_avg": 18.102770217683673 + }, + { + "hf_id": "Qwen/Qwen2.5-3B-Instruct", + "name": "Qwen2.5-3B-Instruct", + "params_b": 3, + "ifeval": 0.6474919879253713, + "bbh": 0.469276665604885, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3254654255319149, + "hf_math_lvl5": 0.3678247734138973, + "hf_musr": 0.39679166666666665, + "hf_avg": 27.16175720903232 + }, + { + "hf_id": "Qwen/Qwen2.5-72B", + "name": "Qwen2.5-72B", + "params_b": 72.706, + "ifeval": 0.4137100670664947, + "bbh": 0.6797320670694852, + "gpqa": 0.4052013422818792, + "mmlu_pro": 0.5968251329787234, + "hf_math_lvl5": 0.39123867069486407, + "hf_musr": 0.477125, + "hf_avg": 38.441143572535815 + }, + { + "hf_id": "Qwen/Qwen2.5-7B", + "name": "Qwen2.5-7B", + "params_b": 7.616, + "ifeval": 0.3374479713825982, + "bbh": 0.5416303767788616, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.4365026595744681, + "hf_math_lvl5": 0.25075528700906347, + "hf_musr": 0.4424270833333333, + "hf_avg": 26.019159924095096 + }, + { + "hf_id": "Qwen/Qwen2.5-7B-Instruct-1M", + "name": "Qwen2.5-7B-Instruct-1M", + "params_b": 7.616, + "ifeval": 0.7447616767953474, + "bbh": 0.5403941270576822, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.35048204787234044, + "hf_math_lvl5": 0.4335347432024169, + "hf_musr": 0.40869791666666666, + "hf_avg": 32.76394723937119 + }, + { + "hf_id": "Qwen/Qwen2.5-Coder-14B", + "name": "Qwen2.5-Coder-14B", + "params_b": 14.77, + "ifeval": 0.3472652561869174, + "bbh": 0.5864860091741232, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.4521276595744681, + "hf_math_lvl5": 0.22507552870090636, + "hf_musr": 0.3873645833333333, + "hf_avg": 24.829052280388357 + }, + { + "hf_id": "Qwen/Qwen2.5-Coder-14B-Instruct", + "name": "Qwen2.5-Coder-14B-Instruct", + "params_b": 14.77, + "ifeval": 0.6907560827493273, + "bbh": 0.6140296423661326, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3939494680851064, + "hf_math_lvl5": 0.324773413897281, + "hf_musr": 0.3914583333333333, + "hf_avg": 32.12283417812606, + "aider_pass_rate": 0.586 + }, + { + "hf_id": "Qwen/Qwen2.5-Coder-32B", + "name": "Qwen2.5-Coder-32B", + "params_b": 32.764, + "ifeval": 0.4363411304228336, + "bbh": 0.640395506550809, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.5302526595744681, + "hf_math_lvl5": 0.30891238670694865, + "hf_musr": 0.4528125, + "hf_avg": 33.2623633375386 + }, + { + "hf_id": "Qwen/Qwen2.5-Coder-7B", + "name": "Qwen2.5-Coder-7B", + "params_b": 7.616, + "ifeval": 0.344592348302504, + "bbh": 0.48556405534214747, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.3679355053191489, + "hf_math_lvl5": 0.19184290030211482, + "hf_musr": 0.3448541666666667, + "hf_avg": 19.209490538962246 + }, + { + "hf_id": "Qwen/Qwen2.5-Math-1.5B-Instruct", + "name": "Qwen2.5-Math-1.5B-Instruct", + "params_b": 1.544, + "ifeval": 0.1855731680829089, + "bbh": 0.37515353898426174, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.1801030585106383, + "hf_math_lvl5": 0.2628398791540785, + "hf_musr": 0.3685416666666667, + "hf_avg": 12.024869614820517 + }, + { + "hf_id": "Qwen/Qwen2.5-Math-72B-Instruct", + "name": "Qwen2.5-Math-72B-Instruct", + "params_b": 72.706, + "ifeval": 0.4003466358151926, + "bbh": 0.6452266637803764, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.4812167553191489, + "hf_math_lvl5": 0.6238670694864048, + "hf_musr": 0.44727083333333334, + "hf_avg": 36.822863862347894 + }, + { + "hf_id": "Qwen/Qwen2.5-Math-7B", + "name": "Qwen2.5-Math-7B", + "params_b": 7.616, + "ifeval": 0.24599839536873275, + "bbh": 0.4454639372840941, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.27177526595744683, + "hf_math_lvl5": 0.30513595166163143, + "hf_musr": 0.37809374999999995, + "hf_avg": 17.836657156289718 + }, + { + "hf_id": "Qwen/Qwen2.5-Math-7B-Instruct", + "name": "Qwen2.5-Math-7B-Instruct", + "params_b": 7, + "ifeval": 0.26358395723347383, + "bbh": 0.438762734452786, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.2819980053191489, + "hf_math_lvl5": 0.5808157099697885, + "hf_musr": 0.3647291666666666, + "hf_avg": 21.76814573761254 + }, + { + "hf_id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", + "name": "EVA-Qwen2.5-1.5B-FRFR", + "params_b": 1.544, + "ifeval": 0.308172316121225, + "bbh": 0.3932411333682871, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.27701130319148937, + "hf_math_lvl5": 0.1027190332326284, + "hf_musr": 0.3539375, + "hf_avg": 14.185076993762264 + }, + { + "hf_id": "RESMPDEV/Qwen2-Wukong-0.5B", + "name": "Qwen2-Wukong-0.5B", + "params_b": 0.63, + "ifeval": 0.1854235650296768, + "bbh": 0.308451428837168, + "gpqa": 0.23657718120805368, + "mmlu_pro": 0.13272938829787234, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.3524791666666667, + "hf_avg": 4.975539710746782 + }, + { + "hf_id": "RLHFlow/ArmoRM-Llama3-8B-v0.1", + "name": "ArmoRM-Llama3-8B-v0.1", + "params_b": 7.511, + "ifeval": 0.18967007539993883, + "bbh": 0.2876467446788138, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.10779587765957446, + "hf_musr": 0.3948020833333333, + "hf_avg": 4.705487409302649 + }, + { + "hf_id": "RLHFlow/LLaMA3-iterative-DPO-final", + "name": "LLaMA3-iterative-DPO-final", + "params_b": 8.03, + "ifeval": 0.53401086893886, + "bbh": 0.5058257182733729, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.32571476063829785, + "hf_math_lvl5": 0.08836858006042296, + "hf_musr": 0.3672708333333334, + "hf_avg": 21.109152797451326 + }, + { + "hf_id": "Rakuten/RakutenAI-2.0-mini-instruct", + "name": "RakutenAI-2.0-mini-instruct", + "params_b": 1.535, + "ifeval": 0.6793906833867471, + "bbh": 0.2867197270809481, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.11178523936170212, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.3249166666666667, + "hf_avg": 13.318016561874655 + }, + { + "hf_id": "Rakuten/RakutenAI-7B", + "name": "RakutenAI-7B", + "params_b": 7.373, + "ifeval": 0.1555971488982566, + "bbh": 0.43149052613615435, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.28773271276595747, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.37381250000000005, + "hf_avg": 11.546978376746557 + }, + { + "hf_id": "Rakuten/RakutenAI-7B-chat", + "name": "RakutenAI-7B-chat", + "params_b": 7.373, + "ifeval": 0.26855521128383797, + "bbh": 0.4316204035758174, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.2798371010638298, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.37895833333333334, + "hf_avg": 12.803095362134842 + }, + { + "hf_id": "Replete-AI/L3-Pneuma-8B", + "name": "L3-Pneuma-8B", + "params_b": 8.03, + "ifeval": 0.24132745559559746, + "bbh": 0.4908680380935449, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.3175698138297872, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.4105208333333333, + "hf_avg": 16.691758570090254 + }, + { + "hf_id": "Replete-AI/L3.1-Pneuma-8B", + "name": "L3.1-Pneuma-8B", + "params_b": 8.03, + "ifeval": 0.707642388861554, + "bbh": 0.504990389092237, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.36909906914893614, + "hf_math_lvl5": 0.21978851963746224, + "hf_musr": 0.3871145833333333, + "hf_avg": 27.68476446634547 + }, + { + "hf_id": "Ro-xe/FMixIA-7B-DARE-0", + "name": "FMixIA-7B-DARE-0", + "params_b": 7.242, + "ifeval": 0.3341256754300811, + "bbh": 0.5035332799973222, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3016123670212766, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.45448958333333334, + "hf_avg": 18.934625270181993 + }, + { + "hf_id": "Ro-xe/FMixIA-7B-SLERP-27", + "name": "FMixIA-7B-SLERP-27", + "params_b": 7.242, + "ifeval": 0.3765409114482905, + "bbh": 0.5150591725181265, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.30078125, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.44115624999999997, + "hf_avg": 19.827314533484948 + }, + { + "hf_id": "Ro-xe/FMixIA-7B-TIES-1", + "name": "FMixIA-7B-TIES-1", + "params_b": 7.242, + "ifeval": 0.34529160405501846, + "bbh": 0.5091539642456672, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.2992021276595745, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.46890625, + "hf_avg": 19.541189569686495 + }, + { + "hf_id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", + "name": "FMixIA-FrankenMerge-9.5B-PT-9", + "params_b": 14.141, + "ifeval": 0.19401632113902223, + "bbh": 0.5087851148631056, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.36569148936170215, + "hf_math_lvl5": 0.0030211480362537764, + "hf_musr": 0.41703124999999996, + "hf_avg": 16.18640423218573 + }, + { + "hf_id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", + "name": "Rombo-LLM-V2.5-Qwen-7b", + "params_b": 7.616, + "ifeval": 0.748183708116686, + "bbh": 0.5399745025607596, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.4282746010638298, + "hf_math_lvl5": 0.506797583081571, + "hf_musr": 0.39803125, + "hf_avg": 35.259585169099566 + }, + { + "hf_id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", + "name": "LogoS-7Bx2-MoE-13B-v0.2", + "params_b": 12.879, + "ifeval": 0.4378903531518593, + "bbh": 0.5206958722481815, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.3087599734042553, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4226145833333333, + "hf_avg": 20.11738926127926 + }, + { + "hf_id": "SaisExperiments/Gemma-2-2B-Opus-Instruct", + "name": "Gemma-2-2B-Opus-Instruct", + "params_b": 2.614, + "ifeval": 0.474959773401242, + "bbh": 0.4292846281445681, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2650432180851064, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.4056875, + "hf_avg": 17.2459909559155 + }, + { + "hf_id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", + "name": "Gemma-2-2B-Stheno-Filtered", + "params_b": 2.614, + "ifeval": 0.4196554032190144, + "bbh": 0.4149234152222183, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.2629654255319149, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.40029166666666666, + "hf_avg": 15.491102762862866 + }, + { + "hf_id": "SaisExperiments/Not-So-Small-Alpaca-24B", + "name": "Not-So-Small-Alpaca-24B", + "params_b": 23.572, + "ifeval": 0.6243611395541607, + "bbh": 0.5338637679203099, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.36943151595744683, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.42816666666666664, + "hf_avg": 28.383064508687198 + }, + { + "hf_id": "SaisExperiments/QwOwO-7B-V1", + "name": "QwOwO-7B-V1", + "params_b": 7.616, + "ifeval": 0.45562551806983254, + "bbh": 0.5431230107025949, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.42237367021276595, + "hf_math_lvl5": 0.3859516616314199, + "hf_musr": 0.38348958333333333, + "hf_avg": 27.075777072300355 + }, + { + "hf_id": "SaisExperiments/RightSheep-Llama3.2-3B", + "name": "RightSheep-Llama3.2-3B", + "params_b": 3.213, + "ifeval": 0.4156338515139829, + "bbh": 0.42407794300783824, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.25398936170212766, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.3767291666666666, + "hf_avg": 15.857393775364821 + }, + { + "hf_id": "Sakalti/Anemoi-3B", + "name": "Anemoi-3B", + "params_b": 3.397, + "ifeval": 0.3803629924156793, + "bbh": 0.4921954661921298, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3765791223404255, + "hf_math_lvl5": 0.17749244712990936, + "hf_musr": 0.43706249999999996, + "hf_avg": 22.619869024533212 + }, + { + "hf_id": "Sakalti/Llama3.2-3B-Uranus-1", + "name": "Llama3.2-3B-Uranus-1", + "params_b": 3.213, + "ifeval": 0.5335365718515761, + "bbh": 0.44368258173181263, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3094248670212766, + "hf_math_lvl5": 0.14954682779456194, + "hf_musr": 0.3668645833333333, + "hf_avg": 21.030483022816096 + }, + { + "hf_id": "Sakalti/Magro-7B-v1.1", + "name": "Magro-7B-v1.1", + "params_b": 7.242, + "ifeval": 0.1204016454119514, + "bbh": 0.41790625208343796, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.27642952127659576, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.4433229166666666, + "hf_avg": 12.14114119775379 + }, + { + "hf_id": "Sakalti/Neptuno-3B", + "name": "Neptuno-3B", + "params_b": 3.397, + "ifeval": 0.42962229107656574, + "bbh": 0.48335808848564965, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3773271276595745, + "hf_math_lvl5": 0.2552870090634441, + "hf_musr": 0.40019791666666665, + "hf_avg": 23.466342186693208 + }, + { + "hf_id": "Sakalti/Neptuno-Alpha", + "name": "Neptuno-Alpha", + "params_b": 3.397, + "ifeval": 0.3779649108809071, + "bbh": 0.49247749379461303, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3767453457446808, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.43706249999999996, + "hf_avg": 22.739135010323622 + }, + { + "hf_id": "Sakalti/Oxyge1-33B", + "name": "Oxyge1-33B", + "params_b": 32.764, + "ifeval": 0.4548265269484966, + "bbh": 0.7033278292161169, + "gpqa": 0.3825503355704698, + "mmlu_pro": 0.5909242021276596, + "hf_math_lvl5": 0.4962235649546828, + "hf_musr": 0.5007812500000001, + "hf_avg": 41.60924567669207 + }, + { + "hf_id": "Sakalti/Phi3.5-Comets-3.8B", + "name": "Phi3.5-Comets-3.8B", + "params_b": 3.821, + "ifeval": 0.20942876013422163, + "bbh": 0.3335116874180515, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11527593085106383, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3763541666666667, + "hf_avg": 5.760043006242388 + }, + { + "hf_id": "Sakalti/Qwen2.5-1B-Instruct", + "name": "Qwen2.5-1B-Instruct", + "params_b": 0.988, + "ifeval": 0.17513198313807365, + "bbh": 0.30271528035563927, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.12134308510638298, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.33688541666666666, + "hf_avg": 4.275488298707521 + }, + { + "hf_id": "Sakalti/QwenTest-7", + "name": "QwenTest-7", + "params_b": 0.988, + "ifeval": 0.16718861509683197, + "bbh": 0.3063209532879154, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.12117686170212766, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.34218750000000003, + "hf_avg": 4.255208552096732 + }, + { + "hf_id": "Sakalti/SJT-0.5B", + "name": "SJT-0.5B", + "params_b": 0.63, + "ifeval": 0.24247662867857286, + "bbh": 0.33055365550588683, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.18907912234042554, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.31958333333333333, + "hf_avg": 8.576015804911226 + }, + { + "hf_id": "Sakalti/SJT-1.5B-Alpha", + "name": "SJT-1.5B-Alpha", + "params_b": 1.777, + "ifeval": 0.3448671746521452, + "bbh": 0.4240819448548446, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.2961269946808511, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.4226145833333333, + "hf_avg": 16.911765349077854 + }, + { + "hf_id": "Sakalti/SJT-1.5B-Alpha-1.1", + "name": "SJT-1.5B-Alpha-1.1", + "params_b": 1.777, + "ifeval": 0.3439429602344003, + "bbh": 0.4243160272518483, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.296625664893617, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.42391666666666666, + "hf_avg": 16.820570294667494 + }, + { + "hf_id": "Sakalti/SJT-1.7B", + "name": "SJT-1.7B", + "params_b": 1.684, + "ifeval": 0.17762980004166723, + "bbh": 0.2934008926922806, + "gpqa": 0.24161073825503357, + "mmlu_pro": 0.11328125, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.39641666666666664, + "hf_avg": 5.034230484246264 + }, + { + "hf_id": "Sakalti/SJT-14B", + "name": "SJT-14B", + "params_b": 14.766, + "ifeval": 0.5494233079340594, + "bbh": 0.6536135646865123, + "gpqa": 0.38674496644295303, + "mmlu_pro": 0.5380651595744681, + "hf_math_lvl5": 0.38444108761329304, + "hf_musr": 0.476625, + "hf_avg": 38.21030695378075 + }, + { + "hf_id": "Sakalti/SJT-2B", + "name": "SJT-2B", + "params_b": 2.614, + "ifeval": 0.21507378200951255, + "bbh": 0.29364597509285106, + "gpqa": 0.24161073825503357, + "mmlu_pro": 0.11868351063829788, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.35641666666666666, + "hf_avg": 4.821696209153122 + }, + { + "hf_id": "Sakalti/SJT-2B-V1.1", + "name": "SJT-2B-V1.1", + "params_b": 2.614, + "ifeval": 0.3977235956151899, + "bbh": 0.39838417813569243, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.21243351063829788, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.42993750000000003, + "hf_avg": 14.58950928973948 + }, + { + "hf_id": "Sakalti/SJT-3.7B", + "name": "SJT-3.7B", + "params_b": 3.783, + "ifeval": 0.10776184966998675, + "bbh": 0.3393045259885476, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.1505152925531915, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.36171875000000003, + "hf_avg": 5.0893182036401825 + }, + { + "hf_id": "Sakalti/SJT-4B", + "name": "SJT-4B", + "params_b": 3.821, + "ifeval": 0.4077403511571519, + "bbh": 0.4885743296577029, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.328125, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.4779583333333333, + "hf_avg": 21.994031536147386 + }, + { + "hf_id": "Sakalti/SJT-900M", + "name": "SJT-900M", + "params_b": 0.899, + "ifeval": 0.2410027615615456, + "bbh": 0.31692036321713823, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.11419547872340426, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.35945833333333327, + "hf_avg": 5.791993813854994 + }, + { + "hf_id": "Sakalti/SJT-Moe2x7.5B", + "name": "SJT-Moe2x7.5B", + "params_b": 13.401, + "ifeval": 0.41166216749336204, + "bbh": 0.5370697921185069, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.3953623670212766, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.43988541666666664, + "hf_avg": 25.679678830683567 + }, + { + "hf_id": "Sakalti/SJTPass-2", + "name": "SJTPass-2", + "params_b": 0.63, + "ifeval": 0.24002867945939, + "bbh": 0.33022032217255354, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.1901595744680851, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.32225, + "hf_avg": 8.578554710120141 + }, + { + "hf_id": "Sakalti/Saba-Passthrough-2", + "name": "Saba-Passthrough-2", + "params_b": 3.087, + "ifeval": 0.16913677930114318, + "bbh": 0.36724803467499195, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.20769614361702127, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3844479166666666, + "hf_avg": 8.019416015292425 + }, + { + "hf_id": "Sakalti/Saba1-1.8B", + "name": "Saba1-1.8B", + "params_b": 1.777, + "ifeval": 0.3332768166243345, + "bbh": 0.4147375470428282, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.2925531914893617, + "hf_math_lvl5": 0.1540785498489426, + "hf_musr": 0.4238854166666666, + "hf_avg": 17.174113467091598 + }, + { + "hf_id": "Sakalti/Saba1.5-1.5B", + "name": "Saba1.5-1.5B", + "params_b": 1.544, + "ifeval": 0.3332768166243345, + "bbh": 0.4147375470428282, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.2925531914893617, + "hf_math_lvl5": 0.1540785498489426, + "hf_musr": 0.4238854166666666, + "hf_avg": 17.174113467091598 + }, + { + "hf_id": "Sakalti/Saba1.5-Pro-3B", + "name": "Saba1.5-Pro-3B", + "params_b": 2.9, + "ifeval": 0.23860468002677343, + "bbh": 0.3622910501405146, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.19581117021276595, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.44054166666666666, + "hf_avg": 10.788079074661752 + }, + { + "hf_id": "Sakalti/Saba2-14B-Preview", + "name": "Saba2-14B-Preview", + "params_b": 14.77, + "ifeval": 0.4721871301480073, + "bbh": 0.649628096691823, + "gpqa": 0.3825503355704698, + "mmlu_pro": 0.5383976063829787, + "hf_math_lvl5": 0.31268882175226587, + "hf_musr": 0.4781458333333333, + "hf_avg": 35.687158487523945 + }, + { + "hf_id": "Sakalti/Saba2-3B", + "name": "Saba2-3B", + "params_b": 3.086, + "ifeval": 0.28651533486704167, + "bbh": 0.28011877359000464, + "gpqa": 0.2617449664429531, + "mmlu_pro": 0.12101063829787234, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.39269791666666665, + "hf_avg": 7.068766556279139 + }, + { + "hf_id": "Sakalti/Saka-7.2B", + "name": "Saka-7.2B", + "params_b": 7.292, + "ifeval": 0.1544989516704566, + "bbh": 0.2945156585364917, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.11602393617021277, + "hf_musr": 0.37105208333333334, + "hf_avg": 3.869360483737708 + }, + { + "hf_id": "Sakalti/SakaMoe-3x1.6B-Instruct", + "name": "SakaMoe-3x1.6B-Instruct", + "params_b": 1.572, + "ifeval": 0.23708094522533543, + "bbh": 0.328247997224552, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.18824800531914893, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.33421875, + "hf_avg": 8.276328372160824 + }, + { + "hf_id": "Sakalti/Tara-3.8B-v1.1", + "name": "Tara-3.8B-v1.1", + "params_b": 3.821, + "ifeval": 0.40621661635571393, + "bbh": 0.4885743296577029, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.328125, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.4779583333333333, + "hf_avg": 21.968635956123418 + }, + { + "hf_id": "Sakalti/light-1.1-3B", + "name": "light-1.1-3B", + "params_b": 3.086, + "ifeval": 0.27345110972220377, + "bbh": 0.28027723572953045, + "gpqa": 0.2617449664429531, + "mmlu_pro": 0.12092752659574468, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.3900625, + "hf_avg": 6.923496536132208 + }, + { + "hf_id": "Sakalti/light-3B", + "name": "light-3B", + "params_b": 3.397, + "ifeval": 0.5337360425892188, + "bbh": 0.4831034368803701, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.3774933510638298, + "hf_math_lvl5": 0.2590634441087613, + "hf_musr": 0.40149999999999997, + "hf_avg": 25.284693562377303 + }, + { + "hf_id": "Sakalti/light-3b-beta", + "name": "light-3b-beta", + "params_b": 3.397, + "ifeval": 0.5485489612007252, + "bbh": 0.48152297262112204, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3758311170212766, + "hf_math_lvl5": 0.277190332326284, + "hf_musr": 0.40146875, + "hf_avg": 25.809275245445864 + }, + { + "hf_id": "Sakalti/light-7b-beta", + "name": "light-7b-beta", + "params_b": 7.616, + "ifeval": 0.6233870574520051, + "bbh": 0.5548193064288276, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.445561835106383, + "hf_math_lvl5": 0.3768882175226586, + "hf_musr": 0.42906249999999996, + "hf_avg": 32.68982935684443 + }, + { + "hf_id": "Sakalti/llama-3-yanyuedao-8b-instruct", + "name": "llama-3-yanyuedao-8b-instruct", + "params_b": 8.03, + "ifeval": 0.21857116894284942, + "bbh": 0.43497849055247495, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.29105718085106386, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.41985416666666664, + "hf_avg": 13.935841337684982 + }, + { + "hf_id": "Sakalti/magro-7B", + "name": "magro-7B", + "params_b": 7.242, + "ifeval": 0.13439008497453425, + "bbh": 0.4185526485966236, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.2765126329787234, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.44598958333333333, + "hf_avg": 12.382552499684506 + }, + { + "hf_id": "Sakalti/qwen2.5-2.3B", + "name": "qwen2.5-2.3B", + "params_b": 2.339, + "ifeval": 0.12879493078365403, + "bbh": 0.2849449123234445, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.11727061170212766, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.38565625, + "hf_avg": 3.7013246212545643 + }, + { + "hf_id": "Sakalti/tara-3.8B", + "name": "tara-3.8B", + "params_b": 3.821, + "ifeval": 0.4077403511571519, + "bbh": 0.4885743296577029, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.328125, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.4779583333333333, + "hf_avg": 21.994031536147386 + }, + { + "hf_id": "Sakalti/ultiima-14B", + "name": "ultiima-14B", + "params_b": 14.77, + "ifeval": 0.5700563394016764, + "bbh": 0.6491153472177067, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5380651595744681, + "hf_math_lvl5": 0.4697885196374622, + "hf_musr": 0.4717604166666667, + "hf_avg": 39.61010623253845 + }, + { + "hf_id": "Sakalti/ultiima-32B", + "name": "ultiima-32B", + "params_b": 32.764, + "ifeval": 0.6854357549080883, + "bbh": 0.7037285782797875, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5910073138297872, + "hf_math_lvl5": 0.4962235649546828, + "hf_musr": 0.4994791666666667, + "hf_avg": 45.40321787320272 + }, + { + "hf_id": "Sakalti/ultiima-72B", + "name": "ultiima-72B", + "params_b": 72.706, + "ifeval": 0.7140121544169471, + "bbh": 0.7217809739144654, + "gpqa": 0.41442953020134227, + "mmlu_pro": 0.590591755319149, + "hf_math_lvl5": 0.5354984894259819, + "hf_musr": 0.46518750000000003, + "hf_avg": 46.76723892225559 + }, + { + "hf_id": "Sakalti/ultiima-72B-v1.5", + "name": "ultiima-72B-v1.5", + "params_b": 72.706, + "ifeval": 0.6549610588793291, + "bbh": 0.7391727188223717, + "gpqa": 0.41359060402684567, + "mmlu_pro": 0.6053856382978723, + "hf_math_lvl5": 0.4395770392749245, + "hf_musr": 0.46909375000000003, + "hf_avg": 44.89912916202218 + }, + { + "hf_id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", + "name": "LLaMA-3-8B-SFR-Iterative-DPO-R", + "params_b": 8.03, + "ifeval": 0.38156203318306536, + "bbh": 0.5011950469666927, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.3172373670212766, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.36333333333333334, + "hf_avg": 18.527751291550988 + }, + { + "hf_id": "SanjiWatsuki/Kunoichi-DPO-v2-7B", + "name": "Kunoichi-DPO-v2-7B", + "params_b": 7.242, + "ifeval": 0.5431034100630772, + "bbh": 0.4415592450869275, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3106715425531915, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.41883333333333334, + "hf_avg": 20.5816311223652 + }, + { + "hf_id": "SanjiWatsuki/Silicon-Maid-7B", + "name": "Silicon-Maid-7B", + "params_b": 7.242, + "ifeval": 0.5367835121920947, + "bbh": 0.4127972831009074, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.308344414893617, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.41883333333333334, + "hf_avg": 19.412095533357363 + }, + { + "hf_id": "Sao10K/70B-L3.3-Cirrus-x1", + "name": "70B-L3.3-Cirrus-x1", + "params_b": 70.554, + "ifeval": 0.6680751517085777, + "bbh": 0.7028970787833794, + "gpqa": 0.44966442953020136, + "mmlu_pro": 0.5378158244680851, + "hf_math_lvl5": 0.37386706948640486, + "hf_musr": 0.4841666666666667, + "hf_avg": 43.0025823792082 + }, + { + "hf_id": "Sao10K/Fimbulvetr-11B-v2", + "name": "Fimbulvetr-11B-v2", + "params_b": 10.732, + "ifeval": 0.5100056738343152, + "bbh": 0.4544495065184342, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.33011968085106386, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.43536458333333333, + "hf_avg": 21.089256463822462 + }, + { + "hf_id": "Sao10K/L3-70B-Euryale-v2.1", + "name": "L3-70B-Euryale-v2.1", + "params_b": 70.554, + "ifeval": 0.7384417789243651, + "bbh": 0.6471322811268715, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.5103889627659575, + "hf_math_lvl5": 0.21374622356495468, + "hf_musr": 0.42091666666666666, + "hf_avg": 35.43613152375533 + }, + { + "hf_id": "Sao10K/L3-70B-Euryale-v2.1", + "name": "L3-70B-Euryale-v2.1", + "params_b": 70.554, + "ifeval": 0.7281003293483512, + "bbh": 0.6502778992745041, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.5095578457446809, + "hf_math_lvl5": 0.22432024169184292, + "hf_musr": 0.41958333333333336, + "hf_avg": 35.473252668728755 + }, + { + "hf_id": "Sao10K/L3-8B-Lunaris-v1", + "name": "L3-8B-Lunaris-v1", + "params_b": 8.03, + "ifeval": 0.6894573066131198, + "bbh": 0.5235299282515419, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3787400265957447, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.3726666666666667, + "hf_avg": 25.577983918433663 + }, + { + "hf_id": "Sao10K/L3-8B-Niitama-v1", + "name": "L3-8B-Niitama-v1", + "params_b": 8.03, + "ifeval": 0.6790659893526954, + "bbh": 0.5302980131787137, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3700964095744681, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.3806666666666667, + "hf_avg": 25.7912140312776 + }, + { + "hf_id": "Sao10K/L3-8B-Stheno-v3.2", + "name": "L3-8B-Stheno-v3.2", + "params_b": 8.03, + "ifeval": 0.6872841837435781, + "bbh": 0.522778637171633, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3768284574468085, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.3793645833333333, + "hf_avg": 25.884393588195138 + }, + { + "hf_id": "Sao10K/L3-8B-Stheno-v3.3-32K", + "name": "L3-8B-Stheno-v3.3-32K", + "params_b": 8.03, + "ifeval": 0.46037181345496614, + "bbh": 0.3844012923008206, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.1895777925531915, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3725416666666667, + "hf_avg": 12.649981104473916 + }, + { + "hf_id": "Sao10K/MN-12B-Lyra-v3", + "name": "MN-12B-Lyra-v3", + "params_b": 12.248, + "ifeval": 0.4486063644463357, + "bbh": 0.4803954360397243, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.32488364361702127, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.40190624999999996, + "hf_avg": 19.63563111181339 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", + "name": "Linkbricks-Horizon-AI-Avengers-V1-32B", + "params_b": 32.76, + "ifeval": 0.7971681804279312, + "bbh": 0.7000545067146033, + "gpqa": 0.3624161073825503, + "mmlu_pro": 0.5792885638297872, + "hf_math_lvl5": 0.6027190332326284, + "hf_musr": 0.45378125, + "hf_avg": 47.33695645964031 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", + "name": "Linkbricks-Horizon-AI-Avengers-V2-32B", + "params_b": 32.76, + "ifeval": 0.7956444456264933, + "bbh": 0.7023193256341814, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.5719747340425532, + "hf_math_lvl5": 0.5664652567975831, + "hf_musr": 0.41663541666666665, + "hf_avg": 43.33912719384693 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", + "name": "Linkbricks-Horizon-AI-Avengers-V3-32B", + "params_b": 32.764, + "ifeval": 0.8248702332034556, + "bbh": 0.6913199237437709, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.56640625, + "hf_math_lvl5": 0.6178247734138973, + "hf_musr": 0.42745833333333333, + "hf_avg": 46.36626657304592 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", + "name": "Linkbricks-Horizon-AI-Avengers-V4-32B", + "params_b": 32.764, + "ifeval": 0.7630963620970137, + "bbh": 0.6920204096666581, + "gpqa": 0.3615771812080537, + "mmlu_pro": 0.5752160904255319, + "hf_math_lvl5": 0.5362537764350453, + "hf_musr": 0.4642604166666667, + "hf_avg": 45.60530808037683 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", + "name": "Linkbricks-Horizon-AI-Avengers-V5-32B", + "params_b": 32.764, + "ifeval": 0.7515558717536137, + "bbh": 0.6928650089977083, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.5762134308510638, + "hf_math_lvl5": 0.5460725075528701, + "hf_musr": 0.47086458333333336, + "hf_avg": 45.672698393079905 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", + "name": "Linkbricks-Horizon-AI-Avengers-V6-32B", + "params_b": 32.76, + "ifeval": 0.8208985491828349, + "bbh": 0.6889783858832969, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.5672373670212766, + "hf_math_lvl5": 0.622356495468278, + "hf_musr": 0.42742708333333335, + "hf_avg": 46.23110024344452 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", + "name": "Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", + "params_b": 27.227, + "ifeval": 0.8145786513118525, + "bbh": 0.6463223196116569, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.45985704787234044, + "hf_math_lvl5": 0.2802114803625378, + "hf_musr": 0.4139375, + "hf_avg": 37.17096025005312 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", + "name": "Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", + "params_b": 27.227, + "ifeval": 0.81420408959339, + "bbh": 0.6403963618749583, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.4523769946808511, + "hf_math_lvl5": 0.24924471299093656, + "hf_musr": 0.44667708333333334, + "hf_avg": 37.3287116618655 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", + "name": "Linkbricks-Horizon-AI-Korean-Superb-22B", + "params_b": 22.247, + "ifeval": 0.6766679078179231, + "bbh": 0.5625539568927603, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.3871343085106383, + "hf_math_lvl5": 0.23716012084592145, + "hf_musr": 0.3907708333333333, + "hf_avg": 29.331660638255602 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", + "name": "Linkbricks-Horizon-AI-Korean-Superb-27B", + "params_b": 27.227, + "ifeval": 0.7767601076255447, + "bbh": 0.6518345685119445, + "gpqa": 0.3598993288590604, + "mmlu_pro": 0.4646775265957447, + "hf_math_lvl5": 0.2719033232628399, + "hf_musr": 0.47913541666666665, + "hf_avg": 38.3619715244158 + }, + { + "hf_id": "Saxo/Linkbricks-Horizon-AI-Superb-27B", + "name": "Linkbricks-Horizon-AI-Superb-27B", + "params_b": 27.227, + "ifeval": 0.7302235845334822, + "bbh": 0.6186245528925046, + "gpqa": 0.3573825503355705, + "mmlu_pro": 0.406000664893617, + "hf_math_lvl5": 0.22205438066465258, + "hf_musr": 0.465, + "hf_avg": 34.63303350305584 + }, + { + "hf_id": "SeaLLMs/SeaLLM-7B-v2", + "name": "SeaLLM-7B-v2", + "params_b": 7.376, + "ifeval": 0.36712367629002157, + "bbh": 0.4902100795458318, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.30826130319148937, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.4069583333333333, + "hf_avg": 18.166389781627085 + }, + { + "hf_id": "SeaLLMs/SeaLLM-7B-v2.5", + "name": "SeaLLM-7B-v2.5", + "params_b": 8.538, + "ifeval": 0.4521536190640833, + "bbh": 0.49802029594352754, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.3203125, + "hf_math_lvl5": 0.10876132930513595, + "hf_musr": 0.42032291666666666, + "hf_avg": 20.73056803415737 + }, + { + "hf_id": "SeaLLMs/SeaLLMs-v3-7B-Chat", + "name": "SeaLLMs-v3-7B-Chat", + "params_b": 7.616, + "ifeval": 0.43766539448662883, + "bbh": 0.5266406284595359, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3894614361702128, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.417375, + "hf_avg": 24.211695856688838 + }, + { + "hf_id": "SenseLLM/ReflectionCoder-CL-34B", + "name": "ReflectionCoder-CL-34B", + "params_b": 33.744, + "ifeval": 0.4007710652180658, + "bbh": 0.39529304297033296, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.14237034574468085, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.41548958333333336, + "hf_avg": 12.147932297450376 + }, + { + "hf_id": "SenseLLM/ReflectionCoder-DS-33B", + "name": "ReflectionCoder-DS-33B", + "params_b": 33.34, + "ifeval": 0.3786641666334215, + "bbh": 0.3449447540164568, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.12017952127659574, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.3343125, + "hf_avg": 9.194495021407294 + }, + { + "hf_id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", + "name": "Dobby-Mini-Leashed-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.7847034756667863, + "bbh": 0.5138053850165866, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.36943151595744683, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.425375, + "hf_avg": 29.438992188094076 + }, + { + "hf_id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", + "name": "Dobby-Mini-Unhinged-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.7456858912130924, + "bbh": 0.5142440064892148, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.35846077127659576, + "hf_math_lvl5": 0.15634441087613293, + "hf_musr": 0.40128125000000003, + "hf_avg": 27.45756521473049 + }, + { + "hf_id": "Sharathhebbar24/SSH_355M", + "name": "SSH_355M", + "params_b": 0.355, + "ifeval": 0.1423589409433636, + "bbh": 0.30985907344593705, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11760305851063829, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.41775, + "hf_avg": 5.371931358828882 + }, + { + "hf_id": "Sharathhebbar24/chat_gpt2_dpo", + "name": "chat_gpt2_dpo", + "params_b": 0.124, + "ifeval": 0.09861944086135896, + "bbh": 0.29022988561565644, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11419547872340426, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.38184375, + "hf_avg": 3.406545907558898 + }, + { + "hf_id": "Sicarius-Prototyping/Brainy_LLAMA", + "name": "Brainy_LLAMA", + "params_b": 8.03, + "ifeval": 0.5204224790223274, + "bbh": 0.5117131754488634, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3848902925531915, + "hf_math_lvl5": 0.1336858006042296, + "hf_musr": 0.4143333333333334, + "hf_avg": 24.22551281841119 + }, + { + "hf_id": "Sicarius-Prototyping/bacon_and_food", + "name": "bacon_and_food", + "params_b": 8.03, + "ifeval": 0.5860428108529812, + "bbh": 0.47245798883729967, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.3262965425531915, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.3883854166666667, + "hf_avg": 22.242423887650176 + }, + { + "hf_id": "SicariusSicariiStuff/2B-ad", + "name": "2B-ad", + "params_b": 3.204, + "ifeval": 0.4378903531518593, + "bbh": 0.40922431523996955, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.2662067819148936, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.40153124999999995, + "hf_avg": 15.93131891082937 + }, + { + "hf_id": "SicariusSicariiStuff/2B_or_not_2B", + "name": "2B_or_not_2B", + "params_b": 2.506, + "ifeval": 0.2062316874781136, + "bbh": 0.3415917024092019, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.13987699468085107, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3790833333333334, + "hf_avg": 6.592012664617282 + }, + { + "hf_id": "SicariusSicariiStuff/Dusk_Rainbow", + "name": "Dusk_Rainbow", + "params_b": 8.03, + "ifeval": 0.3588057465303173, + "bbh": 0.47717504280736184, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3443317819148936, + "hf_math_lvl5": 0.07477341389728097, + "hf_musr": 0.40252083333333327, + "hf_avg": 18.623890627323348 + }, + { + "hf_id": "SicariusSicariiStuff/Eximius_Persona_5B", + "name": "Eximius_Persona_5B", + "params_b": 5.821, + "ifeval": 0.6559850086658954, + "bbh": 0.4511736018571028, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.31399601063829785, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.38181249999999994, + "hf_avg": 21.833611932328903 + }, + { + "hf_id": "SicariusSicariiStuff/Impish_LLAMA_3B", + "name": "Impish_LLAMA_3B", + "params_b": 3.213, + "ifeval": 0.46299485365496884, + "bbh": 0.40905101627873225, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.2941323138297872, + "hf_math_lvl5": 0.11253776435045318, + "hf_musr": 0.3672708333333334, + "hf_avg": 17.791947493881953 + }, + { + "hf_id": "SicariusSicariiStuff/Impish_Mind_8B", + "name": "Impish_Mind_8B", + "params_b": 8.03, + "ifeval": 0.31791424531354584, + "bbh": 0.46736571616627115, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3308676861702128, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.4069583333333333, + "hf_avg": 18.124203063482273 + }, + { + "hf_id": "SicariusSicariiStuff/Impish_QWEN_14B-1M", + "name": "Impish_QWEN_14B-1M", + "params_b": 14.77, + "ifeval": 0.7867768631675067, + "bbh": 0.6282934814011238, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.504404920212766, + "hf_math_lvl5": 0.39652567975830816, + "hf_musr": 0.46146875000000004, + "hf_avg": 40.23736559941774 + }, + { + "hf_id": "SicariusSicariiStuff/Impish_QWEN_7B-1M", + "name": "Impish_QWEN_7B-1M", + "params_b": 7.616, + "ifeval": 0.6381744881359238, + "bbh": 0.537172912933626, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.4265292553191489, + "hf_math_lvl5": 0.30891238670694865, + "hf_musr": 0.40739583333333335, + "hf_avg": 30.209083552218306 + }, + { + "hf_id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", + "name": "LLAMA-3_8B_Unaligned_BETA", + "params_b": 8.03, + "ifeval": 0.3713203189758729, + "bbh": 0.4717234028484832, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3464926861702128, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.41194791666666664, + "hf_avg": 19.14075237978403 + }, + { + "hf_id": "SicariusSicariiStuff/Phi-Line_14B", + "name": "Phi-Line_14B", + "params_b": 14.66, + "ifeval": 0.6495653754260917, + "bbh": 0.6154430096216078, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.5453789893617021, + "hf_math_lvl5": 0.3859516616314199, + "hf_musr": 0.44785416666666666, + "hf_avg": 37.56208119420943 + }, + { + "hf_id": "SicariusSicariiStuff/Phi-lthy4", + "name": "Phi-lthy4", + "params_b": 11.933, + "ifeval": 0.7679423928509688, + "bbh": 0.587935701572946, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.433344414893617, + "hf_math_lvl5": 0.13670694864048338, + "hf_musr": 0.40829166666666666, + "hf_avg": 30.269040533700707 + }, + { + "hf_id": "SicariusSicariiStuff/Redemption_Wind_24B", + "name": "Redemption_Wind_24B", + "params_b": 23.572, + "ifeval": 0.25014517037017336, + "bbh": 0.642816406969129, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.543218085106383, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.4262395833333333, + "hf_avg": 28.370595438172284 + }, + { + "hf_id": "SicariusSicariiStuff/Wingless_Imp_8B", + "name": "Wingless_Imp_8B", + "params_b": 8.03, + "ifeval": 0.743012983328679, + "bbh": 0.5120376322048542, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3638630319148936, + "hf_math_lvl5": 0.12009063444108761, + "hf_musr": 0.41483333333333333, + "hf_avg": 26.911877977477257 + }, + { + "hf_id": "SicariusSicariiStuff/Zion_Alpha", + "name": "Zion_Alpha", + "params_b": 7.242, + "ifeval": 0.3324024698910003, + "bbh": 0.49321099934509743, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.31316489361702127, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.4726875, + "hf_avg": 19.186491401477962 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", + "name": "SKY-Ko-Llama3.1-8B-lora", + "params_b": 8.03, + "ifeval": 0.5058345190760515, + "bbh": 0.5088388495224864, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3777426861702128, + "hf_math_lvl5": 0.15483383685800603, + "hf_musr": 0.3997916666666667, + "hf_avg": 24.022312491744717 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", + "name": "SKY-Ko-Llama3.1-8B-lora-epoch1", + "params_b": 8.03, + "ifeval": 0.5058345190760515, + "bbh": 0.5088388495224864, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3777426861702128, + "hf_math_lvl5": 0.15483383685800603, + "hf_musr": 0.3997916666666667, + "hf_avg": 24.022312491744717 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", + "name": "SKY-Ko-Llama3.2-1B-lora-epoch3", + "params_b": 1.236, + "ifeval": 0.3247084402718121, + "bbh": 0.3166586087861201, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.12790890957446807, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.33815625, + "hf_avg": 7.679539302024469 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", + "name": "SKY-Ko-Llama3.2-1B-lora-epoch5", + "params_b": 1.236, + "ifeval": 0.4359920566319587, + "bbh": 0.34060156188911545, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.19456449468085107, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.3471458333333333, + "hf_avg": 12.134493116614278 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", + "name": "SKY-Ko-Llama3.2-1B-lora-v2-epoch3", + "params_b": 1.236, + "ifeval": 0.4359920566319587, + "bbh": 0.34060156188911545, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.19456449468085107, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.3471458333333333, + "hf_avg": 12.134493116614278 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", + "name": "SKY-Ko-Llama3.2-1B-lora-v2-epoch5", + "params_b": 1.236, + "ifeval": 0.42467652495378927, + "bbh": 0.33968360414253995, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.19456449468085107, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.34584375, + "hf_avg": 11.804479423825526 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", + "name": "SKY-Ko-Llama3.2-3B-lora-epoch1", + "params_b": 3.213, + "ifeval": 0.5331121424487028, + "bbh": 0.4399628268031015, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.30044880319148937, + "hf_math_lvl5": 0.14577039274924472, + "hf_musr": 0.35222916666666665, + "hf_avg": 20.42021662043027 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", + "name": "SKY-Ko-Llama3.2-3B-lora-epoch2", + "params_b": 3.213, + "ifeval": 0.5331121424487028, + "bbh": 0.4399628268031015, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.30044880319148937, + "hf_math_lvl5": 0.14577039274924472, + "hf_musr": 0.35222916666666665, + "hf_avg": 20.42021662043027 + }, + { + "hf_id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", + "name": "SKY-Ko-Llama3.2-3B-lora-epoch3", + "params_b": 3.213, + "ifeval": 0.5331121424487028, + "bbh": 0.4399628268031015, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.30044880319148937, + "hf_math_lvl5": 0.14577039274924472, + "hf_musr": 0.35222916666666665, + "hf_avg": 20.42021662043027 + }, + { + "hf_id": "Skywork/Skywork-o1-Open-Llama-3.1-8B", + "name": "Skywork-o1-Open-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.3518364605912313, + "bbh": 0.45159089701897237, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.20304188829787234, + "hf_math_lvl5": 0.5211480362537765, + "hf_musr": 0.31564583333333335, + "hf_avg": 20.752994700266516 + }, + { + "hf_id": "Sourjayon/DeepSeek-R1-8b-Sify", + "name": "DeepSeek-R1-8b-Sify", + "params_b": 8.03, + "ifeval": 0.3679481553389451, + "bbh": 0.33793580116642347, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.19805518617021275, + "hf_math_lvl5": 0.24471299093655588, + "hf_musr": 0.3303125, + "hf_avg": 13.313209574216847 + }, + { + "hf_id": "Sourjayon/DeepSeek-R1-ForumNXT", + "name": "DeepSeek-R1-ForumNXT", + "params_b": 1.777, + "ifeval": 0.26028714920854445, + "bbh": 0.3310198487331462, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.16481050531914893, + "hf_math_lvl5": 0.25755287009063443, + "hf_musr": 0.3392395833333333, + "hf_avg": 11.959695622199478 + }, + { + "hf_id": "SpaceYL/ECE_Poirot", + "name": "ECE_Poirot", + "params_b": 1.544, + "ifeval": 0.3106956209524063, + "bbh": 0.42622349736626014, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2883144946808511, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.40264583333333337, + "hf_avg": 15.742560298806495 + }, + { + "hf_id": "Spestly/Athena-1-3B", + "name": "Athena-1-3B", + "params_b": 3.086, + "ifeval": 0.5569167586448401, + "bbh": 0.47015477265388084, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.35189494680851063, + "hf_math_lvl5": 0.23791540785498488, + "hf_musr": 0.43622916666666667, + "hf_avg": 25.48204682938236 + }, + { + "hf_id": "Spestly/Atlas-Pro-1.5B-Preview", + "name": "Atlas-Pro-1.5B-Preview", + "params_b": 1.777, + "ifeval": 0.2429509257658568, + "bbh": 0.349893585329524, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.1924867021276596, + "hf_math_lvl5": 0.31948640483383683, + "hf_musr": 0.3354270833333333, + "hf_avg": 13.9538569769866 + }, + { + "hf_id": "Spestly/Atlas-Pro-7B-Preview", + "name": "Atlas-Pro-7B-Preview", + "params_b": 7.616, + "ifeval": 0.31541642840995227, + "bbh": 0.46679203304308553, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.2970412234042553, + "hf_math_lvl5": 0.5083081570996979, + "hf_musr": 0.3910833333333333, + "hf_avg": 24.63755262041268 + }, + { + "hf_id": "Steelskull/L3.3-MS-Nevoria-70b", + "name": "L3.3-MS-Nevoria-70b", + "params_b": 70.554, + "ifeval": 0.6963268571833845, + "bbh": 0.6997536580025828, + "gpqa": 0.47063758389261745, + "mmlu_pro": 0.5535239361702128, + "hf_math_lvl5": 0.3957703927492447, + "hf_musr": 0.4682291666666667, + "hf_avg": 44.041818757700526 + }, + { + "hf_id": "Steelskull/L3.3-Nevoria-R1-70b", + "name": "L3.3-Nevoria-R1-70b", + "params_b": 70.554, + "ifeval": 0.6023794642659255, + "bbh": 0.6971668662651651, + "gpqa": 0.46895973154362414, + "mmlu_pro": 0.5462932180851063, + "hf_math_lvl5": 0.46299093655589124, + "hf_musr": 0.47753125, + "hf_avg": 43.61308347965567 + }, + { + "hf_id": "SultanR/SmolTulu-1.7b-Instruct", + "name": "SmolTulu-1.7b-Instruct", + "params_b": 1.711, + "ifeval": 0.6540867121459949, + "bbh": 0.3713086260572204, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.17104388297872342, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.35403125, + "hf_avg": 16.331009858160883 + }, + { + "hf_id": "SultanR/SmolTulu-1.7b-Reinforced", + "name": "SmolTulu-1.7b-Reinforced", + "params_b": 1.711, + "ifeval": 0.6790659893526954, + "bbh": 0.3551868188444029, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.17627992021276595, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.34060416666666665, + "hf_avg": 16.5748337360141 + }, + { + "hf_id": "SultanR/SmolTulu-1.7b-it-v0", + "name": "SmolTulu-1.7b-it-v0", + "params_b": 1.711, + "ifeval": 0.6540867121459949, + "bbh": 0.3713086260572204, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.17104388297872342, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.35403125, + "hf_avg": 16.331009858160883 + }, + { + "hf_id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", + "name": "Phi-3-mini-4K-instruct-cpo-simpo", + "params_b": 3.821, + "ifeval": 0.5714049832222946, + "bbh": 0.5681534123661078, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.38605385638297873, + "hf_math_lvl5": 0.15709969788519637, + "hf_musr": 0.3963541666666666, + "hf_avg": 27.216374484698875 + }, + { + "hf_id": "T145/KRONOS-8B-V2", + "name": "KRONOS-8B-V2", + "params_b": 8.03, + "ifeval": 0.5180243974875552, + "bbh": 0.513268555595521, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3737533244680851, + "hf_math_lvl5": 0.22658610271903323, + "hf_musr": 0.38286458333333334, + "hf_avg": 25.04979364610371 + }, + { + "hf_id": "T145/Llama-3.1-8B-Instruct-Zeus", + "name": "Llama-3.1-8B-Instruct-Zeus", + "params_b": 8.03, + "ifeval": 0.7941207108250552, + "bbh": 0.5173982439996302, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.38929521276595747, + "hf_math_lvl5": 0.19561933534743203, + "hf_musr": 0.39762499999999995, + "hf_avg": 29.649994293530643 + }, + { + "hf_id": "T145/ZEUS-8B-V13", + "name": "ZEUS-8B-V13", + "params_b": 8.03, + "ifeval": 0.7904238531540756, + "bbh": 0.5277128851736589, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.39112367021276595, + "hf_math_lvl5": 0.21374622356495468, + "hf_musr": 0.38447916666666665, + "hf_avg": 30.621361680379156 + }, + { + "hf_id": "T145/ZEUS-8B-V13-abliterated", + "name": "ZEUS-8B-V13-abliterated", + "params_b": 8.03, + "ifeval": 0.7877509452696623, + "bbh": 0.5197597316957202, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.38721742021276595, + "hf_math_lvl5": 0.17900302114803626, + "hf_musr": 0.3871458333333333, + "hf_avg": 29.488667624353806 + }, + { + "hf_id": "T145/ZEUS-8B-V17-abliterated-V4", + "name": "ZEUS-8B-V17-abliterated-V4", + "params_b": 8.03, + "ifeval": 0.7228298691915229, + "bbh": 0.5169216944225185, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.37741023936170215, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.4187083333333333, + "hf_avg": 26.58716707472223 + }, + { + "hf_id": "T145/ZEUS-8B-V2-abliterated", + "name": "ZEUS-8B-V2-abliterated", + "params_b": 8.03, + "ifeval": 0.7895495064207414, + "bbh": 0.5128868622210663, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.38248005319148937, + "hf_math_lvl5": 0.21148036253776434, + "hf_musr": 0.3910833333333333, + "hf_avg": 29.796705902139966 + }, + { + "hf_id": "T145/ZEUS-8B-V22", + "name": "ZEUS-8B-V22", + "params_b": 8.03, + "ifeval": 0.7995163942782927, + "bbh": 0.5244915522507715, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.3937832446808511, + "hf_math_lvl5": 0.22280966767371602, + "hf_musr": 0.3989583333333333, + "hf_avg": 31.143603587984483 + }, + { + "hf_id": "T145/ZEUS-8B-V30", + "name": "ZEUS-8B-V30", + "params_b": 8.03, + "ifeval": 0.7435626360279614, + "bbh": 0.5243248855841048, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3943650265957447, + "hf_math_lvl5": 0.15861027190332327, + "hf_musr": 0.4029270833333333, + "hf_avg": 29.0957713811746 + }, + { + "hf_id": "T145/ZEUS-8B-V7", + "name": "ZEUS-8B-V7", + "params_b": 8.03, + "ifeval": 0.7786085364610345, + "bbh": 0.5070394117180643, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3812333776595745, + "hf_math_lvl5": 0.14803625377643503, + "hf_musr": 0.41616666666666663, + "hf_avg": 28.470021765028548 + }, + { + "hf_id": "T145/ZEUS-8B-V8", + "name": "ZEUS-8B-V8", + "params_b": 8.03, + "ifeval": 0.7913979352562313, + "bbh": 0.5064510419864701, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.37608045212765956, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.421375, + "hf_avg": 28.22354116902905 + }, + { + "hf_id": "THUDM/glm-4-9b", + "name": "glm-4-9b", + "params_b": 9, + "ifeval": 0.1426082793654171, + "bbh": 0.5528368141665274, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.4144780585106383, + "hf_musr": 0.4385833333333333, + "hf_avg": 18.006731731716215 + }, + { + "hf_id": "THUDM/glm-4-9b-chat", + "name": "glm-4-9b-chat", + "params_b": 9, + "bbh": 0.47363884291035735, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.316655585106383, + "hf_musr": 0.3994270833333333, + "hf_avg": 10.973477297045166 + }, + { + "hf_id": "THUDM/glm-4-9b-chat-1m", + "name": "glm-4-9b-chat-1m", + "params_b": 9.484, + "bbh": 0.41800578218330303, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.31632313829787234, + "hf_musr": 0.3794583333333333, + "hf_avg": 8.922510186531982 + }, + { + "hf_id": "THUDM/glm-4-9b-chat-1m-hf", + "name": "glm-4-9b-chat-1m-hf", + "params_b": 9.484, + "ifeval": 0.5341106043076814, + "bbh": 0.3900953106836365, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.18143284574468085, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.36888541666666663, + "hf_avg": 15.139213915838658 + }, + { + "hf_id": "THUDM/glm-4-9b-chat-hf", + "name": "glm-4-9b-chat-hf", + "params_b": 9.4, + "ifeval": 0.6513140688927601, + "bbh": 0.4432308604245425, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.27742686170212766, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.35930208333333336, + "hf_avg": 20.54431273192071 + }, + { + "hf_id": "TIGER-Lab/AceCodeRM-7B", + "name": "AceCodeRM-7B", + "params_b": 7.616, + "ifeval": 0.5854931581536988, + "bbh": 0.4773230085351336, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3361037234042553, + "hf_math_lvl5": 0.3466767371601209, + "hf_musr": 0.41920833333333335, + "hf_avg": 27.34471609415536 + }, + { + "hf_id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", + "name": "AceCoder-Qwen2.5-7B-Ins-Rule", + "params_b": 7.616, + "ifeval": 0.742413462944986, + "bbh": 0.5404426673547671, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.4321808510638298, + "hf_math_lvl5": 0.49924471299093653, + "hf_musr": 0.39803125, + "hf_avg": 35.10989936210928 + }, + { + "hf_id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", + "name": "AceCoder-Qwen2.5-Coder-7B-Base-Rule", + "params_b": 7.616, + "ifeval": 0.44076273177391545, + "bbh": 0.49023782785253694, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.37450132978723405, + "hf_math_lvl5": 0.20166163141993956, + "hf_musr": 0.34488541666666667, + "hf_avg": 21.333370512930014 + }, + { + "hf_id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", + "name": "AceCoder-Qwen2.5-Coder-7B-Ins-Rule", + "params_b": 7.616, + "ifeval": 0.6222378843690297, + "bbh": 0.5089236146835355, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.34283577127659576, + "hf_math_lvl5": 0.36027190332326287, + "hf_musr": 0.40463541666666664, + "hf_avg": 28.029959679329718 + }, + { + "hf_id": "TIGER-Lab/MAmmoTH2-7B-Plus", + "name": "MAmmoTH2-7B-Plus", + "params_b": 7.242, + "ifeval": 0.5574664113441224, + "bbh": 0.42346949888019064, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.30169547872340424, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.41235416666666663, + "hf_avg": 21.633507778259585 + }, + { + "hf_id": "TIGER-Lab/Qwen2.5-Math-7B-CFT", + "name": "Qwen2.5-Math-7B-CFT", + "params_b": 7.616, + "ifeval": 0.2776976200924658, + "bbh": 0.46369414980230833, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.29446476063829785, + "hf_math_lvl5": 0.5574018126888217, + "hf_musr": 0.38866666666666666, + "hf_avg": 23.521464490408093 + }, + { + "hf_id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", + "name": "Mistral-7B-Base-SimPO2-5e-7", + "params_b": 7.242, + "ifeval": 0.43918912928806675, + "bbh": 0.43195515014882774, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2765957446808511, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.36041666666666666, + "hf_avg": 16.417452751929584 + }, + { + "hf_id": "Tarek07/Progenitor-V1.1-LLaMa-70B", + "name": "Progenitor-V1.1-LLaMa-70B", + "params_b": 70.554, + "ifeval": 0.6906064796960952, + "bbh": 0.6971116049173388, + "gpqa": 0.45805369127516776, + "mmlu_pro": 0.5465425531914894, + "hf_math_lvl5": 0.35725075528700906, + "hf_musr": 0.47356250000000005, + "hf_avg": 43.00294516350462 + }, + { + "hf_id": "Tarek07/Thalassic-Alpha-LLaMa-70B", + "name": "Thalassic-Alpha-LLaMa-70B", + "params_b": 70.554, + "ifeval": 0.7003484088884161, + "bbh": 0.6940408286616311, + "gpqa": 0.4437919463087248, + "mmlu_pro": 0.543467420212766, + "hf_math_lvl5": 0.3149546827794562, + "hf_musr": 0.4801979166666667, + "hf_avg": 42.2203763514307 + }, + { + "hf_id": "TeeZee/DoubleBagel-57B-v1.0", + "name": "DoubleBagel-57B-v1.0", + "params_b": 56.703, + "ifeval": 0.23363342597640924, + "bbh": 0.325078559362514, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.14777260638297873, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.43148958333333337, + "hf_avg": 8.707748481359532 + }, + { + "hf_id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", + "name": "Indic-gemma-2b-finetuned-sft-Navarasa-2.0", + "params_b": 2.506, + "ifeval": 0.21030310686755588, + "bbh": 0.3240881373468133, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.12790890957446807, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.3899375, + "hf_avg": 6.657818235070503 + }, + { + "hf_id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", + "name": "Indic-gemma-7b-finetuned-sft-Navarasa-2.0", + "params_b": 8.538, + "ifeval": 0.32368449048524583, + "bbh": 0.40229948924733394, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.23503989361702127, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.40832291666666665, + "hf_avg": 13.004827801453324 + }, + { + "hf_id": "TencentARC/LLaMA-Pro-8B", + "name": "LLaMA-Pro-8B", + "params_b": 8.357, + "ifeval": 0.2277135777514772, + "bbh": 0.3484197711435169, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.18110039893617022, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.40181249999999996, + "hf_avg": 8.816698626146762 + }, + { + "hf_id": "TencentARC/LLaMA-Pro-8B-Instruct", + "name": "LLaMA-Pro-8B-Instruct", + "params_b": 8.357, + "ifeval": 0.4486063644463357, + "bbh": 0.4224205282459997, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.19456449468085107, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.41902083333333334, + "hf_avg": 15.28346018029823 + }, + { + "hf_id": "TencentARC/MetaMath-Mistral-Pro", + "name": "MetaMath-Mistral-Pro", + "params_b": 8.987, + "ifeval": 0.21187670935340452, + "bbh": 0.44131618555883606, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.2471742021276596, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.35241666666666666, + "hf_avg": 12.5165268741835 + }, + { + "hf_id": "TencentARC/Mistral_Pro_8B_v0.1", + "name": "Mistral_Pro_8B_v0.1", + "params_b": 8.987, + "ifeval": 0.21145227995053123, + "bbh": 0.4525975968066435, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.2765126329787234, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.42422916666666666, + "hf_avg": 14.195345928021323 + }, + { + "hf_id": "TheDrummer/Cydonia-22B-v1.2", + "name": "Cydonia-22B-v1.2", + "params_b": 22.247, + "ifeval": 0.5635114828654637, + "bbh": 0.580856074392761, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.4140625, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.40217708333333335, + "hf_avg": 28.7900883355216 + }, + { + "hf_id": "TheDrummer/Gemmasutra-Mini-2B-v1", + "name": "Gemmasutra-Mini-2B-v1", + "params_b": 2.614, + "ifeval": 0.25486597782771936, + "bbh": 0.35750190791471836, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.20545212765957446, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.3489791666666666, + "hf_avg": 9.12929252128462 + }, + { + "hf_id": "TheDrummer/Llama-3SOME-8B-v2", + "name": "Llama-3SOME-8B-v2", + "params_b": 8.03, + "ifeval": 0.4508049752434651, + "bbh": 0.5203347869042534, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.37533244680851063, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.3832708333333333, + "hf_avg": 21.812966856684692 + }, + { + "hf_id": "TheDrummer/Ministrations-8B-v1", + "name": "Ministrations-8B-v1", + "params_b": 8.02, + "ifeval": 0.28219346888478125, + "bbh": 0.48766312602251366, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.36436170212765956, + "hf_math_lvl5": 0.18429003021148035, + "hf_musr": 0.44490625, + "hf_avg": 21.29045248198521 + }, + { + "hf_id": "TheDrummer/Rocinante-12B-v1", + "name": "Rocinante-12B-v1", + "params_b": 12.248, + "ifeval": 0.6076499244227538, + "bbh": 0.5065452085797449, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.34773936170212766, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.40171874999999996, + "hf_avg": 24.62809312692346 + }, + { + "hf_id": "TheDrummer/Tiger-Gemma-9B-v1", + "name": "Tiger-Gemma-9B-v1", + "params_b": 9.242, + "ifeval": 0.728150197032762, + "bbh": 0.5703687739329574, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.41181848404255317, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.41616666666666663, + "hf_avg": 30.896643595129223 + }, + { + "hf_id": "TheDrummer/Tiger-Gemma-9B-v2", + "name": "Tiger-Gemma-9B-v2", + "params_b": 9.242, + "ifeval": 0.6985997154217476, + "bbh": 0.5617191114121779, + "gpqa": 0.33976510067114096, + "mmlu_pro": 0.41123670212765956, + "hf_math_lvl5": 0.18202416918429004, + "hf_musr": 0.40841666666666665, + "hf_avg": 29.900202484538976 + }, + { + "hf_id": "TheDrummer/Tiger-Gemma-9B-v3", + "name": "Tiger-Gemma-9B-v3", + "params_b": 9.242, + "ifeval": 0.6820635912711606, + "bbh": 0.5812231557853248, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.40591755319148937, + "hf_math_lvl5": 0.1623867069486405, + "hf_musr": 0.4003541666666666, + "hf_avg": 29.47327542727632 + }, + { + "hf_id": "TheDrunkenSnail/Mother-of-Rhodia-12B", + "name": "Mother-of-Rhodia-12B", + "params_b": 12.248, + "ifeval": 0.6504895898438365, + "bbh": 0.49479138664574934, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.35513630319148937, + "hf_math_lvl5": 0.12235649546827794, + "hf_musr": 0.41241666666666665, + "hf_avg": 25.379307438403675 + }, + { + "hf_id": "TheDrunkenSnail/Son-of-Rhodia", + "name": "Son-of-Rhodia", + "params_b": 12.248, + "ifeval": 0.7046447869430887, + "bbh": 0.5097327647725524, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.3607878989361702, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.4202916666666667, + "hf_avg": 27.216224977371535 + }, + { + "hf_id": "TheTsar1209/nemo-carpmuscle-v0.1", + "name": "nemo-carpmuscle-v0.1", + "params_b": 12.248, + "ifeval": 0.2275639746982451, + "bbh": 0.5083529697101391, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3405917553191489, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.4135, + "hf_avg": 16.794489017380517 + }, + { + "hf_id": "TheTsar1209/qwen-carpmuscle-v0.1", + "name": "qwen-carpmuscle-v0.1", + "params_b": 14.77, + "ifeval": 0.5621628390448454, + "bbh": 0.643430074129922, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.520029920212766, + "hf_math_lvl5": 0.2628398791540785, + "hf_musr": 0.41610416666666666, + "hf_avg": 33.445029423216624 + }, + { + "hf_id": "TheTsar1209/qwen-carpmuscle-v0.2", + "name": "qwen-carpmuscle-v0.2", + "params_b": 14.77, + "ifeval": 0.5256929391791557, + "bbh": 0.6386922464145662, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.5147107712765957, + "hf_math_lvl5": 0.28323262839879154, + "hf_musr": 0.43455208333333334, + "hf_avg": 33.666712709905646 + }, + { + "hf_id": "TheTsar1209/qwen-carpmuscle-v0.3", + "name": "qwen-carpmuscle-v0.3", + "params_b": 14.77, + "ifeval": 0.4476322823441801, + "bbh": 0.6151533941210218, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.5061502659574468, + "hf_math_lvl5": 0.31344410876132933, + "hf_musr": 0.4131875, + "hf_avg": 31.794404327523267 + }, + { + "hf_id": "TheTsar1209/qwen-carpmuscle-v0.4", + "name": "qwen-carpmuscle-v0.4", + "params_b": 14.77, + "ifeval": 0.7202068289915202, + "bbh": 0.6453667027727318, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.5143783244680851, + "hf_math_lvl5": 0.277190332326284, + "hf_musr": 0.45160416666666664, + "hf_avg": 37.393960224042935 + }, + { + "hf_id": "TheTsar1209/qwen-carpmuscle-v0.4.1", + "name": "qwen-carpmuscle-v0.4.1", + "params_b": 14.77, + "ifeval": 0.7359938297051822, + "bbh": 0.6506533698399672, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.5191156914893617, + "hf_math_lvl5": 0.27794561933534745, + "hf_musr": 0.44890625, + "hf_avg": 37.605164486042526 + }, + { + "hf_id": "Tijmen2/cosmosage-v3", + "name": "cosmosage-v3", + "params_b": 8.03, + "ifeval": 0.44823180272787316, + "bbh": 0.4550637900339029, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.24858710106382978, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.4198854166666666, + "hf_avg": 17.354746679203608 + }, + { + "hf_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", + "name": "TinyLlama-1.1B-Chat-v0.1", + "params_b": 1.1, + "ifeval": 0.1478543597654224, + "bbh": 0.30835294748680114, + "gpqa": 0.22902684563758388, + "mmlu_pro": 0.10979055851063829, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.35923958333333333, + "hf_avg": 3.9575773348190606 + }, + { + "hf_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", + "name": "TinyLlama-1.1B-Chat-v0.5", + "params_b": 1.1, + "ifeval": 0.1633665341294432, + "bbh": 0.3105046915935697, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.10962433510638298, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.36612500000000003, + "hf_avg": 4.126163903736939 + }, + { + "hf_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "name": "TinyLlama-1.1B-Chat-v0.6", + "params_b": 1.1, + "ifeval": 0.15742119797692344, + "bbh": 0.3066976656166826, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11486037234042554, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.34221875, + "hf_avg": 4.2942762818616345 + }, + { + "hf_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "name": "TinyLlama-1.1B-Chat-v1.0", + "params_b": 1.1, + "ifeval": 0.0595763684800773, + "bbh": 0.3103562867491015, + "gpqa": 0.25, + "mmlu_pro": 0.11012300531914894, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.35152083333333334, + "hf_avg": 2.818859486124847 + }, + { + "hf_id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", + "name": "TinyLlama-1.1B-intermediate-step-1431k-3T", + "params_b": 1.1, + "ifeval": 0.22766371006706648, + "bbh": 0.3071188438267271, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11203457446808511, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.33803125, + "hf_avg": 5.230318100791095 + }, + { + "hf_id": "TinyLlama/TinyLlama_v1.1", + "name": "TinyLlama_v1.1", + "params_b": 1.1, + "ifeval": 0.20006139266036338, + "bbh": 0.30237018045076064, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.10488696808510638, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.36996874999999996, + "hf_avg": 4.824553844580785 + }, + { + "hf_id": "ToastyPigeon/Sto-vo-kor-12B", + "name": "Sto-vo-kor-12B", + "params_b": 12.248, + "ifeval": 0.5501225636865739, + "bbh": 0.5064617128925814, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.33976063829787234, + "hf_math_lvl5": 0.10876132930513595, + "hf_musr": 0.39384375, + "hf_avg": 22.997938390529622 + }, + { + "hf_id": "Trappu/Magnum-Picaro-0.7-v2-12b", + "name": "Magnum-Picaro-0.7-v2-12b", + "params_b": 12.248, + "ifeval": 0.300278815764394, + "bbh": 0.5506661918828847, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.35804521276595747, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.47271875, + "hf_avg": 21.730064133818086 + }, + { + "hf_id": "Trappu/Nemo-Picaro-12B", + "name": "Nemo-Picaro-12B", + "params_b": 12.248, + "ifeval": 0.2577139766929525, + "bbh": 0.5489586125997546, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.36045545212765956, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.47259375, + "hf_avg": 21.362492548877267 + }, + { + "hf_id": "Triangle104/Annunaki-12b", + "name": "Annunaki-12b", + "params_b": 12.248, + "ifeval": 0.3872070550583563, + "bbh": 0.5498969437971782, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3720910904255319, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.44087499999999996, + "hf_avg": 23.36969772271661 + }, + { + "hf_id": "Triangle104/BigTalker-Lite-8B", + "name": "BigTalker-Lite-8B", + "params_b": 8.03, + "ifeval": 0.3689222374411007, + "bbh": 0.5308138241234059, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.34308510638297873, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.42084375, + "hf_avg": 20.97899274764932 + }, + { + "hf_id": "Triangle104/Chatty-Harry_V2.0", + "name": "Chatty-Harry_V2.0", + "params_b": 12.248, + "ifeval": 0.3325520729442324, + "bbh": 0.5318928049062546, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.36826795212765956, + "hf_math_lvl5": 0.13897280966767372, + "hf_musr": 0.40782291666666665, + "hf_avg": 21.833190818566663 + }, + { + "hf_id": "Triangle104/Chatty-Harry_V3.0", + "name": "Chatty-Harry_V3.0", + "params_b": 12.248, + "ifeval": 0.36749823800848413, + "bbh": 0.5526193453608234, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.37017952127659576, + "hf_math_lvl5": 0.11253776435045318, + "hf_musr": 0.44084375, + "hf_avg": 23.114766630549312 + }, + { + "hf_id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", + "name": "DS-Distilled-Hermes-Llama-3.1_TIES", + "params_b": 8.03, + "ifeval": 0.13641360479084386, + "bbh": 0.292845246551473, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.11037234042553191, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.36209375000000005, + "hf_avg": 3.378416433125681 + }, + { + "hf_id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", + "name": "DS-R1-Distill-Q2.5-10B-Harmony", + "params_b": 10.366, + "ifeval": 0.17508211545366295, + "bbh": 0.2643276743386568, + "gpqa": 0.2105704697986577, + "mmlu_pro": 0.11727061170212766, + "hf_musr": 0.31276041666666665, + "hf_avg": 3.7584241508614915 + }, + { + "hf_id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", + "name": "DS-R1-Distill-Q2.5-14B-Harmony_V0.1", + "params_b": 14.77, + "ifeval": 0.4515042309959796, + "bbh": 0.5783379428926061, + "gpqa": 0.3934563758389262, + "mmlu_pro": 0.4601063829787234, + "hf_math_lvl5": 0.5551359516616314, + "hf_musr": 0.5566875000000001, + "hf_avg": 38.40633267860269 + }, + { + "hf_id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", + "name": "DS-R1-Distill-Q2.5-7B-RP", + "params_b": 7.616, + "ifeval": 0.34454248061809334, + "bbh": 0.43834886662348205, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.2890625, + "hf_math_lvl5": 0.46827794561933533, + "hf_musr": 0.40302083333333333, + "hf_avg": 23.29151402275458 + }, + { + "hf_id": "Triangle104/DSR1-Distill-Llama-Lit-8B", + "name": "DSR1-Distill-Llama-Lit-8B", + "params_b": 8.03, + "ifeval": 0.18852090231696345, + "bbh": 0.4284056327107781, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.27975398936170215, + "hf_math_lvl5": 0.35196374622356497, + "hf_musr": 0.35346875, + "hf_avg": 17.835032823738523 + }, + { + "hf_id": "Triangle104/DSR1-Distill-Qwen-7B-RP", + "name": "DSR1-Distill-Qwen-7B-RP", + "params_b": 7.613, + "ifeval": 0.36092900171544834, + "bbh": 0.4326490703099772, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.30277593085106386, + "hf_math_lvl5": 0.48036253776435045, + "hf_musr": 0.40454166666666663, + "hf_avg": 24.099711456808993 + }, + { + "hf_id": "Triangle104/Dark-Chivalry_V1.0", + "name": "Dark-Chivalry_V1.0", + "params_b": 8.03, + "ifeval": 0.4325700253106203, + "bbh": 0.4974207759950637, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.34441489361702127, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.4181770833333333, + "hf_avg": 21.672950420599893 + }, + { + "hf_id": "Triangle104/Distilled-DarkPlanet-Allades-8B", + "name": "Distilled-DarkPlanet-Allades-8B", + "params_b": 8.03, + "ifeval": 0.3460163477351206, + "bbh": 0.4633948672868899, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.29014295212765956, + "hf_math_lvl5": 0.4003021148036254, + "hf_musr": 0.35375, + "hf_avg": 21.683057829608618 + }, + { + "hf_id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", + "name": "Distilled-DarkPlanet-Allades-8B_TIES", + "params_b": 8.03, + "ifeval": 0.3891807071902552, + "bbh": 0.5041556910813355, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.340093085106383, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.3868020833333334, + "hf_avg": 20.21392622497977 + }, + { + "hf_id": "Triangle104/Hermes-Llama-3.2-CoT", + "name": "Hermes-Llama-3.2-CoT", + "params_b": 3.213, + "ifeval": 0.4177571066991139, + "bbh": 0.4615751505493966, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2947140957446808, + "hf_math_lvl5": 0.09516616314199396, + "hf_musr": 0.36978125, + "hf_avg": 17.621221154166122 + }, + { + "hf_id": "Triangle104/Hermes-Llama-3.2-CoT-Summary", + "name": "Hermes-Llama-3.2-CoT-Summary", + "params_b": 3.213, + "ifeval": 0.48302836473889277, + "bbh": 0.42003008354054533, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.29014295212765956, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.3575, + "hf_avg": 16.766151353052297 + }, + { + "hf_id": "Triangle104/Herodotos-14B", + "name": "Herodotos-14B", + "params_b": 14.77, + "ifeval": 0.4667415790103592, + "bbh": 0.6435044367110887, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.5290059840425532, + "hf_math_lvl5": 0.5045317220543807, + "hf_musr": 0.4795416666666667, + "hf_avg": 38.337259928842606 + }, + { + "hf_id": "Triangle104/Herodotos-14B_V0.1", + "name": "Herodotos-14B_V0.1", + "params_b": 14.77, + "ifeval": 0.1878715142488597, + "bbh": 0.30172239497895226, + "gpqa": 0.22399328859060402, + "mmlu_pro": 0.11643949468085106, + "hf_musr": 0.3683854166666667, + "hf_avg": 4.56388866448478 + }, + { + "hf_id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", + "name": "L3.1-8B-Dusky-Ink_v0.r1", + "params_b": 8.03, + "ifeval": 0.19848779017451473, + "bbh": 0.43372778578458115, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.320561835106383, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.3988333333333333, + "hf_avg": 13.969324061974744 + }, + { + "hf_id": "Triangle104/LThreePointOne-8B-HermesBlackroot", + "name": "LThreePointOne-8B-HermesBlackroot", + "params_b": 8.03, + "ifeval": 0.17920340252751588, + "bbh": 0.4998333246909241, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.32845744680851063, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3585520833333333, + "hf_avg": 15.16012857459314 + }, + { + "hf_id": "Triangle104/LThreePointOne-8B-HermesInk", + "name": "LThreePointOne-8B-HermesInk", + "params_b": 8.03, + "ifeval": 0.4031192790684273, + "bbh": 0.5222765555856439, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.34674202127659576, + "hf_math_lvl5": 0.17220543806646527, + "hf_musr": 0.4129375, + "hf_avg": 22.69615504867416 + }, + { + "hf_id": "Triangle104/Llama3.1-cc-Lit-8b", + "name": "Llama3.1-cc-Lit-8b", + "params_b": 8.03, + "ifeval": 0.2993047336622384, + "bbh": 0.3847994561866892, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.30044880319148937, + "hf_math_lvl5": 0.0030211480362537764, + "hf_musr": 0.38540625, + "hf_avg": 12.717562119702656 + }, + { + "hf_id": "Triangle104/Minerva-1.5b", + "name": "Minerva-1.5b", + "params_b": 1.777, + "ifeval": 0.2694295580171722, + "bbh": 0.4025709779119226, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.269780585106383, + "hf_math_lvl5": 0.1027190332326284, + "hf_musr": 0.3655, + "hf_avg": 14.467080512966321 + }, + { + "hf_id": "Triangle104/Minerva-1.5b_V0.2", + "name": "Minerva-1.5b_V0.2", + "params_b": 1.777, + "ifeval": 0.3083474071020448, + "bbh": 0.3989042137094949, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.29105718085106386, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.3960104166666667, + "hf_avg": 15.021440291369977 + }, + { + "hf_id": "Triangle104/Minerva-10b", + "name": "Minerva-10b", + "params_b": 10.067, + "ifeval": 0.1878715142488597, + "bbh": 0.4462036157096501, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.23179853723404256, + "hf_musr": 0.36270833333333335, + "hf_avg": 10.977971126497692 + }, + { + "hf_id": "Triangle104/Minerva-14b", + "name": "Minerva-14b", + "params_b": 14.77, + "ifeval": 0.3467898509288687, + "bbh": 0.6300829439447851, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5193650265957447, + "hf_math_lvl5": 0.30513595166163143, + "hf_musr": 0.476625, + "hf_avg": 32.40842006449074 + }, + { + "hf_id": "Triangle104/Minerva-7b", + "name": "Minerva-7b", + "params_b": 7.616, + "ifeval": 0.3724196243744376, + "bbh": 0.5498400501314606, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.44439827127659576, + "hf_math_lvl5": 0.283987915407855, + "hf_musr": 0.4143333333333333, + "hf_avg": 26.501050696755055 + }, + { + "hf_id": "Triangle104/Minerva-8b", + "name": "Minerva-8b", + "params_b": 7.248, + "ifeval": 0.17208451353519771, + "bbh": 0.46686093526780637, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.30892619680851063, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.4272916666666667, + "hf_avg": 14.317747008078769 + }, + { + "hf_id": "Triangle104/Mistral-Redemption-Arc", + "name": "Mistral-Redemption-Arc", + "params_b": 23.572, + "ifeval": 0.40289432040319684, + "bbh": 0.6254876729064861, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.4509640957446808, + "hf_math_lvl5": 0.41012084592145015, + "hf_musr": 0.45951041666666664, + "hf_avg": 32.78693047639915 + }, + { + "hf_id": "Triangle104/Mistral-Small-24b-Harmony", + "name": "Mistral-Small-24b-Harmony", + "params_b": 23.572, + "ifeval": 0.16871234989826994, + "bbh": 0.6433732705921861, + "gpqa": 0.38422818791946306, + "mmlu_pro": 0.5430518617021277, + "hf_math_lvl5": 0.19108761329305135, + "hf_musr": 0.4276041666666666, + "hf_avg": 27.168346239791443 + }, + { + "hf_id": "Triangle104/Pans_Gutenbergum_V0.1", + "name": "Pans_Gutenbergum_V0.1", + "params_b": 12.248, + "ifeval": 0.309696050922663, + "bbh": 0.5541091780465247, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.3696808510638298, + "hf_math_lvl5": 0.10574018126888217, + "hf_musr": 0.4528125, + "hf_avg": 22.27617513692708 + }, + { + "hf_id": "Triangle104/Phi-4-AbliteratedRP", + "name": "Phi-4-AbliteratedRP", + "params_b": 14.66, + "ifeval": 0.49227050891634194, + "bbh": 0.6708776140201277, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.530751329787234, + "hf_math_lvl5": 0.3074018126888218, + "hf_musr": 0.5098333333333334, + "hf_avg": 37.374976244999196 + }, + { + "hf_id": "Triangle104/Phi4-RP-o1", + "name": "Phi4-RP-o1", + "params_b": 14.66, + "ifeval": 0.022007163215822904, + "bbh": 0.6652563961373095, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.5110538563829787, + "hf_math_lvl5": 0.3776435045317221, + "hf_musr": 0.4755729166666667, + "hf_avg": 28.809089491248006 + }, + { + "hf_id": "Triangle104/Porpoise-R1-Llama3.2-3b", + "name": "Porpoise-R1-Llama3.2-3b", + "params_b": 3.213, + "ifeval": 0.4352174452674459, + "bbh": 0.38236758004585686, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.21168550531914893, + "hf_math_lvl5": 0.04229607250755287, + "hf_musr": 0.357625, + "hf_avg": 13.626836330964721 + }, + { + "hf_id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", + "name": "Q2.5-14B-Instruct-1M-Harmony", + "params_b": 14.77, + "ifeval": 0.5986327389105351, + "bbh": 0.6338808682301471, + "gpqa": 0.375, + "mmlu_pro": 0.5074800531914894, + "hf_math_lvl5": 0.3768882175226586, + "hf_musr": 0.4795416666666667, + "hf_avg": 37.73826910663008 + }, + { + "hf_id": "Triangle104/Q2.5-AthensCOT", + "name": "Q2.5-AthensCOT", + "params_b": 7.616, + "ifeval": 0.45727447616767947, + "bbh": 0.5541692533534606, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.4379155585106383, + "hf_math_lvl5": 0.29154078549848944, + "hf_musr": 0.4578333333333333, + "hf_avg": 28.558050099748765 + }, + { + "hf_id": "Triangle104/Q2.5-CodeR1-3B", + "name": "Q2.5-CodeR1-3B", + "params_b": 3.085, + "ifeval": 0.35875587884590665, + "bbh": 0.4660844324968853, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.2978723404255319, + "hf_math_lvl5": 0.16389728096676737, + "hf_musr": 0.43154166666666666, + "hf_avg": 19.81078532589114 + }, + { + "hf_id": "Triangle104/Q2.5-EVACOT-7b", + "name": "Q2.5-EVACOT-7b", + "params_b": 7.616, + "ifeval": 0.5784241368457914, + "bbh": 0.5505524946794311, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.43309507978723405, + "hf_math_lvl5": 0.2824773413897281, + "hf_musr": 0.4498645833333333, + "hf_avg": 30.447241223161303 + }, + { + "hf_id": "Triangle104/Q2.5-Humane-RP", + "name": "Q2.5-Humane-RP", + "params_b": 7.616, + "ifeval": 0.4411627814199657, + "bbh": 0.5649289292164736, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.44921875, + "hf_math_lvl5": 0.3391238670694864, + "hf_musr": 0.4528125, + "hf_avg": 29.83187981671115 + }, + { + "hf_id": "Triangle104/Q2.5-Instruct-1M_Harmony", + "name": "Q2.5-Instruct-1M_Harmony", + "params_b": 7.616, + "ifeval": 0.6038034636985421, + "bbh": 0.5373243549676157, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.43658577127659576, + "hf_math_lvl5": 0.3323262839879154, + "hf_musr": 0.46878125, + "hf_avg": 32.07867587170479 + }, + { + "hf_id": "Triangle104/Q2.5-R1-3B", + "name": "Q2.5-R1-3B", + "params_b": 3.085, + "ifeval": 0.4213542290012722, + "bbh": 0.48124304786769817, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.38131648936170215, + "hf_math_lvl5": 0.2673716012084592, + "hf_musr": 0.43197916666666664, + "hf_avg": 24.667669563202328 + }, + { + "hf_id": "Triangle104/Q2.5-R1-7B", + "name": "Q2.5-R1-7B", + "params_b": 7.613, + "ifeval": 0.1346150436397647, + "bbh": 0.30065625818799685, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.1180186170212766, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.3607291666666666, + "hf_avg": 3.783468006938507 + }, + { + "hf_id": "Triangle104/Robo-Gutenberg_V1.0", + "name": "Robo-Gutenberg_V1.0", + "params_b": 14.77, + "ifeval": 0.6007559940956662, + "bbh": 0.653716560941194, + "gpqa": 0.3859060402684564, + "mmlu_pro": 0.5391456117021277, + "hf_math_lvl5": 0.4561933534743202, + "hf_musr": 0.47436458333333337, + "hf_avg": 40.348593527637 + }, + { + "hf_id": "Triangle104/RomboHermes3-R1-Llama3.2-3b", + "name": "RomboHermes3-R1-Llama3.2-3b", + "params_b": 3.213, + "ifeval": 0.300728733094855, + "bbh": 0.42639466274987187, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2957114361702128, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.36565625, + "hf_avg": 14.65828137567246 + }, + { + "hf_id": "Triangle104/Rombos-Novasky-7B_V1c", + "name": "Rombos-Novasky-7B_V1c", + "params_b": 7.616, + "ifeval": 0.40801517750679306, + "bbh": 0.4349247829177707, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.27376994680851063, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.44645833333333335, + "hf_avg": 18.209818018566242 + }, + { + "hf_id": "Triangle104/Set-70b", + "name": "Set-70b", + "params_b": 70.554, + "ifeval": 0.7642954028643998, + "bbh": 0.70142939330013, + "gpqa": 0.4463087248322148, + "mmlu_pro": 0.5442154255319149, + "hf_math_lvl5": 0.3640483383685801, + "hf_musr": 0.46956250000000005, + "hf_avg": 44.03469176472607 + }, + { + "hf_id": "Tsunami-th/Tsunami-0.5-7B-Instruct", + "name": "Tsunami-0.5-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.7400153814102137, + "bbh": 0.552369427738073, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.44132313829787234, + "hf_math_lvl5": 0.5045317220543807, + "hf_musr": 0.42571875, + "hf_avg": 36.42709650938436 + }, + { + "hf_id": "Tsunami-th/Tsunami-0.5x-7B-Instruct", + "name": "Tsunami-0.5x-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.709915247099917, + "bbh": 0.5592865858560252, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.44581117021276595, + "hf_math_lvl5": 0.4206948640483384, + "hf_musr": 0.46667708333333335, + "hf_avg": 36.004746535208355 + }, + { + "hf_id": "Tsunami-th/Tsunami-1.0-14B-Instruct", + "name": "Tsunami-1.0-14B-Instruct", + "params_b": 14.77, + "ifeval": 0.7829049145157072, + "bbh": 0.6438763263011559, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.5248503989361702, + "hf_math_lvl5": 0.45845921450151056, + "hf_musr": 0.44593750000000004, + "hf_avg": 41.840045325237675 + }, + { + "hf_id": "Tsunami-th/Tsunami-1.0-7B-Instruct", + "name": "Tsunami-1.0-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.730872972601586, + "bbh": 0.549071195618326, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.4424035904255319, + "hf_math_lvl5": 0.4335347432024169, + "hf_musr": 0.44928125, + "hf_avg": 35.74871261762576 + }, + { + "hf_id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", + "name": "Gemma-2-9B-It-SPPO-Iter1", + "params_b": 9.242, + "ifeval": 0.308221075634871, + "bbh": 0.5968934762705508, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.39070811170212766, + "hf_math_lvl5": 0.08987915407854985, + "hf_musr": 0.4099375, + "hf_avg": 22.586154586899323 + }, + { + "hf_id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", + "name": "Gemma-2-9B-It-SPPO-Iter2", + "params_b": 9.242, + "ifeval": 0.3100196367859502, + "bbh": 0.5989880877421281, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.386968085106383, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.4139375, + "hf_avg": 22.563072986134326 + }, + { + "hf_id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", + "name": "Gemma-2-9B-It-SPPO-Iter3", + "params_b": 9.242, + "ifeval": 0.31671409637539505, + "bbh": 0.6007080229268026, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.382563164893617, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.41660416666666666, + "hf_avg": 22.65046295673854 + }, + { + "hf_id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", + "name": "Llama-3-Instruct-8B-SPPO-Iter1", + "params_b": 8.03, + "ifeval": 0.7298988904994304, + "bbh": 0.5057890691082708, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.37109375, + "hf_math_lvl5": 0.1148036253776435, + "hf_musr": 0.3567916666666666, + "hf_avg": 24.76595847369129 + }, + { + "hf_id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", + "name": "Llama-3-Instruct-8B-SPPO-Iter2", + "params_b": 8, + "ifeval": 0.6988745417713889, + "bbh": 0.5088696278852957, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.36918218085106386, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.35942708333333334, + "hf_avg": 24.040942547288513 + }, + { + "hf_id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", + "name": "Llama-3-Instruct-8B-SPPO-Iter3", + "params_b": 8.03, + "ifeval": 0.6834122350917787, + "bbh": 0.50795799761689, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.3644448138297872, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.36606249999999996, + "hf_avg": 23.693396308174695 + }, + { + "hf_id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", + "name": "Llama-3-Instruct-8B-SPPO-Iter3", + "params_b": 8.03, + "ifeval": 0.67029814226253, + "bbh": 0.5076407742830437, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.3657746010638298, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.3647291666666667, + "hf_avg": 23.05947024187678 + }, + { + "hf_id": "UCLA-AGI/Mistral7B-PairRM-SPPO", + "name": "Mistral7B-PairRM-SPPO", + "params_b": 7.242, + "ifeval": 0.43549227161708715, + "bbh": 0.4438979817093698, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.26205119680851063, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.39647916666666666, + "hf_avg": 16.44469675653773 + }, + { + "hf_id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", + "name": "Mistral7B-PairRM-SPPO-Iter1", + "params_b": 7.242, + "ifeval": 0.5047352136774869, + "bbh": 0.4468056921650662, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.26953125, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.3991770833333333, + "hf_avg": 17.91774579525423 + }, + { + "hf_id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", + "name": "Mistral7B-PairRM-SPPO-Iter2", + "params_b": 7.242, + "ifeval": 0.4445848127413041, + "bbh": 0.4465719945610438, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.2677027925531915, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.40854166666666664, + "hf_avg": 17.118139500757714 + }, + { + "hf_id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", + "name": "Mistral7B-PairRM-SPPO-Iter3", + "params_b": 7.242, + "ifeval": 0.4350678422142138, + "bbh": 0.4396587862984616, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.2657912234042553, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.40711458333333334, + "hf_avg": 16.488657432502865 + }, + { + "hf_id": "Unbabel/TowerInstruct-Mistral-7B-v0.2", + "name": "TowerInstruct-Mistral-7B-v0.2", + "params_b": 7.242, + "ifeval": 0.2843422119975, + "bbh": 0.388195180992626, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.19680851063829788, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.4522291666666667, + "hf_avg": 11.902717149315622 + }, + { + "hf_id": "Undi95/MG-FinalMix-72B", + "name": "MG-FinalMix-72B", + "params_b": 72.706, + "ifeval": 0.8013648231137825, + "bbh": 0.6973017446417747, + "gpqa": 0.3850671140939597, + "mmlu_pro": 0.542719414893617, + "hf_math_lvl5": 0.3972809667673716, + "hf_musr": 0.48227083333333337, + "hf_avg": 44.29736213992491 + }, + { + "hf_id": "Undi95/Phi4-abliterated", + "name": "Phi4-abliterated", + "params_b": 14.66, + "ifeval": 0.6617552538375954, + "bbh": 0.680902103041113, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.528091755319149, + "hf_math_lvl5": 0.37009063444108764, + "hf_musr": 0.4034270833333333, + "hf_avg": 37.42237137162351 + }, + { + "hf_id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", + "name": "Llama-3-SauerkrautLM-70b-Instruct", + "params_b": 70.554, + "ifeval": 0.8044621604010691, + "bbh": 0.6663247245334951, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.5392287234042553, + "hf_math_lvl5": 0.2280966767371601, + "hf_musr": 0.43393750000000003, + "hf_avg": 38.00558783767682 + }, + { + "hf_id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", + "name": "Llama-3-SauerkrautLM-8b-Instruct", + "params_b": 8.03, + "ifeval": 0.744536718130117, + "bbh": 0.494337579362695, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3857214095744681, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.42410416666666667, + "hf_avg": 26.66765472658618 + }, + { + "hf_id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", + "name": "Llama-3.1-SauerkrautLM-70b-Instruct", + "params_b": 70.554, + "ifeval": 0.8656365111238181, + "bbh": 0.7006249194404001, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.5334940159574468, + "hf_math_lvl5": 0.3693353474320242, + "hf_musr": 0.4710833333333333, + "hf_avg": 43.413769840000015 + }, + { + "hf_id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", + "name": "Llama-3.1-SauerkrautLM-8b-Instruct", + "params_b": 8.03, + "ifeval": 0.8017393848322452, + "bbh": 0.5114932190011187, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3890458776595745, + "hf_math_lvl5": 0.19410876132930513, + "hf_musr": 0.4148020833333333, + "hf_avg": 29.931073085077475 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-1.5b", + "name": "SauerkrautLM-1.5b", + "params_b": 1.544, + "ifeval": 0.24040324117785256, + "bbh": 0.3703912164863146, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.21509308510638298, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.37390625000000005, + "hf_avg": 10.273562653094961 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-7b-HerO", + "name": "SauerkrautLM-7b-HerO", + "params_b": 7.242, + "ifeval": 0.534610389322553, + "bbh": 0.49044349935812964, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.30460438829787234, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.39238541666666665, + "hf_avg": 19.66931174717597 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-7b-LaserChat", + "name": "SauerkrautLM-7b-LaserChat", + "params_b": 7.242, + "ifeval": 0.5987823419637672, + "bbh": 0.45432707993295685, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.3304521276595745, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.4148020833333333, + "hf_avg": 22.14731649128105 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-Gemma-2b", + "name": "SauerkrautLM-Gemma-2b", + "params_b": 2.506, + "ifeval": 0.24752213017017072, + "bbh": 0.3416315376053174, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.14685837765957446, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.3675833333333333, + "hf_avg": 7.716094956089698 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-Gemma-7b", + "name": "SauerkrautLM-Gemma-7b", + "params_b": 8.538, + "ifeval": 0.3406705319662939, + "bbh": 0.41879127895858687, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.2961269946808511, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.35942708333333334, + "hf_avg": 14.801979415385233 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", + "name": "SauerkrautLM-Mixtral-8x7B-Instruct", + "params_b": 46.703, + "ifeval": 0.5601891869129465, + "bbh": 0.5277342269858817, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3650265957446808, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.42041666666666666, + "hf_avg": 24.487466800390596 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", + "name": "SauerkrautLM-Nemo-12b-Instruct", + "params_b": 12.248, + "ifeval": 0.6112969144093228, + "bbh": 0.5214128647611115, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.33851396276595747, + "hf_math_lvl5": 0.12235649546827794, + "hf_musr": 0.4468958333333333, + "hf_avg": 26.219081516210622 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-Phi-3-medium", + "name": "SauerkrautLM-Phi-3-medium", + "params_b": 13.96, + "ifeval": 0.4408879550703245, + "bbh": 0.6432931765847228, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.46650598404255317, + "hf_math_lvl5": 0.16012084592145015, + "hf_musr": 0.4845, + "hf_avg": 30.407915004992635 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", + "name": "SauerkrautLM-SOLAR-Instruct", + "params_b": 10.732, + "ifeval": 0.49172085621705963, + "bbh": 0.5169447300097646, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.31831781914893614, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.3965416666666666, + "hf_avg": 21.221646172197712 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", + "name": "SauerkrautLM-gemma-2-2b-it", + "params_b": 2.614, + "ifeval": 0.13206625088099574, + "bbh": 0.42408371860644856, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.269281914893617, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.3994583333333333, + "hf_avg": 10.817668945100195 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", + "name": "SauerkrautLM-v2-14b-DPO", + "params_b": 14.77, + "ifeval": 0.7411645544931892, + "bbh": 0.6560374350756156, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.51171875, + "hf_math_lvl5": 0.3164652567975831, + "hf_musr": 0.43746875, + "hf_avg": 37.583891756672784 + }, + { + "hf_id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", + "name": "SauerkrautLM-v2-14b-SFT", + "params_b": 14.77, + "ifeval": 0.6948529900663573, + "bbh": 0.6210355880693049, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.5205285904255319, + "hf_math_lvl5": 0.3285498489425982, + "hf_musr": 0.417875, + "hf_avg": 36.22785641162885 + }, + { + "hf_id": "VIRNECT/llama-3-Korean-8B", + "name": "llama-3-Korean-8B", + "params_b": 8.03, + "ifeval": 0.5058345190760515, + "bbh": 0.49082453083378397, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.3538896276595745, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.36615624999999996, + "hf_avg": 20.245300698827084 + }, + { + "hf_id": "VIRNECT/llama-3-Korean-8B", + "name": "llama-3-Korean-8B", + "params_b": 8.03, + "ifeval": 0.5021376614050719, + "bbh": 0.491837579362695, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.3536402925531915, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.3647916666666666, + "hf_avg": 20.431609341892 + }, + { + "hf_id": "VIRNECT/llama-3-Korean-8B-r-v-0.1", + "name": "llama-3-Korean-8B-r-v-0.1", + "params_b": 16.061, + "ifeval": 0.49157125316382755, + "bbh": 0.48061568139086264, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.3259640957446808, + "hf_math_lvl5": 0.08610271903323263, + "hf_musr": 0.36748958333333337, + "hf_avg": 18.749278536149728 + }, + { + "hf_id": "ValiantLabs/Llama3-70B-Fireplace", + "name": "Llama3-70B-Fireplace", + "params_b": 70.554, + "ifeval": 0.7773596280092377, + "bbh": 0.648899361888402, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.4892785904255319, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.4448541666666667, + "hf_avg": 37.125226832822285 + }, + { + "hf_id": "ValiantLabs/Llama3-70B-ShiningValiant2", + "name": "Llama3-70B-ShiningValiant2", + "params_b": 70.554, + "ifeval": 0.6121712611426571, + "bbh": 0.6338341405069171, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.48977726063829785, + "hf_math_lvl5": 0.20770392749244712, + "hf_musr": 0.4325729166666667, + "hf_avg": 32.730483450533846 + }, + { + "hf_id": "ValiantLabs/Llama3.1-70B-ShiningValiant2", + "name": "Llama3.1-70B-ShiningValiant2", + "params_b": 70.554, + "ifeval": 0.5355346037402979, + "bbh": 0.6738408402945882, + "gpqa": 0.3926174496644295, + "mmlu_pro": 0.5172872340425532, + "hf_math_lvl5": 0.29154078549848944, + "hf_musr": 0.4681041666666667, + "hf_avg": 36.493183854101574 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-Cobalt", + "name": "Llama3.1-8B-Cobalt", + "params_b": 8.03, + "ifeval": 0.3496134700372789, + "bbh": 0.4946769968149292, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3644448138297872, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.3959479166666667, + "hf_avg": 20.239393742398576 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-Cobalt", + "name": "Llama3.1-8B-Cobalt", + "params_b": 8.03, + "ifeval": 0.7168346653545925, + "bbh": 0.4910700749859321, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.36627327127659576, + "hf_math_lvl5": 0.15332326283987915, + "hf_musr": 0.3512395833333333, + "hf_avg": 25.558664369322077 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-Enigma", + "name": "Llama3.1-8B-Enigma", + "params_b": 8.03, + "ifeval": 0.26805542626896633, + "bbh": 0.44776000880153927, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.34092420212765956, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.4196041666666666, + "hf_avg": 16.62515745828593 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-Esper2", + "name": "Llama3.1-8B-Esper2", + "params_b": 8.03, + "ifeval": 0.2567398945907968, + "bbh": 0.4469866863000255, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.29039228723404253, + "hf_math_lvl5": 0.05891238670694864, + "hf_musr": 0.3560729166666667, + "hf_avg": 13.94081009846365 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-Fireplace2", + "name": "Llama3.1-8B-Fireplace2", + "params_b": 8.03, + "ifeval": 0.5483240025354947, + "bbh": 0.4609817052543379, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.24069148936170212, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.34330208333333334, + "hf_avg": 18.312602016344886 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-Fireplace2", + "name": "Llama3.1-8B-Fireplace2", + "params_b": 8.03, + "ifeval": 0.5328118281714739, + "bbh": 0.4613311485871581, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.24235372340425532, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.33666666666666667, + "hf_avg": 18.570580608454396 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", + "name": "Llama3.1-8B-ShiningValiant2", + "params_b": 8.03, + "ifeval": 0.6495653754260917, + "bbh": 0.477390600131639, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.33818151595744683, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.39086458333333335, + "hf_avg": 23.15728097110788 + }, + { + "hf_id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", + "name": "Llama3.1-8B-ShiningValiant2", + "params_b": 8.03, + "ifeval": 0.26780608784691284, + "bbh": 0.4429290017852748, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.292719414893617, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.39591666666666664, + "hf_avg": 15.45803558114332 + }, + { + "hf_id": "ValiantLabs/Llama3.2-3B-Enigma", + "name": "Llama3.2-3B-Enigma", + "params_b": 3.213, + "ifeval": 0.2786218345102107, + "bbh": 0.3722590772046992, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.2427692819148936, + "hf_math_lvl5": 0.04380664652567976, + "hf_musr": 0.3921354166666667, + "hf_avg": 11.692730593705356 + }, + { + "hf_id": "ValiantLabs/Llama3.2-3B-Esper2", + "name": "Llama3.2-3B-Esper2", + "params_b": 3.213, + "ifeval": 0.27497484452364174, + "bbh": 0.38082611390366106, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.22573138297872342, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.3549583333333333, + "hf_avg": 10.944295126634552 + }, + { + "hf_id": "ValiantLabs/Llama3.2-3B-ShiningValiant2", + "name": "Llama3.2-3B-ShiningValiant2", + "params_b": 3.213, + "ifeval": 0.2625101397624968, + "bbh": 0.42259325337870185, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.28291223404255317, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.38664583333333336, + "hf_avg": 14.39069569820728 + }, + { + "hf_id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", + "name": "Vikhr-Llama3.1-8B-Instruct-R-21-09-24", + "params_b": 8.03, + "ifeval": 0.643145742186288, + "bbh": 0.527224269970207, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.3547207446808511, + "hf_math_lvl5": 0.2175226586102719, + "hf_musr": 0.3753958333333334, + "hf_avg": 25.354951361962605 + }, + { + "hf_id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", + "name": "Vikhr-Nemo-12B-Instruct-R-21-09-24", + "params_b": 12.248, + "ifeval": 0.5999315150467426, + "bbh": 0.5212309052827618, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.33976063829787234, + "hf_math_lvl5": 0.1714501510574018, + "hf_musr": 0.40730208333333334, + "hf_avg": 25.01995412263689 + }, + { + "hf_id": "Weyaxi/Bagel-Hermes-2x34B", + "name": "Bagel-Hermes-2x34B", + "params_b": 60.814, + "ifeval": 0.5431532777474878, + "bbh": 0.49166555632285514, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4588597074468085, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.45166666666666666, + "hf_avg": 25.611273447311106 + }, + { + "hf_id": "Weyaxi/Bagel-Hermes-34B-Slerp", + "name": "Bagel-Hermes-34B-Slerp", + "params_b": 34.389, + "ifeval": 0.4602720780861448, + "bbh": 0.5921903605860047, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4703291223404255, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.46220833333333333, + "hf_avg": 27.24685762860255 + }, + { + "hf_id": "Weyaxi/Einstein-v4-7B", + "name": "Einstein-v4-7B", + "params_b": 7.242, + "ifeval": 0.47081299839980145, + "bbh": 0.38494699692741774, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.22589760638297873, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.4681666666666667, + "hf_avg": 16.755664054789627 + }, + { + "hf_id": "Weyaxi/Einstein-v6.1-Llama3-8B", + "name": "Einstein-v6.1-Llama3-8B", + "params_b": 8.03, + "ifeval": 0.4568245588372186, + "bbh": 0.5008295581095018, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.3130817819148936, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.42128125, + "hf_avg": 20.169491366460083 + }, + { + "hf_id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", + "name": "Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", + "params_b": 8.03, + "ifeval": 0.39270247388041507, + "bbh": 0.5043837450549643, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.30925864361702127, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.43324999999999997, + "hf_avg": 19.318742706301332 + }, + { + "hf_id": "Weyaxi/Einstein-v7-Qwen2-7B", + "name": "Einstein-v7-Qwen2-7B", + "params_b": 7.616, + "ifeval": 0.4099633417111043, + "bbh": 0.5161472249498397, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.4095744680851064, + "hf_math_lvl5": 0.19939577039274925, + "hf_musr": 0.43997916666666664, + "hf_avg": 24.806418050095402 + }, + { + "hf_id": "Weyaxi/Einstein-v8-Llama3.2-1B", + "name": "Einstein-v8-Llama3.2-1B", + "params_b": 1.236, + "ifeval": 0.18622255615101263, + "bbh": 0.30184334823943154, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11610704787234043, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.36178125, + "hf_avg": 4.640409160461389 + }, + { + "hf_id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", + "name": "SauerkrautLM-UNA-SOLAR-Instruct", + "params_b": 10.732, + "ifeval": 0.4573243438520902, + "bbh": 0.5166357112030591, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.31532579787234044, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.397875, + "hf_avg": 20.476008390270348 + }, + { + "hf_id": "WizardLMTeam/WizardLM-13B-V1.0", + "name": "WizardLM-13B-V1.0", + "params_b": 13, + "ifeval": 0.18504900331121424, + "bbh": 0.29134447696551025, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11660571808510638, + "hf_musr": 0.34971875, + "hf_avg": 4.546091523510591 + }, + { + "hf_id": "WizardLMTeam/WizardLM-13B-V1.2", + "name": "WizardLM-13B-V1.2", + "params_b": 13, + "ifeval": 0.3392465325336773, + "bbh": 0.44619994364600474, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.25191156914893614, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.43784375000000003, + "hf_avg": 15.177532740883725 + }, + { + "hf_id": "WizardLMTeam/WizardLM-70B-V1.0", + "name": "WizardLM-70B-V1.0", + "params_b": 70, + "ifeval": 0.49514288753839814, + "bbh": 0.5590366047184262, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.34466422872340424, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.43911458333333336, + "hf_avg": 22.3974420993294 + }, + { + "hf_id": "Wladastic/Mini-Think-Base-1B", + "name": "Mini-Think-Base-1B", + "params_b": 1.236, + "ifeval": 0.5588405430923283, + "bbh": 0.35741728048349203, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.17719414893617022, + "hf_math_lvl5": 0.07326283987915408, + "hf_musr": 0.32748958333333333, + "hf_avg": 14.3485600949955 + }, + { + "hf_id": "Xclbr7/Arcanum-12b", + "name": "Arcanum-12b", + "params_b": 12.248, + "ifeval": 0.2906864896253053, + "bbh": 0.5265359354118465, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3586269946808511, + "hf_math_lvl5": 0.11933534743202417, + "hf_musr": 0.41703124999999996, + "hf_avg": 20.757225902526443 + }, + { + "hf_id": "Xclbr7/Hyena-12b", + "name": "Hyena-12b", + "params_b": 12.248, + "ifeval": 0.3404455733010634, + "bbh": 0.5457182415468321, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3439162234042553, + "hf_math_lvl5": 0.11329305135951662, + "hf_musr": 0.39842708333333327, + "hf_avg": 20.76453411894911 + }, + { + "hf_id": "Xclbr7/caliburn-12b", + "name": "caliburn-12b", + "params_b": 12.248, + "ifeval": 0.35763108551975425, + "bbh": 0.5518630300231809, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.36751994680851063, + "hf_math_lvl5": 0.11253776435045318, + "hf_musr": 0.4291875, + "hf_avg": 22.94686461976949 + }, + { + "hf_id": "Xclbr7/caliburn-v2-12b", + "name": "caliburn-v2-12b", + "params_b": 12.248, + "ifeval": 0.2966816934622358, + "bbh": 0.5141426125097639, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.37840757978723405, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.43703125, + "hf_avg": 20.96611306962392 + }, + { + "hf_id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", + "name": "Qwen2.5-Dyanka-7B-Preview", + "params_b": 7.616, + "ifeval": 0.7640205765147586, + "bbh": 0.5543342320067098, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.43758311170212766, + "hf_math_lvl5": 0.4879154078549849, + "hf_musr": 0.44807291666666665, + "hf_avg": 37.29594433542609 + }, + { + "hf_id": "Xkev/Llama-3.2V-11B-cot", + "name": "Llama-3.2V-11B-cot", + "params_b": 10.67, + "ifeval": 0.41580894249480266, + "bbh": 0.495871783411897, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.35871010638297873, + "hf_math_lvl5": 0.1555891238670695, + "hf_musr": 0.4158541666666667, + "hf_avg": 21.759029142464396 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", + "name": "Qwen2.5-14B-1M-YOYO-V3", + "params_b": 14.766, + "ifeval": 0.8398327548681941, + "bbh": 0.6448491305599157, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.5206948138297872, + "hf_math_lvl5": 0.5354984894259819, + "hf_musr": 0.414125, + "hf_avg": 42.55942702673594 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", + "name": "Qwen2.5-14B-YOYO-0510-v2", + "params_b": 14.77, + "ifeval": 0.594710922574325, + "bbh": 0.6552826977321495, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.5380651595744681, + "hf_math_lvl5": 0.44410876132930516, + "hf_musr": 0.47439583333333335, + "hf_avg": 39.98094603659342 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-1005", + "name": "Qwen2.5-14B-YOYO-1005", + "params_b": 14.77, + "ifeval": 0.5971588717935079, + "bbh": 0.6542059787912534, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5382313829787234, + "hf_math_lvl5": 0.452416918429003, + "hf_musr": 0.47303125, + "hf_avg": 40.08590414271186 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", + "name": "Qwen2.5-14B-YOYO-1005-v2", + "params_b": 14.77, + "ifeval": 0.595310442958018, + "bbh": 0.6551321410649699, + "gpqa": 0.38422818791946306, + "mmlu_pro": 0.5371509308510638, + "hf_math_lvl5": 0.4433534743202417, + "hf_musr": 0.4730625, + "hf_avg": 39.99172395122441 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", + "name": "Qwen2.5-14B-YOYO-1010", + "params_b": 14.77, + "ifeval": 0.5898648918203699, + "bbh": 0.6539973096042956, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.5375664893617021, + "hf_math_lvl5": 0.4509063444108761, + "hf_musr": 0.47439583333333335, + "hf_avg": 40.008009214120385 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", + "name": "Qwen2.5-14B-YOYO-1010", + "params_b": 14.77, + "ifeval": 0.7904737208384863, + "bbh": 0.6405986391086301, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.49443151595744683, + "hf_musr": 0.4180625, + "hf_avg": 31.959648585770537 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", + "name": "Qwen2.5-14B-YOYO-1010-v2", + "params_b": 14.77, + "ifeval": 0.594710922574325, + "bbh": 0.6552826977321495, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.5380651595744681, + "hf_math_lvl5": 0.44410876132930516, + "hf_musr": 0.47439583333333335, + "hf_avg": 39.98094603659342 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", + "name": "Qwen2.5-14B-YOYO-SCE", + "params_b": 14.77, + "ifeval": 0.5843694729983111, + "bbh": 0.6489486805510399, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5380651595744681, + "hf_math_lvl5": 0.46148036253776437, + "hf_musr": 0.47042708333333333, + "hf_avg": 39.669216951931844 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-V4", + "name": "Qwen2.5-14B-YOYO-V4", + "params_b": 14.766, + "ifeval": 0.8397828871837835, + "bbh": 0.6490345839036636, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.5169547872340425, + "hf_math_lvl5": 0.5347432024169184, + "hf_musr": 0.41152083333333334, + "hf_avg": 42.28551883596217 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", + "name": "Qwen2.5-14B-YOYO-V4-p1", + "params_b": 14.766, + "ifeval": 0.8203488964835526, + "bbh": 0.6515535751177631, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.5019946808510638, + "hf_math_lvl5": 0.5332326283987915, + "hf_musr": 0.41942708333333334, + "hf_avg": 42.45828532336958 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", + "name": "Qwen2.5-14B-YOYO-V4-p2", + "params_b": 14.766, + "ifeval": 0.8047868544351211, + "bbh": 0.6338919627514907, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.49675864361702127, + "hf_math_lvl5": 0.5166163141993958, + "hf_musr": 0.44345833333333334, + "hf_avg": 41.58466487593814 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-latest", + "name": "Qwen2.5-14B-YOYO-latest", + "params_b": 14.77, + "ifeval": 0.591063932587756, + "bbh": 0.6656232526900528, + "gpqa": 0.3825503355704698, + "mmlu_pro": 0.5370678191489362, + "hf_math_lvl5": 0.4418429003021148, + "hf_musr": 0.469125, + "hf_avg": 40.078312739638136 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", + "name": "Qwen2.5-14B-YOYO-latest-V2", + "params_b": 14.766, + "ifeval": 0.7771346693440072, + "bbh": 0.6299023045601466, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.5223570478723404, + "hf_math_lvl5": 0.5158610271903323, + "hf_musr": 0.42993750000000003, + "hf_avg": 41.84546724325599 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-14B-it-restore", + "name": "Qwen2.5-14B-it-restore", + "params_b": 14.766, + "ifeval": 0.8209484168672456, + "bbh": 0.6387730309916794, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.4900265957446808, + "hf_math_lvl5": 0.5370090634441088, + "hf_musr": 0.40872916666666664, + "hf_avg": 41.50259115054908 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-7B-it-restore", + "name": "Qwen2.5-7B-it-restore", + "params_b": 7.613, + "ifeval": 0.7530796065550517, + "bbh": 0.5406524352251431, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.42877327127659576, + "hf_math_lvl5": 0.5, + "hf_musr": 0.40069791666666665, + "hf_avg": 35.33330306100891 + }, + { + "hf_id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", + "name": "Qwen2.5-Coder-14B-YOYO-1010", + "params_b": 14.77, + "ifeval": 0.5335864395359867, + "bbh": 0.6186663964199025, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.4074966755319149, + "hf_math_lvl5": 0.3217522658610272, + "hf_musr": 0.4422395833333333, + "hf_avg": 32.054639724074 + }, + { + "hf_id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", + "name": "ZYH-LLM-Qwen2.5-14B", + "params_b": 14.77, + "ifeval": 0.594111402190632, + "bbh": 0.6644460038734455, + "gpqa": 0.3859060402684564, + "mmlu_pro": 0.5350731382978723, + "hf_math_lvl5": 0.411631419939577, + "hf_musr": 0.47569791666666666, + "hf_avg": 39.823588693279426 + }, + { + "hf_id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", + "name": "ZYH-LLM-Qwen2.5-14B-V2", + "params_b": 14.766, + "ifeval": 0.5070834275278483, + "bbh": 0.6452083564140533, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5371509308510638, + "hf_math_lvl5": 0.3542296072507553, + "hf_musr": 0.46890625, + "hf_avg": 36.566362481938945 + }, + { + "hf_id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", + "name": "ZYH-LLM-Qwen2.5-14B-V3", + "params_b": 14.766, + "ifeval": 0.8577928784513978, + "bbh": 0.6359248665982408, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.4881150265957447, + "hf_math_lvl5": 0.527190332326284, + "hf_musr": 0.40215625, + "hf_avg": 41.62825190779884 + }, + { + "hf_id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", + "name": "ZYH-LLM-Qwen2.5-14B-V4", + "params_b": 14.766, + "ifeval": 0.8364605912312664, + "bbh": 0.651497220848125, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.5203623670212766, + "hf_math_lvl5": 0.5392749244712991, + "hf_musr": 0.44342708333333336, + "hf_avg": 43.137421470516735 + }, + { + "hf_id": "Yash21/TinyYi-7B-Test", + "name": "TinyYi-7B-Test", + "params_b": 6.061, + "ifeval": 0.18564852369490728, + "bbh": 0.29098007801214715, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.10912566489361702, + "hf_musr": 0.3364479166666667, + "hf_avg": 4.495167294967694 + }, + { + "hf_id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", + "name": "2PRYMMAL-Yi1.5-6B-SLERP", + "params_b": 6.061, + "ifeval": 0.28259351853083153, + "bbh": 0.46647504291710673, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3169880319148936, + "hf_math_lvl5": 0.11329305135951662, + "hf_musr": 0.47560416666666666, + "hf_avg": 18.991811258176195 + }, + { + "hf_id": "Youlln/3PRYMMAL-PHI3-3B-SLERP", + "name": "3PRYMMAL-PHI3-3B-SLERP", + "params_b": 3, + "ifeval": 0.3655500738041729, + "bbh": 0.5421833887682153, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.4001828457446808, + "hf_math_lvl5": 0.1714501510574018, + "hf_musr": 0.46484375, + "hf_avg": 25.138740994778118 + }, + { + "hf_id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", + "name": "4PRYMMAL-GEMMA2-9B-SLERP", + "params_b": 9.242, + "ifeval": 0.2713766140507188, + "bbh": 0.5922529923998928, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.42096077127659576, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.46719791666666666, + "hf_avg": 23.688707771567948 + }, + { + "hf_id": "Youlln/ECE-MIRAGE-1-12B", + "name": "ECE-MIRAGE-1-12B", + "params_b": 15.21, + "ifeval": 0.20698081091503875, + "bbh": 0.30107140221306034, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.11095412234042554, + "hf_musr": 0.3219375, + "hf_avg": 4.785720345472222 + }, + { + "hf_id": "Youlln/ECE-MIRAGE-1-15B", + "name": "ECE-MIRAGE-1-15B", + "params_b": 15.21, + "ifeval": 0.20698081091503875, + "bbh": 0.30107140221306034, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.11095412234042554, + "hf_musr": 0.3219375, + "hf_avg": 4.785720345472222 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", + "name": "ECE-PRYMMAL-0.5B-FT-V3", + "params_b": 0.494, + "ifeval": 0.16419101317836673, + "bbh": 0.30931341134548046, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11610704787234043, + "hf_math_lvl5": 0.0030211480362537764, + "hf_musr": 0.3644479166666667, + "hf_avg": 4.392856379585042 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", + "name": "ECE-PRYMMAL-0.5B-FT-V3-MUSR", + "params_b": 0.494, + "ifeval": 0.15334977858748122, + "bbh": 0.3041148294962408, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.1644780585106383, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.36603125000000003, + "hf_avg": 5.5387028579477615 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", + "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR", + "params_b": 0.494, + "ifeval": 0.1137570535069172, + "bbh": 0.3038362724383693, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.13214760638297873, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3528854166666667, + "hf_avg": 4.211186680289756 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", + "name": "ECE-PRYMMAL-0.5B-SLERP-V2", + "params_b": 0.494, + "ifeval": 0.1611934112599015, + "bbh": 0.2934774313772131, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.10945811170212766, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3831145833333333, + "hf_avg": 4.6271949147226215 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", + "name": "ECE-PRYMMAL-0.5B-SLERP-V3", + "params_b": 0.494, + "ifeval": 0.16701352411601217, + "bbh": 0.29383772587210827, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.10871010638297872, + "hf_musr": 0.354125, + "hf_avg": 3.6630142582547953 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V1", + "params_b": 1.544, + "ifeval": 0.32510848991786234, + "bbh": 0.4208506248736219, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.2935505319148936, + "hf_math_lvl5": 0.10725075528700906, + "hf_musr": 0.4265833333333333, + "hf_avg": 16.681936382880597 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V2", + "params_b": 1.544, + "ifeval": 0.32510848991786234, + "bbh": 0.4208506248736219, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.2935505319148936, + "hf_math_lvl5": 0.10725075528700906, + "hf_musr": 0.4265833333333333, + "hf_avg": 16.681936382880597 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", + "name": "ECE-PRYMMAL-YL-7B-SLERP-V4", + "params_b": 7.616, + "ifeval": 0.2509696494190969, + "bbh": 0.37697272812325017, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.2131815159574468, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.3744895833333333, + "hf_avg": 10.869547461805958 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL0.5-FT", + "name": "ECE-PRYMMAL0.5-FT", + "params_b": 0.494, + "ifeval": 0.18507338306803725, + "bbh": 0.31320911187036277, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.14768949468085107, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.330125, + "hf_avg": 5.585741676013818 + }, + { + "hf_id": "Youlln/ECE-PRYMMAL1B-FT-V1", + "name": "ECE-PRYMMAL1B-FT-V1", + "params_b": 1.544, + "ifeval": 0.2143745262569981, + "bbh": 0.4032647427840684, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.2742686170212766, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.34165625, + "hf_avg": 11.84779796618845 + }, + { + "hf_id": "Youlln/ECE-Qwen0.5B-FT-V2", + "name": "ECE-Qwen0.5B-FT-V2", + "params_b": 0.494, + "ifeval": 0.25259311958935626, + "bbh": 0.328970813623839, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.16655585106382978, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.30628125, + "hf_avg": 7.5746869620543364 + }, + { + "hf_id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", + "name": "ECE.EIFFEIL.ia-0.5B-SLERP", + "params_b": 0.63, + "ifeval": 0.2561403742071038, + "bbh": 0.33056720460862643, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.1903257978723404, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.31021875, + "hf_avg": 8.829965651830063 + }, + { + "hf_id": "Yuma42/KangalKhan-RawRuby-7B", + "name": "KangalKhan-RawRuby-7B", + "params_b": 7.242, + "ifeval": 0.547674614467391, + "bbh": 0.47547278683676025, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.30227726063829785, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.39495833333333336, + "hf_avg": 20.49108954626372 + }, + { + "hf_id": "Yuma42/Llama3.1-IgneousIguana-8B", + "name": "Llama3.1-IgneousIguana-8B", + "params_b": 8.03, + "ifeval": 0.8133297428600558, + "bbh": 0.5190512670457804, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.39735704787234044, + "hf_math_lvl5": 0.21978851963746224, + "hf_musr": 0.42026041666666664, + "hf_avg": 31.476166913247212 + }, + { + "hf_id": "Yuma42/Llama3.1-SuperHawk-8B", + "name": "Llama3.1-SuperHawk-8B", + "params_b": 8.03, + "ifeval": 0.7986420475449585, + "bbh": 0.5199931545260023, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.39453125, + "hf_math_lvl5": 0.2348942598187311, + "hf_musr": 0.40835416666666663, + "hf_avg": 31.135471049209404 + }, + { + "hf_id": "Z1-Coder/Z1-Coder-7B", + "name": "Z1-Coder-7B", + "params_b": 7.613, + "ifeval": 0.3215113676157041, + "bbh": 0.48418251218099567, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.37591422872340424, + "hf_math_lvl5": 0.324773413897281, + "hf_musr": 0.36215625, + "hf_avg": 21.533348699576006 + }, + { + "hf_id": "ZHLiu627/zephyr-7b-gemma-rpo-avg", + "name": "zephyr-7b-gemma-rpo-avg", + "params_b": 8.538, + "ifeval": 0.30060350979844586, + "bbh": 0.41832761356743015, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.2830784574468085, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.40810416666666666, + "hf_avg": 14.588311638428499 + }, + { + "hf_id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", + "name": "L3-Aspire-Heart-Matrix-8B", + "params_b": 8.03, + "ifeval": 0.48335305877294465, + "bbh": 0.5384211938486898, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.3784906914893617, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.4187083333333333, + "hf_avg": 25.81522350685034 + }, + { + "hf_id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", + "name": "Llama-3.1-8B-AthenaSky-MegaMix", + "params_b": 8.03, + "ifeval": 0.63008151704145, + "bbh": 0.5163423288466883, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.3503989361702128, + "hf_math_lvl5": 0.2794561933534743, + "hf_musr": 0.35384375, + "hf_avg": 26.791594885986026 + }, + { + "hf_id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", + "name": "Llama-3.1-8B-RainbowLight-EtherealMix", + "params_b": 8.03, + "ifeval": 0.49734149833552754, + "bbh": 0.5154785280029148, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.363031914893617, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.39470833333333327, + "hf_avg": 22.830979754905858 + }, + { + "hf_id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", + "name": "Llama-3.1-8B-SpecialTitanFusion", + "params_b": 8.03, + "ifeval": 0.7402403400754443, + "bbh": 0.5438928349489152, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3621176861702128, + "hf_math_lvl5": 0.23338368580060423, + "hf_musr": 0.38739583333333333, + "hf_avg": 29.23340398888008 + }, + { + "hf_id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", + "name": "Llama-3.1-8B-SuperNova-EtherealHermes", + "params_b": 8.03, + "ifeval": 0.7338705745200512, + "bbh": 0.5244464882599044, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.37450132978723405, + "hf_math_lvl5": 0.17447129909365558, + "hf_musr": 0.4065833333333333, + "hf_avg": 28.405539774407757 + }, + { + "hf_id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", + "name": "Llama-3.1-8B-SuperTulu-LexiNova", + "params_b": 8.03, + "ifeval": 0.4164583305629064, + "bbh": 0.5078595074869328, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3367686170212766, + "hf_math_lvl5": 0.25302114803625375, + "hf_musr": 0.39706249999999993, + "hf_avg": 23.300170759550948 + }, + { + "hf_id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", + "name": "Qwen-2.5-Aether-SlerpFusion-7B", + "params_b": 7.616, + "ifeval": 0.6261597007052399, + "bbh": 0.5462236205548866, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.43267952127659576, + "hf_math_lvl5": 0.27341389728096677, + "hf_musr": 0.41778125, + "hf_avg": 30.118329802811974 + }, + { + "hf_id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", + "name": "Qwen2.5-7B-CelestialHarmony-1M", + "params_b": 7.613, + "ifeval": 0.5943862285402732, + "bbh": 0.5431374181474681, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.4386635638297872, + "hf_math_lvl5": 0.3474320241691843, + "hf_musr": 0.4595416666666667, + "hf_avg": 32.03891571870145 + }, + { + "hf_id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", + "name": "Qwen2.5-7B-HomerAnvita-NerdMix", + "params_b": 7.616, + "ifeval": 0.7707649037886142, + "bbh": 0.5541319848156986, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.4431515957446808, + "hf_math_lvl5": 0.38368580060422963, + "hf_musr": 0.43905208333333334, + "hf_avg": 35.64186520551805 + }, + { + "hf_id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", + "name": "Qwen2.5-7B-HomerCreative-Mix", + "params_b": 7.616, + "ifeval": 0.7835044348994002, + "bbh": 0.5548068560095062, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.4447307180851064, + "hf_math_lvl5": 0.3564954682779456, + "hf_musr": 0.43495833333333334, + "hf_avg": 34.907245009987854 + }, + { + "hf_id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", + "name": "Qwen2.5-7B-Qandora-CySec", + "params_b": 7.616, + "ifeval": 0.6773172958860268, + "bbh": 0.5490022663689288, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.4484707446808511, + "hf_math_lvl5": 0.2930513595166163, + "hf_musr": 0.4286041666666667, + "hf_avg": 32.02349981666794 + }, + { + "hf_id": "ZeusLabs/L3-Aethora-15B-V2", + "name": "L3-Aethora-15B-V2", + "params_b": 15.01, + "ifeval": 0.7208063493752133, + "bbh": 0.5010910465463698, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.3499833776595745, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.3870833333333333, + "hf_avg": 24.69871364353165 + }, + { + "hf_id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", + "name": "SELM-Llama-3-8B-Instruct-iter-3", + "params_b": 8.03, + "ifeval": 0.6902817856620433, + "bbh": 0.5046089390770511, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.3783244680851064, + "hf_math_lvl5": 0.08610271903323263, + "hf_musr": 0.38451041666666663, + "hf_avg": 24.042937650204056 + }, + { + "hf_id": "aaditya/Llama3-OpenBioLLM-70B", + "name": "Llama3-OpenBioLLM-70B", + "params_b": 70, + "ifeval": 0.7596743307756753, + "bbh": 0.6398872375485518, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.4867021276595745, + "hf_math_lvl5": 0.1971299093655589, + "hf_musr": 0.44171875, + "hf_avg": 34.979020412011955 + }, + { + "hf_id": "abacusai/Dracarys-72B-Instruct", + "name": "Dracarys-72B-Instruct", + "params_b": 72.706, + "ifeval": 0.7855778224001206, + "bbh": 0.6944066392084981, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5456283244680851, + "hf_math_lvl5": 0.39652567975830816, + "hf_musr": 0.4558229166666667, + "hf_avg": 43.377212135916615, + "lb_name": "dracarys-72b-instruct", + "lb_global": 0.42820444444444444, + "lb_reasoning": 0.36, + "lb_math": 0.42769999999999997, + "lb_language": 0.31169, + "lb_if": 0.680835, + "lb_data_analysis": 0.15360000000000001 + }, + { + "hf_id": "abacusai/Liberated-Qwen1.5-14B", + "name": "Liberated-Qwen1.5-14B", + "params_b": 14, + "ifeval": 0.36310212458499, + "bbh": 0.49480009174671863, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.35123005319148937, + "hf_math_lvl5": 0.16012084592145015, + "hf_musr": 0.41746875, + "hf_avg": 20.50814223648692 + }, + { + "hf_id": "abacusai/Llama-3-Smaug-8B", + "name": "Llama-3-Smaug-8B", + "params_b": 8.03, + "ifeval": 0.48667535472546175, + "bbh": 0.4930712769667174, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.3184840425531915, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.36224999999999996, + "hf_avg": 19.067762376529682 + }, + { + "hf_id": "abacusai/Smaug-34B-v0.1", + "name": "Smaug-34B-v0.1", + "params_b": 34.389, + "ifeval": 0.5015625207782018, + "bbh": 0.5357785983493821, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.4542885638297872, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.397875, + "hf_avg": 24.95321808709686 + }, + { + "hf_id": "abacusai/Smaug-72B-v0.1", + "name": "Smaug-72B-v0.1", + "params_b": 72.289, + "ifeval": 0.5167001334237601, + "bbh": 0.5995632330786429, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.4623503989361702, + "hf_math_lvl5": 0.19108761329305135, + "hf_musr": 0.4473229166666666, + "hf_avg": 29.737299261857203 + }, + { + "hf_id": "abacusai/Smaug-Llama-3-70B-Instruct-32K", + "name": "Smaug-Llama-3-70B-Instruct-32K", + "params_b": 70.554, + "ifeval": 0.7761107195574409, + "bbh": 0.6493108088828602, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.47647938829787234, + "hf_math_lvl5": 0.27492447129909364, + "hf_musr": 0.4207916666666667, + "hf_avg": 35.76489160177244 + }, + { + "hf_id": "abacusai/Smaug-Mixtral-v0.1", + "name": "Smaug-Mixtral-v0.1", + "params_b": 46.703, + "ifeval": 0.5554428915278129, + "bbh": 0.5162245602454115, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3351894946808511, + "hf_math_lvl5": 0.09516616314199396, + "hf_musr": 0.4298125, + "hf_avg": 23.821471245327263 + }, + { + "hf_id": "abacusai/bigstral-12b-32k", + "name": "bigstral-12b-32k", + "params_b": 12.476, + "ifeval": 0.41938057686937324, + "bbh": 0.4700122314782882, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.26412898936170215, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.45597916666666666, + "hf_avg": 18.135141503206025 + }, + { + "hf_id": "abacusai/bigyi-15b", + "name": "bigyi-15b", + "params_b": 15.058, + "ifeval": 0.20940327220663396, + "bbh": 0.4345298820215116, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.30028257978723405, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.35378125, + "hf_avg": 13.051824492532413 + }, + { + "hf_id": "abhishek/autotrain-0tmgq-5tpbg", + "name": "autotrain-0tmgq-5tpbg", + "params_b": 0.135, + "ifeval": 0.19571514692127998, + "bbh": 0.3134513987945074, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.11510970744680851, + "hf_musr": 0.36504166666666665, + "hf_avg": 4.856618645954227 + }, + { + "hf_id": "abhishek/autotrain-0tmgq-5tpbg", + "name": "autotrain-0tmgq-5tpbg", + "params_b": 0.135, + "ifeval": 0.19516549422199764, + "bbh": 0.3127326480314375, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11436170212765957, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.35837499999999994, + "hf_avg": 5.051545214400249 + }, + { + "hf_id": "abhishek/autotrain-llama3-70b-orpo-v1", + "name": "autotrain-llama3-70b-orpo-v1", + "params_b": 70.554, + "ifeval": 0.4233023932055834, + "bbh": 0.5997985900252331, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.11220079787234043, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.35790625000000004, + "hf_avg": 14.813377033004464 + }, + { + "hf_id": "abhishek/autotrain-llama3-70b-orpo-v2", + "name": "autotrain-llama3-70b-orpo-v2", + "params_b": 70.554, + "ifeval": 0.5406055931594835, + "bbh": 0.5899473641612185, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.48179853723404253, + "hf_math_lvl5": 0.2107250755287009, + "hf_musr": 0.41133333333333333, + "hf_avg": 28.867313366854955 + }, + { + "hf_id": "abhishek/autotrain-llama3-orpo-v2", + "name": "autotrain-llama3-orpo-v2", + "params_b": 8.03, + "ifeval": 0.4371656094717572, + "bbh": 0.31593828880846425, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.22182513297872342, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.3792395833333333, + "hf_avg": 12.276280777825411 + }, + { + "hf_id": "abhishek/autotrain-vr4a1-e5mms", + "name": "autotrain-vr4a1-e5mms", + "params_b": 16.061, + "ifeval": 0.21422492320376602, + "bbh": 0.5000624442873264, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.36668882978723405, + "hf_math_lvl5": 0.14123867069486404, + "hf_musr": 0.389125, + "hf_avg": 18.65996836134808 + }, + { + "hf_id": "adamo1139/Yi-34B-200K-AEZAKMI-v2", + "name": "Yi-34B-200K-AEZAKMI-v2", + "params_b": 34.389, + "ifeval": 0.4555257827010111, + "bbh": 0.5383819237015192, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.4512965425531915, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.38860416666666664, + "hf_avg": 23.8273490799763 + }, + { + "hf_id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", + "name": "QAIMath-Qwen2.5-7B-TIES", + "params_b": 7.616, + "ifeval": 0.174632198123202, + "bbh": 0.3126379538396578, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.10871010638297872, + "hf_musr": 0.40959375, + "hf_avg": 5.469542016632626 + }, + { + "hf_id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", + "name": "QAIMath-Qwen2.5-7B-TIES", + "params_b": 7.616, + "ifeval": 0.16853725891745014, + "bbh": 0.31242688274884584, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.10663231382978723, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.39629166666666665, + "hf_avg": 4.988441667082756 + }, + { + "hf_id": "aevalone/distill_qw_test", + "name": "distill_qw_test", + "params_b": 7.616, + "ifeval": 0.740889728143548, + "bbh": 0.5245748734435777, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.4091589095744681, + "hf_math_lvl5": 0.4780966767371601, + "hf_musr": 0.38596874999999997, + "hf_avg": 33.68409846940663 + }, + { + "hf_id": "agentlans/Gemma2-9B-AdvancedFuse", + "name": "Gemma2-9B-AdvancedFuse", + "params_b": 9.242, + "ifeval": 0.15427288483446144, + "bbh": 0.585936684475517, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4000166223404255, + "hf_math_lvl5": 0.10045317220543806, + "hf_musr": 0.4230833333333333, + "hf_avg": 20.434579949396277 + }, + { + "hf_id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", + "name": "Llama-3.2-1B-Instruct-CrashCourse12K", + "params_b": 1.236, + "ifeval": 0.5395062877609188, + "bbh": 0.35481032861183426, + "gpqa": 0.2407718120805369, + "mmlu_pro": 0.1809341755319149, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.32104166666666667, + "hf_avg": 13.437967590003339 + }, + { + "hf_id": "agentlans/Llama3.1-Daredevilish", + "name": "Llama3.1-Daredevilish", + "params_b": 8.03, + "ifeval": 0.6291573026237051, + "bbh": 0.5012506630648397, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3696808510638298, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.40909375, + "hf_avg": 25.570843048894687 + }, + { + "hf_id": "agentlans/Llama3.1-Daredevilish-Instruct", + "name": "Llama3.1-Daredevilish-Instruct", + "params_b": 8.03, + "ifeval": 0.7925969760236173, + "bbh": 0.5235442557198345, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3877160904255319, + "hf_math_lvl5": 0.17220543806646527, + "hf_musr": 0.3910833333333333, + "hf_avg": 29.365203712781987 + }, + { + "hf_id": "agentlans/Llama3.1-LexiHermes-SuperStorm", + "name": "Llama3.1-LexiHermes-SuperStorm", + "params_b": 8.03, + "ifeval": 0.7834545672149895, + "bbh": 0.5266460888159817, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.3843916223404255, + "hf_math_lvl5": 0.16163141993957703, + "hf_musr": 0.3962604166666667, + "hf_avg": 29.430987251890873 + }, + { + "hf_id": "agentlans/Llama3.1-SuperDeepFuse", + "name": "Llama3.1-SuperDeepFuse", + "params_b": 8.03, + "ifeval": 0.7761605872418517, + "bbh": 0.5048544889908054, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.3774933510638298, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.369875, + "hf_avg": 27.387201422763493 + }, + { + "hf_id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", + "name": "Llama3.1-SuperDeepFuse-CrashCourse12K", + "params_b": 8.03, + "ifeval": 0.718732961874493, + "bbh": 0.5215513828266275, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.3631150265957447, + "hf_math_lvl5": 0.18051359516616314, + "hf_musr": 0.40264583333333337, + "hf_avg": 27.995793391642223 + }, + { + "hf_id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", + "name": "Qwen2.5-0.5B-Instruct-CrashCourse-dropout", + "params_b": 0.494, + "ifeval": 0.2948831323111566, + "bbh": 0.3311726760218689, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.16082114361702127, + "hf_math_lvl5": 0.04229607250755287, + "hf_musr": 0.3341875, + "hf_avg": 8.433362269175692 + }, + { + "hf_id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", + "name": "13_outOf_32_pruned_layers_llama3.1-8b", + "params_b": 5.195, + "ifeval": 0.17480728910402177, + "bbh": 0.2883257760266153, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11286569148936171, + "hf_musr": 0.3803229166666666, + "hf_avg": 4.404258622194578 + }, + { + "hf_id": "ai21labs/Jamba-v0.1", + "name": "Jamba-v0.1", + "params_b": 51.57, + "ifeval": 0.20255920956395698, + "bbh": 0.36022602451645724, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.24916888297872342, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.35902083333333334, + "hf_avg": 9.218365089520882 + }, + { + "hf_id": "ai4bharat/Airavata", + "name": "Airavata", + "params_b": 6.87, + "ifeval": 0.05585402288150995, + "bbh": 0.36276862514633795, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.1634807180851064, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.3762916666666667, + "hf_avg": 5.550973263891643 + }, + { + "hf_id": "aixonlab/Aether-12b", + "name": "Aether-12b", + "params_b": 12.248, + "ifeval": 0.23468286369056326, + "bbh": 0.5179400750435481, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.3410073138297872, + "hf_math_lvl5": 0.10649546827794562, + "hf_musr": 0.38286458333333334, + "hf_avg": 18.045942870286208 + }, + { + "hf_id": "aixonlab/Grey-12b", + "name": "Grey-12b", + "params_b": 12.248, + "ifeval": 0.39679938119744496, + "bbh": 0.5698957505959833, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.3779089095744681, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.4516354166666667, + "hf_avg": 23.68155281704829 + }, + { + "hf_id": "aixonlab/Zara-14b-v1.2", + "name": "Zara-14b-v1.2", + "params_b": 14.766, + "ifeval": 0.6197400674654362, + "bbh": 0.6405368457456163, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.5263464095744681, + "hf_math_lvl5": 0.35347432024169184, + "hf_musr": 0.46747916666666667, + "hf_avg": 37.99891251390576 + }, + { + "hf_id": "akhadangi/Llama3.2.1B.0.01-First", + "name": "Llama3.2.1B.0.01-First", + "params_b": 1.236, + "ifeval": 0.08135857303066973, + "bbh": 0.31891926453372005, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.1196808510638298, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.3193958333333333, + "hf_avg": 3.1098917488910103 + }, + { + "hf_id": "akhadangi/Llama3.2.1B.0.01-Last", + "name": "Llama3.2.1B.0.01-Last", + "params_b": 1.236, + "ifeval": 0.09165015492227291, + "bbh": 0.3159283874883156, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.12267287234042554, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3206354166666667, + "hf_avg": 3.2621298672901786 + }, + { + "hf_id": "akhadangi/Llama3.2.1B.0.1-First", + "name": "Llama3.2.1B.0.1-First", + "params_b": 1.236, + "ifeval": 0.10009330797838623, + "bbh": 0.3119615016336897, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.11693816489361702, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.330125, + "hf_avg": 3.269240763959394 + }, + { + "hf_id": "akhadangi/Llama3.2.1B.0.1-Last", + "name": "Llama3.2.1B.0.1-Last", + "params_b": 1.236, + "ifeval": 0.09497245087479, + "bbh": 0.3163776768490709, + "gpqa": 0.23825503355704697, + "mmlu_pro": 0.11776928191489362, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3340625, + "hf_avg": 3.2667219224258432 + }, + { + "hf_id": "akhadangi/Llama3.2.1B.BaseFiT", + "name": "Llama3.2.1B.BaseFiT", + "params_b": 1.236, + "ifeval": 0.08827799128534511, + "bbh": 0.31745151457535453, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.1171875, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.3220625, + "hf_avg": 3.318002610011419 + }, + { + "hf_id": "akjindal53244/Llama-3.1-Storm-8B", + "name": "Llama-3.1-Storm-8B", + "params_b": 8.03, + "ifeval": 0.803263119633683, + "bbh": 0.5196330402870707, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.3812333776595745, + "hf_math_lvl5": 0.1623867069486405, + "hf_musr": 0.4028333333333334, + "hf_avg": 29.365249771767235 + }, + { + "hf_id": "akjindal53244/Llama-3.1-Storm-8B", + "name": "Llama-3.1-Storm-8B", + "params_b": 8.03, + "ifeval": 0.8050616807847621, + "bbh": 0.5188671226840744, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.3803191489361702, + "hf_math_lvl5": 0.17220543806646524, + "hf_musr": 0.4028020833333333, + "hf_avg": 29.943924239133935 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-70B", + "name": "Llama-3.1-Tulu-3-70B", + "params_b": 70.554, + "ifeval": 0.8291167435737177, + "bbh": 0.6163626496199947, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.46451130319148937, + "hf_math_lvl5": 0.4501510574018127, + "hf_musr": 0.4948333333333334, + "hf_avg": 42.33178738532094, + "arena_elo": 1286.53, + "arena_rank": 198, + "arena_votes": 2846 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-70B", + "name": "Llama-3.1-Tulu-3-70B", + "params_b": 70.554, + "ifeval": 0.8379344583482937, + "bbh": 0.6156847169556112, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.4655917553191489, + "hf_math_lvl5": 0.38293051359516617, + "hf_musr": 0.49880208333333337, + "hf_avg": 41.45452740659843 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-70B-DPO", + "name": "Llama-3.1-Tulu-3-70B-DPO", + "params_b": 70, + "ifeval": 0.8281925291559729, + "bbh": 0.6146203626958501, + "gpqa": 0.37583892617449666, + "mmlu_pro": 0.4632646276595745, + "hf_math_lvl5": 0.44939577039274925, + "hf_musr": 0.4922604166666667, + "hf_avg": 42.22441492015548 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-70B-SFT", + "name": "Llama-3.1-Tulu-3-70B-SFT", + "params_b": 70.554, + "ifeval": 0.8050616807847621, + "bbh": 0.5951437800580934, + "gpqa": 0.3447986577181208, + "mmlu_pro": 0.46243351063829785, + "hf_math_lvl5": 0.33157099697885195, + "hf_musr": 0.5026145833333334, + "hf_avg": 38.848492068127356 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-8B", + "name": "Llama-3.1-Tulu-3-8B", + "params_b": 8.03, + "ifeval": 0.8266687943545348, + "bbh": 0.4049833102731906, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.2826628989361702, + "hf_math_lvl5": 0.19637462235649547, + "hf_musr": 0.41746875, + "hf_avg": 26.034998081672143, + "arena_elo": 1220.55, + "arena_rank": 240, + "arena_votes": 2895 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-8B", + "name": "Llama-3.1-Tulu-3-8B", + "params_b": 8.03, + "ifeval": 0.8254697535871487, + "bbh": 0.40608256120952024, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.2820811170212766, + "hf_math_lvl5": 0.21148036253776434, + "hf_musr": 0.41746875, + "hf_avg": 26.260868015453667 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-8B-DPO", + "name": "Llama-3.1-Tulu-3-8B-DPO", + "params_b": 8, + "ifeval": 0.8029384255996312, + "bbh": 0.4079428557044153, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.2898105053191489, + "hf_math_lvl5": 0.236404833836858, + "hf_musr": 0.41613541666666665, + "hf_avg": 26.463980035063702 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-8B-RM", + "name": "Llama-3.1-Tulu-3-8B-RM", + "params_b": 8, + "ifeval": 0.16701352411601217, + "bbh": 0.2950041147470504, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.10821143617021277, + "hf_musr": 0.3764166666666667, + "hf_avg": 4.235057018188027 + }, + { + "hf_id": "allenai/Llama-3.1-Tulu-3-8B-SFT", + "name": "Llama-3.1-Tulu-3-8B-SFT", + "params_b": 8.03, + "ifeval": 0.7403400754442657, + "bbh": 0.3871863270501647, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.28116688829787234, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.4267708333333333, + "hf_avg": 22.596940551752763 + }, + { + "hf_id": "allenai/OLMo-1.7-7B-hf", + "name": "OLMo-1.7-7B-hf", + "ifeval": 0.1568970332052288, + "bbh": 0.3013695911207614, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.11236702127659574, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.34748958333333335, + "hf_avg": 3.8002319134201135 + }, + { + "hf_id": "allenai/OLMo-1B-hf", + "name": "OLMo-1B-hf", + "params_b": 1.177, + "ifeval": 0.21819660722438686, + "bbh": 0.30519468988429327, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.11735372340425532, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.40978125, + "hf_avg": 6.633923959022838 + }, + { + "hf_id": "allenai/OLMo-2-1124-7B-Instruct", + "name": "OLMo-2-1124-7B-Instruct", + "params_b": 7.299, + "ifeval": 0.7244034716773715, + "bbh": 0.40223602474417786, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.2672041223404255, + "hf_math_lvl5": 0.1487915407854985, + "hf_musr": 0.35083333333333333, + "hf_avg": 21.785857000415522 + }, + { + "hf_id": "allenai/OLMo-7B-Instruct-hf", + "name": "OLMo-7B-Instruct-hf", + "params_b": 7, + "ifeval": 0.3472652561869174, + "bbh": 0.3706469866662716, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.17852393617021275, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.37647916666666664, + "hf_avg": 10.84897342143646 + }, + { + "hf_id": "allenai/OLMo-7B-hf", + "name": "OLMo-7B-hf", + "params_b": 6.888, + "ifeval": 0.2719273749207658, + "bbh": 0.32791316587362274, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.11727061170212766, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3486666666666667, + "hf_avg": 6.864268027495356 + }, + { + "hf_id": "allenai/OLMoE-1B-7B-0125-Instruct", + "name": "OLMoE-1B-7B-0125-Instruct", + "params_b": 6.919, + "ifeval": 0.6757436934001781, + "bbh": 0.38245348916008676, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.19148936170212766, + "hf_math_lvl5": 0.08987915407854985, + "hf_musr": 0.3635833333333333, + "hf_avg": 17.50987629325843 + }, + { + "hf_id": "allenai/OLMoE-1B-7B-0924", + "name": "OLMoE-1B-7B-0924", + "params_b": 6.919, + "ifeval": 0.21847143357402804, + "bbh": 0.3393437931177341, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.1739527925531915, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.34879166666666667, + "hf_avg": 7.266580603765461 + }, + { + "hf_id": "allenai/OLMoE-1B-7B-0924-Instruct", + "name": "OLMoE-1B-7B-0924-Instruct", + "params_b": 6.919, + "ifeval": 0.4667415790103592, + "bbh": 0.3901610626816106, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.18758311170212766, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.3848229166666666, + "hf_avg": 13.698377341715071 + }, + { + "hf_id": "allknowingroger/Chocolatine-24B", + "name": "Chocolatine-24B", + "params_b": 24.184, + "ifeval": 0.19581488229010136, + "bbh": 0.6191260063262436, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.4566156914893617, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.43232291666666667, + "hf_avg": 21.34573359059476 + }, + { + "hf_id": "allknowingroger/Gemma2Slerp1-27B", + "name": "Gemma2Slerp1-27B", + "params_b": 27.227, + "ifeval": 0.7186332265056716, + "bbh": 0.6398902146527521, + "gpqa": 0.3640939597315436, + "mmlu_pro": 0.44564494680851063, + "hf_math_lvl5": 0.2583081570996979, + "hf_musr": 0.47671875, + "hf_avg": 36.50763943363299 + }, + { + "hf_id": "allknowingroger/Gemma2Slerp2-27B", + "name": "Gemma2Slerp2-27B", + "params_b": 27.227, + "ifeval": 0.7545534736720789, + "bbh": 0.6557274121032689, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.46226728723404253, + "hf_math_lvl5": 0.27870090634441086, + "hf_musr": 0.46208333333333335, + "hf_avg": 37.93170029272168 + }, + { + "hf_id": "allknowingroger/Gemma2Slerp3-27B", + "name": "Gemma2Slerp3-27B", + "params_b": 27.227, + "ifeval": 0.7426384216102164, + "bbh": 0.6499638721230724, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.4640957446808511, + "hf_math_lvl5": 0.27416918429003023, + "hf_musr": 0.47402083333333334, + "hf_avg": 37.531456425236875 + }, + { + "hf_id": "allknowingroger/Gemma2Slerp4-27B", + "name": "Gemma2Slerp4-27B", + "params_b": 27.227, + "ifeval": 0.7496575752337131, + "bbh": 0.6529581339749019, + "gpqa": 0.36661073825503354, + "mmlu_pro": 0.46492686170212766, + "hf_math_lvl5": 0.2719033232628399, + "hf_musr": 0.4502395833333333, + "hf_avg": 37.36755448762331 + }, + { + "hf_id": "allknowingroger/GemmaSlerp-9B", + "name": "GemmaSlerp-9B", + "params_b": 9.242, + "ifeval": 0.704320092909037, + "bbh": 0.592057786577488, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.41605718085106386, + "hf_math_lvl5": 0.21601208459214502, + "hf_musr": 0.46732291666666664, + "hf_avg": 33.18611821246378 + }, + { + "hf_id": "allknowingroger/GemmaSlerp2-9B", + "name": "GemmaSlerp2-9B", + "params_b": 9.242, + "ifeval": 0.7281003293483512, + "bbh": 0.598271299766216, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.42386968085106386, + "hf_math_lvl5": 0.2107250755287009, + "hf_musr": 0.47671875, + "hf_avg": 34.25757823554644 + }, + { + "hf_id": "allknowingroger/GemmaSlerp4-10B", + "name": "GemmaSlerp4-10B", + "params_b": 10.159, + "ifeval": 0.7326216660682544, + "bbh": 0.6027862253440982, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.4250332446808511, + "hf_math_lvl5": 0.2243202416918429, + "hf_musr": 0.45398958333333334, + "hf_avg": 34.06301244708342 + }, + { + "hf_id": "allknowingroger/GemmaSlerp5-10B", + "name": "GemmaSlerp5-10B", + "params_b": 10.159, + "ifeval": 0.7353444416370785, + "bbh": 0.605447654436423, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.4328457446808511, + "hf_math_lvl5": 0.21827794561933533, + "hf_musr": 0.46078125, + "hf_avg": 34.382403041414115 + }, + { + "hf_id": "allknowingroger/GemmaStock1-27B", + "name": "GemmaStock1-27B", + "params_b": 27.227, + "ifeval": 0.7509064836855099, + "bbh": 0.6565607454366021, + "gpqa": 0.3640939597315436, + "mmlu_pro": 0.47298869680851063, + "hf_math_lvl5": 0.263595166163142, + "hf_musr": 0.45268749999999996, + "hf_avg": 37.51365913910503 + }, + { + "hf_id": "allknowingroger/HomerSlerp1-7B", + "name": "HomerSlerp1-7B", + "params_b": 7.616, + "ifeval": 0.46212050692163464, + "bbh": 0.551818027489446, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.4503823138297872, + "hf_math_lvl5": 0.2719033232628399, + "hf_musr": 0.43585416666666665, + "hf_avg": 28.483742375570117 + }, + { + "hf_id": "allknowingroger/HomerSlerp2-7B", + "name": "HomerSlerp2-7B", + "params_b": 7.616, + "ifeval": 0.44868172005833407, + "bbh": 0.5648943315947, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.45146276595744683, + "hf_math_lvl5": 0.29682779456193353, + "hf_musr": 0.43557291666666664, + "hf_avg": 28.948900169100085 + }, + { + "hf_id": "allknowingroger/HomerSlerp3-7B", + "name": "HomerSlerp3-7B", + "params_b": 7.616, + "ifeval": 0.4362668829815999, + "bbh": 0.5598063466560873, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.45345744680851063, + "hf_math_lvl5": 0.3021148036253776, + "hf_musr": 0.44617708333333334, + "hf_avg": 28.953652934928698 + }, + { + "hf_id": "allknowingroger/HomerSlerp4-7B", + "name": "HomerSlerp4-7B", + "params_b": 7.616, + "ifeval": 0.43741605606457534, + "bbh": 0.5570767234678723, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.44722406914893614, + "hf_math_lvl5": 0.3270392749244713, + "hf_musr": 0.44084375, + "hf_avg": 29.144845188798246 + }, + { + "hf_id": "allknowingroger/LimyQstar-7B-slerp", + "name": "LimyQstar-7B-slerp", + "params_b": 7.242, + "ifeval": 0.34911368502240725, + "bbh": 0.5023559424245442, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3103390957446808, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.4146458333333333, + "hf_avg": 18.67252497281834 + }, + { + "hf_id": "allknowingroger/Marco-01-slerp1-7B", + "name": "Marco-01-slerp1-7B", + "params_b": 7.616, + "ifeval": 0.46811571075856506, + "bbh": 0.5540943469864194, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.44830452127659576, + "hf_math_lvl5": 0.3157099697885196, + "hf_musr": 0.4451875, + "hf_avg": 29.48531675671586 + }, + { + "hf_id": "allknowingroger/Meme-7B-slerp", + "name": "Meme-7B-slerp", + "params_b": 7.242, + "ifeval": 0.5163754393897082, + "bbh": 0.4660944195552204, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.281000664893617, + "hf_math_lvl5": 0.04380664652567976, + "hf_musr": 0.4223020833333333, + "hf_avg": 19.276080557709445 + }, + { + "hf_id": "allknowingroger/MistralPhi3-11B", + "name": "MistralPhi3-11B", + "params_b": 11.234, + "ifeval": 0.1942911474886634, + "bbh": 0.6234314600705605, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.46875, + "hf_musr": 0.4266770833333333, + "hf_avg": 21.627095011873774 + }, + { + "hf_id": "allknowingroger/Mistralmash1-7B-s", + "name": "Mistralmash1-7B-s", + "params_b": 7.242, + "ifeval": 0.39610012544493056, + "bbh": 0.5277485757172445, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3292885638297872, + "hf_math_lvl5": 0.09214501510574018, + "hf_musr": 0.4267083333333333, + "hf_avg": 20.913865809674476 + }, + { + "hf_id": "allknowingroger/Mistralmash2-7B-s", + "name": "Mistralmash2-7B-s", + "params_b": 7.242, + "ifeval": 0.4101883003763348, + "bbh": 0.530485814102601, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3345246010638298, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.43724999999999997, + "hf_avg": 21.389680975325334 + }, + { + "hf_id": "allknowingroger/MixTAO-19B-pass", + "name": "MixTAO-19B-pass", + "params_b": 19.188, + "ifeval": 0.3814368098866563, + "bbh": 0.5128248798224987, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.31050531914893614, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.47827083333333337, + "hf_avg": 20.627592394280352 + }, + { + "hf_id": "allknowingroger/MixTaoTruthful-13B-slerp", + "name": "MixTaoTruthful-13B-slerp", + "params_b": 12.879, + "ifeval": 0.41388515804731446, + "bbh": 0.5207335343585151, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3100066489361702, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.42924999999999996, + "hf_avg": 20.252975968080015 + }, + { + "hf_id": "allknowingroger/MultiCalm-7B-slerp", + "name": "MultiCalm-7B-slerp", + "params_b": 7.242, + "ifeval": 0.3926526061960044, + "bbh": 0.5121891599770304, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3032746010638298, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.43194791666666665, + "hf_avg": 19.472289426361296 + }, + { + "hf_id": "allknowingroger/MultiMash-12B-slerp", + "name": "MultiMash-12B-slerp", + "params_b": 12.879, + "ifeval": 0.39744876926554873, + "bbh": 0.5141827379810838, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.3067652925531915, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.44379166666666664, + "hf_avg": 20.179903850442244 + }, + { + "hf_id": "allknowingroger/MultiMash10-13B-slerp", + "name": "MultiMash10-13B-slerp", + "params_b": 12.879, + "ifeval": 0.41628323958208663, + "bbh": 0.5186335995744094, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3116688829787234, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.43179166666666663, + "hf_avg": 20.4264364824254 + }, + { + "hf_id": "allknowingroger/MultiMash11-13B-slerp", + "name": "MultiMash11-13B-slerp", + "params_b": 12.879, + "ifeval": 0.4251009543566625, + "bbh": 0.5193864686484946, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.30851063829787234, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.43728125, + "hf_avg": 20.614675908435512 + }, + { + "hf_id": "allknowingroger/MultiMash2-12B-slerp", + "name": "MultiMash2-12B-slerp", + "params_b": 12.879, + "ifeval": 0.42607503645881817, + "bbh": 0.5133973498532299, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3042719414893617, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.4228020833333333, + "hf_avg": 19.84014306475432 + }, + { + "hf_id": "allknowingroger/MultiMash6-12B-slerp", + "name": "MultiMash6-12B-slerp", + "params_b": 12.879, + "ifeval": 0.43004672047943904, + "bbh": 0.5195916915718951, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.30909242021276595, + "hf_math_lvl5": 0.07250755287009064, + "hf_musr": 0.4305833333333333, + "hf_avg": 20.27642710504546 + }, + { + "hf_id": "allknowingroger/MultiMash7-12B-slerp", + "name": "MultiMash7-12B-slerp", + "params_b": 12.879, + "ifeval": 0.42127887338927383, + "bbh": 0.5111135397195524, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.3029421542553192, + "hf_math_lvl5": 0.06948640483383686, + "hf_musr": 0.42794791666666665, + "hf_avg": 19.792293511824642 + }, + { + "hf_id": "allknowingroger/MultiMash8-13B-slerp", + "name": "MultiMash8-13B-slerp", + "params_b": 12.879, + "ifeval": 0.4320702402957486, + "bbh": 0.5178483059643324, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.31258311170212766, + "hf_math_lvl5": 0.0770392749244713, + "hf_musr": 0.4423958333333333, + "hf_avg": 21.07486448648852 + }, + { + "hf_id": "allknowingroger/MultiMash9-13B-slerp", + "name": "MultiMash9-13B-slerp", + "params_b": 12.879, + "ifeval": 0.4187810564856802, + "bbh": 0.5193579939678727, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.3100066489361702, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.4398229166666667, + "hf_avg": 20.642969652890788 + }, + { + "hf_id": "allknowingroger/MultiMerge-7B-slerp", + "name": "MultiMerge-7B-slerp", + "params_b": 7.242, + "ifeval": 0.3947758613811354, + "bbh": 0.5140224933103638, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3036901595744681, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.42797916666666663, + "hf_avg": 19.542246727772383 + }, + { + "hf_id": "allknowingroger/Multimash3-12B-slerp", + "name": "Multimash3-12B-slerp", + "params_b": 12.879, + "ifeval": 0.44371046600796993, + "bbh": 0.5176624678276028, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.3067652925531915, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.4343958333333333, + "hf_avg": 20.47073324126141 + }, + { + "hf_id": "allknowingroger/MultiverseEx26-7B-slerp", + "name": "MultiverseEx26-7B-slerp", + "params_b": 7.242, + "ifeval": 0.3938516469633905, + "bbh": 0.5133591871690678, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3035239361702128, + "hf_math_lvl5": 0.0755287009063444, + "hf_musr": 0.4293125, + "hf_avg": 19.69589878762239 + }, + { + "hf_id": "allknowingroger/NeuralWestSeverus-7B-slerp", + "name": "NeuralWestSeverus-7B-slerp", + "params_b": 7.242, + "ifeval": 0.41356046401326263, + "bbh": 0.5244283854305991, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.3137466755319149, + "hf_math_lvl5": 0.07326283987915408, + "hf_musr": 0.45287499999999997, + "hf_avg": 20.675370567029706 + }, + { + "hf_id": "allknowingroger/Neuralcoven-7B-slerp", + "name": "Neuralcoven-7B-slerp", + "params_b": 7.242, + "ifeval": 0.3858584112377381, + "bbh": 0.530287217712165, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3293716755319149, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.429, + "hf_avg": 20.363670451495135 + }, + { + "hf_id": "allknowingroger/Neuralmultiverse-7B-slerp", + "name": "Neuralmultiverse-7B-slerp", + "params_b": 7.242, + "ifeval": 0.3769154731667531, + "bbh": 0.5165722210470375, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.30418882978723405, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.42804166666666665, + "hf_avg": 19.36103031824635 + }, + { + "hf_id": "allknowingroger/Ph3della5-14B", + "name": "Ph3della5-14B", + "params_b": 13.96, + "ifeval": 0.47985567183960776, + "bbh": 0.6331746353794991, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.4787234042553192, + "hf_math_lvl5": 0.17673716012084592, + "hf_musr": 0.4386145833333333, + "hf_avg": 30.46973875649518 + }, + { + "hf_id": "allknowingroger/Ph3merge-14B", + "name": "Ph3merge-14B", + "params_b": 13.619, + "ifeval": 0.27012881376968667, + "bbh": 0.638087568868341, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.4611037234042553, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.4334375, + "hf_avg": 23.68333279737927 + }, + { + "hf_id": "allknowingroger/Ph3task1-14B", + "name": "Ph3task1-14B", + "params_b": 13.96, + "ifeval": 0.46946435457918323, + "bbh": 0.63178060736657, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.4734042553191489, + "hf_math_lvl5": 0.16691842900302115, + "hf_musr": 0.45077083333333334, + "hf_avg": 30.54839802644553 + }, + { + "hf_id": "allknowingroger/Ph3task2-14B", + "name": "Ph3task2-14B", + "params_b": 13.96, + "ifeval": 0.4713127834146731, + "bbh": 0.6098412220695854, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.44597739361702127, + "hf_math_lvl5": 0.14652567975830816, + "hf_musr": 0.4535, + "hf_avg": 28.611110586227724 + }, + { + "hf_id": "allknowingroger/Ph3task3-14B", + "name": "Ph3task3-14B", + "params_b": 13.96, + "ifeval": 0.4962421929369628, + "bbh": 0.6297915743094921, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.47706117021276595, + "hf_math_lvl5": 0.17598187311178248, + "hf_musr": 0.44255208333333335, + "hf_avg": 30.710221507611795 + }, + { + "hf_id": "allknowingroger/Ph3unsloth-3B-slerp", + "name": "Ph3unsloth-3B-slerp", + "params_b": 3.821, + "ifeval": 0.18944511673470835, + "bbh": 0.5468077356147099, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.3700964095744681, + "hf_math_lvl5": 0.10120845921450151, + "hf_musr": 0.45278124999999997, + "hf_avg": 20.153514887760053 + }, + { + "hf_id": "allknowingroger/Phi3mash1-17B-pass", + "name": "Phi3mash1-17B-pass", + "params_b": 16.687, + "ifeval": 0.18842116694814204, + "bbh": 0.6128878795560929, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.45894281914893614, + "hf_musr": 0.445125, + "hf_avg": 21.34996880563698 + }, + { + "hf_id": "allknowingroger/Qwen2.5-7B-task2", + "name": "Qwen2.5-7B-task2", + "params_b": 7.616, + "ifeval": 0.45270327176336567, + "bbh": 0.5625940266685543, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.4517121010638298, + "hf_math_lvl5": 0.3549848942598187, + "hf_musr": 0.43696874999999996, + "hf_avg": 29.877934142366954 + }, + { + "hf_id": "allknowingroger/Qwen2.5-7B-task3", + "name": "Qwen2.5-7B-task3", + "params_b": 7.616, + "ifeval": 0.512903540383959, + "bbh": 0.5397623813486384, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.45013297872340424, + "hf_math_lvl5": 0.26057401812688824, + "hf_musr": 0.43557291666666664, + "hf_avg": 28.738760798626867 + }, + { + "hf_id": "allknowingroger/Qwen2.5-7B-task4", + "name": "Qwen2.5-7B-task4", + "params_b": 7.616, + "ifeval": 0.5005385709916355, + "bbh": 0.5583446038580263, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.45611702127659576, + "hf_math_lvl5": 0.311178247734139, + "hf_musr": 0.43954166666666666, + "hf_avg": 30.06180942847573 + }, + { + "hf_id": "allknowingroger/Qwen2.5-7B-task7", + "name": "Qwen2.5-7B-task7", + "params_b": 7.616, + "ifeval": 0.42842325030917966, + "bbh": 0.555243179835915, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.4133144946808511, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.4325625, + "hf_avg": 24.016939559246442 + }, + { + "hf_id": "allknowingroger/Qwen2.5-7B-task8", + "name": "Qwen2.5-7B-task8", + "params_b": 7.616, + "ifeval": 0.4645185884564068, + "bbh": 0.5524895381578828, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.44331781914893614, + "hf_math_lvl5": 0.3527190332326284, + "hf_musr": 0.45144791666666667, + "hf_avg": 30.109425461585875 + }, + { + "hf_id": "allknowingroger/Qwen2.5-slerp-14B", + "name": "Qwen2.5-slerp-14B", + "params_b": 14.77, + "ifeval": 0.49282016161562425, + "bbh": 0.65124197415124, + "gpqa": 0.3674496644295302, + "mmlu_pro": 0.5378989361702128, + "hf_math_lvl5": 0.4622356495468278, + "hf_musr": 0.47439583333333335, + "hf_avg": 38.16277613953164 + }, + { + "hf_id": "allknowingroger/QwenSlerp12-7B", + "name": "QwenSlerp12-7B", + "params_b": 7.616, + "ifeval": 0.5075577246151324, + "bbh": 0.5556448443090559, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.4460605053191489, + "hf_math_lvl5": 0.2945619335347432, + "hf_musr": 0.45947916666666666, + "hf_avg": 29.989027415795306 + }, + { + "hf_id": "allknowingroger/QwenSlerp6-14B", + "name": "QwenSlerp6-14B", + "params_b": 14.766, + "ifeval": 0.6866846633598851, + "bbh": 0.6384454358065165, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.5405585106382979, + "hf_math_lvl5": 0.3723564954682779, + "hf_musr": 0.46896875, + "hf_avg": 39.53457080924675 + }, + { + "hf_id": "allknowingroger/QwenStock1-14B", + "name": "QwenStock1-14B", + "params_b": 14.766, + "ifeval": 0.5634117474966422, + "bbh": 0.6528491305599156, + "gpqa": 0.3766778523489933, + "mmlu_pro": 0.5418051861702128, + "hf_math_lvl5": 0.3768882175226586, + "hf_musr": 0.47296875, + "hf_avg": 38.14564924678278 + }, + { + "hf_id": "allknowingroger/QwenStock2-14B", + "name": "QwenStock2-14B", + "params_b": 14.766, + "ifeval": 0.5563427261887348, + "bbh": 0.656885010139055, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5405585106382979, + "hf_math_lvl5": 0.38821752265861026, + "hf_musr": 0.47560416666666666, + "hf_avg": 38.41917529577626 + }, + { + "hf_id": "allknowingroger/QwenStock3-14B", + "name": "QwenStock3-14B", + "params_b": 14.766, + "ifeval": 0.5615134509767417, + "bbh": 0.6565322062808641, + "gpqa": 0.3783557046979866, + "mmlu_pro": 0.5428025265957447, + "hf_math_lvl5": 0.3776435045317221, + "hf_musr": 0.4755729166666667, + "hf_avg": 38.32000426856688 + }, + { + "hf_id": "allknowingroger/Qwenslerp2-14B", + "name": "Qwenslerp2-14B", + "params_b": 14.77, + "ifeval": 0.5007136619724553, + "bbh": 0.6554876216007552, + "gpqa": 0.36828859060402686, + "mmlu_pro": 0.5403091755319149, + "hf_math_lvl5": 0.44561933534743203, + "hf_musr": 0.4729375, + "hf_avg": 38.08598326949303 + }, + { + "hf_id": "allknowingroger/Qwenslerp2-7B", + "name": "Qwenslerp2-7B", + "params_b": 7.616, + "ifeval": 0.5294396645345462, + "bbh": 0.5609127334788001, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.4515458776595745, + "hf_math_lvl5": 0.3421450151057402, + "hf_musr": 0.4356041666666666, + "hf_avg": 30.810469270516734 + }, + { + "hf_id": "allknowingroger/Qwenslerp3-14B", + "name": "Qwenslerp3-14B", + "params_b": 14.77, + "ifeval": 0.5052349986923584, + "bbh": 0.6520835120117142, + "gpqa": 0.375, + "mmlu_pro": 0.5394780585106383, + "hf_math_lvl5": 0.44637462235649544, + "hf_musr": 0.46760416666666665, + "hf_avg": 38.080923366964626 + }, + { + "hf_id": "allknowingroger/Qwenslerp3-7B", + "name": "Qwenslerp3-7B", + "params_b": 7.616, + "ifeval": 0.501837347127843, + "bbh": 0.5580160200086862, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.45420545212765956, + "hf_math_lvl5": 0.3217522658610272, + "hf_musr": 0.45151041666666664, + "hf_avg": 30.63274991183272 + }, + { + "hf_id": "allknowingroger/ROGERphi-7B-slerp", + "name": "ROGERphi-7B-slerp", + "params_b": 7.242, + "ifeval": 0.3861332375873793, + "bbh": 0.5195583428468424, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.3052692819148936, + "hf_math_lvl5": 0.07326283987915408, + "hf_musr": 0.46853125, + "hf_avg": 20.70747082131683 + }, + { + "hf_id": "allknowingroger/RogerMerge-7B-slerp", + "name": "RogerMerge-7B-slerp", + "params_b": 7.242, + "ifeval": 0.39330199426410817, + "bbh": 0.5160176493085935, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.30302526595744683, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.43197916666666664, + "hf_avg": 19.61773581081775 + }, + { + "hf_id": "allknowingroger/Strangecoven-7B-slerp", + "name": "Strangecoven-7B-slerp", + "params_b": 7.242, + "ifeval": 0.37464261492839, + "bbh": 0.5368022290282338, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.33643617021276595, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.4198854166666666, + "hf_avg": 20.311977055139792 + }, + { + "hf_id": "allknowingroger/WestlakeMaziyar-7B-slerp", + "name": "WestlakeMaziyar-7B-slerp", + "params_b": 7.242, + "ifeval": 0.48377748817581795, + "bbh": 0.5245479952765804, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3077626329787234, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.44738541666666665, + "hf_avg": 22.18341722963105 + }, + { + "hf_id": "allknowingroger/YamMaths-7B-slerp", + "name": "YamMaths-7B-slerp", + "params_b": 7.242, + "ifeval": 0.4148093724650594, + "bbh": 0.5155845857281723, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.3130817819148936, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.43836458333333334, + "hf_avg": 20.55230690685158 + }, + { + "hf_id": "allknowingroger/Yibuddy-35B", + "name": "Yibuddy-35B", + "params_b": 34.389, + "ifeval": 0.4234774841864032, + "bbh": 0.5916185369526096, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.44888630319148937, + "hf_math_lvl5": 0.15709969788519637, + "hf_musr": 0.45045833333333335, + "hf_avg": 28.283170568255755 + }, + { + "hf_id": "allknowingroger/Yislerp-34B", + "name": "Yislerp-34B", + "params_b": 34.389, + "ifeval": 0.3691970637907419, + "bbh": 0.6158722731484186, + "gpqa": 0.35822147651006714, + "mmlu_pro": 0.4751496010638298, + "hf_math_lvl5": 0.21601208459214502, + "hf_musr": 0.456625, + "hf_avg": 29.39892579588029 + }, + { + "hf_id": "allknowingroger/Yislerp2-34B", + "name": "Yislerp2-34B", + "params_b": 34.389, + "ifeval": 0.39994658616914236, + "bbh": 0.6245771970170245, + "gpqa": 0.3640939597315436, + "mmlu_pro": 0.472406914893617, + "hf_math_lvl5": 0.229607250755287, + "hf_musr": 0.45296875, + "hf_avg": 30.433865107288046 + }, + { + "hf_id": "allknowingroger/limyClown-7B-slerp", + "name": "limyClown-7B-slerp", + "params_b": 7.242, + "ifeval": 0.4017451473202215, + "bbh": 0.5147517317055973, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.30377327127659576, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.4293125, + "hf_avg": 19.70388869479609 + }, + { + "hf_id": "allura-org/L3.1-8b-RP-Ink", + "name": "L3.1-8b-RP-Ink", + "params_b": 8.03, + "ifeval": 0.7811063533646281, + "bbh": 0.48284724308518095, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.3427526595744681, + "hf_math_lvl5": 0.14803625377643503, + "hf_musr": 0.3608229166666667, + "hf_avg": 25.096017377265284 + }, + { + "hf_id": "allura-org/MN-12b-RP-Ink", + "name": "MN-12b-RP-Ink", + "params_b": 12.248, + "ifeval": 0.7186332265056716, + "bbh": 0.4833826588550261, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3513962765957447, + "hf_math_lvl5": 0.11858006042296072, + "hf_musr": 0.38184375000000004, + "hf_avg": 24.976661361688898 + }, + { + "hf_id": "allura-org/MS-Meadowlark-22B", + "name": "MS-Meadowlark-22B", + "params_b": 22.247, + "ifeval": 0.669698621878837, + "bbh": 0.5162576933217772, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.38231382978723405, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.3842604166666667, + "hf_avg": 27.097964406963452 + }, + { + "hf_id": "allura-org/Mistral-Small-24b-Sertraline-0304", + "name": "Mistral-Small-24b-Sertraline-0304", + "params_b": 23.572, + "ifeval": 0.6799902037704402, + "bbh": 0.6524908933699552, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.5105551861702128, + "hf_math_lvl5": 0.22280966767371602, + "hf_musr": 0.4395104166666666, + "hf_avg": 35.369805475643055 + }, + { + "hf_id": "allura-org/Mistral-Small-Sisyphus-24b-2503", + "name": "Mistral-Small-Sisyphus-24b-2503", + "params_b": 23.572, + "ifeval": 0.6848362345243952, + "bbh": 0.6269790835863639, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.5127160904255319, + "hf_math_lvl5": 0.25, + "hf_musr": 0.39768749999999997, + "hf_avg": 32.50290014221418 + }, + { + "hf_id": "allura-org/MoE-Girl-1BA-7BT", + "name": "MoE-Girl-1BA-7BT", + "params_b": 6.919, + "ifeval": 0.27050337548814923, + "bbh": 0.3139175363262408, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.12175864361702128, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.34355208333333337, + "hf_avg": 6.402799107780404 + }, + { + "hf_id": "allura-org/TQ2.5-14B-Aletheia-v1", + "name": "TQ2.5-14B-Aletheia-v1", + "params_b": 14.77, + "ifeval": 0.7530297388706411, + "bbh": 0.6585074769185942, + "gpqa": 0.3624161073825503, + "mmlu_pro": 0.5241023936170213, + "hf_math_lvl5": 0.33987915407854985, + "hf_musr": 0.44515625, + "hf_avg": 39.48247192029189 + }, + { + "hf_id": "allura-org/TQ2.5-14B-Neon-v1", + "name": "TQ2.5-14B-Neon-v1", + "params_b": 14.77, + "ifeval": 0.6754189993661264, + "bbh": 0.655304131044165, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5252659574468085, + "hf_math_lvl5": 0.36027190332326287, + "hf_musr": 0.461, + "hf_avg": 39.14031038134542 + }, + { + "hf_id": "allura-org/Teleut-7b", + "name": "Teleut-7b", + "params_b": 7.616, + "ifeval": 0.6378752820294595, + "bbh": 0.5141277814496585, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.4130651595744681, + "hf_math_lvl5": 0.24093655589123866, + "hf_musr": 0.4640416666666667, + "hf_avg": 30.22949441247033 + }, + { + "hf_id": "aloobun/Meta-Llama-3-7B-28Layers", + "name": "Meta-Llama-3-7B-28Layers", + "params_b": 7.158, + "ifeval": 0.19636453498938372, + "bbh": 0.4437497014253391, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3159906914893617, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.35892708333333334, + "hf_avg": 13.375690297994408 + }, + { + "hf_id": "aloobun/d-SmolLM2-360M", + "name": "d-SmolLM2-360M", + "params_b": 0.362, + "ifeval": 0.20970358648386284, + "bbh": 0.3195784405636826, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.11693816489361702, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3980625, + "hf_avg": 6.184070904256285 + }, + { + "hf_id": "alpindale/WizardLM-2-8x22B", + "name": "WizardLM-2-8x22B", + "params_b": 140.621, + "ifeval": 0.5272166739805937, + "bbh": 0.6377307938917097, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.45960771276595747, + "hf_math_lvl5": 0.25, + "hf_musr": 0.4387083333333333, + "hf_avg": 33.059051837739325, + "aider_pass_rate": 0.278 + }, + { + "hf_id": "alpindale/magnum-72b-v1", + "name": "magnum-72b-v1", + "params_b": 72.706, + "ifeval": 0.7606484128778308, + "bbh": 0.6982215794373214, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5467918882978723, + "hf_math_lvl5": 0.39803625377643503, + "hf_musr": 0.4489375, + "hf_avg": 42.9290547455837 + }, + { + "hf_id": "altomek/YiSM-34B-0rn", + "name": "YiSM-34B-0rn", + "params_b": 34.389, + "ifeval": 0.428373382624769, + "bbh": 0.6140009573868866, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.4695811170212766, + "hf_math_lvl5": 0.2280966767371601, + "hf_musr": 0.445, + "hf_avg": 30.51201240834246 + }, + { + "hf_id": "amazon/MegaBeam-Mistral-7B-300k", + "name": "MegaBeam-Mistral-7B-300k", + "params_b": 7.242, + "ifeval": 0.520347123410329, + "bbh": 0.4227731731112974, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.2549035904255319, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.39799999999999996, + "hf_avg": 17.022470504123003 + }, + { + "hf_id": "amd/AMD-Llama-135m", + "name": "AMD-Llama-135m", + "params_b": 0.135, + "ifeval": 0.18422452426229072, + "bbh": 0.2973931917569524, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11685505319148937, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.37796874999999996, + "hf_avg": 4.759627159992882 + }, + { + "hf_id": "amd/AMD-Llama-135m", + "name": "AMD-Llama-135m", + "params_b": 0.134, + "ifeval": 0.19184319826948054, + "bbh": 0.29694449748780255, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11685505319148937, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.38457291666666665, + "hf_avg": 5.228976558960189 + }, + { + "hf_id": "anakin87/gemma-2b-orpo", + "name": "gemma-2b-orpo", + "params_b": 2.506, + "ifeval": 0.24779695651981187, + "bbh": 0.34261709435617754, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.1305684840425532, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.37276041666666665, + "hf_avg": 7.284706228625474 + }, + { + "hf_id": "anthracite-org/magnum-v1-72b", + "name": "magnum-v1-72b", + "params_b": 72.706, + "ifeval": 0.7606484128778308, + "bbh": 0.6982215794373214, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5486203457446809, + "hf_math_lvl5": 0.39803625377643503, + "hf_musr": 0.4489375, + "hf_avg": 42.96291506867274 + }, + { + "hf_id": "anthracite-org/magnum-v2-12b", + "name": "magnum-v2-12b", + "params_b": 12.248, + "ifeval": 0.376166349729828, + "bbh": 0.5020864013200114, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.31673869680851063, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.41790625, + "hf_avg": 18.795821563358565 + }, + { + "hf_id": "anthracite-org/magnum-v2-72b", + "name": "magnum-v2-72b", + "params_b": 72.706, + "ifeval": 0.7560273407891063, + "bbh": 0.7005076514129516, + "gpqa": 0.3859060402684564, + "mmlu_pro": 0.5456283244680851, + "hf_math_lvl5": 0.3542296072507553, + "hf_musr": 0.4371875, + "hf_avg": 41.78287226692161 + }, + { + "hf_id": "anthracite-org/magnum-v2.5-12b-kto", + "name": "magnum-v2.5-12b-kto", + "params_b": 12.248, + "ifeval": 0.3865576669902525, + "bbh": 0.5076961186254344, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3214760638297872, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.40863541666666664, + "hf_avg": 18.982789989560633 + }, + { + "hf_id": "anthracite-org/magnum-v3-27b-kto", + "name": "magnum-v3-27b-kto", + "params_b": 27.227, + "ifeval": 0.5674831668860845, + "bbh": 0.586040577894583, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.42378656914893614, + "hf_math_lvl5": 0.18126888217522658, + "hf_musr": 0.38546874999999997, + "hf_avg": 29.33708001780754 + }, + { + "hf_id": "anthracite-org/magnum-v3-34b", + "name": "magnum-v3-34b", + "params_b": 34.389, + "ifeval": 0.5115294086357531, + "bbh": 0.6087828692085228, + "gpqa": 0.36073825503355705, + "mmlu_pro": 0.47523271276595747, + "hf_math_lvl5": 0.19486404833836857, + "hf_musr": 0.3872395833333333, + "hf_avg": 29.66608133452966 + }, + { + "hf_id": "anthracite-org/magnum-v3-9b-chatml", + "name": "magnum-v3-9b-chatml", + "params_b": 9.242, + "ifeval": 0.12747066671985885, + "bbh": 0.5427688488887096, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.4242021276595745, + "hf_math_lvl5": 0.06948640483383686, + "hf_musr": 0.4432291666666666, + "hf_avg": 19.50411636926871 + }, + { + "hf_id": "anthracite-org/magnum-v3-9b-customgemma2", + "name": "magnum-v3-9b-customgemma2", + "params_b": 9.242, + "ifeval": 0.1272955757390391, + "bbh": 0.5340136936916174, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.4204621010638298, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.45646875, + "hf_avg": 19.20026712602973 + }, + { + "hf_id": "anthracite-org/magnum-v4-12b", + "name": "magnum-v4-12b", + "params_b": 12.248, + "ifeval": 0.33929640021808805, + "bbh": 0.5176693046591915, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3603723404255319, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.40928125, + "hf_avg": 20.27642686776778 + }, + { + "hf_id": "anthracite-org/magnum-v4-22b", + "name": "magnum-v4-22b", + "params_b": 22.247, + "ifeval": 0.5628620947973599, + "bbh": 0.548612004937422, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.3829787234042553, + "hf_math_lvl5": 0.2001510574018127, + "hf_musr": 0.44078124999999996, + "hf_avg": 27.854369713005507 + }, + { + "hf_id": "anthracite-org/magnum-v4-27b", + "name": "magnum-v4-27b", + "params_b": 27.227, + "ifeval": 0.34541682735142754, + "bbh": 0.5867298109891389, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.43758311170212766, + "hf_math_lvl5": 0.1797583081570997, + "hf_musr": 0.4379895833333333, + "hf_avg": 26.63300380472101 + }, + { + "hf_id": "anthracite-org/magnum-v4-9b", + "name": "magnum-v4-9b", + "params_b": 9.242, + "ifeval": 0.3502628581053826, + "bbh": 0.5336423991931557, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.3952792553191489, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.45157291666666666, + "hf_avg": 23.798994622099844 + }, + { + "hf_id": "apple/DCLM-7B", + "name": "DCLM-7B", + "params_b": 7, + "ifeval": 0.21727239280664196, + "bbh": 0.42321423668184166, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.3110871010638298, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.3920729166666667, + "hf_avg": 14.112858289728544 + }, + { + "hf_id": "appvoid/arco-2", + "name": "arco-2", + "params_b": 0.514, + "ifeval": 0.19913717824261848, + "bbh": 0.31456676274830814, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.1116190159574468, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.35359375, + "hf_avg": 5.137100838868086 + }, + { + "hf_id": "appvoid/arco-2-instruct", + "name": "arco-2-instruct", + "params_b": 0.514, + "ifeval": 0.2164479137577184, + "bbh": 0.31330470624451107, + "gpqa": 0.23825503355704697, + "mmlu_pro": 0.11128656914893617, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.34959375, + "hf_avg": 5.382510586018164 + }, + { + "hf_id": "arcee-ai/Arcee-Blitz", + "name": "Arcee-Blitz", + "params_b": 23.572, + "ifeval": 0.5543435861292482, + "bbh": 0.6606628431550884, + "gpqa": 0.3850671140939597, + "mmlu_pro": 0.6153590425531915, + "hf_math_lvl5": 0.34818731117824775, + "hf_musr": 0.50471875, + "hf_avg": 40.01232729285768 + }, + { + "hf_id": "arcee-ai/Arcee-Maestro-7B-Preview", + "name": "Arcee-Maestro-7B-Preview", + "params_b": 7.613, + "ifeval": 0.2750247122080524, + "bbh": 0.4648373015709704, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.3039394946808511, + "hf_math_lvl5": 0.49924471299093653, + "hf_musr": 0.3885416666666666, + "hf_avg": 23.793130973366363 + }, + { + "hf_id": "arcee-ai/Arcee-Nova", + "name": "Arcee-Nova", + "params_b": 72.706, + "ifeval": 0.7907485471881275, + "bbh": 0.694196965855899, + "gpqa": 0.3850671140939597, + "mmlu_pro": 0.5452127659574468, + "hf_math_lvl5": 0.4380664652567976, + "hf_musr": 0.45616666666666666, + "hf_avg": 44.05339262826514 + }, + { + "hf_id": "arcee-ai/Arcee-Spark", + "name": "Arcee-Spark", + "params_b": 7.616, + "ifeval": 0.5620874834328471, + "bbh": 0.5489474198567446, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3822307180851064, + "hf_math_lvl5": 0.29531722054380666, + "hf_musr": 0.40209374999999997, + "hf_avg": 28.406546265844867 + }, + { + "hf_id": "arcee-ai/Arcee-Spark", + "name": "Arcee-Spark", + "params_b": 7.616, + "ifeval": 0.571829412625168, + "bbh": 0.5480864114714127, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.38131648936170215, + "hf_math_lvl5": 0.11404833836858004, + "hf_musr": 0.4007604166666667, + "hf_avg": 25.443168747377587 + }, + { + "hf_id": "arcee-ai/Llama-3.1-SuperNova-Lite", + "name": "Llama-3.1-SuperNova-Lite", + "params_b": 8.03, + "ifeval": 0.8017393848322452, + "bbh": 0.5151992115104819, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.3877160904255319, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.41632291666666665, + "hf_avg": 30.193463980461605 + }, + { + "hf_id": "arcee-ai/Llama-Spark", + "name": "Llama-Spark", + "params_b": 8.03, + "ifeval": 0.7910732412221794, + "bbh": 0.5053504145749979, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3720910904255319, + "hf_math_lvl5": 0.13897280966767372, + "hf_musr": 0.35933333333333334, + "hf_avg": 27.037236901530367 + }, + { + "hf_id": "arcee-ai/SuperNova-Medius", + "name": "SuperNova-Medius", + "params_b": 14.77, + "ifeval": 0.7183584001560305, + "bbh": 0.6377284463115707, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.5034906914893617, + "hf_math_lvl5": 0.4690332326283988, + "hf_musr": 0.4232708333333333, + "hf_avg": 39.15430388277812 + }, + { + "hf_id": "arcee-ai/Virtuoso-Lite", + "name": "Virtuoso-Lite", + "params_b": 10.306, + "ifeval": 0.8099575792231279, + "bbh": 0.6098520975127147, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.4440658244680851, + "hf_math_lvl5": 0.25302114803625375, + "hf_musr": 0.4595416666666667, + "hf_avg": 36.41610644523877 + }, + { + "hf_id": "arcee-ai/Virtuoso-Small", + "name": "Virtuoso-Small", + "params_b": 14.77, + "ifeval": 0.7935211904413622, + "bbh": 0.6517633129454784, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.5191156914893617, + "hf_math_lvl5": 0.4093655589123867, + "hf_musr": 0.43390625, + "hf_avg": 40.53607757653 + }, + { + "hf_id": "arcee-ai/Virtuoso-Small-v2", + "name": "Virtuoso-Small-v2", + "params_b": 14.766, + "ifeval": 0.8273181824226385, + "bbh": 0.6554097094586643, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.518783244680851, + "hf_math_lvl5": 0.466012084592145, + "hf_musr": 0.43133333333333335, + "hf_avg": 42.475701925930025 + }, + { + "hf_id": "arcee-ai/raspberry-3B", + "name": "raspberry-3B", + "params_b": 3.086, + "ifeval": 0.31541642840995227, + "bbh": 0.42689280188827033, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.285405585106383, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.41232291666666665, + "hf_avg": 15.852706041886336 + }, + { + "hf_id": "argilla/notus-7b-v1", + "name": "notus-7b-v1", + "params_b": 7.242, + "ifeval": 0.508207112683236, + "bbh": 0.4511857407381495, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3003656914893617, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.33641666666666664, + "hf_avg": 18.474261772943255 + }, + { + "hf_id": "argilla/notux-8x7b-v1", + "name": "notux-8x7b-v1", + "params_b": 46.703, + "ifeval": 0.5422290633297429, + "bbh": 0.5363304164516353, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3660239361702128, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.41759375, + "hf_avg": 24.47858356291764 + }, + { + "hf_id": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", + "name": "Llama-3.1-8B-MagPie-Ultra", + "params_b": 8.03, + "ifeval": 0.5756514935925566, + "bbh": 0.46196134634468616, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.31441156914893614, + "hf_math_lvl5": 0.0770392749244713, + "hf_musr": 0.35425, + "hf_avg": 19.84899133862277 + }, + { + "hf_id": "arisin/orca-platypus-13B-slerp", + "name": "orca-platypus-13B-slerp", + "params_b": 13.016, + "ifeval": 0.26718107953563214, + "bbh": 0.46306234976954946, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.2592253989361702, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.4253125, + "hf_avg": 14.79190764445101 + }, + { + "hf_id": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", + "name": "LLAMA_Harsha_8_B_ORDP_10k", + "params_b": 8.03, + "ifeval": 0.34639090945358314, + "bbh": 0.4668707690948544, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.281000664893617, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.36965625, + "hf_avg": 16.221361486666726 + }, + { + "hf_id": "ashercn97/a1-v002", + "name": "a1-v002", + "params_b": 7.616, + "ifeval": 0.2584631001298776, + "bbh": 0.5261137844506322, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.41747007978723405, + "hf_math_lvl5": 0.23413897280966767, + "hf_musr": 0.41591666666666666, + "hf_avg": 22.8816161973256 + }, + { + "hf_id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", + "name": "Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", + "params_b": 8.03, + "ifeval": 0.4521037513796726, + "bbh": 0.4939066588253951, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3564660904255319, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.3863958333333333, + "hf_avg": 20.968535229566395 + }, + { + "hf_id": "automerger/YamshadowExperiment28-7B", + "name": "YamshadowExperiment28-7B", + "params_b": 7.242, + "ifeval": 0.4070156074770498, + "bbh": 0.5150030227855061, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.30601728723404253, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.4306145833333333, + "hf_avg": 19.884270029585778 + }, + { + "hf_id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", + "name": "GRAG-NEMO-12B-ORPO-HESSIAN-AI", + "params_b": 12.248, + "bbh": 0.26065954545866094, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.10605053191489362, + "hf_musr": 0.3446666666666667, + "hf_avg": 0.7378506308720487 + }, + { + "hf_id": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", + "name": "Mistral-7B-v0.1-signtensors-1-over-2", + "params_b": 7.242, + "ifeval": 0.21792178087474567, + "bbh": 0.4422884892437673, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.2999501329787234, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.40060416666666665, + "hf_avg": 14.370486963136566 + }, + { + "hf_id": "awnr/Mistral-7B-v0.1-signtensors-1-over-4", + "name": "Mistral-7B-v0.1-signtensors-1-over-4", + "params_b": 7, + "ifeval": 0.2133007087860211, + "bbh": 0.35070947402846286, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.2310505319148936, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.34603125, + "hf_avg": 8.747197547964134 + }, + { + "hf_id": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", + "name": "Mistral-7B-v0.1-signtensors-3-over-8", + "params_b": 7.242, + "ifeval": 0.23942915907569692, + "bbh": 0.4299940969601492, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.30011635638297873, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.38175000000000003, + "hf_avg": 13.813468900343905 + }, + { + "hf_id": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", + "name": "Mistral-7B-v0.1-signtensors-5-over-16", + "params_b": 7.242, + "ifeval": 0.21182684166899385, + "bbh": 0.4124151161773006, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.29579454787234044, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3686041666666667, + "hf_avg": 12.28452908669469 + }, + { + "hf_id": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", + "name": "Mistral-7B-v0.1-signtensors-7-over-16", + "params_b": 7.242, + "ifeval": 0.22936253584932426, + "bbh": 0.43158208189876196, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.30302526595744683, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.39520833333333333, + "hf_avg": 14.246704735704872 + }, + { + "hf_id": "aws-prototyping/MegaBeam-Mistral-7B-512k", + "name": "MegaBeam-Mistral-7B-512k", + "params_b": 7.242, + "ifeval": 0.5972586071623293, + "bbh": 0.3662336639946533, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.25889295212765956, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.3993645833333333, + "hf_avg": 17.582481722488744 + }, + { + "hf_id": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", + "name": "romulus-mistral-nemo-12b-simpo", + "params_b": 12.248, + "ifeval": 0.607924750772395, + "bbh": 0.5395057669562011, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.3469082446808511, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.42330208333333336, + "hf_avg": 25.176086393973254 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B", + "name": "MISCHIEVOUS-12B", + "params_b": 12.248, + "ifeval": 0.3851835352420466, + "bbh": 0.5404981575206657, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3671875, + "hf_math_lvl5": 0.12764350453172205, + "hf_musr": 0.4144895833333333, + "hf_avg": 22.61927817846981 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", + "name": "MISCHIEVOUS-12B-Mix_0.1v", + "params_b": 12.248, + "ifeval": 0.36362628935668473, + "bbh": 0.5436022524587655, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.3673537234042553, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.41315624999999995, + "hf_avg": 22.611076276517252 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", + "name": "MISCHIEVOUS-12B-Mix_0.2v", + "params_b": 12.248, + "ifeval": 0.3623773809048879, + "bbh": 0.5434355857920987, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.36627327127659576, + "hf_math_lvl5": 0.12613293051359517, + "hf_musr": 0.41582291666666665, + "hf_avg": 22.454256608568887 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", + "name": "MISCHIEVOUS-12B-Mix_0.3v", + "params_b": 12.248, + "ifeval": 0.38698209639312575, + "bbh": 0.5431389316665282, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.3663563829787234, + "hf_math_lvl5": 0.1336858006042296, + "hf_musr": 0.41312499999999996, + "hf_avg": 22.795688006734604 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", + "name": "MISCHIEVOUS-12B-Mix_0.4v", + "params_b": 12.248, + "ifeval": 0.6508142838778884, + "bbh": 0.5094241395384186, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.36826795212765956, + "hf_math_lvl5": 0.1351963746223565, + "hf_musr": 0.41762499999999997, + "hf_avg": 26.653906427315196 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", + "name": "MISCHIEVOUS-12B-Mix_0.5v", + "params_b": 12.248, + "ifeval": 0.3745672593163916, + "bbh": 0.5421932988679541, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.36610704787234044, + "hf_math_lvl5": 0.13670694864048338, + "hf_musr": 0.41315624999999995, + "hf_avg": 22.635396518839368 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", + "name": "MISCHIEVOUS-12B-Mix_0.6v", + "params_b": 12.248, + "ifeval": 0.43656608908806416, + "bbh": 0.5448909065942131, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.3661901595744681, + "hf_math_lvl5": 0.12537764350453173, + "hf_musr": 0.4184895833333333, + "hf_avg": 23.874344518139583 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", + "name": "MISCHIEVOUS-12B-Mix_III_IV_V", + "params_b": 12.248, + "ifeval": 0.40309379114083965, + "bbh": 0.54645347832278, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3664394946808511, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.41982291666666666, + "hf_avg": 23.23673015528421 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", + "name": "MISCHIEVOUS-12B-Mix_III_ex_V", + "params_b": 12.248, + "ifeval": 0.43162032296528763, + "bbh": 0.5448926891254073, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3648603723404255, + "hf_math_lvl5": 0.13217522658610273, + "hf_musr": 0.4197916666666666, + "hf_avg": 23.80784193287211 + }, + { + "hf_id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", + "name": "MISCHIEVOUS-12B-Mix_Neo", + "params_b": 12.248, + "ifeval": 0.6249606599378538, + "bbh": 0.5077574728717519, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.36851728723404253, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.41502083333333334, + "hf_avg": 26.077670184051115 + }, + { + "hf_id": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", + "name": "Mistral-Nemo-VICIOUS_MESH-12B-2407", + "params_b": 12.248, + "ifeval": 0.6705729686121713, + "bbh": 0.5155964285724085, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.36768617021276595, + "hf_math_lvl5": 0.13670694864048338, + "hf_musr": 0.4309895833333333, + "hf_avg": 27.48211728638603 + }, + { + "hf_id": "bamec66557/NameLess-12B-prob", + "name": "NameLess-12B-prob", + "params_b": 12.248, + "ifeval": 0.6602315190361574, + "bbh": 0.5158141019151304, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.3684341755319149, + "hf_math_lvl5": 0.12613293051359517, + "hf_musr": 0.433625, + "hf_avg": 27.18904881817572 + }, + { + "hf_id": "bamec66557/VICIOUS_MESH-12B-ALPHA", + "name": "VICIOUS_MESH-12B-ALPHA", + "params_b": 12.248, + "ifeval": 0.6365011502812536, + "bbh": 0.5093679898057982, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3696808510638298, + "hf_math_lvl5": 0.13670694864048338, + "hf_musr": 0.4202916666666667, + "hf_avg": 26.45551197855697 + }, + { + "hf_id": "bamec66557/VICIOUS_MESH-12B-BETA", + "name": "VICIOUS_MESH-12B-BETA", + "params_b": 12.248, + "ifeval": 0.6720967034136092, + "bbh": 0.5155964285724085, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.36785239361702127, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.4309895833333333, + "hf_avg": 27.466293297388987 + }, + { + "hf_id": "bamec66557/VICIOUS_MESH-12B-GAMMA", + "name": "VICIOUS_MESH-12B-GAMMA", + "params_b": 12.248, + "ifeval": 0.6361764562472019, + "bbh": 0.5181908355069679, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3666057180851064, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.43632291666666667, + "hf_avg": 26.916848818972074 + }, + { + "hf_id": "bamec66557/VICIOUS_MESH-12B-NEMO", + "name": "VICIOUS_MESH-12B-NEMO", + "params_b": 12.248, + "ifeval": 0.40221944440750546, + "bbh": 0.5441680901949261, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.37159242021276595, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.42506249999999995, + "hf_avg": 23.316444114380346 + }, + { + "hf_id": "bamec66557/VICIOUS_MESH-12B-OMEGA", + "name": "VICIOUS_MESH-12B-OMEGA", + "params_b": 12.248, + "ifeval": 0.6699734482284783, + "bbh": 0.516644373777888, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.36768617021276595, + "hf_math_lvl5": 0.13444108761329304, + "hf_musr": 0.43232291666666667, + "hf_avg": 27.49554497317065 + }, + { + "hf_id": "benhaotang/phi4-qwq-sky-t1", + "name": "phi4-qwq-sky-t1", + "params_b": 14.66, + "ifeval": 0.04596249063595704, + "bbh": 0.6710520703782934, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5244348404255319, + "hf_math_lvl5": 0.41012084592145015, + "hf_musr": 0.48995833333333333, + "hf_avg": 31.01825262068425 + }, + { + "hf_id": "beomi/gemma-mling-7b", + "name": "gemma-mling-7b", + "params_b": 8.538, + "ifeval": 0.20290939152559653, + "bbh": 0.40675941947154004, + "gpqa": 0.25, + "mmlu_pro": 0.2632978723404255, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.37585416666666666, + "hf_avg": 11.392173694644192 + }, + { + "hf_id": "beowolx/CodeNinja-1.0-OpenChat-7B", + "name": "CodeNinja-1.0-OpenChat-7B", + "params_b": 7.242, + "ifeval": 0.5446770125489258, + "bbh": 0.4441338669403703, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3015292553191489, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.42432291666666666, + "hf_avg": 20.460682433184903 + }, + { + "hf_id": "berkeley-nest/Starling-LM-7B-alpha", + "name": "Starling-LM-7B-alpha", + "params_b": 7.242, + "ifeval": 0.5480491761858536, + "bbh": 0.4440065261164004, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3171542553191489, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.41201041666666666, + "hf_avg": 20.83936104726783, + "arena_elo": 1167.52, + "arena_rank": 268, + "arena_votes": 10224 + }, + { + "hf_id": "bfuzzy1/acheron-m", + "name": "acheron-m", + "params_b": 0.514, + "ifeval": 0.17583123889058808, + "bbh": 0.29284447696551025, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11128656914893617, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.3486666666666667, + "hf_avg": 4.225197951458999 + }, + { + "hf_id": "bfuzzy1/acheron-m1a-llama", + "name": "acheron-m1a-llama", + "params_b": 0.514, + "ifeval": 0.11245827737070972, + "bbh": 0.29560475093811295, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11461103723404255, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.36330208333333336, + "hf_avg": 3.348613471275291 + }, + { + "hf_id": "bfuzzy1/llambses-1", + "name": "llambses-1", + "params_b": 7.242, + "ifeval": 0.3553837152089788, + "bbh": 0.5046977405175623, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.31399601063829785, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.45290625, + "hf_avg": 19.83707295838704 + }, + { + "hf_id": "bhuvneshsaini/merged_model", + "name": "merged_model", + "params_b": 4.715, + "ifeval": 0.1812767900282362, + "bbh": 0.3359777949071243, + "gpqa": 0.25, + "mmlu_pro": 0.14453125, + "hf_musr": 0.34971875, + "hf_avg": 5.795748828202385 + }, + { + "hf_id": "bigcode/starcoder2-15b", + "name": "starcoder2-15b", + "params_b": 15.958, + "ifeval": 0.2780223141265177, + "bbh": 0.4447957841230437, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.23528922872340424, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.35009375000000004, + "hf_avg": 12.539175421645837 + }, + { + "hf_id": "bigcode/starcoder2-3b", + "name": "starcoder2-3b", + "params_b": 3.03, + "ifeval": 0.20370838264693236, + "bbh": 0.35087141384601755, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.1636469414893617, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.34345833333333337, + "hf_avg": 6.549147626379535 + }, + { + "hf_id": "bigcode/starcoder2-7b", + "name": "starcoder2-7b", + "params_b": 7.174, + "ifeval": 0.22091938279321088, + "bbh": 0.36609857669123036, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.16422872340425532, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.3793333333333333, + "hf_avg": 8.2934383764798 + }, + { + "hf_id": "bigscience/bloom-1b1", + "name": "bloom-1b1", + "params_b": 1.065, + "ifeval": 0.13733781920858879, + "bbh": 0.31072762377370394, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.1107878989361702, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.36999999999999994, + "hf_avg": 4.025155876068456 + }, + { + "hf_id": "bigscience/bloom-1b7", + "name": "bloom-1b7", + "params_b": 1.722, + "ifeval": 0.10438968603305895, + "bbh": 0.314054919904072, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.10862699468085106, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.38857291666666666, + "hf_avg": 4.046754480742192 + }, + { + "hf_id": "bigscience/bloom-3b", + "name": "bloom-3b", + "params_b": 3.003, + "ifeval": 0.1270961050013963, + "bbh": 0.3062918592346337, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.11328125, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.3980625, + "hf_avg": 4.387894128649155 + }, + { + "hf_id": "bigscience/bloom-560m", + "name": "bloom-560m", + "params_b": 0.559, + "ifeval": 0.06202431769926019, + "bbh": 0.3025950541549823, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.11643949468085106, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.4030833333333333, + "hf_avg": 3.50724359916236 + }, + { + "hf_id": "bigscience/bloom-7b1", + "name": "bloom-7b1", + "params_b": 7.069, + "ifeval": 0.13221696210499254, + "bbh": 0.3113718529627139, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.11045545212765957, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.34869791666666666, + "hf_avg": 3.795510241848182 + }, + { + "hf_id": "bond005/meno-tiny-0.1", + "name": "meno-tiny-0.1", + "params_b": 1.544, + "ifeval": 0.45497613000172876, + "bbh": 0.4262909130965971, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2785904255319149, + "hf_math_lvl5": 0.13897280966767372, + "hf_musr": 0.4184583333333333, + "hf_avg": 18.850916827026232 + }, + { + "hf_id": "bosonai/Higgs-Llama-3-70B", + "name": "Higgs-Llama-3-70B", + "params_b": 70.554, + "ifeval": 0.5560678998390935, + "bbh": 0.625765879603832, + "gpqa": 0.36661073825503354, + "mmlu_pro": 0.49019281914893614, + "hf_math_lvl5": 0.25226586102719034, + "hf_musr": 0.44708333333333333, + "hf_avg": 33.525397972968115 + }, + { + "hf_id": "braindao/DeepSeek-R1-Distill-Qwen-14B", + "name": "DeepSeek-R1-Distill-Qwen-14B", + "params_b": 14.77, + "ifeval": 0.4171575863154209, + "bbh": 0.30329653176003074, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.11269946808510638, + "hf_math_lvl5": 0.17598187311178248, + "hf_musr": 0.4487916666666667, + "hf_avg": 13.942561374741304 + }, + { + "hf_id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", + "name": "DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", + "params_b": 14.77, + "ifeval": 0.3751922676276723, + "bbh": 0.4926903187457697, + "gpqa": 0.3447986577181208, + "mmlu_pro": 0.42428523936170215, + "hf_math_lvl5": 0.5015105740181269, + "hf_musr": 0.4220625, + "hf_avg": 29.311295170279617 + }, + { + "hf_id": "braindao/DeepSeek-R1-Distill-Qwen-7B", + "name": "DeepSeek-R1-Distill-Qwen-7B", + "params_b": 7.616, + "ifeval": 0.39679938119744496, + "bbh": 0.2886778102988436, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.1141123670212766, + "hf_math_lvl5": 0.19184290030211482, + "hf_musr": 0.37666666666666665, + "hf_avg": 11.398600284940265 + }, + { + "hf_id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", + "name": "DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", + "params_b": 7.616, + "ifeval": 0.3654503384353515, + "bbh": 0.2958444769655102, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.11328125, + "hf_math_lvl5": 0.17371601208459214, + "hf_musr": 0.38460416666666664, + "hf_avg": 10.72103457820772 + }, + { + "hf_id": "braindao/Qwen2.5-14B", + "name": "Qwen2.5-14B", + "params_b": 14.77, + "ifeval": 0.540854931581537, + "bbh": 0.5852660409288039, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.48836436170212766, + "hf_math_lvl5": 0.29229607250755285, + "hf_musr": 0.41235416666666663, + "hf_avg": 32.436238941896505 + }, + { + "hf_id": "braindao/iq-code-evmind-0.5b", + "name": "iq-code-evmind-0.5b", + "params_b": 0.494, + "ifeval": 0.3215612353001148, + "bbh": 0.31637440507987097, + "gpqa": 0.24161073825503357, + "mmlu_pro": 0.11893284574468085, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.33037500000000003, + "hf_avg": 7.0224136272788416 + }, + { + "hf_id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", + "name": "3Bgeneral-ECE-PRYMMAL-Martial", + "params_b": 3.821, + "ifeval": 0.32893057088525113, + "bbh": 0.5458008312900208, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.3933676861702128, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.43728125, + "hf_avg": 23.281187153244208 + }, + { + "hf_id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", + "name": "3Bgeneralv2-ECE-PRYMMAL-Martial", + "params_b": 3, + "ifeval": 0.567708125551315, + "bbh": 0.5607195549186694, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.45054853723404253, + "hf_math_lvl5": 0.3496978851963746, + "hf_musr": 0.43563541666666666, + "hf_avg": 31.48239660410299 + }, + { + "hf_id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", + "name": "3Blareneg-ECE-PRYMMAL-Martial", + "params_b": 3.821, + "ifeval": 0.28763902002242936, + "bbh": 0.535846215598753, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4015957446808511, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.4428958333333333, + "hf_avg": 22.756317201307535 + }, + { + "hf_id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", + "name": "3Blarenegv2-ECE-PRYMMAL-Martial", + "params_b": 7.616, + "ifeval": 0.5661843907498769, + "bbh": 0.5607195549186694, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.45054853723404253, + "hf_math_lvl5": 0.3496978851963746, + "hf_musr": 0.43563541666666666, + "hf_avg": 31.457001024079023 + }, + { + "hf_id": "bunnycore/HyperLlama-3.1-8B", + "name": "HyperLlama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.7883005979689446, + "bbh": 0.5103385292046213, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3783244680851064, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.38292708333333336, + "hf_avg": 28.44897570570595 + }, + { + "hf_id": "bunnycore/Phi-4-Model-Stock", + "name": "Phi-4-Model-Stock", + "params_b": 14.66, + "ifeval": 0.6878837041272712, + "bbh": 0.6889699980822082, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.5368184840425532, + "hf_math_lvl5": 0.4297583081570997, + "hf_musr": 0.44413541666666667, + "hf_avg": 40.7857160927999 + }, + { + "hf_id": "bunnycore/Phi-4-RP-v0", + "name": "Phi-4-RP-v0", + "params_b": 14.66, + "ifeval": 0.6827129793392643, + "bbh": 0.685633603278299, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.5364029255319149, + "hf_math_lvl5": 0.33157099697885195, + "hf_musr": 0.41409375, + "hf_avg": 38.21180801915013 + }, + { + "hf_id": "bunnycore/Phi-4-ReasoningRP", + "name": "Phi-4-ReasoningRP", + "params_b": 14.66, + "ifeval": 0.6736204382150472, + "bbh": 0.6922187070022994, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.5420545212765957, + "hf_math_lvl5": 0.4569486404833837, + "hf_musr": 0.44909375, + "hf_avg": 40.953870582332804 + }, + { + "hf_id": "bunnycore/Phi-4-Stock-RP", + "name": "Phi-4-Stock-RP", + "params_b": 14.66, + "ifeval": 0.6399231816025922, + "bbh": 0.6859633715492438, + "gpqa": 0.35822147651006714, + "mmlu_pro": 0.5316655585106383, + "hf_math_lvl5": 0.3413897280966767, + "hf_musr": 0.47147916666666667, + "hf_avg": 39.044084455528825 + }, + { + "hf_id": "bunnycore/SmolLM2-1.7-Persona", + "name": "SmolLM2-1.7-Persona", + "params_b": 1.711, + "ifeval": 0.5465254413844156, + "bbh": 0.3623213930905173, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.1973902925531915, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.334125, + "hf_avg": 14.527349011570474 + }, + { + "hf_id": "bunnycore/SmolLM2-1.7B-roleplay-lora", + "name": "SmolLM2-1.7B-roleplay-lora", + "params_b": 3.423, + "ifeval": 0.5382075116247114, + "bbh": 0.3610343412303005, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.19664228723404256, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.33945833333333336, + "hf_avg": 14.47905997638928 + }, + { + "hf_id": "byroneverson/Mistral-Small-Instruct-2409-abliterated", + "name": "Mistral-Small-Instruct-2409-abliterated", + "params_b": 22.247, + "ifeval": 0.6970759806203096, + "bbh": 0.5237864400325174, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.39228723404255317, + "hf_math_lvl5": 0.24773413897280966, + "hf_musr": 0.36971875000000004, + "hf_avg": 28.805845718641834 + }, + { + "hf_id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", + "name": "Yi-1.5-9B-Chat-16K-abliterated", + "params_b": 8.829, + "ifeval": 0.5528453392553979, + "bbh": 0.5282050829986801, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.38231382978723405, + "hf_math_lvl5": 0.14123867069486404, + "hf_musr": 0.4734375, + "hf_avg": 26.948135419447976 + }, + { + "hf_id": "byroneverson/Yi-1.5-9B-Chat-abliterated", + "name": "Yi-1.5-9B-Chat-abliterated", + "params_b": 8.829, + "ifeval": 0.5723291976400395, + "bbh": 0.5401219363002313, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.3715093085106383, + "hf_math_lvl5": 0.1661631419939577, + "hf_musr": 0.43886458333333334, + "hf_avg": 26.270006043497983 + }, + { + "hf_id": "carsenk/flippa-v6", + "name": "flippa-v6", + "params_b": 16.061, + "ifeval": 0.3439429602344003, + "bbh": 0.5046972457053399, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3667719414893617, + "hf_math_lvl5": 0.1404833836858006, + "hf_musr": 0.40887500000000004, + "hf_avg": 20.776367026140395 + }, + { + "hf_id": "carsenk/phi3.5_mini_exp_825_uncensored", + "name": "phi3.5_mini_exp_825_uncensored", + "params_b": 3.821, + "ifeval": 0.13641360479084386, + "bbh": 0.29647345147918264, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11751994680851063, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.36441666666666667, + "hf_avg": 3.6431087408611895 + }, + { + "hf_id": "cckm/tinymistral_950m", + "name": "tinymistral_950m", + "params_b": 0.955, + "ifeval": 0.23952889444451833, + "bbh": 0.29694562621388126, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.10962433510638298, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.3553645833333334, + "hf_avg": 5.219822920055284 + }, + { + "hf_id": "cgato/TheSalt-L3-8b-v0.3.2", + "name": "TheSalt-L3-8b-v0.3.2", + "params_b": 8.03, + "ifeval": 0.27050337548814923, + "bbh": 0.29679653176003074, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.11394614361702128, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.38962499999999994, + "hf_avg": 7.39988938907955 + }, + { + "hf_id": "chargoddard/prometheus-2-llama-3-8b", + "name": "prometheus-2-llama-3-8b", + "params_b": 8.03, + "ifeval": 0.5288900118352637, + "bbh": 0.4931144581470071, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.30867686170212766, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.33958333333333335, + "hf_avg": 19.318861828334345 + }, + { + "hf_id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", + "name": "Llama-3-Instruct-8B-SimPO-ExPO", + "params_b": 8.03, + "ifeval": 0.6433707008515184, + "bbh": 0.4764515968840137, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.340093085106383, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.3920104166666667, + "hf_avg": 23.054922214082467 + }, + { + "hf_id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", + "name": "Mistral7B-PairRM-SPPO-ExPO", + "params_b": 7.242, + "ifeval": 0.36734863495525205, + "bbh": 0.3882191262277366, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.2551529255319149, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.40553124999999995, + "hf_avg": 13.617148888995521 + }, + { + "hf_id": "cjvt/GaMS-1B", + "name": "GaMS-1B", + "params_b": 1.54, + "ifeval": 0.163541625110263, + "bbh": 0.3074752552734472, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11486037234042554, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.36841666666666667, + "hf_avg": 4.6217597962340875 + }, + { + "hf_id": "cloudyu/Llama-3-70Bx2-MOE", + "name": "Llama-3-70Bx2-MOE", + "params_b": 126.926, + "ifeval": 0.5482486469234964, + "bbh": 0.6636234572270707, + "gpqa": 0.3934563758389262, + "mmlu_pro": 0.5142121010638298, + "hf_math_lvl5": 0.2175226586102719, + "hf_musr": 0.48118750000000005, + "hf_avg": 35.66646489034437 + }, + { + "hf_id": "cloudyu/Mixtral_11Bx2_MoE_19B", + "name": "Mixtral_11Bx2_MoE_19B", + "params_b": 19.188, + "ifeval": 0.3850837998732253, + "bbh": 0.5208516020145867, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.33111702127659576, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.4296875, + "hf_avg": 20.407079261242234 + }, + { + "hf_id": "cloudyu/Mixtral_34Bx2_MoE_60B", + "name": "Mixtral_34Bx2_MoE_60B", + "params_b": 60.814, + "ifeval": 0.4537770892343427, + "bbh": 0.5869701263465353, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.47664561170212766, + "hf_math_lvl5": 0.0770392749244713, + "hf_musr": 0.4625208333333333, + "hf_avg": 27.611169192851037 + }, + { + "hf_id": "cloudyu/Mixtral_7Bx2_MoE", + "name": "Mixtral_7Bx2_MoE", + "params_b": 12.879, + "ifeval": 0.4480068440626427, + "bbh": 0.5159732691655027, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.30435505319148937, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.44729166666666664, + "hf_avg": 21.447315990602657 + }, + { + "hf_id": "cloudyu/S1-Llama-3.2-3Bx4-MoE", + "name": "S1-Llama-3.2-3Bx4-MoE", + "params_b": 9.555, + "ifeval": 0.530214275899059, + "bbh": 0.43578925882973, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.30435505319148937, + "hf_math_lvl5": 0.12009063444108761, + "hf_musr": 0.345625, + "hf_avg": 19.960751542556377 + }, + { + "hf_id": "cloudyu/Yi-34Bx2-MoE-60B-DPO", + "name": "Yi-34Bx2-MoE-60B-DPO", + "params_b": 60.814, + "ifeval": 0.531887613753729, + "bbh": 0.516831447641953, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.46766954787234044, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.43746875, + "hf_avg": 26.04350240565636 + }, + { + "hf_id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", + "name": "Llama-3.1-8B-paraphrase-type-generation-apty-ipo", + "params_b": 8.03, + "ifeval": 0.1326668794354535, + "bbh": 0.3800219303191354, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.2590591755319149, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.43321875, + "hf_avg": 10.051762435192133 + }, + { + "hf_id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", + "name": "Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", + "params_b": 8.03, + "ifeval": 0.13184240038652995, + "bbh": 0.37889016032903705, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.2562333776595745, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.43055208333333334, + "hf_avg": 10.070699081571052 + }, + { + "hf_id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", + "name": "Llama-3.1-8B-paraphrase-type-generation-etpc", + "params_b": 8.03, + "ifeval": 0.12085156274241235, + "bbh": 0.3780811415223316, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.25556848404255317, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.43185416666666665, + "hf_avg": 9.681788175038944 + }, + { + "hf_id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", + "name": "Dolphin3.0-Llama3.1-8B", + "params_b": 8.03, + "ifeval": 0.7621222799948582, + "bbh": 0.4916366353921198, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.2992021276595745, + "hf_math_lvl5": 0.12311178247734139, + "hf_musr": 0.36534375, + "hf_avg": 25.26984394173985 + }, + { + "hf_id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", + "name": "Dolphin3.0-Llama3.2-1B", + "params_b": 1.236, + "ifeval": 0.5427787160290252, + "bbh": 0.31222474255909144, + "gpqa": 0.22986577181208054, + "mmlu_pro": 0.13754986702127658, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.32488541666666665, + "hf_avg": 11.140988411587436 + }, + { + "hf_id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", + "name": "Dolphin3.0-Qwen2.5-0.5B", + "params_b": 0.494, + "ifeval": 0.4697136930012367, + "bbh": 0.31142229157184026, + "gpqa": 0.2348993288590604, + "mmlu_pro": 0.14128989361702127, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.35545833333333327, + "hf_avg": 10.626273337296174 + }, + { + "hf_id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "name": "Dolphin3.0-R1-Mistral-24B", + "params_b": 23.572, + "ifeval": 0.406816136739407, + "bbh": 0.5359697041031141, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.300531914893617, + "hf_math_lvl5": 0.3119335347432024, + "hf_musr": 0.3951770833333333, + "hf_avg": 23.513141611997387 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9-llama3-8b", + "name": "dolphin-2.9-llama3-8b", + "params_b": 8.03, + "ifeval": 0.38503393218881454, + "bbh": 0.49499220166609187, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.277094414893617, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.43753125, + "hf_avg": 18.415461238797384 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", + "name": "dolphin-2.9.1-llama-3-70b", + "params_b": 70.554, + "ifeval": 0.3760167466765959, + "bbh": 0.5204919312821467, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.41298204787234044, + "hf_math_lvl5": 0.18202416918429004, + "hf_musr": 0.49756249999999996, + "hf_avg": 25.53438611403665 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", + "name": "dolphin-2.9.1-yi-1.5-34b", + "params_b": 34.389, + "ifeval": 0.3852588908540451, + "bbh": 0.6076225600626862, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.4518783244680851, + "hf_math_lvl5": 0.1865558912386707, + "hf_musr": 0.45979166666666665, + "hf_avg": 28.3072040459155 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", + "name": "dolphin-2.9.1-yi-1.5-9b", + "params_b": 8.829, + "ifeval": 0.44653297694561545, + "bbh": 0.5484314644603556, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.3966921542553192, + "hf_math_lvl5": 0.15181268882175228, + "hf_musr": 0.4348020833333333, + "hf_avg": 25.639724124308234 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", + "name": "dolphin-2.9.2-Phi-3-Medium", + "params_b": -1, + "ifeval": 0.4247762603226107, + "bbh": 0.6456739302686527, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.45553523936170215, + "hf_math_lvl5": 0.18277945619335348, + "hf_musr": 0.4190520833333333, + "hf_avg": 28.614516488633864 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", + "name": "dolphin-2.9.2-Phi-3-Medium-abliterated", + "params_b": 13.96, + "ifeval": 0.36125369574950017, + "bbh": 0.612322545411745, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4493849734042553, + "hf_math_lvl5": 0.12386706948640483, + "hf_musr": 0.4111770833333333, + "hf_avg": 25.590063720348784 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", + "name": "dolphin-2.9.2-Phi-3-Medium-abliterated", + "params_b": 13.96, + "ifeval": 0.4123614232458765, + "bbh": 0.638289226729353, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.45246010638297873, + "hf_math_lvl5": 0.18202416918429004, + "hf_musr": 0.43492708333333335, + "hf_avg": 28.53887227810586 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", + "name": "dolphin-2.9.2-qwen2-72b", + "params_b": 72, + "ifeval": 0.6343778950961227, + "bbh": 0.6296364939584073, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.547124335106383, + "hf_math_lvl5": 0.2802114803625378, + "hf_musr": 0.45207291666666666, + "hf_avg": 36.97892776605849 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", + "name": "dolphin-2.9.2-qwen2-7b", + "params_b": 7.616, + "ifeval": 0.3534599307614906, + "bbh": 0.48938263759195594, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.4050864361702128, + "hf_math_lvl5": 0.13444108761329304, + "hf_musr": 0.41914583333333333, + "hf_avg": 21.27208217477453 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", + "name": "dolphin-2.9.3-Yi-1.5-34B-32k", + "params_b": 34, + "ifeval": 0.3639266036339136, + "bbh": 0.6046995537773227, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.4630152925531915, + "hf_math_lvl5": 0.16691842900302115, + "hf_musr": 0.43105208333333334, + "hf_avg": 27.098382647973178 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", + "name": "dolphin-2.9.3-mistral-7B-32k", + "params_b": 7.248, + "ifeval": 0.4126362495955177, + "bbh": 0.48125401481062013, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2820811170212766, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.4642604166666667, + "hf_avg": 19.348695949526363 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", + "name": "dolphin-2.9.3-mistral-nemo-12b", + "params_b": 12.248, + "ifeval": 0.5600894515441251, + "bbh": 0.5480369183144175, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.3376828457446808, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.4429895833333333, + "hf_avg": 24.9724308416491 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", + "name": "dolphin-2.9.4-gemma2-2b", + "params_b": 2.614, + "ifeval": 0.08955127949396491, + "bbh": 0.40813187411055213, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.2105219414893617, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.41796875, + "hf_avg": 9.835205324051353 + }, + { + "hf_id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", + "name": "dolphin-2.9.4-llama3.1-8b", + "params_b": 8.03, + "ifeval": 0.27572396796056686, + "bbh": 0.35236263850832567, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.12367021276595745, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3236145833333333, + "hf_avg": 7.1318611826224165 + }, + { + "hf_id": "cpayne1303/cp2024", + "name": "cp2024", + "params_b": 0.031, + "ifeval": 0.16581448334862608, + "bbh": 0.29853854089245085, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.11012300531914894, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.3383125, + "hf_avg": 3.702132658945494 + }, + { + "hf_id": "cpayne1303/cp2024-instruct", + "name": "cp2024-instruct", + "params_b": 0.031, + "ifeval": 0.17061064641817045, + "bbh": 0.2946778102988436, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11668882978723404, + "hf_musr": 0.3686354166666666, + "hf_avg": 4.319731373654743 + }, + { + "hf_id": "cpayne1303/llama-43m-beta", + "name": "llama-43m-beta", + "params_b": 0.043, + "ifeval": 0.19156837191983936, + "bbh": 0.29767781029884355, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.11319813829787234, + "hf_musr": 0.3871770833333333, + "hf_avg": 5.288331692594867 + }, + { + "hf_id": "cpayne1303/llama-43m-beta", + "name": "llama-43m-beta", + "params_b": 0.043, + "ifeval": 0.19489066787235645, + "bbh": 0.29646319842669744, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.11112034574468085, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.3885416666666666, + "hf_avg": 5.422628758277313 + }, + { + "hf_id": "cpayne1303/smallcp2024", + "name": "smallcp2024", + "params_b": 0.002, + "ifeval": 0.1581958093414363, + "bbh": 0.3027047714604053, + "gpqa": 0.23070469798657717, + "mmlu_pro": 0.11136968085106383, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.34246874999999993, + "hf_avg": 3.543848434170488 + }, + { + "hf_id": "cstr/llama3.1-8b-spaetzle-v90", + "name": "llama3.1-8b-spaetzle-v90", + "params_b": 8.03, + "ifeval": 0.7356192679867197, + "bbh": 0.5302860633332208, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.37308843085106386, + "hf_math_lvl5": 0.14954682779456194, + "hf_musr": 0.41343749999999996, + "hf_avg": 27.855367438578327 + }, + { + "hf_id": "cyberagent/calm3-22b-chat", + "name": "calm3-22b-chat", + "params_b": 22.543, + "ifeval": 0.509131327100981, + "bbh": 0.4991683247746046, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.29496343085106386, + "hf_math_lvl5": 0.06948640483383686, + "hf_musr": 0.45532291666666663, + "hf_avg": 21.451118364125847 + }, + { + "hf_id": "darkc0de/BuddyGlassUncensored2025.2", + "name": "BuddyGlassUncensored2025.2", + "params_b": 10.306, + "ifeval": 0.7731131176389756, + "bbh": 0.6095411371819216, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.43359375, + "hf_math_lvl5": 0.24018126888217523, + "hf_musr": 0.4070833333333333, + "hf_avg": 33.625792878130945 + }, + { + "hf_id": "databricks/dbrx-base", + "name": "dbrx-base", + "ifeval": 0.08214723926380368, + "bbh": 0.5195833333333334, + "gpqa": 0.32666666666666666, + "mmlu_pro": 0.35, + "hf_math_lvl5": 0.1, + "hf_musr": 0.4066666666666667, + "hf_avg": 16.35943247356884 + }, + { + "hf_id": "databricks/dbrx-instruct", + "name": "dbrx-instruct", + "params_b": 131.597, + "ifeval": 0.5415796752616391, + "bbh": 0.5428960796934387, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.36826795212765956, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.42692708333333335, + "hf_avg": 25.19901027244322 + }, + { + "hf_id": "databricks/dolly-v1-6b", + "name": "dolly-v1-6b", + "params_b": 6, + "ifeval": 0.22244311759464885, + "bbh": 0.3172089528774696, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.12657912234042554, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.40041666666666664, + "hf_avg": 6.981231710564127 + }, + { + "hf_id": "databricks/dolly-v2-12b", + "name": "dolly-v2-12b", + "params_b": 12, + "ifeval": 0.23550734273948679, + "bbh": 0.33199731673771277, + "gpqa": 0.2407718120805369, + "mmlu_pro": 0.11286569148936171, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.37390625000000005, + "hf_avg": 6.3704357034963754, + "arena_elo": 979.94, + "arena_rank": 314, + "arena_votes": 3412 + }, + { + "hf_id": "databricks/dolly-v2-3b", + "name": "dolly-v2-3b", + "params_b": 3, + "ifeval": 0.22471597583301195, + "bbh": 0.30792785961544844, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.11452792553191489, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.33378125, + "hf_avg": 5.59965824307081 + }, + { + "hf_id": "databricks/dolly-v2-7b", + "name": "dolly-v2-7b", + "params_b": 7, + "ifeval": 0.2009856070781083, + "bbh": 0.31730628122070326, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.1149434840425532, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.35530208333333335, + "hf_avg": 5.647360474812998 + }, + { + "hf_id": "davidkim205/Rhea-72b-v0.5", + "name": "Rhea-72b-v0.5", + "params_b": 72, + "ifeval": 0.014538092261865185, + "bbh": 0.30783395929068597, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11660571808510638, + "hf_math_lvl5": 0.17371601208459214, + "hf_musr": 0.42413541666666665, + "hf_avg": 5.99895584588256 + }, + { + "hf_id": "davidkim205/nox-solar-10.7b-v4", + "name": "nox-solar-10.7b-v4", + "params_b": 10.732, + "ifeval": 0.3753418706809044, + "bbh": 0.4814038018918371, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3332779255319149, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.42984375, + "hf_avg": 18.514321082123697 + }, + { + "hf_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "name": "DeepSeek-R1-Distill-Llama-70B", + "params_b": 70.554, + "ifeval": 0.43359397509718656, + "bbh": 0.5634962649702303, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.4748171542553192, + "hf_math_lvl5": 0.3074018126888218, + "hf_musr": 0.43421875000000004, + "hf_avg": 27.809426360756188, + "lb_name": "deepseek-r1-distill-llama-70b", + "lb_global": 0.565669411764706, + "lb_reasoning": 0.54375, + "lb_coding": 0.466475, + "lb_math": 0.58802, + "lb_language": 0.37050333333333335, + "lb_if": 0.699375, + "lb_data_analysis": 0.608095 + }, + { + "hf_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "name": "DeepSeek-R1-Distill-Llama-8B", + "params_b": 8.03, + "ifeval": 0.37823973723054827, + "bbh": 0.323935108539057, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.20894281914893617, + "hf_math_lvl5": 0.21978851963746224, + "hf_musr": 0.32497916666666665, + "hf_avg": 13.059950104920146 + }, + { + "hf_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "name": "DeepSeek-R1-Distill-Qwen-1.5B", + "params_b": 1.777, + "ifeval": 0.34634104176917246, + "bbh": 0.32409879947333436, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.11868351063829788, + "hf_math_lvl5": 0.1691842900302115, + "hf_musr": 0.36345833333333327, + "hf_avg": 10.351036796154286 + }, + { + "hf_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "name": "DeepSeek-R1-Distill-Qwen-14B", + "params_b": 14.77, + "ifeval": 0.43816517950150047, + "bbh": 0.5905573130283358, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.4666722074468085, + "hf_math_lvl5": 0.5702416918429003, + "hf_musr": 0.536625, + "hf_avg": 38.22146462032291 + }, + { + "hf_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "name": "DeepSeek-R1-Distill-Qwen-32B", + "params_b": 32.764, + "ifeval": 0.4186314534324481, + "bbh": 0.41969150892898055, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.46866688829787234, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.4526041666666667, + "hf_avg": 22.96226839270608, + "lb_name": "deepseek-r1-distill-qwen-32b", + "lb_global": 0.4805958823529412, + "lb_reasoning": 0.45375, + "lb_coding": 0.47030000000000005, + "lb_math": 0.6013166666666666, + "lb_language": 0.30915, + "lb_if": 0.557125, + "lb_data_analysis": 0.4694 + }, + { + "hf_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "name": "DeepSeek-R1-Distill-Qwen-7B", + "params_b": 7.616, + "ifeval": 0.40376866713653103, + "bbh": 0.34425676981862185, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2321309840425532, + "hf_math_lvl5": 0.19561933534743203, + "hf_musr": 0.36628124999999995, + "hf_avg": 14.99492256865316 + }, + { + "hf_id": "deepseek-ai/deepseek-llm-67b-chat", + "name": "deepseek-llm-67b-chat", + "params_b": 67, + "ifeval": 0.5587153197959193, + "bbh": 0.5243416179742358, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.3943650265957447, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.5058645833333334, + "hf_avg": 27.310631874736753, + "arena_elo": 1184.35, + "arena_rank": 254, + "arena_votes": 4933 + }, + { + "hf_id": "deepseek-ai/deepseek-llm-7b-base", + "name": "deepseek-llm-7b-base", + "params_b": 7, + "ifeval": 0.217871913190335, + "bbh": 0.35030315829299524, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.18060172872340424, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.37378124999999995, + "hf_avg": 8.227098434870664 + }, + { + "hf_id": "deepseek-ai/deepseek-llm-7b-chat", + "name": "deepseek-llm-7b-chat", + "params_b": 7, + "ifeval": 0.4170822307034225, + "bbh": 0.3632079760108669, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.21334773936170212, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.46677083333333336, + "hf_avg": 14.823156850686358 + }, + { + "hf_id": "deepseek-ai/deepseek-moe-16b-base", + "name": "deepseek-moe-16b-base", + "params_b": 16.376, + "ifeval": 0.2449744455821664, + "bbh": 0.3409461055246395, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.1505152925531915, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.36578125, + "hf_avg": 7.466333791660237 + }, + { + "hf_id": "deepseek-ai/deepseek-moe-16b-chat", + "name": "deepseek-moe-16b-chat", + "params_b": 16.376, + "ifeval": 0.36629919724109805, + "bbh": 0.3274953026448241, + "gpqa": 0.22483221476510068, + "mmlu_pro": 0.1963929521276596, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.38076041666666666, + "hf_avg": 10.290615224333424 + }, + { + "hf_id": "dfurman/CalmeRys-78B-Orpo-v0.1", + "name": "CalmeRys-78B-Orpo-v0.1", + "params_b": 77.965, + "ifeval": 0.8163273447785211, + "bbh": 0.7262282792249927, + "gpqa": 0.4001677852348993, + "mmlu_pro": 0.7012134308510638, + "hf_math_lvl5": 0.40634441087613293, + "hf_musr": 0.5901770833333333, + "hf_avg": 51.23132307602696 + }, + { + "hf_id": "dfurman/Llama-3-70B-Orpo-v0.1", + "name": "Llama-3-70B-Orpo-v0.1", + "params_b": 70.554, + "ifeval": 0.20490742341431845, + "bbh": 0.46552376347015506, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.38929521276595747, + "hf_math_lvl5": 0.1578549848942598, + "hf_musr": 0.4534375, + "hf_avg": 18.300061469360834 + }, + { + "hf_id": "dfurman/Llama-3-8B-Orpo-v0.1", + "name": "Llama-3-8B-Orpo-v0.1", + "params_b": 8.03, + "ifeval": 0.28351773294857646, + "bbh": 0.3842420919898036, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.22980385638297873, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.3566354166666667, + "hf_avg": 10.89448042775765 + }, + { + "hf_id": "dfurman/Llama-3-8B-Orpo-v0.1", + "name": "Llama-3-8B-Orpo-v0.1", + "params_b": 8.03, + "ifeval": 0.3000039894147528, + "bbh": 0.3852967582460245, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.22805851063829788, + "hf_math_lvl5": 0.041540785498489434, + "hf_musr": 0.357875, + "hf_avg": 11.076157946218345 + }, + { + "hf_id": "dfurman/Qwen2-72B-Orpo-v0.1", + "name": "Qwen2-72B-Orpo-v0.1", + "params_b": 72.699, + "ifeval": 0.7879759039348928, + "bbh": 0.6969024790545039, + "gpqa": 0.38422818791946306, + "mmlu_pro": 0.5454621010638298, + "hf_math_lvl5": 0.40558912386706947, + "hf_musr": 0.47842708333333334, + "hf_avg": 44.172299850567384 + }, + { + "hf_id": "dicta-il/dictalm2.0", + "name": "dictalm2.0", + "params_b": 7.251, + "ifeval": 0.24132745559559746, + "bbh": 0.4017869112495909, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.2604720744680851, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.38196874999999997, + "hf_avg": 11.895185345587594 + }, + { + "hf_id": "dicta-il/dictalm2.0-instruct", + "name": "dictalm2.0-instruct", + "params_b": 7.251, + "ifeval": 0.44121264910437635, + "bbh": 0.42560784985912875, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.2604720744680851, + "hf_math_lvl5": 0.022658610271903322, + "hf_musr": 0.39458333333333334, + "hf_avg": 16.7792214696191 + }, + { + "hf_id": "distilbert/distilgpt2", + "name": "distilgpt2", + "params_b": 0.088, + "ifeval": 0.06110010328151527, + "bbh": 0.3037988148650536, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11868351063829788, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.42072916666666665, + "hf_avg": 4.002273827220365 + }, + { + "hf_id": "divyanshukunwar/SASTRI_1_9B", + "name": "SASTRI_1_9B", + "params_b": 5.211, + "ifeval": 0.4207292206899914, + "bbh": 0.4680499051118341, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3187333776595745, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.3831145833333333, + "hf_avg": 19.42175962571148 + }, + { + "hf_id": "dnhkng/RYS-Medium", + "name": "RYS-Medium", + "params_b": 18.731, + "ifeval": 0.4406131287206833, + "bbh": 0.6284726872432828, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4325964095744681, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.40692708333333333, + "hf_avg": 26.44775194883569 + }, + { + "hf_id": "dnhkng/RYS-Llama-3-8B-Instruct", + "name": "RYS-Llama-3-8B-Instruct", + "params_b": 8.248, + "ifeval": 0.6957772044841022, + "bbh": 0.4808708123069005, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.355718085106383, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.33834375, + "hf_avg": 21.922774736650837 + }, + { + "hf_id": "dnhkng/RYS-Llama-3-Huge-Instruct", + "name": "RYS-Llama-3-Huge-Instruct", + "params_b": 99.646, + "ifeval": 0.7685917809190725, + "bbh": 0.6480872171360044, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.510970744680851, + "hf_math_lvl5": 0.22885196374622357, + "hf_musr": 0.4207604166666667, + "hf_avg": 34.64400590559008 + }, + { + "hf_id": "dnhkng/RYS-Llama-3-Large-Instruct", + "name": "RYS-Llama-3-Large-Instruct", + "params_b": 73.976, + "ifeval": 0.8050616807847621, + "bbh": 0.65252690724939, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.5137134308510638, + "hf_math_lvl5": 0.23036253776435045, + "hf_musr": 0.41803125, + "hf_avg": 35.981216135127745 + }, + { + "hf_id": "dnhkng/RYS-Llama3.1-Large", + "name": "RYS-Llama3.1-Large", + "params_b": 81.677, + "ifeval": 0.8492001223420524, + "bbh": 0.6899112229777242, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5248503989361702, + "hf_math_lvl5": 0.3504531722054381, + "hf_musr": 0.4553958333333334, + "hf_avg": 42.70529151024601 + }, + { + "hf_id": "dnhkng/RYS-Phi-3-medium-4k-instruct", + "name": "RYS-Phi-3-medium-4k-instruct", + "params_b": 17.709, + "ifeval": 0.4391392616036561, + "bbh": 0.6226313539198264, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.484624335106383, + "hf_math_lvl5": 0.1608761329305136, + "hf_musr": 0.42528125, + "hf_avg": 29.093689949569903 + }, + { + "hf_id": "dnhkng/RYS-XLarge", + "name": "RYS-XLarge", + "params_b": 77.965, + "ifeval": 0.7995662619627034, + "bbh": 0.7050033079850099, + "gpqa": 0.38422818791946306, + "mmlu_pro": 0.5428025265957447, + "hf_math_lvl5": 0.425226586102719, + "hf_musr": 0.49696875, + "hf_avg": 45.345219749056206 + }, + { + "hf_id": "dnhkng/RYS-XLarge-base", + "name": "RYS-XLarge-base", + "params_b": 77.972, + "ifeval": 0.7910233735377686, + "bbh": 0.7047291858548728, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5430518617021277, + "hf_math_lvl5": 0.37915407854984895, + "hf_musr": 0.4902708333333334, + "hf_avg": 44.096835700317605 + }, + { + "hf_id": "dreamgen/WizardLM-2-7B", + "name": "WizardLM-2-7B", + "params_b": 7.242, + "ifeval": 0.45829842595424586, + "bbh": 0.34867856163972016, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.2660405585106383, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.39409374999999996, + "hf_avg": 14.877542593987686 + }, + { + "hf_id": "dwikitheduck/gemma-2-2b-id", + "name": "gemma-2-2b-id", + "params_b": 2, + "ifeval": 0.38785644312646006, + "bbh": 0.39621721241423097, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.21733710106382978, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.41542708333333334, + "hf_avg": 14.849648233221265 + }, + { + "hf_id": "dwikitheduck/gemma-2-2b-id-inst", + "name": "gemma-2-2b-id-inst", + "params_b": 2, + "ifeval": 0.38785644312646006, + "bbh": 0.39621721241423097, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.21733710106382978, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.41542708333333334, + "hf_avg": 14.849648233221265 + }, + { + "hf_id": "dwikitheduck/gemma-2-2b-id-instruct", + "name": "gemma-2-2b-id-instruct", + "params_b": 2, + "ifeval": 0.38785644312646006, + "bbh": 0.39621721241423097, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.21733710106382978, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.41542708333333334, + "hf_avg": 14.849648233221265 + }, + { + "hf_id": "dwikitheduck/gen-inst-1", + "name": "gen-inst-1", + "params_b": 14.77, + "ifeval": 0.7750114141588762, + "bbh": 0.6419926671215591, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5088929521276596, + "hf_math_lvl5": 0.4554380664652568, + "hf_musr": 0.42054166666666665, + "hf_avg": 40.880198014659875 + }, + { + "hf_id": "dwikitheduck/gen-try1", + "name": "gen-try1", + "params_b": 14.77, + "ifeval": 0.7522052598217175, + "bbh": 0.6358510933470735, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.5110538563829787, + "hf_math_lvl5": 0.41012084592145015, + "hf_musr": 0.4415625, + "hf_avg": 39.412127202941626 + }, + { + "hf_id": "dzakwan/dzakwan-MoE-4x7b-Beta", + "name": "dzakwan-MoE-4x7b-Beta", + "params_b": 24.154, + "ifeval": 0.44426011870725235, + "bbh": 0.514044131159397, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3107546542553192, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.42673958333333334, + "hf_avg": 20.76930305324143 + }, + { + "hf_id": "ehristoforu/Falcon3-MoE-2x7B-Insruct", + "name": "Falcon3-MoE-2x7B-Insruct", + "params_b": 13.401, + "ifeval": 0.7642954028643998, + "bbh": 0.564789641564995, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.40949135638297873, + "hf_math_lvl5": 0.4123867069486405, + "hf_musr": 0.4840416666666667, + "hf_avg": 36.66765115739224 + }, + { + "hf_id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", + "name": "Gemma2-9B-it-psy10k-mental_health", + "params_b": 9.242, + "ifeval": 0.5886658510529839, + "bbh": 0.5539376944027642, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.38289561170212766, + "hf_math_lvl5": 0.16314199395770393, + "hf_musr": 0.40860416666666666, + "hf_avg": 27.19248947734485 + }, + { + "hf_id": "ehristoforu/Gemma2-9b-it-train6", + "name": "Gemma2-9b-it-train6", + "params_b": 9.242, + "ifeval": 0.7025215317579578, + "bbh": 0.5898092579133603, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.39419880319148937, + "hf_math_lvl5": 0.19108761329305135, + "hf_musr": 0.40841666666666665, + "hf_avg": 30.5339867395368 + }, + { + "hf_id": "ehristoforu/HappyLlama1", + "name": "HappyLlama1", + "params_b": 8.03, + "ifeval": 0.7362686560548235, + "bbh": 0.49957323097428485, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.35455452127659576, + "hf_math_lvl5": 0.14274924471299094, + "hf_musr": 0.42868749999999994, + "hf_avg": 26.735379379762946 + }, + { + "hf_id": "ehristoforu/RQwen-v0.1", + "name": "RQwen-v0.1", + "params_b": 14.77, + "ifeval": 0.7624968417133207, + "bbh": 0.6446435015804635, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.5201961436170213, + "hf_math_lvl5": 0.4645015105740181, + "hf_musr": 0.41390625, + "hf_avg": 39.73075710536471 + }, + { + "hf_id": "ehristoforu/RQwen-v0.2", + "name": "RQwen-v0.2", + "params_b": 14.77, + "ifeval": 0.7503568309862276, + "bbh": 0.6426888858891955, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.515874335106383, + "hf_math_lvl5": 0.3270392749244713, + "hf_musr": 0.4206666666666667, + "hf_avg": 37.702469412365296 + }, + { + "hf_id": "ehristoforu/SoRu-0009", + "name": "SoRu-0009", + "params_b": 0.494, + "ifeval": 0.25818827378023645, + "bbh": 0.3149981683579724, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.12391954787234043, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3369479166666667, + "hf_avg": 6.300240741893716 + }, + { + "hf_id": "ehristoforu/coolqwen-3b-it", + "name": "coolqwen-3b-it", + "params_b": 3.085, + "ifeval": 0.6472670292601409, + "bbh": 0.485089343991756, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3601230053191489, + "hf_math_lvl5": 0.36706948640483383, + "hf_musr": 0.41251041666666666, + "hf_avg": 28.654353578279625 + }, + { + "hf_id": "ehristoforu/falcon3-ultraset", + "name": "falcon3-ultraset", + "params_b": 7.456, + "ifeval": 0.7135123694020753, + "bbh": 0.5583684420918801, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.398188164893617, + "hf_math_lvl5": 0.2122356495468278, + "hf_musr": 0.48531250000000004, + "hf_avg": 32.53688653155465 + }, + { + "hf_id": "ehristoforu/fd-lora-merged-16x32", + "name": "fd-lora-merged-16x32", + "params_b": 1.776, + "ifeval": 0.3480897352358409, + "bbh": 0.3307564619842368, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.12051196808510638, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.35142708333333333, + "hf_avg": 10.454537831938053 + }, + { + "hf_id": "ehristoforu/fd-lora-merged-64x128", + "name": "fd-lora-merged-64x128", + "params_b": 1.777, + "ifeval": 0.3281060918363276, + "bbh": 0.33447107385638297, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.15367353723404256, + "hf_math_lvl5": 0.18731117824773413, + "hf_musr": 0.3368229166666667, + "hf_avg": 11.210864438231852 + }, + { + "hf_id": "ehristoforu/qwen2.5-with-lora-think-3b-it", + "name": "qwen2.5-with-lora-think-3b-it", + "params_b": 3.086, + "ifeval": 0.5319374814381397, + "bbh": 0.4686847308109022, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.3402593085106383, + "hf_math_lvl5": 0.236404833836858, + "hf_musr": 0.43095833333333333, + "hf_avg": 24.256524060080704 + }, + { + "hf_id": "ehristoforu/rmoe-v1", + "name": "rmoe-v1", + "params_b": 11.026, + "ifeval": 0.26500795666609045, + "bbh": 0.29292907133609175, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.1124501329787234, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.36634374999999997, + "hf_avg": 5.841232027005013 + }, + { + "hf_id": "ehristoforu/rufalcon3-3b-it", + "name": "rufalcon3-3b-it", + "params_b": 3.228, + "ifeval": 0.5942111375594533, + "bbh": 0.41554222543957625, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.2347905585106383, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.38953124999999994, + "hf_avg": 20.64141899254107 + }, + { + "hf_id": "ehristoforu/ruphi-4b", + "name": "ruphi-4b", + "params_b": 3.821, + "ifeval": 0.17518185082248433, + "bbh": 0.29060336568338, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.11261635638297872, + "hf_musr": 0.35117708333333336, + "hf_avg": 4.080739303536478 + }, + { + "hf_id": "ehristoforu/tmoe", + "name": "tmoe", + "params_b": 11.026, + "ifeval": 0.11930234001338672, + "bbh": 0.30728601408520645, + "gpqa": 0.2231543624161074, + "mmlu_pro": 0.11909906914893617, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.36990624999999994, + "hf_avg": 3.652324770801055 + }, + { + "hf_id": "ehristoforu/tmoe-v2", + "name": "tmoe-v2", + "params_b": 11.026, + "ifeval": 0.19026959578363187, + "bbh": 0.2896740649804915, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.11003989361702128, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.4150833333333333, + "hf_avg": 5.712206463926581 + }, + { + "hf_id": "elinas/Chronos-Gold-12B-1.0", + "name": "Chronos-Gold-12B-1.0", + "params_b": 12.248, + "ifeval": 0.3165656014929277, + "bbh": 0.5514664110708439, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.351811835106383, + "hf_math_lvl5": 0.06948640483383686, + "hf_musr": 0.47398958333333335, + "hf_avg": 21.828167948467158 + }, + { + "hf_id": "ell44ot/gemma-2b-def", + "name": "gemma-2b-def", + "params_b": 1.546, + "ifeval": 0.26930433472076315, + "bbh": 0.31586532094752634, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.15724734042553193, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.36702083333333335, + "hf_avg": 8.12291928068411 + }, + { + "hf_id": "euclaise/ReMask-3B", + "name": "ReMask-3B", + "params_b": 2.795, + "ifeval": 0.2419269759792905, + "bbh": 0.3516779692917367, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.13572140957446807, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.33409375, + "hf_avg": 7.294404589328096 + }, + { + "hf_id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", + "name": "Thinker-Llama-3.2-3B-Instruct-Reasoning", + "params_b": 3.213, + "ifeval": 0.44388555698878973, + "bbh": 0.4273125047156003, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.2886469414893617, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.36553125000000003, + "hf_avg": 17.331991514504523 + }, + { + "hf_id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", + "name": "Thinker-Qwen2.5-0.5B-Instruct-Reasoning", + "params_b": 0.494, + "ifeval": 0.2476473534665798, + "bbh": 0.3292122979013761, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.16472739361702127, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.33821875, + "hf_avg": 8.066116427700267 + }, + { + "hf_id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", + "name": "Thinker-SmolLM2-135M-Instruct-Reasoning", + "params_b": 0.135, + "ifeval": 0.25836336476105626, + "bbh": 0.3071349750892843, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.109375, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.366125, + "hf_avg": 5.843149272359046 + }, + { + "hf_id": "experiment-llm/exp-3-q-r", + "name": "exp-3-q-r", + "params_b": 7.616, + "ifeval": 0.6035785050333116, + "bbh": 0.5397159253811645, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.43159906914893614, + "hf_math_lvl5": 0.27870090634441086, + "hf_musr": 0.43154166666666666, + "hf_avg": 29.50441044971987 + }, + { + "hf_id": "facebook/opt-1.3b", + "name": "opt-1.3b", + "params_b": 1.3, + "ifeval": 0.23832985367713222, + "bbh": 0.3093947052760125, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.11070478723404255, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.342, + "hf_avg": 5.276689334204645 + }, + { + "hf_id": "facebook/opt-30b", + "name": "opt-30b", + "params_b": 30, + "ifeval": 0.2452991396162183, + "bbh": 0.30703447525623373, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.1163563829787234, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.36041666666666666, + "hf_avg": 6.276874107966858 + }, + { + "hf_id": "failspy/Llama-3-8B-Instruct-MopeyMule", + "name": "Llama-3-8B-Instruct-MopeyMule", + "params_b": 8.03, + "ifeval": 0.6750444376476638, + "bbh": 0.383874490132152, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.17644614361702127, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.35130208333333335, + "hf_avg": 15.638132592588441 + }, + { + "hf_id": "failspy/Llama-3-8B-Instruct-abliterated", + "name": "Llama-3-8B-Instruct-abliterated", + "params_b": 8.03, + "ifeval": 0.5908888416069362, + "bbh": 0.4353752684977051, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2741855053191489, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.41158333333333336, + "hf_avg": 19.190256107675243 + }, + { + "hf_id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", + "name": "Meta-Llama-3-70B-Instruct-abliterated-v3.5", + "params_b": 70.554, + "ifeval": 0.7746867201248244, + "bbh": 0.574710022890038, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.44522938829787234, + "hf_math_lvl5": 0.1283987915407855, + "hf_musr": 0.39818749999999997, + "hf_avg": 30.12935413704135 + }, + { + "hf_id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", + "name": "Meta-Llama-3-8B-Instruct-abliterated-v3", + "params_b": 8.03, + "ifeval": 0.7244533393617822, + "bbh": 0.4924562150856957, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.3653590425531915, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.36218749999999994, + "hf_avg": 23.933089775002568 + }, + { + "hf_id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", + "name": "Phi-3-medium-4k-instruct-abliterated-v3", + "params_b": 13.96, + "ifeval": 0.6319299458769398, + "bbh": 0.6304799176474429, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.4399933510638298, + "hf_math_lvl5": 0.1593655589123867, + "hf_musr": 0.4604166666666667, + "hf_avg": 31.85112078051007 + }, + { + "hf_id": "failspy/llama-3-70B-Instruct-abliterated", + "name": "llama-3-70B-Instruct-abliterated", + "params_b": 70.554, + "ifeval": 0.8023389052159382, + "bbh": 0.6464853840398571, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.5145445478723404, + "hf_math_lvl5": 0.243202416918429, + "hf_musr": 0.4127604166666667, + "hf_avg": 35.89001866123902 + }, + { + "hf_id": "fblgit/TheBeagle-v2beta-32B-MGS", + "name": "TheBeagle-v2beta-32B-MGS", + "params_b": 32.764, + "ifeval": 0.518074265171966, + "bbh": 0.7032634749563558, + "gpqa": 0.3825503355704698, + "mmlu_pro": 0.5915059840425532, + "hf_math_lvl5": 0.4947129909365559, + "hf_musr": 0.50075, + "hf_avg": 42.642045426579536 + }, + { + "hf_id": "fblgit/TheBeagle-v2beta-32B-MGS", + "name": "TheBeagle-v2beta-32B-MGS", + "params_b": 32.764, + "ifeval": 0.4503051902285935, + "bbh": 0.703542441088263, + "gpqa": 0.401006711409396, + "mmlu_pro": 0.5910904255319149, + "hf_math_lvl5": 0.3942598187311178, + "hf_musr": 0.5021145833333334, + "hf_avg": 40.286669657817164 + }, + { + "hf_id": "fblgit/UNA-SimpleSmaug-34b-v1beta", + "name": "UNA-SimpleSmaug-34b-v1beta", + "params_b": 34.389, + "ifeval": 0.45562551806983254, + "bbh": 0.5286654104993475, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.4539561170212766, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.4255625, + "hf_avg": 24.2920916316972 + }, + { + "hf_id": "fblgit/UNA-TheBeagle-7b-v1", + "name": "UNA-TheBeagle-7b-v1", + "params_b": 7.242, + "ifeval": 0.36887236975669, + "bbh": 0.5028691097522866, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3019448138297872, + "hf_math_lvl5": 0.0770392749244713, + "hf_musr": 0.4564375, + "hf_avg": 19.646170777958677 + }, + { + "hf_id": "fblgit/UNA-ThePitbull-21.4B-v2", + "name": "UNA-ThePitbull-21.4B-v2", + "params_b": 21.421, + "ifeval": 0.3790387283518841, + "bbh": 0.635038821016254, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.3515625, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.3921666666666666, + "hf_avg": 23.0265687976468 + }, + { + "hf_id": "fblgit/cybertron-v4-qw7B-MGS", + "name": "cybertron-v4-qw7B-MGS", + "params_b": 7.616, + "ifeval": 0.6263846593704703, + "bbh": 0.5591772533435835, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.44730718085106386, + "hf_math_lvl5": 0.34894259818731116, + "hf_musr": 0.43709375, + "hf_avg": 32.40351871985633 + }, + { + "hf_id": "fblgit/cybertron-v4-qw7B-UNAMGS", + "name": "cybertron-v4-qw7B-UNAMGS", + "params_b": 7.616, + "ifeval": 0.6090240561709597, + "bbh": 0.5642509108139038, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.4500498670212766, + "hf_math_lvl5": 0.3731117824773414, + "hf_musr": 0.4343333333333333, + "hf_avg": 33.05949412905735 + }, + { + "hf_id": "fblgit/juanako-7b-UNA", + "name": "juanako-7b-UNA", + "params_b": 7.242, + "ifeval": 0.4837276204914073, + "bbh": 0.507001145736535, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.277094414893617, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.46449999999999997, + "hf_avg": 20.863068015988574 + }, + { + "hf_id": "fblgit/miniclaus-qw1.5B-UNAMGS", + "name": "miniclaus-qw1.5B-UNAMGS", + "params_b": 1.777, + "ifeval": 0.3348005514257725, + "bbh": 0.4238588294007628, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.2937167553191489, + "hf_math_lvl5": 0.10876132930513595, + "hf_musr": 0.42934374999999997, + "hf_avg": 17.04510204797917 + }, + { + "hf_id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", + "name": "miniclaus-qw1.5B-UNAMGS-GRPO", + "params_b": 1.544, + "ifeval": 0.3518364605912313, + "bbh": 0.423443453814005, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2945478723404255, + "hf_math_lvl5": 0.11027190332326284, + "hf_musr": 0.42543749999999997, + "hf_avg": 17.440457138521808 + }, + { + "hf_id": "fblgit/pancho-v1-qw25-3B-UNAMGS", + "name": "pancho-v1-qw25-3B-UNAMGS", + "params_b": 3.397, + "ifeval": 0.536134124123991, + "bbh": 0.49258278193390775, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3765791223404255, + "hf_math_lvl5": 0.15709969788519637, + "hf_musr": 0.4027395833333333, + "hf_avg": 23.860634894188152 + }, + { + "hf_id": "fblgit/una-cybertron-7b-v2-bf16", + "name": "una-cybertron-7b-v2-bf16", + "params_b": 7.242, + "ifeval": 0.47371086494944525, + "bbh": 0.3973388920486269, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.2442652925531915, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.4473229166666666, + "hf_avg": 17.217324719799368 + }, + { + "hf_id": "fhai50032/RolePlayLake-7B", + "name": "RolePlayLake-7B", + "params_b": 7.242, + "ifeval": 0.5056594280952318, + "bbh": 0.5252170095233862, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3159906914893617, + "hf_math_lvl5": 0.07250755287009064, + "hf_musr": 0.4459270833333333, + "hf_avg": 22.754717527935792 + }, + { + "hf_id": "fhai50032/Unaligned-Thinker-PHI-4", + "name": "Unaligned-Thinker-PHI-4", + "params_b": 14.66, + "ifeval": 0.056254072527560206, + "bbh": 0.6642576780946753, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5147107712765957, + "hf_math_lvl5": 0.33534743202416917, + "hf_musr": 0.4678541666666667, + "hf_avg": 28.899268178294438 + }, + { + "hf_id": "flammenai/Llama3.1-Flammades-70B", + "name": "Llama3.1-Flammades-70B", + "params_b": 70.554, + "ifeval": 0.7058438277104748, + "bbh": 0.6659721866694542, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.47523271276595747, + "hf_math_lvl5": 0.20921450151057402, + "hf_musr": 0.48705208333333333, + "hf_avg": 36.994120533008164 + }, + { + "hf_id": "flammenai/Mahou-1.2a-llama3-8B", + "name": "Mahou-1.2a-llama3-8B", + "params_b": 8.03, + "ifeval": 0.50925655039739, + "bbh": 0.5093660540433169, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.38173204787234044, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.38466666666666666, + "hf_avg": 21.791261651208817 + }, + { + "hf_id": "flammenai/Mahou-1.2a-mistral-7B", + "name": "Mahou-1.2a-mistral-7B", + "params_b": 7.242, + "ifeval": 0.4552010886669592, + "bbh": 0.5118111474458115, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.31632313829787234, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.38962500000000005, + "hf_avg": 19.578990668864712 + }, + { + "hf_id": "flammenai/Mahou-1.5-llama3.1-70B", + "name": "Mahou-1.5-llama3.1-70B", + "params_b": 70.554, + "ifeval": 0.7146615424850509, + "bbh": 0.6650860641288713, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.47490026595744683, + "hf_math_lvl5": 0.20996978851963746, + "hf_musr": 0.4950208333333333, + "hf_avg": 37.34491319969544 + }, + { + "hf_id": "flammenai/Mahou-1.5-mistral-nemo-12B", + "name": "Mahou-1.5-mistral-nemo-12B", + "params_b": 12.248, + "ifeval": 0.6751441730164851, + "bbh": 0.5522361927910235, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.3602061170212766, + "hf_math_lvl5": 0.08685800604229607, + "hf_musr": 0.4520416666666667, + "hf_avg": 26.88532589921436 + }, + { + "hf_id": "flammenai/flammen15-gutenberg-DPO-v1-7B", + "name": "flammen15-gutenberg-DPO-v1-7B", + "params_b": 7.242, + "ifeval": 0.47980580415519714, + "bbh": 0.5202983979716951, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3185671542553192, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.4293125, + "hf_avg": 21.612698429198762 + }, + { + "hf_id": "fluently-lm/FluentlyLM-Prinum", + "name": "FluentlyLM-Prinum", + "params_b": 32.764, + "ifeval": 0.809033364805383, + "bbh": 0.7143813967889198, + "gpqa": 0.38674496644295303, + "mmlu_pro": 0.5807845744680851, + "hf_math_lvl5": 0.5400302114803626, + "hf_musr": 0.44714583333333335, + "hf_avg": 47.21693789116021 + }, + { + "hf_id": "fluently-lm/Llama-TI-8B", + "name": "Llama-TI-8B", + "params_b": 8.03, + "ifeval": 0.28803906966847964, + "bbh": 0.520085504155627, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.343999335106383, + "hf_math_lvl5": 0.19637462235649547, + "hf_musr": 0.4102708333333333, + "hf_avg": 21.062119760175154 + }, + { + "hf_id": "fluently-lm/Llama-TI-8B-Instruct", + "name": "Llama-TI-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.7716392505219485, + "bbh": 0.5252143041749421, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.37258976063829785, + "hf_math_lvl5": 0.23036253776435045, + "hf_musr": 0.38134375000000004, + "hf_avg": 29.67161684095501 + }, + { + "hf_id": "fluently-sets/FalconThink3-10B-IT", + "name": "FalconThink3-10B-IT", + "params_b": 10.306, + "ifeval": 0.7326216660682544, + "bbh": 0.620016981648187, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4434840425531915, + "hf_math_lvl5": 0.24471299093655588, + "hf_musr": 0.44788541666666665, + "hf_avg": 34.620674425220905 + }, + { + "hf_id": "fluently-sets/reasoning-1-1k-demo", + "name": "reasoning-1-1k-demo", + "params_b": 14.77, + "ifeval": 0.7524800861713586, + "bbh": 0.6396692351083745, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.4773936170212766, + "hf_math_lvl5": 0.4282477341389728, + "hf_musr": 0.4060625, + "hf_avg": 38.341663670523 + }, + { + "hf_id": "freewheelin/free-evo-qwen72b-v0.8-re", + "name": "free-evo-qwen72b-v0.8-re", + "params_b": 72.288, + "ifeval": 0.533086654521115, + "bbh": 0.6127477065378042, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.4870345744680851, + "hf_math_lvl5": 0.18051359516616314, + "hf_musr": 0.4871666666666667, + "hf_avg": 32.4749309635116 + }, + { + "hf_id": "freewheelin/free-solar-evo-v0.1", + "name": "free-solar-evo-v0.1", + "params_b": 10.732, + "ifeval": 0.20500715878313985, + "bbh": 0.4502211109638701, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3414228723404255, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.4945833333333334, + "hf_avg": 16.42145204334013 + }, + { + "hf_id": "freewheelin/free-solar-evo-v0.11", + "name": "free-solar-evo-v0.11", + "params_b": 10.732, + "ifeval": 0.20265894493277836, + "bbh": 0.4545155032474882, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.34674202127659576, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.5052187499999999, + "hf_avg": 16.77976316256584 + }, + { + "hf_id": "freewheelin/free-solar-evo-v0.13", + "name": "free-solar-evo-v0.13", + "params_b": 10.732, + "ifeval": 0.2320598234905606, + "bbh": 0.4554839670962904, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.34699135638297873, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.50515625, + "hf_avg": 17.40590132423633 + }, + { + "hf_id": "fulim/FineLlama-3.1-8B", + "name": "FineLlama-3.1-8B", + "params_b": 8, + "ifeval": 0.14388267574480157, + "bbh": 0.456920741562608, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.31673869680851063, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.38673958333333336, + "hf_avg": 13.250843761720356 + }, + { + "hf_id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", + "name": "Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", + "params_b": 8.03, + "ifeval": 0.40094615619888563, + "bbh": 0.3984844272016949, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.16539228723404256, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.36504166666666665, + "hf_avg": 12.108403733798744 + }, + { + "hf_id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", + "name": "Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", + "params_b": 8.03, + "ifeval": 0.45505148561372716, + "bbh": 0.5043660783243713, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.36785239361702127, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.37375, + "hf_avg": 20.99904174519205 + }, + { + "hf_id": "gbueno86/Brinebreath-Llama-3.1-70B", + "name": "Brinebreath-Llama-3.1-70B", + "params_b": 70.554, + "ifeval": 0.5532952565858589, + "bbh": 0.6880562247706813, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.5196143617021277, + "hf_math_lvl5": 0.297583081570997, + "hf_musr": 0.45406250000000004, + "hf_avg": 36.25499179659365 + }, + { + "hf_id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", + "name": "Meta-LLama-3-Cat-Smaug-LLama-70b", + "params_b": 70.554, + "ifeval": 0.8071849359698933, + "bbh": 0.6674314931312052, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.5074800531914894, + "hf_math_lvl5": 0.2938066465256798, + "hf_musr": 0.43682291666666667, + "hf_avg": 38.69613307300818 + }, + { + "hf_id": "ghost-x/ghost-8b-beta-1608", + "name": "ghost-8b-beta-1608", + "params_b": 8.03, + "ifeval": 0.42727407722620425, + "bbh": 0.45165496100352914, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.2839926861702128, + "hf_math_lvl5": 0.06948640483383686, + "hf_musr": 0.35158333333333336, + "hf_avg": 16.04724368250191 + }, + { + "hf_id": "gmonsoon/SahabatAI-Llama-11B-Test", + "name": "SahabatAI-Llama-11B-Test", + "params_b": 11.52, + "ifeval": 0.33757319467900726, + "bbh": 0.4727584153058988, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.3182347074468085, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.40013541666666663, + "hf_avg": 16.26561908313948 + }, + { + "hf_id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", + "name": "SahabatAI-MediChatIndo-8B-v1", + "params_b": 8.03, + "ifeval": 0.41628323958208663, + "bbh": 0.4508834027881236, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3107546542553192, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.3753958333333333, + "hf_avg": 17.29986493129456 + }, + { + "hf_id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", + "name": "gemma2-9b-sahabatai-v1-instruct-BaseTIES", + "params_b": 9.242, + "ifeval": 0.7377923908562614, + "bbh": 0.6077244532441547, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.43467420212765956, + "hf_math_lvl5": 0.19939577039274925, + "hf_musr": 0.47780208333333335, + "hf_avg": 33.8045690236527 + }, + { + "hf_id": "godlikehhd/alpaca_data_full_2", + "name": "alpaca_data_full_2", + "params_b": 1.544, + "ifeval": 0.31781450994472443, + "bbh": 0.4216953430035033, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.285405585106383, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.40515625000000005, + "hf_avg": 16.073236944099644 + }, + { + "hf_id": "godlikehhd/alpaca_data_full_3B", + "name": "alpaca_data_full_3B", + "params_b": 3.086, + "ifeval": 0.36957162550920447, + "bbh": 0.46841893776834337, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.335688164893617, + "hf_math_lvl5": 0.1336858006042296, + "hf_musr": 0.4954791666666667, + "hf_avg": 21.16254856881883 + }, + { + "hf_id": "godlikehhd/alpaca_data_ifd_max_2600", + "name": "alpaca_data_ifd_max_2600", + "params_b": 1.544, + "ifeval": 0.3042504997850149, + "bbh": 0.40285133876405865, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.29163896276595747, + "hf_math_lvl5": 0.09894259818731117, + "hf_musr": 0.3508645833333333, + "hf_avg": 15.169551135723685 + }, + { + "hf_id": "godlikehhd/alpaca_data_ifd_max_2600_3B", + "name": "alpaca_data_ifd_max_2600_3B", + "params_b": 3.086, + "ifeval": 0.298155560579263, + "bbh": 0.4626377955326701, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.32878989361702127, + "hf_math_lvl5": 0.1593655589123867, + "hf_musr": 0.43455208333333334, + "hf_avg": 18.603034635124093 + }, + { + "hf_id": "godlikehhd/alpaca_data_ifd_min_2600", + "name": "alpaca_data_ifd_min_2600", + "params_b": 1.544, + "ifeval": 0.3749673089624419, + "bbh": 0.4219047173013076, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.289311835106383, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.36562500000000003, + "hf_avg": 16.51777329215277 + }, + { + "hf_id": "godlikehhd/alpaca_data_ins_max_5200", + "name": "alpaca_data_ins_max_5200", + "params_b": 1.544, + "ifeval": 0.32750657145263457, + "bbh": 0.41550742328078477, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.2915558510638298, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.361375, + "hf_avg": 15.68373909505621 + }, + { + "hf_id": "godlikehhd/alpaca_data_ins_min_2600", + "name": "alpaca_data_ins_min_2600", + "params_b": 1.544, + "ifeval": 0.33300199027469335, + "bbh": 0.41873469888886056, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.28798204787234044, + "hf_math_lvl5": 0.11102719033232629, + "hf_musr": 0.38534375000000004, + "hf_avg": 16.228325384264654 + }, + { + "hf_id": "godlikehhd/alpaca_data_ins_min_5200", + "name": "alpaca_data_ins_min_5200", + "params_b": 1.544, + "ifeval": 0.3359995921931586, + "bbh": 0.4289279419241076, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.29488031914893614, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.39055208333333336, + "hf_avg": 16.38323586874574 + }, + { + "hf_id": "godlikehhd/alpaca_data_sampled_ifd_5200", + "name": "alpaca_data_sampled_ifd_5200", + "params_b": 1.544, + "ifeval": 0.2923853154075631, + "bbh": 0.4032969715626326, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.2896442819148936, + "hf_math_lvl5": 0.12537764350453173, + "hf_musr": 0.3520729166666667, + "hf_avg": 15.703649205002767 + }, + { + "hf_id": "godlikehhd/alpaca_data_sampled_ifd_new_5200", + "name": "alpaca_data_sampled_ifd_new_5200", + "params_b": 1.544, + "ifeval": 0.36632468516868577, + "bbh": 0.4177831234050982, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.29247007978723405, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.36125, + "hf_avg": 16.53778065653717 + }, + { + "hf_id": "godlikehhd/alpaca_data_score_max_0.1_2600", + "name": "alpaca_data_score_max_0.1_2600", + "params_b": 1.544, + "ifeval": 0.3287554799044313, + "bbh": 0.42522607952607777, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.29230385638297873, + "hf_math_lvl5": 0.09894259818731117, + "hf_musr": 0.37064583333333334, + "hf_avg": 15.917241105745404 + }, + { + "hf_id": "godlikehhd/alpaca_data_score_max_0.3_2600", + "name": "alpaca_data_score_max_0.3_2600", + "params_b": 1.544, + "ifeval": 0.33752332699459653, + "bbh": 0.4151448369012765, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.29130651595744683, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.37594791666666666, + "hf_avg": 15.960793093536113 + }, + { + "hf_id": "godlikehhd/alpaca_data_score_max_0.7_2600", + "name": "alpaca_data_score_max_0.7_2600", + "params_b": 1.544, + "ifeval": 0.3639764713183243, + "bbh": 0.41845266250678703, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.2982878989361702, + "hf_math_lvl5": 0.10725075528700906, + "hf_musr": 0.3468645833333333, + "hf_avg": 16.630379005915092 + }, + { + "hf_id": "godlikehhd/alpaca_data_score_max_2500", + "name": "alpaca_data_score_max_2500", + "params_b": 1.544, + "ifeval": 0.3563577973111345, + "bbh": 0.41801375075895447, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.2939660904255319, + "hf_math_lvl5": 0.09516616314199396, + "hf_musr": 0.36270833333333335, + "hf_avg": 16.49115247845423 + }, + { + "hf_id": "godlikehhd/alpaca_data_score_max_2600_3B", + "name": "alpaca_data_score_max_2600_3B", + "params_b": 3.086, + "ifeval": 0.33577463352792813, + "bbh": 0.4716306839273412, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.3341921542553192, + "hf_math_lvl5": 0.15483383685800603, + "hf_musr": 0.44744791666666667, + "hf_avg": 19.567090626055247 + }, + { + "hf_id": "godlikehhd/alpaca_data_score_max_5200", + "name": "alpaca_data_score_max_5200", + "params_b": 1.544, + "ifeval": 0.34454248061809334, + "bbh": 0.42417102847687554, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.29446476063829785, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.3877916666666667, + "hf_avg": 16.366037527264833 + }, + { + "hf_id": "google/codegemma-1.1-2b", + "name": "codegemma-1.1-2b", + "params_b": 2.506, + "ifeval": 0.22936253584932426, + "bbh": 0.3353417790248454, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.1278257978723404, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3871458333333333, + "hf_avg": 7.133867903047553 + }, + { + "hf_id": "google/flan-t5-base", + "name": "flan-t5-base", + "params_b": 0.248, + "ifeval": 0.18907055501624578, + "bbh": 0.3525980599300322, + "gpqa": 0.23825503355704697, + "mmlu_pro": 0.13572140957446807, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.36711458333333336, + "hf_avg": 6.415642124982084 + }, + { + "hf_id": "google/flan-t5-large", + "name": "flan-t5-large", + "params_b": 0.783, + "ifeval": 0.22009490374428736, + "bbh": 0.41531150356794316, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.17087765957446807, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.40832291666666665, + "hf_avg": 9.658122925542836 + }, + { + "hf_id": "google/flan-t5-small", + "name": "flan-t5-small", + "params_b": 0.077, + "ifeval": 0.1524255641697363, + "bbh": 0.3282901097640842, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.1233377659574468, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.41229166666666667, + "hf_avg": 6.129661810537869 + }, + { + "hf_id": "google/flan-t5-xl", + "name": "flan-t5-xl", + "params_b": 2.85, + "ifeval": 0.22374189373085634, + "bbh": 0.45310636062112314, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.21467752659574468, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.41809375, + "hf_avg": 11.70507257989283 + }, + { + "hf_id": "google/flan-t5-xl", + "name": "flan-t5-xl", + "params_b": 2.85, + "ifeval": 0.2206944241279804, + "bbh": 0.45372172155693963, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.21417885638297873, + "hf_math_lvl5": 0.0007552870090634442, + "hf_musr": 0.42203125, + "hf_avg": 11.58716743755607 + }, + { + "hf_id": "google/flan-t5-xxl", + "name": "flan-t5-xxl", + "params_b": 11.267, + "ifeval": 0.2200450360598767, + "bbh": 0.5065888015776924, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.23429188829787234, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.42175, + "hf_avg": 13.662077060970686 + }, + { + "hf_id": "google/flan-ul2", + "name": "flan-ul2", + "params_b": 19.46, + "ifeval": 0.23925406809487715, + "bbh": 0.5053738049125648, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.24933510638297873, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.3843541666666666, + "hf_avg": 13.675998692966035 + }, + { + "hf_id": "google/gemma-1.1-2b-it", + "name": "gemma-1.1-2b-it", + "params_b": 2.506, + "ifeval": 0.30674831668860847, + "bbh": 0.3184634974814922, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.14835438829787234, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.33939583333333334, + "hf_avg": 8.053373854341979, + "arena_elo": 1114.08, + "arena_rank": 297, + "arena_votes": 10853 + }, + { + "hf_id": "google/gemma-1.1-7b-it", + "name": "gemma-1.1-7b-it", + "params_b": 8.538, + "ifeval": 0.5039107346285633, + "bbh": 0.3935297962833251, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.2583942819148936, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.42302083333333335, + "hf_avg": 17.693584228972615, + "lb_name": "gemma-1.1-7b-it", + "lb_global": 0.20704722222222224, + "lb_reasoning": 0.22, + "lb_math": 0.15214000000000003, + "lb_language": 0.10647333333333332, + "lb_if": 0.443375, + "lb_data_analysis": 0.0726, + "arena_elo": 1180.13, + "arena_rank": 259, + "arena_votes": 23893 + }, + { + "hf_id": "google/gemma-2-2b", + "name": "gemma-2-2b", + "params_b": 2.614, + "ifeval": 0.19931226922343825, + "bbh": 0.3655966996422591, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.21800199468085107, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.4231770833333333, + "hf_avg": 10.129463155055184 + }, + { + "hf_id": "google/gemma-2-2b", + "name": "gemma-2-2b", + "params_b": 2.614, + "ifeval": 0.20176021844262113, + "bbh": 0.3708674612470255, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.22165890957446807, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.421875, + "hf_avg": 10.359615568466916 + }, + { + "hf_id": "google/gemma-2-2b-it", + "name": "gemma-2-2b-it", + "params_b": 2.614, + "ifeval": 0.5668337788179807, + "bbh": 0.41992308914274706, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.25498670212765956, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.39288541666666665, + "hf_avg": 17.046939294966545, + "arena_elo": 1198.7, + "arena_rank": 247, + "arena_votes": 46618 + }, + { + "hf_id": "google/gemma-2-2b-jpn-it", + "name": "gemma-2-2b-jpn-it", + "params_b": 2.614, + "ifeval": 0.5077826832803628, + "bbh": 0.42255698900658106, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2578125, + "hf_math_lvl5": 0.03474320241691843, + "hf_musr": 0.39638541666666666, + "hf_avg": 17.11540570593849 + }, + { + "hf_id": "google/gemma-2-2b-jpn-it", + "name": "gemma-2-2b-jpn-it", + "params_b": 2.614, + "ifeval": 0.5288401441508531, + "bbh": 0.4178440226217119, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.2466755319148936, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.37276041666666665, + "hf_avg": 16.678630066922224 + }, + { + "hf_id": "google/gemma-2b", + "name": "gemma-2b", + "params_b": 2.506, + "ifeval": 0.20375825033134307, + "bbh": 0.33656381705857935, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.13655252659574468, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.39778125, + "hf_avg": 7.321959810488082 + }, + { + "hf_id": "google/gemma-2b-it", + "name": "gemma-2b-it", + "params_b": 2.506, + "ifeval": 0.26902950837112194, + "bbh": 0.31508191988788464, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.13530585106382978, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.334125, + "hf_avg": 7.485804130315127, + "arena_elo": 1091.54, + "arena_rank": 301, + "arena_votes": 4779 + }, + { + "hf_id": "google/gemma-7b", + "name": "gemma-7b", + "params_b": 8.538, + "ifeval": 0.2659321710838353, + "bbh": 0.43615285239286355, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.2947972074468085, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.4062395833333334, + "hf_avg": 15.442818570272307 + }, + { + "hf_id": "google/gemma-7b-it", + "name": "gemma-7b-it", + "params_b": 8.538, + "ifeval": 0.3868324933398937, + "bbh": 0.36459012743300967, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.16946476063829788, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.42742708333333335, + "hf_avg": 13.067087110466217, + "arena_elo": 1135.91, + "arena_rank": 287, + "arena_votes": 8925 + }, + { + "hf_id": "google/mt5-base", + "name": "mt5-base", + "params_b": 0.39, + "ifeval": 0.1645157072124186, + "bbh": 0.28831600228488835, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.10696476063829788, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.36720833333333336, + "hf_avg": 3.716339524462038 + }, + { + "hf_id": "google/mt5-small", + "name": "mt5-small", + "params_b": 0.17, + "ifeval": 0.17180968718555653, + "bbh": 0.2765842029929075, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.11228390957446809, + "hf_musr": 0.38575, + "hf_avg": 4.2559281732773515 + }, + { + "hf_id": "google/mt5-xl", + "name": "mt5-xl", + "params_b": 3.23, + "ifeval": 0.19596448534333347, + "bbh": 0.304735837080435, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.11195146276595745, + "hf_musr": 0.3795208333333333, + "hf_avg": 5.191420153031625 + }, + { + "hf_id": "google/mt5-xxl", + "name": "mt5-xxl", + "params_b": 11.9, + "ifeval": 0.23575668116154028, + "bbh": 0.2959344159116905, + "gpqa": 0.24161073825503357, + "mmlu_pro": 0.10887632978723404, + "hf_musr": 0.36894791666666665, + "hf_avg": 5.10307678308611 + }, + { + "hf_id": "google/recurrentgemma-2b", + "name": "recurrentgemma-2b", + "params_b": 2.683, + "ifeval": 0.3017028151970106, + "bbh": 0.31973582830084474, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.11760305851063829, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.3445729166666667, + "hf_avg": 7.01512699699078 + }, + { + "hf_id": "google/recurrentgemma-2b-it", + "name": "recurrentgemma-2b-it", + "params_b": 2.683, + "ifeval": 0.2949329999955673, + "bbh": 0.33300047272606553, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.1402094414893617, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3340625, + "hf_avg": 7.995905374047496 + }, + { + "hf_id": "google/recurrentgemma-9b", + "name": "recurrentgemma-9b", + "params_b": 9, + "ifeval": 0.31159434744256354, + "bbh": 0.39562568669428394, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2604720744680851, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.3802604166666667, + "hf_avg": 13.709460856107865 + }, + { + "hf_id": "google/recurrentgemma-9b-it", + "name": "recurrentgemma-9b-it", + "params_b": 9, + "ifeval": 0.5010383560065071, + "bbh": 0.4367189649027647, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.2843251329787234, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.43790625, + "hf_avg": 19.218115006306835 + }, + { + "hf_id": "google/switch-base-8", + "name": "switch-base-8", + "params_b": 0.62, + "ifeval": 0.15852050337548815, + "bbh": 0.28763132730669333, + "gpqa": 0.25, + "mmlu_pro": 0.10979055851063829, + "hf_musr": 0.35173958333333327, + "hf_avg": 3.2959502683966075 + }, + { + "hf_id": "google/umt5-base", + "name": "umt5-base", + "params_b": -1, + "ifeval": 0.174632198123202, + "bbh": 0.27877262328945457, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.10779587765957446, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.33821875, + "hf_avg": 3.516574726407488 + }, + { + "hf_id": "gpt2", + "name": "gpt2", + "params_b": 0.137, + "ifeval": 0.1934168007553292, + "bbh": 0.3036385401516729, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.1149434840425532, + "hf_math_lvl5": 0.0030211480362537764, + "hf_musr": 0.43241666666666667, + "hf_avg": 6.39102973137443 + }, + { + "hf_id": "gpt2", + "name": "gpt2", + "params_b": 0.137, + "ifeval": 0.08333333333333333, + "bbh": 0.30833333333333335, + "gpqa": 0.23333333333333334, + "mmlu_pro": 0.1, + "hf_musr": 0.4333333333333333, + "hf_avg": 5.977736928104574 + }, + { + "hf_id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", + "name": "Llama-3-8B-Instruct-Gradient-1048k", + "params_b": 8.03, + "ifeval": 0.4455588948434598, + "bbh": 0.4345903107069573, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.29404920212765956, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.42975, + "hf_avg": 18.283333977044872 + }, + { + "hf_id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", + "name": "DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.4797060687863757, + "bbh": 0.5269400362212973, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.3956948138297872, + "hf_math_lvl5": 0.22205438066465258, + "hf_musr": 0.44078124999999996, + "hf_avg": 26.940480296007976 + }, + { + "hf_id": "grimjim/Gigantes-v1-gemma2-9b-it", + "name": "Gigantes-v1-gemma2-9b-it", + "params_b": 9.242, + "ifeval": 0.692454908531585, + "bbh": 0.597792552822268, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.42253989361702127, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.45547916666666666, + "hf_avg": 33.23742832143159 + }, + { + "hf_id": "grimjim/Gigantes-v2-gemma2-9b-it", + "name": "Gigantes-v2-gemma2-9b-it", + "params_b": 9.242, + "ifeval": 0.7350696152874374, + "bbh": 0.5986559388303995, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.4259474734042553, + "hf_math_lvl5": 0.20166163141993956, + "hf_musr": 0.45947916666666666, + "hf_avg": 33.876785775265446 + }, + { + "hf_id": "grimjim/Gigantes-v3-gemma2-9b-it", + "name": "Gigantes-v3-gemma2-9b-it", + "params_b": 9.242, + "ifeval": 0.697625633319592, + "bbh": 0.5983513792324827, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.4226230053191489, + "hf_math_lvl5": 0.20996978851963746, + "hf_musr": 0.4608125, + "hf_avg": 33.49043715511172 + }, + { + "hf_id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", + "name": "HuatuoSkywork-o1-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.3961499931293413, + "bbh": 0.48863582396592203, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.30950797872340424, + "hf_math_lvl5": 0.38821752265861026, + "hf_musr": 0.38385416666666666, + "hf_avg": 24.47799926837126 + }, + { + "hf_id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", + "name": "Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", + "params_b": 8.03, + "ifeval": 0.42712447417297217, + "bbh": 0.4961694535006833, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3625332446808511, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.40432291666666664, + "hf_avg": 20.83668391168813 + }, + { + "hf_id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", + "name": "Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", + "params_b": 8, + "ifeval": 0.6805897241541332, + "bbh": 0.5021734091176594, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.3684341755319149, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.38851041666666664, + "hf_avg": 24.04094205100742 + }, + { + "hf_id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", + "name": "Llama-3.1-8B-Instruct-abliterated_via_adapter", + "params_b": 8.03, + "ifeval": 0.48695018107510296, + "bbh": 0.510526564708187, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3651097074468085, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.40103125, + "hf_avg": 23.217301725050117 + }, + { + "hf_id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", + "name": "Llama-3.1-Bonsaikraft-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.42500121898784116, + "bbh": 0.5286855891530357, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3764128989361702, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.4235104166666667, + "hf_avg": 22.856640245514402 + }, + { + "hf_id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", + "name": "Llama-Nephilim-Metamorphosis-v2-8B", + "params_b": 8.03, + "ifeval": 0.4544519652300341, + "bbh": 0.5013477378974034, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.38090093085106386, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.40909375000000003, + "hf_avg": 23.00221313012307 + }, + { + "hf_id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", + "name": "Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", + "params_b": 8.03, + "ifeval": 0.43659157701565177, + "bbh": 0.5287189378780882, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.3683510638297872, + "hf_math_lvl5": 0.30060422960725075, + "hf_musr": 0.3998541666666666, + "hf_avg": 25.961665053235674 + }, + { + "hf_id": "grimjim/Magnolia-v1-Gemma2-8k-9B", + "name": "Magnolia-v1-Gemma2-8k-9B", + "params_b": 9.242, + "ifeval": 0.35308536904302806, + "bbh": 0.5589031767575711, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.4242021276595745, + "hf_math_lvl5": 0.16842900302114805, + "hf_musr": 0.46446875, + "hf_avg": 25.51284798552607 + }, + { + "hf_id": "grimjim/Magnolia-v2-12B", + "name": "Magnolia-v2-12B", + "params_b": 12.248, + "ifeval": 0.3506119318962575, + "bbh": 0.5290279354217235, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.3601230053191489, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.41712499999999997, + "hf_avg": 21.577222272679126 + }, + { + "hf_id": "grimjim/Magnolia-v2-Gemma2-8k-9B", + "name": "Magnolia-v2-Gemma2-8k-9B", + "params_b": 9.242, + "ifeval": 0.7384417789243651, + "bbh": 0.6015773428405322, + "gpqa": 0.3573825503355705, + "mmlu_pro": 0.4331781914893617, + "hf_math_lvl5": 0.2280966767371601, + "hf_musr": 0.44884375, + "hf_avg": 34.30134506171324 + }, + { + "hf_id": "grimjim/Magnolia-v3-12B", + "name": "Magnolia-v3-12B", + "params_b": 12.248, + "ifeval": 0.39649906692021614, + "bbh": 0.5326669270363916, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.3615359042553192, + "hf_math_lvl5": 0.1351963746223565, + "hf_musr": 0.4183958333333333, + "hf_avg": 22.786766373418164 + }, + { + "hf_id": "grimjim/Magnolia-v3-Gemma2-8k-9B", + "name": "Magnolia-v3-Gemma2-8k-9B", + "params_b": 9.242, + "ifeval": 0.7378422585406721, + "bbh": 0.6015406636327695, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.43367686170212766, + "hf_math_lvl5": 0.23187311178247735, + "hf_musr": 0.4488125, + "hf_avg": 34.353725481327054 + }, + { + "hf_id": "grimjim/Magnolia-v4-12B", + "name": "Magnolia-v4-12B", + "params_b": 12.248, + "ifeval": 0.34179421712168156, + "bbh": 0.5430894084668724, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.3671875, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.42112499999999997, + "hf_avg": 22.593840281150975 + }, + { + "hf_id": "grimjim/Magnolia-v5a-12B", + "name": "Magnolia-v5a-12B", + "params_b": 12.248, + "ifeval": 0.41136185321613317, + "bbh": 0.5311764105029141, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.3601230053191489, + "hf_math_lvl5": 0.13746223564954682, + "hf_musr": 0.4144895833333333, + "hf_avg": 22.851795395693376 + }, + { + "hf_id": "grimjim/Magot-v1-Gemma2-8k-9B", + "name": "Magot-v1-Gemma2-8k-9B", + "params_b": 9.242, + "ifeval": 0.29967818720993633, + "bbh": 0.6019447732218105, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.43367686170212766, + "hf_math_lvl5": 0.09894259818731117, + "hf_musr": 0.44884375, + "hf_avg": 24.587402832733005 + }, + { + "hf_id": "grimjim/Magot-v2-Gemma2-8k-9B", + "name": "Magot-v2-Gemma2-8k-9B", + "params_b": 9.242, + "ifeval": 0.7347449212533854, + "bbh": 0.5896713649821103, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.4222905585106383, + "hf_math_lvl5": 0.20166163141993956, + "hf_musr": 0.4343958333333333, + "hf_avg": 32.97995555494225 + }, + { + "hf_id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", + "name": "SauerHuatuoSkywork-o1-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.5219462138237654, + "bbh": 0.5222077363554879, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.39910239361702127, + "hf_math_lvl5": 0.1729607250755287, + "hf_musr": 0.45268749999999996, + "hf_avg": 26.68447204012895 + }, + { + "hf_id": "grimjim/llama-3-Nephilim-v1-8B", + "name": "llama-3-Nephilim-v1-8B", + "params_b": 8.03, + "ifeval": 0.4277239945566652, + "bbh": 0.5131817939007638, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.37957114361702127, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.41362499999999996, + "hf_avg": 21.729737138539548 + }, + { + "hf_id": "grimjim/llama-3-Nephilim-v2-8B", + "name": "llama-3-Nephilim-v2-8B", + "params_b": 8.03, + "ifeval": 0.39222817679313116, + "bbh": 0.5048214936442625, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3641123670212766, + "hf_math_lvl5": 0.10649546827794562, + "hf_musr": 0.3895, + "hf_avg": 20.600249913059073 + }, + { + "hf_id": "grimjim/llama-3-Nephilim-v2.1-8B", + "name": "llama-3-Nephilim-v2.1-8B", + "params_b": 8.03, + "ifeval": 0.38950540122430705, + "bbh": 0.5095042703104161, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3644448138297872, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.3935, + "hf_avg": 20.434967127814037 + }, + { + "hf_id": "grimjim/llama-3-Nephilim-v3-8B", + "name": "llama-3-Nephilim-v3-8B", + "params_b": 8.03, + "ifeval": 0.4173825449806513, + "bbh": 0.5012671264428366, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.3612034574468085, + "hf_math_lvl5": 0.09516616314199396, + "hf_musr": 0.3989270833333334, + "hf_avg": 20.600988872221105 + }, + { + "hf_id": "gupta-tanish/llama-7b-dpo-baseline", + "name": "llama-7b-dpo-baseline", + "params_b": 6.738, + "ifeval": 0.26930433472076315, + "bbh": 0.3896894398264714, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.20279255319148937, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.445625, + "hf_avg": 11.857290104453797 + }, + { + "hf_id": "gz987/qwen2.5-7b-cabs-v0.1", + "name": "qwen2.5-7b-cabs-v0.1", + "params_b": 7.616, + "ifeval": 0.7505817896514582, + "bbh": 0.5481580818735207, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.4405751329787234, + "hf_math_lvl5": 0.479607250755287, + "hf_musr": 0.437625, + "hf_avg": 36.56161315485646 + }, + { + "hf_id": "gz987/qwen2.5-7b-cabs-v0.2", + "name": "qwen2.5-7b-cabs-v0.2", + "params_b": 7.616, + "ifeval": 0.7417640748768822, + "bbh": 0.5516262466675281, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.43974401595744683, + "hf_math_lvl5": 0.4901812688821752, + "hf_musr": 0.44286458333333334, + "hf_avg": 36.614018827547945 + }, + { + "hf_id": "gz987/qwen2.5-7b-cabs-v0.3", + "name": "qwen2.5-7b-cabs-v0.3", + "params_b": 7.616, + "ifeval": 0.7569515552068511, + "bbh": 0.5494465314719504, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.4401595744680851, + "hf_math_lvl5": 0.493202416918429, + "hf_musr": 0.44295833333333334, + "hf_avg": 36.93504663341147 + }, + { + "hf_id": "gz987/qwen2.5-7b-cabs-v0.4", + "name": "qwen2.5-7b-cabs-v0.4", + "params_b": 7.616, + "ifeval": 0.7582503313430586, + "bbh": 0.5524401094760039, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.4395777925531915, + "hf_math_lvl5": 0.48489425981873113, + "hf_musr": 0.44295833333333334, + "hf_avg": 36.88194596713293 + }, + { + "hf_id": "h2oai/h2o-danube-1.8b-chat", + "name": "h2o-danube-1.8b-chat", + "params_b": 1.831, + "ifeval": 0.2198699450790569, + "bbh": 0.3219657593234448, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.13139960106382978, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3988645833333333, + "hf_avg": 6.953761979021041 + }, + { + "hf_id": "h2oai/h2o-danube3-4b-base", + "name": "h2o-danube3-4b-base", + "params_b": 3.962, + "ifeval": 0.23380851695722904, + "bbh": 0.3599083951265592, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.2109375, + "hf_math_lvl5": 0.022658610271903322, + "hf_musr": 0.37781250000000005, + "hf_avg": 10.0908487494014 + }, + { + "hf_id": "h2oai/h2o-danube3-4b-chat", + "name": "h2o-danube3-4b-chat", + "params_b": 3.962, + "ifeval": 0.3628771659197596, + "bbh": 0.3466170643135169, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.22282247340425532, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.378125, + "hf_avg": 11.571247407067567 + }, + { + "hf_id": "h2oai/h2o-danube3-500m-chat", + "name": "h2o-danube3-500m-chat", + "params_b": 0.514, + "ifeval": 0.2207941594968018, + "bbh": 0.3034691168308313, + "gpqa": 0.23070469798657717, + "mmlu_pro": 0.11436170212765957, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.34339583333333334, + "hf_avg": 5.204440277019856 + }, + { + "hf_id": "h2oai/h2o-danube3.1-4b-chat", + "name": "h2o-danube3.1-4b-chat", + "params_b": 3.962, + "ifeval": 0.5021121734774842, + "bbh": 0.3608421638178268, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2718583776595745, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.41015625, + "hf_avg": 16.41212819519095 + }, + { + "hf_id": "haoranxu/ALMA-13B-R", + "name": "ALMA-13B-R", + "params_b": 13, + "ifeval": 0.003921816336210145, + "bbh": 0.345656261205981, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.18168218085106383, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.35279166666666667, + "hf_avg": 3.8773019782821123 + }, + { + "hf_id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", + "name": "Llama-3-Instruct-8B-CPO-SimPO", + "params_b": 8.03, + "ifeval": 0.7046447869430887, + "bbh": 0.5048301774821616, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3686003989361702, + "hf_math_lvl5": 0.1027190332326284, + "hf_musr": 0.3566666666666667, + "hf_avg": 24.910737255485603 + }, + { + "hf_id": "haoranxu/Llama-3-Instruct-8B-SimPO", + "name": "Llama-3-Instruct-8B-SimPO", + "params_b": 8.03, + "ifeval": 0.7347449212533854, + "bbh": 0.49792360151415016, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.37333776595744683, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.35660416666666667, + "hf_avg": 24.990729234610715 + }, + { + "hf_id": "hon9kon9ize/CantoneseLLMChat-v0.5", + "name": "CantoneseLLMChat-v0.5", + "params_b": 6.069, + "ifeval": 0.3230849701015528, + "bbh": 0.43452388803059244, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.2504155585106383, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.4706458333333334, + "hf_avg": 15.959800818271107 + }, + { + "hf_id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", + "name": "CantoneseLLMChat-v1.0-7B", + "params_b": 7.616, + "ifeval": 0.44548353923146145, + "bbh": 0.4865734655539633, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.3784906914893617, + "hf_math_lvl5": 0.2107250755287009, + "hf_musr": 0.3882916666666667, + "hf_avg": 23.503869934977185 + }, + { + "hf_id": "hotmailuser/Deepseek-qwen-modelstock-2B", + "name": "Deepseek-qwen-modelstock-2B", + "params_b": 1.777, + "ifeval": 0.21487431127186973, + "bbh": 0.3549242330959277, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.19107380319148937, + "hf_math_lvl5": 0.33987915407854985, + "hf_musr": 0.34745833333333337, + "hf_avg": 13.734549593045678 + }, + { + "hf_id": "hotmailuser/Falcon3Slerp1-10B", + "name": "Falcon3Slerp1-10B", + "params_b": 10.306, + "ifeval": 0.5694069513335727, + "bbh": 0.616984966186231, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.4401595744680851, + "hf_math_lvl5": 0.2598187311178248, + "hf_musr": 0.43176041666666665, + "hf_avg": 31.776680017784997 + }, + { + "hf_id": "hotmailuser/Falcon3Slerp2-10B", + "name": "Falcon3Slerp2-10B", + "params_b": 10.306, + "ifeval": 0.6117966994241945, + "bbh": 0.6164263500746402, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.4369182180851064, + "hf_math_lvl5": 0.23187311178247735, + "hf_musr": 0.4095625, + "hf_avg": 31.308605732994934 + }, + { + "hf_id": "hotmailuser/Falcon3Slerp4-10B", + "name": "Falcon3Slerp4-10B", + "params_b": 10.306, + "ifeval": 0.6072254950198805, + "bbh": 0.611433776236228, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.4387466755319149, + "hf_math_lvl5": 0.22885196374622357, + "hf_musr": 0.40175, + "hf_avg": 30.717492522295817 + }, + { + "hf_id": "hotmailuser/FalconSlerp-3B", + "name": "FalconSlerp-3B", + "params_b": 3.228, + "ifeval": 0.5694568190179834, + "bbh": 0.46239111387485293, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.29679188829787234, + "hf_math_lvl5": 0.17598187311178248, + "hf_musr": 0.3989270833333333, + "hf_avg": 22.47273640583823 + }, + { + "hf_id": "hotmailuser/FalconSlerp1-7B", + "name": "FalconSlerp1-7B", + "params_b": 7.456, + "ifeval": 0.5394564200765082, + "bbh": 0.5354677787663963, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.4128989361702128, + "hf_math_lvl5": 0.23791540785498488, + "hf_musr": 0.44525, + "hf_avg": 28.681197911346505 + }, + { + "hf_id": "hotmailuser/FalconSlerp2-7B", + "name": "FalconSlerp2-7B", + "params_b": 7.456, + "ifeval": 0.6160432097944565, + "bbh": 0.5537805428914538, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.4140625, + "hf_math_lvl5": 0.2983383685800604, + "hf_musr": 0.44788541666666665, + "hf_avg": 31.286856255059003 + }, + { + "hf_id": "hotmailuser/FalconSlerp3-10B", + "name": "FalconSlerp3-10B", + "params_b": 10.306, + "ifeval": 0.6001564737119731, + "bbh": 0.6060288025434474, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.4323470744680851, + "hf_math_lvl5": 0.22734138972809667, + "hf_musr": 0.4030833333333333, + "hf_avg": 30.415115815404064 + }, + { + "hf_id": "hotmailuser/FalconSlerp3-7B", + "name": "FalconSlerp3-7B", + "params_b": 7.456, + "ifeval": 0.6096235765546527, + "bbh": 0.5532966528909408, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.41273271276595747, + "hf_math_lvl5": 0.3157099697885196, + "hf_musr": 0.45067708333333334, + "hf_avg": 31.531501918893497 + }, + { + "hf_id": "hotmailuser/FalconSlerp6-7B", + "name": "FalconSlerp6-7B", + "params_b": 7.456, + "ifeval": 0.6026542906155667, + "bbh": 0.5383801786207648, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.39951795212765956, + "hf_math_lvl5": 0.20468277945619334, + "hf_musr": 0.44921875, + "hf_avg": 28.79520632630417 + }, + { + "hf_id": "hotmailuser/Gemma2Crono-27B", + "name": "Gemma2Crono-27B", + "params_b": 27.227, + "ifeval": 0.7086164709637096, + "bbh": 0.6505341690680219, + "gpqa": 0.37080536912751677, + "mmlu_pro": 0.4632646276595745, + "hf_math_lvl5": 0.24244712990936557, + "hf_musr": 0.45668749999999997, + "hf_avg": 36.28874920037128 + }, + { + "hf_id": "hotmailuser/Gemma2atlas-27B", + "name": "Gemma2atlas-27B", + "params_b": 27.227, + "ifeval": 0.7213560020744957, + "bbh": 0.6544960921220462, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.4749833776595745, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.44453125000000004, + "hf_avg": 35.809591577124344 + }, + { + "hf_id": "hotmailuser/Llama-Hermes-slerp-8B", + "name": "Llama-Hermes-slerp-8B", + "params_b": 8.03, + "ifeval": 0.3390470617960345, + "bbh": 0.5310290010444968, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.33311170212765956, + "hf_math_lvl5": 0.08006042296072508, + "hf_musr": 0.4077916666666667, + "hf_avg": 19.59642323585999 + }, + { + "hf_id": "hotmailuser/Llama-Hermes-slerp2-8B", + "name": "Llama-Hermes-slerp2-8B", + "params_b": 8.03, + "ifeval": 0.3728440537773109, + "bbh": 0.5265283171967207, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.33793218085106386, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.42481250000000004, + "hf_avg": 20.611252287971684 + }, + { + "hf_id": "hotmailuser/LlamaStock-8B", + "name": "LlamaStock-8B", + "params_b": 8.03, + "ifeval": 0.4249513513034304, + "bbh": 0.5328942883826541, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.3806515957446808, + "hf_math_lvl5": 0.16993957703927492, + "hf_musr": 0.41293749999999996, + "hf_avg": 24.411960823539555 + }, + { + "hf_id": "hotmailuser/Mistral-modelstock-24B", + "name": "Mistral-modelstock-24B", + "params_b": 23.572, + "ifeval": 0.3424192254329623, + "bbh": 0.645229041403176, + "gpqa": 0.41023489932885904, + "mmlu_pro": 0.5069813829787234, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.4590416666666666, + "hf_avg": 29.725473787931804 + }, + { + "hf_id": "hotmailuser/Mistral-modelstock2-24B", + "name": "Mistral-modelstock2-24B", + "params_b": 23.572, + "ifeval": 0.43184528163051816, + "bbh": 0.6689381929188762, + "gpqa": 0.3926174496644295, + "mmlu_pro": 0.5318317819148937, + "hf_math_lvl5": 0.24018126888217523, + "hf_musr": 0.46161458333333333, + "hf_avg": 33.81477606165948 + }, + { + "hf_id": "hotmailuser/Qwen2.5-HomerSlerp-7B", + "name": "Qwen2.5-HomerSlerp-7B", + "params_b": 7.616, + "ifeval": 0.44878145542715553, + "bbh": 0.5632506117591088, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.4548703457446808, + "hf_math_lvl5": 0.33157099697885195, + "hf_musr": 0.4383333333333333, + "hf_avg": 29.43258692955959 + }, + { + "hf_id": "hotmailuser/QwenModelStock-1.8B", + "name": "QwenModelStock-1.8B", + "params_b": 1.777, + "ifeval": 0.3263075306852484, + "bbh": 0.41881762650909504, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.2958776595744681, + "hf_math_lvl5": 0.09894259818731117, + "hf_musr": 0.4359166666666667, + "hf_avg": 16.665220644656603 + }, + { + "hf_id": "hotmailuser/QwenSlerp-14B", + "name": "QwenSlerp-14B", + "params_b": 14.766, + "ifeval": 0.7024716640735471, + "bbh": 0.6491286917834284, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.5399767287234043, + "hf_math_lvl5": 0.38368580060422963, + "hf_musr": 0.4634479166666667, + "hf_avg": 40.34991665759889 + }, + { + "hf_id": "hotmailuser/QwenSlerp-3B", + "name": "QwenSlerp-3B", + "params_b": 3.397, + "ifeval": 0.4333690164319561, + "bbh": 0.4892345530653528, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3693484042553192, + "hf_math_lvl5": 0.27492447129909364, + "hf_musr": 0.43166666666666664, + "hf_avg": 24.511060317673625 + }, + { + "hf_id": "hotmailuser/QwenSlerp-7B", + "name": "QwenSlerp-7B", + "params_b": 7.616, + "ifeval": 0.4672912317096415, + "bbh": 0.5636352508232924, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.45088098404255317, + "hf_math_lvl5": 0.34441087613293053, + "hf_musr": 0.4409375, + "hf_avg": 30.086749997612042 + }, + { + "hf_id": "hotmailuser/QwenSlerp2-14B", + "name": "QwenSlerp2-14B", + "params_b": 14.766, + "ifeval": 0.7036707048409332, + "bbh": 0.6492799322983842, + "gpqa": 0.3808724832214765, + "mmlu_pro": 0.5378989361702128, + "hf_math_lvl5": 0.39652567975830816, + "hf_musr": 0.48065625, + "hf_avg": 40.85962754388034 + }, + { + "hf_id": "hotmailuser/QwenSlerp2-3B", + "name": "QwenSlerp2-3B", + "params_b": 3.397, + "ifeval": 0.4280486885907171, + "bbh": 0.4801760257099328, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3741688829787234, + "hf_math_lvl5": 0.26057401812688824, + "hf_musr": 0.4251875, + "hf_avg": 24.03431047768953 + }, + { + "hf_id": "hotmailuser/QwenSlerp3-14B", + "name": "QwenSlerp3-14B", + "params_b": 14.766, + "ifeval": 0.6632291209546226, + "bbh": 0.6266526215170748, + "gpqa": 0.36661073825503354, + "mmlu_pro": 0.5262632978723404, + "hf_math_lvl5": 0.43051359516616317, + "hf_musr": 0.48078125, + "hf_avg": 39.791587426587995 + }, + { + "hf_id": "hotmailuser/QwenSparse-7B", + "name": "QwenSparse-7B", + "params_b": 7.616, + "ifeval": 0.10858632871891026, + "bbh": 0.28956619468137906, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11220079787234043, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.35622916666666665, + "hf_avg": 3.4771858660491386 + }, + { + "hf_id": "hotmailuser/QwenStock-0.5B", + "name": "QwenStock-0.5B", + "params_b": 0.63, + "ifeval": 0.20490742341431845, + "bbh": 0.2911778102988436, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11668882978723404, + "hf_musr": 0.35753125, + "hf_avg": 5.093186530380667 + }, + { + "hf_id": "hotmailuser/QwenStock-1.7B", + "name": "QwenStock-1.7B", + "params_b": 1.777, + "ifeval": 0.32141163224688274, + "bbh": 0.4187550547805281, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.2954621010638298, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.44121875, + "hf_avg": 16.75925374781586 + }, + { + "hf_id": "hotmailuser/QwenStock1-14B", + "name": "QwenStock1-14B", + "params_b": 14.766, + "ifeval": 0.6693240601603745, + "bbh": 0.6502248812491821, + "gpqa": 0.3859060402684564, + "mmlu_pro": 0.5416389627659575, + "hf_math_lvl5": 0.37009063444108764, + "hf_musr": 0.47811458333333334, + "hf_avg": 39.99889430195756 + }, + { + "hf_id": "hotmailuser/RombosBeagle-v2beta-MGS-32B", + "name": "RombosBeagle-v2beta-MGS-32B", + "params_b": 32.764, + "ifeval": 0.5156761836371937, + "bbh": 0.7037350002757341, + "gpqa": 0.3800335570469799, + "mmlu_pro": 0.5907579787234043, + "hf_math_lvl5": 0.49924471299093653, + "hf_musr": 0.5020833333333333, + "hf_avg": 42.65614620168921 + }, + { + "hf_id": "huggyllama/llama-13b", + "name": "llama-13b", + "params_b": 13.016, + "ifeval": 0.24105262924595627, + "bbh": 0.39878925581174585, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.19522938829787234, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.34621875, + "hf_avg": 9.39218439885523, + "arena_elo": 972.002, + "arena_rank": 315, + "arena_votes": 2391 + }, + { + "hf_id": "huggyllama/llama-65b", + "name": "llama-65b", + "params_b": 65.286, + "ifeval": 0.25259311958935626, + "bbh": 0.4702556052882764, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.3077626329787234, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.35945833333333327, + "hf_avg": 13.688031554930518 + }, + { + "hf_id": "huggyllama/llama-7b", + "name": "llama-7b", + "params_b": 6.738, + "ifeval": 0.25009530268576263, + "bbh": 0.32773134782898566, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.13131648936170212, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.33539583333333334, + "hf_avg": 6.4149999259207915 + }, + { + "hf_id": "huihui-ai/QwQ-32B-Coder-Fusion-7030", + "name": "QwQ-32B-Coder-Fusion-7030", + "params_b": 32.764, + "ifeval": 0.38650779930584184, + "bbh": 0.6177864730931621, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.4367519946808511, + "hf_math_lvl5": 0.2794561933534743, + "hf_musr": 0.39222916666666663, + "hf_avg": 26.719670343952135 + }, + { + "hf_id": "huihui-ai/QwQ-32B-Coder-Fusion-8020", + "name": "QwQ-32B-Coder-Fusion-8020", + "params_b": 32.764, + "ifeval": 0.6020547702318737, + "bbh": 0.6664531829718748, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.5367353723404256, + "hf_math_lvl5": 0.459214501510574, + "hf_musr": 0.42934374999999997, + "hf_avg": 38.81040369162885 + }, + { + "hf_id": "huihui-ai/QwQ-32B-Coder-Fusion-9010", + "name": "QwQ-32B-Coder-Fusion-9010", + "params_b": 32.764, + "ifeval": 0.5778246164620984, + "bbh": 0.6727405551499568, + "gpqa": 0.3615771812080537, + "mmlu_pro": 0.5600066489361702, + "hf_math_lvl5": 0.5317220543806647, + "hf_musr": 0.4681979166666667, + "hf_avg": 41.581938708070844 + }, + { + "hf_id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", + "name": "Qwen2.5-14B-Instruct-abliterated-v2", + "params_b": 14.77, + "ifeval": 0.8327637335602867, + "bbh": 0.6323822447052897, + "gpqa": 0.3338926174496644, + "mmlu_pro": 0.49617686170212766, + "hf_math_lvl5": 0.5302114803625377, + "hf_musr": 0.42196875, + "hf_avg": 41.74807839195605 + }, + { + "hf_id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", + "name": "Qwen2.5-72B-Instruct-abliterated", + "params_b": 72.706, + "ifeval": 0.8592667455684251, + "bbh": 0.7189881596250237, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5536901595744681, + "hf_math_lvl5": 0.6012084592145015, + "hf_musr": 0.4232708333333333, + "hf_avg": 48.10647092442315 + }, + { + "hf_id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", + "name": "Qwen2.5-7B-Instruct-abliterated", + "params_b": 7.616, + "ifeval": 0.7546033413564897, + "bbh": 0.5261589972829911, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.41796875, + "hf_math_lvl5": 0.45770392749244715, + "hf_musr": 0.39666666666666667, + "hf_avg": 34.27590445973728 + }, + { + "hf_id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", + "name": "Qwen2.5-7B-Instruct-abliterated-v2", + "params_b": 7.616, + "ifeval": 0.7606484128778308, + "bbh": 0.5376688442794247, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.42079454787234044, + "hf_math_lvl5": 0.4637462235649547, + "hf_musr": 0.3980625, + "hf_avg": 34.729008711870584 + }, + { + "hf_id": "iFaz/llama32_1B_en_emo_v1", + "name": "llama32_1B_en_emo_v1", + "params_b": 0.765, + "ifeval": 0.44083808738591385, + "bbh": 0.33802631394113886, + "gpqa": 0.25, + "mmlu_pro": 0.17611369680851063, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.34888541666666667, + "hf_avg": 10.934841101081998 + }, + { + "hf_id": "iFaz/llama32_3B_en_emo_1000_stp", + "name": "llama32_3B_en_emo_1000_stp", + "params_b": 1.848, + "ifeval": 0.7295243287809678, + "bbh": 0.45218477635502685, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.3123337765957447, + "hf_math_lvl5": 0.14652567975830816, + "hf_musr": 0.3620625, + "hf_avg": 23.648597853327 + }, + { + "hf_id": "iFaz/llama32_3B_en_emo_2000_stp", + "name": "llama32_3B_en_emo_2000_stp", + "params_b": 1.848, + "ifeval": 0.7368681764385165, + "bbh": 0.45345889848516396, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.3097573138297872, + "hf_math_lvl5": 0.15332326283987915, + "hf_musr": 0.35269791666666667, + "hf_avg": 23.911624360048616 + }, + { + "hf_id": "iFaz/llama32_3B_en_emo_300_stp", + "name": "llama32_3B_en_emo_300_stp", + "params_b": 1.848, + "ifeval": 0.725552644760347, + "bbh": 0.45045681689917494, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.3148271276595745, + "hf_math_lvl5": 0.16012084592145015, + "hf_musr": 0.3620625, + "hf_avg": 23.75197000352155 + }, + { + "hf_id": "iFaz/llama32_3B_en_emo_5000_stp", + "name": "llama32_3B_en_emo_5000_stp", + "params_b": 1.848, + "ifeval": 0.7100404703963262, + "bbh": 0.4567949942342784, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.30668218085106386, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.34460416666666666, + "hf_avg": 23.22281886664426 + }, + { + "hf_id": "iFaz/llama32_3B_en_emo_v2", + "name": "llama32_3B_en_emo_v2", + "params_b": 1.848, + "ifeval": 0.5454017562290279, + "bbh": 0.4283518305582969, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.3003656914893617, + "hf_math_lvl5": 0.10876132930513595, + "hf_musr": 0.34822916666666665, + "hf_avg": 19.303216399935796 + }, + { + "hf_id": "iFaz/llama32_3B_en_emo_v3", + "name": "llama32_3B_en_emo_v3", + "params_b": 1.848, + "ifeval": 0.5759263199421978, + "bbh": 0.43013596402782367, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.27102726063829785, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.35527083333333337, + "hf_avg": 18.26359573830477 + }, + { + "hf_id": "iRyanBell/ARC1", + "name": "ARC1", + "params_b": 8.03, + "ifeval": 0.441112913735555, + "bbh": 0.4902999658144703, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3371010638297872, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.3990520833333333, + "hf_avg": 19.66167494214361 + }, + { + "hf_id": "iRyanBell/ARC1-II", + "name": "ARC1-II", + "params_b": 8.03, + "ifeval": 0.17083560508340093, + "bbh": 0.33817781029884353, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.1685505319148936, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.4912916666666667, + "hf_avg": 9.559430625313345 + }, + { + "hf_id": "ibivibiv/colossus_120b", + "name": "colossus_120b", + "params_b": 117.749, + "ifeval": 0.42759877126025614, + "bbh": 0.6061408586494191, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3961103723404255, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.4733125, + "hf_avg": 25.415203305397444 + }, + { + "hf_id": "ibivibiv/multimaster-7b-v6", + "name": "multimaster-7b-v6", + "params_b": 35.428, + "ifeval": 0.4473075883101283, + "bbh": 0.519351871026721, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.30950797872340424, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.43957291666666665, + "hf_avg": 21.089768673380153 + }, + { + "hf_id": "ibm/PowerLM-3b", + "name": "PowerLM-3b", + "params_b": 3.512, + "ifeval": 0.33212764354135915, + "bbh": 0.3679456724439114, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.20162898936170212, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.3562916666666667, + "hf_avg": 11.524079220212199 + }, + { + "hf_id": "ibm/merlinite-7b", + "name": "merlinite-7b", + "params_b": 7.242, + "ifeval": 0.2498703440205322, + "bbh": 0.50071326118705, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3068484042553192, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.44115624999999997, + "hf_avg": 16.751033885892248 + }, + { + "hf_id": "ibm-granite/granite-3.0-1b-a400m-base", + "name": "granite-3.0-1b-a400m-base", + "params_b": 1.335, + "ifeval": 0.24040324117785256, + "bbh": 0.3221205531032148, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.11519281914893617, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.3367291666666667, + "hf_avg": 6.0307899730101875 + }, + { + "hf_id": "ibm-granite/granite-3.0-1b-a400m-instruct", + "name": "granite-3.0-1b-a400m-instruct", + "params_b": 1.335, + "ifeval": 0.33315159332792543, + "bbh": 0.3223950988485842, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.12441821808510638, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.36228124999999994, + "hf_avg": 8.0692284950461 + }, + { + "hf_id": "ibm-granite/granite-3.0-2b-base", + "name": "granite-3.0-2b-base", + "params_b": 2.634, + "ifeval": 0.3873821460391761, + "bbh": 0.40474805593806223, + "gpqa": 0.28020134228187926, + "mmlu_pro": 0.23811502659574468, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.3434270833333333, + "hf_avg": 14.095784874796749 + }, + { + "hf_id": "ibm-granite/granite-3.0-2b-instruct", + "name": "granite-3.0-2b-instruct", + "params_b": 2.634, + "ifeval": 0.513977357854936, + "bbh": 0.44119772062630297, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.2814162234042553, + "hf_math_lvl5": 0.09214501510574018, + "hf_musr": 0.35148958333333336, + "hf_avg": 18.396095114284222, + "arena_elo": 1155.93, + "arena_rank": 272, + "arena_votes": 6837 + }, + { + "hf_id": "ibm-granite/granite-3.0-3b-a800m-base", + "name": "granite-3.0-3b-a800m-base", + "params_b": 3.374, + "ifeval": 0.2732261510569733, + "bbh": 0.36674974971308566, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.18907912234042554, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.34196875, + "hf_avg": 9.489841451458394 + }, + { + "hf_id": "ibm-granite/granite-3.0-3b-a800m-instruct", + "name": "granite-3.0-3b-a800m-instruct", + "params_b": 3.374, + "ifeval": 0.4298217618142085, + "bbh": 0.37527805291733446, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.21517619680851063, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.3486666666666667, + "hf_avg": 13.698124325974995 + }, + { + "hf_id": "ibm-granite/granite-3.0-8b-base", + "name": "granite-3.0-8b-base", + "params_b": 8.171, + "ifeval": 0.4583482936386566, + "bbh": 0.4943760637365333, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.3312832446808511, + "hf_math_lvl5": 0.10120845921450151, + "hf_musr": 0.40813541666666664, + "hf_avg": 21.690924398799496 + }, + { + "hf_id": "ibm-granite/granite-3.0-8b-instruct", + "name": "granite-3.0-8b-instruct", + "params_b": 8.171, + "ifeval": 0.5309633993359841, + "bbh": 0.5191874631840226, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.34566156914893614, + "hf_math_lvl5": 0.1419939577039275, + "hf_musr": 0.3900625, + "hf_avg": 24.027678753483297, + "arena_elo": 1181.92, + "arena_rank": 258, + "arena_votes": 6643 + }, + { + "hf_id": "ibm-granite/granite-3.1-1b-a400m-base", + "name": "granite-3.1-1b-a400m-base", + "params_b": 1.335, + "ifeval": 0.2519437315212525, + "bbh": 0.3298699546506724, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.11394614361702128, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.3500625, + "hf_avg": 6.312391508281799 + }, + { + "hf_id": "ibm-granite/granite-3.1-1b-a400m-instruct", + "name": "granite-3.1-1b-a400m-instruct", + "params_b": 1.335, + "ifeval": 0.46863987553025976, + "bbh": 0.3279834385375178, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.12167553191489362, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.33025, + "hf_avg": 10.127255876383268 + }, + { + "hf_id": "ibm-granite/granite-3.1-2b-base", + "name": "granite-3.1-2b-base", + "params_b": 2.534, + "ifeval": 0.35216115462528313, + "bbh": 0.4047188028918873, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.22506648936170212, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.3485729166666667, + "hf_avg": 13.202826259598206 + }, + { + "hf_id": "ibm-granite/granite-3.1-2b-instruct", + "name": "granite-3.1-2b-instruct", + "params_b": 2.534, + "ifeval": 0.628557782240012, + "bbh": 0.44089858558056544, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.28191489361702127, + "hf_math_lvl5": 0.15256797583081572, + "hf_musr": 0.3605416666666667, + "hf_avg": 21.712212822028288, + "arena_elo": 1179.3, + "arena_rank": 261, + "arena_votes": 3191 + }, + { + "hf_id": "ibm-granite/granite-3.1-3b-a800m-base", + "name": "granite-3.1-3b-a800m-base", + "params_b": 3.299, + "ifeval": 0.2996294276962903, + "bbh": 0.362822992347764, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.1792719414893617, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.3275208333333333, + "hf_avg": 10.00105158723934 + }, + { + "hf_id": "ibm-granite/granite-3.1-3b-a800m-instruct", + "name": "granite-3.1-3b-a800m-instruct", + "params_b": 3.299, + "ifeval": 0.5516462984880118, + "bbh": 0.4009494521947192, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.21476063829787234, + "hf_math_lvl5": 0.11404833836858005, + "hf_musr": 0.3486354166666667, + "hf_avg": 17.277676062054883 + }, + { + "hf_id": "ibm-granite/granite-3.1-8b-base", + "name": "granite-3.1-8b-base", + "params_b": 8.171, + "ifeval": 0.4221033524381973, + "bbh": 0.4776956677111636, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3232214095744681, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.3922291666666667, + "hf_avg": 20.05719991900457 + }, + { + "hf_id": "ibm-granite/granite-3.1-8b-instruct", + "name": "granite-3.1-8b-instruct", + "params_b": 8.171, + "ifeval": 0.7207564816908026, + "bbh": 0.5364460433816018, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3537234042553192, + "hf_math_lvl5": 0.21978851963746224, + "hf_musr": 0.47070833333333334, + "hf_avg": 30.6030430081627, + "arena_elo": 1208.56, + "arena_rank": 244, + "arena_votes": 3092 + }, + { + "hf_id": "ibm-granite/granite-3.2-2b-instruct", + "name": "granite-3.2-2b-instruct", + "params_b": 2.534, + "ifeval": 0.6151688630611223, + "bbh": 0.43872707491212865, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.2783410904255319, + "hf_math_lvl5": 0.14425981873111782, + "hf_musr": 0.3645729166666667, + "hf_avg": 21.25014812377563 + }, + { + "hf_id": "ibm-granite/granite-3.2-8b-instruct", + "name": "granite-3.2-8b-instruct", + "params_b": 8.171, + "ifeval": 0.7274509412802475, + "bbh": 0.5401759656246116, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.35123005319148937, + "hf_math_lvl5": 0.23791540785498488, + "hf_musr": 0.4561979166666667, + "hf_avg": 30.7704488980163 + }, + { + "hf_id": "ibm-granite/granite-7b-base", + "name": "granite-7b-base", + "params_b": 6.738, + "ifeval": 0.24142719096441884, + "bbh": 0.34804372716106186, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.18342752659574468, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.35548958333333336, + "hf_avg": 7.908701929835419 + }, + { + "hf_id": "ibm-granite/granite-7b-instruct", + "name": "granite-7b-instruct", + "params_b": 6.738, + "ifeval": 0.2972313461615181, + "bbh": 0.37229529603269523, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2286402925531915, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.40199999999999997, + "hf_avg": 12.03495955436329 + }, + { + "hf_id": "icefog72/Ice0.15-02.10-RP", + "name": "Ice0.15-02.10-RP", + "params_b": 7.242, + "ifeval": 0.5343355629729118, + "bbh": 0.4976384736188401, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.30659906914893614, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.43197916666666664, + "hf_avg": 21.49132746100335 + }, + { + "hf_id": "icefog72/Ice0.16-02.10-RP", + "name": "Ice0.16-02.10-RP", + "params_b": 7.242, + "ifeval": 0.5069083365470286, + "bbh": 0.4945564313654156, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3067652925531915, + "hf_math_lvl5": 0.05891238670694864, + "hf_musr": 0.433375, + "hf_avg": 21.076418616812266 + }, + { + "hf_id": "icefog72/Ice0.17-03.10-RP", + "name": "Ice0.17-03.10-RP", + "params_b": 7.242, + "ifeval": 0.5123538876846767, + "bbh": 0.5006815748225494, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.30851063829787234, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.433375, + "hf_avg": 21.41440427176671 + }, + { + "hf_id": "icefog72/Ice0.31-08.11-RP", + "name": "Ice0.31-08.11-RP", + "params_b": 7.242, + "ifeval": 0.5145768782386291, + "bbh": 0.5032134100285419, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3130817819148936, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.42766666666666664, + "hf_avg": 21.886899252180523 + }, + { + "hf_id": "icefog72/Ice0.32-10.11-RP", + "name": "Ice0.32-10.11-RP", + "params_b": 7.242, + "ifeval": 0.49154576523623983, + "bbh": 0.5047695597611622, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3100066489361702, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.4382083333333333, + "hf_avg": 21.63483115225961 + }, + { + "hf_id": "icefog72/Ice0.34b-14.11-RP", + "name": "Ice0.34b-14.11-RP", + "params_b": 7.242, + "ifeval": 0.47620868185303883, + "bbh": 0.5067195329696937, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.3125, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.4419895833333333, + "hf_avg": 21.681833980913552 + }, + { + "hf_id": "icefog72/Ice0.34n-14.11-RP", + "name": "Ice0.34n-14.11-RP", + "params_b": 7.242, + "ifeval": 0.47865663107222167, + "bbh": 0.5091090160356474, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.31241688829787234, + "hf_math_lvl5": 0.07250755287009064, + "hf_musr": 0.4379583333333333, + "hf_avg": 21.87840965107064 + }, + { + "hf_id": "icefog72/Ice0.37-18.11-RP", + "name": "Ice0.37-18.11-RP", + "params_b": 7.242, + "ifeval": 0.4972162750391184, + "bbh": 0.5084310833712639, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3143284574468085, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.43392708333333335, + "hf_avg": 21.913941249727642 + }, + { + "hf_id": "icefog72/Ice0.40-20.11-RP", + "name": "Ice0.40-20.11-RP", + "params_b": 7.242, + "ifeval": 0.4762585495374495, + "bbh": 0.509308586549064, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.30992353723404253, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.44459374999999995, + "hf_avg": 21.79272576511275 + }, + { + "hf_id": "icefog72/Ice0.41-22.11-RP", + "name": "Ice0.41-22.11-RP", + "params_b": 7.242, + "ifeval": 0.4620451513096362, + "bbh": 0.4723318624775949, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.26180186170212766, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.45597916666666666, + "hf_avg": 19.03517499742875 + }, + { + "hf_id": "icefog72/Ice0.68-25.01-RP", + "name": "Ice0.68-25.01-RP", + "params_b": 7.242, + "ifeval": 0.5513714721383707, + "bbh": 0.5130058094823416, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3011968085106383, + "hf_math_lvl5": 0.07250755287009064, + "hf_musr": 0.44456249999999997, + "hf_avg": 23.242050794562672 + }, + { + "hf_id": "icefog72/Ice0.69-25.01-RP", + "name": "Ice0.69-25.01-RP", + "params_b": 7.242, + "ifeval": 0.5437527981311808, + "bbh": 0.5097683665599672, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.29654255319148937, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.4485625, + "hf_avg": 22.81762296294448 + }, + { + "hf_id": "icefog72/Ice0.70-25.01-RP", + "name": "Ice0.70-25.01-RP", + "params_b": 7.242, + "ifeval": 0.549797869652522, + "bbh": 0.513632436415875, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.2996176861702128, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.45119791666666664, + "hf_avg": 23.100001948439864 + }, + { + "hf_id": "icefog72/Ice0.77-02.02-RP", + "name": "Ice0.77-02.02-RP", + "params_b": 7.242, + "ifeval": 0.5309633993359841, + "bbh": 0.5109257300160749, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.29986702127659576, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.4765, + "hf_avg": 22.480867406637245 + }, + { + "hf_id": "icefog72/IceCocoaRP-7b", + "name": "IceCocoaRP-7b", + "params_b": 7.242, + "ifeval": 0.4962421929369628, + "bbh": 0.4937902147076245, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.3098404255319149, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4197916666666666, + "hf_avg": 20.9218542806483 + }, + { + "hf_id": "icefog72/IceCoffeeRP-7b", + "name": "IceCoffeeRP-7b", + "params_b": 7.242, + "ifeval": 0.4959174989029109, + "bbh": 0.48887216244327214, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2974567819148936, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.4159791666666666, + "hf_avg": 20.34382476557608 + }, + { + "hf_id": "icefog72/IceDrinkByFrankensteinV3RP", + "name": "IceDrinkByFrankensteinV3RP", + "params_b": 7.242, + "ifeval": 0.4974911013887596, + "bbh": 0.4832523723413275, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.292719414893617, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.4253125, + "hf_avg": 19.805345696389196 + }, + { + "hf_id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", + "name": "IceDrinkNameGoesHereRP-7b-Model_Stock", + "params_b": 7.242, + "ifeval": 0.49684171332065585, + "bbh": 0.46578646938927254, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.2816655585106383, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.4067395833333334, + "hf_avg": 18.6570669982101 + }, + { + "hf_id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", + "name": "IceDrinkNameNotFoundRP-7b-Model_Stock", + "params_b": 7.242, + "ifeval": 0.5130032757527804, + "bbh": 0.502625425089929, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.3064328457446808, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.4371875, + "hf_avg": 21.38126177033122 + }, + { + "hf_id": "icefog72/IceDrunkCherryRP-7b", + "name": "IceDrunkCherryRP-7b", + "params_b": 7.242, + "ifeval": 0.48982255969715904, + "bbh": 0.4846629039263151, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.3009474734042553, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.4291875, + "hf_avg": 20.271642861184226 + }, + { + "hf_id": "icefog72/IceDrunkenCherryRP-7b", + "name": "IceDrunkenCherryRP-7b", + "params_b": 7.242, + "ifeval": 0.4762585495374495, + "bbh": 0.509308586549064, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.30992353723404253, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.44459374999999995, + "hf_avg": 21.79272576511275 + }, + { + "hf_id": "icefog72/IceLemonTeaRP-32k-7b", + "name": "IceLemonTeaRP-32k-7b", + "params_b": 7.242, + "ifeval": 0.5212214701436633, + "bbh": 0.49973852418379305, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3067652925531915, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.42903125, + "hf_avg": 21.372435824981753 + }, + { + "hf_id": "icefog72/IceMartiniRP-7b", + "name": "IceMartiniRP-7b", + "params_b": 7.242, + "ifeval": 0.5044603873278457, + "bbh": 0.4972421837639585, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3073470744680851, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.4344895833333333, + "hf_avg": 21.146002703846154 + }, + { + "hf_id": "icefog72/IceNalyvkaRP-7b", + "name": "IceNalyvkaRP-7b", + "params_b": 7.242, + "ifeval": 0.549797869652522, + "bbh": 0.513632436415875, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.2996176861702128, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.45119791666666664, + "hf_avg": 23.100001948439864 + }, + { + "hf_id": "icefog72/IceSakeRP-7b", + "name": "IceSakeRP-7b", + "params_b": 7.242, + "ifeval": 0.5227950726295119, + "bbh": 0.5119287057484642, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3176529255319149, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.41300000000000003, + "hf_avg": 21.56363636347618 + }, + { + "hf_id": "icefog72/IceSakeV8RP-7b", + "name": "IceSakeV8RP-7b", + "params_b": 7.242, + "ifeval": 0.6085741388404988, + "bbh": 0.48847141337960176, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.301030585106383, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.3992708333333333, + "hf_avg": 21.689486698109572 + }, + { + "hf_id": "ifable/gemma-2-Ifable-9B", + "name": "gemma-2-Ifable-9B", + "params_b": 9.242, + "ifeval": 0.2984292787581395, + "bbh": 0.5866115556693244, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.4226230053191489, + "hf_math_lvl5": 0.13972809667673716, + "hf_musr": 0.40525000000000005, + "hf_avg": 23.56844886390901 + }, + { + "hf_id": "ilsp/Llama-Krikri-8B-Instruct", + "name": "Llama-Krikri-8B-Instruct", + "params_b": 8.202, + "ifeval": 0.6078748830879843, + "bbh": 0.504664191645287, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3312832446808511, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.4079791666666666, + "hf_avg": 24.18078391296001 + }, + { + "hf_id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", + "name": "Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", + "params_b": 1.933, + "ifeval": 0.45884807865352817, + "bbh": 0.4146016381618061, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.2960438829787234, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.349875, + "hf_avg": 17.182396518039905 + }, + { + "hf_id": "instruction-pretrain/InstructLM-500M", + "name": "InstructLM-500M", + "params_b": 0.5, + "ifeval": 0.1027662158627996, + "bbh": 0.29408717872529677, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.1141123670212766, + "hf_musr": 0.3528229166666667, + "hf_avg": 2.8543503197666724 + }, + { + "hf_id": "internlm/internlm2-1_8b", + "name": "internlm2-1_8b", + "params_b": 8, + "ifeval": 0.2197702097102355, + "bbh": 0.3879732800028095, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.15882646276595744, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.38128125, + "hf_avg": 8.748129853272754 + }, + { + "hf_id": "internlm/internlm2-7b", + "name": "internlm2-7b", + "ifeval": 0.22803680981595092, + "bbh": 0.5825, + "gpqa": 0.33666666666666667, + "mmlu_pro": 0.19, + "hf_math_lvl5": 0.08571428571428572, + "hf_musr": 0.43999999999999995, + "hf_avg": 17.92336611649886 + }, + { + "hf_id": "internlm/internlm2-chat-1_8b", + "name": "internlm2-chat-1_8b", + "params_b": 1.889, + "ifeval": 0.2386545477111841, + "bbh": 0.4452271664119214, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.18392619680851063, + "hf_math_lvl5": 0.0324773413897281, + "hf_musr": 0.36305208333333333, + "hf_avg": 10.641800452239107 + }, + { + "hf_id": "internlm/internlm2_5-1_8b-chat", + "name": "internlm2_5-1_8b-chat", + "params_b": 1.89, + "ifeval": 0.38490870889240547, + "bbh": 0.4488926786996439, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.12990359042553193, + "hf_math_lvl5": 0.15861027190332327, + "hf_musr": 0.35939583333333336, + "hf_avg": 14.749842142996677 + }, + { + "hf_id": "internlm/internlm2_5-20b-chat", + "name": "internlm2_5-20b-chat", + "params_b": 19.86, + "ifeval": 0.7009977969565198, + "bbh": 0.7473580533672884, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.39976728723404253, + "hf_math_lvl5": 0.4078549848942598, + "hf_musr": 0.4558229166666667, + "hf_avg": 38.87959582082076, + "arena_elo": 1191.28, + "arena_rank": 251, + "arena_votes": 9902 + }, + { + "hf_id": "internlm/internlm2_5-7b-chat", + "name": "internlm2_5-7b-chat", + "params_b": 7.738, + "ifeval": 0.5538692890419642, + "bbh": 0.7073179916851792, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.3776595744680851, + "hf_math_lvl5": 0.25302114803625375, + "hf_musr": 0.45938541666666666, + "hf_avg": 32.974747665791206 + }, + { + "hf_id": "intervitens/mini-magnum-12b-v1.1", + "name": "mini-magnum-12b-v1.1", + "params_b": 12.248, + "ifeval": 0.5155509603407846, + "bbh": 0.506180035650624, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.3291223404255319, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.4004479166666666, + "hf_avg": 21.028735164842907 + }, + { + "hf_id": "inumulaisk/eval_model", + "name": "eval_model", + "params_b": 1.777, + "ifeval": 0.19314197440568803, + "bbh": 0.35118774303346373, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.16638962765957446, + "hf_math_lvl5": 0.297583081570997, + "hf_musr": 0.35796875, + "hf_avg": 12.24174390101748 + }, + { + "hf_id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", + "name": "Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", + "params_b": 10.732, + "ifeval": 0.45547591501660034, + "bbh": 0.5158439010792586, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3145777925531915, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.3992395833333333, + "hf_avg": 20.348078956309553 + }, + { + "hf_id": "invisietch/EtherealRainbow-v0.2-8B", + "name": "EtherealRainbow-v0.2-8B", + "params_b": 8.03, + "ifeval": 0.39032988027323057, + "bbh": 0.5102035205059678, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.36527593085106386, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.38267708333333333, + "hf_avg": 20.106576639387253 + }, + { + "hf_id": "invisietch/EtherealRainbow-v0.3-8B", + "name": "EtherealRainbow-v0.3-8B", + "params_b": 8.03, + "ifeval": 0.36822298168858625, + "bbh": 0.5096758454539693, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.36261635638297873, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.39039583333333333, + "hf_avg": 19.791231655800487 + }, + { + "hf_id": "invisietch/MiS-Firefly-v0.2-22B", + "name": "MiS-Firefly-v0.2-22B", + "params_b": 22.247, + "ifeval": 0.5371082062261466, + "bbh": 0.5513523591170696, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3620345744680851, + "hf_math_lvl5": 0.16540785498489427, + "hf_musr": 0.46937500000000004, + "hf_avg": 26.754186150522894 + }, + { + "hf_id": "invisietch/Nimbus-Miqu-v0.1-70B", + "name": "Nimbus-Miqu-v0.1-70B", + "params_b": 68.977, + "ifeval": 0.46466819150963884, + "bbh": 0.601030667794844, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.3853058510638298, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.41331249999999997, + "hf_avg": 24.80805215493564 + }, + { + "hf_id": "irahulpandey/mistralai-7B-slerp-v0.1", + "name": "mistralai-7B-slerp-v0.1", + "params_b": 7.242, + "ifeval": 0.4966167546554254, + "bbh": 0.5010682924547378, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.2951296542553192, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.45497916666666666, + "hf_avg": 21.352696815222927 + }, + { + "hf_id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", + "name": "pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", + "params_b": 0.407, + "ifeval": 0.15722172723928066, + "bbh": 0.2863444769655102, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11685505319148937, + "hf_musr": 0.3606979166666667, + "hf_avg": 3.81661033477558 + }, + { + "hf_id": "jayasuryajsk/Qwen2.5-3B-reasoner", + "name": "Qwen2.5-3B-reasoner", + "params_b": 3.086, + "ifeval": 0.4159585455480348, + "bbh": 0.46511772991620703, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.3482380319148936, + "hf_math_lvl5": 0.2084592145015106, + "hf_musr": 0.41229166666666667, + "hf_avg": 22.088475922135004 + }, + { + "hf_id": "jebcarter/psyonic-cetacean-20B", + "name": "psyonic-cetacean-20B", + "params_b": 19.994, + "ifeval": 0.25436619281284767, + "bbh": 0.4907386156835858, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.28856382978723405, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.46611458333333333, + "hf_avg": 16.012258737866414 + }, + { + "hf_id": "jebish7/aya-expanse-8b", + "name": "aya-expanse-8b", + "params_b": 8.028, + "ifeval": 0.37911408396388246, + "bbh": 0.496904421264497, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.31025598404255317, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.3868958333333334, + "hf_avg": 18.10730460297795 + }, + { + "hf_id": "jebish7/gemma-2-2b-it", + "name": "gemma-2-2b-it", + "params_b": 2.614, + "ifeval": 0.12717035244263, + "bbh": 0.43951564907099594, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.27152593085106386, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.42444791666666665, + "hf_avg": 12.36333618408293 + }, + { + "hf_id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", + "name": "Qwen-7B-nerd-uncensored-v1.0", + "params_b": 7.616, + "ifeval": 0.6135952605752737, + "bbh": 0.5421083753999172, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4362533244680851, + "hf_math_lvl5": 0.28700906344410876, + "hf_musr": 0.47929166666666667, + "hf_avg": 31.82039597916994 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-minperplexity-2", + "name": "Qwen2.5-7B-minperplexity-2", + "params_b": 7, + "ifeval": 0.509730847484674, + "bbh": 0.552390586276348, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.4345910904255319, + "hf_math_lvl5": 0.3013595166163142, + "hf_musr": 0.46245833333333336, + "hf_avg": 30.04145905845039 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", + "name": "Qwen2.5-7B-nerd-uncensored-v0.9", + "params_b": 7.616, + "ifeval": 0.6048274134851084, + "bbh": 0.5469701834138724, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.4363364361702128, + "hf_math_lvl5": 0.2945619335347432, + "hf_musr": 0.48198958333333336, + "hf_avg": 31.896915034467217 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", + "name": "Qwen2.5-7B-nerd-uncensored-v1.0", + "params_b": 7.616, + "ifeval": 0.7695159953368174, + "bbh": 0.541762771903226, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.4253656914893617, + "hf_math_lvl5": 0.47129909365558914, + "hf_musr": 0.4551145833333334, + "hf_avg": 36.19369887340693 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", + "name": "Qwen2.5-7B-nerd-uncensored-v1.1", + "params_b": 7.616, + "ifeval": 0.6626296005709296, + "bbh": 0.48640249867140106, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3849734042553192, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.38429166666666664, + "hf_avg": 24.684283405688316 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", + "name": "Qwen2.5-7B-nerd-uncensored-v1.2", + "params_b": 7.616, + "ifeval": 0.49646715160219335, + "bbh": 0.494592979290867, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3968583776595745, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.41724999999999995, + "hf_avg": 23.398595020008255 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", + "name": "Qwen2.5-7B-nerd-uncensored-v1.3", + "params_b": 7, + "ifeval": 0.49951462120506923, + "bbh": 0.5026055485090198, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.4015957446808511, + "hf_math_lvl5": 0.12311178247734139, + "hf_musr": 0.41873958333333333, + "hf_avg": 24.06198639675365 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", + "name": "Qwen2.5-7B-nerd-uncensored-v1.4", + "params_b": 7.616, + "ifeval": 0.6078748830879843, + "bbh": 0.5467076263362468, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.44190492021276595, + "hf_math_lvl5": 0.2809667673716012, + "hf_musr": 0.47138541666666667, + "hf_avg": 31.56603265287841 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", + "name": "Qwen2.5-7B-nerd-uncensored-v1.5", + "params_b": 7.616, + "ifeval": 0.5650352176669016, + "bbh": 0.5522599149696679, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.44481382978723405, + "hf_math_lvl5": 0.2756797583081571, + "hf_musr": 0.49820833333333336, + "hf_avg": 31.834944840883523 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", + "name": "Qwen2.5-7B-nerd-uncensored-v1.7", + "params_b": 7.616, + "ifeval": 0.4201551882338861, + "bbh": 0.5391718355132782, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.42802526595744683, + "hf_math_lvl5": 0.29154078549848944, + "hf_musr": 0.48484375, + "hf_avg": 28.677649337210752 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", + "name": "Qwen2.5-7B-nerd-uncensored-v1.8", + "params_b": 7.616, + "ifeval": 0.6255601803215468, + "bbh": 0.5446899383425835, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.4343417553191489, + "hf_math_lvl5": 0.270392749244713, + "hf_musr": 0.47671875, + "hf_avg": 31.635927180803225 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-olm-v1.0", + "name": "Qwen2.5-7B-olm-v1.0", + "params_b": 7.616, + "ifeval": 0.5331365222055258, + "bbh": 0.5659918212629057, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.4566156914893617, + "hf_math_lvl5": 0.2862537764350453, + "hf_musr": 0.42776041666666664, + "hf_avg": 30.108563094878495 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-olm-v1.1", + "name": "Qwen2.5-7B-olm-v1.1", + "params_b": 7.616, + "ifeval": 0.4329445870290828, + "bbh": 0.5478077656573704, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.4354222074468085, + "hf_math_lvl5": 0.38293051359516617, + "hf_musr": 0.48081250000000003, + "hf_avg": 30.253116755964555 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-olm-v1.2", + "name": "Qwen2.5-7B-olm-v1.2", + "params_b": 7.616, + "ifeval": 0.42025492360270744, + "bbh": 0.5533340429711561, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.4387466755319149, + "hf_math_lvl5": 0.2847432024169184, + "hf_musr": 0.46878125, + "hf_avg": 28.53753917623895 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-olm-v1.3", + "name": "Qwen2.5-7B-olm-v1.3", + "params_b": 7.616, + "ifeval": 0.4218540140161438, + "bbh": 0.5531852688351706, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.44697473404255317, + "hf_math_lvl5": 0.3104229607250755, + "hf_musr": 0.4700520833333333, + "hf_avg": 29.2967667117791 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-olm-v1.4", + "name": "Qwen2.5-7B-olm-v1.4", + "params_b": 7.616, + "ifeval": 0.4545018329144448, + "bbh": 0.5581962445576828, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.4457280585106383, + "hf_math_lvl5": 0.29229607250755285, + "hf_musr": 0.46220833333333333, + "hf_avg": 29.182337218829193 + }, + { + "hf_id": "jeffmeloy/Qwen2.5-7B-olm-v1.5", + "name": "Qwen2.5-7B-olm-v1.5", + "params_b": 7.616, + "ifeval": 0.4546514359676769, + "bbh": 0.5543943528577703, + "gpqa": 0.33976510067114096, + "mmlu_pro": 0.43991023936170215, + "hf_math_lvl5": 0.28172205438066467, + "hf_musr": 0.4539270833333333, + "hf_avg": 29.23575275519298 + }, + { + "hf_id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", + "name": "jeffmeloy_Qwen2.5-7B-minperplexity-1", + "params_b": 7.616, + "ifeval": 0.37571643239936703, + "bbh": 0.5582354546195324, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.4367519946808511, + "hf_math_lvl5": 0.29154078549848944, + "hf_musr": 0.42903125, + "hf_avg": 27.475832080246903 + }, + { + "hf_id": "jeonsworld/CarbonVillain-en-10.7B-v4", + "name": "CarbonVillain-en-10.7B-v4", + "params_b": 10.732, + "ifeval": 0.45792386423578324, + "bbh": 0.516795955873779, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.31416223404255317, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.3965416666666666, + "hf_avg": 20.328676385842854 + }, + { + "hf_id": "jieliu/Storm-7B", + "name": "Storm-7B", + "params_b": 7.242, + "ifeval": 0.3424192254329623, + "bbh": 0.5187285371254579, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3119182180851064, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.4428958333333333, + "hf_avg": 19.763877835116066 + }, + { + "hf_id": "jiviai/medX_v2", + "name": "medX_v2", + "params_b": 8.03, + "ifeval": 0.37431792089433813, + "bbh": 0.4508721125093523, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.34283577127659576, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.34984375, + "hf_avg": 17.31888331901421 + }, + { + "hf_id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", + "name": "Qwen2.5-3B-Infinity-Instruct-0625", + "params_b": 3.086, + "ifeval": 0.35575827692744144, + "bbh": 0.4773774601029352, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.3198969414893617, + "hf_math_lvl5": 0.13670694864048338, + "hf_musr": 0.39809374999999997, + "hf_avg": 18.5491918072634 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.42712447417297217, + "bbh": 0.5035519809362171, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.37391954787234044, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.4637604166666667, + "hf_avg": 22.442126802141008 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.42532591302189304, + "bbh": 0.5018845446835877, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.37242353723404253, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.41502083333333334, + "hf_avg": 21.44635595224064 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.33774828565982706, + "bbh": 0.4917135045463188, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3533078457446808, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.5017708333333334, + "hf_avg": 20.2797757714617 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.4273993005226133, + "bbh": 0.5125777877188348, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.37391954787234044, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.42264583333333333, + "hf_avg": 21.833220529017808 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.32036219453272874, + "bbh": 0.48835763921755193, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.33444148936170215, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.5097708333333334, + "hf_avg": 19.44753152983463 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.43963904661852776, + "bbh": 0.5140041302485145, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.36959773936170215, + "hf_math_lvl5": 0.08006042296072508, + "hf_musr": 0.43979166666666664, + "hf_avg": 22.371145290205714 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.2814443454478561, + "bbh": 0.4854325756272537, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3295378989361702, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.5163125000000001, + "hf_avg": 18.480197691407646 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.4302218114602588, + "bbh": 0.5157097379648965, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.36627327127659576, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.43315624999999996, + "hf_avg": 21.77274590923878 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.2789963962286732, + "bbh": 0.48611535229340735, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3304521276595745, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.5150104166666667, + "hf_avg": 18.508789697368925 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.4222784434190171, + "bbh": 0.5153764046315631, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3650265957446808, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.4384270833333333, + "hf_avg": 21.931665888548704 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.4358923212631374, + "bbh": 0.5040935986635269, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3762466755319149, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.45315625, + "hf_avg": 22.174229898217675 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.4201551882338861, + "bbh": 0.501124270710985, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.3699301861702128, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.41502083333333334, + "hf_avg": 21.303402130290717 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.35178659290682057, + "bbh": 0.49985217584312186, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.3611203457446808, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.48710416666666667, + "hf_avg": 20.586572385911417 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.42038014689911657, + "bbh": 0.5107301269172088, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.37101063829787234, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.42785416666666665, + "hf_avg": 21.723601789562878 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.34541682735142754, + "bbh": 0.4983827321097329, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3531416223404255, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.49113541666666666, + "hf_avg": 20.250340892451284 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.40916435058976847, + "bbh": 0.513665952913411, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.366938164893617, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.43569791666666663, + "hf_avg": 21.442231277824096 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.29038728351884113, + "bbh": 0.4967337534367295, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.34898603723404253, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.4990729166666667, + "hf_avg": 19.28808146621785 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.41988036188424493, + "bbh": 0.5146905664948336, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3615359042553192, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.43576041666666665, + "hf_avg": 21.626887508307224 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", + "params_b": 8.03, + "ifeval": 0.29131149793658606, + "bbh": 0.49182964384768835, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.34541223404255317, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.4976770833333333, + "hf_avg": 19.06061039400886 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", + "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", + "params_b": 8.03, + "ifeval": 0.41623337189767595, + "bbh": 0.5138610942606995, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3624501329787234, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.43172916666666666, + "hf_avg": 21.361497158834098 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_dare_linear", + "name": "Llama-3-8B-Instruct_dare_linear", + "params_b": 8.03, + "ifeval": 0.21454961723781787, + "bbh": 0.4282807940700452, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.24143949468085107, + "hf_musr": 0.49792708333333335, + "hf_avg": 14.123522915539402 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", + "name": "Llama-3-8B-Instruct_dare_ties-density-0.1", + "params_b": 8.03, + "ifeval": 0.18907055501624578, + "bbh": 0.41187360174735804, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.22647938829787234, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.46580208333333334, + "hf_avg": 11.632495167787908 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", + "name": "Llama-3-8B-Instruct_dare_ties-density-0.3", + "params_b": 8.03, + "ifeval": 0.21132705665412216, + "bbh": 0.4558569854124363, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.30402260638297873, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.5069479166666667, + "hf_avg": 15.96894703975495 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", + "name": "Llama-3-8B-Instruct_dare_ties-density-0.7", + "params_b": 8.03, + "ifeval": 0.20338368861288048, + "bbh": 0.4722858888388635, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3148271276595745, + "hf_math_lvl5": 0.0030211480362537764, + "hf_musr": 0.5110104166666667, + "hf_avg": 16.77203043517005 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", + "name": "Llama-3-8B-Instruct_dare_ties-density-0.9", + "params_b": 8.03, + "ifeval": 0.21607335203925582, + "bbh": 0.46639610671811504, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3143284574468085, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.5230416666666667, + "hf_avg": 17.30976967313407 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_linear", + "name": "Llama-3-8B-Instruct_linear", + "params_b": 8.03, + "ifeval": 0.4308213318439518, + "bbh": 0.5031496839210309, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.37117686170212766, + "hf_math_lvl5": 0.10045317220543806, + "hf_musr": 0.40971874999999996, + "hf_avg": 21.370872595590694 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", + "name": "Llama-3-8B-Instruct_ties-density-0.1", + "params_b": 8.03, + "ifeval": 0.41161229980895137, + "bbh": 0.5021445196013956, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.36003989361702127, + "hf_math_lvl5": 0.07930513595166164, + "hf_musr": 0.417375, + "hf_avg": 20.428512140604372 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", + "name": "Llama-3-8B-Instruct_ties-density-0.3", + "params_b": 8.03, + "ifeval": 0.3626278274977061, + "bbh": 0.49061122520005807, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.33211436170212766, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.40248958333333335, + "hf_avg": 18.85496971399459 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", + "name": "Llama-3-8B-Instruct_ties-density-0.5", + "params_b": 8.03, + "ifeval": 0.37966373666316483, + "bbh": 0.47931248948849836, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.31748670212765956, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.3879791666666667, + "hf_avg": 18.221595666098747 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", + "name": "Llama-3-8B-Instruct_ties-density-0.7", + "params_b": 8.03, + "ifeval": 0.3681232463197649, + "bbh": 0.4738186124296502, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.3152426861702128, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.3880729166666667, + "hf_avg": 18.056542734095075 + }, + { + "hf_id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", + "name": "Llama-3-8B-Instruct_ties-density-0.9", + "params_b": 8.03, + "ifeval": 0.3858085435533274, + "bbh": 0.47354321136013144, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3181515957446808, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.3880416666666667, + "hf_avg": 18.13585052124981 + }, + { + "hf_id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", + "name": "Chocolatine-14B-Instruct-4k-DPO", + "params_b": 13.96, + "ifeval": 0.4688648341954902, + "bbh": 0.6299582409761587, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.4763962765957447, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.44388541666666664, + "hf_avg": 30.316420737984473 + }, + { + "hf_id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", + "name": "Chocolatine-14B-Instruct-DPO-v1.2", + "params_b": 13.96, + "ifeval": 0.6852107962428579, + "bbh": 0.6438408959901142, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.46966422872340424, + "hf_math_lvl5": 0.20921450151057402, + "hf_musr": 0.4267708333333333, + "hf_avg": 33.79581095053082 + }, + { + "hf_id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", + "name": "Chocolatine-14B-Instruct-DPO-v1.3", + "params_b": 14.66, + "ifeval": 0.703995398874985, + "bbh": 0.6846125547592651, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.5374002659574468, + "hf_math_lvl5": 0.5619335347432024, + "hf_musr": 0.42339583333333336, + "hf_avg": 42.42049137915473 + }, + { + "hf_id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", + "name": "Chocolatine-2-14B-Instruct-v2.0", + "params_b": 14.66, + "ifeval": 0.0885273297073986, + "bbh": 0.6769929749559443, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.5301695478723404, + "hf_math_lvl5": 0.48036253776435045, + "hf_musr": 0.5021145833333334, + "hf_avg": 33.39132529353342 + }, + { + "hf_id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", + "name": "Chocolatine-2-14B-Instruct-v2.0.3", + "params_b": 14.766, + "ifeval": 0.7037205725253439, + "bbh": 0.6548026688308357, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5374002659574468, + "hf_math_lvl5": 0.4206948640483384, + "hf_musr": 0.47681250000000003, + "hf_avg": 41.32784968296863 + }, + { + "hf_id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", + "name": "Chocolatine-2-14B-Instruct-v2.0b2", + "params_b": 14.766, + "ifeval": 0.7240787776433197, + "bbh": 0.6475822300543483, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.5369015957446809, + "hf_math_lvl5": 0.3950151057401813, + "hf_musr": 0.48075, + "hf_avg": 41.24635883058548 + }, + { + "hf_id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", + "name": "Chocolatine-2-14B-Instruct-v2.0b3", + "params_b": 14.766, + "ifeval": 0.7322969720342026, + "bbh": 0.646878884179919, + "gpqa": 0.37919463087248323, + "mmlu_pro": 0.5337433510638298, + "hf_math_lvl5": 0.4108761329305136, + "hf_musr": 0.47811458333333334, + "hf_avg": 41.4335223068507 + }, + { + "hf_id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", + "name": "Chocolatine-3B-Instruct-DPO-Revised", + "params_b": 3.821, + "ifeval": 0.5622625744136669, + "bbh": 0.5539982344792619, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.3988530585106383, + "hf_math_lvl5": 0.18051359516616314, + "hf_musr": 0.44534375, + "hf_avg": 28.22663122048826 + }, + { + "hf_id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", + "name": "Chocolatine-3B-Instruct-DPO-v1.0", + "params_b": 3.821, + "ifeval": 0.3737184005106451, + "bbh": 0.5471398082537478, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.3937001329787234, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.4754791666666667, + "hf_avg": 25.429590501932733 + }, + { + "hf_id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", + "name": "Chocolatine-3B-Instruct-DPO-v1.2", + "params_b": 3.821, + "ifeval": 0.5455014915978493, + "bbh": 0.5487182027245813, + "gpqa": 0.3389261744966443, + "mmlu_pro": 0.3877160904255319, + "hf_math_lvl5": 0.20468277945619334, + "hf_musr": 0.41542708333333334, + "hf_avg": 27.861913021910855 + }, + { + "hf_id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", + "name": "Distilucie-7B-Math-Instruct-DPO-v0.1", + "params_b": 6.707, + "ifeval": 0.30475028479988653, + "bbh": 0.38346961466103785, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.1809341755319149, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.3644479166666667, + "hf_avg": 11.128777731316648 + }, + { + "hf_id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", + "name": "Lucie-7B-Instruct-DPO-v1.1", + "params_b": 6.707, + "ifeval": 0.31209413245743517, + "bbh": 0.37810118011411814, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.18375997340425532, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.40159374999999997, + "hf_avg": 11.704829753358311 + }, + { + "hf_id": "jpacifico/Lucie-Boosted-7B-Instruct", + "name": "Lucie-Boosted-7B-Instruct", + "params_b": 6.707, + "ifeval": 0.25661467129438775, + "bbh": 0.34654827210803724, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.1629820478723404, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.369875, + "hf_avg": 8.306616536792435 + }, + { + "hf_id": "jsfs11/MixtureofMerges-MoE-4x7b-v4", + "name": "MixtureofMerges-MoE-4x7b-v4", + "params_b": 24.154, + "ifeval": 0.40299405577201824, + "bbh": 0.5169007103786006, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.30319148936170215, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.43855208333333334, + "hf_avg": 20.022361213958778 + }, + { + "hf_id": "jsfs11/MixtureofMerges-MoE-4x7b-v5", + "name": "MixtureofMerges-MoE-4x7b-v5", + "params_b": 24.154, + "ifeval": 0.41993022956865567, + "bbh": 0.5198481257083689, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3097573138297872, + "hf_math_lvl5": 0.0755287009063444, + "hf_musr": 0.4304895833333333, + "hf_avg": 20.434941072567536 + }, + { + "hf_id": "kaist-ai/janus-7b", + "name": "janus-7b", + "params_b": 7.242, + "ifeval": 0.37751499355044615, + "bbh": 0.4693667591541633, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.28740026595744683, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.4401041666666667, + "hf_avg": 17.616998787714394 + }, + { + "hf_id": "kaist-ai/janus-dpo-7b", + "name": "janus-dpo-7b", + "params_b": 7.242, + "ifeval": 0.4002712802031942, + "bbh": 0.4772581104894978, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2976230053191489, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.43873958333333335, + "hf_avg": 18.53164895276002 + }, + { + "hf_id": "kaist-ai/janus-rm-7b", + "name": "janus-rm-7b", + "params_b": 7.111, + "ifeval": 0.177804891022487, + "bbh": 0.3056467446788138, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.11261635638297872, + "hf_musr": 0.38829166666666665, + "hf_avg": 4.775598832496902 + }, + { + "hf_id": "kaist-ai/mistral-orpo-capybara-7k", + "name": "mistral-orpo-capybara-7k", + "params_b": 7.242, + "ifeval": 0.536733644507684, + "bbh": 0.4488995185492166, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.297124335106383, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.3963541666666666, + "hf_avg": 19.22089458033309 + }, + { + "hf_id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", + "name": "T3Q-Qwen2.5-7B-it-KOR-Safe", + "params_b": 7.616, + "ifeval": 0.6081497094376255, + "bbh": 0.5549941776226351, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.44639295212765956, + "hf_math_lvl5": 0.37613293051359514, + "hf_musr": 0.42772916666666666, + "hf_avg": 32.418098677759176 + }, + { + "hf_id": "keeeeenw/MicroLlama", + "name": "MicroLlama", + "params_b": 0.305, + "ifeval": 0.19853765785892544, + "bbh": 0.3007313991347165, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.11377992021276596, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.36981249999999993, + "hf_avg": 5.266088341806957 + }, + { + "hf_id": "kekmodel/StopCarbon-10.7B-v5", + "name": "StopCarbon-10.7B-v5", + "params_b": 10.732, + "ifeval": 0.47283651821611106, + "bbh": 0.5177716413471513, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.3156582446808511, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.4019375, + "hf_avg": 20.93299222055526 + }, + { + "hf_id": "kevin009/llamaRAGdrama", + "name": "llamaRAGdrama", + "params_b": 7.242, + "ifeval": 0.2598372318780835, + "bbh": 0.4007385667099335, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.27235704787234044, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.43157291666666664, + "hf_avg": 13.348717196012394 + }, + { + "hf_id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", + "name": "Llama-3.1-8B-Reason-Blend-888k", + "params_b": 8.03, + "ifeval": 0.583170432230925, + "bbh": 0.4789526925494476, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3100066489361702, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.3379375, + "hf_avg": 21.101958513598035 + }, + { + "hf_id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", + "name": "chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", + "params_b": 9.3, + "ifeval": 0.5455014915978493, + "bbh": 0.42890394469736065, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.2798371010638298, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.38206249999999997, + "hf_avg": 17.99671713302148 + }, + { + "hf_id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", + "name": "chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", + "params_b": 4.132, + "ifeval": 0.4863251727638222, + "bbh": 0.49871846432893613, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3480718085106383, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.39828125, + "hf_avg": 22.110300080411182 + }, + { + "hf_id": "kms7530/chemeng_qwen-math-7b_24_1_100_1", + "name": "chemeng_qwen-math-7b_24_1_100_1", + "params_b": 8.911, + "ifeval": 0.211052230304481, + "bbh": 0.3578007894497858, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.21584109042553193, + "hf_math_lvl5": 0.2243202416918429, + "hf_musr": 0.3686979166666666, + "hf_avg": 11.66485615226577 + }, + { + "hf_id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", + "name": "chemeng_qwen-math-7b_24_1_100_1_nonmath", + "params_b": 15.231, + "ifeval": 0.25836336476105626, + "bbh": 0.3892856967853256, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.24517952127659576, + "hf_math_lvl5": 0.30966767371601206, + "hf_musr": 0.40869791666666666, + "hf_avg": 16.982090044361417 + }, + { + "hf_id": "kno10/ende-chat-0.0.5", + "name": "ende-chat-0.0.5", + "params_b": 7.891, + "ifeval": 0.3404455733010634, + "bbh": 0.3604365707523862, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.17902260638297873, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.39384375, + "hf_avg": 10.850085123772722 + }, + { + "hf_id": "kno10/ende-chat-0.0.7", + "name": "ende-chat-0.0.7", + "params_b": 7.891, + "ifeval": 0.440063476021401, + "bbh": 0.37918745577624335, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.19664228723404256, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.386125, + "hf_avg": 13.371913816595809 + }, + { + "hf_id": "kyutai/helium-1-preview-2b", + "name": "helium-1-preview-2b", + "params_b": 2.173, + "ifeval": 0.26136096667952147, + "bbh": 0.3638164815956466, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.18725066489361702, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3549583333333333, + "hf_avg": 9.329143600411902 + }, + { + "hf_id": "kz919/QwQ-0.5B-Distilled-SFT", + "name": "QwQ-0.5B-Distilled-SFT", + "params_b": 0.494, + "ifeval": 0.3076725311063534, + "bbh": 0.3256291569645335, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.15874335106382978, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.3408541666666667, + "hf_avg": 9.089107211771186 + }, + { + "hf_id": "laislemke/LLaMA-2-vicuna-7b-slerp", + "name": "LLaMA-2-vicuna-7b-slerp", + "params_b": 6.738, + "ifeval": 0.29320979445648654, + "bbh": 0.29862163052356266, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.13422539893617022, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.3833020833333333, + "hf_avg": 7.694402356108033 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", + "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR", + "params_b": 0.494, + "ifeval": 0.21377500587330506, + "bbh": 0.32694393820046386, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.15334109042553193, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.32625, + "hf_avg": 7.057838460114436 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", + "name": "ECE-PRYMMAL-0.5B-SLERP-V4", + "params_b": 0.494, + "ifeval": 0.15639724819035714, + "bbh": 0.2894308596288922, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.11685505319148937, + "hf_musr": 0.37892708333333336, + "hf_avg": 4.3809433295396625 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", + "name": "ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", + "params_b": 0.494, + "ifeval": 0.1437075847639818, + "bbh": 0.3031946898842932, + "gpqa": 0.2348993288590604, + "mmlu_pro": 0.11211768617021277, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3646041666666667, + "hf_avg": 3.6107222690519762 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V3", + "params_b": 1.544, + "ifeval": 0.325008754549041, + "bbh": 0.42245501886651654, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.2931349734042553, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.42128125, + "hf_avg": 16.44790148688646 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V4", + "params_b": 1.544, + "ifeval": 0.33235260220658963, + "bbh": 0.4170742409015322, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.289311835106383, + "hf_math_lvl5": 0.10045317220543806, + "hf_musr": 0.4306145833333333, + "hf_avg": 16.43838512195718 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", + "name": "ECE-PRYMMAL-YL-6B-SLERP-V1", + "params_b": 6.061, + "ifeval": 0.3264072660540699, + "bbh": 0.46293726502592586, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.32139295212765956, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.48639583333333336, + "hf_avg": 20.037041744632443 + }, + { + "hf_id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", + "name": "ECE-PRYMMAL-YL-6B-SLERP-V2", + "params_b": 6.061, + "ifeval": 0.3248835312526319, + "bbh": 0.46293726502592586, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.32139295212765956, + "hf_math_lvl5": 0.1268882175226586, + "hf_musr": 0.48639583333333336, + "hf_avg": 20.01164616460848 + }, + { + "hf_id": "langgptai/Qwen-las-v0.1", + "name": "Qwen-las-v0.1", + "params_b": 7.901, + "ifeval": 0.33010412372504955, + "bbh": 0.38925525629956187, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.2325465425531915, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.37009374999999994, + "hf_avg": 11.633178497007412 + }, + { + "hf_id": "langgptai/qwen1.5-7b-chat-sa-v0.1", + "name": "qwen1.5-7b-chat-sa-v0.1", + "params_b": 15.443, + "ifeval": 0.42677429221133256, + "bbh": 0.4325267992878656, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.29928523936170215, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.3551458333333333, + "hf_avg": 16.580170752646193 + }, + { + "hf_id": "lars1234/Mistral-Small-24B-Instruct-2501-writer", + "name": "Mistral-Small-24B-Instruct-2501-writer", + "params_b": 23.572, + "ifeval": 0.6565346613651777, + "bbh": 0.6733164099871131, + "gpqa": 0.38926174496644295, + "mmlu_pro": 0.5447972074468085, + "hf_math_lvl5": 0.3557401812688822, + "hf_musr": 0.46453125, + "hf_avg": 39.855790317231644 + }, + { + "hf_id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", + "name": "Llama-3.1-8B-MultiReflection-Instruct", + "params_b": 8.03, + "ifeval": 0.7125382872999197, + "bbh": 0.5009088261495708, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.37242353723404253, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.3681979166666667, + "hf_avg": 26.87834660945809 + }, + { + "hf_id": "lemon07r/Gemma-2-Ataraxy-9B", + "name": "Gemma-2-Ataraxy-9B", + "params_b": 10.159, + "ifeval": 0.3008772279773224, + "bbh": 0.5931298417725773, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.4226230053191489, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.4424270833333333, + "hf_avg": 23.711508058895607 + }, + { + "hf_id": "lemon07r/Gemma-2-Ataraxy-v4d-9B", + "name": "Gemma-2-Ataraxy-v4d-9B", + "params_b": 10.159, + "ifeval": 0.7250029920610646, + "bbh": 0.6054158192304304, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.4345910904255319, + "hf_math_lvl5": 0.23338368580060423, + "hf_musr": 0.4541458333333333, + "hf_avg": 34.242385930783996 + }, + { + "hf_id": "lemon07r/Llama-3-RedMagic4-8B", + "name": "Llama-3-RedMagic4-8B", + "params_b": 8.03, + "ifeval": 0.4864005283758206, + "bbh": 0.42560489470390417, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3676030585106383, + "hf_math_lvl5": 0.08987915407854985, + "hf_musr": 0.3766354166666666, + "hf_avg": 19.430990497953946 + }, + { + "hf_id": "lemon07r/llama-3-NeuralMahou-8b", + "name": "llama-3-NeuralMahou-8b", + "params_b": 8.03, + "ifeval": 0.49009738604680025, + "bbh": 0.41841123683301523, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.3690159574468085, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.3872708333333333, + "hf_avg": 19.846074213837827 + }, + { + "hf_id": "lesubra/ECE-EIFFEL-3B", + "name": "ECE-EIFFEL-3B", + "params_b": 3.821, + "ifeval": 0.3469405621528655, + "bbh": 0.5101583259186949, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.3820644946808511, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.43622916666666667, + "hf_avg": 22.50442333553349 + }, + { + "hf_id": "lesubra/ECE-EIFFEL-3Bv2", + "name": "ECE-EIFFEL-3Bv2", + "params_b": 3.821, + "ifeval": 0.30130276555096036, + "bbh": 0.5424007873371969, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.39993351063829785, + "hf_math_lvl5": 0.11858006042296072, + "hf_musr": 0.4442916666666667, + "hf_avg": 23.141091471464545 + }, + { + "hf_id": "lesubra/ECE-EIFFEL-3Bv3", + "name": "ECE-EIFFEL-3Bv3", + "params_b": 3.821, + "ifeval": 0.3786142989490109, + "bbh": 0.5469446669064592, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.39752327127659576, + "hf_math_lvl5": 0.16691842900302115, + "hf_musr": 0.46751041666666665, + "hf_avg": 25.50122739306737 + }, + { + "hf_id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", + "name": "ECE-PRYMMAL-3B-SLERP-V1", + "params_b": 3.821, + "ifeval": 0.2932840418977203, + "bbh": 0.5340594627933309, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.3900432180851064, + "hf_math_lvl5": 0.1661631419939577, + "hf_musr": 0.45951041666666664, + "hf_avg": 23.135359696186224 + }, + { + "hf_id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", + "name": "ECE-PRYMMAL-3B-SLERP-V2", + "params_b": 3.821, + "ifeval": 0.2932840418977203, + "bbh": 0.5340594627933309, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.3900432180851064, + "hf_math_lvl5": 0.1661631419939577, + "hf_musr": 0.45951041666666664, + "hf_avg": 23.135359696186224 + }, + { + "hf_id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", + "name": "ECE-PRYMMAL-3B-SLERP_2-V1", + "params_b": 3.821, + "ifeval": 0.3649006857360692, + "bbh": 0.5411447467732948, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3990192819148936, + "hf_math_lvl5": 0.16767371601208458, + "hf_musr": 0.4661458333333333, + "hf_avg": 24.961424176446453 + }, + { + "hf_id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", + "name": "ECE-PRYMMAL-3B-SLERP_2-V2", + "params_b": 3.821, + "ifeval": 0.3664244205375071, + "bbh": 0.5411447467732948, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.3990192819148936, + "hf_math_lvl5": 0.16767371601208458, + "hf_musr": 0.4661458333333333, + "hf_avg": 24.98681975647042 + }, + { + "hf_id": "lesubra/merge-test", + "name": "merge-test", + "params_b": 3.821, + "ifeval": 0.538257379309122, + "bbh": 0.5240434385320306, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.38738364361702127, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.44190625, + "hf_avg": 26.075520921002326 + }, + { + "hf_id": "lightblue/suzume-llama-3-8B-multilingual", + "name": "suzume-llama-3-8B-multilingual", + "params_b": 8.03, + "ifeval": 0.6678003253589365, + "bbh": 0.49499524187359745, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.33834773936170215, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.39768749999999997, + "hf_avg": 23.98630635028447 + }, + { + "hf_id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", + "name": "suzume-llama-3-8B-multilingual-orpo-borda-full", + "params_b": 8.03, + "ifeval": 0.5817464327983085, + "bbh": 0.4714219934773132, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.33095079787234044, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.3221875, + "hf_avg": 20.301707657676165 + }, + { + "hf_id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", + "name": "suzume-llama-3-8B-multilingual-orpo-borda-half", + "params_b": 8.03, + "ifeval": 0.6249107922534431, + "bbh": 0.47074584910573014, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.36136968085106386, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.35158333333333336, + "hf_avg": 21.50979670721306 + }, + { + "hf_id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", + "name": "suzume-llama-3-8B-multilingual-orpo-borda-top25", + "params_b": 8.03, + "ifeval": 0.6636535503574958, + "bbh": 0.4864641205580417, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3684341755319149, + "hf_math_lvl5": 0.1042296072507553, + "hf_musr": 0.35660416666666667, + "hf_avg": 23.684768112420983 + }, + { + "hf_id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", + "name": "suzume-llama-3-8B-multilingual-orpo-borda-top75", + "params_b": 8.03, + "ifeval": 0.6687245397766814, + "bbh": 0.48333166095856117, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.37691156914893614, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.3816875, + "hf_avg": 23.647119777794057 + }, + { + "hf_id": "llmat/Mistral-v0.3-7B-ORPO", + "name": "Mistral-v0.3-7B-ORPO", + "params_b": 7.248, + "ifeval": 0.3770406964631622, + "bbh": 0.39776607302918093, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.2278091755319149, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.35552083333333334, + "hf_avg": 12.399290010550855 + }, + { + "hf_id": "llmat/Mistral-v0.3-7B-ORPO", + "name": "Mistral-v0.3-7B-ORPO", + "params_b": 7.248, + "ifeval": 0.3639764713183243, + "bbh": 0.400465557804411, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.23013630319148937, + "hf_math_lvl5": 0.0015105740181268882, + "hf_musr": 0.3528541666666667, + "hf_avg": 12.024321589275658 + }, + { + "hf_id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V5", + "params_b": 1.544, + "ifeval": 0.33125329680802496, + "bbh": 0.42329545804357255, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.29305186170212766, + "hf_math_lvl5": 0.11102719033232629, + "hf_musr": 0.3868020833333334, + "hf_avg": 15.836336402400669 + }, + { + "hf_id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V6", + "params_b": 1.357, + "ifeval": 0.13876181864120535, + "bbh": 0.3944027089700251, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.2349567819148936, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.39279166666666665, + "hf_avg": 9.395273583656481 + }, + { + "hf_id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", + "name": "ECE-PRYMMAL-YL-3B-SLERP-V1", + "params_b": 2.81, + "ifeval": 0.23463299600615256, + "bbh": 0.4018418465179459, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.2849900265957447, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.3364479166666667, + "hf_avg": 11.626794158168616 + }, + { + "hf_id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", + "name": "ECE-PRYMMAL-YL-3B-SLERP-V2", + "params_b": 2.81, + "ifeval": 0.2309361383351729, + "bbh": 0.39897709281426197, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.28997672872340424, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3587708333333333, + "hf_avg": 11.813468340726132 + }, + { + "hf_id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", + "name": "ECE-PRYMMAL-YL-3B-SLERP-V3", + "params_b": 3.821, + "ifeval": 0.35808100285021516, + "bbh": 0.5473121918055145, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.40433843085106386, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.43613541666666666, + "hf_avg": 23.42685471667849 + }, + { + "hf_id": "lmsys/vicuna-13b-v1.3", + "name": "vicuna-13b-v1.3", + "params_b": 13, + "ifeval": 0.3343506340953115, + "bbh": 0.3384399312777569, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.2243184840425532, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3727291666666666, + "hf_avg": 10.435533675653645 + }, + { + "hf_id": "lmsys/vicuna-7b-v1.3", + "name": "vicuna-7b-v1.3", + "params_b": 7, + "ifeval": 0.29086158060612505, + "bbh": 0.3298410006592924, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.18375997340425532, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3793333333333333, + "hf_avg": 8.525809191714858 + }, + { + "hf_id": "lmsys/vicuna-7b-v1.5", + "name": "vicuna-7b-v1.5", + "params_b": 7, + "ifeval": 0.23515716077784724, + "bbh": 0.39470436842233775, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.21467752659574468, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.42311458333333335, + "hf_avg": 10.885152314855338, + "lb_name": "vicuna-7b-v1.5", + "lb_global": 0.1478638888888889, + "lb_reasoning": 0.14, + "lb_math": 0.07103666666666666, + "lb_language": 0.08660000000000001, + "lb_if": 0.41754499999999994, + "lb_data_analysis": 0.02 + }, + { + "hf_id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", + "name": "llama-3.1-8b-instruct-ortho-v7", + "params_b": 8.03, + "ifeval": 0.3514618988727687, + "bbh": 0.39069140261362917, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.1973902925531915, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.36159375, + "hf_avg": 11.812819354645235 + }, + { + "hf_id": "lordjia/Llama-3-Cantonese-8B-Instruct", + "name": "Llama-3-Cantonese-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.6669259786256023, + "bbh": 0.4814148018954038, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.35147938829787234, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.40460416666666665, + "hf_avg": 24.271708884717096 + }, + { + "hf_id": "lordjia/Qwen2-Cantonese-7B-Instruct", + "name": "Qwen2-Cantonese-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.5435278394659503, + "bbh": 0.5215311346221223, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.38430851063829785, + "hf_math_lvl5": 0.25604229607250756, + "hf_musr": 0.40038541666666666, + "hf_avg": 26.309196155583507 + }, + { + "hf_id": "lt-asset/nova-1.3b", + "name": "nova-1.3b", + "params_b": 1.347, + "ifeval": 0.1214255951985177, + "bbh": 0.31700122104895806, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11419547872340426, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.36978125, + "hf_avg": 3.8536506008151683 + }, + { + "hf_id": "lunahr/thea-3b-50r-u1", + "name": "thea-3b-50r-u1", + "params_b": 3.213, + "ifeval": 0.6030288523340293, + "bbh": 0.41046731029294475, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2808344414893617, + "hf_math_lvl5": 0.1042296072507553, + "hf_musr": 0.3181875, + "hf_avg": 19.037090298507373 + }, + { + "hf_id": "lunahr/thea-v2-3b-50r", + "name": "thea-v2-3b-50r", + "params_b": 3.213, + "ifeval": 0.370396104558128, + "bbh": 0.4194416192911743, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.2409408244680851, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.3221875, + "hf_avg": 12.953795195391686 + }, + { + "hf_id": "m42-health/Llama3-Med42-70B", + "name": "Llama3-Med42-70B", + "params_b": 70.554, + "ifeval": 0.6291074349392944, + "bbh": 0.6687891109485058, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.4962599734042553, + "hf_math_lvl5": 0.2258308157099698, + "hf_musr": 0.46289583333333334, + "hf_avg": 35.68301603364157 + }, + { + "hf_id": "macadeliccc/Samantha-Qwen-2-7B", + "name": "Samantha-Qwen-2-7B", + "params_b": 7.616, + "ifeval": 0.4377152621710395, + "bbh": 0.5082341412476951, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.3779089095744681, + "hf_math_lvl5": 0.21148036253776434, + "hf_musr": 0.4799479166666667, + "hf_avg": 25.06508576450433 + }, + { + "hf_id": "macadeliccc/magistrate-3.2-3b-base", + "name": "magistrate-3.2-3b-base", + "params_b": 3.213, + "ifeval": 0.1159301763764589, + "bbh": 0.3342701056047533, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.16888297872340424, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.39759374999999997, + "hf_avg": 6.046097363125316 + }, + { + "hf_id": "macadeliccc/magistrate-3.2-3b-it", + "name": "magistrate-3.2-3b-it", + "params_b": 3.213, + "ifeval": 0.22918744486850445, + "bbh": 0.3256506790327196, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.15924202127659576, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3763229166666667, + "hf_avg": 7.088076253678785 + }, + { + "hf_id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", + "name": "Phi3_intent_v56_3_w_unknown_5_lr_0.002", + "params_b": 3.821, + "ifeval": 0.20181008612703183, + "bbh": 0.3281563256810973, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.1471908244680851, + "hf_musr": 0.41229166666666667, + "hf_avg": 7.1244683931064765 + }, + { + "hf_id": "maldv/Awqward2.5-32B-Instruct", + "name": "Awqward2.5-32B-Instruct", + "params_b": 32.764, + "ifeval": 0.8254697535871487, + "bbh": 0.6974465506773041, + "gpqa": 0.34060402684563756, + "mmlu_pro": 0.5723071808510638, + "hf_math_lvl5": 0.6231117824773413, + "hf_musr": 0.42748958333333337, + "hf_avg": 46.74902268350894 + }, + { + "hf_id": "maldv/Lytta2.5-32B-Instruct", + "name": "Lytta2.5-32B-Instruct", + "params_b": 32.764, + "ifeval": 0.25079455843827714, + "bbh": 0.559971089357847, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.5048204787234043, + "hf_math_lvl5": 0.34441087613293053, + "hf_musr": 0.37685416666666666, + "hf_avg": 24.790451743833525 + }, + { + "hf_id": "maldv/Qwentile2.5-32B-Instruct", + "name": "Qwentile2.5-32B-Instruct", + "params_b": 32.764, + "ifeval": 0.7393161256576994, + "bbh": 0.6962837451098368, + "gpqa": 0.38422818791946306, + "mmlu_pro": 0.5879321808510638, + "hf_math_lvl5": 0.5219033232628398, + "hf_musr": 0.4682291666666667, + "hf_avg": 45.9002633632381 + }, + { + "hf_id": "maldv/badger-kappa-llama-3-8b", + "name": "badger-kappa-llama-3-8b", + "params_b": 8.03, + "ifeval": 0.46946435457918323, + "bbh": 0.5084927997756815, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3695146276595745, + "hf_math_lvl5": 0.08610271903323263, + "hf_musr": 0.3765104166666666, + "hf_avg": 21.166688498001093 + }, + { + "hf_id": "maldv/badger-lambda-llama-3-8b", + "name": "badger-lambda-llama-3-8b", + "params_b": 8.03, + "ifeval": 0.4860758343417687, + "bbh": 0.49634866510444836, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.37666223404255317, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.3753645833333333, + "hf_avg": 20.943850248259498 + }, + { + "hf_id": "maldv/badger-mu-llama-3-8b", + "name": "badger-mu-llama-3-8b", + "params_b": 8.03, + "ifeval": 0.49194581488229006, + "bbh": 0.514287576852281, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.3673537234042553, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.35545833333333327, + "hf_avg": 20.322170525640896 + }, + { + "hf_id": "maldv/badger-writer-llama-3-8b", + "name": "badger-writer-llama-3-8b", + "params_b": 8.03, + "ifeval": 0.5303140112678804, + "bbh": 0.4863893856673737, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3759973404255319, + "hf_math_lvl5": 0.0755287009063444, + "hf_musr": 0.35809375000000004, + "hf_avg": 21.09688762786649 + }, + { + "hf_id": "marcuscedricridia/Cheng-1", + "name": "Cheng-1", + "params_b": 7.613, + "ifeval": 0.7788833628106757, + "bbh": 0.5524677845280024, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.43492353723404253, + "hf_math_lvl5": 0.48942598187311176, + "hf_musr": 0.4073333333333333, + "hf_avg": 36.05830324427348 + }, + { + "hf_id": "mattshumer/Reflection-Llama-3.1-70B", + "name": "Reflection-Llama-3.1-70B", + "params_b": 70.554, + "ifeval": 0.00452133671990319, + "bbh": 0.645001286484342, + "gpqa": 0.36325503355704697, + "mmlu_pro": 0.4955119680851064, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.45765625000000004, + "hf_avg": 24.392555308681647 + }, + { + "hf_id": "mattshumer/ref_70_e3", + "name": "ref_70_e3", + "params_b": 70.554, + "ifeval": 0.6294321289733462, + "bbh": 0.6500839481104265, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.5302526595744681, + "hf_math_lvl5": 0.2794561933534743, + "hf_musr": 0.4327604166666667, + "hf_avg": 35.395599658838286 + }, + { + "hf_id": "maywell/Qwen2-7B-Multilingual-RP", + "name": "Qwen2-7B-Multilingual-RP", + "params_b": 7.616, + "ifeval": 0.4347176602525743, + "bbh": 0.5062058680861069, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3858876329787234, + "hf_math_lvl5": 0.2243202416918429, + "hf_musr": 0.3695625, + "hf_avg": 23.450878693391584 + }, + { + "hf_id": "meditsolutions/Llama-3.1-MedIT-SUN-8B", + "name": "Llama-3.1-MedIT-SUN-8B", + "params_b": 8.03, + "ifeval": 0.7837293935646308, + "bbh": 0.5186924904597405, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3916223404255319, + "hf_math_lvl5": 0.20921450151057402, + "hf_musr": 0.40562499999999996, + "hf_avg": 30.19415971775715 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-1B-Instruct", + "name": "Llama-3.2-SUN-1B-Instruct", + "params_b": 1.498, + "ifeval": 0.6412973133507981, + "bbh": 0.34738999022447486, + "gpqa": 0.2424496644295302, + "mmlu_pro": 0.17810837765957446, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.35136458333333337, + "hf_avg": 15.524297116936125 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-1B-chat", + "name": "Llama-3.2-SUN-1B-chat", + "params_b": 1.498, + "ifeval": 0.5481743994822625, + "bbh": 0.35144575516411386, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.18375997340425532, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.3249166666666667, + "hf_avg": 13.641365564259702 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", + "name": "Llama-3.2-SUN-2.4B-checkpoint-26000", + "params_b": 2.209, + "ifeval": 0.28139447776344545, + "bbh": 0.3017752699243885, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.1344747340425532, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.41033333333333327, + "hf_avg": 8.143484964590147 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", + "name": "Llama-3.2-SUN-2.4B-checkpoint-34800", + "params_b": 2.209, + "ifeval": 0.25009530268576263, + "bbh": 0.3161124673749052, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.13572140957446807, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.4022395833333334, + "hf_avg": 8.193102548827737 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", + "name": "Llama-3.2-SUN-2.4B-v1.0.0", + "params_b": 2.472, + "ifeval": 0.5636865738462834, + "bbh": 0.3390826682107771, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.15425531914893617, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.32094791666666667, + "hf_avg": 13.31712952335647 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-2.5B-chat", + "name": "Llama-3.2-SUN-2.5B-chat", + "params_b": 2.472, + "ifeval": 0.560414145578177, + "bbh": 0.3574734302161124, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.1813497340425532, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.3155208333333333, + "hf_avg": 13.98771014319157 + }, + { + "hf_id": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", + "name": "Llama-3.2-SUN-HDIC-1B-Instruct", + "params_b": 1.498, + "ifeval": 0.6826631116548536, + "bbh": 0.3507731670753292, + "gpqa": 0.23657718120805368, + "mmlu_pro": 0.16871675531914893, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.3593645833333334, + "hf_avg": 15.901863518867478 + }, + { + "hf_id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", + "name": "MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", + "params_b": 7.646, + "ifeval": 0.36550020611976225, + "bbh": 0.4034845834509661, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.21899933510638298, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.42534374999999996, + "hf_avg": 14.528174671948811 + }, + { + "hf_id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", + "name": "MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", + "params_b": 11.169, + "ifeval": 0.5814217387642566, + "bbh": 0.5671722290858499, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.3499833776595745, + "hf_math_lvl5": 0.20770392749244712, + "hf_musr": 0.43845833333333334, + "hf_avg": 28.55070593519018 + }, + { + "hf_id": "meditsolutions/MedIT-Mesh-3B-Instruct", + "name": "MedIT-Mesh-3B-Instruct", + "params_b": 3.821, + "ifeval": 0.5814217387642566, + "bbh": 0.5575523356865378, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.4011801861702128, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.4047604166666667, + "hf_avg": 28.318227712460025 + }, + { + "hf_id": "meditsolutions/SmolLM2-MedIT-Upscale-2B", + "name": "SmolLM2-MedIT-Upscale-2B", + "params_b": 2.114, + "ifeval": 0.6429207835210575, + "bbh": 0.3551122445928012, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.19705784574468085, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.33136458333333335, + "hf_avg": 15.922534280948163 + }, + { + "hf_id": "meetkai/functionary-small-v3.1", + "name": "functionary-small-v3.1", + "params_b": 8.03, + "ifeval": 0.6274584768414474, + "bbh": 0.4981781042779377, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.33485704787234044, + "hf_math_lvl5": 0.15709969788519637, + "hf_musr": 0.3833645833333333, + "hf_avg": 24.08333552224185 + }, + { + "hf_id": "meraGPT/mera-mix-4x7B", + "name": "mera-mix-4x7B", + "params_b": 24.154, + "ifeval": 0.4831779677921249, + "bbh": 0.40189899163661713, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.27476728723404253, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.40565625, + "hf_avg": 17.854958732939675 + }, + { + "hf_id": "meta-llama/Llama-2-13b-chat-hf", + "name": "Llama-2-13b-chat-hf", + "params_b": 13.016, + "ifeval": 0.398472719052115, + "bbh": 0.33427367066714186, + "gpqa": 0.23154362416107382, + "mmlu_pro": 0.19232047872340424, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.40072916666666664, + "hf_avg": 11.12963532656997 + }, + { + "hf_id": "meta-llama/Llama-2-13b-hf", + "name": "Llama-2-13b-hf", + "params_b": 13.016, + "ifeval": 0.24824687385027283, + "bbh": 0.41256242233835055, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.23778257978723405, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.35375, + "hf_avg": 11.065185981273997 + }, + { + "hf_id": "meta-llama/Llama-2-70b-chat-hf", + "name": "Llama-2-70b-chat-hf", + "params_b": 68.977, + "ifeval": 0.49579227560650185, + "bbh": 0.30424741461642657, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.2432679521276596, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3686666666666667, + "hf_avg": 13.073695775827504 + }, + { + "hf_id": "meta-llama/Llama-2-70b-hf", + "name": "Llama-2-70b-hf", + "params_b": 68.977, + "ifeval": 0.2406780675274937, + "bbh": 0.5472591190449342, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.37175864361702127, + "hf_math_lvl5": 0.0324773413897281, + "hf_musr": 0.41235416666666663, + "hf_avg": 18.372598605703004 + }, + { + "hf_id": "meta-llama/Llama-2-7b-chat-hf", + "name": "Llama-2-7b-chat-hf", + "params_b": 6.738, + "ifeval": 0.3986478100329348, + "bbh": 0.3113546355002185, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.16879986702127658, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3675520833333333, + "hf_avg": 9.609483264152255, + "lb_name": "llama-2-7b-chat-hf", + "lb_global": 0.14056999999999997, + "lb_reasoning": 0.18, + "lb_math": 0.04784, + "lb_language": 0.06858666666666667, + "lb_if": 0.44883500000000004, + "lb_data_analysis": 0 + }, + { + "hf_id": "meta-llama/Llama-2-7b-hf", + "name": "Llama-2-7b-hf", + "params_b": 6.738, + "ifeval": 0.2518938638368418, + "bbh": 0.34961958199821835, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.18608710106382978, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.37006249999999996, + "hf_avg": 8.806357596540016 + }, + { + "hf_id": "meta-llama/Llama-3.1-70B", + "name": "Llama-3.1-70B", + "params_b": 70.554, + "ifeval": 0.16843752354862876, + "bbh": 0.626006918317161, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.4654255319148936, + "hf_math_lvl5": 0.18429003021148038, + "hf_musr": 0.4571875, + "hf_avg": 26.200215843375947 + }, + { + "hf_id": "meta-llama/Llama-3.1-8B", + "name": "Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.12459828809780273, + "bbh": 0.46595905446007296, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.32878989361702127, + "hf_math_lvl5": 0.06570996978851963, + "hf_musr": 0.3811875, + "hf_avg": 14.42086519266696 + }, + { + "hf_id": "meta-llama/Llama-3.2-1B", + "name": "Llama-3.2-1B", + "params_b": 1.24, + "ifeval": 0.14777900415342402, + "bbh": 0.31149540964608097, + "gpqa": 0.22818791946308725, + "mmlu_pro": 0.12034574468085106, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3447291666666667, + "hf_avg": 4.195140014045501 + }, + { + "hf_id": "meta-llama/Llama-3.2-1B-Instruct", + "name": "Llama-3.2-1B-Instruct", + "params_b": 1.24, + "ifeval": 0.5698313807364459, + "bbh": 0.34968498061768266, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.16821808510638298, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.3328541666666667, + "hf_avg": 14.443126333711135, + "arena_elo": 1111.19, + "arena_rank": 298, + "arena_votes": 8045 + }, + { + "hf_id": "meta-llama/Llama-3.2-3B", + "name": "Llama-3.2-3B", + "params_b": 3.213, + "ifeval": 0.13374069690643048, + "bbh": 0.3905117116991059, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.2487533244680851, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.35771875000000003, + "hf_avg": 8.697822716562822 + }, + { + "hf_id": "meta-llama/Meta-Llama-3-70B", + "name": "Meta-Llama-3-70B", + "params_b": 70.554, + "ifeval": 0.1603190645265673, + "bbh": 0.6461074599904467, + "gpqa": 0.3976510067114094, + "mmlu_pro": 0.4709109042553192, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.4518229166666667, + "hf_avg": 26.705350171613343 + }, + { + "hf_id": "meta-llama/Meta-Llama-3-70B-Instruct", + "name": "Meta-Llama-3-70B-Instruct", + "params_b": 70.554, + "ifeval": 0.8099077115387172, + "bbh": 0.6546699432372051, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.5206948138297872, + "hf_math_lvl5": 0.24471299093655588, + "hf_musr": 0.4153645833333333, + "hf_avg": 36.37222412927012, + "lb_name": "meta-llama-3-70b-instruct", + "lb_global": 0.40031833333333333, + "lb_reasoning": 0.3, + "lb_math": 0.32315, + "lb_language": 0.34107000000000004, + "lb_if": 0.63504, + "lb_data_analysis": 0.3862 + }, + { + "hf_id": "meta-llama/Meta-Llama-3-8B", + "name": "Meta-Llama-3-8B", + "params_b": 8.03, + "ifeval": 0.14550614591506092, + "bbh": 0.4597905195240255, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.32097739361702127, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.36140625, + "hf_avg": 13.626857071686075 + }, + { + "hf_id": "meta-llama/Meta-Llama-3-8B-Instruct", + "name": "Meta-Llama-3-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.7408398604591373, + "bbh": 0.49887111136169526, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.3664394946808511, + "hf_math_lvl5": 0.08685800604229607, + "hf_musr": 0.3568229166666667, + "hf_avg": 23.908735693936837, + "lb_name": "meta-llama-3-8b-instruct", + "lb_global": 0.29630944444444446, + "lb_reasoning": 0.26, + "lb_math": 0.19664333333333336, + "lb_language": 0.1871866666666667, + "lb_if": 0.5714174999999999, + "lb_data_analysis": 0.17 + }, + { + "hf_id": "meta-llama/Meta-Llama-3-8B-Instruct", + "name": "Meta-Llama-3-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.47823220166934843, + "bbh": 0.4910264175128683, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.359125664893617, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.3805416666666666, + "hf_avg": 20.609159446025874, + "lb_name": "meta-llama-3-8b-instruct", + "lb_global": 0.29630944444444446, + "lb_reasoning": 0.26, + "lb_math": 0.19664333333333336, + "lb_language": 0.1871866666666667, + "lb_if": 0.5714174999999999, + "lb_data_analysis": 0.17 + }, + { + "hf_id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", + "name": "Qwen2.5-0.5B-cinstruct-stage1", + "params_b": 0.63, + "ifeval": 0.14817905379947427, + "bbh": 0.32557832478283544, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.11394614361702128, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.35003125, + "hf_avg": 4.551664639073516 + }, + { + "hf_id": "microsoft/DialoGPT-medium", + "name": "DialoGPT-medium", + "params_b": 0.345, + "ifeval": 0.14790422744983311, + "bbh": 0.3014156380141994, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.1118683510638298, + "hf_musr": 0.4286666666666667, + "hf_avg": 5.251433606790305 + }, + { + "hf_id": "microsoft/Orca-2-13b", + "name": "Orca-2-13b", + "params_b": 13, + "ifeval": 0.3127933882099496, + "bbh": 0.48844897288396094, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.27493351063829785, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.5129687500000001, + "hf_avg": 18.501871091807203 + }, + { + "hf_id": "microsoft/Orca-2-7b", + "name": "Orca-2-7b", + "params_b": 7, + "ifeval": 0.2183462102776189, + "bbh": 0.4452132267545943, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.23188164893617022, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.5026145833333333, + "hf_avg": 14.404830081400474 + }, + { + "hf_id": "microsoft/Phi-3-medium-128k-instruct", + "name": "Phi-3-medium-128k-instruct", + "params_b": 13.96, + "ifeval": 0.6040029344361849, + "bbh": 0.6382322530870549, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.47116023936170215, + "hf_math_lvl5": 0.19184290030211482, + "hf_musr": 0.4129479166666667, + "hf_avg": 32.026356176108685, + "lb_name": "phi-3-medium-128k-instruct", + "lb_global": 0.30903888888888886, + "lb_reasoning": 0.32, + "lb_math": 0.17606, + "lb_language": 0.1275666666666667, + "lb_if": 0.5615424999999999, + "lb_data_analysis": 0.24180000000000001 + }, + { + "hf_id": "microsoft/Phi-3-medium-4k-instruct", + "name": "Phi-3-medium-4k-instruct", + "params_b": 13.96, + "ifeval": 0.6422713954529538, + "bbh": 0.6412464890555547, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.4675864361702128, + "hf_math_lvl5": 0.19561933534743203, + "hf_musr": 0.42575, + "hf_avg": 33.09765943937642, + "lb_name": "phi-3-medium-4k-instruct", + "lb_global": 0.3108511111111112, + "lb_reasoning": 0.35, + "lb_math": 0.19595333333333334, + "lb_language": 0.13909000000000002, + "lb_if": 0.5330400000000001, + "lb_data_analysis": 0.2044, + "arena_elo": 1197.96, + "arena_rank": 248, + "arena_votes": 25055 + }, + { + "hf_id": "microsoft/Phi-3-mini-128k-instruct", + "name": "Phi-3-mini-128k-instruct", + "params_b": 3.821, + "ifeval": 0.5976331688807919, + "bbh": 0.5574531792679852, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.3734208776595745, + "hf_math_lvl5": 0.1404833836858006, + "hf_musr": 0.3936875, + "hf_avg": 26.343809931865636, + "lb_name": "phi-3-mini-128k-instruct", + "lb_global": 0.23699333333333333, + "lb_reasoning": 0.2075, + "lb_math": 0.15718666666666667, + "lb_language": 0.09153666666666667, + "lb_if": 0.39083500000000004, + "lb_data_analysis": 0.26030000000000003, + "arena_elo": 1129.15, + "arena_rank": 289, + "arena_votes": 20691 + }, + { + "hf_id": "microsoft/Phi-3-mini-4k-instruct", + "name": "Phi-3-mini-4k-instruct", + "params_b": 3.821, + "ifeval": 0.5612884923115112, + "bbh": 0.5675972626334875, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.38663563829787234, + "hf_math_lvl5": 0.1163141993957704, + "hf_musr": 0.3950208333333333, + "hf_avg": 25.967732638041607, + "lb_name": "phi-3-mini-4k-instruct", + "lb_global": 0.23234333333333337, + "lb_reasoning": 0.2525, + "lb_math": 0.14958000000000002, + "lb_language": 0.08559, + "lb_if": 0.363625, + "lb_data_analysis": 0.2232, + "arena_elo": 1128.34, + "arena_rank": 290, + "arena_votes": 20115 + }, + { + "hf_id": "microsoft/Phi-3-mini-4k-instruct", + "name": "Phi-3-mini-4k-instruct", + "params_b": 3.821, + "ifeval": 0.547674614467391, + "bbh": 0.5490718919495822, + "gpqa": 0.33221476510067116, + "mmlu_pro": 0.4021775265957447, + "hf_math_lvl5": 0.16389728096676737, + "hf_musr": 0.42841666666666667, + "hf_avg": 27.562174043592282, + "lb_name": "phi-3-mini-4k-instruct", + "lb_global": 0.23234333333333337, + "lb_reasoning": 0.2525, + "lb_math": 0.14958000000000002, + "lb_language": 0.08559, + "lb_if": 0.363625, + "lb_data_analysis": 0.2232 + }, + { + "hf_id": "microsoft/Phi-3-small-128k-instruct", + "name": "Phi-3-small-128k-instruct", + "params_b": 7.392, + "ifeval": 0.6368258443153056, + "bbh": 0.6202176778696983, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.4490525265957447, + "hf_math_lvl5": 0.2026086956521739, + "hf_musr": 0.43784375000000003, + "hf_avg": 31.96780316372565, + "lb_name": "phi-3-small-128k-instruct", + "lb_global": 0.3067472222222223, + "lb_reasoning": 0.31000000000000005, + "lb_math": 0.2359, + "lb_language": 0.15531999999999999, + "lb_if": 0.5346675000000001, + "lb_data_analysis": 0.2189 + }, + { + "hf_id": "microsoft/Phi-3-small-8k-instruct", + "name": "Phi-3-small-8k-instruct", + "params_b": 7.392, + "ifeval": 0.6496651107949131, + "bbh": 0.6208364880870563, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.4506316489361702, + "hf_math_lvl5": 0.18869565217391304, + "hf_musr": 0.45579166666666665, + "hf_avg": 32.34201367038736, + "lb_name": "phi-3-small-8k-instruct", + "lb_global": 0.26268647058823524, + "lb_reasoning": 0.14875, + "lb_math": 0.17580333333333334, + "lb_language": 0.12944, + "lb_if": 0.472, + "lb_data_analysis": 0.2343, + "arena_elo": 1171.16, + "arena_rank": 266, + "arena_votes": 17763 + }, + { + "hf_id": "microsoft/Phi-4-mini-instruct", + "name": "Phi-4-mini-instruct", + "params_b": 3.836, + "ifeval": 0.7377923908562614, + "bbh": 0.568862935505404, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.39320146276595747, + "hf_math_lvl5": 0.16993957703927492, + "hf_musr": 0.3873020833333333, + "hf_avg": 29.412433568419846 + }, + { + "hf_id": "microsoft/phi-1", + "name": "phi-1", + "params_b": 1.418, + "ifeval": 0.20680571993421898, + "bbh": 0.31394755895837845, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.11619015957446809, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.35251041666666666, + "hf_avg": 5.574318195377169 + }, + { + "hf_id": "microsoft/phi-1_5", + "name": "phi-1_5", + "params_b": 1.418, + "ifeval": 0.2032839532440591, + "bbh": 0.33597583211996657, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.16913231382978725, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.34041666666666665, + "hf_avg": 7.170966845799231 + }, + { + "hf_id": "microsoft/phi-2", + "name": "phi-2", + "params_b": 2.78, + "ifeval": 0.273875539125077, + "bbh": 0.4881208771249696, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.26279920212765956, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.4098958333333333, + "hf_avg": 15.534291558214901 + }, + { + "hf_id": "migtissera/Llama-3-70B-Synthia-v3.5", + "name": "Llama-3-70B-Synthia-v3.5", + "params_b": 70.554, + "ifeval": 0.6076499244227538, + "bbh": 0.6488638026271278, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.4658410904255319, + "hf_math_lvl5": 0.21148036253776434, + "hf_musr": 0.49219791666666673, + "hf_avg": 35.56935395079021 + }, + { + "hf_id": "migtissera/Llama-3-8B-Synthia-v3.5", + "name": "Llama-3-8B-Synthia-v3.5", + "params_b": 8.03, + "ifeval": 0.5069582042314393, + "bbh": 0.4887940933660044, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.30302526595744683, + "hf_math_lvl5": 0.06570996978851963, + "hf_musr": 0.40438541666666666, + "hf_avg": 19.948440145189334 + }, + { + "hf_id": "migtissera/Tess-3-7B-SFT", + "name": "Tess-3-7B-SFT", + "params_b": 7.248, + "ifeval": 0.3946262583279033, + "bbh": 0.46073483895076217, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.30335771276595747, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.4112708333333333, + "hf_avg": 17.20945620208202 + }, + { + "hf_id": "migtissera/Tess-3-Mistral-Nemo-12B", + "name": "Tess-3-Mistral-Nemo-12B", + "params_b": 12.248, + "ifeval": 0.335499807178287, + "bbh": 0.489942302453045, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.25648271276595747, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.44578125, + "hf_avg": 16.720173026027776 + }, + { + "hf_id": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", + "name": "Tess-v2.5-Phi-3-medium-128k-14B", + "params_b": 13.96, + "ifeval": 0.45387682460316403, + "bbh": 0.6206613823135703, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3731715425531915, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.41130208333333335, + "hf_avg": 24.14120139478955 + }, + { + "hf_id": "migtissera/Tess-v2.5.2-Qwen2-72B", + "name": "Tess-v2.5.2-Qwen2-72B", + "params_b": 72, + "ifeval": 0.44943084349525925, + "bbh": 0.6646791891060648, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.5561003989361702, + "hf_math_lvl5": 0.2938066465256798, + "hf_musr": 0.41883333333333334, + "hf_avg": 33.60333761198978 + }, + { + "hf_id": "migtissera/Trinity-2-Codestral-22B", + "name": "Trinity-2-Codestral-22B", + "params_b": 22.247, + "ifeval": 0.4202050559182968, + "bbh": 0.5593244825460373, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.3307845744680851, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.4110520833333333, + "hf_avg": 21.995244491801476 + }, + { + "hf_id": "migtissera/Trinity-2-Codestral-22B-v0.2", + "name": "Trinity-2-Codestral-22B-v0.2", + "params_b": 22.247, + "ifeval": 0.43446832183052075, + "bbh": 0.5686364683055418, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.33402593085106386, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.40447916666666667, + "hf_avg": 21.869825084599302 + }, + { + "hf_id": "migtissera/Trinity-2-Codestral-22B-v0.2", + "name": "Trinity-2-Codestral-22B-v0.2", + "params_b": 22.247, + "ifeval": 0.44301121025545553, + "bbh": 0.5706466356198404, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3353557180851064, + "hf_math_lvl5": 0.08685800604229607, + "hf_musr": 0.4031458333333333, + "hf_avg": 22.250269105139868 + }, + { + "hf_id": "ministral/Ministral-3b-instruct", + "name": "Ministral-3b-instruct", + "params_b": 3.316, + "ifeval": 0.1357642167227401, + "bbh": 0.31918598478332383, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.10929188829787234, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.33825, + "hf_avg": 3.520082773406306 + }, + { + "hf_id": "mistral-community/Mistral-7B-v0.2", + "name": "Mistral-7B-v0.2", + "params_b": 7.242, + "ifeval": 0.22663976028050017, + "bbh": 0.4510187962797583, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.2952958776595745, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.4031770833333333, + "hf_avg": 14.215362442692104 + }, + { + "hf_id": "mistral-community/Mixtral-8x22B-v0.1", + "name": "Mixtral-8x22B-v0.1", + "ifeval": 0.3166564417177914, + "bbh": 0.38000000000000006, + "gpqa": 0.33, + "mmlu_pro": 0.36, + "hf_math_lvl5": 0.15428571428571428, + "hf_musr": 0.35333333333333333, + "hf_avg": 16.827390145446955 + }, + { + "hf_id": "mistral-community/mixtral-8x22B-v0.3", + "name": "mixtral-8x22B-v0.3", + "params_b": 140.63, + "ifeval": 0.25826362939223485, + "bbh": 0.6250002178435845, + "gpqa": 0.3775167785234899, + "mmlu_pro": 0.46392952127659576, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.4036979166666667, + "hf_avg": 25.801994725345732 + }, + { + "hf_id": "mistralai/Codestral-22B-v0.1", + "name": "Codestral-22B-v0.1", + "params_b": 22.247, + "ifeval": 0.5771752283939946, + "bbh": 0.5139136921003167, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3155751329787234, + "hf_math_lvl5": 0.10045317220543806, + "hf_musr": 0.4187083333333333, + "hf_avg": 23.27991740686463 + }, + { + "hf_id": "mistralai/Mistral-7B-Instruct-v0.1", + "name": "Mistral-7B-Instruct-v0.1", + "params_b": 7.242, + "ifeval": 0.4487060998151571, + "bbh": 0.33548084759810987, + "gpqa": 0.25, + "mmlu_pro": 0.24143949468085107, + "hf_math_lvl5": 0.022658610271903322, + "hf_musr": 0.38476041666666666, + "hf_avg": 12.771229395030618 + }, + { + "hf_id": "mistralai/Mistral-7B-Instruct-v0.2", + "name": "Mistral-7B-Instruct-v0.2", + "params_b": 7.242, + "ifeval": 0.5496227786717023, + "bbh": 0.44597355203292793, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2716921542553192, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.39660416666666665, + "hf_avg": 18.50789159273764, + "lb_name": "mistral-7b-instruct-v0.2", + "lb_global": 0.2214827777777778, + "lb_reasoning": 0.16999999999999998, + "lb_math": 0.17082666666666668, + "lb_language": 0.09055, + "lb_if": 0.5165025, + "lb_data_analysis": 0.059300000000000005, + "arena_elo": 1149.61, + "arena_rank": 277, + "arena_votes": 19402 + }, + { + "hf_id": "mistralai/Mistral-7B-Instruct-v0.3", + "name": "Mistral-7B-Instruct-v0.3", + "params_b": 7.248, + "ifeval": 0.5465254413844156, + "bbh": 0.47219631712648397, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.30751329787234044, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.37390625000000005, + "hf_avg": 19.225098776905867, + "lb_name": "mistral-7b-instruct-v0.3", + "lb_global": 0.23554444444444445, + "lb_reasoning": 0.21000000000000002, + "lb_math": 0.14561666666666664, + "lb_language": 0.11853000000000001, + "lb_if": 0.5236675000000001, + "lb_data_analysis": 0.09659999999999999 + }, + { + "hf_id": "mistralai/Mistral-7B-v0.1", + "name": "Mistral-7B-v0.1", + "params_b": 7.242, + "ifeval": 0.2385548123423627, + "bbh": 0.4419401145517045, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.30127992021276595, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.4139375, + "hf_avg": 14.575358924083135 + }, + { + "hf_id": "mistralai/Mistral-7B-v0.3", + "name": "Mistral-7B-v0.3", + "params_b": 7.248, + "ifeval": 0.22663976028050017, + "bbh": 0.45168546294642503, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.2952958776595745, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.4031770833333333, + "hf_avg": 14.229760590840252 + }, + { + "hf_id": "mistralai/Mistral-Large-Instruct-2411", + "name": "Mistral-Large-Instruct-2411", + "params_b": 122.61, + "ifeval": 0.8400577135334246, + "bbh": 0.6746647735675069, + "gpqa": 0.43708053691275167, + "mmlu_pro": 0.5561835106382979, + "hf_math_lvl5": 0.4954682779456193, + "hf_musr": 0.454, + "hf_avg": 46.524214355965 + }, + { + "hf_id": "mistralai/Mistral-Nemo-Base-2407", + "name": "Mistral-Nemo-Base-2407", + "params_b": 11.58, + "ifeval": 0.16299197241098062, + "bbh": 0.5035062000369291, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.34715757978723405, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.3921354166666667, + "hf_avg": 15.239356042755924 + }, + { + "hf_id": "mistralai/Mistral-Small-24B-Base-2501", + "name": "Mistral-Small-24B-Base-2501", + "params_b": 23.572, + "ifeval": 0.16723848278124265, + "bbh": 0.6441860347172437, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.5406416223404256, + "hf_math_lvl5": 0.1971299093655589, + "hf_musr": 0.42366666666666664, + "hf_avg": 27.195130187978368 + }, + { + "hf_id": "mistralai/Mistral-Small-Instruct-2409", + "name": "Mistral-Small-Instruct-2409", + "params_b": 22.05, + "ifeval": 0.666975846310013, + "bbh": 0.5213075098146217, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.39602726063829785, + "hf_math_lvl5": 0.14350453172205438, + "hf_musr": 0.36320833333333336, + "hf_avg": 26.262748976418276 + }, + { + "hf_id": "mistralai/Mistral-Small-Instruct-2409", + "name": "Mistral-Small-Instruct-2409", + "params_b": 22.247, + "ifeval": 0.6282829558903709, + "bbh": 0.5830283846898211, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.409906914893617, + "hf_math_lvl5": 0.2039274924471299, + "hf_musr": 0.4063333333333334, + "hf_avg": 29.918947504475216 + }, + { + "hf_id": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "name": "Mixtral-8x22B-Instruct-v0.1", + "params_b": 140.621, + "ifeval": 0.7183584001560305, + "bbh": 0.6124924926272018, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.44830452127659576, + "hf_math_lvl5": 0.18731117824773413, + "hf_musr": 0.43111458333333336, + "hf_avg": 33.88568028808198, + "lb_name": "mixtral-8x22b-instruct-v0.1", + "lb_global": 0.3626677777777778, + "lb_reasoning": 0.32, + "lb_math": 0.24550000000000002, + "lb_language": 0.26477666666666666, + "lb_if": 0.63167, + "lb_data_analysis": 0.255, + "arena_elo": 1229.48, + "arena_rank": 232, + "arena_votes": 51417 + }, + { + "hf_id": "mistralai/Mixtral-8x22B-v0.1", + "name": "Mixtral-8x22B-v0.1", + "params_b": 140.621, + "ifeval": 0.25826362939223485, + "bbh": 0.6239807473187268, + "gpqa": 0.37583892617449666, + "mmlu_pro": 0.46392952127659576, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.4036979166666667, + "hf_avg": 25.74093627522265 + }, + { + "hf_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "name": "Mixtral-8x7B-Instruct-v0.1", + "params_b": 46.703, + "ifeval": 0.5599143605633053, + "bbh": 0.49623654013356494, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.36918218085106386, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.42032291666666666, + "hf_avg": 23.8171027058463, + "lb_name": "mixtral-8x7b-instruct-v0.1", + "lb_global": 0.24569777777777776, + "lb_reasoning": 0.18, + "lb_math": 0.20709333333333335, + "lb_language": 0.13761333333333334, + "lb_if": 0.4480825, + "lb_data_analysis": 0.1619, + "arena_elo": 1197.12, + "arena_rank": 249, + "arena_votes": 73505 + }, + { + "hf_id": "mistralai/Mixtral-8x7B-v0.1", + "name": "Mixtral-8x7B-v0.1", + "params_b": 46.703, + "ifeval": 0.24152692633324024, + "bbh": 0.508666743762444, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3849734042553192, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.43213541666666666, + "hf_avg": 19.56528101279984 + }, + { + "hf_id": "mistralai/Mixtral-8x7B-v0.1", + "name": "Mixtral-8x7B-v0.1", + "params_b": 46.703, + "ifeval": 0.23260947618984296, + "bbh": 0.5097711377553386, + "gpqa": 0.32046979865771813, + "mmlu_pro": 0.3871343085106383, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.4413125, + "hf_avg": 19.665108918316083 + }, + { + "hf_id": "mixtao/MixTAO-7Bx2-MoE-v8.1", + "name": "MixTAO-7Bx2-MoE-v8.1", + "params_b": 12.879, + "ifeval": 0.41623337189767595, + "bbh": 0.5189059391733521, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3123337765957447, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.4463333333333333, + "hf_avg": 21.07792698379691 + }, + { + "hf_id": "mkurman/llama-3.2-MEDIT-3B-o1", + "name": "llama-3.2-MEDIT-3B-o1", + "params_b": 3.607, + "ifeval": 0.43816517950150047, + "bbh": 0.43996584807961553, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.27410239361702127, + "hf_math_lvl5": 0.13066465256797583, + "hf_musr": 0.3565416666666667, + "hf_avg": 17.028893537984814 + }, + { + "hf_id": "mkurman/phi4-MedIT-10B-o1", + "name": "phi4-MedIT-10B-o1", + "params_b": 10.255, + "ifeval": 0.34629117408476173, + "bbh": 0.519820312240642, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.3507313829787234, + "hf_math_lvl5": 0.1148036253776435, + "hf_musr": 0.39679166666666665, + "hf_avg": 18.92073804361014 + }, + { + "hf_id": "mlabonne/AlphaMonarch-7B", + "name": "AlphaMonarch-7B", + "params_b": 7.242, + "ifeval": 0.49394384677101205, + "bbh": 0.4625522037183211, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.24725731382978725, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.41213541666666664, + "hf_avg": 17.63062119200965 + }, + { + "hf_id": "mlabonne/Beyonder-4x7B-v3", + "name": "Beyonder-4x7B-v3", + "params_b": 24.154, + "ifeval": 0.5608385749810503, + "bbh": 0.4670522037183211, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.2512466755319149, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.40454166666666663, + "hf_avg": 19.40685869832663 + }, + { + "hf_id": "mlabonne/BigQwen2.5-52B-Instruct", + "name": "BigQwen2.5-52B-Instruct", + "params_b": 52.268, + "ifeval": 0.7913480675718205, + "bbh": 0.7121004678698547, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.5519448138297872, + "hf_math_lvl5": 0.547583081570997, + "hf_musr": 0.41130208333333335, + "hf_avg": 43.55000484859215 + }, + { + "hf_id": "mlabonne/BigQwen2.5-Echo-47B-Instruct", + "name": "BigQwen2.5-Echo-47B-Instruct", + "params_b": 47.392, + "ifeval": 0.7356691356711305, + "bbh": 0.6125111878044905, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.4734042553191489, + "hf_math_lvl5": 0.4380664652567976, + "hf_musr": 0.4124791666666667, + "hf_avg": 37.03189501778666 + }, + { + "hf_id": "mlabonne/ChimeraLlama-3-8B-v2", + "name": "ChimeraLlama-3-8B-v2", + "params_b": 8.03, + "ifeval": 0.44688315890725494, + "bbh": 0.5045597361952603, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3568816489361702, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.3790833333333334, + "hf_avg": 20.120505180125022 + }, + { + "hf_id": "mlabonne/ChimeraLlama-3-8B-v3", + "name": "ChimeraLlama-3-8B-v3", + "params_b": 8.03, + "ifeval": 0.44078821970150317, + "bbh": 0.49781902726529204, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.36685505319148937, + "hf_math_lvl5": 0.08836858006042296, + "hf_musr": 0.4003541666666666, + "hf_avg": 20.697130053703557 + }, + { + "hf_id": "mlabonne/Daredevil-8B", + "name": "Daredevil-8B", + "params_b": 8.03, + "ifeval": 0.45477665926408595, + "bbh": 0.5194408746721715, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.383061835106383, + "hf_math_lvl5": 0.10649546827794562, + "hf_musr": 0.393875, + "hf_avg": 22.40964638898709 + }, + { + "hf_id": "mlabonne/Daredevil-8B-abliterated", + "name": "Daredevil-8B-abliterated", + "params_b": 8.03, + "ifeval": 0.44263664853699297, + "bbh": 0.4254272523147253, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.3700964095744681, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.40702083333333333, + "hf_avg": 19.687995572747763 + }, + { + "hf_id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", + "name": "Hermes-3-Llama-3.1-70B-lorablated", + "params_b": 70.554, + "ifeval": 0.34244360518978534, + "bbh": 0.6693171063183693, + "gpqa": 0.36577181208053694, + "mmlu_pro": 0.4679188829787234, + "hf_math_lvl5": 0.2243202416918429, + "hf_musr": 0.5029270833333334, + "hf_avg": 31.74585457101121 + }, + { + "hf_id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", + "name": "Meta-Llama-3.1-8B-Instruct-abliterated", + "params_b": 8.03, + "ifeval": 0.7329463601023063, + "bbh": 0.48740648734902187, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.3503158244680851, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.36488541666666663, + "hf_avg": 23.202552271437714 + }, + { + "hf_id": "mlabonne/NeuralBeagle14-7B", + "name": "NeuralBeagle14-7B", + "params_b": 7.242, + "ifeval": 0.49351941736813876, + "bbh": 0.46278709452353844, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2601396276595745, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.43194791666666665, + "hf_avg": 18.91007634574491 + }, + { + "hf_id": "mlabonne/NeuralDaredevil-8B-abliterated", + "name": "NeuralDaredevil-8B-abliterated", + "params_b": 8.03, + "ifeval": 0.756077208473517, + "bbh": 0.5110566504436299, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.38414228723404253, + "hf_math_lvl5": 0.09063444108761329, + "hf_musr": 0.4019375, + "hf_avg": 27.186740676442742 + }, + { + "hf_id": "mlabonne/NeuralDaredevil-8B-abliterated", + "name": "NeuralDaredevil-8B-abliterated", + "params_b": 8.03, + "ifeval": 0.41623337189767595, + "bbh": 0.5123964057729099, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3801529255319149, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.4149583333333333, + "hf_avg": 21.499914415098534 + }, + { + "hf_id": "mlabonne/OrpoLlama-3-8B", + "name": "OrpoLlama-3-8B", + "params_b": 8.03, + "ifeval": 0.36527524745453177, + "bbh": 0.4424079063503051, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2705285904255319, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.3579375, + "hf_avg": 15.157036730116474 + }, + { + "hf_id": "mlabonne/phixtral-2x2_8", + "name": "phixtral-2x2_8", + "params_b": 4.458, + "ifeval": 0.3431184811854767, + "bbh": 0.48885941873076205, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.2550698138297872, + "hf_math_lvl5": 0.035498489425981876, + "hf_musr": 0.3643541666666667, + "hf_avg": 15.553113591688318 + }, + { + "hf_id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", + "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", + "params_b": 0.494, + "ifeval": 0.3368983186833158, + "bbh": 0.32921013057720044, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.16381316489361702, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.3249166666666667, + "hf_avg": 9.708088086359409 + }, + { + "hf_id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", + "name": "Mistral-Small-24B-Instruct-2501-bf16", + "params_b": 23.572, + "ifeval": 0.6282829558903709, + "bbh": 0.6713272911918485, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5394780585106383, + "hf_math_lvl5": 0.32250755287009064, + "hf_musr": 0.4618333333333333, + "hf_avg": 38.66942426997438 + }, + { + "hf_id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", + "name": "Llama-3-70B-japanese-suzume-vector-v0.1", + "params_b": 70.554, + "ifeval": 0.4648931501748693, + "bbh": 0.6541763652331517, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.5224401595744681, + "hf_math_lvl5": 0.2326283987915408, + "hf_musr": 0.4140625, + "hf_avg": 30.38004447303165 + }, + { + "hf_id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", + "name": "DeepSeek-R1-ReDistill-Llama3-8B-v1.1", + "params_b": 8.03, + "ifeval": 0.370396104558128, + "bbh": 0.34730320150504124, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.2198304521276596, + "hf_math_lvl5": 0.3285498489425982, + "hf_musr": 0.33955208333333337, + "hf_avg": 15.806892418961331 + }, + { + "hf_id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", + "name": "DeepSeek-R1-ReDistill-Qwen-7B-v1.1", + "params_b": 7.616, + "ifeval": 0.34731512387132807, + "bbh": 0.36983762765044165, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.23262965425531915, + "hf_math_lvl5": 0.3496978851963746, + "hf_musr": 0.40088541666666666, + "hf_avg": 17.737904923149653 + }, + { + "hf_id": "moeru-ai/L3.1-Moe-2x8B-v0.2", + "name": "L3.1-Moe-2x8B-v0.2", + "params_b": 13.668, + "ifeval": 0.7347947889377962, + "bbh": 0.5255688392585965, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.38580452127659576, + "hf_math_lvl5": 0.16993957703927492, + "hf_musr": 0.41985416666666664, + "hf_avg": 28.878094301902436 + }, + { + "hf_id": "moeru-ai/L3.1-Moe-4x8B-v0.1", + "name": "L3.1-Moe-4x8B-v0.1", + "params_b": 24.942, + "ifeval": 0.433219413378724, + "bbh": 0.49392781736367014, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.34541223404255317, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.3609166666666667, + "hf_avg": 19.441557290549515 + }, + { + "hf_id": "moeru-ai/L3.1-Moe-4x8B-v0.2", + "name": "L3.1-Moe-4x8B-v0.2", + "params_b": 24.942, + "ifeval": 0.5406554608438943, + "bbh": 0.446625675582615, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.27626329787234044, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.3233958333333333, + "hf_avg": 18.31051307041189 + }, + { + "hf_id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", + "name": "Llama-3_1-8B-Instruct-orca-ORPO", + "params_b": 16.061, + "ifeval": 0.22728914834860392, + "bbh": 0.28653625778742803, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11677194148936171, + "hf_musr": 0.34447916666666667, + "hf_avg": 4.832138103419669 + }, + { + "hf_id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", + "name": "gemma-2-2b-LoRA-MonsterInstruct", + "params_b": 2.614, + "ifeval": 0.3902545246612322, + "bbh": 0.36496861927498697, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.19872007978723405, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.3643854166666667, + "hf_avg": 12.519872991689503 + }, + { + "hf_id": "mosaicml/mpt-7b", + "name": "mpt-7b", + "params_b": 7, + "ifeval": 0.21519900530592162, + "bbh": 0.32997415960801324, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.12059507978723404, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.36723958333333334, + "hf_avg": 6.032029339143736 + }, + { + "hf_id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", + "name": "Qwen2.5-1.5B-Instruct-CoT-Reflection", + "params_b": 1.544, + "ifeval": 0.2870394996387363, + "bbh": 0.41093712633583523, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.26512632978723405, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.3211979166666667, + "hf_avg": 11.862792186560862 + }, + { + "hf_id": "mrdayl/OpenCognito", + "name": "OpenCognito", + "params_b": 3.086, + "ifeval": 0.40621661635571393, + "bbh": 0.4705607805549634, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3443317819148936, + "hf_math_lvl5": 0.21148036253776434, + "hf_musr": 0.42934374999999997, + "hf_avg": 22.28566396506271 + }, + { + "hf_id": "mrdayl/OpenCognito-r1", + "name": "OpenCognito-r1", + "params_b": 3.086, + "ifeval": 0.42412687225450696, + "bbh": 0.4673346036303057, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3474900265957447, + "hf_math_lvl5": 0.1903323262839879, + "hf_musr": 0.42407291666666663, + "hf_avg": 22.15261487014885 + }, + { + "hf_id": "mrdayl/OpenCognito-r2", + "name": "OpenCognito-r2", + "params_b": 3.086, + "ifeval": 0.3958751667797001, + "bbh": 0.46882818163435913, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.34616023936170215, + "hf_math_lvl5": 0.20241691842900303, + "hf_musr": 0.42016666666666663, + "hf_avg": 21.967469611464537 + }, + { + "hf_id": "mrdayl/OpenThink", + "name": "OpenThink", + "params_b": 1.777, + "ifeval": 0.20540720842919008, + "bbh": 0.34597850879756104, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.18500664893617022, + "hf_math_lvl5": 0.28851963746223563, + "hf_musr": 0.32888541666666665, + "hf_avg": 12.3979226641244 + }, + { + "hf_id": "mrm8488/phi-4-14B-grpo-gsm8k-3e", + "name": "phi-4-14B-grpo-gsm8k-3e", + "params_b": 14.66, + "ifeval": 0.688533092195375, + "bbh": 0.6805415739665394, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.526845079787234, + "hf_math_lvl5": 0.452416918429003, + "hf_musr": 0.39939583333333334, + "hf_avg": 39.20729001022834 + }, + { + "hf_id": "mrm8488/phi-4-14B-grpo-limo", + "name": "phi-4-14B-grpo-limo", + "params_b": 14.66, + "ifeval": 0.681239112222237, + "bbh": 0.678485424233919, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.5260970744680851, + "hf_math_lvl5": 0.4569486404833837, + "hf_musr": 0.3980625, + "hf_avg": 39.0640880591307 + }, + { + "hf_id": "mukaj/Llama-3.1-Hawkish-8B", + "name": "Llama-3.1-Hawkish-8B", + "params_b": 8.03, + "ifeval": 0.6720468357291984, + "bbh": 0.4883822828416351, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.33311170212765956, + "hf_math_lvl5": 0.243202416918429, + "hf_musr": 0.39672916666666663, + "hf_avg": 26.581501102977555 + }, + { + "hf_id": "natong19/Mistral-Nemo-Instruct-2407-abliterated", + "name": "Mistral-Nemo-Instruct-2407-abliterated", + "params_b": 12.248, + "ifeval": 0.6392239258500778, + "bbh": 0.5048447739625885, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.351811835106383, + "hf_math_lvl5": 0.13217522658610273, + "hf_musr": 0.4033333333333333, + "hf_avg": 25.017625420548132 + }, + { + "hf_id": "natong19/Qwen2-7B-Instruct-abliterated", + "name": "Qwen2-7B-Instruct-abliterated", + "params_b": 7.616, + "ifeval": 0.5836945970026197, + "bbh": 0.5553035842403061, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3842253989361702, + "hf_math_lvl5": 0.2764350453172205, + "hf_musr": 0.4034270833333333, + "hf_avg": 28.515341600524966 + }, + { + "hf_id": "nazimali/Mistral-Nemo-Kurdish", + "name": "Mistral-Nemo-Kurdish", + "params_b": 12.248, + "ifeval": 0.3401208792670115, + "bbh": 0.5133321102266589, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3234707446808511, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.4115729166666667, + "hf_avg": 19.48223781673813 + }, + { + "hf_id": "nazimali/Mistral-Nemo-Kurdish-Instruct", + "name": "Mistral-Nemo-Kurdish-Instruct", + "params_b": 12.248, + "ifeval": 0.4963917959901949, + "bbh": 0.4699417600389813, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3062666223404255, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.397875, + "hf_avg": 18.555957634452803 + }, + { + "hf_id": "nazimali/Mistral-Nemo-Kurdish-Instruct", + "name": "Mistral-Nemo-Kurdish-Instruct", + "params_b": 12.248, + "ifeval": 0.4860004787297703, + "bbh": 0.47214400722999256, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.30867686170212766, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.40057291666666667, + "hf_avg": 19.94862211656385 + }, + { + "hf_id": "nbeerbower/BigKartoffel-mistral-nemo-20B", + "name": "BigKartoffel-mistral-nemo-20B", + "params_b": 20.427, + "ifeval": 0.5857181168189294, + "bbh": 0.55148305168682, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3529753989361702, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.42804166666666665, + "hf_avg": 23.763763321614324 + }, + { + "hf_id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", + "name": "DoppelKartoffel-Mistral-Nemo-23B", + "params_b": 23.153, + "ifeval": 0.5191480826429429, + "bbh": 0.5217926041279988, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.3080119680851064, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.3794895833333333, + "hf_avg": 19.833865427806835 + }, + { + "hf_id": "nbeerbower/Dumpling-Qwen2.5-1.5B", + "name": "Dumpling-Qwen2.5-1.5B", + "params_b": 1.544, + "ifeval": 0.3698963195432563, + "bbh": 0.4159743091354106, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.2771775265957447, + "hf_math_lvl5": 0.11706948640483383, + "hf_musr": 0.37276041666666665, + "hf_avg": 15.625094396198312 + }, + { + "hf_id": "nbeerbower/Dumpling-Qwen2.5-14B", + "name": "Dumpling-Qwen2.5-14B", + "params_b": 14.77, + "ifeval": 0.6064010159709571, + "bbh": 0.6450644262798378, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.5170378989361702, + "hf_math_lvl5": 0.30966767371601206, + "hf_musr": 0.43539583333333337, + "hf_avg": 34.79872132286164 + }, + { + "hf_id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", + "name": "Dumpling-Qwen2.5-7B-1k-r16", + "params_b": 7.616, + "ifeval": 0.4860004787297703, + "bbh": 0.5214228032573378, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.39586103723404253, + "hf_math_lvl5": 0.236404833836858, + "hf_musr": 0.4229895833333333, + "hf_avg": 25.2956618399845 + }, + { + "hf_id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", + "name": "Dumpling-Qwen2.5-7B-1k-r64-2e-5", + "params_b": 7.616, + "ifeval": 0.417906709752346, + "bbh": 0.5300548108450988, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.41215093085106386, + "hf_math_lvl5": 0.21148036253776434, + "hf_musr": 0.4486041666666667, + "hf_avg": 25.052668360308672 + }, + { + "hf_id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", + "name": "EVA-abliterated-TIES-Qwen2.5-1.5B", + "params_b": 1.777, + "ifeval": 0.41148707651254224, + "bbh": 0.39965589836197535, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.27119348404255317, + "hf_math_lvl5": 0.13746223564954682, + "hf_musr": 0.35018750000000004, + "hf_avg": 15.375831467238891 + }, + { + "hf_id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", + "name": "EVA-abliterated-TIES-Qwen2.5-14B", + "params_b": 14.77, + "ifeval": 0.783554302583811, + "bbh": 0.6372016353633118, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.5211103723404256, + "hf_math_lvl5": 0.5045317220543807, + "hf_musr": 0.4406666666666667, + "hf_avg": 42.16442636538604 + }, + { + "hf_id": "nbeerbower/Flammades-Mistral-Nemo-12B", + "name": "Flammades-Mistral-Nemo-12B", + "params_b": 12.248, + "ifeval": 0.38415958545548035, + "bbh": 0.5299609345270283, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.36610704787234044, + "hf_math_lvl5": 0.0755287009063444, + "hf_musr": 0.480625, + "hf_avg": 22.56672421581219 + }, + { + "hf_id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", + "name": "Gemma2-Gutenberg-Doppel-9B", + "params_b": 9.242, + "ifeval": 0.7171094917042337, + "bbh": 0.5870114193661848, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.41273271276595747, + "hf_math_lvl5": 0.19788519637462235, + "hf_musr": 0.46078125, + "hf_avg": 32.54244427540016 + }, + { + "hf_id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", + "name": "Hermes2-Gutenberg2-Mistral-7B", + "params_b": 7.242, + "ifeval": 0.37214479802479644, + "bbh": 0.4981450458280896, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.29928523936170215, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.46230208333333334, + "hf_avg": 19.363860758860653 + }, + { + "hf_id": "nbeerbower/Kartoffel-Deepfry-12B", + "name": "Kartoffel-Deepfry-12B", + "params_b": 12.248, + "ifeval": 0.5021620411618949, + "bbh": 0.5365374219062301, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3582114361702128, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.4791666666666667, + "hf_avg": 24.14748758265313 + }, + { + "hf_id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", + "name": "Llama-3.1-Nemotron-lorablated-70B", + "params_b": 70.554, + "ifeval": 0.7228797368759337, + "bbh": 0.6825051293384551, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5343251329787234, + "hf_math_lvl5": 0.3338368580060423, + "hf_musr": 0.4681666666666667, + "hf_avg": 40.87645031021374 + }, + { + "hf_id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", + "name": "Llama3.1-Gutenberg-Doppel-70B", + "params_b": 70.554, + "ifeval": 0.7092159913474027, + "bbh": 0.6660891255994471, + "gpqa": 0.3447986577181208, + "mmlu_pro": 0.4736535904255319, + "hf_math_lvl5": 0.2122356495468278, + "hf_musr": 0.48971875, + "hf_avg": 36.9233900222014 + }, + { + "hf_id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", + "name": "Lyra-Gutenberg-mistral-nemo-12B", + "params_b": 12.248, + "ifeval": 0.34948824674086976, + "bbh": 0.5586245741555749, + "gpqa": 0.3338926174496644, + "mmlu_pro": 0.36278257978723405, + "hf_math_lvl5": 0.10120845921450151, + "hf_musr": 0.43566666666666665, + "hf_avg": 22.867363852612016 + }, + { + "hf_id": "nbeerbower/Lyra4-Gutenberg-12B", + "name": "Lyra4-Gutenberg-12B", + "params_b": 12.248, + "ifeval": 0.2212185888996751, + "bbh": 0.538669487933139, + "gpqa": 0.3187919463087248, + "mmlu_pro": 0.35713098404255317, + "hf_math_lvl5": 0.1299093655589124, + "hf_musr": 0.4037916666666666, + "hf_avg": 19.84411922998086 + }, + { + "hf_id": "nbeerbower/Lyra4-Gutenberg2-12B", + "name": "Lyra4-Gutenberg2-12B", + "params_b": 12.248, + "ifeval": 0.25851296781428834, + "bbh": 0.5344527944750038, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.35654920212765956, + "hf_math_lvl5": 0.11706948640483383, + "hf_musr": 0.39721874999999995, + "hf_avg": 19.944881796280978 + }, + { + "hf_id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", + "name": "Mahou-1.5-mistral-nemo-12B-lorablated", + "params_b": 12.248, + "ifeval": 0.6824880206740338, + "bbh": 0.5496040380079439, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.35738031914893614, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.45216666666666666, + "hf_avg": 27.050923083044733 + }, + { + "hf_id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", + "name": "Mistral-Gutenberg-Doppel-7B-FFT", + "params_b": 7.242, + "ifeval": 0.5716798095719358, + "bbh": 0.40762540890255944, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2728557180851064, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.4059375, + "hf_avg": 18.33827572865676 + }, + { + "hf_id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", + "name": "Mistral-Nemo-Gutenberg-Doppel-12B", + "params_b": 12.248, + "ifeval": 0.3567068711020093, + "bbh": 0.5274606999473499, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.35787898936170215, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.41321874999999997, + "hf_avg": 21.53798697951247 + }, + { + "hf_id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", + "name": "Mistral-Nemo-Gutenberg-Doppel-12B-v2", + "params_b": 12.248, + "ifeval": 0.6535869271311232, + "bbh": 0.5374496172235809, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.3546376329787234, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.42330208333333336, + "hf_avg": 25.90126357738714 + }, + { + "hf_id": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", + "name": "Mistral-Nemo-Moderne-12B-FFT-experimental", + "params_b": 12.248, + "ifeval": 0.33522498082864577, + "bbh": 0.5234089179237257, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.3454953457446808, + "hf_math_lvl5": 0.0770392749244713, + "hf_musr": 0.3714895833333333, + "hf_avg": 18.119465502148433 + }, + { + "hf_id": "nbeerbower/Mistral-Nemo-Prism-12B", + "name": "Mistral-Nemo-Prism-12B", + "params_b": 12.248, + "ifeval": 0.6858103166265509, + "bbh": 0.5475186352291487, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3581283244680851, + "hf_math_lvl5": 0.08685800604229607, + "hf_musr": 0.46261458333333333, + "hf_avg": 27.924007108863105 + }, + { + "hf_id": "nbeerbower/Mistral-Nemo-Prism-12B-v2", + "name": "Mistral-Nemo-Prism-12B-v2", + "params_b": 12.248, + "ifeval": 0.6974006746543615, + "bbh": 0.5491875637377679, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3567154255319149, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.45997916666666666, + "hf_avg": 28.07046513725069 + }, + { + "hf_id": "nbeerbower/Mistral-Nemo-Prism-12B-v7", + "name": "Mistral-Nemo-Prism-12B-v7", + "params_b": 12.248, + "ifeval": 0.6961517662025647, + "bbh": 0.5521104600038905, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.35904255319148937, + "hf_math_lvl5": 0.08685800604229607, + "hf_musr": 0.46388541666666666, + "hf_avg": 28.034788345956486 + }, + { + "hf_id": "nbeerbower/Mistral-Small-Drummer-22B", + "name": "Mistral-Small-Drummer-22B", + "params_b": 22.247, + "ifeval": 0.6331289866443259, + "bbh": 0.5793201948136216, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.40949135638297873, + "hf_math_lvl5": 0.18882175226586104, + "hf_musr": 0.40636458333333336, + "hf_avg": 29.819408797614738 + }, + { + "hf_id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", + "name": "Mistral-Small-Gutenberg-Doppel-22B", + "params_b": 22.247, + "ifeval": 0.48932277468228746, + "bbh": 0.5858932329112819, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.41240026595744683, + "hf_math_lvl5": 0.21827794561933533, + "hf_musr": 0.39706250000000004, + "hf_avg": 27.972039733086746 + }, + { + "hf_id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", + "name": "Qwen2.5-Gutenberg-Doppel-14B", + "params_b": 14.77, + "ifeval": 0.8090832324897937, + "bbh": 0.6381735755183319, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.49210438829787234, + "hf_math_lvl5": 0.5415407854984894, + "hf_musr": 0.4100625, + "hf_avg": 41.327794892427 + }, + { + "hf_id": "nbeerbower/SmolNemo-12B-FFT-experimental", + "name": "SmolNemo-12B-FFT-experimental", + "params_b": 12.248, + "ifeval": 0.3348005514257725, + "bbh": 0.3336088810494464, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.12167553191489362, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.38469791666666664, + "hf_avg": 8.496288137169051 + }, + { + "hf_id": "nbeerbower/gemma2-gutenberg-27B", + "name": "gemma2-gutenberg-27B", + "params_b": 27.227, + "ifeval": 0.29470804133033685, + "bbh": 0.37965683503451614, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.19822140957446807, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.3727291666666666, + "hf_avg": 10.4236638747709 + }, + { + "hf_id": "nbeerbower/gemma2-gutenberg-9B", + "name": "gemma2-gutenberg-9B", + "params_b": 9.242, + "ifeval": 0.2795948084416016, + "bbh": 0.5950904001490335, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.4192154255319149, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.45951041666666664, + "hf_avg": 23.719246452807738 + }, + { + "hf_id": "nbeerbower/llama-3-gutenberg-8B", + "name": "llama-3-gutenberg-8B", + "params_b": 8.03, + "ifeval": 0.4371910973993448, + "bbh": 0.49936002561994197, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.383061835106383, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.40730208333333334, + "hf_avg": 21.30881705657422 + }, + { + "hf_id": "nbeerbower/llama3.1-cc-8B", + "name": "llama3.1-cc-8B", + "params_b": 8.03, + "ifeval": 0.5068086011782071, + "bbh": 0.4871187428614386, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.3346908244680851, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.38851041666666664, + "hf_avg": 20.256041660753873 + }, + { + "hf_id": "nbeerbower/llama3.1-kartoffeldes-70B", + "name": "llama3.1-kartoffeldes-70B", + "params_b": 70.554, + "ifeval": 0.8230218043679659, + "bbh": 0.6893878613110068, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.4988364361702128, + "hf_math_lvl5": 0.3217522658610272, + "hf_musr": 0.46460416666666665, + "hf_avg": 41.11056038125863 + }, + { + "hf_id": "nbeerbower/mistral-nemo-bophades-12B", + "name": "mistral-nemo-bophades-12B", + "params_b": 12.248, + "ifeval": 0.6794405510711579, + "bbh": 0.4988471515853883, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.35006648936170215, + "hf_math_lvl5": 0.12311178247734139, + "hf_musr": 0.41778125, + "hf_avg": 25.72860228091272 + }, + { + "hf_id": "nbeerbower/mistral-nemo-bophades3-12B", + "name": "mistral-nemo-bophades3-12B", + "params_b": 12.248, + "ifeval": 0.6577835698169745, + "bbh": 0.544933208169299, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.3371010638297872, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.4604479166666667, + "hf_avg": 27.16558651541493 + }, + { + "hf_id": "nbeerbower/mistral-nemo-cc-12B", + "name": "mistral-nemo-cc-12B", + "params_b": 12.248, + "ifeval": 0.14353249378316202, + "bbh": 0.5399409546487519, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.3597905585106383, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.44236458333333334, + "hf_avg": 17.20341027317436 + }, + { + "hf_id": "nbeerbower/mistral-nemo-gutades-12B", + "name": "mistral-nemo-gutades-12B", + "params_b": 12.248, + "ifeval": 0.3425189608017837, + "bbh": 0.5407194259684368, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.3560505319148936, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.4040416666666667, + "hf_avg": 21.075924659452493 + }, + { + "hf_id": "nbeerbower/mistral-nemo-gutenberg-12B", + "name": "mistral-nemo-gutenberg-12B", + "params_b": 12.248, + "ifeval": 0.350386973231027, + "bbh": 0.5281363707697807, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3562167553191489, + "hf_math_lvl5": 0.1163141993957704, + "hf_musr": 0.41706250000000006, + "hf_avg": 21.024154964814482 + }, + { + "hf_id": "nbeerbower/mistral-nemo-gutenberg-12B-v2", + "name": "mistral-nemo-gutenberg-12B-v2", + "params_b": 12.248, + "ifeval": 0.6203395878491292, + "bbh": 0.5397203788283472, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.34990026595744683, + "hf_math_lvl5": 0.10876132930513595, + "hf_musr": 0.4286979166666667, + "hf_avg": 25.514319837220018 + }, + { + "hf_id": "nbeerbower/mistral-nemo-gutenberg-12B-v3", + "name": "mistral-nemo-gutenberg-12B-v3", + "params_b": 12.248, + "ifeval": 0.21827085466562057, + "bbh": 0.544065799051091, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.3644448138297872, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.44503125, + "hf_avg": 19.290512133580634 + }, + { + "hf_id": "nbeerbower/mistral-nemo-gutenberg-12B-v4", + "name": "mistral-nemo-gutenberg-12B-v4", + "params_b": 12.248, + "ifeval": 0.237929804031082, + "bbh": 0.5269028864823667, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.3575465425531915, + "hf_math_lvl5": 0.12613293051359517, + "hf_musr": 0.4104270833333333, + "hf_avg": 19.83898144810974 + }, + { + "hf_id": "nbeerbower/mistral-nemo-gutenberg2-12B-test", + "name": "mistral-nemo-gutenberg2-12B-test", + "params_b": 12.248, + "ifeval": 0.33847192116916447, + "bbh": 0.525477908630255, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.35546875, + "hf_math_lvl5": 0.1163141993957704, + "hf_musr": 0.4157291666666667, + "hf_avg": 20.970579950625815 + }, + { + "hf_id": "nbeerbower/mistral-nemo-kartoffel-12B", + "name": "mistral-nemo-kartoffel-12B", + "params_b": 12.248, + "ifeval": 0.7031709198260616, + "bbh": 0.5483796436144805, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.35846077127659576, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.46528125000000004, + "hf_avg": 28.21995794267338 + }, + { + "hf_id": "nbeerbower/mistral-nemo-narwhal-12B", + "name": "mistral-nemo-narwhal-12B", + "params_b": 12.248, + "ifeval": 0.5549187267561182, + "bbh": 0.5057374929934754, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.34832114361702127, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.38469791666666664, + "hf_avg": 21.240878416352583 + }, + { + "hf_id": "nbeerbower/mistral-nemo-wissenschaft-12B", + "name": "mistral-nemo-wissenschaft-12B", + "params_b": 12.248, + "ifeval": 0.6520133246452745, + "bbh": 0.5040306120993181, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.35322473404255317, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.41778125, + "hf_avg": 25.50992586494861 + }, + { + "hf_id": "nbrahme/IndusQ", + "name": "IndusQ", + "params_b": 1.176, + "ifeval": 0.24397487555242311, + "bbh": 0.30624035198474986, + "gpqa": 0.26510067114093966, + "mmlu_pro": 0.11203457446808511, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.3366354166666667, + "hf_avg": 5.636134043635501 + }, + { + "hf_id": "necva/IE-cont-Llama3.1-8B", + "name": "IE-cont-Llama3.1-8B", + "params_b": 8.03, + "ifeval": 0.20490742341431845, + "bbh": 0.2911778102988436, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11668882978723404, + "hf_musr": 0.35753125, + "hf_avg": 5.093186530380667 + }, + { + "hf_id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", + "name": "jessi-v0.1-qwen2.5-7b-instruct", + "params_b": 7.616, + "ifeval": 0.7326715337526651, + "bbh": 0.5292315105257686, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.42278922872340424, + "hf_math_lvl5": 0.4086102719033233, + "hf_musr": 0.3913645833333333, + "hf_avg": 32.775632974338876 + }, + { + "hf_id": "neopolita/jessi-v0.1-virtuoso-small", + "name": "jessi-v0.1-virtuoso-small", + "params_b": 14.77, + "ifeval": 0.7959192719761344, + "bbh": 0.6442861439957068, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.5129654255319149, + "hf_math_lvl5": 0.33987915407854985, + "hf_musr": 0.43616666666666665, + "hf_avg": 38.86960435987991 + }, + { + "hf_id": "neopolita/jessi-v0.2-falcon3-7b-instruct", + "name": "jessi-v0.2-falcon3-7b-instruct", + "params_b": 7.456, + "ifeval": 0.5770754930251731, + "bbh": 0.5363079188886575, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.3904587765957447, + "hf_math_lvl5": 0.2537764350453172, + "hf_musr": 0.44788541666666665, + "hf_avg": 29.040362033793667 + }, + { + "hf_id": "neopolita/jessi-v0.3-falcon3-7b-instruct", + "name": "jessi-v0.3-falcon3-7b-instruct", + "params_b": 7.456, + "ifeval": 0.7509064836855099, + "bbh": 0.538793502664194, + "gpqa": 0.3196308724832215, + "mmlu_pro": 0.3970246010638298, + "hf_math_lvl5": 0.18882175226586104, + "hf_musr": 0.46915625, + "hf_avg": 31.56157882788928 + }, + { + "hf_id": "neopolita/jessi-v0.4-falcon3-7b-instruct", + "name": "jessi-v0.4-falcon3-7b-instruct", + "params_b": 7.456, + "ifeval": 0.7603735865281896, + "bbh": 0.5521668757306609, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.40043218085106386, + "hf_math_lvl5": 0.3768882175226586, + "hf_musr": 0.49712500000000004, + "hf_avg": 35.58265268401903 + }, + { + "hf_id": "neopolita/jessi-v0.5-falcon3-7b-instruct", + "name": "jessi-v0.5-falcon3-7b-instruct", + "params_b": 7.456, + "ifeval": 0.7411645544931892, + "bbh": 0.5589627302276082, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.3966090425531915, + "hf_math_lvl5": 0.37386706948640486, + "hf_musr": 0.48652083333333335, + "hf_avg": 35.17362960818127 + }, + { + "hf_id": "neopolita/jessi-v0.6-falcon3-7b-instruct", + "name": "jessi-v0.6-falcon3-7b-instruct", + "params_b": 7.456, + "ifeval": 0.7401904723910335, + "bbh": 0.5508818723957883, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.3956948138297872, + "hf_math_lvl5": 0.3564954682779456, + "hf_musr": 0.49042708333333335, + "hf_avg": 34.54828176574102 + }, + { + "hf_id": "neopolita/loki-v0.1-virtuoso", + "name": "loki-v0.1-virtuoso", + "params_b": 14.77, + "ifeval": 0.7819308324135517, + "bbh": 0.6467251502613163, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.5128823138297872, + "hf_math_lvl5": 0.3391238670694864, + "hf_musr": 0.43753125, + "hf_avg": 39.196962121784466 + }, + { + "hf_id": "netcat420/Llama3.1-MFANN-8b", + "name": "Llama3.1-MFANN-8b", + "params_b": 8.03, + "ifeval": 0.29695651981187693, + "bbh": 0.4281154680742545, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.27252327127659576, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.33790625, + "hf_avg": 13.117063178518029 + }, + { + "hf_id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", + "name": "MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", + "params_b": 8.03, + "ifeval": 0.4209796672828096, + "bbh": 0.49237606236472237, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.35222739361702127, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.37276041666666665, + "hf_avg": 19.213728182261345 + }, + { + "hf_id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", + "name": "MFANN-Llama3.1-Abliterated-SLERP-V4", + "params_b": 8.03, + "ifeval": 0.41688275996577967, + "bbh": 0.4908971108837563, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.35164561170212766, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.38209374999999995, + "hf_avg": 19.39947102160331 + }, + { + "hf_id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", + "name": "MFANN-llama3.1-abliterated-SLERP-v3", + "params_b": 8.03, + "ifeval": 0.37993856301280604, + "bbh": 0.49305765460927126, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.35305851063829785, + "hf_math_lvl5": 0.06419939577039276, + "hf_musr": 0.36603125000000003, + "hf_avg": 18.042261525952018 + }, + { + "hf_id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", + "name": "MFANN-llama3.1-abliterated-SLERP-v3.1", + "params_b": 8.03, + "ifeval": 0.4201551882338861, + "bbh": 0.492068920606988, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3543051861702128, + "hf_math_lvl5": 0.06948640483383686, + "hf_musr": 0.3686354166666667, + "hf_avg": 18.96623306213071 + }, + { + "hf_id": "netcat420/MFANN3b", + "name": "MFANN3b", + "params_b": 2.78, + "ifeval": 0.2524435165361241, + "bbh": 0.4433128382028508, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.23055186170212766, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.36060416666666667, + "hf_avg": 12.652447668077093 + }, + { + "hf_id": "netcat420/MFANN3bv0.15", + "name": "MFANN3bv0.15", + "params_b": 2.78, + "ifeval": 0.2012105657433388, + "bbh": 0.453931293669888, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.24684175531914893, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.3957916666666667, + "hf_avg": 11.92455505796081 + }, + { + "hf_id": "netcat420/MFANN3bv0.18", + "name": "MFANN3bv0.18", + "params_b": 2.78, + "ifeval": 0.22064455644356973, + "bbh": 0.4514366169824164, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.25, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.40236458333333336, + "hf_avg": 12.649876255790772 + }, + { + "hf_id": "netcat420/MFANN3bv0.20", + "name": "MFANN3bv0.20", + "params_b": 2.78, + "ifeval": 0.21934578030736224, + "bbh": 0.4493365019423472, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.25, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.4077291666666667, + "hf_avg": 12.5720051522728 + }, + { + "hf_id": "netcat420/MFANN3bv0.21", + "name": "MFANN3bv0.21", + "params_b": 2.78, + "ifeval": 0.1909189838517356, + "bbh": 0.44700236898039053, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.23927859042553193, + "hf_math_lvl5": 0.03172205438066465, + "hf_musr": 0.37594791666666666, + "hf_avg": 12.00855289802916 + }, + { + "hf_id": "netcat420/MFANN3bv0.22", + "name": "MFANN3bv0.22", + "params_b": 2.78, + "ifeval": 0.1979381374752324, + "bbh": 0.44851095830051274, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.2517453457446808, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.35213541666666665, + "hf_avg": 12.256506387257199 + }, + { + "hf_id": "netcat420/MFANN3bv0.24", + "name": "MFANN3bv0.24", + "params_b": 2.78, + "ifeval": 0.2200450360598767, + "bbh": 0.4407346600666096, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.23520611702127658, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.3520729166666667, + "hf_avg": 11.81028360635554 + }, + { + "hf_id": "netcat420/MFANN3bv1.2", + "name": "MFANN3bv1.2", + "params_b": 2.775, + "ifeval": 0.2686050789682487, + "bbh": 0.3659932511014956, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.14502992021276595, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.31555208333333334, + "hf_avg": 8.060474667336623 + }, + { + "hf_id": "netcat420/MFANN3bv1.4", + "name": "MFANN3bv1.4", + "params_b": 2.78, + "ifeval": 0.35243598097492435, + "bbh": 0.4808549324972969, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.2705285904255319, + "hf_math_lvl5": 0.03700906344410876, + "hf_musr": 0.3707708333333333, + "hf_avg": 16.497599656783464 + }, + { + "hf_id": "netcat420/MFANNv0.19", + "name": "MFANNv0.19", + "params_b": 8.03, + "ifeval": 0.30567449921763146, + "bbh": 0.47313832038755316, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.24725731382978725, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.35269791666666667, + "hf_avg": 14.38906583250676 + }, + { + "hf_id": "netcat420/MFANNv0.20", + "name": "MFANNv0.20", + "params_b": 8.03, + "ifeval": 0.34786477657061043, + "bbh": 0.4574431878198548, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.32022938829787234, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.38739583333333333, + "hf_avg": 16.46165681188948 + }, + { + "hf_id": "netcat420/MFANNv0.21", + "name": "MFANNv0.21", + "params_b": 8.03, + "ifeval": 0.3233099287667832, + "bbh": 0.45763723048372523, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.3031083776595745, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.3993333333333333, + "hf_avg": 15.886167017885262 + }, + { + "hf_id": "netcat420/MFANNv0.22.1", + "name": "MFANNv0.22.1", + "params_b": 8.03, + "ifeval": 0.3089469274857378, + "bbh": 0.46608928527824584, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.33427526595744683, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.3753020833333333, + "hf_avg": 15.66737725944644 + }, + { + "hf_id": "netcat420/MFANNv0.23", + "name": "MFANNv0.23", + "params_b": 8.03, + "ifeval": 0.3127435205255389, + "bbh": 0.4898102063834755, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.33876329787234044, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.3767916666666667, + "hf_avg": 16.652655864272365 + }, + { + "hf_id": "netcat420/MFANNv0.24", + "name": "MFANNv0.24", + "params_b": 8.03, + "ifeval": 0.3162409074588758, + "bbh": 0.479027491915232, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3347739361702128, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.3753958333333333, + "hf_avg": 16.398373723349362 + }, + { + "hf_id": "netcat420/MFANNv0.25", + "name": "MFANNv0.25", + "params_b": 8.03, + "ifeval": 0.34666573580322435, + "bbh": 0.47940650861209216, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.33427526595744683, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.36879166666666663, + "hf_avg": 16.59696503760558 + }, + { + "hf_id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", + "name": "Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", + "params_b": 7.616, + "ifeval": 0.5878413720040603, + "bbh": 0.5236664966992856, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.390375664893617, + "hf_math_lvl5": 0.3376132930513595, + "hf_musr": 0.39257291666666666, + "hf_avg": 28.031122057978823 + }, + { + "hf_id": "netcat420/Qwen2.5-7b-MFANN-slerp", + "name": "Qwen2.5-7b-MFANN-slerp", + "params_b": 7.616, + "ifeval": 0.6532123654126606, + "bbh": 0.5088729928004616, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.3416722074468085, + "hf_math_lvl5": 0.28700906344410876, + "hf_musr": 0.40730208333333334, + "hf_avg": 27.709222898184922 + }, + { + "hf_id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", + "name": "Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", + "params_b": 7.616, + "ifeval": 0.5742274941599401, + "bbh": 0.5071448530886461, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.3156582446808511, + "hf_math_lvl5": 0.256797583081571, + "hf_musr": 0.40584375, + "hf_avg": 25.396741920049962 + }, + { + "hf_id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", + "name": "qwen2.5-MFANN-7b-SLERP-V1.2", + "params_b": 7.616, + "ifeval": 0.6606060807546199, + "bbh": 0.5111030308243185, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.34383311170212766, + "hf_math_lvl5": 0.28700906344410876, + "hf_musr": 0.4259375, + "hf_avg": 28.515188837080995 + }, + { + "hf_id": "netease-youdao/Confucius-o1-14B", + "name": "Confucius-o1-14B", + "params_b": 14.77, + "ifeval": 0.6378497941018719, + "bbh": 0.6299772409698484, + "gpqa": 0.3649328859060403, + "mmlu_pro": 0.5265126329787234, + "hf_math_lvl5": 0.4312688821752266, + "hf_musr": 0.4338125, + "hf_avg": 38.52750121655241 + }, + { + "hf_id": "newsbang/Homer-7B-v0.1", + "name": "Homer-7B-v0.1", + "params_b": 7.616, + "ifeval": 0.6108724850064495, + "bbh": 0.5601389961416444, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.4474734042553192, + "hf_math_lvl5": 0.3859516616314199, + "hf_musr": 0.43569791666666663, + "hf_avg": 33.058437903590075 + }, + { + "hf_id": "newsbang/Homer-v1.0-Qwen2.5-72B", + "name": "Homer-v1.0-Qwen2.5-72B", + "params_b": 72.706, + "ifeval": 0.7627716680629618, + "bbh": 0.7309799550978827, + "gpqa": 0.4161073825503356, + "mmlu_pro": 0.6145279255319149, + "hf_math_lvl5": 0.4901812688821752, + "hf_musr": 0.4677291666666667, + "hf_avg": 47.464376408361055 + }, + { + "hf_id": "newsbang/Homer-v1.0-Qwen2.5-7B", + "name": "Homer-v1.0-Qwen2.5-7B", + "params_b": 7.616, + "ifeval": 0.6392737935344885, + "bbh": 0.5655254177370223, + "gpqa": 0.3221476510067114, + "mmlu_pro": 0.45345744680851063, + "hf_math_lvl5": 0.3323262839879154, + "hf_musr": 0.42782291666666666, + "hf_avg": 32.62357604568507 + }, + { + "hf_id": "nguyentd/FinancialAdvice-Qwen2.5-7B", + "name": "FinancialAdvice-Qwen2.5-7B", + "params_b": 7.616, + "ifeval": 0.449605934476079, + "bbh": 0.4730934153895792, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.375249335106383, + "hf_math_lvl5": 0.1148036253776435, + "hf_musr": 0.40248958333333335, + "hf_avg": 21.287932324484498 + }, + { + "hf_id": "nhyha/N3N_Delirium-v1_1030_0227", + "name": "N3N_Delirium-v1_1030_0227", + "params_b": 10.159, + "ifeval": 0.8022890375315275, + "bbh": 0.5890686677822234, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.41497672872340424, + "hf_math_lvl5": 0.2107250755287009, + "hf_musr": 0.40981249999999997, + "hf_avg": 33.094478522688185 + }, + { + "hf_id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", + "name": "N3N_Llama-3.1-8B-Instruct_1028_0216", + "params_b": 8.03, + "ifeval": 0.4796063334175543, + "bbh": 0.5053741309920361, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.36377992021276595, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.40503125, + "hf_avg": 23.479351772868437 + }, + { + "hf_id": "nhyha/N3N_gemma-2-9b-it_20241029_1532", + "name": "N3N_gemma-2-9b-it_20241029_1532", + "params_b": 10.159, + "ifeval": 0.6751940407008958, + "bbh": 0.5863124381827675, + "gpqa": 0.34060402684563756, + "mmlu_pro": 0.4122340425531915, + "hf_math_lvl5": 0.2122356495468278, + "hf_musr": 0.4593541666666667, + "hf_avg": 32.14813042759547 + }, + { + "hf_id": "nhyha/N3N_gemma-2-9b-it_20241110_2026", + "name": "N3N_gemma-2-9b-it_20241110_2026", + "params_b": 10.159, + "ifeval": 0.6282829558903709, + "bbh": 0.5867149609980419, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.40201130319148937, + "hf_math_lvl5": 0.1608761329305136, + "hf_musr": 0.40730208333333334, + "hf_avg": 29.119584296103525 + }, + { + "hf_id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", + "name": "merge_Qwen2.5-7B-Instruct_20241023_0314", + "params_b": 7.616, + "ifeval": 0.5694568190179834, + "bbh": 0.5558529241660143, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.45420545212765956, + "hf_math_lvl5": 0.3542296072507553, + "hf_musr": 0.42506249999999995, + "hf_avg": 31.4495497347518 + }, + { + "hf_id": "nidum/Nidum-Limitless-Gemma-2B", + "name": "Nidum-Limitless-Gemma-2B", + "params_b": 2.506, + "ifeval": 0.24235140538216376, + "bbh": 0.3078801520076317, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.11735372340425532, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.37403125, + "hf_avg": 6.166007951282463 + }, + { + "hf_id": "nisten/franqwenstein-35b", + "name": "franqwenstein-35b", + "params_b": 34.714, + "ifeval": 0.37986320740080765, + "bbh": 0.6646579178049268, + "gpqa": 0.4035234899328859, + "mmlu_pro": 0.5730551861702128, + "hf_math_lvl5": 0.3406344410876133, + "hf_musr": 0.49402083333333335, + "hf_avg": 36.57133162829882 + }, + { + "hf_id": "nisten/franqwenstein-35b", + "name": "franqwenstein-35b", + "params_b": 34.714, + "ifeval": 0.39135383005979685, + "bbh": 0.6591132598701116, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.5610871010638298, + "hf_math_lvl5": 0.304380664652568, + "hf_musr": 0.4681041666666667, + "hf_avg": 34.451116831224226 + }, + { + "hf_id": "nisten/tqwendo-36b", + "name": "tqwendo-36b", + "params_b": 35.69, + "ifeval": 0.6777672132164878, + "bbh": 0.6431830832659088, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.4380817819148936, + "hf_math_lvl5": 0.41540785498489424, + "hf_musr": 0.44295833333333334, + "hf_avg": 37.04172043230399 + }, + { + "hf_id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", + "name": "Mistral-NeMo-Minitron-Upscale-v1", + "params_b": 12.451, + "ifeval": 0.16484040124647048, + "bbh": 0.44679984097967057, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.2537400265957447, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3803541666666667, + "hf_avg": 10.990222795976974 + }, + { + "hf_id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", + "name": "Mistral-NeMo-Minitron-Upscale-v2", + "params_b": 12.451, + "ifeval": 0.15727159492369136, + "bbh": 0.3949668154807224, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.1926529255319149, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3790833333333334, + "hf_avg": 8.345444097364796 + }, + { + "hf_id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", + "name": "Mistral-NeMo-Minitron-Upscale-v3", + "params_b": 12.451, + "ifeval": 0.14120976786038822, + "bbh": 0.30524522602918064, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11710438829787234, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.40984375, + "hf_avg": 5.202259190321894 + }, + { + "hf_id": "nlpguy/StarFusion-alpha1", + "name": "StarFusion-alpha1", + "params_b": 7.242, + "ifeval": 0.5660092997690572, + "bbh": 0.4428694115507034, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.3190658244680851, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.40810416666666666, + "hf_avg": 20.8283235530266 + }, + { + "hf_id": "nothingiisreal/L3.1-8B-Celeste-V1.5", + "name": "L3.1-8B-Celeste-V1.5", + "params_b": 8.03, + "ifeval": 0.7326715337526651, + "bbh": 0.5011796822721141, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.37042885638297873, + "hf_math_lvl5": 0.14652567975830816, + "hf_musr": 0.37486458333333333, + "hf_avg": 26.172145831649967 + }, + { + "hf_id": "nothingiisreal/MN-12B-Starcannon-v2", + "name": "MN-12B-Starcannon-v2", + "params_b": 12.248, + "ifeval": 0.3925273828995953, + "bbh": 0.5004499888471767, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.31283244680851063, + "hf_math_lvl5": 0.05966767371601209, + "hf_musr": 0.39781249999999996, + "hf_avg": 18.181449904707133 + }, + { + "hf_id": "nvidia/AceInstruct-1.5B", + "name": "AceInstruct-1.5B", + "params_b": 1.777, + "ifeval": 0.3947758613811354, + "bbh": 0.3931958135346713, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.2573969414893617, + "hf_math_lvl5": 0.31268882175226587, + "hf_musr": 0.34600000000000003, + "hf_avg": 18.11586485792988 + }, + { + "hf_id": "nvidia/AceInstruct-72B", + "name": "AceInstruct-72B", + "params_b": 72.706, + "ifeval": 0.711888899231816, + "bbh": 0.6139041785911337, + "gpqa": 0.3213087248322148, + "mmlu_pro": 0.48736702127659576, + "hf_math_lvl5": 0.6261329305135952, + "hf_musr": 0.42060416666666667, + "hf_avg": 40.40502231212503 + }, + { + "hf_id": "nvidia/AceInstruct-7B", + "name": "AceInstruct-7B", + "params_b": 7.616, + "ifeval": 0.5422290633297429, + "bbh": 0.550118130896558, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.417719414893617, + "hf_math_lvl5": 0.5294561933534743, + "hf_musr": 0.4255, + "hf_avg": 33.05654347527337 + }, + { + "hf_id": "nvidia/AceMath-1.5B-Instruct", + "name": "AceMath-1.5B-Instruct", + "params_b": 1.777, + "ifeval": 0.32123654126606294, + "bbh": 0.4024301274933693, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.20636635638297873, + "hf_math_lvl5": 0.5287009063444109, + "hf_musr": 0.3606979166666667, + "hf_avg": 20.18985986442293 + }, + { + "hf_id": "nvidia/AceMath-72B-Instruct", + "name": "AceMath-72B-Instruct", + "params_b": 72.706, + "ifeval": 0.494993284485166, + "bbh": 0.640215611099268, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.44107380319148937, + "hf_math_lvl5": 0.7145015105740181, + "hf_musr": 0.40615625, + "hf_avg": 36.65560439276686 + }, + { + "hf_id": "nvidia/AceMath-72B-RM", + "name": "AceMath-72B-RM", + "params_b": 71.461, + "ifeval": 0.14125963554479892, + "bbh": 0.2717426350897727, + "gpqa": 0.23406040268456377, + "mmlu_pro": 0.11785239361702128, + "hf_musr": 0.3351458333333333, + "hf_avg": 3.4288267214523853 + }, + { + "hf_id": "nvidia/AceMath-7B-Instruct", + "name": "AceMath-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.45317756885064964, + "bbh": 0.49938547326244365, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.33834773936170215, + "hf_math_lvl5": 0.6336858006042296, + "hf_musr": 0.4192708333333333, + "hf_avg": 30.327490047445718 + }, + { + "hf_id": "nvidia/AceMath-7B-RM", + "name": "AceMath-7B-RM", + "params_b": 7.071, + "ifeval": 0.14937809456686035, + "bbh": 0.2422689292768334, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.11386303191489362, + "hf_musr": 0.35800000000000004, + "hf_avg": 3.2243900063069617 + }, + { + "hf_id": "nvidia/Hymba-1.5B-Base", + "name": "Hymba-1.5B-Base", + "params_b": 1.523, + "ifeval": 0.2295121389025563, + "bbh": 0.32564785214182224, + "gpqa": 0.2558724832214765, + "mmlu_pro": 0.19223736702127658, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.3566354166666667, + "hf_avg": 8.035282134433706 + }, + { + "hf_id": "nvidia/Hymba-1.5B-Instruct", + "name": "Hymba-1.5B-Instruct", + "params_b": 1.523, + "ifeval": 0.6009055971488984, + "bbh": 0.3067133908231881, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.20403922872340424, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.33158333333333334, + "hf_avg": 14.192383567083992 + }, + { + "hf_id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", + "name": "Llama-3.1-Minitron-4B-Depth-Base", + "params_b": 4.02, + "ifeval": 0.16069362624502986, + "bbh": 0.4170704193104893, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.2798371010638298, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.40106250000000004, + "hf_avg": 11.658051143450892 + }, + { + "hf_id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", + "name": "Llama-3.1-Nemotron-70B-Instruct-HF", + "params_b": 70.554, + "ifeval": 0.7380672172059026, + "bbh": 0.6316000668895038, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.49185505319148937, + "hf_math_lvl5": 0.42673716012084595, + "hf_musr": 0.4327604166666667, + "hf_avg": 36.90717314950126, + "aider_pass_rate": 0.368 + }, + { + "hf_id": "nvidia/Minitron-4B-Base", + "name": "Minitron-4B-Base", + "params_b": 4, + "ifeval": 0.2217937295265451, + "bbh": 0.4083876243992497, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.261968085106383, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.413375, + "hf_avg": 11.977737055629914 + }, + { + "hf_id": "nvidia/Minitron-8B-Base", + "name": "Minitron-8B-Base", + "params_b": 7.22, + "ifeval": 0.24242676099416216, + "bbh": 0.43950631883576047, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.31806848404255317, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.40255208333333337, + "hf_avg": 14.21649076588472 + }, + { + "hf_id": "nvidia/Mistral-NeMo-Minitron-8B-Base", + "name": "Mistral-NeMo-Minitron-8B-Base", + "params_b": 7.88, + "ifeval": 0.19456597383830457, + "bbh": 0.5219098090521418, + "gpqa": 0.32550335570469796, + "mmlu_pro": 0.37957114361702127, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.40915625000000005, + "hf_avg": 17.697925857529604 + }, + { + "hf_id": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", + "name": "Mistral-NeMo-Minitron-8B-Instruct", + "params_b": 8.414, + "ifeval": 0.5003889679384035, + "bbh": 0.5320919605840294, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.39910239361702127, + "hf_math_lvl5": 0.1163141993957704, + "hf_musr": 0.38857291666666666, + "hf_avg": 23.57259648330948 + }, + { + "hf_id": "nvidia/Nemotron-Mini-4B-Instruct", + "name": "Nemotron-Mini-4B-Instruct", + "params_b": 4, + "ifeval": 0.6668761109411916, + "bbh": 0.3864840798591535, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.26263297872340424, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.3767291666666666, + "hf_avg": 18.363511312885766 + }, + { + "hf_id": "nvidia/OpenMath2-Llama3.1-8B", + "name": "OpenMath2-Llama3.1-8B", + "params_b": 8.03, + "ifeval": 0.23305939352030391, + "bbh": 0.40955241401694514, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.15533577127659576, + "hf_math_lvl5": 0.2673716012084592, + "hf_musr": 0.34355208333333337, + "hf_avg": 12.751664730325508 + }, + { + "hf_id": "occiglot/occiglot-7b-es-en-instruct", + "name": "occiglot-7b-es-en-instruct", + "params_b": 7.242, + "ifeval": 0.3485141646387142, + "bbh": 0.4110970229781084, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.2310505319148936, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.37375, + "hf_avg": 12.457903975085708 + }, + { + "hf_id": "olabs-ai/reflection_model", + "name": "reflection_model", + "params_b": 9.3, + "ifeval": 0.15986914719610634, + "bbh": 0.4712508645838735, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.33111702127659576, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.35083333333333333, + "hf_avg": 14.079165535571102 + }, + { + "hf_id": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", + "name": "Llama_3.2_1b-autoredteam_helpfulness-train", + "params_b": 1.498, + "ifeval": 0.2765484470094904, + "bbh": 0.31150775306414563, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11319813829787234, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.345875, + "hf_avg": 6.603005260709961 + }, + { + "hf_id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", + "name": "RedPajama-3B-v1-AutoRedteam-Harmless-only", + "params_b": 2.776, + "ifeval": 0.152475431854147, + "bbh": 0.3123669789182832, + "gpqa": 0.23154362416107382, + "mmlu_pro": 0.10995678191489362, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.366125, + "hf_avg": 3.9505437890141017 + }, + { + "hf_id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", + "name": "RedPajama3b_v1-autoredteam_helpfulness-train", + "params_b": 2.776, + "ifeval": 0.2847666414003732, + "bbh": 0.30927408550278385, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.11070478723404255, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.35796875, + "hf_avg": 6.038454723335806 + }, + { + "hf_id": "ontocord/starcoder2_3b-AutoRedteam", + "name": "starcoder2_3b-AutoRedteam", + "params_b": 3.181, + "ifeval": 0.15737133029251277, + "bbh": 0.3497644619743598, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.13364361702127658, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.3645729166666667, + "hf_avg": 5.416510549136297 + }, + { + "hf_id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", + "name": "wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", + "params_b": 3.759, + "ifeval": 0.146105666298754, + "bbh": 0.29981162881428614, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.1141123670212766, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.39257291666666666, + "hf_avg": 4.837021332568196 + }, + { + "hf_id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", + "name": "wide_3b_sft_stage1.2-ss1-expert_how-to", + "params_b": 3.759, + "ifeval": 0.12454842041339201, + "bbh": 0.3047398483929371, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11527593085106383, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.36581250000000004, + "hf_avg": 4.2152701749908585 + }, + { + "hf_id": "oobabooga/CodeBooga-34B-v0.1", + "name": "CodeBooga-34B-v0.1", + "params_b": 33.744, + "ifeval": 0.5250180631834643, + "bbh": 0.3427441185661722, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.23595412234042554, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.43102083333333335, + "hf_avg": 15.66170616238157 + }, + { + "hf_id": "oopere/Llama-FinSent-S", + "name": "Llama-FinSent-S", + "params_b": 0.914, + "ifeval": 0.21187670935340452, + "bbh": 0.31562055310321474, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.11303191489361702, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.3832395833333333, + "hf_avg": 5.811805794594786 + }, + { + "hf_id": "oopere/Llama-FinSent-S", + "name": "Llama-FinSent-S", + "params_b": 0.914, + "ifeval": 0.2163980460733077, + "bbh": 0.3169254117559263, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11336436170212766, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3831770833333333, + "hf_avg": 5.86640376735036 + }, + { + "hf_id": "oopere/pruned10-llama-3.2-3B", + "name": "pruned10-llama-3.2-3B", + "params_b": 3.001, + "ifeval": 0.17762980004166723, + "bbh": 0.3340421117164456, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.16397938829787234, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3721666666666667, + "hf_avg": 6.919943456537481 + }, + { + "hf_id": "oopere/pruned20-llama-1b", + "name": "pruned20-llama-1b", + "params_b": 1.075, + "ifeval": 0.19936213690784896, + "bbh": 0.30313627830972034, + "gpqa": 0.25, + "mmlu_pro": 0.11228390957446809, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.36314583333333333, + "hf_avg": 4.989519628599489 + }, + { + "hf_id": "oopere/pruned20-llama-3.2-3b", + "name": "pruned20-llama-3.2-3b", + "params_b": 2.79, + "ifeval": 0.17887870849346402, + "bbh": 0.32478483912909756, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.12799202127659576, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.34184375, + "hf_avg": 5.656559779770029 + }, + { + "hf_id": "oopere/pruned40-llama-1b", + "name": "pruned40-llama-1b", + "params_b": 0.914, + "ifeval": 0.22843832143157933, + "bbh": 0.29691563801419935, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.10821143617021277, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.4286666666666667, + "hf_avg": 6.608357202270273 + }, + { + "hf_id": "oopere/pruned40-llama-3.2-1B", + "name": "pruned40-llama-3.2-1B", + "params_b": 0.914, + "ifeval": 0.22663976028050017, + "bbh": 0.2982489713475327, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.11145279255319149, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.43523958333333335, + "hf_avg": 6.877693765654211 + }, + { + "hf_id": "oopere/pruned40-llama-3.2-3b", + "name": "pruned40-llama-3.2-3b", + "params_b": 2.367, + "ifeval": 0.21829634259320824, + "bbh": 0.31671170280977073, + "gpqa": 0.22986577181208054, + "mmlu_pro": 0.11768617021276596, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3539375, + "hf_avg": 5.371284651830273 + }, + { + "hf_id": "oopere/pruned60-llama-1b", + "name": "pruned60-llama-1b", + "params_b": 0.753, + "ifeval": 0.18285039251408486, + "bbh": 0.3016193474185398, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.11727061170212766, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.40879166666666666, + "hf_avg": 5.46756672622325 + }, + { + "hf_id": "oopere/pruned60-llama-3.2-3b", + "name": "pruned60-llama-3.2-3b", + "params_b": 1.944, + "ifeval": 0.1824758307956223, + "bbh": 0.31662597093352013, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.11311502659574468, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.3633333333333333, + "hf_avg": 5.128680683633621 + }, + { + "hf_id": "open-atlas/Atlas-Flash-1.5B-Preview", + "name": "Atlas-Flash-1.5B-Preview", + "params_b": 1.777, + "ifeval": 0.3269569187533522, + "bbh": 0.3215460102660847, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.13738364361702127, + "hf_math_lvl5": 0.2212990936555891, + "hf_musr": 0.34879166666666667, + "hf_avg": 11.111374803615627 + }, + { + "hf_id": "open-atlas/Atlas-Flash-7B-Preview", + "name": "Atlas-Flash-7B-Preview", + "params_b": 7.616, + "ifeval": 0.3907543096761038, + "bbh": 0.3541994356643969, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.27842420212765956, + "hf_math_lvl5": 0.25755287009063443, + "hf_musr": 0.38358333333333333, + "hf_avg": 17.496191215913385 + }, + { + "hf_id": "open-neo/Kyro-n1-3B", + "name": "Kyro-n1-3B", + "params_b": 3.086, + "ifeval": 0.45949746672163194, + "bbh": 0.46853756471175373, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.34225398936170215, + "hf_math_lvl5": 0.2854984894259819, + "hf_musr": 0.40879166666666666, + "hf_avg": 23.492573830963973 + }, + { + "hf_id": "open-neo/Kyro-n1-7B", + "name": "Kyro-n1-7B", + "params_b": 7.616, + "ifeval": 0.5572669406064796, + "bbh": 0.5386561160683788, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.433344414893617, + "hf_math_lvl5": 0.38972809667673713, + "hf_musr": 0.38841666666666663, + "hf_avg": 28.918698187681176 + }, + { + "hf_id": "open-thoughts/OpenThinker-7B", + "name": "OpenThinker-7B", + "params_b": 7.616, + "ifeval": 0.4088895242401273, + "bbh": 0.5342727589615611, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.41647273936170215, + "hf_math_lvl5": 0.4259818731117825, + "hf_musr": 0.38199999999999995, + "hf_avg": 26.578519435108664 + }, + { + "hf_id": "openai-community/gpt2", + "name": "gpt2", + "params_b": 0.137, + "ifeval": 0.17925327021192655, + "bbh": 0.3035711244213359, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11594082446808511, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.44705208333333335, + "hf_avg": 6.510807087761722 + }, + { + "hf_id": "openai-community/gpt2", + "name": "gpt2", + "params_b": 0.137, + "ifeval": 0.17795449407571912, + "bbh": 0.30165801067653053, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11652260638297872, + "hf_math_lvl5": 0.005287009063444109, + "hf_musr": 0.43902083333333336, + "hf_avg": 6.33423541829189 + }, + { + "hf_id": "openai-community/gpt2-large", + "name": "gpt2-large", + "params_b": 0.812, + "ifeval": 0.20478220011790937, + "bbh": 0.30688418760118824, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.11419547872340426, + "hf_math_lvl5": 0.012084592145015104, + "hf_musr": 0.3788645833333333, + "hf_avg": 5.567707192929642 + }, + { + "hf_id": "openai-community/gpt2-medium", + "name": "gpt2-medium", + "params_b": 0.38, + "ifeval": 0.22084402718121252, + "bbh": 0.3050280232176266, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.11818484042553191, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.3884479166666666, + "hf_avg": 5.902340287154445 + }, + { + "hf_id": "openai-community/gpt2-xl", + "name": "gpt2-xl", + "params_b": 1.608, + "ifeval": 0.20385798570016445, + "bbh": 0.30085761123260785, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11311502659574468, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.37095833333333333, + "hf_avg": 5.093480678758688 + }, + { + "hf_id": "openbmb/MiniCPM-S-1B-sft-llama-format", + "name": "MiniCPM-S-1B-sft-llama-format", + "params_b": 1, + "ifeval": 0.3328767669782843, + "bbh": 0.30493136322070497, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.1858377659574468, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.33167708333333334, + "hf_avg": 8.996065699730169 + }, + { + "hf_id": "openchat/openchat-3.5-0106", + "name": "openchat-3.5-0106", + "params_b": 7.242, + "ifeval": 0.5966590867786362, + "bbh": 0.46169787083960595, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3291223404255319, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.42543749999999997, + "hf_avg": 22.70925524673515, + "arena_elo": 1182.27, + "arena_rank": 256, + "arena_votes": 12636 + }, + { + "hf_id": "openchat/openchat-3.5-1210", + "name": "openchat-3.5-1210", + "params_b": 7.242, + "ifeval": 0.603678240402133, + "bbh": 0.4535356846447984, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.3142453457446808, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.4414375, + "hf_avg": 22.727849608659103 + }, + { + "hf_id": "openchat/openchat-3.6-8b-20240522", + "name": "openchat-3.6-8b-20240522", + "params_b": 8.03, + "ifeval": 0.5343355629729118, + "bbh": 0.5338412089001999, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.32288896276595747, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.3998541666666667, + "hf_avg": 23.10731592675394 + }, + { + "hf_id": "openchat/openchat_3.5", + "name": "openchat_3.5", + "params_b": 7, + "ifeval": 0.5931118321608887, + "bbh": 0.44263196862832893, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.31532579787234044, + "hf_math_lvl5": 0.07250755287009064, + "hf_musr": 0.4228645833333333, + "hf_avg": 21.635827111564595, + "arena_elo": 1182.2, + "arena_rank": 257, + "arena_votes": 7967 + }, + { + "hf_id": "openchat/openchat_v3.2", + "name": "openchat_v3.2", + "params_b": 13, + "ifeval": 0.2980558252104416, + "bbh": 0.4330564283474314, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.2421875, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.433625, + "hf_avg": 13.833145550526877 + }, + { + "hf_id": "openchat/openchat_v3.2_super", + "name": "openchat_v3.2_super", + "params_b": 13, + "ifeval": 0.2861906408329898, + "bbh": 0.42212089838803973, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.24251994680851063, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.41613541666666665, + "hf_avg": 12.92357500340055 + }, + { + "hf_id": "oxyapi/oxy-1-small", + "name": "oxy-1-small", + "params_b": 14.77, + "ifeval": 0.6244608749229821, + "bbh": 0.5884593784818278, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5000831117021277, + "hf_math_lvl5": 0.36027190332326287, + "hf_musr": 0.4486666666666667, + "hf_avg": 36.10082946641055 + }, + { + "hf_id": "ozone-ai/0x-lite", + "name": "0x-lite", + "params_b": 14.77, + "ifeval": 0.7739874643723099, + "bbh": 0.6340580988016683, + "gpqa": 0.31963087248322153, + "mmlu_pro": 0.5183676861702128, + "hf_math_lvl5": 0.5045317220543807, + "hf_musr": 0.4220625, + "hf_avg": 40.48460307410717 + }, + { + "hf_id": "ozone-research/Chirp-01", + "name": "Chirp-01", + "params_b": 3.086, + "ifeval": 0.6347524568145853, + "bbh": 0.4649560260501419, + "gpqa": 0.2718120805369128, + "mmlu_pro": 0.3508144946808511, + "hf_math_lvl5": 0.3466767371601209, + "hf_musr": 0.4487291666666667, + "hf_avg": 28.252602808689176 + }, + { + "hf_id": "paloalma/ECE-TW3-JRGL-V1", + "name": "ECE-TW3-JRGL-V1", + "params_b": 68.977, + "ifeval": 0.5534947273235016, + "bbh": 0.6283667540784627, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.422124335106383, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.46208333333333335, + "hf_avg": 30.236001116528566 + }, + { + "hf_id": "paloalma/ECE-TW3-JRGL-V2", + "name": "ECE-TW3-JRGL-V2", + "params_b": 72.288, + "ifeval": 0.2254894790267601, + "bbh": 0.6030988136029874, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.4587765957446808, + "hf_math_lvl5": 0.18504531722054382, + "hf_musr": 0.47932291666666665, + "hf_avg": 25.79271499886927 + }, + { + "hf_id": "paloalma/ECE-TW3-JRGL-V5", + "name": "ECE-TW3-JRGL-V5", + "params_b": 72.289, + "ifeval": 0.4552509563513699, + "bbh": 0.6024712037668832, + "gpqa": 0.3414429530201342, + "mmlu_pro": 0.46476063829787234, + "hf_math_lvl5": 0.18353474320241692, + "hf_musr": 0.4620520833333333, + "hf_avg": 29.49204681730805 + }, + { + "hf_id": "paloalma/Le_Triomphant-ECE-TW3", + "name": "Le_Triomphant-ECE-TW3", + "params_b": 72.289, + "ifeval": 0.5402055435134332, + "bbh": 0.6112057897556996, + "gpqa": 0.348993288590604, + "mmlu_pro": 0.476313164893617, + "hf_math_lvl5": 0.19486404833836857, + "hf_musr": 0.4725, + "hf_avg": 31.996294194213135 + }, + { + "hf_id": "paloalma/TW3-JRGL-v2", + "name": "TW3-JRGL-v2", + "params_b": 72.289, + "ifeval": 0.5316127874040878, + "bbh": 0.6137525505395743, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.4857878989361702, + "hf_math_lvl5": 0.17900302114803626, + "hf_musr": 0.48583333333333334, + "hf_avg": 32.46253888329018 + }, + { + "hf_id": "pankajmathur/Al_Dente_v1_8b", + "name": "Al_Dente_v1_8b", + "params_b": 8.03, + "ifeval": 0.3693721547715617, + "bbh": 0.48347371404380524, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.2859873670212766, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.3987083333333334, + "hf_avg": 17.300059065636294 + }, + { + "hf_id": "pankajmathur/model_007_13b_v2", + "name": "model_007_13b_v2", + "params_b": 13, + "ifeval": 0.30564901129004374, + "bbh": 0.4702292766687601, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.24609375, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.46109375, + "hf_avg": 16.00740369607647 + }, + { + "hf_id": "pankajmathur/orca_mini_3b", + "name": "orca_mini_3b", + "params_b": 3.426, + "ifeval": 0.07421419611076388, + "bbh": 0.3196070040004752, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.11452792553191489, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.3349270833333333, + "hf_avg": 3.1252754271378507 + }, + { + "hf_id": "pankajmathur/orca_mini_7b", + "name": "orca_mini_7b", + "params_b": 7, + "ifeval": 0.04121619525082337, + "bbh": 0.3332228472650342, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.12458444148936171, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.36975, + "hf_avg": 3.405696058391802 + }, + { + "hf_id": "pankajmathur/orca_mini_phi-4", + "name": "orca_mini_phi-4", + "params_b": 14.66, + "ifeval": 0.7780588837617521, + "bbh": 0.6856329737542378, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5255152925531915, + "hf_math_lvl5": 0.29531722054380666, + "hf_musr": 0.47030208333333334, + "hf_avg": 40.67628242552744 + }, + { + "hf_id": "pankajmathur/orca_mini_v2_7b", + "name": "orca_mini_v2_7b", + "params_b": 7, + "ifeval": 0.13578859647956312, + "bbh": 0.35363417847864514, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.1541722074468085, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.35933333333333334, + "hf_avg": 5.502368522121576 + }, + { + "hf_id": "pankajmathur/orca_mini_v3_13b", + "name": "orca_mini_v3_13b", + "params_b": 13, + "ifeval": 0.28966253983873896, + "bbh": 0.4710970361474938, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.23046875, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.45979166666666665, + "hf_avg": 15.041296989515962 + }, + { + "hf_id": "pankajmathur/orca_mini_v3_70b", + "name": "orca_mini_v3_70b", + "params_b": 70, + "ifeval": 0.4014703209705803, + "bbh": 0.5949312065598904, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.3757480053191489, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.5078541666666667, + "hf_avg": 25.29815949268638 + }, + { + "hf_id": "pankajmathur/orca_mini_v3_7b", + "name": "orca_mini_v3_7b", + "params_b": 7, + "ifeval": 0.2820937335159599, + "bbh": 0.4095332668279368, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.20836103723404256, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.49823958333333335, + "hf_avg": 13.644021205601403 + }, + { + "hf_id": "pankajmathur/orca_mini_v5_8b", + "name": "orca_mini_v5_8b", + "params_b": 8.03, + "ifeval": 0.48060479527653294, + "bbh": 0.5064242853619262, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.3075964095744681, + "hf_math_lvl5": 0.09894259818731117, + "hf_musr": 0.4000104166666667, + "hf_avg": 20.498300732442388 + }, + { + "hf_id": "pankajmathur/orca_mini_v5_8b_dpo", + "name": "orca_mini_v5_8b_dpo", + "params_b": 8, + "ifeval": 0.48964746871633935, + "bbh": 0.5074598658862709, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.31158577127659576, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.389375, + "hf_avg": 20.33420699350285 + }, + { + "hf_id": "pankajmathur/orca_mini_v5_8b_orpo", + "name": "orca_mini_v5_8b_orpo", + "params_b": 8, + "ifeval": 0.08243239050164675, + "bbh": 0.496374377369289, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.2947140957446808, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.41312499999999996, + "hf_avg": 12.99373010020274 + }, + { + "hf_id": "pankajmathur/orca_mini_v6_8b", + "name": "orca_mini_v6_8b", + "params_b": 8.03, + "ifeval": 0.011116060940526692, + "bbh": 0.30286959112076134, + "gpqa": 0.23825503355704697, + "mmlu_pro": 0.1124501329787234, + "hf_math_lvl5": 0.0037764350453172208, + "hf_musr": 0.3554583333333334, + "hf_avg": 1.4763387606479779 + }, + { + "hf_id": "pankajmathur/orca_mini_v6_8b_dpo", + "name": "orca_mini_v6_8b_dpo", + "params_b": 8, + "ifeval": 0.3882564927725103, + "bbh": 0.520280774453148, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.359624335106383, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.40903125, + "hf_avg": 20.392492362112517 + }, + { + "hf_id": "pankajmathur/orca_mini_v7_72b", + "name": "orca_mini_v7_72b", + "params_b": 72.706, + "ifeval": 0.5929622291076566, + "bbh": 0.6842301988001044, + "gpqa": 0.3850671140939597, + "mmlu_pro": 0.5621675531914894, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.5070416666666667, + "hf_avg": 36.215290911204995 + }, + { + "hf_id": "pankajmathur/orca_mini_v7_7b", + "name": "orca_mini_v7_7b", + "params_b": 7.616, + "ifeval": 0.4387646998851935, + "bbh": 0.5274909601771501, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.4167220744680851, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.43597916666666664, + "hf_avg": 23.986504270832167 + }, + { + "hf_id": "pankajmathur/orca_mini_v8_1_70b", + "name": "orca_mini_v8_1_70b", + "params_b": 70.554, + "ifeval": 0.8571434903832941, + "bbh": 0.6781305630707934, + "gpqa": 0.43288590604026844, + "mmlu_pro": 0.49833776595744683, + "hf_math_lvl5": 0.3527190332326284, + "hf_musr": 0.44370833333333337, + "hf_avg": 43.191232219399744 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_0_3B-Instruct", + "name": "orca_mini_v9_0_3B-Instruct", + "params_b": 3.213, + "ifeval": 0.5753766672429155, + "bbh": 0.4412946064233128, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.2603058510638298, + "hf_math_lvl5": 0.14652567975830816, + "hf_musr": 0.36590625000000004, + "hf_avg": 20.660838216117185 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_1_1B-Instruct", + "name": "orca_mini_v9_1_1B-Instruct", + "params_b": 1.236, + "ifeval": 0.3629270336041702, + "bbh": 0.3205118362595434, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.13738364361702127, + "hf_math_lvl5": 0.04607250755287009, + "hf_musr": 0.3380625, + "hf_avg": 9.063245765360724 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_2_14B", + "name": "orca_mini_v9_2_14B", + "params_b": 14.66, + "ifeval": 0.7780588837617521, + "bbh": 0.6856329737542378, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5255152925531915, + "hf_math_lvl5": 0.29531722054380666, + "hf_musr": 0.47030208333333334, + "hf_avg": 40.67628242552744 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_2_70b", + "name": "orca_mini_v9_2_70b", + "params_b": 70.554, + "ifeval": 0.8382591523823455, + "bbh": 0.6744868732778627, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.48213098404255317, + "hf_math_lvl5": 0.2938066465256798, + "hf_musr": 0.47098958333333335, + "hf_avg": 40.72477703174064 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_4_70B", + "name": "orca_mini_v9_4_70B", + "params_b": 70.554, + "ifeval": 0.8014645584826039, + "bbh": 0.6418899297276105, + "gpqa": 0.36577181208053694, + "mmlu_pro": 0.45362367021276595, + "hf_math_lvl5": 0.32628398791540786, + "hf_musr": 0.4647291666666667, + "hf_avg": 39.32550746750958 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_5_1B-Instruct", + "name": "orca_mini_v9_5_1B-Instruct", + "params_b": 1.236, + "ifeval": 0.46379384477630464, + "bbh": 0.3337001077145985, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.13696808510638298, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.31815625, + "hf_avg": 10.693501511833993 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", + "name": "orca_mini_v9_5_1B-Instruct_preview", + "params_b": 1.236, + "ifeval": 0.3935768206137493, + "bbh": 0.32769514793198123, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.13272938829787234, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.33945833333333336, + "hf_avg": 9.541822835013244 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_5_3B-Instruct", + "name": "orca_mini_v9_5_3B-Instruct", + "params_b": 3.213, + "ifeval": 0.7207066140063919, + "bbh": 0.44963802133275826, + "gpqa": 0.2869127516778524, + "mmlu_pro": 0.2882313829787234, + "hf_math_lvl5": 0.1321752265861027, + "hf_musr": 0.4269895833333333, + "hf_avg": 24.152680776912927 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_6_1B-Instruct", + "name": "orca_mini_v9_6_1B-Instruct", + "params_b": 1.236, + "ifeval": 0.6085741388404988, + "bbh": 0.3561349568441982, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.18085106382978725, + "hf_math_lvl5": 0.0770392749244713, + "hf_musr": 0.33955208333333337, + "hf_avg": 15.323670701472977 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_6_3B-Instruct", + "name": "orca_mini_v9_6_3B-Instruct", + "params_b": 3.213, + "ifeval": 0.7316475839660989, + "bbh": 0.45683272658133456, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.28507313829787234, + "hf_math_lvl5": 0.13293051359516617, + "hf_musr": 0.4067708333333333, + "hf_avg": 24.086826299328436 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_7_1B-Instruct", + "name": "orca_mini_v9_7_1B-Instruct", + "params_b": 1.236, + "ifeval": 0.5610136659618701, + "bbh": 0.3181526961435924, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.1344747340425532, + "hf_math_lvl5": 0.0445619335347432, + "hf_musr": 0.35269791666666667, + "hf_avg": 12.485692413724527 + }, + { + "hf_id": "pankajmathur/orca_mini_v9_7_3B-Instruct", + "name": "orca_mini_v9_7_3B-Instruct", + "params_b": 3.213, + "ifeval": 0.5618381450107935, + "bbh": 0.3297133908231881, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.13746675531914893, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.361875, + "hf_avg": 13.034702793317214 + }, + { + "hf_id": "paulml/ECE-ILAB-Q1", + "name": "ECE-ILAB-Q1", + "params_b": 72.706, + "ifeval": 0.7864521691334547, + "bbh": 0.6717755530661759, + "gpqa": 0.38674496644295303, + "mmlu_pro": 0.550531914893617, + "hf_math_lvl5": 0.3557401812688822, + "hf_musr": 0.46137500000000004, + "hf_avg": 42.50307248816295 + }, + { + "hf_id": "pints-ai/1.5-Pints-16K-v0.1", + "name": "1.5-Pints-16K-v0.1", + "params_b": 1.566, + "ifeval": 0.1635914927946737, + "bbh": 0.3133077677150869, + "gpqa": 0.23573825503355705, + "mmlu_pro": 0.1118683510638298, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.357875, + "hf_avg": 4.250927888464816 + }, + { + "hf_id": "pints-ai/1.5-Pints-2K-v0.1", + "name": "1.5-Pints-2K-v0.1", + "params_b": 1.566, + "ifeval": 0.17615593292463996, + "bbh": 0.29801943389750435, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.11037234042553191, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.35018749999999993, + "hf_avg": 4.044439684591542 + }, + { + "hf_id": "piotr25691/thea-3b-25r", + "name": "thea-3b-25r", + "params_b": 3.213, + "ifeval": 0.7344202272193336, + "bbh": 0.44844100293649863, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.3182347074468085, + "hf_math_lvl5": 0.1782477341389728, + "hf_musr": 0.33145833333333335, + "hf_avg": 23.996071012635184 + }, + { + "hf_id": "piotr25691/thea-c-3b-25r", + "name": "thea-c-3b-25r", + "params_b": 3.213, + "ifeval": 0.7401904723910335, + "bbh": 0.4532410175874399, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.3178191489361702, + "hf_math_lvl5": 0.15256797583081572, + "hf_musr": 0.33148958333333334, + "hf_avg": 23.25479609330286 + }, + { + "hf_id": "piotr25691/thea-rp-3b-25r", + "name": "thea-rp-3b-25r", + "params_b": 3.213, + "ifeval": 0.6577835698169745, + "bbh": 0.4390291036559586, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.30601728723404253, + "hf_math_lvl5": 0.13217522658610273, + "hf_musr": 0.381875, + "hf_avg": 21.845382189211346 + }, + { + "hf_id": "postbot/gpt2-medium-emailgen", + "name": "gpt2-medium-emailgen", + "params_b": 0.38, + "ifeval": 0.1492030035860406, + "bbh": 0.31304286003933807, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.1146941489361702, + "hf_musr": 0.3911145833333333, + "hf_avg": 4.743048119298616 + }, + { + "hf_id": "prince-canuma/Ministral-8B-Instruct-2410-HF", + "name": "Ministral-8B-Instruct-2410-HF", + "params_b": 8.02, + "ifeval": 0.5911636679565775, + "bbh": 0.4585611339334732, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.32978723404255317, + "hf_math_lvl5": 0.19184290030211482, + "hf_musr": 0.41375, + "hf_avg": 23.74474818460179 + }, + { + "hf_id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", + "name": "Llama-3-8B-ProLong-512k-Base", + "params_b": 8.03, + "ifeval": 0.5322123077877808, + "bbh": 0.5033213133882991, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.33294547872340424, + "hf_math_lvl5": 0.06873111782477341, + "hf_musr": 0.4222708333333333, + "hf_avg": 21.679044932010054 + }, + { + "hf_id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", + "name": "Llama-3-8B-ProLong-512k-Instruct", + "params_b": 8.03, + "ifeval": 0.5508218194390884, + "bbh": 0.5028310716285619, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.32313829787234044, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.42664583333333334, + "hf_avg": 21.942343537569432 + }, + { + "hf_id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", + "name": "Llama-3-8B-ProLong-512k-Instruct", + "params_b": 8.03, + "ifeval": 0.3977734632996006, + "bbh": 0.49830327201612584, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.3246343085106383, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.425, + "hf_avg": 19.242001937233777 + }, + { + "hf_id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", + "name": "Llama-3-8B-ProLong-64k-Base", + "params_b": 8.03, + "ifeval": 0.5200722970606879, + "bbh": 0.49271325981523906, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.3347739361702128, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.4340520833333333, + "hf_avg": 21.65219829915355 + }, + { + "hf_id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", + "name": "Llama-3-8B-ProLong-64k-Instruct", + "params_b": 8.03, + "ifeval": 0.5563172382611471, + "bbh": 0.5083040804243396, + "gpqa": 0.2953020134228188, + "mmlu_pro": 0.32746010638297873, + "hf_math_lvl5": 0.0649546827794562, + "hf_musr": 0.43969791666666663, + "hf_avg": 23.020991799409 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT", + "name": "Llama-3-Base-8B-SFT", + "params_b": 8.03, + "ifeval": 0.27959591661236627, + "bbh": 0.464303802632615, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3093417553191489, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.4117916666666667, + "hf_avg": 15.96420649511064 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", + "name": "Llama-3-Base-8B-SFT-CPO", + "params_b": 8.03, + "ifeval": 0.37034623687371726, + "bbh": 0.4594875922440002, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.2976230053191489, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.3608541666666667, + "hf_avg": 15.953789309641664 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", + "name": "Llama-3-Base-8B-SFT-DPO", + "params_b": 8.03, + "ifeval": 0.41111251479407973, + "bbh": 0.46658506064913546, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3078457446808511, + "hf_math_lvl5": 0.04154078549848943, + "hf_musr": 0.38673958333333336, + "hf_avg": 18.376219112296344 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", + "name": "Llama-3-Base-8B-SFT-IPO", + "params_b": 8.03, + "ifeval": 0.4486562321307464, + "bbh": 0.4690068582318399, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3115026595744681, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.3919479166666667, + "hf_avg": 18.722473272112868 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", + "name": "Llama-3-Base-8B-SFT-KTO", + "params_b": 8.03, + "ifeval": 0.4522533544329047, + "bbh": 0.4692852292721417, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.3054355053191489, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.3841979166666667, + "hf_avg": 18.6446163938307 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", + "name": "Llama-3-Base-8B-SFT-ORPO", + "params_b": 8.03, + "ifeval": 0.45165383404921167, + "bbh": 0.47340573024653915, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.30826130319148937, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.3706770833333333, + "hf_avg": 19.268325889820545 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", + "name": "Llama-3-Base-8B-SFT-RDPO", + "params_b": 8.03, + "ifeval": 0.4480068440626427, + "bbh": 0.46620140448752295, + "gpqa": 0.3062080536912752, + "mmlu_pro": 0.30144614361702127, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.4027395833333334, + "hf_avg": 19.142302231808696 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", + "name": "Llama-3-Base-8B-SFT-RRHF", + "params_b": 8.03, + "ifeval": 0.3357247658435174, + "bbh": 0.4520360167602379, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.2888962765957447, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.37222916666666667, + "hf_avg": 16.28272427355282 + }, + { + "hf_id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", + "name": "Llama-3-Base-8B-SFT-SLiC-HF", + "params_b": 8.03, + "ifeval": 0.4890479483326463, + "bbh": 0.4704075127777334, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.30634973404255317, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.40909375000000003, + "hf_avg": 19.743113491063596 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-CPO", + "name": "Llama-3-Instruct-8B-CPO", + "params_b": 8.03, + "ifeval": 0.7292993701157373, + "bbh": 0.4998793158888361, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.36519281914893614, + "hf_math_lvl5": 0.09894259818731117, + "hf_musr": 0.35139583333333335, + "hf_avg": 23.999076429407367 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", + "name": "Llama-3-Instruct-8B-CPO-v0.2", + "params_b": 8.03, + "ifeval": 0.7505817896514582, + "bbh": 0.5026669871217129, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.37059507978723405, + "hf_math_lvl5": 0.10800604229607251, + "hf_musr": 0.36190625000000004, + "hf_avg": 24.883954972962346 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-DPO", + "name": "Llama-3-Instruct-8B-DPO", + "params_b": 8.03, + "ifeval": 0.6757436934001781, + "bbh": 0.4991303079139502, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.36652260638297873, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.37381250000000005, + "hf_avg": 23.498239725981037 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", + "name": "Llama-3-Instruct-8B-DPO-v0.2", + "params_b": 8.03, + "ifeval": 0.7208063493752133, + "bbh": 0.505620320855615, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.37691156914893614, + "hf_math_lvl5": 0.08987915407854985, + "hf_musr": 0.3844479166666666, + "hf_avg": 25.208963221170475 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-KTO", + "name": "Llama-3-Instruct-8B-KTO", + "params_b": 8.03, + "ifeval": 0.6864098370102439, + "bbh": 0.4981903187457697, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.35987367021276595, + "hf_math_lvl5": 0.07250755287009064, + "hf_musr": 0.36984374999999997, + "hf_avg": 23.419046565350595 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", + "name": "Llama-3-Instruct-8B-KTO-v0.2", + "params_b": 8.03, + "ifeval": 0.7290245437660962, + "bbh": 0.5079766897761946, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.3667719414893617, + "hf_math_lvl5": 0.09969788519637462, + "hf_musr": 0.37775, + "hf_avg": 24.659390077356022 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-ORPO", + "name": "Llama-3-Instruct-8B-ORPO", + "params_b": 8.03, + "ifeval": 0.712813113649561, + "bbh": 0.5001206199104097, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.36461103723404253, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.35018750000000004, + "hf_avg": 23.622591862806686 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", + "name": "Llama-3-Instruct-8B-ORPO-v0.2", + "params_b": 8.03, + "ifeval": 0.7633213207622442, + "bbh": 0.507835231782556, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.37308843085106386, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.37796874999999996, + "hf_avg": 25.9661449482921 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-RDPO", + "name": "Llama-3-Instruct-8B-RDPO", + "params_b": 8.03, + "ifeval": 0.6660017642078574, + "bbh": 0.5033626077797596, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.36070478723404253, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.3752083333333333, + "hf_avg": 23.603754396673683 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", + "name": "Llama-3-Instruct-8B-RDPO-v0.2", + "params_b": 8.03, + "ifeval": 0.7076922565459647, + "bbh": 0.5049218189829557, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.37741023936170215, + "hf_math_lvl5": 0.08685800604229607, + "hf_musr": 0.3804479166666666, + "hf_avg": 25.03222506807271 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-RRHF", + "name": "Llama-3-Instruct-8B-RRHF", + "params_b": 8.03, + "ifeval": 0.7274509412802475, + "bbh": 0.49105468765647214, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.36436170212765956, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.3475520833333334, + "hf_avg": 24.084494106293988 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", + "name": "Llama-3-Instruct-8B-RRHF-v0.2", + "params_b": 8.03, + "ifeval": 0.712488419615509, + "bbh": 0.49838952572927536, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.3482380319148936, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.37378125, + "hf_avg": 23.753750599972534 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", + "name": "Llama-3-Instruct-8B-SLiC-HF", + "params_b": 8.03, + "ifeval": 0.7399655137258031, + "bbh": 0.5029422936734547, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.35846077127659576, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.3722916666666667, + "hf_avg": 25.308144085338295 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", + "name": "Llama-3-Instruct-8B-SLiC-HF-v0.2", + "params_b": 8.03, + "ifeval": 0.7109646848140712, + "bbh": 0.49838952572927536, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.3482380319148936, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.37378125, + "hf_avg": 23.728355019948566 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-SimPO", + "name": "Llama-3-Instruct-8B-SimPO", + "params_b": 8.03, + "ifeval": 0.6503898544750152, + "bbh": 0.48446848524905367, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.3489029255319149, + "hf_math_lvl5": 0.08610271903323263, + "hf_musr": 0.39483333333333337, + "hf_avg": 23.664165370275043 + }, + { + "hf_id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", + "name": "Llama-3-Instruct-8B-SimPO-v0.2", + "params_b": 8.03, + "ifeval": 0.6808645505037745, + "bbh": 0.503833834044343, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.36220079787234044, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.3988020833333334, + "hf_avg": 24.751539678625036 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-CPO", + "name": "Mistral-7B-Base-SFT-CPO", + "params_b": 7.242, + "ifeval": 0.46549267055856236, + "bbh": 0.43821512506663574, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.26512632978723405, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.4070833333333333, + "hf_avg": 17.3989699937128 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-DPO", + "name": "Mistral-7B-Base-SFT-DPO", + "params_b": 7.242, + "ifeval": 0.44033830237104216, + "bbh": 0.43501123979612694, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.26454454787234044, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.41222916666666665, + "hf_avg": 16.311853642274055 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-IPO", + "name": "Mistral-7B-Base-SFT-IPO", + "params_b": 7.242, + "ifeval": 0.48295300912689443, + "bbh": 0.4458024605899282, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.2791722074468085, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.37762500000000004, + "hf_avg": 17.273368181499578 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-KTO", + "name": "Mistral-7B-Base-SFT-KTO", + "params_b": 7.242, + "ifeval": 0.478481540091402, + "bbh": 0.44764334464528677, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.28715093085106386, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.43678124999999995, + "hf_avg": 19.012992284438702 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", + "name": "Mistral-7B-Base-SFT-RDPO", + "params_b": 7.242, + "ifeval": 0.46064663980460735, + "bbh": 0.44395328626924213, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.27767619680851063, + "hf_math_lvl5": 0.02190332326283988, + "hf_musr": 0.3579375, + "hf_avg": 16.4909336267166 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", + "name": "Mistral-7B-Base-SFT-RRHF", + "params_b": 7.242, + "ifeval": 0.44066299640509404, + "bbh": 0.42805937403716016, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.23977726063829788, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.4186770833333333, + "hf_avg": 16.18202454301279 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", + "name": "Mistral-7B-Base-SFT-SLiC-HF", + "params_b": 7.242, + "ifeval": 0.5127284494031392, + "bbh": 0.44223991890402176, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.2780917553191489, + "hf_math_lvl5": 0.035498489425981876, + "hf_musr": 0.42608333333333337, + "hf_avg": 19.005885681302413 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", + "name": "Mistral-7B-Base-SFT-SimPO", + "params_b": 7.242, + "ifeval": 0.47006387496287627, + "bbh": 0.4398050727924064, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.27019614361702127, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.39706250000000004, + "hf_avg": 17.032014558172765 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-CPO", + "name": "Mistral-7B-Instruct-CPO", + "params_b": 7.242, + "ifeval": 0.4203047912871182, + "bbh": 0.406922267565148, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.2701130319148936, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.41784375, + "hf_avg": 15.540359200506423 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-DPO", + "name": "Mistral-7B-Instruct-DPO", + "params_b": 7.242, + "ifeval": 0.517624347841505, + "bbh": 0.4060358459697702, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.2748503989361702, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.3833333333333333, + "hf_avg": 16.56219551319486 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-IPO", + "name": "Mistral-7B-Instruct-IPO", + "params_b": 7.242, + "ifeval": 0.4929198969844457, + "bbh": 0.4322183023180588, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.2707779255319149, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.43241666666666667, + "hf_avg": 17.71968449150264 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-KTO", + "name": "Mistral-7B-Instruct-KTO", + "params_b": 7.242, + "ifeval": 0.4907966417993147, + "bbh": 0.4139586477181159, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.28125, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.3952708333333333, + "hf_avg": 16.702591779643708 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-ORPO", + "name": "Mistral-7B-Instruct-ORPO", + "params_b": 7.242, + "ifeval": 0.4719621714827768, + "bbh": 0.41040615756566107, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.2662067819148936, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3912395833333333, + "hf_avg": 16.088293109620597 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-RDPO", + "name": "Mistral-7B-Instruct-RDPO", + "params_b": 7.242, + "ifeval": 0.4887232542985944, + "bbh": 0.40501479745073615, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.27767619680851063, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.3873333333333333, + "hf_avg": 16.433078686509017 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-RRHF", + "name": "Mistral-7B-Instruct-RRHF", + "params_b": 7.242, + "ifeval": 0.49601723427173233, + "bbh": 0.41897663476657404, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.26512632978723405, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.397875, + "hf_avg": 16.892023987011328 + }, + { + "hf_id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", + "name": "Mistral-7B-Instruct-SLiC-HF", + "params_b": 7.242, + "ifeval": 0.5115294086357531, + "bbh": 0.4040013641288438, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.27152593085106386, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.39130208333333333, + "hf_avg": 16.389143637193502 + }, + { + "hf_id": "princeton-nlp/Sheared-LLaMA-1.3B", + "name": "Sheared-LLaMA-1.3B", + "params_b": 1.3, + "ifeval": 0.2197702097102355, + "bbh": 0.31970467392464424, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.11710438829787234, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3713020833333333, + "hf_avg": 5.580925572139816 + }, + { + "hf_id": "princeton-nlp/Sheared-LLaMA-2.7B", + "name": "Sheared-LLaMA-2.7B", + "params_b": 2.7, + "ifeval": 0.24165214962964932, + "bbh": 0.32586855691245953, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.11868351063829788, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3567291666666667, + "hf_avg": 6.437920061018112 + }, + { + "hf_id": "princeton-nlp/gemma-2-9b-it-SimPO", + "name": "gemma-2-9b-it-SimPO", + "params_b": 9.242, + "ifeval": 0.3206857803960159, + "bbh": 0.5839179923162123, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.39752327127659576, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.41232291666666665, + "hf_avg": 22.3449346084354, + "arena_elo": 1279.39, + "arena_rank": 203, + "arena_votes": 10069 + }, + { + "hf_id": "prithivMLmods/Bellatrix-1.5B-xElite", + "name": "Bellatrix-1.5B-xElite", + "params_b": 1.777, + "ifeval": 0.1964144026737944, + "bbh": 0.35011984799236834, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.1657247340425532, + "hf_math_lvl5": 0.28700906344410876, + "hf_musr": 0.36190625000000004, + "hf_avg": 12.228869542511774 + }, + { + "hf_id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", + "name": "Bellatrix-Tiny-1.5B-R1", + "params_b": 1.544, + "ifeval": 0.33522498082864577, + "bbh": 0.40221745714531076, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.27509973404255317, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.3682916666666667, + "hf_avg": 14.322564666933152 + }, + { + "hf_id": "prithivMLmods/Bellatrix-Tiny-1B-v2", + "name": "Bellatrix-Tiny-1B-v2", + "params_b": 1.236, + "ifeval": 0.15095169705270903, + "bbh": 0.3267684418723903, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.14926861702127658, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.34302083333333333, + "hf_avg": 6.033864136016213 + }, + { + "hf_id": "prithivMLmods/Blaze-14B-xElite", + "name": "Blaze-14B-xElite", + "params_b": 14.66, + "ifeval": 0.03632029681245762, + "bbh": 0.6627817236091689, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.5111369680851063, + "hf_math_lvl5": 0.3693353474320242, + "hf_musr": 0.46248958333333334, + "hf_avg": 29.122992008208428 + }, + { + "hf_id": "prithivMLmods/COCO-7B-Instruct-1M", + "name": "COCO-7B-Instruct-1M", + "params_b": 7.616, + "ifeval": 0.4743103853331383, + "bbh": 0.5409956853800891, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.41863364361702127, + "hf_math_lvl5": 0.3496978851963746, + "hf_musr": 0.4382395833333333, + "hf_avg": 28.952308445124856 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite", + "name": "Calcium-Opus-14B-Elite", + "params_b": 14.766, + "ifeval": 0.6051521075191603, + "bbh": 0.6317361472468987, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5301695478723404, + "hf_math_lvl5": 0.4788519637462236, + "hf_musr": 0.4859583333333333, + "hf_avg": 40.07735278337776 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite", + "name": "Calcium-Opus-14B-Elite", + "params_b": 14.766, + "ifeval": 0.6063511482865463, + "bbh": 0.6295900497885079, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.5306682180851063, + "hf_math_lvl5": 0.37084592145015105, + "hf_musr": 0.48732291666666666, + "hf_avg": 38.249365220151496 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite-1M", + "name": "Calcium-Opus-14B-Elite-1M", + "params_b": 14.77, + "ifeval": 0.5612884923115112, + "bbh": 0.6329399079569701, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.5152094414893617, + "hf_math_lvl5": 0.44561933534743203, + "hf_musr": 0.46760416666666665, + "hf_avg": 37.61517905195512 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite2", + "name": "Calcium-Opus-14B-Elite2", + "params_b": 14.766, + "ifeval": 0.6176168122803052, + "bbh": 0.6318256156619112, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.5300864361702128, + "hf_math_lvl5": 0.4690332326283988, + "hf_musr": 0.49395833333333333, + "hf_avg": 40.249808935674835 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", + "name": "Calcium-Opus-14B-Elite2-R1", + "params_b": 14.766, + "ifeval": 0.6325793339450436, + "bbh": 0.6362357624539174, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5247672872340425, + "hf_math_lvl5": 0.3338368580060423, + "hf_musr": 0.48998958333333337, + "hf_avg": 38.56373181629569 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite3", + "name": "Calcium-Opus-14B-Elite3", + "params_b": 14.766, + "ifeval": 0.5428285837134359, + "bbh": 0.6350402275340573, + "gpqa": 0.37080536912751677, + "mmlu_pro": 0.5334940159574468, + "hf_math_lvl5": 0.4705438066465257, + "hf_musr": 0.4794791666666667, + "hf_avg": 38.80335311719453 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-14B-Elite4", + "name": "Calcium-Opus-14B-Elite4", + "params_b": 14.766, + "ifeval": 0.6111971790405014, + "bbh": 0.6195264951573699, + "gpqa": 0.35570469798657717, + "mmlu_pro": 0.514876994680851, + "hf_math_lvl5": 0.36253776435045315, + "hf_musr": 0.46871875, + "hf_avg": 36.74386944728247 + }, + { + "hf_id": "prithivMLmods/Calcium-Opus-20B-v1", + "name": "Calcium-Opus-20B-v1", + "params_b": 19.173, + "ifeval": 0.3092716215197897, + "bbh": 0.599033246250772, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.4734042553191489, + "hf_math_lvl5": 0.36178247734138974, + "hf_musr": 0.49433333333333335, + "hf_avg": 31.041733901500965 + }, + { + "hf_id": "prithivMLmods/Codepy-Deepthink-3B", + "name": "Codepy-Deepthink-3B", + "params_b": 3.213, + "ifeval": 0.43271962836385236, + "bbh": 0.4259451388094382, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.3090093085106383, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.3310208333333333, + "hf_avg": 17.43076520482992 + }, + { + "hf_id": "prithivMLmods/Coma-II-14B", + "name": "Coma-II-14B", + "params_b": 14.766, + "ifeval": 0.416832892281369, + "bbh": 0.6320713788922736, + "gpqa": 0.4001677852348993, + "mmlu_pro": 0.5039893617021277, + "hf_math_lvl5": 0.5513595166163142, + "hf_musr": 0.5351041666666667, + "hf_avg": 39.45146855133309 + }, + { + "hf_id": "prithivMLmods/Condor-Opus-14B-Exp", + "name": "Condor-Opus-14B-Exp", + "params_b": 14.77, + "ifeval": 0.40431831983581346, + "bbh": 0.6154220154262888, + "gpqa": 0.39177852348993286, + "mmlu_pro": 0.5014128989361702, + "hf_math_lvl5": 0.5226586102719033, + "hf_musr": 0.5193854166666667, + "hf_avg": 37.617199777677776 + }, + { + "hf_id": "prithivMLmods/Cygnus-II-14B", + "name": "Cygnus-II-14B", + "params_b": 14.766, + "ifeval": 0.6184412913292286, + "bbh": 0.6660565208074918, + "gpqa": 0.3875838926174497, + "mmlu_pro": 0.5390625, + "hf_math_lvl5": 0.4395770392749245, + "hf_musr": 0.46884375, + "hf_avg": 40.52948756458168 + }, + { + "hf_id": "prithivMLmods/Deepthink-Llama-3-8B-Preview", + "name": "Deepthink-Llama-3-8B-Preview", + "params_b": 8.03, + "ifeval": 0.29553252037926037, + "bbh": 0.4664510845126107, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.2738530585106383, + "hf_math_lvl5": 0.3549848942598187, + "hf_musr": 0.37070833333333336, + "hf_avg": 20.957476415581784 + }, + { + "hf_id": "prithivMLmods/Deepthink-Reasoning-14B", + "name": "Deepthink-Reasoning-14B", + "params_b": 14.77, + "ifeval": 0.5423542866261519, + "bbh": 0.6334054936091441, + "gpqa": 0.36661073825503354, + "mmlu_pro": 0.5295877659574468, + "hf_math_lvl5": 0.4229607250755287, + "hf_musr": 0.47315625, + "hf_avg": 37.765949130344175 + }, + { + "hf_id": "prithivMLmods/Deepthink-Reasoning-7B", + "name": "Deepthink-Reasoning-7B", + "params_b": 7.616, + "ifeval": 0.48400244684104843, + "bbh": 0.5505070216145282, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.43492353723404253, + "hf_math_lvl5": 0.33459214501510576, + "hf_musr": 0.4432291666666666, + "hf_avg": 29.122241455484666 + }, + { + "hf_id": "prithivMLmods/Dinobot-Opus-14B-Exp", + "name": "Dinobot-Opus-14B-Exp", + "params_b": 14.77, + "ifeval": 0.8239958864701216, + "bbh": 0.6370093752306357, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.4979222074468085, + "hf_math_lvl5": 0.5317220543806647, + "hf_musr": 0.42603125000000003, + "hf_avg": 41.765081012881176 + }, + { + "hf_id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", + "name": "Elita-0.1-Distilled-R1-abliterated", + "params_b": 7.616, + "ifeval": 0.35423454212600347, + "bbh": 0.38277850218543213, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.2757646276595745, + "hf_math_lvl5": 0.3066465256797583, + "hf_musr": 0.36596875, + "hf_avg": 17.39921663367112 + }, + { + "hf_id": "prithivMLmods/Elita-1", + "name": "Elita-1", + "params_b": 14.766, + "ifeval": 0.4906470387460826, + "bbh": 0.6520409113818334, + "gpqa": 0.37583892617449666, + "mmlu_pro": 0.5381482712765957, + "hf_math_lvl5": 0.3429003021148036, + "hf_musr": 0.48341666666666666, + "hf_avg": 36.545369563806254 + }, + { + "hf_id": "prithivMLmods/Epimetheus-14B-Axo", + "name": "Epimetheus-14B-Axo", + "params_b": 14.766, + "ifeval": 0.554643900406477, + "bbh": 0.6613340892011862, + "gpqa": 0.3926174496644295, + "mmlu_pro": 0.5304188829787234, + "hf_math_lvl5": 0.41012084592145015, + "hf_musr": 0.4819583333333333, + "hf_avg": 39.08056006814201 + }, + { + "hf_id": "prithivMLmods/Equuleus-Opus-14B-Exp", + "name": "Equuleus-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.7000735825387749, + "bbh": 0.6433769213927613, + "gpqa": 0.38674496644295303, + "mmlu_pro": 0.5374002659574468, + "hf_math_lvl5": 0.45845921450151056, + "hf_musr": 0.4951666666666667, + "hf_avg": 42.199750941616415 + }, + { + "hf_id": "prithivMLmods/Eridanus-Opus-14B-r999", + "name": "Eridanus-Opus-14B-r999", + "params_b": 14.77, + "ifeval": 0.638574537781974, + "bbh": 0.6583918169279829, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.5361535904255319, + "hf_math_lvl5": 0.3859516616314199, + "hf_musr": 0.476875, + "hf_avg": 40.111313344425874 + }, + { + "hf_id": "prithivMLmods/Evac-Opus-14B-Exp", + "name": "Evac-Opus-14B-Exp", + "params_b": 14.77, + "ifeval": 0.5916135852870383, + "bbh": 0.6475440673701862, + "gpqa": 0.3884228187919463, + "mmlu_pro": 0.5316655585106383, + "hf_math_lvl5": 0.4214501510574018, + "hf_musr": 0.47278125, + "hf_avg": 39.323054993397584 + }, + { + "hf_id": "prithivMLmods/FastThink-0.5B-Tiny", + "name": "FastThink-0.5B-Tiny", + "params_b": 0.494, + "ifeval": 0.25798880304259364, + "bbh": 0.3205583807088257, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.16489361702127658, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.3566354166666667, + "hf_avg": 7.516955448134702 + }, + { + "hf_id": "prithivMLmods/GWQ-9B-Preview", + "name": "GWQ-9B-Preview", + "params_b": 9.242, + "ifeval": 0.5065836425129767, + "bbh": 0.5805745804247511, + "gpqa": 0.33976510067114096, + "mmlu_pro": 0.39835438829787234, + "hf_math_lvl5": 0.22658610271903323, + "hf_musr": 0.4951041666666667, + "hf_avg": 30.15453648521797 + }, + { + "hf_id": "prithivMLmods/GWQ-9B-Preview2", + "name": "GWQ-9B-Preview2", + "params_b": 9.242, + "ifeval": 0.5208967761096114, + "bbh": 0.5797218710843371, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.3996841755319149, + "hf_math_lvl5": 0.23716012084592145, + "hf_musr": 0.48598958333333336, + "hf_avg": 30.047187909173402 + }, + { + "hf_id": "prithivMLmods/GWQ2b", + "name": "GWQ2b", + "params_b": 2.614, + "ifeval": 0.41148707651254224, + "bbh": 0.41433702954085216, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.24725731382978725, + "hf_math_lvl5": 0.06268882175226587, + "hf_musr": 0.43111458333333336, + "hf_avg": 16.42971150895339 + }, + { + "hf_id": "prithivMLmods/Gaea-Opus-14B-Exp", + "name": "Gaea-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.5956351369920699, + "bbh": 0.6560465337491567, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5400598404255319, + "hf_math_lvl5": 0.42749244712990936, + "hf_musr": 0.48589583333333336, + "hf_avg": 40.11380825495326 + }, + { + "hf_id": "prithivMLmods/Galactic-Qwen-14B-Exp1", + "name": "Galactic-Qwen-14B-Exp1", + "params_b": 14.766, + "ifeval": 0.5832202999153357, + "bbh": 0.6582262489447345, + "gpqa": 0.3934563758389262, + "mmlu_pro": 0.539561170212766, + "hf_math_lvl5": 0.40181268882175225, + "hf_musr": 0.4780520833333333, + "hf_avg": 39.469504673008466 + }, + { + "hf_id": "prithivMLmods/Galactic-Qwen-14B-Exp2", + "name": "Galactic-Qwen-14B-Exp2", + "params_b": 14.766, + "ifeval": 0.6620300801872365, + "bbh": 0.7203002699449659, + "gpqa": 0.39932885906040266, + "mmlu_pro": 0.5690658244680851, + "hf_math_lvl5": 0.3474320241691843, + "hf_musr": 0.5353854166666667, + "hf_avg": 43.56371836153858 + }, + { + "hf_id": "prithivMLmods/Gauss-Opus-14B-R999", + "name": "Gauss-Opus-14B-R999", + "params_b": 14.77, + "ifeval": 0.39065457430728245, + "bbh": 0.6227831608555382, + "gpqa": 0.39177852348993286, + "mmlu_pro": 0.500748005319149, + "hf_math_lvl5": 0.5755287009063444, + "hf_musr": 0.5338333333333334, + "hf_avg": 38.802494751158555 + }, + { + "hf_id": "prithivMLmods/Jolt-v0.1", + "name": "Jolt-v0.1", + "params_b": 14.766, + "ifeval": 0.5092066827129793, + "bbh": 0.6521408461659391, + "gpqa": 0.3800335570469799, + "mmlu_pro": 0.5386469414893617, + "hf_math_lvl5": 0.3564954682779456, + "hf_musr": 0.48471875000000003, + "hf_avg": 37.194359554127736 + }, + { + "hf_id": "prithivMLmods/Lacerta-Opus-14B-Elite8", + "name": "Lacerta-Opus-14B-Elite8", + "params_b": 14.766, + "ifeval": 0.614144913274556, + "bbh": 0.6401384743047456, + "gpqa": 0.3783557046979866, + "mmlu_pro": 0.5321642287234043, + "hf_math_lvl5": 0.3648036253776435, + "hf_musr": 0.4635416666666667, + "hf_avg": 38.069826308128185 + }, + { + "hf_id": "prithivMLmods/Llama-3.1-5B-Instruct", + "name": "Llama-3.1-5B-Instruct", + "params_b": 5.413, + "ifeval": 0.14066011516110588, + "bbh": 0.3051074188361172, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.11835106382978723, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.35400000000000004, + "hf_avg": 4.207173577211278 + }, + { + "hf_id": "prithivMLmods/Llama-3.1-8B-Open-SFT", + "name": "Llama-3.1-8B-Open-SFT", + "params_b": 8.03, + "ifeval": 0.4122616878770551, + "bbh": 0.4967982234773378, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.35222739361702127, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.39036458333333335, + "hf_avg": 21.04370403547396 + }, + { + "hf_id": "prithivMLmods/Llama-3.2-3B-Math-Oct", + "name": "Llama-3.2-3B-Math-Oct", + "params_b": 3.213, + "ifeval": 0.4585233846194763, + "bbh": 0.4371840952508727, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.2911402925531915, + "hf_math_lvl5": 0.11555891238670694, + "hf_musr": 0.34698958333333335, + "hf_avg": 17.441953756100272 + }, + { + "hf_id": "prithivMLmods/Llama-3.2-6B-AlgoCode", + "name": "Llama-3.2-6B-AlgoCode", + "params_b": 6.339, + "ifeval": 0.21357553513566227, + "bbh": 0.37477424449567703, + "gpqa": 0.2869127516778524, + "mmlu_pro": 0.17977061170212766, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.40134374999999994, + "hf_avg": 9.301000017101474 + }, + { + "hf_id": "prithivMLmods/Llama-8B-Distill-CoT", + "name": "Llama-8B-Distill-CoT", + "params_b": 8.03, + "ifeval": 0.3341511633576688, + "bbh": 0.4297620873695442, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.273188164893617, + "hf_math_lvl5": 0.4003021148036254, + "hf_musr": 0.3719791666666667, + "hf_avg": 20.756374392650613 + }, + { + "hf_id": "prithivMLmods/Llama-Deepsync-1B", + "name": "Llama-Deepsync-1B", + "params_b": 1.236, + "ifeval": 0.3570071853792382, + "bbh": 0.33856262083940014, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.17378656914893617, + "hf_math_lvl5": 0.04380664652567976, + "hf_musr": 0.35651041666666666, + "hf_avg": 10.269419049452202 + }, + { + "hf_id": "prithivMLmods/Llama-Deepsync-3B", + "name": "Llama-Deepsync-3B", + "params_b": 3.213, + "ifeval": 0.4302218114602588, + "bbh": 0.4291521655271033, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.3031083776595745, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.33238541666666666, + "hf_avg": 17.176506909330996 + }, + { + "hf_id": "prithivMLmods/Llama-Express.1-Math", + "name": "Llama-Express.1-Math", + "params_b": 1.236, + "ifeval": 0.5084320713484665, + "bbh": 0.33638140090435265, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.16098736702127658, + "hf_math_lvl5": 0.055891238670694864, + "hf_musr": 0.31434375, + "hf_avg": 12.170622691501343 + }, + { + "hf_id": "prithivMLmods/LwQ-10B-Instruct", + "name": "LwQ-10B-Instruct", + "params_b": 10.732, + "ifeval": 0.3934770852449279, + "bbh": 0.5121712029712329, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.331781914893617, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.45439583333333333, + "hf_avg": 20.967461043098936 + }, + { + "hf_id": "prithivMLmods/LwQ-Reasoner-10B", + "name": "LwQ-Reasoner-10B", + "params_b": 10.306, + "ifeval": 0.29413400887423147, + "bbh": 0.5866254169962443, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.41472739361702127, + "hf_math_lvl5": 0.3580060422960725, + "hf_musr": 0.40785416666666663, + "hf_avg": 26.99437823708219 + }, + { + "hf_id": "prithivMLmods/Magellanic-Opus-14B-Exp", + "name": "Magellanic-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.6866347956754744, + "bbh": 0.6382505935140227, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5272606382978723, + "hf_math_lvl5": 0.37990936555891236, + "hf_musr": 0.49262500000000004, + "hf_avg": 40.055124365076594 + }, + { + "hf_id": "prithivMLmods/Megatron-Corpus-14B-Exp", + "name": "Megatron-Corpus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.49826571275327247, + "bbh": 0.6355171004470184, + "gpqa": 0.36325503355704697, + "mmlu_pro": 0.5260139627659575, + "hf_math_lvl5": 0.3429003021148036, + "hf_musr": 0.4766875, + "hf_avg": 35.55395685759063 + }, + { + "hf_id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", + "name": "Megatron-Corpus-14B-Exp.v2", + "params_b": 14.766, + "ifeval": 0.48704991644392437, + "bbh": 0.632146083740281, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.48096742021276595, + "hf_math_lvl5": 0.2590634441087613, + "hf_musr": 0.449, + "hf_avg": 31.898674141338944 + }, + { + "hf_id": "prithivMLmods/Megatron-Opus-14B-2.0", + "name": "Megatron-Opus-14B-2.0", + "params_b": 14.66, + "ifeval": 0.6693739278447852, + "bbh": 0.6870557211788685, + "gpqa": 0.35906040268456374, + "mmlu_pro": 0.5170378989361702, + "hf_math_lvl5": 0.27794561933534745, + "hf_musr": 0.41403125, + "hf_avg": 36.80518021675177 + }, + { + "hf_id": "prithivMLmods/Megatron-Opus-14B-2.1", + "name": "Megatron-Opus-14B-2.1", + "params_b": 14.66, + "ifeval": 0.02455484780382718, + "bbh": 0.6726960005125086, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.5173703457446809, + "hf_math_lvl5": 0.2998489425981873, + "hf_musr": 0.49275, + "hf_avg": 28.50969719045679 + }, + { + "hf_id": "prithivMLmods/Megatron-Opus-14B-Exp", + "name": "Megatron-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.4979410187192206, + "bbh": 0.6516090109599467, + "gpqa": 0.375, + "mmlu_pro": 0.5400598404255319, + "hf_math_lvl5": 0.35347432024169184, + "hf_musr": 0.48865625, + "hf_avg": 36.964774794825566 + }, + { + "hf_id": "prithivMLmods/Megatron-Opus-7B-Exp", + "name": "Megatron-Opus-7B-Exp", + "params_b": 7.456, + "ifeval": 0.6017300761978217, + "bbh": 0.5367154102661396, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.3900432180851064, + "hf_math_lvl5": 0.1971299093655589, + "hf_musr": 0.4185833333333333, + "hf_avg": 27.617726041323735 + }, + { + "hf_id": "prithivMLmods/Messier-Opus-14B-Elite7", + "name": "Messier-Opus-14B-Elite7", + "params_b": 14.766, + "ifeval": 0.7113392465325337, + "bbh": 0.6498611961862557, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5403922872340425, + "hf_math_lvl5": 0.4070996978851964, + "hf_musr": 0.4885625, + "hf_avg": 41.66277236825811 + }, + { + "hf_id": "prithivMLmods/Pegasus-Opus-14B-Exp", + "name": "Pegasus-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.6981752860188744, + "bbh": 0.6547548394062034, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5412234042553191, + "hf_math_lvl5": 0.4086102719033233, + "hf_musr": 0.4859583333333333, + "hf_avg": 41.62328046376648 + }, + { + "hf_id": "prithivMLmods/Phi-4-Empathetic", + "name": "Phi-4-Empathetic", + "params_b": 14.66, + "ifeval": 0.049659348306936704, + "bbh": 0.6726820578371974, + "gpqa": 0.3800335570469799, + "mmlu_pro": 0.5065658244680851, + "hf_math_lvl5": 0.2620845921450151, + "hf_musr": 0.49913541666666666, + "hf_avg": 28.208396720096133 + }, + { + "hf_id": "prithivMLmods/Phi-4-Math-IO", + "name": "Phi-4-Math-IO", + "params_b": 14.66, + "ifeval": 0.05897684809638426, + "bbh": 0.6668255086606543, + "gpqa": 0.39848993288590606, + "mmlu_pro": 0.5205285904255319, + "hf_math_lvl5": 0.45770392749244715, + "hf_musr": 0.4872916666666667, + "hf_avg": 31.821782745368306 + }, + { + "hf_id": "prithivMLmods/Phi-4-QwQ", + "name": "Phi-4-QwQ", + "params_b": 14.66, + "ifeval": 0.05592937849350833, + "bbh": 0.6695574237334824, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.5275099734042553, + "hf_math_lvl5": 0.45770392749244715, + "hf_musr": 0.4650625, + "hf_avg": 31.262674551400632 + }, + { + "hf_id": "prithivMLmods/Phi-4-o1", + "name": "Phi-4-o1", + "params_b": 14.66, + "ifeval": 0.028976449154908976, + "bbh": 0.6688727399756971, + "gpqa": 0.3825503355704698, + "mmlu_pro": 0.5173703457446809, + "hf_math_lvl5": 0.3995468277945619, + "hf_musr": 0.49777083333333333, + "hf_avg": 30.20428963456544 + }, + { + "hf_id": "prithivMLmods/Porpoise-Opus-14B-Exp", + "name": "Porpoise-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.7098155117310957, + "bbh": 0.6518903547146537, + "gpqa": 0.3934563758389262, + "mmlu_pro": 0.5396442819148937, + "hf_math_lvl5": 0.4040785498489426, + "hf_musr": 0.4925625, + "hf_avg": 41.769424398512 + }, + { + "hf_id": "prithivMLmods/Primal-Opus-14B-Optimus-v1", + "name": "Primal-Opus-14B-Optimus-v1", + "params_b": 14.766, + "ifeval": 0.5013131823561483, + "bbh": 0.6419423743359406, + "gpqa": 0.3724832214765101, + "mmlu_pro": 0.5259308510638298, + "hf_math_lvl5": 0.338368580060423, + "hf_musr": 0.48471875000000003, + "hf_avg": 36.0644115881976 + }, + { + "hf_id": "prithivMLmods/Primal-Opus-14B-Optimus-v2", + "name": "Primal-Opus-14B-Optimus-v2", + "params_b": 14.766, + "ifeval": 0.6403730989330532, + "bbh": 0.6543780845512958, + "gpqa": 0.39177852348993286, + "mmlu_pro": 0.542220744680851, + "hf_math_lvl5": 0.4206948640483384, + "hf_musr": 0.48998958333333337, + "hf_avg": 40.91271332337514 + }, + { + "hf_id": "prithivMLmods/QwQ-LCoT-14B-Conversational", + "name": "QwQ-LCoT-14B-Conversational", + "params_b": 14.77, + "ifeval": 0.4047427492386867, + "bbh": 0.6239828933798323, + "gpqa": 0.3498322147651007, + "mmlu_pro": 0.527842420212766, + "hf_math_lvl5": 0.4652567975830816, + "hf_musr": 0.48471875000000003, + "hf_avg": 35.68306678902542 + }, + { + "hf_id": "prithivMLmods/QwQ-LCoT-3B-Instruct", + "name": "QwQ-LCoT-3B-Instruct", + "params_b": 3.086, + "ifeval": 0.4354424039326764, + "bbh": 0.47629783868435643, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.3582114361702128, + "hf_math_lvl5": 0.2824773413897281, + "hf_musr": 0.43579166666666663, + "hf_avg": 24.021306557737763 + }, + { + "hf_id": "prithivMLmods/QwQ-LCoT-7B-Instruct", + "name": "QwQ-LCoT-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.4986901421561457, + "bbh": 0.5466466326018563, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.4334275265957447, + "hf_math_lvl5": 0.3716012084592145, + "hf_musr": 0.4801875, + "hf_avg": 30.863799774866248 + }, + { + "hf_id": "prithivMLmods/QwQ-LCoT2-7B-Instruct", + "name": "QwQ-LCoT2-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.5561177675235043, + "bbh": 0.5424862934133593, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.4341755319148936, + "hf_math_lvl5": 0.3270392749244713, + "hf_musr": 0.4564375, + "hf_avg": 30.323930375701078 + }, + { + "hf_id": "prithivMLmods/QwQ-MathOct-7B", + "name": "QwQ-MathOct-7B", + "params_b": 7.616, + "ifeval": 0.4684404047926169, + "bbh": 0.5485512215016556, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.4330119680851064, + "hf_math_lvl5": 0.29531722054380666, + "hf_musr": 0.4600625, + "hf_avg": 28.497758657229422 + }, + { + "hf_id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", + "name": "QwQ-R1-Distill-1.5B-CoT", + "params_b": 1.777, + "ifeval": 0.21939564799177294, + "bbh": 0.36662076641982305, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.19132313829787234, + "hf_math_lvl5": 0.33459214501510576, + "hf_musr": 0.34339583333333334, + "hf_avg": 13.93165110675273 + }, + { + "hf_id": "prithivMLmods/QwQ-R1-Distill-7B-CoT", + "name": "QwQ-R1-Distill-7B-CoT", + "params_b": 7.616, + "ifeval": 0.3500378994401522, + "bbh": 0.438788672517715, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.2804188829787234, + "hf_math_lvl5": 0.46827794561933533, + "hf_musr": 0.37790624999999994, + "hf_avg": 22.192243475184284 + }, + { + "hf_id": "prithivMLmods/Qwen-7B-Distill-Reasoner", + "name": "Qwen-7B-Distill-Reasoner", + "params_b": 7.616, + "ifeval": 0.3395712265677292, + "bbh": 0.4409329229697952, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.2818317819148936, + "hf_math_lvl5": 0.3950151057401813, + "hf_musr": 0.36596874999999995, + "hf_avg": 21.484736250167103 + }, + { + "hf_id": "prithivMLmods/SmolLM2-CoT-360M", + "name": "SmolLM2-CoT-360M", + "params_b": 0.362, + "ifeval": 0.22156877086131466, + "bbh": 0.31352960121180296, + "gpqa": 0.23657718120805368, + "mmlu_pro": 0.1085438829787234, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.3793958333333333, + "hf_avg": 5.9507483871396545 + }, + { + "hf_id": "prithivMLmods/Sombrero-Opus-14B-Elite5", + "name": "Sombrero-Opus-14B-Elite5", + "params_b": 14.766, + "ifeval": 0.7880756393037142, + "bbh": 0.6501539892126272, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.520029920212766, + "hf_math_lvl5": 0.5354984894259819, + "hf_musr": 0.4286666666666667, + "hf_avg": 42.32332845547639 + }, + { + "hf_id": "prithivMLmods/Sombrero-Opus-14B-Elite6", + "name": "Sombrero-Opus-14B-Elite6", + "params_b": 14.766, + "ifeval": 0.7226049105262924, + "bbh": 0.6487937804559186, + "gpqa": 0.3934563758389262, + "mmlu_pro": 0.5389793882978723, + "hf_math_lvl5": 0.4078549848942598, + "hf_musr": 0.48859375, + "hf_avg": 41.88084511903062 + }, + { + "hf_id": "prithivMLmods/Sombrero-Opus-14B-Sm1", + "name": "Sombrero-Opus-14B-Sm1", + "params_b": 14.77, + "ifeval": 0.3812872068334242, + "bbh": 0.635462046379832, + "gpqa": 0.4035234899328859, + "mmlu_pro": 0.512466755319149, + "hf_math_lvl5": 0.5664652567975831, + "hf_musr": 0.5298958333333333, + "hf_avg": 39.223819597959995 + }, + { + "hf_id": "prithivMLmods/Sombrero-Opus-14B-Sm2", + "name": "Sombrero-Opus-14B-Sm2", + "params_b": 14.77, + "ifeval": 0.4272242095417935, + "bbh": 0.6609367219259568, + "gpqa": 0.3884228187919463, + "mmlu_pro": 0.5344913563829787, + "hf_math_lvl5": 0.486404833836858, + "hf_musr": 0.5088125, + "hf_avg": 38.980475230049855 + }, + { + "hf_id": "prithivMLmods/Sombrero-Opus-14B-Sm4", + "name": "Sombrero-Opus-14B-Sm4", + "params_b": 14.77, + "ifeval": 0.4346932804957513, + "bbh": 0.6612776404137711, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5300033244680851, + "hf_math_lvl5": 0.4879154078549849, + "hf_musr": 0.5191666666666667, + "hf_avg": 39.38545109416284 + }, + { + "hf_id": "prithivMLmods/Sombrero-Opus-14B-Sm5", + "name": "Sombrero-Opus-14B-Sm5", + "params_b": 14.766, + "ifeval": 0.6851609285584471, + "bbh": 0.6563944936055776, + "gpqa": 0.38674496644295303, + "mmlu_pro": 0.5399767287234043, + "hf_math_lvl5": 0.4093655589123867, + "hf_musr": 0.480625, + "hf_avg": 41.113179800786455 + }, + { + "hf_id": "prithivMLmods/Sqweeks-7B-Instruct", + "name": "Sqweeks-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.21579852568961466, + "bbh": 0.4666692459456812, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3133311170212766, + "hf_math_lvl5": 0.5143504531722054, + "hf_musr": 0.44760416666666664, + "hf_avg": 23.920659418100314 + }, + { + "hf_id": "prithivMLmods/Tadpole-Opus-14B-Exp", + "name": "Tadpole-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.5749522378400422, + "bbh": 0.636858708544215, + "gpqa": 0.3859060402684564, + "mmlu_pro": 0.5322473404255319, + "hf_math_lvl5": 0.31344410876132933, + "hf_musr": 0.47284375, + "hf_avg": 36.8786925782139 + }, + { + "hf_id": "prithivMLmods/Taurus-Opus-7B", + "name": "Taurus-Opus-7B", + "params_b": 7.456, + "ifeval": 0.42232831110342783, + "bbh": 0.5367364587851736, + "gpqa": 0.3263422818791946, + "mmlu_pro": 0.3951130319148936, + "hf_math_lvl5": 0.21676737160120846, + "hf_musr": 0.43988541666666664, + "hf_avg": 25.88865048007519 + }, + { + "hf_id": "prithivMLmods/Triangulum-10B", + "name": "Triangulum-10B", + "params_b": 10.306, + "ifeval": 0.3229353670483207, + "bbh": 0.5968023910391113, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.4178025265957447, + "hf_math_lvl5": 0.3549848942598187, + "hf_musr": 0.41724999999999995, + "hf_avg": 28.30066565051636 + }, + { + "hf_id": "prithivMLmods/Triangulum-5B", + "name": "Triangulum-5B", + "params_b": 5.413, + "ifeval": 0.1283206336963701, + "bbh": 0.3124115848614622, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.12234042553191489, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.3445416666666667, + "hf_avg": 4.0117920459317 + }, + { + "hf_id": "prithivMLmods/Triangulum-v2-10B", + "name": "Triangulum-v2-10B", + "params_b": 10.306, + "ifeval": 0.6705231009277606, + "bbh": 0.6064531367418446, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.44664228723404253, + "hf_math_lvl5": 0.24471299093655588, + "hf_musr": 0.42807291666666664, + "hf_avg": 32.83383929882561 + }, + { + "hf_id": "prithivMLmods/Tucana-Opus-14B-r999", + "name": "Tucana-Opus-14B-r999", + "params_b": 14.77, + "ifeval": 0.606725710005009, + "bbh": 0.6556888858891955, + "gpqa": 0.39177852348993286, + "mmlu_pro": 0.5383976063829787, + "hf_math_lvl5": 0.40634441087613293, + "hf_musr": 0.47303125, + "hf_avg": 39.75066582236797 + }, + { + "hf_id": "prithivMLmods/Tulu-MathLingo-8B", + "name": "Tulu-MathLingo-8B", + "params_b": 8.03, + "ifeval": 0.5589402784611497, + "bbh": 0.4658807905856453, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.304438164893617, + "hf_math_lvl5": 0.14501510574018128, + "hf_musr": 0.38642708333333337, + "hf_avg": 21.79779228784994 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-Hybrid-v1.2", + "name": "Viper-Coder-Hybrid-v1.2", + "params_b": 14.766, + "ifeval": 0.6735705705306365, + "bbh": 0.6390749226915919, + "gpqa": 0.37416107382550334, + "mmlu_pro": 0.5242686170212766, + "hf_math_lvl5": 0.3330815709969788, + "hf_musr": 0.48217708333333337, + "hf_avg": 38.82547529143809 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-Hybrid-v1.3", + "name": "Viper-Coder-Hybrid-v1.3", + "params_b": 14.766, + "ifeval": 0.7554776880898239, + "bbh": 0.6470999423290662, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.5097240691489362, + "hf_math_lvl5": 0.4516616314199396, + "hf_musr": 0.4403229166666667, + "hf_avg": 40.41199336479261 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-HybridMini-v1.3", + "name": "Viper-Coder-HybridMini-v1.3", + "params_b": 7.616, + "ifeval": 0.610372699991578, + "bbh": 0.5365472959273401, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.4351728723404255, + "hf_math_lvl5": 0.46299093655589124, + "hf_musr": 0.45048958333333333, + "hf_avg": 33.800748621674735 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-v0.1", + "name": "Viper-Coder-v0.1", + "params_b": 14.766, + "ifeval": 0.5521460835028835, + "bbh": 0.6143056870893655, + "gpqa": 0.3540268456375839, + "mmlu_pro": 0.3927859042553192, + "hf_math_lvl5": 0.3270392749244713, + "hf_musr": 0.43944791666666666, + "hf_avg": 31.99646602214953 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-v1.1", + "name": "Viper-Coder-v1.1", + "params_b": 14.77, + "ifeval": 0.443236168920686, + "bbh": 0.6492289468853992, + "gpqa": 0.401006711409396, + "mmlu_pro": 0.523188164893617, + "hf_math_lvl5": 0.5460725075528701, + "hf_musr": 0.5219270833333334, + "hf_avg": 40.26026084781071 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-v1.6-r999", + "name": "Viper-Coder-v1.6-r999", + "params_b": 14.77, + "ifeval": 0.4432860366050967, + "bbh": 0.6492289468853992, + "gpqa": 0.401006711409396, + "mmlu_pro": 0.523188164893617, + "hf_math_lvl5": 0.5657099697885196, + "hf_musr": 0.5219270833333334, + "hf_avg": 40.588383013145055 + }, + { + "hf_id": "prithivMLmods/Viper-Coder-v1.7-Vsm6", + "name": "Viper-Coder-v1.7-Vsm6", + "params_b": 14.766, + "ifeval": 0.5003889679384035, + "bbh": 0.6502342489348574, + "gpqa": 0.39681208053691275, + "mmlu_pro": 0.5287566489361702, + "hf_math_lvl5": 0.4645015105740181, + "hf_musr": 0.47675, + "hf_avg": 38.68288111891892 + }, + { + "hf_id": "prithivMLmods/Viper-OneCoder-UIGEN", + "name": "Viper-OneCoder-UIGEN", + "params_b": 14.77, + "ifeval": 0.4691895282295421, + "bbh": 0.6046507657311738, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.390375664893617, + "hf_math_lvl5": 0.3867069486404834, + "hf_musr": 0.45141666666666663, + "hf_avg": 31.347294081128084 + }, + { + "hf_id": "prithivMLmods/Volans-Opus-14B-Exp", + "name": "Volans-Opus-14B-Exp", + "params_b": 14.766, + "ifeval": 0.5867675545330834, + "bbh": 0.6521211711040636, + "gpqa": 0.3850671140939597, + "mmlu_pro": 0.5384807180851063, + "hf_math_lvl5": 0.425226586102719, + "hf_musr": 0.4871979166666667, + "hf_avg": 39.707074751250644 + }, + { + "hf_id": "prithivMLmods/WebMind-7B-v0.1", + "name": "WebMind-7B-v0.1", + "params_b": 7.616, + "ifeval": 0.5278161943642867, + "bbh": 0.5433559211614739, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.4279421542553192, + "hf_math_lvl5": 0.3648036253776435, + "hf_musr": 0.4537395833333333, + "hf_avg": 30.805047322905597 + }, + { + "hf_id": "pszemraj/Llama-3-6.3b-v0.1", + "name": "Llama-3-6.3b-v0.1", + "params_b": 6.3, + "ifeval": 0.10438968603305895, + "bbh": 0.41968070468284147, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2839926861702128, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3908333333333333, + "hf_avg": 10.384306670382523 + }, + { + "hf_id": "pszemraj/Mistral-v0.3-6B", + "name": "Mistral-v0.3-6B", + "params_b": 5.939, + "ifeval": 0.2453744952282167, + "bbh": 0.3774050646438491, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.2142619680851064, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.39077083333333335, + "hf_avg": 10.122379227914847 + }, + { + "hf_id": "qingy2019/LLaMa_3.2_3B_Catalysts", + "name": "LLaMa_3.2_3B_Catalysts", + "params_b": 3, + "ifeval": 0.499239794855428, + "bbh": 0.44681268798954793, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.30078125, + "hf_math_lvl5": 0.12915407854984895, + "hf_musr": 0.37877083333333333, + "hf_avg": 19.930930685006384 + }, + { + "hf_id": "qingy2019/Qwen2.5-Math-14B-Instruct", + "name": "Qwen2.5-Math-14B-Instruct", + "params_b": 14, + "ifeval": 0.6066259746361875, + "bbh": 0.6350068875885949, + "gpqa": 0.3724832214765101, + "mmlu_pro": 0.5330784574468085, + "hf_math_lvl5": 0.3716012084592145, + "hf_musr": 0.4757291666666667, + "hf_avg": 38.153923519286515 + }, + { + "hf_id": "qingy2019/Qwen2.5-Math-14B-Instruct", + "name": "Qwen2.5-Math-14B-Instruct", + "params_b": 14, + "ifeval": 0.6005310354304356, + "bbh": 0.6356492397286339, + "gpqa": 0.3691275167785235, + "mmlu_pro": 0.5339095744680851, + "hf_math_lvl5": 0.2764350453172205, + "hf_musr": 0.4756666666666667, + "hf_avg": 36.380504031909005 + }, + { + "hf_id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", + "name": "Qwen2.5-Math-14B-Instruct-Alpha", + "params_b": 14, + "ifeval": 0.5980830862112528, + "bbh": 0.6375080075350833, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.5330784574468085, + "hf_math_lvl5": 0.31419939577039274, + "hf_musr": 0.4649375, + "hf_avg": 36.84070516730529 + }, + { + "hf_id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", + "name": "Benchmaxx-Llama-3.2-1B-Instruct", + "params_b": 1.236, + "ifeval": 0.20136016879657087, + "bbh": 0.8269136508088061, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.11128656914893617, + "hf_math_lvl5": 0.48036253776435045, + "hf_musr": 0.3446354166666667, + "hf_avg": 25.69666706814397 + }, + { + "hf_id": "qingy2024/Falcon3-2x10B-MoE-Instruct", + "name": "Falcon3-2x10B-MoE-Instruct", + "params_b": 18.799, + "ifeval": 0.7849783020164276, + "bbh": 0.6184925726037823, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.44232047872340424, + "hf_math_lvl5": 0.2794561933534743, + "hf_musr": 0.42835416666666665, + "hf_avg": 35.53368391277467 + }, + { + "hf_id": "qingy2024/OwO-14B-Instruct", + "name": "OwO-14B-Instruct", + "params_b": 14.77, + "ifeval": 0.1383119013107444, + "bbh": 0.6164807172760662, + "gpqa": 0.3640939597315436, + "mmlu_pro": 0.5181183510638298, + "hf_math_lvl5": 0.4161631419939577, + "hf_musr": 0.44068749999999995, + "hf_avg": 29.28644722085275 + }, + { + "hf_id": "qingy2024/QwQ-14B-Math-v0.2", + "name": "QwQ-14B-Math-v0.2", + "params_b": 14.77, + "ifeval": 0.33909692948044523, + "bbh": 0.573097955260854, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.47997007978723405, + "hf_math_lvl5": 0.4811178247734139, + "hf_musr": 0.40209374999999997, + "hf_avg": 28.935415225800252 + }, + { + "hf_id": "qingy2024/Qwarkstar-4B-Instruct-Preview", + "name": "Qwarkstar-4B-Instruct-Preview", + "params_b": 4.473, + "ifeval": 0.5324372664530114, + "bbh": 0.43584381808469397, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.250249335106383, + "hf_math_lvl5": 0.1283987915407855, + "hf_musr": 0.38959374999999996, + "hf_avg": 18.873009662500476 + }, + { + "hf_id": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", + "name": "Qwen2.5-Math-14B-Instruct-Alpha", + "params_b": 14.77, + "ifeval": 0.7704402097545624, + "bbh": 0.646486159387426, + "gpqa": 0.348993288590604, + "mmlu_pro": 0.49659242021276595, + "hf_math_lvl5": 0.42900302114803623, + "hf_musr": 0.40209374999999997, + "hf_avg": 39.35285681111407 + }, + { + "hf_id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", + "name": "Qwen2.5-Math-14B-Instruct-Preview", + "params_b": 14.77, + "ifeval": 0.7825802204816554, + "bbh": 0.6293942245934432, + "gpqa": 0.34060402684563756, + "mmlu_pro": 0.49933510638297873, + "hf_math_lvl5": 0.47583081570996977, + "hf_musr": 0.4114583333333333, + "hf_avg": 39.91810674601417 + }, + { + "hf_id": "qingy2024/Qwen2.6-Math-14B-Instruct", + "name": "Qwen2.6-Math-14B-Instruct", + "params_b": 14, + "ifeval": 0.38623186478543603, + "bbh": 0.6324437508110833, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.5241023936170213, + "hf_math_lvl5": 0.42900302114803623, + "hf_musr": 0.4758541666666667, + "hf_avg": 35.19645398109978 + }, + { + "hf_id": "qq8933/OpenLongCoT-Base-Gemma2-2B", + "name": "OpenLongCoT-Base-Gemma2-2B", + "params_b": 3.204, + "ifeval": 0.1965141380426158, + "bbh": 0.3106362870893106, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.1315658244680851, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.32225, + "hf_avg": 5.473141918546371 + }, + { + "hf_id": "raphgg/test-2.5-72B", + "name": "test-2.5-72B", + "params_b": 72.706, + "ifeval": 0.8437047035199936, + "bbh": 0.7266099425567868, + "gpqa": 0.38926174496644295, + "mmlu_pro": 0.5836934840425532, + "hf_math_lvl5": 0.4108761329305136, + "hf_musr": 0.48118750000000005, + "hf_avg": 46.73987796559809 + }, + { + "hf_id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", + "name": "Mistral-NeMo-Minitron-8B-Chat", + "params_b": 8.414, + "ifeval": 0.4451843331249973, + "bbh": 0.47594353379058535, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2403590425531915, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.4304270833333333, + "hf_avg": 17.54564856271941 + }, + { + "hf_id": "rasyosef/Phi-1_5-Instruct-v0.1", + "name": "Phi-1_5-Instruct-v0.1", + "params_b": 1.415, + "ifeval": 0.24022815019703275, + "bbh": 0.3117898107092894, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.15616688829787234, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.34215625, + "hf_avg": 6.864747864349624 + }, + { + "hf_id": "rasyosef/phi-2-instruct-apo", + "name": "phi-2-instruct-apo", + "params_b": 2.775, + "ifeval": 0.31459194936102874, + "bbh": 0.44450964630048634, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.21550864361702127, + "hf_math_lvl5": 0.030211480362537766, + "hf_musr": 0.33421875, + "hf_avg": 12.547052522669906 + }, + { + "hf_id": "rasyosef/phi-2-instruct-v0.1", + "name": "phi-2-instruct-v0.1", + "params_b": 2.775, + "ifeval": 0.3681476260765879, + "bbh": 0.47261184292654473, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.22465093085106383, + "hf_musr": 0.3523541666666667, + "hf_avg": 14.218631101919177 + }, + { + "hf_id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "name": "Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "params_b": 10.159, + "ifeval": 0.7648949232480928, + "bbh": 0.597438766061506, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.4207114361702128, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.4244791666666667, + "hf_avg": 29.873991757143997 + }, + { + "hf_id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "name": "Gemma-2-Ataraxy-Gemmasutra-9B-slerp", + "params_b": 10.159, + "ifeval": 0.28536505361330156, + "bbh": 0.5983926033872208, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.4162234042553192, + "hf_math_lvl5": 0.10045317220543806, + "hf_musr": 0.46065625, + "hf_avg": 23.910552802690432 + }, + { + "hf_id": "recoilme/recoilme-gemma-2-9B-v0.2", + "name": "recoilme-gemma-2-9B-v0.2", + "params_b": 10.159, + "ifeval": 0.7591745457608035, + "bbh": 0.6025964285724085, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.41630651595744683, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.409875, + "hf_avg": 30.048864030373213 + }, + { + "hf_id": "recoilme/recoilme-gemma-2-9B-v0.2", + "name": "recoilme-gemma-2-9B-v0.2", + "params_b": 10.159, + "ifeval": 0.2746989100032359, + "bbh": 0.6030832642626502, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.4122340425531915, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.46859375, + "hf_avg": 23.76285134105471 + }, + { + "hf_id": "recoilme/recoilme-gemma-2-9B-v0.3", + "name": "recoilme-gemma-2-9B-v0.3", + "params_b": 10.159, + "ifeval": 0.743937197746424, + "bbh": 0.5992527878628748, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.4072473404255319, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.4203854166666667, + "hf_avg": 30.2074720895527 + }, + { + "hf_id": "recoilme/recoilme-gemma-2-9B-v0.3", + "name": "recoilme-gemma-2-9B-v0.3", + "params_b": 10.159, + "ifeval": 0.57607592299543, + "bbh": 0.6019827101058847, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.4039228723404255, + "hf_math_lvl5": 0.18882175226586104, + "hf_musr": 0.46322916666666664, + "hf_avg": 30.37598877443187 + }, + { + "hf_id": "recoilme/recoilme-gemma-2-9B-v0.4", + "name": "recoilme-gemma-2-9B-v0.4", + "params_b": 10.159, + "ifeval": 0.2561891337207498, + "bbh": 0.5967285833554881, + "gpqa": 0.34060402684563756, + "mmlu_pro": 0.4405751329787234, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.4726875, + "hf_avg": 24.138127567307112 + }, + { + "hf_id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", + "name": "AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", + "params_b": 12.248, + "ifeval": 0.5359590331431713, + "bbh": 0.5128840998052852, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.3179853723404255, + "hf_math_lvl5": 0.11329305135951662, + "hf_musr": 0.38178124999999996, + "hf_avg": 22.776537692077188 + }, + { + "hf_id": "redrix/patricide-12B-Unslop-Mell", + "name": "patricide-12B-Unslop-Mell", + "params_b": 12.248, + "ifeval": 0.40739016919551235, + "bbh": 0.5398666865853622, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.3570478723404255, + "hf_math_lvl5": 0.13141993957703926, + "hf_musr": 0.4025833333333333, + "hf_avg": 23.021830606105613 + }, + { + "hf_id": "refuelai/Llama-3-Refueled", + "name": "Llama-3-Refueled", + "params_b": 8.03, + "ifeval": 0.4619952836252255, + "bbh": 0.5870766201705051, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.30950797872340424, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.44540625, + "hf_avg": 23.18144830627433 + }, + { + "hf_id": "rhymes-ai/Aria", + "name": "Aria", + "params_b": 25.307, + "ifeval": 0.4773079872516035, + "bbh": 0.5695312446413633, + "gpqa": 0.3624161073825503, + "mmlu_pro": 0.44049202127659576, + "hf_math_lvl5": 0.1933534743202417, + "hf_musr": 0.43375, + "hf_avg": 28.870163995252046 + }, + { + "hf_id": "rhysjones/phi-2-orange-v2", + "name": "phi-2-orange-v2", + "params_b": 2.78, + "ifeval": 0.3669740732367895, + "bbh": 0.4770220109816213, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.25324135638297873, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.3629583333333333, + "hf_avg": 15.324185096371076 + }, + { + "hf_id": "riaz/FineLlama-3.1-8B", + "name": "FineLlama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.43734070045257695, + "bbh": 0.45857296498013483, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.29637632978723405, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.3762916666666667, + "hf_avg": 17.660648060300424 + }, + { + "hf_id": "riaz/FineLlama-3.1-8B", + "name": "FineLlama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.413660199382084, + "bbh": 0.456451981676995, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.29778922872340424, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.37762500000000004, + "hf_avg": 17.14751095671923 + }, + { + "hf_id": "rmdhirr/Gluon-8B", + "name": "Gluon-8B", + "params_b": 8.03, + "ifeval": 0.5052848663767692, + "bbh": 0.5153305292144984, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.38081781914893614, + "hf_math_lvl5": 0.14425981873111782, + "hf_musr": 0.4038854166666667, + "hf_avg": 23.97696294457469 + }, + { + "hf_id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", + "name": "Rombos-Coder-V2.5-Qwen-14b", + "params_b": 14.77, + "ifeval": 0.7047445223119102, + "bbh": 0.6165135323666455, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3939494680851064, + "hf_math_lvl5": 0.3300604229607251, + "hf_musr": 0.3914583333333333, + "hf_avg": 32.44560837708102 + }, + { + "hf_id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", + "name": "Rombos-Coder-V2.5-Qwen-7b", + "params_b": 7.616, + "ifeval": 0.6210388436016436, + "bbh": 0.5077090028113894, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.33976063829787234, + "hf_math_lvl5": 0.3338368580060423, + "hf_musr": 0.3979375, + "hf_avg": 27.405414768306354 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", + "name": "Rombos-LLM-V2.5-Qwen-0.5b", + "params_b": 0.63, + "ifeval": 0.28466690603155187, + "bbh": 0.32936751831436256, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.18658577127659576, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.32358333333333333, + "hf_avg": 9.38591999487923 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", + "name": "Rombos-LLM-V2.5-Qwen-1.5b", + "params_b": 1.777, + "ifeval": 0.3402461025634206, + "bbh": 0.4256703145864387, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.2922207446808511, + "hf_math_lvl5": 0.08534743202416918, + "hf_musr": 0.4185520833333333, + "hf_avg": 16.35438589717866 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", + "name": "Rombos-LLM-V2.5-Qwen-14b", + "params_b": 14.77, + "ifeval": 0.5840447789642593, + "bbh": 0.6481086261669653, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5375664893617021, + "hf_math_lvl5": 0.4554380664652568, + "hf_musr": 0.4717291666666667, + "hf_avg": 39.50095591766085 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", + "name": "Rombos-LLM-V2.5-Qwen-32b", + "params_b": 32.764, + "ifeval": 0.6826631116548536, + "bbh": 0.7045537070859799, + "gpqa": 0.39681208053691275, + "mmlu_pro": 0.5915890957446809, + "hf_math_lvl5": 0.4954682779456193, + "hf_musr": 0.5034166666666667, + "hf_avg": 45.83301184834238 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", + "name": "Rombos-LLM-V2.5-Qwen-3b", + "params_b": 3.397, + "ifeval": 0.5342358276040905, + "bbh": 0.4808896246368473, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.37608045212765956, + "hf_math_lvl5": 0.2794561933534743, + "hf_musr": 0.4041666666666666, + "hf_avg": 25.921782051813356 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", + "name": "Rombos-LLM-V2.5-Qwen-72b", + "params_b": 72.706, + "ifeval": 0.715535889218385, + "bbh": 0.7229589065788488, + "gpqa": 0.39848993288590606, + "mmlu_pro": 0.593500664893617, + "hf_math_lvl5": 0.5422960725075529, + "hf_musr": 0.4599166666666667, + "hf_avg": 46.50088713558663 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", + "name": "Rombos-LLM-V2.5-Qwen-7b", + "params_b": 7.616, + "ifeval": 0.6237117514860571, + "bbh": 0.5543885046903589, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.4468916223404255, + "hf_math_lvl5": 0.3814199395770393, + "hf_musr": 0.42909375, + "hf_avg": 32.74880353587022 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", + "name": "Rombos-LLM-V2.5.1-Qwen-3b", + "params_b": 3.397, + "ifeval": 0.2595125378440316, + "bbh": 0.3884043024656656, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.27194148936170215, + "hf_math_lvl5": 0.09138972809667675, + "hf_musr": 0.39911458333333333, + "hf_avg": 13.357124972053427 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", + "name": "Rombos-LLM-V2.5.1-Qwen-3b", + "params_b": 3.397, + "ifeval": 0.2566401592219755, + "bbh": 0.39000839740376536, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.27410239361702127, + "hf_math_lvl5": 0.12084592145015106, + "hf_musr": 0.39911458333333333, + "hf_avg": 13.608595258365547 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", + "name": "Rombos-LLM-V2.6-Nemotron-70b", + "params_b": 70.554, + "ifeval": 0.7526551771521784, + "bbh": 0.6937699482580332, + "gpqa": 0.40604026845637586, + "mmlu_pro": 0.5329122340425532, + "hf_math_lvl5": 0.3330815709969788, + "hf_musr": 0.46686458333333336, + "hf_avg": 41.94623049260674 + }, + { + "hf_id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", + "name": "Rombos-LLM-V2.6-Qwen-14b", + "params_b": 14.77, + "ifeval": 0.8431550508207113, + "bbh": 0.6442096596344892, + "gpqa": 0.3338926174496644, + "mmlu_pro": 0.49609375, + "hf_math_lvl5": 0.5211480362537765, + "hf_musr": 0.4220625, + "hf_avg": 42.19934519573359 + }, + { + "hf_id": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", + "name": "rombos_Replete-Coder-Instruct-8b-Merged", + "params_b": 8.03, + "ifeval": 0.5387571643239937, + "bbh": 0.4461693860075828, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.18085106382978725, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.36603125, + "hf_avg": 16.433823987631932 + }, + { + "hf_id": "rombodawg/rombos_Replete-Coder-Llama3-8B", + "name": "rombos_Replete-Coder-Llama3-8B", + "params_b": 8.03, + "ifeval": 0.4714125187834945, + "bbh": 0.32762771025266835, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.13347739361702127, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.39663541666666663, + "hf_avg": 11.971032787867797 + }, + { + "hf_id": "rootxhacker/Apollo-70B", + "name": "Apollo-70B", + "params_b": 70.554, + "ifeval": 0.5098560707810831, + "bbh": 0.6804215148524603, + "gpqa": 0.45721476510067116, + "mmlu_pro": 0.5279255319148937, + "hf_math_lvl5": 0.5611782477341389, + "hf_musr": 0.4947708333333333, + "hf_avg": 43.15901797697317 + }, + { + "hf_id": "rootxhacker/Apollo_v2-32B", + "name": "Apollo_v2-32B", + "params_b": 32.764, + "ifeval": 0.4280486885907171, + "bbh": 0.7072274795963693, + "gpqa": 0.3783557046979866, + "mmlu_pro": 0.5869348404255319, + "hf_math_lvl5": 0.42749244712990936, + "hf_musr": 0.4993854166666667, + "hf_avg": 39.81170120905305 + }, + { + "hf_id": "rootxhacker/apollo-7B", + "name": "apollo-7B", + "params_b": 7.616, + "ifeval": 0.29533304964161755, + "bbh": 0.3636262699883149, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.17478390957446807, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.41312499999999996, + "hf_avg": 10.721175786000847 + }, + { + "hf_id": "rubenroy/Geneva-12B-GCv2-5m", + "name": "Geneva-12B-GCv2-5m", + "params_b": 12.248, + "ifeval": 0.2586381911106974, + "bbh": 0.5278373390214104, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.3249667553191489, + "hf_math_lvl5": 0.08006042296072508, + "hf_musr": 0.3524791666666667, + "hf_avg": 16.956630719456832 + }, + { + "hf_id": "rubenroy/Gilgamesh-72B", + "name": "Gilgamesh-72B", + "params_b": 72.706, + "ifeval": 0.8486006019583594, + "bbh": 0.7253327589560739, + "gpqa": 0.39429530201342283, + "mmlu_pro": 0.5802027925531915, + "hf_math_lvl5": 0.4380664652567976, + "hf_musr": 0.46264583333333337, + "hf_avg": 46.793671661266096 + }, + { + "hf_id": "rubenroy/Zurich-14B-GCv2-5m", + "name": "Zurich-14B-GCv2-5m", + "params_b": 14.77, + "ifeval": 0.6163679038285084, + "bbh": 0.6308359017750411, + "gpqa": 0.3615771812080537, + "mmlu_pro": 0.5232712765957447, + "hf_math_lvl5": 0.3074018126888218, + "hf_musr": 0.4874479166666667, + "hf_avg": 37.06368897687889 + }, + { + "hf_id": "ruizhe1217/sft-s1-qwen-0.5b", + "name": "sft-s1-qwen-0.5b", + "params_b": 0.494, + "ifeval": 0.27487510915482033, + "bbh": 0.33005365550588683, + "gpqa": 0.27097315436241615, + "mmlu_pro": 0.1891622340425532, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.31958333333333333, + "hf_avg": 9.240285567836873 + }, + { + "hf_id": "sabersaleh/Llama2-7B-CPO", + "name": "Llama2-7B-CPO", + "params_b": 7, + "ifeval": 0.1545488193548673, + "bbh": 0.3457919655499851, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.1605718085106383, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.40482291666666664, + "hf_avg": 7.303190047047342 + }, + { + "hf_id": "sabersaleh/Llama2-7B-DPO", + "name": "Llama2-7B-DPO", + "params_b": 7, + "ifeval": 0.14533105493424114, + "bbh": 0.3512218731420535, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.16256648936170212, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.4113645833333333, + "hf_avg": 7.558004575587002 + }, + { + "hf_id": "sabersaleh/Llama2-7B-IPO", + "name": "Llama2-7B-IPO", + "params_b": 7, + "ifeval": 0.17685518867715438, + "bbh": 0.3474552716912811, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.16173537234042554, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.4047604166666667, + "hf_avg": 7.804715247914783 + }, + { + "hf_id": "sabersaleh/Llama2-7B-KTO", + "name": "Llama2-7B-KTO", + "params_b": 7, + "ifeval": 0.15284999357260956, + "bbh": 0.35007577568366255, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.1636469414893617, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.41669791666666667, + "hf_avg": 7.882716013659315 + }, + { + "hf_id": "sabersaleh/Llama2-7B-SPO", + "name": "Llama2-7B-SPO", + "params_b": 7, + "ifeval": 0.15667207453999832, + "bbh": 0.33834029554844597, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.17569813829787234, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.3874270833333333, + "hf_avg": 7.352632078107196 + }, + { + "hf_id": "sabersaleh/Llama2-7B-SimPO", + "name": "Llama2-7B-SimPO", + "params_b": 7, + "ifeval": 0.1658643510330368, + "bbh": 0.34891553101294254, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.16414561170212766, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.40069791666666665, + "hf_avg": 7.610783251808889 + }, + { + "hf_id": "sabersaleh/Llama3", + "name": "Llama3", + "params_b": 8.03, + "ifeval": 0.3320777758569484, + "bbh": 0.47821899796340944, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.316156914893617, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.39334375000000005, + "hf_avg": 17.458608830485915 + }, + { + "hf_id": "sabersalehk/Llama3-001-300", + "name": "Llama3-001-300", + "params_b": 8.03, + "ifeval": 0.3178643776291351, + "bbh": 0.47445771982516544, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.3158244680851064, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.40639583333333335, + "hf_avg": 17.12107684025811 + }, + { + "hf_id": "sabersalehk/Llama3-SimPO", + "name": "Llama3-SimPO", + "params_b": 8.03, + "ifeval": 0.36420142998355476, + "bbh": 0.48735382942408356, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3156582446808511, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.40459375000000003, + "hf_avg": 18.716097714176378 + }, + { + "hf_id": "sabersalehk/Llama3_001_200", + "name": "Llama3_001_200", + "params_b": 8.03, + "ifeval": 0.321836061649756, + "bbh": 0.4727921518419169, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.31831781914893614, + "hf_math_lvl5": 0.0513595166163142, + "hf_musr": 0.4037291666666667, + "hf_avg": 17.287940552816096 + }, + { + "hf_id": "sabersalehk/Llama3_01_300", + "name": "Llama3_01_300", + "params_b": 8.03, + "ifeval": 0.2958827023408999, + "bbh": 0.4691387139601247, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.31241688829787234, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.40648958333333335, + "hf_avg": 16.69882386756007 + }, + { + "hf_id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", + "name": "Fimbulvetr-Kuro-Lotus-10.7B", + "params_b": 10.732, + "ifeval": 0.49394384677101205, + "bbh": 0.4342316286386943, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.33892952127659576, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.4445104166666667, + "hf_avg": 20.677867158266114 + }, + { + "hf_id": "saishf/Neural-SOVLish-Devil-8B-L3", + "name": "Neural-SOVLish-Devil-8B-L3", + "params_b": 8.03, + "ifeval": 0.41988036188424493, + "bbh": 0.5141802159065874, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3807347074468085, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.4109583333333333, + "hf_avg": 21.691330731173803 + }, + { + "hf_id": "saishshinde15/TethysAI_Base_Reasoning", + "name": "TethysAI_Base_Reasoning", + "params_b": 3.086, + "ifeval": 0.6368757119997164, + "bbh": 0.4518558867290183, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3236369680851064, + "hf_math_lvl5": 0.31419939577039274, + "hf_musr": 0.4074583333333333, + "hf_avg": 26.354839025207614 + }, + { + "hf_id": "saishshinde15/TethysAI_Vortex_Reasoning", + "name": "TethysAI_Vortex_Reasoning", + "params_b": 3.086, + "ifeval": 0.40211970903868405, + "bbh": 0.4693805860486275, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.3380984042553192, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.40844791666666663, + "hf_avg": 21.791497955306028 + }, + { + "hf_id": "sakaltcommunity/novablast-preview", + "name": "novablast-preview", + "params_b": 32.764, + "ifeval": 0.4530279657974175, + "bbh": 0.7042765234852668, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.5915059840425532, + "hf_math_lvl5": 0.48942598187311176, + "hf_musr": 0.5021145833333334, + "hf_avg": 41.51641804260023 + }, + { + "hf_id": "sakaltcommunity/sakaltum-7b", + "name": "sakaltum-7b", + "params_b": 7.242, + "ifeval": 0.2603868845773658, + "bbh": 0.4575213514148995, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.2769281914893617, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.3775, + "hf_avg": 13.528323522415517 + }, + { + "hf_id": "saltlux/luxia-21.4b-alignment-v1.0", + "name": "luxia-21.4b-alignment-v1.0", + "params_b": 21.421, + "ifeval": 0.36929679915956326, + "bbh": 0.6373342606775594, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.34034242021276595, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.43284374999999997, + "hf_avg": 23.454573936782676 + }, + { + "hf_id": "saltlux/luxia-21.4b-alignment-v1.2", + "name": "luxia-21.4b-alignment-v1.2", + "params_b": 21.421, + "ifeval": 0.41153694419695297, + "bbh": 0.6371180708112368, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.34732380319148937, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.4458958333333334, + "hf_avg": 24.58071047593133 + }, + { + "hf_id": "sam-paech/Darkest-muse-v1", + "name": "Darkest-muse-v1", + "params_b": 10.159, + "ifeval": 0.7344202272193336, + "bbh": 0.5968439530708949, + "gpqa": 0.34395973154362414, + "mmlu_pro": 0.4183843085106383, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.4502083333333333, + "hf_avg": 33.447324199858144 + }, + { + "hf_id": "sam-paech/Delirium-v1", + "name": "Delirium-v1", + "params_b": 9.242, + "ifeval": 0.7207564816908026, + "bbh": 0.5962113834521733, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.4189660904255319, + "hf_math_lvl5": 0.2107250755287009, + "hf_musr": 0.45144791666666667, + "hf_avg": 33.09183474861921 + }, + { + "hf_id": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", + "name": "OpenHathi-7B-Hi-v0.1-Base", + "params_b": 6.87, + "ifeval": 0.18040244329490196, + "bbh": 0.33540458231510667, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.15433843085106383, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.36584375, + "hf_avg": 6.3386943375795655 + }, + { + "hf_id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", + "name": "Mistral-7B-Instruct-sa-v0.1", + "params_b": 14.483, + "ifeval": 0.4335186194851882, + "bbh": 0.32727821561411724, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.2362034574468085, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.38999999999999996, + "hf_avg": 12.263004871086814 + }, + { + "hf_id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", + "name": "Phi-3-mini-4k-instruct-sa-v0.1", + "params_b": 7.642, + "ifeval": 0.5020623057930734, + "bbh": 0.5502038722383045, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.39852061170212766, + "hf_math_lvl5": 0.14803625377643503, + "hf_musr": 0.40730208333333334, + "hf_avg": 25.82414451642259 + }, + { + "hf_id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", + "name": "deepseek-llm-7b-chat-sa-v0.1", + "params_b": 7, + "ifeval": 0.4035935761557113, + "bbh": 0.37177200995276305, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.22091090425531915, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.4173125, + "hf_avg": 13.208049800984782 + }, + { + "hf_id": "senseable/WestLake-7B-v2", + "name": "WestLake-7B-v2", + "params_b": 7.242, + "ifeval": 0.4418620371724801, + "bbh": 0.4073276290688943, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.27642952127659576, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.39371874999999995, + "hf_avg": 16.257065193895503 + }, + { + "hf_id": "sequelbox/Llama3.1-70B-PlumChat", + "name": "Llama3.1-70B-PlumChat", + "params_b": 70.554, + "ifeval": 0.5616131863455631, + "bbh": 0.6752815345736151, + "gpqa": 0.39093959731543626, + "mmlu_pro": 0.516373005319149, + "hf_math_lvl5": 0.3028700906344411, + "hf_musr": 0.47737500000000005, + "hf_avg": 37.409205844366646 + }, + { + "hf_id": "sequelbox/Llama3.1-8B-MOTH", + "name": "Llama3.1-8B-MOTH", + "params_b": 8.03, + "ifeval": 0.5244938984117696, + "bbh": 0.490246673015408, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.3338597074468085, + "hf_math_lvl5": 0.1216012084592145, + "hf_musr": 0.3689166666666666, + "hf_avg": 20.83650360825358 + }, + { + "hf_id": "sequelbox/Llama3.1-8B-PlumChat", + "name": "Llama3.1-8B-PlumChat", + "params_b": 8.03, + "ifeval": 0.42427647530773904, + "bbh": 0.3873291395699702, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.21268284574468085, + "hf_math_lvl5": 0.03625377643504532, + "hf_musr": 0.3754583333333333, + "hf_avg": 13.214730095703656 + }, + { + "hf_id": "sequelbox/Llama3.1-8B-PlumCode", + "name": "Llama3.1-8B-PlumCode", + "params_b": 8.03, + "ifeval": 0.20448299401144518, + "bbh": 0.3368086861425416, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.23354388297872342, + "hf_math_lvl5": 0.027190332326283987, + "hf_musr": 0.37734375000000003, + "hf_avg": 9.823999899420981 + }, + { + "hf_id": "sequelbox/Llama3.1-8B-PlumMath", + "name": "Llama3.1-8B-PlumMath", + "params_b": 8.03, + "ifeval": 0.224241678745728, + "bbh": 0.40323023090048143, + "gpqa": 0.3179530201342282, + "mmlu_pro": 0.29753989361702127, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.39185416666666667, + "hf_avg": 13.936685074512214 + }, + { + "hf_id": "sequelbox/gemma-2-9B-MOTH", + "name": "gemma-2-9B-MOTH", + "params_b": 9.242, + "ifeval": 0.20588150551647405, + "bbh": 0.30797000521562534, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11402925531914894, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.3409479166666667, + "hf_avg": 4.729557867247653 + }, + { + "hf_id": "sethuiyer/Llamaverse-3.1-8B-Instruct", + "name": "Llamaverse-3.1-8B-Instruct", + "params_b": 8.03, + "ifeval": 0.6185410266980501, + "bbh": 0.5414159562743479, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3523105053191489, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.3761666666666667, + "hf_avg": 26.19209991300966 + }, + { + "hf_id": "sethuiyer/Qwen2.5-7B-Anvita", + "name": "Qwen2.5-7B-Anvita", + "params_b": 7.616, + "ifeval": 0.6480416406246536, + "bbh": 0.5465860266784314, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.4165558510638298, + "hf_math_lvl5": 0.20166163141993956, + "hf_musr": 0.43365625, + "hf_avg": 29.898362120988242 + }, + { + "hf_id": "shadowml/BeagSake-7B", + "name": "BeagSake-7B", + "params_b": 7.242, + "ifeval": 0.5215960318621258, + "bbh": 0.47110342371098474, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.25847739361702127, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.41235416666666663, + "hf_avg": 19.000757229169064 + }, + { + "hf_id": "shadowml/Mixolar-4x7b", + "name": "Mixolar-4x7b", + "params_b": 36.099, + "ifeval": 0.3893303102434873, + "bbh": 0.5215949876221495, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.33053523936170215, + "hf_math_lvl5": 0.0581570996978852, + "hf_musr": 0.42575, + "hf_avg": 20.252696525459793 + }, + { + "hf_id": "shivam9980/NEPALI-LLM", + "name": "NEPALI-LLM", + "params_b": 10.273, + "ifeval": 0.041666112581284324, + "bbh": 0.3828457133787513, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.2064494680851064, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.41219791666666666, + "hf_avg": 6.93055339969496 + }, + { + "hf_id": "shivam9980/mistral-7b-news-cnn-merged", + "name": "mistral-7b-news-cnn-merged", + "params_b": 7.723, + "ifeval": 0.4634192830578421, + "bbh": 0.3635484854246454, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.28274601063829785, + "hf_math_lvl5": 0.0188821752265861, + "hf_musr": 0.45226041666666666, + "hf_avg": 17.196276123590845 + }, + { + "hf_id": "shuttleai/shuttle-3", + "name": "shuttle-3", + "params_b": 72.706, + "ifeval": 0.815403130360776, + "bbh": 0.7420334281529087, + "gpqa": 0.41191275167785235, + "mmlu_pro": 0.5716422872340425, + "hf_math_lvl5": 0.45996978851963743, + "hf_musr": 0.4376875, + "hf_avg": 46.70460730741495 + }, + { + "hf_id": "shyamieee/Padma-v7.0", + "name": "Padma-v7.0", + "params_b": 7.242, + "ifeval": 0.3841097177710696, + "bbh": 0.5118785631761485, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.3029421542553192, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.43855208333333334, + "hf_avg": 19.75621841010717 + }, + { + "hf_id": "silma-ai/SILMA-9B-Instruct-v1.0", + "name": "SILMA-9B-Instruct-v1.0", + "params_b": 9.242, + "ifeval": 0.5841943820174914, + "bbh": 0.5219015032853501, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.39195478723404253, + "hf_math_lvl5": 0.1163141993957704, + "hf_musr": 0.46369791666666665, + "hf_avg": 26.308011915634108 + }, + { + "hf_id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", + "name": "SILMA-Kashif-2B-Instruct-v1.0", + "params_b": 2.614, + "ifeval": 0.11807781131841291, + "bbh": 0.37932201246317715, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.22581449468085107, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.4042604166666666, + "hf_avg": 8.452456221236272 + }, + { + "hf_id": "siqi00/Mistral-7B-DFT", + "name": "Mistral-7B-DFT", + "params_b": 7.242, + "ifeval": 0.5568668909604294, + "bbh": 0.46648773367771273, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.2962932180851064, + "hf_math_lvl5": 0.0377643504531722, + "hf_musr": 0.41911458333333335, + "hf_avg": 20.75522180931725 + }, + { + "hf_id": "siqi00/Mistral-7B-DFT2", + "name": "Mistral-7B-DFT2", + "params_b": 7.242, + "ifeval": 0.5803723010501026, + "bbh": 0.39683798240076246, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.28523936170212766, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.44007291666666665, + "hf_avg": 19.8755968493133 + }, + { + "hf_id": "skumar9/Llama-medx_v2", + "name": "Llama-medx_v2", + "params_b": 8.03, + "ifeval": 0.4462337708391512, + "bbh": 0.4908589512175783, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.34632646276595747, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.36612500000000003, + "hf_avg": 19.88386236667153 + }, + { + "hf_id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", + "name": "Llama2-7b-sft-chat-custom-template-dpo", + "params_b": 6.738, + "ifeval": 0.2352823840742563, + "bbh": 0.36884662302661564, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.19464760638297873, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.44286458333333334, + "hf_avg": 10.140548181946363 + }, + { + "hf_id": "sometimesanotion/Lamarck-14B-v0.3", + "name": "Lamarck-14B-v0.3", + "params_b": 14.766, + "ifeval": 0.5031616111916382, + "bbh": 0.6611400465373158, + "gpqa": 0.3884228187919463, + "mmlu_pro": 0.5410571808510638, + "hf_math_lvl5": 0.3406344410876133, + "hf_musr": 0.4688125, + "hf_avg": 36.853034263826295 + }, + { + "hf_id": "sometimesanotion/Lamarck-14B-v0.6", + "name": "Lamarck-14B-v0.6", + "params_b": 14.766, + "ifeval": 0.6972510716011294, + "bbh": 0.6460312233782931, + "gpqa": 0.38926174496644295, + "mmlu_pro": 0.5399767287234043, + "hf_math_lvl5": 0.4040785498489426, + "hf_musr": 0.4846875, + "hf_avg": 41.16744391023699 + }, + { + "hf_id": "sometimesanotion/Lamarck-14B-v0.7-Fusion", + "name": "Lamarck-14B-v0.7-Fusion", + "params_b": 14.766, + "ifeval": 0.6821134589555713, + "bbh": 0.6543636625652262, + "gpqa": 0.401006711409396, + "mmlu_pro": 0.5390625, + "hf_math_lvl5": 0.4040785498489426, + "hf_musr": 0.49913541666666666, + "hf_avg": 41.68165196521005 + }, + { + "hf_id": "sometimesanotion/Lamarck-14B-v0.7-rc4", + "name": "Lamarck-14B-v0.7-rc4", + "params_b": 14.766, + "ifeval": 0.7210811757248545, + "bbh": 0.6509652911243554, + "gpqa": 0.38926174496644295, + "mmlu_pro": 0.5399767287234043, + "hf_math_lvl5": 0.4025679758308157, + "hf_musr": 0.4911979166666667, + "hf_avg": 41.79013452973212 + }, + { + "hf_id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", + "name": "Qwen2.5-14B-Vimarckoso-v3", + "params_b": 14, + "ifeval": 0.7256523801291683, + "bbh": 0.641460062329604, + "gpqa": 0.3800335570469799, + "mmlu_pro": 0.5343251329787234, + "hf_math_lvl5": 0.4003021148036254, + "hf_musr": 0.4806875, + "hf_avg": 41.026521660758924 + }, + { + "hf_id": "sometimesanotion/Qwenvergence-14B-v11", + "name": "Qwenvergence-14B-v11", + "params_b": 14.766, + "ifeval": 0.7192327468893647, + "bbh": 0.6367548394062034, + "gpqa": 0.3724832214765101, + "mmlu_pro": 0.5327460106382979, + "hf_math_lvl5": 0.4645015105740181, + "hf_musr": 0.4754479166666667, + "hf_avg": 41.51678135973019 + }, + { + "hf_id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", + "name": "Qwenvergence-14B-v13-Prose-DS", + "params_b": 14.766, + "ifeval": 0.717808747456748, + "bbh": 0.6405077084802886, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.534906914893617, + "hf_math_lvl5": 0.3859516616314199, + "hf_musr": 0.49265625, + "hf_avg": 41.078775784940596 + }, + { + "hf_id": "sometimesanotion/Qwenvergence-14B-v3-Prose", + "name": "Qwenvergence-14B-v3-Prose", + "params_b": 14.766, + "ifeval": 0.49177072390147036, + "bbh": 0.6512913170949324, + "gpqa": 0.3951342281879195, + "mmlu_pro": 0.5369847074468085, + "hf_math_lvl5": 0.3648036253776435, + "hf_musr": 0.49389583333333337, + "hf_avg": 37.52186681948957 + }, + { + "hf_id": "sometimesanotion/Qwenvergence-14B-v6-Prose", + "name": "Qwenvergence-14B-v6-Prose", + "params_b": 14, + "ifeval": 0.5990073006289978, + "bbh": 0.6543750230807198, + "gpqa": 0.3884228187919463, + "mmlu_pro": 0.5370678191489362, + "hf_math_lvl5": 0.3564954682779456, + "hf_musr": 0.48865625, + "hf_avg": 38.950847394921304 + }, + { + "hf_id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", + "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", + "params_b": 7.723, + "ifeval": 0.28933784580468713, + "bbh": 0.38041816886828617, + "gpqa": 0.24664429530201343, + "mmlu_pro": 0.14012632978723405, + "hf_math_lvl5": 0.011329305135951661, + "hf_musr": 0.3860625, + "hf_avg": 8.889814609614424 + }, + { + "hf_id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", + "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", + "params_b": 7.723, + "ifeval": 0.3199377651298555, + "bbh": 0.39586243698929185, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.21243351063829788, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.4271770833333333, + "hf_avg": 12.932104434694521 + }, + { + "hf_id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", + "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", + "params_b": 7.723, + "ifeval": 0.37644117607946914, + "bbh": 0.3828367247244511, + "gpqa": 0.2651006711409396, + "mmlu_pro": 0.20553523936170212, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.4404166666666667, + "hf_avg": 13.424509088727826 + }, + { + "hf_id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", + "name": "zephyr-sft-bnb-4bit-DPO-mtbc-213steps", + "params_b": 7.242, + "ifeval": 0.4275489035758454, + "bbh": 0.4197290890050172, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.27086103723404253, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.40863541666666664, + "hf_avg": 15.853792634957259 + }, + { + "hf_id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", + "name": "zephyr-sft-bnb-4bit-DPO-mtbo-180steps", + "params_b": 7.242, + "ifeval": 0.40871443325930756, + "bbh": 0.4322585223071556, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.27476728723404253, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.38851041666666664, + "hf_avg": 15.602364526823635 + }, + { + "hf_id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", + "name": "zephyr-sft-bnb-4bit-DPO-mtbr-180steps", + "params_b": 7.242, + "ifeval": 0.4032190144372487, + "bbh": 0.43053552565190517, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.2711103723404255, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.42575, + "hf_avg": 16.475407146329456 + }, + { + "hf_id": "sophosympatheia/Midnight-Miqu-70B-v1.5", + "name": "Midnight-Miqu-70B-v1.5", + "params_b": 68.977, + "ifeval": 0.6118465671086051, + "bbh": 0.5606228371685053, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.38248005319148937, + "hf_math_lvl5": 0.0702416918429003, + "hf_musr": 0.42441666666666666, + "hf_avg": 25.99019477918401 + }, + { + "hf_id": "speakleash/Bielik-11B-v2", + "name": "Bielik-11B-v2", + "params_b": 11.169, + "ifeval": 0.23810489501190177, + "bbh": 0.49308409091594996, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.3137466755319149, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.39244791666666673, + "hf_avg": 15.989069166924798 + }, + { + "hf_id": "speakleash/Bielik-11B-v2.0-Instruct", + "name": "Bielik-11B-v2.0-Instruct", + "params_b": 11.169, + "ifeval": 0.5252430218486948, + "bbh": 0.5361579931173499, + "gpqa": 0.31711409395973156, + "mmlu_pro": 0.3351063829787234, + "hf_math_lvl5": 0.11858006042296072, + "hf_musr": 0.4467083333333333, + "hf_avg": 24.661167243528862 + }, + { + "hf_id": "speakleash/Bielik-11B-v2.1-Instruct", + "name": "Bielik-11B-v2.1-Instruct", + "params_b": 11.169, + "ifeval": 0.5089817240477489, + "bbh": 0.5530119844151298, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.34466422872340424, + "hf_math_lvl5": 0.26661631419939574, + "hf_musr": 0.4185208333333333, + "hf_avg": 27.19716415968224 + }, + { + "hf_id": "speakleash/Bielik-11B-v2.2-Instruct", + "name": "Bielik-11B-v2.2-Instruct", + "params_b": 11.169, + "ifeval": 0.5551935531057595, + "bbh": 0.5596561190863629, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.3486535904255319, + "hf_math_lvl5": 0.2681268882175227, + "hf_musr": 0.41712499999999997, + "hf_avg": 27.9792775947469 + }, + { + "hf_id": "speakleash/Bielik-11B-v2.3-Instruct", + "name": "Bielik-11B-v2.3-Instruct", + "params_b": 11.169, + "ifeval": 0.558290890393046, + "bbh": 0.5662699020280031, + "gpqa": 0.34060402684563756, + "mmlu_pro": 0.34441489361702127, + "hf_math_lvl5": 0.2084592145015106, + "hf_musr": 0.4518229166666667, + "hf_avg": 28.331123935725582 + }, + { + "hf_id": "spmurrayzzz/Mistral-Syndicate-7B", + "name": "Mistral-Syndicate-7B", + "params_b": 7.242, + "ifeval": 0.249595517670891, + "bbh": 0.42450570755678535, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2631316489361702, + "hf_math_lvl5": 0.033987915407854986, + "hf_musr": 0.43855208333333334, + "hf_avg": 14.012817664482597 + }, + { + "hf_id": "spow12/ChatWaifu_12B_v2.0", + "name": "ChatWaifu_12B_v2.0", + "params_b": 12.248, + "ifeval": 0.47675833455232114, + "bbh": 0.5207681738205238, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.33876329787234044, + "hf_math_lvl5": 0.07099697885196375, + "hf_musr": 0.44317708333333333, + "hf_avg": 21.979985555353448 + }, + { + "hf_id": "spow12/ChatWaifu_22B_v2.0_preview", + "name": "ChatWaifu_22B_v2.0_preview", + "params_b": 22.247, + "ifeval": 0.6744947849483814, + "bbh": 0.6170153091362338, + "gpqa": 0.31543624161073824, + "mmlu_pro": 0.39876994680851063, + "hf_math_lvl5": 0.18882175226586104, + "hf_musr": 0.3685416666666667, + "hf_avg": 29.545969322343282 + }, + { + "hf_id": "spow12/ChatWaifu_v1.4", + "name": "ChatWaifu_v1.4", + "params_b": 12.248, + "ifeval": 0.5690567693719332, + "bbh": 0.5176247229970669, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.3474900265957447, + "hf_math_lvl5": 0.10574018126888217, + "hf_musr": 0.47433333333333333, + "hf_avg": 25.706734115105316 + }, + { + "hf_id": "spow12/ChatWaifu_v2.0_22B", + "name": "ChatWaifu_v2.0_22B", + "params_b": 22.247, + "ifeval": 0.6510891102275296, + "bbh": 0.592630190761292, + "gpqa": 0.32466442953020136, + "mmlu_pro": 0.3835605053191489, + "hf_math_lvl5": 0.18580060422960726, + "hf_musr": 0.3841979166666667, + "hf_avg": 28.838097623831434 + }, + { + "hf_id": "spow12/ChatWaifu_v2.0_22B", + "name": "ChatWaifu_v2.0_22B", + "params_b": 22.247, + "ifeval": 0.6517384982956334, + "bbh": 0.5908050619550995, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.3812333776595745, + "hf_math_lvl5": 0.20317220543806647, + "hf_musr": 0.3841979166666667, + "hf_avg": 29.03230470609974 + }, + { + "hf_id": "ssmits/Qwen2.5-95B-Instruct", + "name": "Qwen2.5-95B-Instruct", + "params_b": 94.648, + "ifeval": 0.8431051831363006, + "bbh": 0.7037799697488242, + "gpqa": 0.3640939597315436, + "mmlu_pro": 0.5216921542553191, + "hf_math_lvl5": 0.5302114803625377, + "hf_musr": 0.4283854166666667, + "hf_avg": 45.257345532181205 + }, + { + "hf_id": "stabilityai/StableBeluga2", + "name": "StableBeluga2", + "params_b": 68.977, + "ifeval": 0.37871403431783224, + "bbh": 0.5824128134553807, + "gpqa": 0.3162751677852349, + "mmlu_pro": 0.3326130319148936, + "hf_math_lvl5": 0.04380664652567976, + "hf_musr": 0.47296875, + "hf_avg": 22.808722961321305 + }, + { + "hf_id": "stabilityai/stablelm-2-12b", + "name": "stablelm-2-12b", + "params_b": 12.143, + "ifeval": 0.1569214129620518, + "bbh": 0.4508654171114765, + "gpqa": 0.2785234899328859, + "mmlu_pro": 0.3071808510638298, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.44788541666666665, + "hf_avg": 13.998663061157224 + }, + { + "hf_id": "stabilityai/stablelm-2-12b-chat", + "name": "stablelm-2-12b-chat", + "params_b": 12.143, + "ifeval": 0.4081647805600252, + "bbh": 0.4672024731282805, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.2734375, + "hf_math_lvl5": 0.05362537764350453, + "hf_musr": 0.3914270833333333, + "hf_avg": 16.778178021081665 + }, + { + "hf_id": "stabilityai/stablelm-2-1_6b", + "name": "stablelm-2-1_6b", + "params_b": 1.645, + "ifeval": 0.11570521771122844, + "bbh": 0.338457720511071, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.1463597074468085, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.38819791666666664, + "hf_avg": 5.316831473392678 + }, + { + "hf_id": "stabilityai/stablelm-2-1_6b-chat", + "name": "stablelm-2-1_6b-chat", + "params_b": 1.645, + "ifeval": 0.30599919325168334, + "bbh": 0.3390172395486522, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.16215093085106383, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.35796875, + "hf_avg": 8.867360692101089 + }, + { + "hf_id": "stabilityai/stablelm-2-zephyr-1_6b", + "name": "stablelm-2-zephyr-1_6b", + "params_b": 1.645, + "ifeval": 0.32793100085550786, + "bbh": 0.3351608706280727, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.17137632978723405, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.3511458333333333, + "hf_avg": 9.458167591621253 + }, + { + "hf_id": "stabilityai/stablelm-3b-4e1t", + "name": "stablelm-3b-4e1t", + "params_b": 2.795, + "ifeval": 0.22031986240951784, + "bbh": 0.3504211415826912, + "gpqa": 0.23741610738255034, + "mmlu_pro": 0.1668882978723404, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.37778124999999996, + "hf_avg": 7.3261912916856 + }, + { + "hf_id": "stabilityai/stablelm-zephyr-3b", + "name": "stablelm-zephyr-3b", + "params_b": 2.795, + "ifeval": 0.36832271705740766, + "bbh": 0.3866361442837871, + "gpqa": 0.23909395973154363, + "mmlu_pro": 0.17677859042553193, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.4183020833333333, + "hf_avg": 12.369206962303688 + }, + { + "hf_id": "sthenno/tempesthenno-fusion-0309", + "name": "tempesthenno-fusion-0309", + "params_b": 14.766, + "ifeval": 0.7691913013027656, + "bbh": 0.6580880569586895, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.5258477393617021, + "hf_math_lvl5": 0.47658610271903323, + "hf_musr": 0.4325104166666667, + "hf_avg": 42.138889973963565 + }, + { + "hf_id": "sthenno/tempesthenno-kto-0205-ckpt80", + "name": "tempesthenno-kto-0205-ckpt80", + "params_b": 14.766, + "ifeval": 0.8054362425032248, + "bbh": 0.654273895095419, + "gpqa": 0.34815436241610737, + "mmlu_pro": 0.5285904255319149, + "hf_math_lvl5": 0.459214501510574, + "hf_musr": 0.4247604166666667, + "hf_avg": 41.7909439445551 + }, + { + "hf_id": "sthenno/tempesthenno-nuslerp-001", + "name": "tempesthenno-nuslerp-001", + "params_b": 14.766, + "ifeval": 0.7926468437080281, + "bbh": 0.6577675676172494, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.5256815159574468, + "hf_math_lvl5": 0.47583081570996977, + "hf_musr": 0.43, + "hf_avg": 42.58615155936238 + }, + { + "hf_id": "sthenno/tempesthenno-nuslerp-0124", + "name": "tempesthenno-nuslerp-0124", + "params_b": 14.766, + "ifeval": 0.7003982765728267, + "bbh": 0.6468547741903091, + "gpqa": 0.3901006711409396, + "mmlu_pro": 0.5352393617021277, + "hf_math_lvl5": 0.411631419939577, + "hf_musr": 0.48592708333333334, + "hf_avg": 41.287889272020685 + }, + { + "hf_id": "sthenno/tempesthenno-ppo-ckpt40", + "name": "tempesthenno-ppo-ckpt40", + "params_b": 14.766, + "ifeval": 0.7923221496739761, + "bbh": 0.6549600322869433, + "gpqa": 0.3775167785234899, + "mmlu_pro": 0.5291722074468085, + "hf_math_lvl5": 0.4735649546827795, + "hf_musr": 0.4351770833333333, + "hf_avg": 42.73562035742862 + }, + { + "hf_id": "sthenno/tempesthenno-sft-0309-ckpt10", + "name": "tempesthenno-sft-0309-ckpt10", + "params_b": 14.766, + "ifeval": 0.7743620260907724, + "bbh": 0.6551647758995857, + "gpqa": 0.3716442953020134, + "mmlu_pro": 0.5257646276595744, + "hf_math_lvl5": 0.47205438066465255, + "hf_musr": 0.4364166666666667, + "hf_avg": 42.192396685999725 + }, + { + "hf_id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", + "name": "tempesthenno-sft-0314-stage1-ckpt50", + "params_b": 14.766, + "ifeval": 0.7393659933421101, + "bbh": 0.6601015847983588, + "gpqa": 0.3733221476510067, + "mmlu_pro": 0.5301695478723404, + "hf_math_lvl5": 0.46827794561933533, + "hf_musr": 0.44286458333333334, + "hf_avg": 41.88689161687085 + }, + { + "hf_id": "sthenno/tempestissimo-14b-0309", + "name": "tempestissimo-14b-0309", + "params_b": 14.766, + "ifeval": 0.7548781677061308, + "bbh": 0.6587329699954757, + "gpqa": 0.36661073825503354, + "mmlu_pro": 0.528091755319149, + "hf_math_lvl5": 0.479607250755287, + "hf_musr": 0.43123958333333334, + "hf_avg": 41.88723985978792 + }, + { + "hf_id": "sthenno-com/miscii-14b-0130", + "name": "miscii-14b-0130", + "params_b": 14.766, + "ifeval": 0.6647029880716498, + "bbh": 0.6505409113818335, + "gpqa": 0.38171140939597314, + "mmlu_pro": 0.5363198138297872, + "hf_math_lvl5": 0.43202416918429004, + "hf_musr": 0.4911666666666667, + "hf_avg": 41.085925833196264 + }, + { + "hf_id": "sthenno-com/miscii-14b-0218", + "name": "miscii-14b-0218", + "params_b": 14.766, + "ifeval": 0.7655941790006073, + "bbh": 0.6558708629267258, + "gpqa": 0.38338926174496646, + "mmlu_pro": 0.5297539893617021, + "hf_math_lvl5": 0.5143504531722054, + "hf_musr": 0.4272708333333333, + "hf_avg": 42.89726019720522 + }, + { + "hf_id": "sthenno-com/miscii-14b-1028", + "name": "miscii-14b-1028", + "params_b": 14.77, + "ifeval": 0.8236711924360696, + "bbh": 0.64483340535341, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.5152925531914894, + "hf_math_lvl5": 0.5030211480362538, + "hf_musr": 0.41815625, + "hf_avg": 42.38069997703781 + }, + { + "hf_id": "sthenno-com/miscii-14b-1225", + "name": "miscii-14b-1225", + "params_b": 14.766, + "ifeval": 0.787800812954073, + "bbh": 0.6571708988407374, + "gpqa": 0.3775167785234899, + "mmlu_pro": 0.5271775265957447, + "hf_math_lvl5": 0.4516616314199396, + "hf_musr": 0.4365729166666667, + "hf_avg": 42.34951191764194 + }, + { + "hf_id": "streamerbtw1002/Nexuim-R1-7B-Instruct", + "name": "Nexuim-R1-7B-Instruct", + "params_b": 7.616, + "ifeval": 0.6934289906337407, + "bbh": 0.5175174748142363, + "gpqa": 0.25922818791946306, + "mmlu_pro": 0.413813164893617, + "hf_math_lvl5": 0.44561933534743203, + "hf_musr": 0.33555208333333336, + "hf_avg": 30.443047255620304 + }, + { + "hf_id": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", + "name": "Llama-3-8B-Instruct-MultiMoose", + "params_b": 8.03, + "ifeval": 0.23181048506850713, + "bbh": 0.2822965317600308, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.109375, + "hf_musr": 0.3485416666666667, + "hf_avg": 4.768701692265338 + }, + { + "hf_id": "suayptalha/Clarus-7B-v0.1", + "name": "Clarus-7B-v0.1", + "params_b": 7.616, + "ifeval": 0.7454110648634512, + "bbh": 0.5496611433440965, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.4387466755319149, + "hf_math_lvl5": 0.49244712990936557, + "hf_musr": 0.44295833333333334, + "hf_avg": 36.705259716178944 + }, + { + "hf_id": "suayptalha/Clarus-7B-v0.2", + "name": "Clarus-7B-v0.2", + "params_b": 7.613, + "ifeval": 0.7679423928509688, + "bbh": 0.5490057426751466, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.4399933510638298, + "hf_math_lvl5": 0.48564954682779454, + "hf_musr": 0.44165625000000003, + "hf_avg": 36.860642918566676 + }, + { + "hf_id": "suayptalha/Clarus-7B-v0.3", + "name": "Clarus-7B-v0.3", + "params_b": 7.616, + "ifeval": 0.7509064836855099, + "bbh": 0.5525985716155296, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.4384973404255319, + "hf_math_lvl5": 0.4879154078549849, + "hf_musr": 0.44022916666666667, + "hf_avg": 36.776154141235224 + }, + { + "hf_id": "suayptalha/DeepSeek-R1-Distill-Llama-3B", + "name": "DeepSeek-R1-Distill-Llama-3B", + "params_b": 3.213, + "ifeval": 0.7092658590318134, + "bbh": 0.44517853159705956, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.29778922872340424, + "hf_math_lvl5": 0.20921450151057402, + "hf_musr": 0.33958333333333335, + "hf_avg": 23.27368245692195 + }, + { + "hf_id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", + "name": "Falcon3-Jessi-v0.4-7B-Slerp", + "params_b": 7.456, + "ifeval": 0.7676176988169169, + "bbh": 0.5590927389495824, + "gpqa": 0.31208053691275167, + "mmlu_pro": 0.406000664893617, + "hf_math_lvl5": 0.39652567975830816, + "hf_musr": 0.48121875000000003, + "hf_avg": 36.077231715414364 + }, + { + "hf_id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", + "name": "HomerCreativeAnvita-Mix-Qw7B", + "params_b": 7.616, + "ifeval": 0.7807816593305763, + "bbh": 0.5564653181490319, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.4444813829787234, + "hf_math_lvl5": 0.3610271903323263, + "hf_musr": 0.44159375, + "hf_avg": 35.464381527434064 + }, + { + "hf_id": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", + "name": "Komodo-Llama-3.2-3B-v2-fp16", + "params_b": 3, + "ifeval": 0.6340532010620709, + "bbh": 0.43549964909074995, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.28523936170212766, + "hf_math_lvl5": 0.10649546827794562, + "hf_musr": 0.34057291666666667, + "hf_avg": 20.317372826484497 + }, + { + "hf_id": "suayptalha/Lamarckvergence-14B", + "name": "Lamarckvergence-14B", + "params_b": 14.766, + "ifeval": 0.7655941790006073, + "bbh": 0.651698573892736, + "gpqa": 0.36325503355704697, + "mmlu_pro": 0.5283410904255319, + "hf_math_lvl5": 0.5400302114803626, + "hf_musr": 0.44215625000000003, + "hf_avg": 43.320333136542786 + }, + { + "hf_id": "suayptalha/Lix-14B-v0.1", + "name": "Lix-14B-v0.1", + "params_b": 14.766, + "ifeval": 0.7813313120298586, + "bbh": 0.6607910825152539, + "gpqa": 0.3699664429530201, + "mmlu_pro": 0.5314162234042553, + "hf_math_lvl5": 0.5294561933534743, + "hf_musr": 0.43378125, + "hf_avg": 43.31763225045196 + }, + { + "hf_id": "suayptalha/Maestro-10B", + "name": "Maestro-10B", + "params_b": 10.306, + "ifeval": 0.7767601076255447, + "bbh": 0.5746090622656775, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.42179188829787234, + "hf_math_lvl5": 0.19108761329305135, + "hf_musr": 0.43972916666666667, + "hf_avg": 32.83184082460664 + }, + { + "hf_id": "suayptalha/Rombos-2.5-T.E-8.1", + "name": "Rombos-2.5-T.E-8.1", + "params_b": 7.616, + "ifeval": 0.6925047762159957, + "bbh": 0.5514641249478369, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.4445644946808511, + "hf_math_lvl5": 0.49244712990936557, + "hf_musr": 0.41663541666666665, + "hf_avg": 35.40416180893529 + }, + { + "hf_id": "sumink/Qwenftmodel", + "name": "Qwenftmodel", + "params_b": 1.544, + "ifeval": 0.17290899258412123, + "bbh": 0.38226970256668574, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.23387632978723405, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.36171875000000003, + "hf_avg": 10.104913951155993 + }, + { + "hf_id": "sumink/Qwenmplus", + "name": "Qwenmplus", + "params_b": 1.543, + "ifeval": 0.20403307668098425, + "bbh": 0.3675511408391697, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.19921875, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.38283333333333336, + "hf_avg": 9.390911516269796 + }, + { + "hf_id": "sumink/Qwensci", + "name": "Qwensci", + "params_b": 1.543, + "ifeval": 0.17398281005509825, + "bbh": 0.3281870591856875, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.12599734042553193, + "hf_math_lvl5": 0.02039274924471299, + "hf_musr": 0.3608854166666667, + "hf_avg": 5.5625396368925974 + }, + { + "hf_id": "sumink/bbhqwen", + "name": "bbhqwen", + "params_b": 3.086, + "ifeval": 0.18085236062536292, + "bbh": 0.3388245916050106, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.16165226063829788, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.43523958333333335, + "hf_avg": 7.833827413484496 + }, + { + "hf_id": "sumink/bbhqwen2", + "name": "bbhqwen2", + "params_b": 3.086, + "ifeval": 0.15329991090307052, + "bbh": 0.30663248168563745, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.1149434840425532, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.44305208333333335, + "hf_avg": 6.258601237356344 + }, + { + "hf_id": "sumink/bbhqwen3", + "name": "bbhqwen3", + "params_b": 3.086, + "ifeval": 0.1942911474886634, + "bbh": 0.2950842029929075, + "gpqa": 0.2575503355704698, + "mmlu_pro": 0.11660571808510638, + "hf_musr": 0.3796145833333333, + "hf_avg": 4.947842576914937 + }, + { + "hf_id": "sumink/bbhqwen4", + "name": "bbhqwen4", + "params_b": 3.086, + "ifeval": 0.14485675784695717, + "bbh": 0.3199395559502713, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.15093085106382978, + "hf_math_lvl5": 0.006042296072507553, + "hf_musr": 0.4028958333333333, + "hf_avg": 5.656083792655539 + }, + { + "hf_id": "sumink/bbhqwen5", + "name": "bbhqwen5", + "params_b": 3.086, + "ifeval": 0.1521507378200951, + "bbh": 0.29130964476405813, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.11311502659574468, + "hf_math_lvl5": 0.0022658610271903325, + "hf_musr": 0.4019375, + "hf_avg": 5.199436501746352 + }, + { + "hf_id": "sumink/bbhqwen6", + "name": "bbhqwen6", + "params_b": 3.086, + "ifeval": 0.18929551368147626, + "bbh": 0.2782242419852629, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.11527593085106383, + "hf_math_lvl5": 0.0007552870090634441, + "hf_musr": 0.35796875, + "hf_avg": 4.3661285339650675 + }, + { + "hf_id": "sumink/flflmillama", + "name": "flflmillama", + "params_b": 3.213, + "ifeval": 0.16756317681529453, + "bbh": 0.38511286094747693, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.20960771276595744, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.35911458333333335, + "hf_avg": 9.04335499884608 + }, + { + "hf_id": "sumink/ftgpt", + "name": "ftgpt", + "params_b": 0.124, + "ifeval": 0.0787100449030794, + "bbh": 0.29190853217047663, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.1171875, + "hf_musr": 0.41384375, + "hf_avg": 3.951784139825086 + }, + { + "hf_id": "sumink/llamaft", + "name": "llamaft", + "params_b": 3.213, + "ifeval": 0.16086871722584964, + "bbh": 0.3762775648269859, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.21143617021276595, + "hf_math_lvl5": 0.01661631419939577, + "hf_musr": 0.3498125, + "hf_avg": 8.156199769325905 + }, + { + "hf_id": "sumink/llamamerge", + "name": "llamamerge", + "params_b": 13.016, + "ifeval": 0.26718107953563214, + "bbh": 0.46316160070587903, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.2589760638297872, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.42397916666666663, + "hf_avg": 14.736806871689032 + }, + { + "hf_id": "sumink/llftfl7", + "name": "llftfl7", + "params_b": 3.213, + "ifeval": 0.17143512546709397, + "bbh": 0.37864273336631166, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.17428523936170212, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.36320833333333336, + "hf_avg": 7.811247007957387 + }, + { + "hf_id": "sumink/qwft", + "name": "qwft", + "params_b": 7.616, + "ifeval": 0.11965252197502627, + "bbh": 0.30021752093452153, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11294880319148937, + "hf_musr": 0.3580625, + "hf_avg": 3.1061410184104887 + }, + { + "hf_id": "sumink/somerft", + "name": "somerft", + "params_b": 1.543, + "ifeval": 0.14305819669587805, + "bbh": 0.3093455213252133, + "gpqa": 0.2483221476510067, + "mmlu_pro": 0.11170212765957446, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.40447916666666667, + "hf_avg": 4.941854251868015 + }, + { + "hf_id": "sunbaby/BrainCog-8B-0.1-Instruct", + "name": "BrainCog-8B-0.1-Instruct", + "params_b": 8.03, + "ifeval": 0.4253004250943053, + "bbh": 0.46182179983247446, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.28582114361702127, + "hf_math_lvl5": 0.09667673716012085, + "hf_musr": 0.36559375, + "hf_avg": 18.380632683786093 + }, + { + "hf_id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", + "name": "LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", + "params_b": 8.03, + "ifeval": 0.4815046299374548, + "bbh": 0.4935698792285044, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.3723404255319149, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.43873958333333335, + "hf_avg": 21.82755308278925 + }, + { + "hf_id": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", + "name": "tangled-llama-pints-1.5b-v0.1-instruct", + "params_b": 1.5, + "ifeval": 0.15090182936829835, + "bbh": 0.31434444692284963, + "gpqa": 0.23993288590604026, + "mmlu_pro": 0.11087101063829788, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.37613541666666667, + "hf_avg": 4.366498008182751 + }, + { + "hf_id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", + "name": "tangled-llama-pints-1.5b-v0.2-instruct", + "params_b": 1.5, + "ifeval": 0.1724092075692496, + "bbh": 0.3158349391752727, + "gpqa": 0.24161073825503357, + "mmlu_pro": 0.11170212765957446, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.3642916666666667, + "hf_avg": 4.745857038769314 + }, + { + "hf_id": "tanliboy/lambda-gemma-2-9b-dpo", + "name": "lambda-gemma-2-9b-dpo", + "params_b": 9.242, + "ifeval": 0.45008023156336296, + "bbh": 0.547172399190412, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.379155585106383, + "hf_math_lvl5": 0.09441087613293052, + "hf_musr": 0.40165625, + "hf_avg": 22.91040441700333 + }, + { + "hf_id": "tanliboy/lambda-gemma-2-9b-dpo", + "name": "lambda-gemma-2-9b-dpo", + "params_b": 9.242, + "ifeval": 0.18292463995531855, + "bbh": 0.5487911206515993, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.3804853723404255, + "hf_musr": 0.40562499999999996, + "hf_avg": 16.97010860262216 + }, + { + "hf_id": "tanliboy/lambda-qwen2.5-14b-dpo-test", + "name": "lambda-qwen2.5-14b-dpo-test", + "params_b": 14.77, + "ifeval": 0.8231215397367873, + "bbh": 0.6393505282981286, + "gpqa": 0.3624161073825503, + "mmlu_pro": 0.4847905585106383, + "hf_math_lvl5": 0.5460725075528701, + "hf_musr": 0.42603125000000003, + "hf_avg": 42.617400826626636 + }, + { + "hf_id": "tanliboy/lambda-qwen2.5-32b-dpo-test", + "name": "lambda-qwen2.5-32b-dpo-test", + "params_b": 32.764, + "ifeval": 0.8083839767372794, + "bbh": 0.6763904009446838, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.565658244680851, + "hf_math_lvl5": 0.6102719033232629, + "hf_musr": 0.42742708333333335, + "hf_avg": 45.924592588188716 + }, + { + "hf_id": "tannedbum/L3-Nymeria-Maid-8B", + "name": "L3-Nymeria-Maid-8B", + "params_b": 8.03, + "ifeval": 0.7250029920610646, + "bbh": 0.5146055785516804, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.37466755319148937, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.37505208333333334, + "hf_avg": 26.043175935330492 + }, + { + "hf_id": "tannedbum/L3-Nymeria-v2-8B", + "name": "L3-Nymeria-v2-8B", + "params_b": 8.03, + "ifeval": 0.7168346653545925, + "bbh": 0.5224198261531375, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.37533244680851063, + "hf_math_lvl5": 0.09214501510574018, + "hf_musr": 0.369875, + "hf_avg": 25.709418052441055 + }, + { + "hf_id": "tannedbum/L3-Rhaenys-8B", + "name": "L3-Rhaenys-8B", + "params_b": 8.03, + "ifeval": 0.7362686560548235, + "bbh": 0.5299209893116719, + "gpqa": 0.2978187919463087, + "mmlu_pro": 0.3799035904255319, + "hf_math_lvl5": 0.08761329305135952, + "hf_musr": 0.3724791666666667, + "hf_avg": 26.454823240339877 + }, + { + "hf_id": "teknium/CollectiveCognition-v1.1-Mistral-7B", + "name": "CollectiveCognition-v1.1-Mistral-7B", + "params_b": 7, + "ifeval": 0.27904626391308396, + "bbh": 0.4493426704276236, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.28366023936170215, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.3869270833333333, + "hf_avg": 14.256397052482129 + }, + { + "hf_id": "teknium/OpenHermes-13B", + "name": "OpenHermes-13B", + "params_b": 13, + "ifeval": 0.2668065178171696, + "bbh": 0.42064384521911524, + "gpqa": 0.2726510067114094, + "mmlu_pro": 0.23894614361702127, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.4042604166666666, + "hf_avg": 12.182264325006635 + }, + { + "hf_id": "teknium/OpenHermes-2-Mistral-7B", + "name": "OpenHermes-2-Mistral-7B", + "params_b": 7, + "ifeval": 0.5286151854856226, + "bbh": 0.4947516371878204, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2931349734042553, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.45197916666666665, + "hf_avg": 21.44047612236278 + }, + { + "hf_id": "teknium/OpenHermes-2.5-Mistral-7B", + "name": "OpenHermes-2.5-Mistral-7B", + "params_b": 7.242, + "ifeval": 0.5571417173100706, + "bbh": 0.4870013259924984, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.3054355053191489, + "hf_math_lvl5": 0.05060422960725076, + "hf_musr": 0.4241979166666667, + "hf_avg": 21.317189027423062, + "lb_name": "openhermes-2.5-mistral-7b", + "lb_global": 0.26324, + "lb_reasoning": 0.26, + "lb_math": 0.20446, + "lb_language": 0.11368333333333334, + "lb_if": 0.52779, + "lb_data_analysis": 0.1738, + "arena_elo": 1175.12, + "arena_rank": 263, + "arena_votes": 5006 + }, + { + "hf_id": "teknium/OpenHermes-7B", + "name": "OpenHermes-7B", + "params_b": 7, + "ifeval": 0.1812513021006485, + "bbh": 0.362033648602934, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.19331781914893617, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.4323854166666667, + "hf_avg": 9.569248719177576 + }, + { + "hf_id": "tensopolis/falcon3-10b-tensopolis-v1", + "name": "falcon3-10b-tensopolis-v1", + "params_b": 10.306, + "ifeval": 0.7816560060639104, + "bbh": 0.618226655000786, + "gpqa": 0.3296979865771812, + "mmlu_pro": 0.4419880319148936, + "hf_math_lvl5": 0.27492447129909364, + "hf_musr": 0.43753125, + "hf_avg": 35.588967109820835 + }, + { + "hf_id": "tensopolis/falcon3-10b-tensopolis-v2", + "name": "falcon3-10b-tensopolis-v2", + "params_b": 10.306, + "ifeval": 0.7792080568447275, + "bbh": 0.618226655000786, + "gpqa": 0.3271812080536913, + "mmlu_pro": 0.4424035904255319, + "hf_math_lvl5": 0.26661631419939574, + "hf_musr": 0.4296875, + "hf_avg": 35.190439950096184 + }, + { + "hf_id": "tensopolis/lamarckvergence-14b-tensopolis-v1", + "name": "lamarckvergence-14b-tensopolis-v1", + "params_b": 14.766, + "ifeval": 0.7603735865281896, + "bbh": 0.6561154329558933, + "gpqa": 0.36073825503355705, + "mmlu_pro": 0.5250166223404256, + "hf_math_lvl5": 0.5166163141993958, + "hf_musr": 0.44745833333333335, + "hf_avg": 42.91732437905944 + }, + { + "hf_id": "tensopolis/mistral-small-2501-tensopolis-v1", + "name": "mistral-small-2501-tensopolis-v1", + "params_b": 23.572, + "ifeval": 0.7762104549262623, + "bbh": 0.6474735931872574, + "gpqa": 0.3573825503355705, + "mmlu_pro": 0.4464760638297872, + "hf_math_lvl5": 0.44410876132930516, + "hf_musr": 0.42797916666666663, + "hf_avg": 39.245150466059876 + }, + { + "hf_id": "tensopolis/mistral-small-r1-tensopolis", + "name": "mistral-small-r1-tensopolis", + "params_b": 23.572, + "ifeval": 0.462220242290456, + "bbh": 0.5435969591888976, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.4035073138297872, + "hf_math_lvl5": 0.290785498489426, + "hf_musr": 0.37375, + "hf_avg": 25.876977987240537 + }, + { + "hf_id": "tensopolis/phi-4-tensopolis-v1", + "name": "phi-4-tensopolis-v1", + "params_b": 14.66, + "ifeval": 0.6766679078179231, + "bbh": 0.6871833310149728, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.5383976063829787, + "hf_math_lvl5": 0.49395770392749244, + "hf_musr": 0.4140625, + "hf_avg": 40.45533321662008 + }, + { + "hf_id": "tensopolis/qwen2.5-14b-tensopolis-v1", + "name": "qwen2.5-14b-tensopolis-v1", + "params_b": 14.77, + "ifeval": 0.7990166092634211, + "bbh": 0.6363595324538928, + "gpqa": 0.3347315436241611, + "mmlu_pro": 0.49110704787234044, + "hf_math_lvl5": 0.5294561933534743, + "hf_musr": 0.41933333333333334, + "hf_avg": 41.14159022263494 + }, + { + "hf_id": "tensopolis/qwen2.5-7b-tensopolis-v1", + "name": "qwen2.5-7b-tensopolis-v1", + "params_b": 7.616, + "ifeval": 0.7660939640154789, + "bbh": 0.5378740884658956, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.42686170212765956, + "hf_math_lvl5": 0.4561933534743202, + "hf_musr": 0.433875, + "hf_avg": 35.49167672948769 + }, + { + "hf_id": "tensopolis/qwen2.5-7b-tensopolis-v2", + "name": "qwen2.5-7b-tensopolis-v2", + "params_b": 7.616, + "ifeval": 0.752105524452896, + "bbh": 0.5414622323974015, + "gpqa": 0.2902684563758389, + "mmlu_pro": 0.42428523936170215, + "hf_math_lvl5": 0.4818731117824773, + "hf_musr": 0.42463541666666665, + "hf_avg": 35.37814971872235 + }, + { + "hf_id": "tensopolis/virtuoso-lite-tensopolis-v1", + "name": "virtuoso-lite-tensopolis-v1", + "params_b": 10.306, + "ifeval": 0.806910109620252, + "bbh": 0.610185430846048, + "gpqa": 0.3447986577181208, + "mmlu_pro": 0.4434840425531915, + "hf_math_lvl5": 0.2545317220543807, + "hf_musr": 0.4582395833333333, + "hf_avg": 36.38947458816892 + }, + { + "hf_id": "tensopolis/virtuoso-lite-tensopolis-v2", + "name": "virtuoso-lite-tensopolis-v2", + "params_b": 10.306, + "ifeval": 0.8029384255996312, + "bbh": 0.6100187641793813, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.44398271276595747, + "hf_math_lvl5": 0.25, + "hf_musr": 0.4595416666666667, + "hf_avg": 36.256178767362854 + }, + { + "hf_id": "tensopolis/virtuoso-small-tensopolis-v1", + "name": "virtuoso-small-tensopolis-v1", + "params_b": 14.77, + "ifeval": 0.7856276900845313, + "bbh": 0.6415395136436205, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4968417553191489, + "hf_math_lvl5": 0.3527190332326284, + "hf_musr": 0.43263541666666666, + "hf_avg": 38.413589685550576 + }, + { + "hf_id": "tensopolis/virtuoso-small-tensopolis-v2", + "name": "virtuoso-small-tensopolis-v2", + "params_b": 14.77, + "ifeval": 0.8020142111818863, + "bbh": 0.6515835977499008, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.515375664893617, + "hf_math_lvl5": 0.38746223564954685, + "hf_musr": 0.43523958333333335, + "hf_avg": 40.113839873543775 + }, + { + "hf_id": "tensopolis/virtuoso-small-v2-tensopolis-v1", + "name": "virtuoso-small-v2-tensopolis-v1", + "params_b": 14.766, + "ifeval": 0.8419061423689145, + "bbh": 0.6544753426578069, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.5175365691489362, + "hf_math_lvl5": 0.452416918429003, + "hf_musr": 0.45092708333333337, + "hf_avg": 42.6978958480128 + }, + { + "hf_id": "tensoropera/Fox-1-1.6B", + "name": "Fox-1-1.6B", + "params_b": 1.665, + "ifeval": 0.27659831469390106, + "bbh": 0.3307369914593792, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.1371343085106383, + "hf_math_lvl5": 0.017371601208459216, + "hf_musr": 0.35498958333333336, + "hf_avg": 7.764365648440015 + }, + { + "hf_id": "tenyx/Llama3-TenyxChat-70B", + "name": "Llama3-TenyxChat-70B", + "params_b": 70.554, + "ifeval": 0.8087086707713311, + "bbh": 0.6511486901811531, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.5210272606382979, + "hf_math_lvl5": 0.23564954682779457, + "hf_musr": 0.42603125000000003, + "hf_avg": 36.69601460825378 + }, + { + "hf_id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", + "name": "Qwen2.5-Coder-7B-Instruct-20241106", + "params_b": 7.616, + "ifeval": 0.6101477413263474, + "bbh": 0.5007976986224548, + "gpqa": 0.29194630872483224, + "mmlu_pro": 0.33527260638297873, + "hf_math_lvl5": 0.38821752265861026, + "hf_musr": 0.4072708333333333, + "hf_avg": 28.330798858289842 + }, + { + "hf_id": "theprint/CleverBoi-7B-v2", + "name": "CleverBoi-7B-v2", + "params_b": 7.736, + "ifeval": 0.21699756645700075, + "bbh": 0.45317253321634526, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.27086103723404253, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.46953125, + "hf_avg": 15.095914857389625 + }, + { + "hf_id": "theprint/CleverBoi-7B-v3", + "name": "CleverBoi-7B-v3", + "params_b": 7.736, + "ifeval": 0.23823011830831084, + "bbh": 0.4414430902840938, + "gpqa": 0.26593959731543626, + "mmlu_pro": 0.28681848404255317, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.4071770833333333, + "hf_avg": 13.690467425790892 + }, + { + "hf_id": "theprint/CleverBoi-Llama-3.1-8B-v2", + "name": "CleverBoi-Llama-3.1-8B-v2", + "params_b": 9.3, + "ifeval": 0.19613957632415324, + "bbh": 0.46678160110644784, + "gpqa": 0.2860738255033557, + "mmlu_pro": 0.31881648936170215, + "hf_math_lvl5": 0.052870090634441085, + "hf_musr": 0.37346875, + "hf_avg": 14.145587569893708 + }, + { + "hf_id": "theprint/CleverBoi-Nemo-12B-v2", + "name": "CleverBoi-Nemo-12B-v2", + "params_b": 13.933, + "ifeval": 0.2045827293802666, + "bbh": 0.5241085887165254, + "gpqa": 0.313758389261745, + "mmlu_pro": 0.3228058510638298, + "hf_math_lvl5": 0.10347432024169184, + "hf_musr": 0.4186770833333333, + "hf_avg": 17.858393307746045 + }, + { + "hf_id": "theprint/Code-Llama-Bagel-8B", + "name": "Code-Llama-Bagel-8B", + "params_b": 8.03, + "ifeval": 0.2529676813078188, + "bbh": 0.46974200049001086, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.28216422872340424, + "hf_math_lvl5": 0.06117824773413897, + "hf_musr": 0.3679791666666667, + "hf_avg": 14.665251333761097 + }, + { + "hf_id": "theprint/Conversely-Mistral-7B", + "name": "Conversely-Mistral-7B", + "params_b": 14.496, + "ifeval": 0.2608113139802391, + "bbh": 0.4672348146697077, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.28257978723404253, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.4188958333333333, + "hf_avg": 15.032655723513159 + }, + { + "hf_id": "theprint/Llama-3.2-3B-VanRossum", + "name": "Llama-3.2-3B-VanRossum", + "params_b": 3.696, + "ifeval": 0.4782820693537591, + "bbh": 0.42787418229776697, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.27701130319148937, + "hf_math_lvl5": 0.09743202416918428, + "hf_musr": 0.3441666666666667, + "hf_avg": 17.58480896190996 + }, + { + "hf_id": "theprint/ReWiz-7B", + "name": "ReWiz-7B", + "params_b": 7.736, + "ifeval": 0.40479261692309737, + "bbh": 0.4564215411912313, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.2670378989361702, + "hf_math_lvl5": 0.04078549848942598, + "hf_musr": 0.46115625, + "hf_avg": 17.7870406487212 + }, + { + "hf_id": "theprint/ReWiz-Llama-3.1-8B-v2", + "name": "ReWiz-Llama-3.1-8B-v2", + "params_b": 9.3, + "ifeval": 0.23790542427425895, + "bbh": 0.46324275457450953, + "gpqa": 0.3028523489932886, + "mmlu_pro": 0.3310339095744681, + "hf_math_lvl5": 0.05740181268882175, + "hf_musr": 0.381375, + "hf_avg": 15.893327785890378 + }, + { + "hf_id": "theprint/ReWiz-Llama-3.2-3B", + "name": "ReWiz-Llama-3.2-3B", + "params_b": 3.213, + "ifeval": 0.4648931501748693, + "bbh": 0.4343257577815292, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.28873005319148937, + "hf_math_lvl5": 0.1095166163141994, + "hf_musr": 0.361375, + "hf_avg": 18.186254213130717 + }, + { + "hf_id": "theprint/ReWiz-Nemo-12B-Instruct", + "name": "ReWiz-Nemo-12B-Instruct", + "params_b": 12.248, + "ifeval": 0.10623811486854878, + "bbh": 0.5092407647626753, + "gpqa": 0.3238255033557047, + "mmlu_pro": 0.33394281914893614, + "hf_math_lvl5": 0.1042296072507553, + "hf_musr": 0.4095625, + "hf_avg": 16.173142405102812 + }, + { + "hf_id": "theprint/ReWiz-Qwen-2.5-14B", + "name": "ReWiz-Qwen-2.5-14B", + "params_b": 16.743, + "ifeval": 0.27854647889821227, + "bbh": 0.6179492756426455, + "gpqa": 0.3800335570469799, + "mmlu_pro": 0.5092253989361702, + "hf_math_lvl5": 0.29229607250755285, + "hf_musr": 0.45389583333333333, + "hf_avg": 30.03173400795292 + }, + { + "hf_id": "theprint/WorldBuilder-12B", + "name": "WorldBuilder-12B", + "params_b": 13.933, + "ifeval": 0.13743755457741016, + "bbh": 0.5010100641541125, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.31923204787234044, + "hf_math_lvl5": 0.0445619335347432, + "hf_musr": 0.4066458333333334, + "hf_avg": 14.516406576626443 + }, + { + "hf_id": "theprint/phi-3-mini-4k-python", + "name": "phi-3-mini-4k-python", + "params_b": 4.132, + "ifeval": 0.24087753826513653, + "bbh": 0.493759004635898, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.35771276595744683, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.3921666666666666, + "hf_avg": 17.728138144118287 + }, + { + "hf_id": "thomas-yanxin/XinYuan-Qwen2-1_5B", + "name": "XinYuan-Qwen2-1_5B", + "params_b": 1.777, + "ifeval": 0.2985556102253133, + "bbh": 0.3635491993150823, + "gpqa": 0.2701342281879195, + "mmlu_pro": 0.23570478723404256, + "hf_math_lvl5": 0.06722054380664652, + "hf_musr": 0.36339583333333336, + "hf_avg": 11.515091263493494 + }, + { + "hf_id": "thomas-yanxin/XinYuan-Qwen2-7B", + "name": "XinYuan-Qwen2-7B", + "params_b": 7.616, + "ifeval": 0.44376033369238066, + "bbh": 0.4936629157238895, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.3924534574468085, + "hf_math_lvl5": 0.14577039274924472, + "hf_musr": 0.40581249999999996, + "hf_avg": 22.431711657583364 + }, + { + "hf_id": "thomas-yanxin/XinYuan-Qwen2-7B-0917", + "name": "XinYuan-Qwen2-7B-0917", + "params_b": 7.616, + "ifeval": 0.37191983935956596, + "bbh": 0.5169215573786009, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.4245345744680851, + "hf_math_lvl5": 0.19788519637462235, + "hf_musr": 0.4401041666666667, + "hf_avg": 24.546893470710334 + }, + { + "hf_id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", + "name": "XinYuan-Qwen2.5-7B-0917", + "params_b": 7.616, + "ifeval": 0.35770644113175265, + "bbh": 0.5184106116987492, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.38821476063829785, + "hf_math_lvl5": 0.1933534743202417, + "hf_musr": 0.3675520833333333, + "hf_avg": 21.39759488657262 + }, + { + "hf_id": "tianyil1/MistralForCausalLM_Cal_DPO", + "name": "MistralForCausalLM_Cal_DPO", + "params_b": 7.242, + "ifeval": 0.5327619604870633, + "bbh": 0.43814239617517153, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.2763464095744681, + "hf_math_lvl5": 0.028700906344410877, + "hf_musr": 0.39765625, + "hf_avg": 18.088644447193257 + }, + { + "hf_id": "tiiuae/Falcon3-10B-Base", + "name": "Falcon3-10B-Base", + "params_b": 10.306, + "ifeval": 0.3647754624396601, + "bbh": 0.595004253437141, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.4240359042553192, + "hf_math_lvl5": 0.24924471299093656, + "hf_musr": 0.43979166666666664, + "hf_avg": 27.617850879493677 + }, + { + "hf_id": "tiiuae/Falcon3-10B-Instruct", + "name": "Falcon3-10B-Instruct", + "params_b": 10.306, + "ifeval": 0.7816560060639104, + "bbh": 0.6170469398052084, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.44290226063829785, + "hf_math_lvl5": 0.2764350453172205, + "hf_musr": 0.43232291666666667, + "hf_avg": 35.47541146366702 + }, + { + "hf_id": "tiiuae/Falcon3-1B-Base", + "name": "Falcon3-1B-Base", + "params_b": 1.669, + "ifeval": 0.24280132271262472, + "bbh": 0.3571153918015637, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.16082114361702127, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.41473958333333333, + "hf_avg": 9.8880961034972 + }, + { + "hf_id": "tiiuae/Falcon3-1B-Instruct", + "name": "Falcon3-1B-Instruct", + "params_b": 1.669, + "ifeval": 0.5556678501930433, + "bbh": 0.3744535691366672, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.18384308510638298, + "hf_math_lvl5": 0.0634441087613293, + "hf_musr": 0.4188958333333333, + "hf_avg": 16.164597322515025 + }, + { + "hf_id": "tiiuae/Falcon3-3B-Base", + "name": "Falcon3-3B-Base", + "params_b": 3.228, + "ifeval": 0.2764985793250797, + "bbh": 0.4421367825874385, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.2878989361702128, + "hf_math_lvl5": 0.11782477341389729, + "hf_musr": 0.3749895833333334, + "hf_avg": 15.738743193619 + }, + { + "hf_id": "tiiuae/Falcon3-3B-Instruct", + "name": "Falcon3-3B-Instruct", + "params_b": 3.228, + "ifeval": 0.6976755010040027, + "bbh": 0.4754430332167569, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.300531914893617, + "hf_math_lvl5": 0.25, + "hf_musr": 0.41359375, + "hf_avg": 26.60234489991349 + }, + { + "hf_id": "tiiuae/Falcon3-7B-Base", + "name": "Falcon3-7B-Base", + "params_b": 7.456, + "ifeval": 0.34159474638403875, + "bbh": 0.5098880466426711, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.3910405585106383, + "hf_math_lvl5": 0.19410876132930513, + "hf_musr": 0.47020833333333334, + "hf_avg": 24.745725360383613 + }, + { + "hf_id": "tiiuae/Falcon3-7B-Instruct", + "name": "Falcon3-7B-Instruct", + "params_b": 7.456, + "ifeval": 0.7612479332615238, + "bbh": 0.563244278519333, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.4087433510638298, + "hf_math_lvl5": 0.4086102719033233, + "hf_musr": 0.48267708333333337, + "hf_avg": 36.404684964282325 + }, + { + "hf_id": "tiiuae/Falcon3-Mamba-7B-Base", + "name": "Falcon3-Mamba-7B-Base", + "params_b": 7.273, + "ifeval": 0.28911288713945665, + "bbh": 0.4699280188827039, + "gpqa": 0.30956375838926176, + "mmlu_pro": 0.30377327127659576, + "hf_math_lvl5": 0.19410876132930513, + "hf_musr": 0.3431458333333333, + "hf_avg": 18.138791975781 + }, + { + "hf_id": "tiiuae/Falcon3-Mamba-7B-Instruct", + "name": "Falcon3-Mamba-7B-Instruct", + "params_b": 7.273, + "ifeval": 0.7165099713205406, + "bbh": 0.4678957688410694, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.3369348404255319, + "hf_math_lvl5": 0.30060422960725075, + "hf_musr": 0.38686458333333335, + "hf_avg": 28.109654708582763 + }, + { + "hf_id": "tiiuae/falcon-11B", + "name": "falcon-11B", + "params_b": 11.103, + "ifeval": 0.3261324397044287, + "bbh": 0.43916370355493844, + "gpqa": 0.2709731543624161, + "mmlu_pro": 0.23894614361702127, + "hf_math_lvl5": 0.027945619335347432, + "hf_musr": 0.39864583333333337, + "hf_avg": 13.851902586180215 + }, + { + "hf_id": "tiiuae/falcon-40b", + "name": "falcon-40b", + "params_b": 40, + "ifeval": 0.24964538535530173, + "bbh": 0.4018532495595801, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.25049867021276595, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.36314583333333333, + "hf_avg": 11.40130446230009 + }, + { + "hf_id": "tiiuae/falcon-40b-instruct", + "name": "falcon-40b-instruct", + "params_b": 40, + "ifeval": 0.24544874266945038, + "bbh": 0.40538675151591974, + "gpqa": 0.25, + "mmlu_pro": 0.2261469414893617, + "hf_math_lvl5": 0.019637462235649546, + "hf_musr": 0.37622916666666667, + "hf_avg": 10.484506782098748 + }, + { + "hf_id": "tiiuae/falcon-7b", + "name": "falcon-7b", + "params_b": 7, + "ifeval": 0.182051401392749, + "bbh": 0.32852446117322215, + "gpqa": 0.24496644295302014, + "mmlu_pro": 0.11253324468085106, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.37784375, + "hf_avg": 5.1734447203194796 + }, + { + "hf_id": "tiiuae/falcon-7b-instruct", + "name": "falcon-7b-instruct", + "params_b": 7, + "ifeval": 0.19688869976107837, + "bbh": 0.32034221512355765, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.1155252659574468, + "hf_math_lvl5": 0.012084592145015106, + "hf_musr": 0.3633645833333334, + "hf_avg": 5.1165739086852 + }, + { + "hf_id": "tiiuae/falcon-mamba-7b", + "name": "falcon-mamba-7b", + "params_b": 7, + "ifeval": 0.3335760227307987, + "bbh": 0.4284854988604366, + "gpqa": 0.3104026845637584, + "mmlu_pro": 0.23021941489361702, + "hf_math_lvl5": 0.0445619335347432, + "hf_musr": 0.42103124999999997, + "hf_avg": 15.179238027611218 + }, + { + "hf_id": "tinycompany/BiBo-v0.7", + "name": "BiBo-v0.7", + "params_b": 2.943, + "ifeval": 0.3738181358794665, + "bbh": 0.43108167584271034, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.2650432180851064, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.40441666666666665, + "hf_avg": 15.965356863831778 + }, + { + "hf_id": "tinycompany/ShawtyIsBad-bgem3", + "name": "ShawtyIsBad-bgem3", + "params_b": 1.436, + "ifeval": 0.2608113139802391, + "bbh": 0.38529707856388956, + "gpqa": 0.3053691275167785, + "mmlu_pro": 0.25831117021276595, + "hf_math_lvl5": 0.04833836858006042, + "hf_musr": 0.36946875, + "hf_avg": 12.610397878155718 + }, + { + "hf_id": "tinycompany/ShawtyIsBad-e5-large", + "name": "ShawtyIsBad-e5-large", + "params_b": 1.436, + "ifeval": 0.24682287441765627, + "bbh": 0.3873483842947396, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.25689827127659576, + "hf_math_lvl5": 0.045317220543806644, + "hf_musr": 0.37204166666666666, + "hf_avg": 12.316339936795012 + }, + { + "hf_id": "tinycompany/ShawtyIsBad-ib", + "name": "ShawtyIsBad-ib", + "params_b": 1.436, + "ifeval": 0.2565149359255664, + "bbh": 0.3880457874839807, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.258061835106383, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.3641041666666667, + "hf_avg": 12.35455968391428 + }, + { + "hf_id": "tinycompany/ShawtyIsBad-nomic-moe", + "name": "ShawtyIsBad-nomic-moe", + "params_b": 1.436, + "ifeval": 0.2607614462958284, + "bbh": 0.3878019225656597, + "gpqa": 0.3070469798657718, + "mmlu_pro": 0.2572307180851064, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.37470833333333337, + "hf_avg": 12.741407000136505 + }, + { + "hf_id": "tinycompany/ShawtyIsBad-nomic1.5", + "name": "ShawtyIsBad-nomic1.5", + "params_b": 1.436, + "ifeval": 0.2543916807404354, + "bbh": 0.3873599493472512, + "gpqa": 0.311241610738255, + "mmlu_pro": 0.25673204787234044, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.36283333333333334, + "hf_avg": 12.504999335501656 + }, + { + "hf_id": "tinycompany/SigmaBoi-base", + "name": "SigmaBoi-base", + "params_b": 2.943, + "ifeval": 0.24469961923252526, + "bbh": 0.4314363391906919, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.2816655585106383, + "hf_math_lvl5": 0.07779456193353475, + "hf_musr": 0.43427083333333333, + "hf_avg": 15.2508076506235 + }, + { + "hf_id": "tinycompany/SigmaBoi-bge-m3", + "name": "SigmaBoi-bge-m3", + "params_b": 2.943, + "ifeval": 0.24502431326657714, + "bbh": 0.43509173985964184, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.28191489361702127, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.4383020833333333, + "hf_avg": 15.455458242923891 + }, + { + "hf_id": "tinycompany/SigmaBoi-bgem3", + "name": "SigmaBoi-bgem3", + "params_b": 2.943, + "ifeval": 0.24502431326657714, + "bbh": 0.43509173985964184, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.28191489361702127, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.4383020833333333, + "hf_avg": 15.455458242923891 + }, + { + "hf_id": "tinycompany/SigmaBoi-ib", + "name": "SigmaBoi-ib", + "params_b": 2.943, + "ifeval": 0.24774708883540117, + "bbh": 0.4343622024096135, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.2824135638297872, + "hf_math_lvl5": 0.07401812688821752, + "hf_musr": 0.42896874999999995, + "hf_avg": 14.968209771816907 + }, + { + "hf_id": "tinycompany/SigmaBoi-nomic-moe", + "name": "SigmaBoi-nomic-moe", + "params_b": 2.943, + "ifeval": 0.2474223948013493, + "bbh": 0.43341835214223373, + "gpqa": 0.29278523489932884, + "mmlu_pro": 0.28366023936170215, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.43163541666666666, + "hf_avg": 15.186432252407451 + }, + { + "hf_id": "tinycompany/SigmaBoi-nomic1.5", + "name": "SigmaBoi-nomic1.5", + "params_b": 2.943, + "ifeval": 0.24469961923252526, + "bbh": 0.43705348265770266, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.28407579787234044, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.4316041666666666, + "hf_avg": 15.473023890881102 + }, + { + "hf_id": "tinycompany/SigmaBoi-nomic1.5-fp32", + "name": "SigmaBoi-nomic1.5-fp32", + "params_b": 2.943, + "ifeval": 0.24622335403396323, + "bbh": 0.43705348265770266, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.28407579787234044, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.4316041666666666, + "hf_avg": 15.498419470905068 + }, + { + "hf_id": "tinycompany/Tamed-Shawty", + "name": "Tamed-Shawty", + "params_b": 1.562, + "ifeval": 0.38308576798450333, + "bbh": 0.3837059588999942, + "gpqa": 0.2625838926174497, + "mmlu_pro": 0.2601396276595745, + "hf_math_lvl5": 0.07175226586102719, + "hf_musr": 0.35009375000000004, + "hf_avg": 13.533997252162253 + }, + { + "hf_id": "togethercomputer/GPT-JT-6B-v1", + "name": "GPT-JT-6B-v1", + "params_b": 6, + "ifeval": 0.20610646418170453, + "bbh": 0.33026609127426704, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.16256648936170212, + "hf_math_lvl5": 0.010574018126888218, + "hf_musr": 0.37365625, + "hf_avg": 6.877706827738106 + }, + { + "hf_id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "name": "GPT-NeoXT-Chat-Base-20B", + "params_b": 20, + "ifeval": 0.18297561581049393, + "bbh": 0.33209702572173033, + "gpqa": 0.25, + "mmlu_pro": 0.11452792553191489, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.3460625, + "hf_avg": 5.140295456712411 + }, + { + "hf_id": "togethercomputer/LLaMA-2-7B-32K", + "name": "LLaMA-2-7B-32K", + "params_b": 7, + "ifeval": 0.18649738250065384, + "bbh": 0.33995175217301715, + "gpqa": 0.25, + "mmlu_pro": 0.17677859042553193, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.3753645833333333, + "hf_avg": 6.8377158675946506 + }, + { + "hf_id": "togethercomputer/Llama-2-7B-32K-Instruct", + "name": "Llama-2-7B-32K-Instruct", + "params_b": 7, + "ifeval": 0.2130003945087922, + "bbh": 0.34434724239927544, + "gpqa": 0.2516778523489933, + "mmlu_pro": 0.17810837765957446, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.40559375, + "hf_avg": 8.25854218578891 + }, + { + "hf_id": "togethercomputer/RedPajama-INCITE-7B-Base", + "name": "RedPajama-INCITE-7B-Base", + "params_b": 7, + "ifeval": 0.20822971936683554, + "bbh": 0.31948898765013445, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.1196808510638298, + "hf_math_lvl5": 0.015861027190332326, + "hf_musr": 0.36199999999999993, + "hf_avg": 5.56181429403527 + }, + { + "hf_id": "togethercomputer/RedPajama-INCITE-7B-Chat", + "name": "RedPajama-INCITE-7B-Chat", + "params_b": 7, + "ifeval": 0.1557977278066641, + "bbh": 0.3175449328457368, + "gpqa": 0.2525167785234899, + "mmlu_pro": 0.11211768617021277, + "hf_math_lvl5": 0.006797583081570997, + "hf_musr": 0.3447604166666667, + "hf_avg": 4.050900591245242 + }, + { + "hf_id": "togethercomputer/RedPajama-INCITE-7B-Instruct", + "name": "RedPajama-INCITE-7B-Instruct", + "params_b": 7, + "ifeval": 0.2055069437980115, + "bbh": 0.337743947089799, + "gpqa": 0.25083892617449666, + "mmlu_pro": 0.1272440159574468, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3685104166666666, + "hf_avg": 6.456725492718323 + }, + { + "hf_id": "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "name": "RedPajama-INCITE-Base-3B-v1", + "params_b": 3, + "ifeval": 0.22936253584932426, + "bbh": 0.3060403878987615, + "gpqa": 0.24328859060402686, + "mmlu_pro": 0.11112034574468085, + "hf_math_lvl5": 0.014350453172205438, + "hf_musr": 0.37387499999999996, + "hf_avg": 5.521090384654182 + }, + { + "hf_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "name": "RedPajama-INCITE-Chat-3B-v1", + "params_b": 3, + "ifeval": 0.16521496296493304, + "bbh": 0.32166937119202416, + "gpqa": 0.24412751677852348, + "mmlu_pro": 0.11269946808510638, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.3684479166666667, + "hf_avg": 4.848823926757166 + }, + { + "hf_id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "name": "RedPajama-INCITE-Instruct-3B-v1", + "params_b": 3, + "ifeval": 0.2124263620526869, + "bbh": 0.3146017752057237, + "gpqa": 0.24748322147651006, + "mmlu_pro": 0.11095412234042554, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.38860416666666664, + "hf_avg": 5.777231560126986 + }, + { + "hf_id": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", + "name": "Llama-3-Swallow-8B-Instruct-v0.1", + "params_b": 8.03, + "ifeval": 0.5507719517546776, + "bbh": 0.5009389976232003, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3087599734042553, + "hf_math_lvl5": 0.07477341389728097, + "hf_musr": 0.43569791666666663, + "hf_avg": 22.34514981791089 + }, + { + "hf_id": "tomasmcm/sky-t1-coder-32b-flash", + "name": "sky-t1-coder-32b-flash", + "params_b": 32.764, + "ifeval": 0.7780090160773414, + "bbh": 0.6822440044314982, + "gpqa": 0.36828859060402686, + "mmlu_pro": 0.5782081117021277, + "hf_math_lvl5": 0.5422960725075529, + "hf_musr": 0.4232708333333333, + "hf_avg": 44.868558910231286 + }, + { + "hf_id": "trthminh1112/autotrain-llama32-1b-finetune", + "name": "autotrain-llama32-1b-finetune", + "params_b": 1.1, + "ifeval": 0.17685518867715438, + "bbh": 0.29956269409410674, + "gpqa": 0.25671140939597314, + "mmlu_pro": 0.10987367021276596, + "hf_math_lvl5": 0.015105740181268883, + "hf_musr": 0.35127083333333337, + "hf_avg": 4.586088234847661 + }, + { + "hf_id": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", + "name": "Qwen2.5-7B-Instruct-QwQ-v0.1", + "params_b": 7.616, + "ifeval": 0.6017300761978217, + "bbh": 0.5101062293388118, + "gpqa": 0.2684563758389262, + "mmlu_pro": 0.4080784574468085, + "hf_math_lvl5": 0.3814199395770393, + "hf_musr": 0.3794270833333333, + "hf_avg": 28.427900675364572 + }, + { + "hf_id": "universalml/NepaliGPT-2.0", + "name": "NepaliGPT-2.0", + "params_b": 8.03, + "ifeval": 0.03649538779327739, + "bbh": 0.46604761322722105, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.3299534574468085, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.4656770833333333, + "hf_avg": 12.586353879208668 + }, + { + "hf_id": "unsloth/Llama-3.2-1B-Instruct", + "name": "Llama-3.2-1B-Instruct", + "params_b": 1.236, + "ifeval": 0.5809973093613834, + "bbh": 0.34847036874553655, + "gpqa": 0.2676174496644295, + "mmlu_pro": 0.17420212765957446, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.3196145833333333, + "hf_avg": 14.532450983938604 + }, + { + "hf_id": "unsloth/Phi-3-mini-4k-instruct", + "name": "Phi-3-mini-4k-instruct", + "params_b": 3.821, + "ifeval": 0.544027624480822, + "bbh": 0.5500239467441027, + "gpqa": 0.32298657718120805, + "mmlu_pro": 0.4030917553191489, + "hf_math_lvl5": 0.16389728096676737, + "hf_musr": 0.42841666666666667, + "hf_avg": 27.342019856110284, + "lb_name": "phi-3-mini-4k-instruct", + "lb_global": 0.23234333333333337, + "lb_reasoning": 0.2525, + "lb_math": 0.14958000000000002, + "lb_language": 0.08559, + "lb_if": 0.363625, + "lb_data_analysis": 0.2232 + }, + { + "hf_id": "unsloth/phi-4-bnb-4bit", + "name": "phi-4-bnb-4bit", + "params_b": 8.058, + "ifeval": 0.6729710501469435, + "bbh": 0.6769854242339189, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.5255984042553191, + "hf_math_lvl5": 0.4607250755287009, + "hf_musr": 0.40072916666666664, + "hf_avg": 39.06049481375357 + }, + { + "hf_id": "unsloth/phi-4-unsloth-bnb-4bit", + "name": "phi-4-unsloth-bnb-4bit", + "params_b": 8.483, + "ifeval": 0.6793906833867471, + "bbh": 0.6791089896968764, + "gpqa": 0.33640939597315433, + "mmlu_pro": 0.5285904255319149, + "hf_math_lvl5": 0.4561933534743202, + "hf_musr": 0.40339583333333334, + "hf_avg": 39.21645140571378 + }, + { + "hf_id": "upstage/SOLAR-10.7B-Instruct-v1.0", + "name": "SOLAR-10.7B-Instruct-v1.0", + "params_b": 10.732, + "ifeval": 0.4736609972650345, + "bbh": 0.5162494941446991, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.31382978723404253, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.3899375, + "hf_avg": 20.57236409322395, + "arena_elo": 1152.34, + "arena_rank": 274, + "arena_votes": 4155 + }, + { + "hf_id": "upstage/SOLAR-10.7B-v1.0", + "name": "SOLAR-10.7B-v1.0", + "params_b": 10.732, + "ifeval": 0.24212644671693329, + "bbh": 0.5093873084711799, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.3400099734042553, + "hf_math_lvl5": 0.026435045317220542, + "hf_musr": 0.43715624999999997, + "hf_avg": 16.8549748598855 + }, + { + "hf_id": "upstage/solar-pro-preview-instruct", + "name": "solar-pro-preview-instruct", + "params_b": 22.14, + "ifeval": 0.8415814483348626, + "bbh": 0.6816843051379534, + "gpqa": 0.37080536912751677, + "mmlu_pro": 0.52734375, + "hf_math_lvl5": 0.22054380664652568, + "hf_musr": 0.44165625000000003, + "hf_avg": 39.93865486453309 + }, + { + "hf_id": "utkmst/chimera-beta-test2-lora-merged", + "name": "chimera-beta-test2-lora-merged", + "params_b": 8.03, + "ifeval": 0.6054269338688014, + "bbh": 0.47957156724192185, + "gpqa": 0.3036912751677852, + "mmlu_pro": 0.2992021276595745, + "hf_math_lvl5": 0.09516616314199396, + "hf_musr": 0.4117916666666667, + "hf_avg": 22.38980642084878 + }, + { + "hf_id": "uukuguy/speechless-code-mistral-7b-v1.0", + "name": "speechless-code-mistral-7b-v1.0", + "params_b": 7, + "ifeval": 0.36652415590632853, + "bbh": 0.4571712887094195, + "gpqa": 0.28439597315436244, + "mmlu_pro": 0.3145777925531915, + "hf_math_lvl5": 0.05211480362537765, + "hf_musr": 0.45017708333333334, + "hf_avg": 18.19259169032542 + }, + { + "hf_id": "uukuguy/speechless-codellama-34b-v2.0", + "name": "speechless-codellama-34b-v2.0", + "params_b": 34, + "ifeval": 0.46042168113937687, + "bbh": 0.4813126697444618, + "gpqa": 0.2692953020134229, + "mmlu_pro": 0.25423869680851063, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.37870833333333337, + "hf_avg": 17.209357596769955 + }, + { + "hf_id": "uukuguy/speechless-coder-ds-6.7b", + "name": "speechless-coder-ds-6.7b", + "params_b": 6.7, + "ifeval": 0.25046986440422525, + "bbh": 0.4036373344669979, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.171875, + "hf_math_lvl5": 0.021148036253776436, + "hf_musr": 0.3819375, + "hf_avg": 9.714852002598894 + }, + { + "hf_id": "uukuguy/speechless-instruct-mistral-7b-v0.2", + "name": "speechless-instruct-mistral-7b-v0.2", + "params_b": 7.242, + "ifeval": 0.3261324397044287, + "bbh": 0.4606667950681749, + "gpqa": 0.28187919463087246, + "mmlu_pro": 0.2902260638297872, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.4901770833333334, + "hf_avg": 18.10671348498029 + }, + { + "hf_id": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", + "name": "speechless-mistral-dolphin-orca-platypus-samantha-7b", + "params_b": 7.242, + "ifeval": 0.37002154283966543, + "bbh": 0.4982774952761688, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.2990359042553192, + "hf_math_lvl5": 0.02945619335347432, + "hf_musr": 0.43613541666666666, + "hf_avg": 18.340089485864258 + }, + { + "hf_id": "uukuguy/speechless-zephyr-code-functionary-7b", + "name": "speechless-zephyr-code-functionary-7b", + "params_b": 7.242, + "ifeval": 0.2695791610704043, + "bbh": 0.46642753957194555, + "gpqa": 0.30033557046979864, + "mmlu_pro": 0.3094248670212766, + "hf_math_lvl5": 0.04229607250755287, + "hf_musr": 0.4267708333333333, + "hf_avg": 16.460834340340238 + }, + { + "hf_id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", + "name": "Qwen2.5-14B-Gutenberg-1e-Delta", + "params_b": 14.77, + "ifeval": 0.8045120280854798, + "bbh": 0.639849930188539, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.4930186170212766, + "hf_math_lvl5": 0.5264350453172205, + "hf_musr": 0.40730208333333334, + "hf_avg": 40.87901381911441 + }, + { + "hf_id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", + "name": "Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", + "params_b": 14.77, + "ifeval": 0.8197493760998595, + "bbh": 0.639010174859259, + "gpqa": 0.3313758389261745, + "mmlu_pro": 0.4923537234042553, + "hf_math_lvl5": 0.5324773413897281, + "hf_musr": 0.4113645833333333, + "hf_avg": 41.36227864094851 + }, + { + "hf_id": "v000000/Qwen2.5-Lumen-14B", + "name": "Qwen2.5-Lumen-14B", + "params_b": 14.77, + "ifeval": 0.8063604569209697, + "bbh": 0.6390809511149668, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.49027593085106386, + "hf_math_lvl5": 0.5362537764350453, + "hf_musr": 0.41139583333333335, + "hf_avg": 41.137851148922614 + }, + { + "hf_id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", + "name": "Llama-3.1-8B-Base-Instruct-SLERP", + "params_b": 8.03, + "ifeval": 0.290711977552893, + "bbh": 0.5057443268070797, + "gpqa": 0.2961409395973154, + "mmlu_pro": 0.3621176861702128, + "hf_math_lvl5": 0.12009063444108761, + "hf_musr": 0.40106250000000004, + "hf_avg": 19.27479354795199 + }, + { + "hf_id": "vhab10/Llama-3.2-Instruct-3B-TIES", + "name": "Llama-3.2-Instruct-3B-TIES", + "params_b": 1.848, + "ifeval": 0.4727367828472896, + "bbh": 0.43323649966514094, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.2915558510638298, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.34965625, + "hf_avg": 17.33432617767801 + }, + { + "hf_id": "vhab10/llama-3-8b-merged-linear", + "name": "llama-3-8b-merged-linear", + "params_b": 4.65, + "ifeval": 0.5916634529714491, + "bbh": 0.49370937443498536, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.37042885638297873, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.4190520833333333, + "hf_avg": 23.91136833689406 + }, + { + "hf_id": "vicgalle/CarbonBeagle-11B", + "name": "CarbonBeagle-11B", + "params_b": 10.732, + "ifeval": 0.5415298075772285, + "bbh": 0.5293652486530874, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.32762632978723405, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.40203125, + "hf_avg": 22.470185912589034 + }, + { + "hf_id": "vicgalle/CarbonBeagle-11B-truthy", + "name": "CarbonBeagle-11B-truthy", + "params_b": 10.732, + "ifeval": 0.5212214701436633, + "bbh": 0.5348420085288232, + "gpqa": 0.29949664429530204, + "mmlu_pro": 0.335688164893617, + "hf_math_lvl5": 0.04909365558912387, + "hf_musr": 0.37396874999999996, + "hf_avg": 21.31996250525653 + }, + { + "hf_id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", + "name": "Configurable-Hermes-2-Pro-Llama-3-8B", + "params_b": 8.031, + "ifeval": 0.5762510139762497, + "bbh": 0.5054841203275775, + "gpqa": 0.29697986577181207, + "mmlu_pro": 0.3097573138297872, + "hf_math_lvl5": 0.07628398791540786, + "hf_musr": 0.4183645833333333, + "hf_avg": 22.56595247127864 + }, + { + "hf_id": "vicgalle/Configurable-Yi-1.5-9B-Chat", + "name": "Configurable-Yi-1.5-9B-Chat", + "params_b": 8.829, + "ifeval": 0.43234506664538974, + "bbh": 0.5452196737175008, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.4015126329787234, + "hf_math_lvl5": 0.20468277945619334, + "hf_musr": 0.42711458333333335, + "hf_avg": 26.162899498605693 + }, + { + "hf_id": "vicgalle/ConfigurableBeagle-11B", + "name": "ConfigurableBeagle-11B", + "params_b": 10.732, + "ifeval": 0.5834452585805663, + "bbh": 0.5286592318626696, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.33743351063829785, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.39530208333333333, + "hf_avg": 22.622956003400244 + }, + { + "hf_id": "vicgalle/ConfigurableHermes-7B", + "name": "ConfigurableHermes-7B", + "params_b": 7.242, + "ifeval": 0.5410798902467675, + "bbh": 0.4572969627830424, + "gpqa": 0.27684563758389263, + "mmlu_pro": 0.3025265957446808, + "hf_math_lvl5": 0.04758308157099698, + "hf_musr": 0.4056875, + "hf_avg": 19.536295414907375 + }, + { + "hf_id": "vicgalle/ConfigurableSOLAR-10.7B", + "name": "ConfigurableSOLAR-10.7B", + "params_b": 10.732, + "ifeval": 0.5099558061499045, + "bbh": 0.48668100977360457, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.31732047872340424, + "hf_math_lvl5": 0.06646525679758308, + "hf_musr": 0.38047916666666665, + "hf_avg": 20.153450201779847 + }, + { + "hf_id": "vicgalle/Humanish-RP-Llama-3.1-8B", + "name": "Humanish-RP-Llama-3.1-8B", + "params_b": 8.03, + "ifeval": 0.6669259786256023, + "bbh": 0.5100385476143247, + "gpqa": 0.28691275167785235, + "mmlu_pro": 0.34765625, + "hf_math_lvl5": 0.15181268882175228, + "hf_musr": 0.39520833333333333, + "hf_avg": 25.423199454688373 + }, + { + "hf_id": "vicgalle/Merge-Mistral-Prometheus-7B", + "name": "Merge-Mistral-Prometheus-7B", + "params_b": 7.242, + "ifeval": 0.48480143796238423, + "bbh": 0.420139773821292, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.2716921542553192, + "hf_math_lvl5": 0.01812688821752266, + "hf_musr": 0.41, + "hf_avg": 16.58664234759141 + }, + { + "hf_id": "vicgalle/Merge-Mixtral-Prometheus-8x7B", + "name": "Merge-Mixtral-Prometheus-8x7B", + "params_b": 46.703, + "ifeval": 0.5744025851407598, + "bbh": 0.5351498071096573, + "gpqa": 0.3087248322147651, + "mmlu_pro": 0.3683510638297872, + "hf_math_lvl5": 0.09290030211480363, + "hf_musr": 0.40975, + "hf_avg": 24.768981526162975 + }, + { + "hf_id": "vicgalle/Roleplay-Llama-3-8B", + "name": "Roleplay-Llama-3-8B", + "params_b": 8.03, + "ifeval": 0.7320221456845614, + "bbh": 0.5012318206922323, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.370844414893617, + "hf_math_lvl5": 0.09138972809667674, + "hf_musr": 0.3528854166666666, + "hf_avg": 24.020182936148974 + }, + { + "hf_id": "vihangd/smart-dan-sft-v0.1", + "name": "smart-dan-sft-v0.1", + "params_b": 0.379, + "ifeval": 0.15764615664215392, + "bbh": 0.30617689187138886, + "gpqa": 0.2550335570469799, + "mmlu_pro": 0.11419547872340426, + "hf_math_lvl5": 0.009818731117824773, + "hf_musr": 0.35018750000000004, + "hf_avg": 3.871212537831616 + }, + { + "hf_id": "voidful/smol-360m-ft", + "name": "smol-360m-ft", + "params_b": 0.362, + "ifeval": 0.2013103011121602, + "bbh": 0.3011946898842932, + "gpqa": 0.24580536912751677, + "mmlu_pro": 0.10871010638297872, + "hf_math_lvl5": 0.008308157099697885, + "hf_musr": 0.3713645833333333, + "hf_avg": 4.7899302409628595 + }, + { + "hf_id": "vonjack/MobileLLM-125M-HF", + "name": "MobileLLM-125M-HF", + "params_b": 0.125, + "ifeval": 0.21072753627042912, + "bbh": 0.30272988561565645, + "gpqa": 0.2600671140939597, + "mmlu_pro": 0.1163563829787234, + "hf_math_lvl5": 0.00906344410876133, + "hf_musr": 0.37818749999999995, + "hf_avg": 5.565351724961825 + }, + { + "hf_id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", + "name": "Phi-3-mini-4k-instruct-LLaMAfied", + "params_b": 3.821, + "ifeval": 0.5787488308798432, + "bbh": 0.5740684031598843, + "gpqa": 0.33053691275167785, + "mmlu_pro": 0.3885472074468085, + "hf_math_lvl5": 0.13821752265861026, + "hf_musr": 0.3923541666666666, + "hf_avg": 26.968080141379957 + }, + { + "hf_id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", + "name": "Phi-3.5-mini-instruct-hermes-fc-json", + "params_b": 4.132, + "ifeval": 0.14158432957885078, + "bbh": 0.29747555432824196, + "gpqa": 0.25419463087248323, + "mmlu_pro": 0.11386303191489362, + "hf_math_lvl5": 0.0075528700906344415, + "hf_musr": 0.40413541666666664, + "hf_avg": 4.642406244437133 + }, + { + "hf_id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", + "name": "SOLAR-10.7B-Instruct-v1.0-uncensored", + "params_b": 10.732, + "ifeval": 0.38840609582574237, + "bbh": 0.5301525050503222, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3343583776595745, + "hf_math_lvl5": 0.06570996978851963, + "hf_musr": 0.4639479166666667, + "hf_avg": 21.621994458789136 + }, + { + "hf_id": "wannaphong/KhanomTanLLM-Instruct", + "name": "KhanomTanLLM-Instruct", + "params_b": 3.447, + "ifeval": 0.16211762567764643, + "bbh": 0.30931233392513263, + "gpqa": 0.2634228187919463, + "mmlu_pro": 0.1118683510638298, + "hf_math_lvl5": 0.013595166163141994, + "hf_musr": 0.37006249999999996, + "hf_avg": 4.8192843205392455 + }, + { + "hf_id": "weathermanj/Menda-3B-500", + "name": "Menda-3B-500", + "params_b": 3.086, + "ifeval": 0.6353021095138676, + "bbh": 0.4766312519942703, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.3474900265957447, + "hf_math_lvl5": 0.3723564954682779, + "hf_musr": 0.39679166666666665, + "hf_avg": 27.910059821789428 + }, + { + "hf_id": "weathermanj/Menda-3b-750", + "name": "Menda-3b-750", + "params_b": 3.086, + "ifeval": 0.6335035483627884, + "bbh": 0.4736825577251204, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.3505651595744681, + "hf_math_lvl5": 0.3716012084592145, + "hf_musr": 0.39418749999999997, + "hf_avg": 27.833449062404906 + }, + { + "hf_id": "weathermanj/Menda-3b-Optim-100", + "name": "Menda-3b-Optim-100", + "params_b": 3.086, + "ifeval": 0.6398234462337709, + "bbh": 0.47348022177793836, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.3460771276595745, + "hf_math_lvl5": 0.3716012084592145, + "hf_musr": 0.39930208333333334, + "hf_avg": 27.957516413922587 + }, + { + "hf_id": "weathermanj/Menda-3b-Optim-200", + "name": "Menda-3b-Optim-200", + "params_b": 3.086, + "ifeval": 0.6374752323834094, + "bbh": 0.47460604908284837, + "gpqa": 0.2827181208053691, + "mmlu_pro": 0.3484042553191489, + "hf_math_lvl5": 0.3731117824773414, + "hf_musr": 0.40330208333333334, + "hf_avg": 27.967747014863694 + }, + { + "hf_id": "win10/Breeze-13B-32k-Instruct-v1_0", + "name": "Breeze-13B-32k-Instruct-v1_0", + "params_b": 12.726, + "ifeval": 0.35843118481185476, + "bbh": 0.46112304746712934, + "gpqa": 0.26426174496644295, + "mmlu_pro": 0.2568151595744681, + "hf_math_lvl5": 0.01283987915407855, + "hf_musr": 0.42019791666666667, + "hf_avg": 15.461558427858103 + }, + { + "hf_id": "win10/Llama-3.2-3B-Instruct-24-9-29", + "name": "Llama-3.2-3B-Instruct-24-9-29", + "params_b": 3.213, + "ifeval": 0.7332211864519476, + "bbh": 0.4614234982167829, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.3228058510638298, + "hf_math_lvl5": 0.17069486404833836, + "hf_musr": 0.35552083333333334, + "hf_avg": 24.004698090064014 + }, + { + "hf_id": "win10/llama3-13.45b-Instruct", + "name": "llama3-13.45b-Instruct", + "params_b": 13.265, + "ifeval": 0.4144348107465968, + "bbh": 0.486541523346714, + "gpqa": 0.25838926174496646, + "mmlu_pro": 0.3345246010638298, + "hf_math_lvl5": 0.02416918429003021, + "hf_musr": 0.38476041666666666, + "hf_avg": 17.340222099927356 + }, + { + "hf_id": "win10/miscii-14b-1M-0128", + "name": "miscii-14b-1M-0128", + "params_b": 14.766, + "ifeval": 0.4180818007331658, + "bbh": 0.5741994518517665, + "gpqa": 0.3825503355704698, + "mmlu_pro": 0.44913563829787234, + "hf_math_lvl5": 0.4773413897280967, + "hf_musr": 0.5431041666666667, + "hf_avg": 35.33959615681345 + }, + { + "hf_id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", + "name": "Deepseek-Lumen-R1-Qwen2.5-14B", + "params_b": 14.77, + "ifeval": 0.4436107306391486, + "bbh": 0.45690468424066283, + "gpqa": 0.28523489932885904, + "mmlu_pro": 0.4379155585106383, + "hf_math_lvl5": 0.27794561933534745, + "hf_musr": 0.47396875000000005, + "hf_avg": 26.028524707214856 + }, + { + "hf_id": "xMaulana/FinMatcha-3B-Instruct", + "name": "FinMatcha-3B-Instruct", + "params_b": 3.213, + "ifeval": 0.7548283000217202, + "bbh": 0.453555265188897, + "gpqa": 0.26929530201342283, + "mmlu_pro": 0.3181515957446808, + "hf_math_lvl5": 0.14350453172205438, + "hf_musr": 0.36333333333333334, + "hf_avg": 24.14212432324268 + }, + { + "hf_id": "xinchen9/Llama3.1_8B_Instruct_CoT", + "name": "Llama3.1_8B_Instruct_CoT", + "params_b": 8.03, + "ifeval": 0.2973565694579272, + "bbh": 0.4398206147249642, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.2878989361702128, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.43706249999999996, + "hf_avg": 16.316624597778073 + }, + { + "hf_id": "xinchen9/Llama3.1_CoT", + "name": "Llama3.1_CoT", + "params_b": 8.03, + "ifeval": 0.22461624046419057, + "bbh": 0.43410143664277245, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.2738530585106383, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.43045833333333333, + "hf_avg": 13.741515006547855 + }, + { + "hf_id": "xinchen9/Llama3.1_CoT_V1", + "name": "Llama3.1_CoT_V1", + "params_b": 8.03, + "ifeval": 0.2452991396162183, + "bbh": 0.4376001847280673, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.2805019946808511, + "hf_math_lvl5": 0.03323262839879154, + "hf_musr": 0.45721875, + "hf_avg": 14.734826092960331 + }, + { + "hf_id": "xinchen9/Mistral-7B-CoT", + "name": "Mistral-7B-CoT", + "params_b": 7.242, + "ifeval": 0.2783470081605695, + "bbh": 0.38726762098069667, + "gpqa": 0.24916107382550334, + "mmlu_pro": 0.2283909574468085, + "hf_math_lvl5": 0.024924471299093656, + "hf_musr": 0.3994270833333333, + "hf_avg": 11.26567589705759 + }, + { + "hf_id": "xinchen9/llama3-b8-ft-dis", + "name": "llama3-b8-ft-dis", + "params_b": 8.03, + "ifeval": 0.154598687039278, + "bbh": 0.4625789691224553, + "gpqa": 0.31291946308724833, + "mmlu_pro": 0.3243849734042553, + "hf_math_lvl5": 0.03927492447129909, + "hf_musr": 0.365375, + "hf_avg": 13.97349198756153 + }, + { + "hf_id": "xwen-team/Xwen-7B-Chat", + "name": "Xwen-7B-Chat", + "params_b": 7.616, + "ifeval": 0.6864098370102439, + "bbh": 0.506762793166296, + "gpqa": 0.2609060402684564, + "mmlu_pro": 0.42902260638297873, + "hf_math_lvl5": 0.4509063444108761, + "hf_musr": 0.3914270833333333, + "hf_avg": 31.576751501854343 + }, + { + "hf_id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", + "name": "L3.1-ClaudeMaid-4x8B", + "params_b": 24.942, + "ifeval": 0.6696487541944263, + "bbh": 0.5070848048063867, + "gpqa": 0.2911073825503356, + "mmlu_pro": 0.35804521276595747, + "hf_math_lvl5": 0.14123867069486404, + "hf_musr": 0.42893749999999997, + "hf_avg": 26.404880965062546 + }, + { + "hf_id": "yam-peleg/Hebrew-Gemma-11B-Instruct", + "name": "Hebrew-Gemma-11B-Instruct", + "params_b": 10.475, + "ifeval": 0.30207737691547315, + "bbh": 0.40357843109818686, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.25540226063829785, + "hf_math_lvl5": 0.06570996978851963, + "hf_musr": 0.4088541666666667, + "hf_avg": 14.058232008864843 + }, + { + "hf_id": "yam-peleg/Hebrew-Mistral-7B", + "name": "Hebrew-Mistral-7B", + "params_b": 7.504, + "ifeval": 0.23283443485507344, + "bbh": 0.43340366992362034, + "gpqa": 0.27936241610738255, + "mmlu_pro": 0.27800864361702127, + "hf_math_lvl5": 0.04984894259818731, + "hf_musr": 0.39765625, + "hf_avg": 13.302117179699644 + }, + { + "hf_id": "yam-peleg/Hebrew-Mistral-7B-200K", + "name": "Hebrew-Mistral-7B-200K", + "params_b": 7.504, + "ifeval": 0.1855731680829089, + "bbh": 0.4149272793394017, + "gpqa": 0.276006711409396, + "mmlu_pro": 0.25731382978723405, + "hf_math_lvl5": 0.023413897280966767, + "hf_musr": 0.3764791666666667, + "hf_avg": 10.64429135893812 + }, + { + "hf_id": "yam-peleg/Hebrew-Mistral-7B-200K", + "name": "Hebrew-Mistral-7B-200K", + "params_b": 7.504, + "ifeval": 0.17698041197356346, + "bbh": 0.3410500846818921, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.2529089095744681, + "hf_math_lvl5": 0.030966767371601207, + "hf_musr": 0.37399999999999994, + "hf_avg": 8.386669075864491 + }, + { + "hf_id": "yanng1242/Marcoro14-7B-slerp", + "name": "Marcoro14-7B-slerp", + "params_b": 7.242, + "ifeval": 0.4059916576904835, + "bbh": 0.5251655292981787, + "gpqa": 0.3145973154362416, + "mmlu_pro": 0.3168218085106383, + "hf_math_lvl5": 0.07477341389728097, + "hf_musr": 0.468625, + "hf_avg": 21.933478090702422 + }, + { + "hf_id": "yasserrmd/Coder-GRPO-3B", + "name": "Coder-GRPO-3B", + "params_b": 3.086, + "ifeval": 0.6207640172520024, + "bbh": 0.4469120364616385, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.3197307180851064, + "hf_math_lvl5": 0.3202416918429003, + "hf_musr": 0.4114583333333333, + "hf_avg": 25.914051108530288 + }, + { + "hf_id": "yasserrmd/Text2SQL-1.5B", + "name": "Text2SQL-1.5B", + "params_b": 1.544, + "ifeval": 0.2857407235025289, + "bbh": 0.38577157961565695, + "gpqa": 0.287751677852349, + "mmlu_pro": 0.23628656914893617, + "hf_math_lvl5": 0.06797583081570997, + "hf_musr": 0.39423958333333337, + "hf_avg": 13.233972018030462 + }, + { + "hf_id": "ycros/BagelMIsteryTour-v2-8x7B", + "name": "BagelMIsteryTour-v2-8x7B", + "params_b": 46.703, + "ifeval": 0.599431730031871, + "bbh": 0.515923595752544, + "gpqa": 0.30453020134228187, + "mmlu_pro": 0.34732380319148937, + "hf_math_lvl5": 0.07854984894259819, + "hf_musr": 0.4202916666666667, + "hf_avg": 24.258614269254906 + }, + { + "hf_id": "ycros/BagelMIsteryTour-v2-8x7B", + "name": "BagelMIsteryTour-v2-8x7B", + "params_b": 46.703, + "ifeval": 0.6262095683896506, + "bbh": 0.5141943573573103, + "gpqa": 0.30788590604026844, + "mmlu_pro": 0.3480718085106383, + "hf_math_lvl5": 0.09365558912386707, + "hf_musr": 0.41375, + "hf_avg": 24.82550730859936 + }, + { + "hf_id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", + "name": "ECE-PRYMMAL-YL-1B-SLERP-V8", + "params_b": 1.357, + "ifeval": 0.15052726764983576, + "bbh": 0.3975573100103517, + "gpqa": 0.28942953020134227, + "mmlu_pro": 0.23836436170212766, + "hf_math_lvl5": 0.004531722054380665, + "hf_musr": 0.3874583333333333, + "hf_avg": 9.679667500944214 + }, + { + "hf_id": "ymcki/Llama-3.1-8B-GRPO-Instruct", + "name": "Llama-3.1-8B-GRPO-Instruct", + "params_b": 8.03, + "ifeval": 0.744536718130117, + "bbh": 0.5131586337530801, + "gpqa": 0.29446308724832215, + "mmlu_pro": 0.3738364361702128, + "hf_math_lvl5": 0.20241691842900303, + "hf_musr": 0.38165625000000003, + "hf_avg": 28.168375624484387 + }, + { + "hf_id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", + "name": "Llama-3.1-8B-SFT-GRPO-Instruct", + "params_b": 8.03, + "ifeval": 0.33540007180946557, + "bbh": 0.3126261967336083, + "gpqa": 0.2533557046979866, + "mmlu_pro": 0.10979055851063829, + "hf_math_lvl5": 0.04003021148036254, + "hf_musr": 0.35260416666666666, + "hf_avg": 7.6591553865170505 + }, + { + "hf_id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", + "name": "gemma-2-2b-ORPO-jpn-it-abliterated-18", + "params_b": 2.614, + "ifeval": 0.4630945890237902, + "bbh": 0.4052902505118913, + "gpqa": 0.28859060402684567, + "mmlu_pro": 0.23445811170212766, + "hf_math_lvl5": 0.04305135951661632, + "hf_musr": 0.3754270833333333, + "hf_avg": 15.288362728327092 + }, + { + "hf_id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", + "name": "gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", + "params_b": 2.614, + "ifeval": 0.5218209905273563, + "bbh": 0.414688942270627, + "gpqa": 0.2835570469798658, + "mmlu_pro": 0.24609375, + "hf_math_lvl5": 0.054380664652567974, + "hf_musr": 0.35139583333333335, + "hf_avg": 16.505537805625195 + }, + { + "hf_id": "ymcki/gemma-2-2b-jpn-it-abliterated-17", + "name": "gemma-2-2b-jpn-it-abliterated-17", + "params_b": 2.614, + "ifeval": 0.5081572449988254, + "bbh": 0.40762664531580056, + "gpqa": 0.27181208053691275, + "mmlu_pro": 0.2455119680851064, + "hf_math_lvl5": 0.03851963746223565, + "hf_musr": 0.37006249999999996, + "hf_avg": 15.644976187098441 + }, + { + "hf_id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", + "name": "gemma-2-2b-jpn-it-abliterated-17-18-24", + "params_b": 2.614, + "ifeval": 0.505484337114412, + "bbh": 0.38123590457353557, + "gpqa": 0.28104026845637586, + "mmlu_pro": 0.2282247340425532, + "hf_math_lvl5": 0.0256797583081571, + "hf_musr": 0.35015625, + "hf_avg": 14.44776069045957 + }, + { + "hf_id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", + "name": "gemma-2-2b-jpn-it-abliterated-17-ORPO", + "params_b": 2.614, + "ifeval": 0.47478468242042227, + "bbh": 0.38979797271028965, + "gpqa": 0.27432885906040266, + "mmlu_pro": 0.21908244680851063, + "hf_math_lvl5": 0.061933534743202415, + "hf_musr": 0.37676041666666665, + "hf_avg": 14.844141845651569 + }, + { + "hf_id": "ymcki/gemma-2-2b-jpn-it-abliterated-18", + "name": "gemma-2-2b-jpn-it-abliterated-18", + "params_b": 2.614, + "ifeval": 0.5175246124726836, + "bbh": 0.4132188791645781, + "gpqa": 0.27348993288590606, + "mmlu_pro": 0.25049867021276595, + "hf_math_lvl5": 0.0445619335347432, + "hf_musr": 0.37415624999999997, + "hf_avg": 16.245944384743186 + }, + { + "hf_id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", + "name": "gemma-2-2b-jpn-it-abliterated-18-ORPO", + "params_b": 2.614, + "ifeval": 0.47423502972113984, + "bbh": 0.40389353402379324, + "gpqa": 0.26174496644295303, + "mmlu_pro": 0.21850066489361702, + "hf_math_lvl5": 0.04682779456193353, + "hf_musr": 0.3953333333333333, + "hf_avg": 15.132293957042885 + }, + { + "hf_id": "ymcki/gemma-2-2b-jpn-it-abliterated-24", + "name": "gemma-2-2b-jpn-it-abliterated-24", + "params_b": 2.614, + "ifeval": 0.49786566310722213, + "bbh": 0.41096027770392857, + "gpqa": 0.27768456375838924, + "mmlu_pro": 0.2473404255319149, + "hf_math_lvl5": 0.04380664652567976, + "hf_musr": 0.39148958333333334, + "hf_avg": 16.334186539569448 + }, + { + "hf_id": "yuchenxie/ArlowGPT-3B-Multilingual", + "name": "ArlowGPT-3B-Multilingual", + "params_b": 3.213, + "ifeval": 0.6395486198841297, + "bbh": 0.4301403132173714, + "gpqa": 0.2802013422818792, + "mmlu_pro": 0.2816655585106383, + "hf_math_lvl5": 0.11253776435045318, + "hf_musr": 0.37266666666666665, + "hf_avg": 20.501174920235083 + }, + { + "hf_id": "yuchenxie/ArlowGPT-8B", + "name": "ArlowGPT-8B", + "params_b": 8.03, + "ifeval": 0.7846536079823756, + "bbh": 0.5080162816130412, + "gpqa": 0.2936241610738255, + "mmlu_pro": 0.378656914893617, + "hf_math_lvl5": 0.2039274924471299, + "hf_musr": 0.3882291666666667, + "hf_avg": 28.973572026793594 + }, + { + "hf_id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", + "name": "Llama3-8B-SuperNova-Spectrum-Hermes-DPO", + "params_b": 8.03, + "ifeval": 0.4690897928607206, + "bbh": 0.4399870586095269, + "gpqa": 0.30201342281879195, + "mmlu_pro": 0.2634640957446808, + "hf_math_lvl5": 0.05664652567975831, + "hf_musr": 0.40121875, + "hf_avg": 18.088167586242665 + }, + { + "hf_id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", + "name": "Llama3-8B-SuperNova-Spectrum-dare_ties", + "params_b": 8.03, + "ifeval": 0.4012708502329375, + "bbh": 0.4615794426716074, + "gpqa": 0.2751677852348993, + "mmlu_pro": 0.35738031914893614, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.42109375, + "hf_avg": 19.17256512798472 + }, + { + "hf_id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", + "name": "Llama3-8B-abliterated-Spectrum-slerp", + "params_b": 8.03, + "ifeval": 0.2884878788281759, + "bbh": 0.4977912063897858, + "gpqa": 0.3011744966442953, + "mmlu_pro": 0.32571476063829785, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.39982291666666664, + "hf_avg": 17.72531595014861 + }, + { + "hf_id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", + "name": "gemma-2-2b-it-chinese-kyara-dpo", + "params_b": 2.614, + "ifeval": 0.5382075116247114, + "bbh": 0.4257464897414603, + "gpqa": 0.26677852348993286, + "mmlu_pro": 0.25731382978723405, + "hf_math_lvl5": 0.08383685800604229, + "hf_musr": 0.45756250000000004, + "hf_avg": 19.62411186744714 + }, + { + "hf_id": "zake7749/gemma-2-9b-it-chinese-kyara", + "name": "gemma-2-9b-it-chinese-kyara", + "params_b": 9.242, + "ifeval": 0.17642965110351644, + "bbh": 0.5953692987878404, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.41788563829787234, + "hf_math_lvl5": 0.10498489425981873, + "hf_musr": 0.4241979166666667, + "hf_avg": 21.38318153545784 + }, + { + "hf_id": "zelk12/MT-Gen4-gemma-2-9B", + "name": "MT-Gen4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7883005979689446, + "bbh": 0.6109884725351095, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.4387466755319149, + "hf_math_lvl5": 0.22356495468277945, + "hf_musr": 0.4228020833333333, + "hf_avg": 34.69186259861687 + }, + { + "hf_id": "zelk12/MT-Gen5-gemma-2-9B", + "name": "MT-Gen5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7923221496739761, + "bbh": 0.6132787046647334, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.4402426861702128, + "hf_math_lvl5": 0.21525679758308158, + "hf_musr": 0.42016666666666663, + "hf_avg": 34.56384281127872 + }, + { + "hf_id": "zelk12/MT-Gen6-gemma-2-9B", + "name": "MT-Gen6-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.1615668648075994, + "bbh": 0.5844669261858688, + "gpqa": 0.33305369127516776, + "mmlu_pro": 0.4165558510638298, + "hf_math_lvl5": 0.0823262839879154, + "hf_musr": 0.40692708333333333, + "hf_avg": 19.816468872767587 + }, + { + "hf_id": "zelk12/MT-Gen6fix-gemma-2-9B", + "name": "MT-Gen6fix-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.15759518078697854, + "bbh": 0.5917309697578781, + "gpqa": 0.337248322147651, + "mmlu_pro": 0.4119847074468085, + "hf_math_lvl5": 0.08157099697885196, + "hf_musr": 0.40841666666666665, + "hf_avg": 20.06441030320394 + }, + { + "hf_id": "zelk12/MT-Gen7-gemma-2-9B", + "name": "MT-Gen7-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.16641289556155447, + "bbh": 0.5935242633580781, + "gpqa": 0.33557046979865773, + "mmlu_pro": 0.4122340425531915, + "hf_math_lvl5": 0.0891238670694864, + "hf_musr": 0.40978125, + "hf_avg": 20.391800709714477 + }, + { + "hf_id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", + "name": "MT-Max-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7907485471881275, + "bbh": 0.6142243374633075, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.4395777925531915, + "hf_math_lvl5": 0.2212990936555891, + "hf_musr": 0.4228020833333333, + "hf_avg": 34.703708309941995 + }, + { + "hf_id": "zelk12/MT-Merge3-gemma-2-9B", + "name": "MT-Merge3-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7858526487497617, + "bbh": 0.6102112889343964, + "gpqa": 0.348993288590604, + "mmlu_pro": 0.4373337765957447, + "hf_math_lvl5": 0.22054380664652568, + "hf_musr": 0.42575, + "hf_avg": 34.63973964338121 + }, + { + "hf_id": "zelk12/MT-Merge4-gemma-2-9B", + "name": "MT-Merge4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7807317916461656, + "bbh": 0.6118218058684427, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.43899601063829785, + "hf_math_lvl5": 0.21676737160120846, + "hf_musr": 0.42943749999999997, + "hf_avg": 34.59930867791183 + }, + { + "hf_id": "zelk12/MT-Merge5-gemma-2-9B", + "name": "MT-Merge5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7843787816327346, + "bbh": 0.6122674386670167, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.4387466755319149, + "hf_math_lvl5": 0.21827794561933533, + "hf_musr": 0.42813541666666666, + "hf_avg": 34.6922400282852 + }, + { + "hf_id": "zelk12/MT-Merge6-gemma-2-9B", + "name": "MT-Merge6-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.16946036516443036, + "bbh": 0.5949106849534558, + "gpqa": 0.3288590604026846, + "mmlu_pro": 0.41148603723404253, + "hf_math_lvl5": 0.08006042296072508, + "hf_musr": 0.40978125, + "hf_avg": 20.20346619991928 + }, + { + "hf_id": "zelk12/MT1-Gen4-gemma-2-9B", + "name": "MT1-Gen4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7941207108250552, + "bbh": 0.6057567677609054, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.42860704787234044, + "hf_math_lvl5": 0.21601208459214502, + "hf_musr": 0.42311458333333335, + "hf_avg": 34.28920938772912 + }, + { + "hf_id": "zelk12/MT1-Gen5-gemma-2-9B", + "name": "MT1-Gen5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7794828831943688, + "bbh": 0.6017455017631886, + "gpqa": 0.3464765100671141, + "mmlu_pro": 0.42220744680851063, + "hf_math_lvl5": 0.20770392749244712, + "hf_musr": 0.41914583333333333, + "hf_avg": 33.556617199966034 + }, + { + "hf_id": "zelk12/MT1-Gen6-gemma-2-9B", + "name": "MT1-Gen6-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.16336542595867853, + "bbh": 0.5943545352208355, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4133144946808511, + "hf_math_lvl5": 0.08081570996978851, + "hf_musr": 0.40444791666666663, + "hf_avg": 19.919694219754245 + }, + { + "hf_id": "zelk12/MT1-Gen7-gemma-2-9B", + "name": "MT1-Gen7-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.16336542595867853, + "bbh": 0.5937953240176393, + "gpqa": 0.32802013422818793, + "mmlu_pro": 0.4144780585106383, + "hf_math_lvl5": 0.08308157099697885, + "hf_musr": 0.41111458333333334, + "hf_avg": 20.19902054572098 + }, + { + "hf_id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", + "name": "MT1-Max-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7928718023732585, + "bbh": 0.6122674386670167, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.43816489361702127, + "hf_math_lvl5": 0.22280966767371602, + "hf_musr": 0.4255, + "hf_avg": 34.87300068519695 + }, + { + "hf_id": "zelk12/MT2-Gen4-gemma-2-9B", + "name": "MT2-Gen4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7895993741051521, + "bbh": 0.609655139201776, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.43209773936170215, + "hf_math_lvl5": 0.22356495468277945, + "hf_musr": 0.41254166666666664, + "hf_avg": 34.202321593914604 + }, + { + "hf_id": "zelk12/MT2-Gen5-gemma-2-9B", + "name": "MT2-Gen5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7749116787900548, + "bbh": 0.6063933817527739, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.43018617021276595, + "hf_math_lvl5": 0.2107250755287009, + "hf_musr": 0.42441666666666666, + "hf_avg": 34.04923355550058 + }, + { + "hf_id": "zelk12/MT2-Gen6-gemma-2-9B", + "name": "MT2-Gen6-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.16641289556155447, + "bbh": 0.595964957637105, + "gpqa": 0.33808724832214765, + "mmlu_pro": 0.42096077127659576, + "hf_math_lvl5": 0.08459214501510574, + "hf_musr": 0.41371874999999997, + "hf_avg": 20.837841635914646 + }, + { + "hf_id": "zelk12/MT2-Gen7-gemma-2-9B", + "name": "MT2-Gen7-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.17615482475387528, + "bbh": 0.6078922830693557, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.4311003989361702, + "hf_math_lvl5": 0.10196374622356495, + "hf_musr": 0.42032291666666666, + "hf_avg": 22.28290945432224 + }, + { + "hf_id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", + "name": "MT2-Max-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7901490268044344, + "bbh": 0.6108461203950706, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.4390791223404255, + "hf_math_lvl5": 0.2243202416918429, + "hf_musr": 0.42283333333333334, + "hf_avg": 34.675340724715895 + }, + { + "hf_id": "zelk12/MT3-Gen4-gemma-2-9B", + "name": "MT3-Gen4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7737126380226687, + "bbh": 0.6100843629460684, + "gpqa": 0.34731543624161076, + "mmlu_pro": 0.4387466755319149, + "hf_math_lvl5": 0.20619335347432025, + "hf_musr": 0.4476354166666667, + "hf_avg": 34.517532467321914 + }, + { + "hf_id": "zelk12/MT3-Gen5-gemma-2-9B", + "name": "MT3-Gen5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7990166092634211, + "bbh": 0.6098615465467813, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.43168218085106386, + "hf_math_lvl5": 0.22658610271903323, + "hf_musr": 0.41911458333333335, + "hf_avg": 34.7576818343369 + }, + { + "hf_id": "zelk12/MT3-Gen5-gemma-2-9B_v1", + "name": "MT3-Gen5-gemma-2-9B_v1", + "params_b": 10.159, + "ifeval": 0.7996161296471141, + "bbh": 0.6113330718661595, + "gpqa": 0.348993288590604, + "mmlu_pro": 0.4359208776595745, + "hf_math_lvl5": 0.22280966767371602, + "hf_musr": 0.4203854166666667, + "hf_avg": 34.734556510838615 + }, + { + "hf_id": "zelk12/MT3-Gen6-gemma-2-9B", + "name": "MT3-Gen6-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.17615482475387528, + "bbh": 0.6020072592121909, + "gpqa": 0.34312080536912754, + "mmlu_pro": 0.41023936170212766, + "hf_math_lvl5": 0.08836858006042296, + "hf_musr": 0.4125729166666667, + "hf_avg": 21.10285538804693 + }, + { + "hf_id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", + "name": "MT3-Max-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.17615482475387528, + "bbh": 0.6123461203950705, + "gpqa": 0.35067114093959734, + "mmlu_pro": 0.4389128989361702, + "hf_math_lvl5": 0.10120845921450151, + "hf_musr": 0.42546875, + "hf_avg": 22.46770825046867 + }, + { + "hf_id": "zelk12/MT4-Gen4-gemma-2-9B", + "name": "MT4-Gen4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7874262512356104, + "bbh": 0.6076031496231499, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.4323470744680851, + "hf_math_lvl5": 0.21450151057401812, + "hf_musr": 0.42435416666666664, + "hf_avg": 34.381140380385126 + }, + { + "hf_id": "zelk12/MT4-Gen5-gemma-2-9B", + "name": "MT4-Gen5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7788833628106757, + "bbh": 0.6106664051994928, + "gpqa": 0.3565436241610738, + "mmlu_pro": 0.43841422872340424, + "hf_math_lvl5": 0.22658610271903323, + "hf_musr": 0.42683333333333334, + "hf_avg": 34.72051149657049 + }, + { + "hf_id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", + "name": "MT4-Max-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.1770790391716202, + "bbh": 0.6120127870617372, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.4390791223404255, + "hf_math_lvl5": 0.09516616314199396, + "hf_musr": 0.4228020833333333, + "hf_avg": 22.332038216896635 + }, + { + "hf_id": "zelk12/MT5-Gen4-gemma-2-9B", + "name": "MT5-Gen4-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7834545672149895, + "bbh": 0.6131056160021203, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.4396609042553192, + "hf_math_lvl5": 0.2243202416918429, + "hf_musr": 0.42283333333333334, + "hf_avg": 34.658891352635635 + }, + { + "hf_id": "zelk12/MT5-Gen5-gemma-2-9B", + "name": "MT5-Gen5-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7947202312087482, + "bbh": 0.6111664051994928, + "gpqa": 0.34815436241610737, + "mmlu_pro": 0.43292885638297873, + "hf_math_lvl5": 0.2258308157099698, + "hf_musr": 0.41911458333333335, + "hf_avg": 34.634253143757086 + }, + { + "hf_id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", + "name": "MT5-Max-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.17615482475387528, + "bbh": 0.6126794537284038, + "gpqa": 0.35151006711409394, + "mmlu_pro": 0.43899601063829785, + "hf_math_lvl5": 0.09818731117824774, + "hf_musr": 0.4227708333333333, + "hf_avg": 22.353756658265933 + }, + { + "hf_id": "zelk12/MTM-Merge-gemma-2-9B", + "name": "MTM-Merge-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7798075772284205, + "bbh": 0.6133348543973538, + "gpqa": 0.3548657718120805, + "mmlu_pro": 0.43882978723404253, + "hf_math_lvl5": 0.2175226586102719, + "hf_musr": 0.4267708333333333, + "hf_avg": 34.61498534418492 + }, + { + "hf_id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", + "name": "MTMaMe-Merge_02012025163610-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.17860277397305815, + "bbh": 0.6116794537284039, + "gpqa": 0.3523489932885906, + "mmlu_pro": 0.43816489361702127, + "hf_math_lvl5": 0.09592145015105741, + "hf_musr": 0.42410416666666667, + "hf_avg": 22.385497094651754 + }, + { + "hf_id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", + "name": "Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7646200968984517, + "bbh": 0.6097862253440982, + "gpqa": 0.3422818791946309, + "mmlu_pro": 0.43467420212765956, + "hf_math_lvl5": 0.20694864048338368, + "hf_musr": 0.4282916666666667, + "hf_avg": 33.87751505283704 + }, + { + "hf_id": "zelk12/Rv0.4MT4g2-gemma-2-9B", + "name": "Rv0.4MT4g2-gemma-2-9B", + "params_b": 10.159, + "ifeval": 0.7320221456845614, + "bbh": 0.604119644415618, + "gpqa": 0.35318791946308725, + "mmlu_pro": 0.44173869680851063, + "hf_math_lvl5": 0.19486404833836857, + "hf_musr": 0.4230833333333333, + "hf_avg": 33.255961795847774 + }, + { + "hf_id": "zelk12/gemma-2-S2MTM-9B", + "name": "gemma-2-S2MTM-9B", + "params_b": 10.159, + "ifeval": 0.7822555264476034, + "bbh": 0.6060836790982922, + "gpqa": 0.34563758389261745, + "mmlu_pro": 0.4296875, + "hf_math_lvl5": 0.20468277945619334, + "hf_musr": 0.42184375, + "hf_avg": 33.89283041556518 + }, + { + "hf_id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", + "name": "Qwen2.5-32B-Instruct-abliterated-v2", + "params_b": 32.764, + "ifeval": 0.8334131216283904, + "bbh": 0.6934020817780425, + "gpqa": 0.3674496644295302, + "mmlu_pro": 0.5621675531914894, + "hf_math_lvl5": 0.595166163141994, + "hf_musr": 0.43542708333333335, + "hf_avg": 46.88867299768722 + }, + { + "hf_id": "zetasepic/Qwen2.5-72B-Instruct-abliterated", + "name": "Qwen2.5-72B-Instruct-abliterated", + "params_b": 72.706, + "ifeval": 0.7152610628687439, + "bbh": 0.7152257183282452, + "gpqa": 0.40687919463087246, + "mmlu_pro": 0.5871841755319149, + "hf_math_lvl5": 0.5241691842900302, + "hf_musr": 0.4719166666666667, + "hf_avg": 46.33795303791254 + }, + { + "hf_id": "zhengr/MixTAO-7Bx2-MoE-v8.1", + "name": "MixTAO-7Bx2-MoE-v8.1", + "params_b": 12.879, + "ifeval": 0.4187810564856802, + "bbh": 0.42019437560239653, + "gpqa": 0.2986577181208054, + "mmlu_pro": 0.28465757978723405, + "hf_math_lvl5": 0.06042296072507553, + "hf_musr": 0.39762499999999995, + "hf_avg": 17.067606418207944 + }, + { + "name": "chatgpt-4o-latest", + "lb_name": "chatgpt-4o-latest", + "lb_global": 0.5611744444444445, + "lb_reasoning": 0.44, + "lb_math": 0.5219166666666667, + "lb_language": 0.49954666666666664, + "lb_if": 0.7251675, + "lb_data_analysis": 0.5165 + }, + { + "name": "command-r-08-2024", + "lb_name": "command-r-08-2024", + "lb_global": 0.3263570588235294, + "lb_reasoning": 0.23875000000000002, + "lb_coding": 0.26102499999999995, + "lb_math": 0.18350666666666668, + "lb_language": 0.2793333333333334, + "lb_if": 0.5561674999999999, + "lb_data_analysis": 0.39766500000000005, + "arena_elo": 1250.21, + "arena_rank": 222, + "arena_votes": 10141 + }, + { + "name": "command-r-plus", + "lb_name": "command-r-plus", + "lb_global": 0.3371144444444445, + "lb_reasoning": 0.34, + "lb_math": 0.16761666666666666, + "lb_language": 0.23921666666666663, + "lb_if": 0.7150825, + "lb_data_analysis": 0.179, + "arena_elo": 1261.9, + "arena_rank": 215, + "arena_votes": 77556, + "aider_pass_rate": 0.218 + }, + { + "name": "command-r-plus-08-2024", + "lb_name": "command-r-plus-08-2024", + "lb_global": 0.35830529411764706, + "lb_reasoning": 0.23125, + "lb_coding": 0.27127999999999997, + "lb_math": 0.22815333333333332, + "lb_language": 0.3086066666666667, + "lb_if": 0.5761225, + "lb_data_analysis": 0.492345, + "arena_elo": 1276.4, + "arena_rank": 205, + "arena_votes": 9869 + }, + { + "name": "deepseek-coder-v2", + "lb_name": "deepseek-coder-v2", + "lb_global": 0.4683305555555555, + "lb_reasoning": 0.4, + "lb_math": 0.47114666666666666, + "lb_language": 0.33044, + "lb_if": 0.6718325, + "lb_data_analysis": 0.3438, + "arena_elo": 1264.19, + "arena_rank": 214, + "arena_votes": 15147 + }, + { + "name": "deepseek-coder-v2-lite-instruct", + "lb_name": "deepseek-coder-v2-lite-instruct", + "lb_global": 0.3083644444444444, + "lb_reasoning": 0.27, + "lb_math": 0.3443766666666666, + "lb_language": 0.10635333333333334, + "lb_if": 0.483375, + "lb_data_analysis": 0.305 + }, + { + "name": "deepseek-v2-lite-chat", + "lb_name": "deepseek-v2-lite-chat", + "lb_global": 0.19665277777777782, + "lb_reasoning": 0.16999999999999998, + "lb_math": 0.14076333333333332, + "lb_language": 0.09204333333333332, + "lb_if": 0.4182925, + "lb_data_analysis": 0.1028 + }, + { + "name": "dracarys-llama-3.1-70b-instruct", + "lb_name": "dracarys-llama-3.1-70b-instruct", + "lb_global": 0.5101333333333334, + "lb_reasoning": 0.33, + "lb_math": 0.45677666666666666, + "lb_language": 0.41767, + "lb_if": 0.77371, + "lb_data_analysis": 0.4698 + }, + { + "name": "gemini-1.5-flash-8b-exp-0827", + "lb_name": "gemini-1.5-flash-8b-exp-0827", + "lb_global": 0.3835955555555556, + "lb_reasoning": 0.32, + "lb_math": 0.27782666666666667, + "lb_language": 0.22520333333333334, + "lb_if": 0.69029, + "lb_data_analysis": 0.2798, + "aider_pass_rate": 0.316 + }, + { + "name": "gemini-1.5-flash-api-0514", + "lb_name": "gemini-1.5-flash-api-0514", + "lb_global": 0.4053527777777778, + "lb_reasoning": 0.33, + "lb_math": 0.32271, + "lb_language": 0.30692, + "lb_if": 0.6301275, + "lb_data_analysis": 0.3904 + }, + { + "name": "gemini-1.5-flash-exp-0827", + "lb_name": "gemini-1.5-flash-exp-0827", + "lb_global": 0.47727333333333327, + "lb_reasoning": 0.37, + "lb_math": 0.28917000000000004, + "lb_language": 0.31043, + "lb_if": 0.7810825, + "lb_data_analysis": 0.42800000000000005, + "aider_pass_rate": 0.406 + }, + { + "name": "gemini-1.5-pro-api-0514", + "lb_name": "gemini-1.5-pro-api-0514", + "lb_global": 0.4573888888888889, + "lb_reasoning": 0.32, + "lb_math": 0.36908666666666673, + "lb_language": 0.38251666666666667, + "lb_if": 0.67196, + "lb_data_analysis": 0.5221 + }, + { + "name": "gemini-1.5-pro-exp-0801", + "lb_name": "gemini-1.5-pro-exp-0801", + "lb_global": 0.5365077777777778, + "lb_reasoning": 0.37, + "lb_math": 0.43523666666666666, + "lb_language": 0.46957333333333334, + "lb_if": 0.7883725, + "lb_data_analysis": 0.47230000000000005 + }, + { + "name": "gemini-1.5-pro-exp-0827", + "lb_name": "gemini-1.5-pro-exp-0827", + "lb_global": 0.5569816666666667, + "lb_reasoning": 0.33999999999999997, + "lb_math": 0.5614933333333333, + "lb_language": 0.49313, + "lb_if": 0.7594575, + "lb_data_analysis": 0.49250000000000005, + "aider_pass_rate": 0.5489999999999999 + }, + { + "name": "gpt-4-0125-preview", + "lb_name": "gpt-4-0125-preview", + "lb_global": 0.48571777777777775, + "lb_reasoning": 0.45, + "lb_math": 0.33393, + "lb_language": 0.43553333333333333, + "lb_if": 0.6392074999999999, + "lb_data_analysis": 0.5308999999999999, + "arena_elo": 1313.13, + "arena_rank": 175, + "arena_votes": 93439, + "aider_pass_rate": 0.556 + }, + { + "name": "mathstral-7b-v0.1", + "lb_name": "mathstral-7b-v0.1", + "lb_global": 0.26630333333333334, + "lb_reasoning": 0.22999999999999998, + "lb_math": 0.17840999999999999, + "lb_language": 0.15367666666666668, + "lb_if": 0.5324575, + "lb_data_analysis": 0.1983 + }, + { + "name": "meta-llama-3.1-405b-instruct-turbo", + "lb_name": "meta-llama-3.1-405b-instruct-turbo", + "lb_global": 0.5015238888888889, + "lb_reasoning": 0.39875000000000005, + "lb_math": 0.4048766666666666, + "lb_language": 0.43584666666666666, + "lb_if": 0.7590425, + "lb_data_analysis": 0.5377 + }, + { + "name": "meta-llama-3.1-70b-instruct-turbo", + "lb_name": "meta-llama-3.1-70b-instruct-turbo", + "lb_global": 0.4325422222222222, + "lb_reasoning": 0.275, + "lb_math": 0.32536, + "lb_language": 0.3835333333333333, + "lb_if": 0.689835, + "lb_data_analysis": 0.5463 + }, + { + "name": "meta-llama-3.1-8b-instruct-turbo", + "lb_name": "meta-llama-3.1-8b-instruct-turbo", + "lb_global": 0.27390555555555557, + "lb_reasoning": 0.14, + "lb_math": 0.15078666666666665, + "lb_language": 0.21096, + "lb_if": 0.5490425, + "lb_data_analysis": 0.2823 + }, + { + "name": "mistral-large-2402", + "lb_name": "mistral-large-2402", + "lb_global": 0.41446666666666665, + "lb_reasoning": 0.39, + "lb_math": 0.32204, + "lb_language": 0.28744, + "lb_if": 0.6819175000000001, + "lb_data_analysis": 0.3983, + "arena_elo": 1242.41, + "arena_rank": 223, + "arena_votes": 62437 + }, + { + "name": "mistral-large-2407", + "lb_name": "mistral-large-2407", + "lb_global": 0.4987861111111111, + "lb_reasoning": 0.36, + "lb_math": 0.43693333333333334, + "lb_language": 0.3979266666666667, + "lb_if": 0.7184575000000001, + "lb_data_analysis": 0.42910000000000004, + "arena_elo": 1314, + "arena_rank": 174, + "arena_votes": 45460 + }, + { + "name": "mistral-small-2402", + "lb_name": "mistral-small-2402", + "lb_global": 0.3296894444444444, + "lb_reasoning": 0.24, + "lb_math": 0.18547333333333335, + "lb_language": 0.22055333333333335, + "lb_if": 0.6390849999999999, + "lb_data_analysis": 0.2582 + }, + { + "name": "open-mistral-nemo", + "lb_name": "open-mistral-nemo", + "lb_global": 0.29661333333333334, + "lb_reasoning": 0.28, + "lb_math": 0.16897333333333334, + "lb_language": 0.14149333333333333, + "lb_if": 0.5180425000000001, + "lb_data_analysis": 0.2703 + }, + { + "name": "qwen1.5-72b-chat", + "lb_name": "qwen1.5-72b-chat", + "lb_global": 0.3123105555555556, + "lb_reasoning": 0.31000000000000005, + "lb_math": 0.2682466666666667, + "lb_language": 0.11368333333333332, + "lb_if": 0.5824975, + "lb_data_analysis": 0.2347, + "arena_elo": 1233.14, + "arena_rank": 231, + "arena_votes": 39296 + }, + { + "name": "smaug-qwen2-72b-instruct", + "lb_name": "smaug-qwen2-72b-instruct", + "lb_global": 0.4081883333333333, + "lb_reasoning": 0.38, + "lb_math": 0.4066699999999999, + "lb_language": 0.3003, + "lb_if": 0.6500425000000001, + "lb_data_analysis": 0.1729 + }, + { + "name": "starling-lm-7b-beta", + "lb_name": "starling-lm-7b-beta", + "lb_global": 0.1769927777777778, + "lb_reasoning": 0.2, + "lb_math": 0.14862666666666666, + "lb_language": 0.07264333333333332, + "lb_if": 0.3832075, + "lb_data_analysis": 0.03, + "arena_elo": 1171.59, + "arena_rank": 265, + "arena_votes": 16057 + }, + { + "name": "vicuna-7b-v1.5-16k", + "lb_name": "vicuna-7b-v1.5-16k", + "lb_global": 0.16691333333333336, + "lb_reasoning": 0.18, + "lb_math": 0.09043000000000001, + "lb_language": 0.07916333333333332, + "lb_if": 0.42121000000000003, + "lb_data_analysis": 0.049 + }, + { + "name": "chatgpt-4o-latest-0903", + "lb_name": "chatgpt-4o-latest-0903", + "lb_global": 0.5547116666666667, + "lb_reasoning": 0.4, + "lb_math": 0.46740000000000004, + "lb_language": 0.5172033333333333, + "lb_if": 0.71746, + "lb_data_analysis": 0.49339999999999995 + }, + { + "name": "dracarys2-72b-instruct", + "lb_name": "dracarys2-72b-instruct", + "lb_global": 0.4962682352941177, + "lb_reasoning": 0.390625, + "lb_coding": 0.58726, + "lb_math": 0.5225033333333333, + "lb_language": 0.3305833333333333, + "lb_if": 0.6521674999999999, + "lb_data_analysis": 0.484765, + "aider_pass_rate": 0.556 + }, + { + "name": "dracarys2-llama-3.1-70b-instruct", + "lb_name": "dracarys2-llama-3.1-70b-instruct", + "lb_global": 0.47264352941176463, + "lb_reasoning": 0.35, + "lb_coding": 0.41136, + "lb_math": 0.4029933333333333, + "lb_language": 0.42367, + "lb_if": 0.6324175, + "lb_data_analysis": 0.55128 + }, + { + "name": "gemini-1.5-flash-002", + "lb_name": "gemini-1.5-flash-002", + "lb_global": 0.5194177777777776, + "lb_reasoning": 0.44, + "lb_math": 0.47234333333333334, + "lb_language": 0.29534, + "lb_if": 0.8454575, + "lb_data_analysis": 0.39359999999999995, + "arena_elo": 1309.7, + "arena_rank": 178, + "arena_votes": 34909 + }, + { + "name": "gemini-1.5-pro-002", + "lb_name": "gemini-1.5-pro-002", + "lb_global": 0.5654955555555556, + "lb_reasoning": 0.37, + "lb_math": 0.5740066666666667, + "lb_language": 0.4740566666666666, + "lb_if": 0.7774575, + "lb_data_analysis": 0.5145, + "arena_elo": 1351.24, + "arena_rank": 122, + "arena_votes": 55607, + "aider_pass_rate": 0.496 + }, + { + "name": "gemini-exp-1114", + "lb_name": "gemini-exp-1114", + "lb_global": 0.5836005555555556, + "lb_reasoning": 0.39, + "lb_math": 0.5492266666666666, + "lb_language": 0.44917, + "lb_if": 0.7744599999999999, + "lb_data_analysis": 0.5723, + "aider_pass_rate": 0.504 + }, + { + "name": "gemini-exp-1121", + "lb_name": "gemini-exp-1121", + "lb_global": 0.5925927777777776, + "lb_reasoning": 0.37, + "lb_math": 0.6275233333333333, + "lb_language": 0.4155933333333333, + "lb_if": 0.865335, + "lb_data_analysis": 0.5644, + "aider_pass_rate": 0.466 + }, + { + "name": "gpt-4o-2024-11-20", + "lb_name": "gpt-4o-2024-11-20", + "lb_global": 0.5312570588235295, + "lb_reasoning": 0.47624999999999995, + "lb_coding": 0.692895, + "lb_math": 0.4147766666666666, + "lb_language": 0.44683, + "lb_if": 0.6494175, + "lb_data_analysis": 0.635295, + "aider_pass_rate": 0.586 + }, + { + "name": "o1-mini-2024-09-12", + "lb_name": "o1-mini-2024-09-12", + "lb_global": 0.5478455555555555, + "lb_reasoning": 0.585, + "lb_math": 0.6025633333333333, + "lb_language": 0.44664333333333334, + "lb_if": 0.6539999999999999, + "lb_data_analysis": 0.5488, + "aider_pass_rate": 0.08900000000000001 + }, + { + "name": "o1-preview-2024-09-12", + "lb_name": "o1-preview-2024-09-12", + "lb_global": 0.6751361111111112, + "lb_reasoning": 0.54, + "lb_math": 0.62916, + "lb_language": 0.7266166666666667, + "lb_if": 0.77725, + "lb_data_analysis": 0.6196 + }, + { + "name": "qwen2.5-7b-instruct-turbo", + "lb_name": "qwen2.5-7b-instruct-turbo", + "lb_global": 0.34952, + "lb_reasoning": 0.22625, + "lb_coding": 0.34293, + "lb_math": 0.36813999999999997, + "lb_language": 0.18380333333333335, + "lb_if": 0.521085, + "lb_data_analysis": 0.42332000000000003 + }, + { + "name": "step-2-16k-202411", + "lb_name": "step-2-16k-202411", + "lb_global": 0.5487152941176472, + "lb_reasoning": 0.39249999999999996, + "lb_coding": 0.57578, + "lb_math": 0.43683333333333335, + "lb_language": 0.38405, + "lb_if": 0.7988325, + "lb_data_analysis": 0.623475 + }, + { + "name": "amazon.nova-lite-v1:0", + "lb_name": "amazon.nova-lite-v1:0", + "lb_global": 0.3951647058823529, + "lb_reasoning": 0.28, + "lb_coding": 0.4504, + "lb_math": 0.34616, + "lb_language": 0.27619666666666665, + "lb_if": 0.5412925, + "lb_data_analysis": 0.41237999999999997 + }, + { + "name": "amazon.nova-micro-v1:0", + "lb_name": "amazon.nova-micro-v1:0", + "lb_global": 0.3434476470588235, + "lb_reasoning": 0.24625, + "lb_coding": 0.28919, + "lb_math": 0.34147333333333335, + "lb_language": 0.24191666666666664, + "lb_if": 0.4804175, + "lb_data_analysis": 0.41294499999999995 + }, + { + "name": "amazon.nova-pro-v1:0", + "lb_name": "amazon.nova-pro-v1:0", + "lb_global": 0.45361294117647044, + "lb_reasoning": 0.24875000000000003, + "lb_coding": 0.49648000000000003, + "lb_math": 0.37695999999999996, + "lb_language": 0.38935000000000003, + "lb_if": 0.6712899999999999, + "lb_data_analysis": 0.44343499999999997 + }, + { + "name": "chatgpt-4o-latest-2025-01-29", + "lb_name": "chatgpt-4o-latest-2025-01-29", + "lb_global": 0.5803555555555556, + "lb_reasoning": 0.44875, + "lb_math": 0.4801666666666667, + "lb_language": 0.4914266666666667, + "lb_if": 0.65071, + "lb_data_analysis": 0.69 + }, + { + "name": "claude-3-7-sonnet", + "lb_name": "claude-3-7-sonnet-thinking", + "lb_global": 0.7647433333333334, + "lb_reasoning": 0.8174999999999999, + "lb_math": 0.78999, + "lb_language": 0.5992933333333333, + "lb_if": 0.81254, + "lb_data_analysis": 0.7807999999999999 + }, + { + "name": "gemini-2.0-flash-exp", + "lb_name": "gemini-2.0-flash-exp", + "lb_global": 0.6079205555555556, + "lb_reasoning": 0.48625, + "lb_math": 0.6039100000000001, + "lb_language": 0.38215333333333334, + "lb_if": 0.8186249999999999, + "lb_data_analysis": 0.6451, + "aider_pass_rate": 0.564 + }, + { + "name": "gemini-2.0-flash-lite-preview-02-05", + "lb_name": "gemini-2.0-flash-lite-preview-02-05", + "lb_global": 0.5515844444444444, + "lb_reasoning": 0.41125, + "lb_math": 0.55543, + "lb_language": 0.3427633333333333, + "lb_if": 0.782835, + "lb_data_analysis": 0.5621, + "arena_elo": 1353.28, + "arena_rank": 119, + "arena_votes": 24951 + }, + { + "name": "gemini-2.0-flash-thinking-exp-01-21", + "lb_name": "gemini-2.0-flash-thinking-exp-01-21", + "lb_global": 0.6464572222222222, + "lb_reasoning": 0.6925, + "lb_math": 0.7481066666666667, + "lb_language": 0.4842666666666666, + "lb_if": 0.824705, + "lb_data_analysis": 0.7605 + }, + { + "name": "gemini-2.0-pro-exp-02-05", + "lb_name": "gemini-2.0-pro-exp-02-05", + "lb_global": 0.6425611111111112, + "lb_reasoning": 0.48125, + "lb_math": 0.6853566666666667, + "lb_language": 0.5249766666666666, + "lb_if": 0.8338325, + "lb_data_analysis": 0.7103 + }, + { + "name": "gemini-exp-1206", + "lb_name": "gemini-exp-1206", + "lb_global": 0.6486677777777777, + "lb_reasoning": 0.47500000000000003, + "lb_math": 0.72358, + "lb_language": 0.5129233333333333, + "lb_if": 0.773375, + "lb_data_analysis": 0.6174 + }, + { + "name": "grok-2-1212", + "lb_name": "grok-2-1212", + "lb_global": 0.5052638888888888, + "lb_reasoning": 0.3925, + "lb_math": 0.5593566666666666, + "lb_language": 0.45791666666666675, + "lb_if": 0.69625, + "lb_data_analysis": 0.5367 + }, + { + "name": "grok-beta", + "lb_name": "grok-beta", + "lb_global": 0.5053522222222222, + "lb_reasoning": 0.30500000000000005, + "lb_math": 0.45842333333333335, + "lb_language": 0.43165000000000003, + "lb_if": 0.69621, + "lb_data_analysis": 0.5441 + }, + { + "name": "learnlm-1.5-pro-experimental", + "lb_name": "learnlm-1.5-pro-experimental", + "lb_global": 0.5043411764705883, + "lb_reasoning": 0.33125000000000004, + "lb_coding": 0.589255, + "lb_math": 0.5670833333333333, + "lb_language": 0.3786433333333334, + "lb_if": 0.6815825, + "lb_data_analysis": 0.392975 + }, + { + "name": "llama-3.3-70b-instruct-turbo", + "lb_name": "llama-3.3-70b-instruct-turbo", + "lb_global": 0.511525294117647, + "lb_reasoning": 0.35125, + "lb_coding": 0.51822, + "lb_math": 0.41403999999999996, + "lb_language": 0.4396566666666666, + "lb_if": 0.8267074999999999, + "lb_data_analysis": 0.40787 + }, + { + "name": "mistral-large-2411", + "lb_name": "mistral-large-2411", + "lb_global": 0.503149411764706, + "lb_reasoning": 0.3325, + "lb_coding": 0.6289, + "lb_math": 0.42202333333333336, + "lb_language": 0.40453333333333336, + "lb_if": 0.67929, + "lb_data_analysis": 0.541955, + "arena_elo": 1305.07, + "arena_rank": 184, + "arena_votes": 28081, + "aider_pass_rate": 0.466 + }, + { + "name": "mistral-small-2501", + "lb_name": "mistral-small-2501", + "lb_global": 0.4389700000000001, + "lb_reasoning": 0.29625, + "lb_math": 0.39886666666666665, + "lb_language": 0.3046466666666667, + "lb_if": 0.5954174999999999, + "lb_data_analysis": 0.5353 + }, + { + "name": "o1-2024-12-17-high", + "lb_name": "o1-2024-12-17-high", + "lb_global": 0.7353261111111111, + "lb_reasoning": 0.87375, + "lb_math": 0.7928099999999999, + "lb_language": 0.7215266666666666, + "lb_if": 0.8154575, + "lb_data_analysis": 0.632 + }, + { + "name": "o3-mini-2025-01-31-high", + "lb_name": "o3-mini-2025-01-31-high", + "lb_global": 0.7242327777777777, + "lb_reasoning": 0.84375, + "lb_math": 0.7655433333333334, + "lb_language": 0.5685533333333334, + "lb_if": 0.843585, + "lb_data_analysis": 0.7196 + }, + { + "name": "o3-mini-2025-01-31-low", + "lb_name": "o3-mini-2025-01-31-low", + "lb_global": 0.6138683333333333, + "lb_reasoning": 0.6475, + "lb_math": 0.6167466666666667, + "lb_language": 0.48070666666666667, + "lb_if": 0.800625, + "lb_data_analysis": 0.6306 + }, + { + "name": "o3-mini-2025-01-31-medium", + "lb_name": "o3-mini-2025-01-31-medium", + "lb_global": 0.6853044444444445, + "lb_reasoning": 0.805, + "lb_math": 0.7167533333333332, + "lb_language": 0.5411733333333334, + "lb_if": 0.8315825000000001, + "lb_data_analysis": 0.6784 + }, + { + "name": "olmo-2-1124-13b-instruct", + "lb_name": "olmo-2-1124-13b-instruct", + "lb_global": 0.24903055555555545, + "lb_reasoning": 0.195, + "lb_math": 0.13641333333333336, + "lb_language": 0.11159000000000001, + "lb_if": 0.6055825, + "lb_data_analysis": 0.099 + }, + { + "name": "perplexity-sonar", + "lb_name": "perplexity-sonar", + "lb_global": 0.49163333333333337, + "lb_reasoning": 0.37375, + "lb_math": 0.41626, + "lb_language": 0.44117, + "lb_if": 0.7623325, + "lb_data_analysis": 0.3086 + }, + { + "name": "perplexity-sonar-pro", + "lb_name": "perplexity-sonar-pro", + "lb_global": 0.6049583333333334, + "lb_reasoning": 0.42375, + "lb_math": 0.5543733333333333, + "lb_language": 0.6904633333333332, + "lb_if": 0.64242, + "lb_data_analysis": 0.5974999999999999 + }, + { + "name": "qwen2.5-72b-instruct-turbo", + "lb_name": "qwen2.5-72b-instruct-turbo", + "lb_global": 0.49392705882352944, + "lb_reasoning": 0.35125, + "lb_coding": 0.572565, + "lb_math": 0.5187700000000001, + "lb_language": 0.36465666666666663, + "lb_if": 0.6439174999999999, + "lb_data_analysis": 0.50159 + }, + { + "name": "qwen2.5-max", + "lb_name": "qwen2.5-max", + "lb_global": 0.6028288235294117, + "lb_reasoning": 0.43125, + "lb_coding": 0.66794, + "lb_math": 0.5686833333333333, + "lb_language": 0.5836933333333333, + "lb_if": 0.7534574999999999, + "lb_data_analysis": 0.64271, + "arena_elo": 1374.18, + "arena_rank": 102, + "arena_votes": 33204 + }, + { + "name": "gpt-4.5-preview", + "lb_name": "gpt-4.5-preview", + "lb_global": 0.6878949999999999, + "lb_reasoning": 0.59625, + "lb_math": 0.6932900000000001, + "lb_language": 0.61445, + "lb_if": 0.72325, + "lb_data_analysis": 0.6749 + }, + { + "name": "gemini-2.0-flash-lite", + "lb_name": "gemini-2.0-flash-lite", + "lb_global": 0.5593472222222222, + "lb_reasoning": 0.36375, + "lb_math": 0.5809166666666666, + "lb_language": 0.33596333333333334, + "lb_if": 0.7662925, + "lb_data_analysis": 0.6617 + }, + { + "name": "mistral-small-2503", + "lb_name": "mistral-small-2503", + "lb_global": 0.46377117647058824, + "lb_reasoning": 0.38125, + "lb_coding": 0.49648000000000003, + "lb_math": 0.38392333333333334, + "lb_language": 0.34586000000000006, + "lb_if": 0.6366275000000001, + "lb_data_analysis": 0.521395 + }, + { + "name": "gemma-3-27b-it", + "lb_name": "gemma-3-27b-it", + "lb_global": 0.5053470588235295, + "lb_reasoning": 0.32625, + "lb_coding": 0.489435, + "lb_math": 0.52267, + "lb_language": 0.41314333333333336, + "lb_if": 0.7490399999999999, + "lb_data_analysis": 0.387965, + "arena_elo": 1365.18, + "arena_rank": 110, + "arena_votes": 48453 + }, + { + "name": "gemini-2.5-pro-exp-03-25", + "lb_name": "gemini-2.5-pro-exp-03-25", + "lb_global": 0.786795, + "lb_reasoning": 0.85625, + "lb_math": 0.8915666666666667, + "lb_language": 0.6931400000000001, + "lb_if": 0.8059149999999999, + "lb_data_analysis": 0.8184 + }, + { + "name": "deepseek-v3-0324", + "lb_name": "deepseek-v3-0324", + "lb_global": 0.6349094117647057, + "lb_reasoning": 0.4875, + "lb_coding": 0.6890700000000001, + "lb_math": 0.7143666666666667, + "lb_language": 0.4682266666666666, + "lb_if": 0.8147075, + "lb_data_analysis": 0.64019, + "arena_elo": 1394.16, + "arena_rank": 79, + "arena_votes": 46431 + }, + { + "name": "chatgpt-4o-latest-2025-03-27", + "lb_name": "chatgpt-4o-latest-2025-03-27", + "lb_global": 0.610315294117647, + "lb_reasoning": 0.5237499999999999, + "lb_coding": 0.7748, + "lb_math": 0.55717, + "lb_language": 0.49428333333333335, + "lb_if": 0.71921, + "lb_data_analysis": 0.665195 + }, + { + "name": "azerogpt", + "lb_name": "azerogpt", + "lb_global": 0.3511294444444445, + "lb_reasoning": 0.24875000000000003, + "lb_math": 0.31808333333333333, + "lb_language": 0.3073133333333333, + "lb_if": 0.5930825, + "lb_data_analysis": 0.2693 + }, + { + "name": "hunyuan-turbos-20250313", + "lb_name": "hunyuan-turbos-20250313", + "lb_global": 0.5244894117647059, + "lb_reasoning": 0.35, + "lb_coding": 0.50352, + "lb_math": 0.5746633333333334, + "lb_language": 0.34457999999999994, + "lb_if": 0.76129, + "lb_data_analysis": 0.47986000000000006 + }, + { + "name": "claude-3-7-sonnet-20250219", + "lb_name": "claude-3-7-sonnet-20250219-base", + "lb_global": 0.6044569999999999, + "lb_reasoning": 0.52, + "lb_coding": 0.504286, + "lb_math": 0.6465366666666666, + "lb_language": 0.6319400000000001, + "lb_if": 0.764915, + "lb_data_analysis": 0.599645, + "arena_elo": 1371.24, + "arena_rank": 105, + "arena_votes": 44275 + }, + { + "name": "command-a-03-2025", + "lb_name": "command-a-03-2025", + "lb_global": 0.4524654999999999, + "lb_reasoning": 0.375, + "lb_coding": 0.241372, + "lb_math": 0.4554233333333333, + "lb_language": 0.36696, + "lb_if": 0.82904, + "lb_data_analysis": 0.48457000000000006, + "arena_elo": 1353.06, + "arena_rank": 121, + "arena_votes": 57098 + }, + { + "name": "gemini-1.5-flash-8b-001", + "lb_name": "gemini-1.5-flash-8b-001", + "lb_global": 0.3732377777777778, + "lb_reasoning": 0.20875, + "lb_math": 0.32167666666666667, + "lb_language": 0.22867666666666667, + "lb_if": 0.6971649999999999, + "lb_data_analysis": 0.4241, + "arena_elo": 1258.67, + "arena_rank": 219, + "arena_votes": 35556 + }, + { + "name": "gemini-2.0-flash-001", + "lb_name": "gemini-2.0-flash-001", + "lb_global": 0.5820266666666667, + "lb_reasoning": 0.45875, + "lb_math": 0.6318866666666666, + "lb_language": 0.42386999999999997, + "lb_if": 0.8578749999999999, + "lb_data_analysis": 0.7332000000000001, + "arena_elo": 1360.78, + "arena_rank": 113, + "arena_votes": 44686 + }, + { + "name": "gemini-2.0-flash-lite-001", + "lb_name": "gemini-2.0-flash-lite-001", + "lb_global": 0.5408094117647059, + "lb_reasoning": 0.36375, + "lb_coding": 0.59308, + "lb_math": 0.5496666666666666, + "lb_language": 0.33941, + "lb_if": 0.7662925, + "lb_data_analysis": 0.65385 + }, + { + "name": "gemma-3-12b-it", + "lb_name": "gemma-3-12b-it", + "lb_global": 0.4429183333333333, + "lb_reasoning": 0.23249999999999998, + "lb_math": 0.4814133333333333, + "lb_language": 0.3126966666666667, + "lb_if": 0.7382925, + "lb_data_analysis": 0.4284, + "arena_elo": 1341.62, + "arena_rank": 136, + "arena_votes": 3829 + }, + { + "name": "gemma-3-4b-it", + "lb_name": "gemma-3-4b-it", + "lb_global": 0.33008388888888895, + "lb_reasoning": 0.165, + "lb_math": 0.31326, + "lb_language": 0.15059333333333333, + "lb_if": 0.6358325, + "lb_data_analysis": 0.36950000000000005, + "arena_elo": 1303.26, + "arena_rank": 187, + "arena_votes": 4177 + }, + { + "name": "gpt-4.5-preview-2025-02-27", + "lb_name": "gpt-4.5-preview-2025-02-27", + "lb_global": 0.6605464705882353, + "lb_reasoning": 0.59625, + "lb_coding": 0.760715, + "lb_math": 0.6794033333333332, + "lb_language": 0.6475866666666666, + "lb_if": 0.72325, + "lb_data_analysis": 0.600695, + "arena_elo": 1444.26, + "arena_rank": 26, + "arena_votes": 14549 + }, + { + "name": "llama-4-maverick-17b-128e-instruct", + "lb_name": "llama-4-maverick-17b-128e-instruct", + "lb_global": 0.5650666666666666, + "lb_reasoning": 0.4575, + "lb_math": 0.6057899999999999, + "lb_language": 0.4964766666666667, + "lb_if": 0.7574575, + "lb_data_analysis": 0.6155, + "arena_elo": 1327.62, + "arena_rank": 153, + "arena_votes": 40932 + }, + { + "name": "grok-3-beta", + "lb_name": "grok-3-beta", + "lb_global": 0.6427005882352941, + "lb_reasoning": 0.55125, + "lb_coding": 0.73576, + "lb_math": 0.6275233333333333, + "lb_language": 0.5379666666666667, + "lb_if": 0.8473775, + "lb_data_analysis": 0.55629 + }, + { + "name": "grok-3-mini-beta-high", + "lb_name": "grok-3-mini-beta-high", + "lb_global": 0.7200670588235294, + "lb_reasoning": 0.8475, + "lb_coding": 0.5451600000000001, + "lb_math": 0.7700466666666667, + "lb_language": 0.5908733333333333, + "lb_if": 0.7870425, + "lb_data_analysis": 0.6457799999999999 + }, + { + "name": "gpt-4.1-2025-04-14", + "lb_name": "gpt-4.1-2025-04-14", + "lb_global": 0.63021, + "lb_reasoning": 0.4425, + "lb_coding": 0.731935, + "lb_math": 0.6238633333333333, + "lb_language": 0.54551, + "lb_if": 0.7704575, + "lb_data_analysis": 0.66404, + "arena_elo": 1413.4, + "arena_rank": 59, + "arena_votes": 51837 + }, + { + "name": "gpt-4.1-mini-2025-04-14", + "lb_name": "gpt-4.1-mini-2025-04-14", + "lb_global": 0.588105294117647, + "lb_reasoning": 0.54, + "lb_coding": 0.721065, + "lb_math": 0.5877866666666667, + "lb_language": 0.37996, + "lb_if": 0.7030825, + "lb_data_analysis": 0.6133799999999999, + "arena_elo": 1381.85, + "arena_rank": 96, + "arena_votes": 40313 + }, + { + "name": "gpt-4.1-nano-2025-04-14", + "lb_name": "gpt-4.1-nano-2025-04-14", + "lb_global": 0.46059294117647065, + "lb_reasoning": 0.36375, + "lb_coding": 0.632115, + "lb_math": 0.42391, + "lb_language": 0.30958, + "lb_if": 0.5753725, + "lb_data_analysis": 0.498195, + "arena_elo": 1321.71, + "arena_rank": 163, + "arena_votes": 6107 + }, + { + "name": "o3-2025-04-16-high", + "lb_name": "o3-2025-04-16-high", + "lb_global": 0.8206864705882352, + "lb_reasoning": 0.935, + "lb_coding": 0.767145, + "lb_math": 0.8500366666666667, + "lb_language": 0.7599600000000001, + "lb_if": 0.8617475, + "lb_data_analysis": 0.6702 + }, + { + "name": "o4-mini-2025-04-16-high", + "lb_name": "o4-mini-2025-04-16-high", + "lb_global": 0.7962482352941177, + "lb_reasoning": 0.915, + "lb_coding": 0.799755, + "lb_math": 0.8489533333333333, + "lb_language": 0.6605466666666667, + "lb_if": 0.8495824999999999, + "lb_data_analysis": 0.6832750000000001 + }, + { + "name": "o4-mini-2025-04-16-medium", + "lb_name": "o4-mini-2025-04-16-medium", + "lb_global": 0.7519911764705883, + "lb_reasoning": 0.83875, + "lb_coding": 0.74219, + "lb_math": 0.8102, + "lb_language": 0.62409, + "lb_if": 0.81825, + "lb_data_analysis": 0.684715 + }, + { + "name": "o3-2025-04-16-medium", + "lb_name": "o3-2025-04-16-medium", + "lb_global": 0.8028294117647059, + "lb_reasoning": 0.94, + "lb_coding": 0.7786299999999999, + "lb_math": 0.8065733333333333, + "lb_language": 0.7348133333333333, + "lb_if": 0.8432074999999999, + "lb_data_analysis": 0.681925 + }, + { + "name": "gemini-2.5-flash-preview-04-17", + "lb_name": "gemini-2.5-flash-preview-04-17", + "lb_global": 0.7128994117647058, + "lb_reasoning": 0.73375, + "lb_coding": 0.60334, + "lb_math": 0.8180166666666667, + "lb_language": 0.5943233333333334, + "lb_if": 0.7902075, + "lb_data_analysis": 0.6552950000000001 + }, + { + "name": "claude-4-opus-20250514", + "lb_name": "claude-4-opus-20250514-base", + "lb_global": 0.7213676470588235, + "lb_reasoning": 0.605, + "lb_coding": 0.72872, + "lb_math": 0.7879033333333333, + "lb_language": 0.7611366666666667, + "lb_if": 0.78379, + "lb_data_analysis": 0.6651 + }, + { + "name": "claude-4-sonnet-20250514", + "lb_name": "claude-4-sonnet-20250514-base", + "lb_global": 0.4837226086956521, + "lb_reasoning": 0.39672999999999997, + "lb_coding": 0.552964, + "lb_math": 0.6035725, + "lb_language": 0.71014, + "lb_if": 0.22679, + "lb_data_analysis": 0.44066999999999995 + }, + { + "name": "deepseek-r1-0528", + "lb_name": "deepseek-r1-0528", + "lb_global": 0.6963865, + "lb_reasoning": 0.88625, + "lb_coding": 0.4127739999999999, + "lb_math": 0.8525833333333334, + "lb_language": 0.6482266666666666, + "lb_if": 0.79954, + "lb_data_analysis": 0.7153849999999999, + "arena_elo": 1419.15, + "arena_rank": 48, + "arena_votes": 19177 + }, + { + "name": "gemini-2.5-flash-preview-05-20", + "lb_name": "gemini-2.5-flash-preview-05-20", + "lb_global": 0.7309376470588234, + "lb_reasoning": 0.76125, + "lb_coding": 0.628295, + "lb_math": 0.8410333333333333, + "lb_language": 0.5703866666666667, + "lb_if": 0.7955825, + "lb_data_analysis": 0.698465 + }, + { + "name": "gemini-2.5-pro-preview-03-25", + "lb_name": "gemini-2.5-pro-preview-03-25", + "lb_global": 0.7808658823529412, + "lb_reasoning": 0.85625, + "lb_coding": 0.71081, + "lb_math": 0.8915666666666667, + "lb_language": 0.6931400000000001, + "lb_if": 0.8059149999999999, + "lb_data_analysis": 0.6247449999999999 + }, + { + "name": "gemini-2.5-pro-preview-05-06", + "lb_name": "gemini-2.5-pro-preview-05-06", + "lb_global": 0.8020735294117648, + "lb_reasoning": 0.86875, + "lb_coding": 0.72872, + "lb_math": 0.8862833333333334, + "lb_language": 0.7181133333333335, + "lb_if": 0.8350425, + "lb_data_analysis": 0.688475 + }, + { + "name": "learnlm-2.0-flash-experimental", + "lb_name": "learnlm-2.0-flash-experimental", + "lb_global": 0.5876311764705882, + "lb_reasoning": 0.4175, + "lb_coding": 0.642985, + "lb_math": 0.6110166666666667, + "lb_language": 0.43344000000000005, + "lb_if": 0.8375849999999999, + "lb_data_analysis": 0.5141899999999999 + }, + { + "name": "llama4-maverick-instruct-basic", + "lb_name": "llama4-maverick-instruct-basic", + "lb_global": 0.5692811764705882, + "lb_reasoning": 0.4575, + "lb_coding": 0.541945, + "lb_math": 0.6057899999999999, + "lb_language": 0.4964766666666667, + "lb_if": 0.7574575, + "lb_data_analysis": 0.47113 + }, + { + "name": "mistral-medium-2505", + "lb_name": "mistral-medium-2505", + "lb_global": 0.501658, + "lb_reasoning": 0.44625, + "lb_coding": 0.31590799999999997, + "lb_math": 0.59742, + "lb_language": 0.44743666666666665, + "lb_if": 0.7139575, + "lb_data_analysis": 0.602025, + "arena_elo": 1384.53, + "arena_rank": 93, + "arena_votes": 34386 + }, + { + "name": "phi-4-reasoning-plus", + "lb_name": "phi-4-reasoning-plus", + "lb_global": 0.574950588235294, + "lb_reasoning": 0.6125, + "lb_coding": 0.6059399999999999, + "lb_math": 0.6283266666666666, + "lb_language": 0.3069233333333334, + "lb_if": 0.731665, + "lb_data_analysis": 0.5474349999999999 + }, + { + "name": "qwen3-14b", + "lb_name": "qwen3-14b-thinking", + "lb_global": 0.6958435294117649, + "lb_reasoning": 0.7462500000000001, + "lb_coding": 0.5816, + "lb_math": 0.7350333333333333, + "lb_language": 0.5313133333333333, + "lb_if": 0.8233775000000001, + "lb_data_analysis": 0.68221 + }, + { + "name": "qwen3-235b-a22b", + "lb_name": "qwen3-235b-a22b-thinking", + "lb_global": 0.641824, + "lb_reasoning": 0.8275, + "lb_coding": 0.27999399999999997, + "lb_math": 0.8015266666666667, + "lb_language": 0.60609, + "lb_if": 0.8772925, + "lb_data_analysis": 0.68308, + "arena_elo": 1374.77, + "arena_rank": 100, + "arena_votes": 27019 + }, + { + "name": "qwen3-30b-a3b", + "lb_name": "qwen3-30b-a3b-thinking", + "lb_global": 0.3884617391304348, + "lb_reasoning": 0.36678, + "lb_coding": 0.20553, + "lb_math": 0.6534725, + "lb_language": 0.54465, + "lb_if": 0.21108249999999998, + "lb_data_analysis": 0.44922666666666666, + "arena_elo": 1328.13, + "arena_rank": 152, + "arena_votes": 27282 + }, + { + "name": "qwen3-32b", + "lb_name": "qwen3-32b-thinking", + "lb_global": 0.4270286956521739, + "lb_reasoning": 0.4825475, + "lb_coding": 0.28411400000000003, + "lb_math": 0.6744, + "lb_language": 0.5554233333333333, + "lb_if": 0.1777075, + "lb_data_analysis": 0.4654, + "arena_elo": 1347, + "arena_rank": 128, + "arena_votes": 3932 + }, + { + "name": "claude-4-1-opus-20250805", + "lb_name": "claude-4-1-opus-20250805-base", + "lb_global": 0.5204682608695652, + "lb_reasoning": 0.4089425, + "lb_coding": 0.624286, + "lb_math": 0.6282625000000001, + "lb_language": 0.7675233333333332, + "lb_if": 0.2591675, + "lb_data_analysis": 0.45376 + }, + { + "name": "claude-sonnet-4-5-20250929", + "lb_name": "claude-sonnet-4-5-20250929", + "lb_global": 0.5129947826086956, + "lb_reasoning": 0.4229325, + "lb_coding": 0.594286, + "lb_math": 0.6261975, + "lb_language": 0.7599833333333333, + "lb_if": 0.2352075, + "lb_data_analysis": 0.47005, + "arena_elo": 1449.59, + "arena_rank": 21, + "arena_votes": 46720 + }, + { + "name": "deepseek-v3.1-terminus", + "lb_name": "deepseek-v3.1-terminus", + "lb_global": 0.6519375000000001, + "lb_reasoning": 0.73375, + "lb_coding": 0.428444, + "lb_math": 0.8069266666666667, + "lb_language": 0.63882, + "lb_if": 0.8189575, + "lb_data_analysis": 0.67298, + "arena_elo": 1416.1, + "arena_rank": 54, + "arena_votes": 3743 + }, + { + "name": "gemini-2.5-flash-06-05", + "lb_name": "gemini-2.5-flash-06-05-highthinking", + "lb_global": 0.46885739130434795, + "lb_reasoning": 0.44639500000000004, + "lb_coding": 0.364114, + "lb_math": 0.6875425, + "lb_language": 0.6227466666666667, + "lb_if": 0.28496, + "lb_data_analysis": 0.4731066666666666 + }, + { + "name": "gemini-2.5-flash-lite", + "lb_name": "gemini-2.5-flash-lite-highthinking", + "lb_global": 0.41509304347826087, + "lb_reasoning": 0.4334125, + "lb_coding": 0.295646, + "lb_math": 0.610405, + "lb_language": 0.5198033333333333, + "lb_if": 0.23075, + "lb_data_analysis": 0.47041 + }, + { + "name": "gemini-2.5-flash-lite-preview-09-2025", + "lb_name": "gemini-2.5-flash-lite-preview-09-2025-highthinking", + "lb_global": 0.41473391304347823, + "lb_reasoning": 0.3615875, + "lb_coding": 0.27154200000000006, + "lb_math": 0.64903, + "lb_language": 0.52601, + "lb_if": 0.28108, + "lb_data_analysis": 0.47878333333333334 + }, + { + "name": "gemini-2.5-flash-preview-09-2025", + "lb_name": "gemini-2.5-flash-preview-09-2025-highthinking", + "lb_global": 0.5225665217391304, + "lb_reasoning": 0.51452, + "lb_coding": 0.4099939999999999, + "lb_math": 0.75351, + "lb_language": 0.6534300000000001, + "lb_if": 0.2767925, + "lb_data_analysis": 0.6098266666666666, + "arena_elo": 1404.26, + "arena_rank": 67, + "arena_votes": 32541 + }, + { + "name": "gemini-2.5-pro-06-05", + "lb_name": "gemini-2.5-pro-06-05-highthinking", + "lb_global": 0.5745526086956521, + "lb_reasoning": 0.7080775, + "lb_coding": 0.5027560000000001, + "lb_math": 0.6831575, + "lb_language": 0.7549566666666667, + "lb_if": 0.3306675, + "lb_data_analysis": 0.51615 + }, + { + "name": "glm-4.5", + "lb_name": "glm-4.5", + "lb_global": 0.6590484999999998, + "lb_reasoning": 0.7075, + "lb_coding": 0.43850000000000006, + "lb_math": 0.82083, + "lb_language": 0.6162266666666666, + "lb_if": 0.8157925, + "lb_data_analysis": 0.6628999999999999, + "arena_elo": 1410.09, + "arena_rank": 64, + "arena_votes": 24600 + }, + { + "name": "glm-4.5-air", + "lb_name": "glm-4.5-air", + "lb_global": 0.6118615, + "lb_reasoning": 0.82125, + "lb_coding": 0.34109, + "lb_math": 0.7937233333333333, + "lb_language": 0.44289666666666666, + "lb_if": 0.7883775000000001, + "lb_data_analysis": 0.65962, + "arena_elo": 1371.75, + "arena_rank": 104, + "arena_votes": 31154 + }, + { + "name": "glm-4.6", + "lb_name": "glm-4.6", + "lb_global": 0.5466830434782609, + "lb_reasoning": 0.620625, + "lb_coding": 0.494078, + "lb_math": 0.81128, + "lb_language": 0.5898633333333333, + "lb_if": 0.26192, + "lb_data_analysis": 0.5194766666666667, + "arena_elo": 1425.04, + "arena_rank": 39, + "arena_votes": 35128 + }, + { + "name": "gpt-5", + "lb_name": "gpt-5", + "lb_global": 0.7986840000000001, + "lb_reasoning": 0.94875, + "lb_coding": 0.6001839999999999, + "lb_math": 0.8995433333333334, + "lb_language": 0.7898766666666667, + "lb_if": 0.889875, + "lb_data_analysis": 0.72375 + }, + { + "name": "gpt-5-codex", + "lb_name": "gpt-5-codex", + "lb_global": 0.795967, + "lb_reasoning": 0.98, + "lb_coding": 0.5684440000000001, + "lb_math": 0.9274400000000002, + "lb_language": 0.79323, + "lb_if": 0.887335, + "lb_data_analysis": 0.702885 + }, + { + "name": "gpt-5-high", + "lb_name": "gpt-5-high", + "lb_global": 0.8026055000000001, + "lb_reasoning": 0.9724999999999999, + "lb_coding": 0.58839, + "lb_math": 0.92772, + "lb_language": 0.8082699999999999, + "lb_if": 0.8811249999999999, + "lb_data_analysis": 0.716345, + "arena_elo": 1434.12, + "arena_rank": 34, + "arena_votes": 32346 + }, + { + "name": "gpt-5-low", + "lb_name": "gpt-5-low", + "lb_global": 0.7562915000000001, + "lb_reasoning": 0.87875, + "lb_coding": 0.507122, + "lb_math": 0.8533066666666667, + "lb_language": 0.7873466666666666, + "lb_if": 0.8899175, + "lb_data_analysis": 0.69721 + }, + { + "name": "gpt-5-mini", + "lb_name": "gpt-5-mini", + "lb_global": 0.6097334782608695, + "lb_reasoning": 0.5865375, + "lb_coding": 0.514286, + "lb_math": 0.7437775, + "lb_language": 0.6914566666666667, + "lb_if": 0.642165, + "lb_data_analysis": 0.49605000000000005 + }, + { + "name": "gpt-5-mini-high", + "lb_name": "gpt-5-mini-high", + "lb_global": 0.6659678260869565, + "lb_reasoning": 0.68322, + "lb_coding": 0.55281, + "lb_math": 0.822005, + "lb_language": 0.7552066666666667, + "lb_if": 0.65271, + "lb_data_analysis": 0.55195, + "arena_elo": 1390.38, + "arena_rank": 84, + "arena_votes": 26941 + }, + { + "name": "gpt-5-mini-low", + "lb_name": "gpt-5-mini-low", + "lb_global": 0.5237830434782609, + "lb_reasoning": 0.45899, + "lb_coding": 0.49819800000000003, + "lb_math": 0.6323924999999999, + "lb_language": 0.6041233333333333, + "lb_if": 0.5071249999999999, + "lb_data_analysis": 0.44987333333333335 + }, + { + "name": "gpt-5-mini-minimal", + "lb_name": "gpt-5-mini-minimal", + "lb_global": 0.37732391304347834, + "lb_reasoning": 0.32298, + "lb_coding": 0.41279199999999994, + "lb_math": 0.475305, + "lb_language": 0.4478866666666666, + "lb_if": 0.208125, + "lb_data_analysis": 0.41506333333333334 + }, + { + "name": "gpt-5-minimal", + "lb_name": "gpt-5-minimal", + "lb_global": 0.577995, + "lb_reasoning": 0.5762499999999999, + "lb_coding": 0.450202, + "lb_math": 0.5898066666666666, + "lb_language": 0.5101600000000001, + "lb_if": 0.7686249999999999, + "lb_data_analysis": 0.6443300000000001 + }, + { + "name": "gpt-5-nano", + "lb_name": "gpt-5-nano", + "lb_global": 0.4801895652173912, + "lb_reasoning": 0.35452000000000006, + "lb_coding": 0.439504, + "lb_math": 0.6469925, + "lb_language": 0.47727666666666674, + "lb_if": 0.519835, + "lb_data_analysis": 0.4432066666666667 + }, + { + "name": "gpt-5-nano-high", + "lb_name": "gpt-5-nano-high", + "lb_global": 0.4883004347826087, + "lb_reasoning": 0.40288500000000005, + "lb_coding": 0.38954200000000005, + "lb_math": 0.6840625, + "lb_language": 0.46841666666666665, + "lb_if": 0.5569975, + "lb_data_analysis": 0.4340566666666667, + "arena_elo": 1337.83, + "arena_rank": 139, + "arena_votes": 8352 + }, + { + "name": "gpt-5-nano-low", + "lb_name": "gpt-5-nano-low", + "lb_global": 0.33655478260869565, + "lb_reasoning": 0.27678, + "lb_coding": 0.27090000000000003, + "lb_math": 0.48907, + "lb_language": 0.35401333333333335, + "lb_if": 0.2909575, + "lb_data_analysis": 0.3656633333333333 + }, + { + "name": "gpt-5-pro-2025-10-06", + "lb_name": "gpt-5-pro-2025-10-06", + "lb_global": 0.7129065217391305, + "lb_reasoning": 0.8169225, + "lb_coding": 0.598426, + "lb_math": 0.8616550000000001, + "lb_language": 0.80695, + "lb_if": 0.6395850000000001, + "lb_data_analysis": 0.5704066666666666 + }, + { + "name": "gpt-5-chat", + "lb_name": "gpt-5-chat", + "lb_global": 0.6058939999999999, + "lb_reasoning": 0.69375, + "lb_coding": 0.38426799999999994, + "lb_math": 0.73456, + "lb_language": 0.62963, + "lb_if": 0.7300425, + "lb_data_analysis": 0.644815, + "arena_elo": 1426.04, + "arena_rank": 38, + "arena_votes": 31603 + }, + { + "name": "gpt-oss-120b", + "lb_name": "gpt-oss-120b", + "lb_global": 0.463521304347826, + "lb_reasoning": 0.3920675, + "lb_coding": 0.340846, + "lb_math": 0.6887375, + "lb_language": 0.48590666666666665, + "lb_if": 0.5029175, + "lb_data_analysis": 0.38804999999999995, + "arena_elo": 1353.9, + "arena_rank": 118, + "arena_votes": 30756 + }, + { + "name": "grok-4-0709", + "lb_name": "grok-4-0709", + "lb_global": 0.6176039130434782, + "lb_reasoning": 0.791345, + "lb_coding": 0.47252799999999995, + "lb_math": 0.8302324999999999, + "lb_language": 0.76388, + "lb_if": 0.29075, + "lb_data_analysis": 0.6337666666666667, + "arena_elo": 1409.41, + "arena_rank": 65, + "arena_votes": 41753 + }, + { + "name": "grok-code-fast-1-0825", + "lb_name": "grok-code-fast-1-0825", + "lb_global": 0.4364647826086957, + "lb_reasoning": 0.42303, + "lb_coding": 0.45774600000000004, + "lb_math": 0.5601275, + "lb_language": 0.48560333333333333, + "lb_if": 0.2227075, + "lb_data_analysis": 0.48989666666666665 + }, + { + "name": "kimi-k2-instruct", + "lb_name": "kimi-k2-instruct", + "lb_global": 0.45940260869565225, + "lb_reasoning": 0.4223075, + "lb_coding": 0.487122, + "lb_math": 0.5815175, + "lb_language": 0.6668633333333333, + "lb_if": 0.203625, + "lb_data_analysis": 0.43341999999999997 + }, + { + "name": "qwen3-235b-a22b-instruct-2507", + "lb_name": "qwen3-235b-a22b-instruct-2507", + "lb_global": 0.4801386956521739, + "lb_reasoning": 0.5843275, + "lb_coding": 0.35844400000000004, + "lb_math": 0.6803175, + "lb_language": 0.6606966666666668, + "lb_if": 0.2172075, + "lb_data_analysis": 0.4471566666666667, + "arena_elo": 1422.1, + "arena_rank": 44, + "arena_votes": 71551 + }, + { + "name": "qwen3-235b-a22b-thinking-2507", + "lb_name": "qwen3-235b-a22b-thinking-2507", + "lb_global": 0.5290426086956522, + "lb_reasoning": 0.59399, + "lb_coding": 0.31587200000000004, + "lb_math": 0.7339475, + "lb_language": 0.6952366666666666, + "lb_if": 0.40641999999999995, + "lb_data_analysis": 0.5218266666666667, + "arena_elo": 1398.63, + "arena_rank": 75, + "arena_votes": 9186 + }, + { + "name": "qwen3-coder-480b-a35b-instruct", + "lb_name": "qwen3-coder-480b-a35b-instruct", + "lb_global": 0.6146834999999999, + "lb_reasoning": 0.56875, + "lb_coding": 0.489938, + "lb_math": 0.6728166666666667, + "lb_language": 0.6426233333333333, + "lb_if": 0.741625, + "lb_data_analysis": 0.64683, + "arena_elo": 1386.54, + "arena_rank": 91, + "arena_votes": 26406 + }, + { + "name": "qwen3-max-2025-09-23", + "lb_name": "qwen3-max-2025-09-23", + "lb_global": 0.7031759999999999, + "lb_reasoning": 0.9199999999999999, + "lb_coding": 0.47714000000000006, + "lb_math": 0.8317433333333333, + "lb_language": 0.7144733333333333, + "lb_if": 0.76546, + "lb_data_analysis": 0.6536649999999999, + "arena_elo": 1424.53, + "arena_rank": 40, + "arena_votes": 9170 + }, + { + "name": "qwen3-next-80b-a3b-instruct", + "lb_name": "qwen3-next-80b-a3b-instruct", + "lb_global": 0.47444043478260883, + "lb_reasoning": 0.5474525, + "lb_coding": 0.33281, + "lb_math": 0.7017800000000001, + "lb_language": 0.6633766666666666, + "lb_if": 0.191875, + "lb_data_analysis": 0.49784, + "arena_elo": 1401.65, + "arena_rank": 71, + "arena_votes": 22670 + }, + { + "name": "qwen3-next-80b-a3b", + "lb_name": "qwen3-next-80b-a3b-thinking", + "lb_global": 0.5095034782608695, + "lb_reasoning": 0.581635, + "lb_coding": 0.292622, + "lb_math": 0.7426, + "lb_language": 0.5631166666666667, + "lb_if": 0.41541999999999996, + "lb_data_analysis": 0.5358333333333333, + "arena_elo": 1368.86, + "arena_rank": 106, + "arena_votes": 13767 + }, + { + "name": "claude-haiku-4-5-20251001", + "lb_name": "claude-haiku-4-5-20251001", + "lb_global": 0.4302178260869566, + "lb_reasoning": 0.339375, + "lb_coding": 0.48867200000000005, + "lb_math": 0.579715, + "lb_language": 0.5704566666666667, + "lb_if": 0.17754250000000002, + "lb_data_analysis": 0.45124999999999993, + "arena_elo": 1405.56, + "arena_rank": 66, + "arena_votes": 47369 + }, + { + "name": "deepseek-v3.2-exp", + "lb_name": "deepseek-v3.2-exp", + "lb_global": 0.4794773913043478, + "lb_reasoning": 0.45499999999999996, + "lb_coding": 0.512774, + "lb_math": 0.643825, + "lb_language": 0.65596, + "lb_if": 0.1932925, + "lb_data_analysis": 0.4425866666666667, + "arena_elo": 1423.66, + "arena_rank": 42, + "arena_votes": 11680 + }, + { + "name": "minimax-m2", + "lb_name": "minimax-m2", + "lb_global": 0.6526024999999999, + "lb_reasoning": 0.8999999999999999, + "lb_coding": 0.361108, + "lb_math": 0.85953, + "lb_language": 0.47647666666666666, + "lb_if": 0.810165, + "lb_data_analysis": 0.6755800000000001, + "arena_elo": 1346.97, + "arena_rank": 129, + "arena_votes": 6688 + }, + { + "name": "kimi-k2", + "lb_name": "kimi-k2-thinking", + "lb_global": 0.622846956521739, + "lb_reasoning": 0.6348550000000001, + "lb_coding": 0.499748, + "lb_math": 0.8109575, + "lb_language": 0.6645333333333334, + "lb_if": 0.6203350000000001, + "lb_data_analysis": 0.52285 + }, + { + "name": "grok-4-fast-non-reasoning-2511", + "lb_name": "grok-4-fast-non-reasoning-2511", + "lb_global": 0.45728349999999995, + "lb_reasoning": 0.43625, + "lb_coding": 0.28417000000000003, + "lb_math": 0.4767433333333333, + "lb_language": 0.42305666666666664, + "lb_if": 0.668085, + "lb_data_analysis": 0.61029 + }, + { + "name": "grok-4-fast-reasoning-2511", + "lb_name": "grok-4-fast-reasoning-2511", + "lb_global": 0.7033175, + "lb_reasoning": 0.98125, + "lb_coding": 0.405872, + "lb_math": 0.8733633333333333, + "lb_language": 0.7509799999999999, + "lb_if": 0.724915, + "lb_data_analysis": 0.689235 + }, + { + "name": "gpt-5.1-2025-11-13-high", + "lb_name": "gpt-5.1-2025-11-13-high", + "lb_global": 0.7260717391304349, + "lb_reasoning": 0.7879325, + "lb_coding": 0.609956, + "lb_math": 0.868995, + "lb_language": 0.7926033333333334, + "lb_if": 0.63904, + "lb_data_analysis": 0.6960633333333334 + }, + { + "name": "gpt-5.1-2025-11-13", + "lb_name": "gpt-5.1-2025-11-13-nothinking", + "lb_global": 0.3969430434782608, + "lb_reasoning": 0.268125, + "lb_coding": 0.47992, + "lb_math": 0.4451125, + "lb_language": 0.5384466666666666, + "lb_if": 0.23495749999999999, + "lb_data_analysis": 0.4406566666666667 + }, + { + "name": "gpt-5.1-codex", + "lb_name": "gpt-5.1-codex", + "lb_global": 0.6930621739130435, + "lb_reasoning": 0.8198075, + "lb_coding": 0.60714, + "lb_math": 0.795805, + "lb_language": 0.6947899999999999, + "lb_if": 0.633875, + "lb_data_analysis": 0.6074700000000001 + }, + { + "name": "gpt-5.1-codex-mini", + "lb_name": "gpt-5.1-codex-mini", + "lb_global": 0.607808695652174, + "lb_reasoning": 0.647115, + "lb_coding": 0.51973, + "lb_math": 0.762555, + "lb_language": 0.63013, + "lb_if": 0.5902075, + "lb_data_analysis": 0.49701666666666666 + }, + { + "name": "gemini-3-pro-preview-11-2025-high", + "lb_name": "gemini-3-pro-preview-11-2025-high", + "lb_global": 0.7354960869565217, + "lb_reasoning": 0.7741825, + "lb_coding": 0.6284080000000001, + "lb_math": 0.8183699999999999, + "lb_language": 0.84621, + "lb_if": 0.6584599999999999, + "lb_data_analysis": 0.7438966666666667 + }, + { + "name": "gemini-3-pro-preview-11-2025-low", + "lb_name": "gemini-3-pro-preview-11-2025-low", + "lb_global": 0.6288699999999998, + "lb_reasoning": 0.7061550000000001, + "lb_coding": 0.612546, + "lb_math": 0.77734, + "lb_language": 0.7949166666666668, + "lb_if": 0.2687925, + "lb_data_analysis": 0.6691266666666666 + }, + { + "name": "grok-4-1-fast", + "lb_name": "grok-4-1-fast-non-reasoning", + "lb_global": 0.3162308695652174, + "lb_reasoning": 0.2334625, + "lb_coding": 0.277024, + "lb_math": 0.3892, + "lb_language": 0.5000733333333334, + "lb_if": 0.1697525, + "lb_data_analysis": 0.4061033333333333 + }, + { + "name": "grok-4-1-fast-reasoning", + "lb_name": "grok-4-1-fast-reasoning", + "lb_global": 0.6010439130434783, + "lb_reasoning": 0.8019700000000001, + "lb_coding": 0.468444, + "lb_math": 0.8372, + "lb_language": 0.7432699999999999, + "lb_if": 0.28204, + "lb_data_analysis": 0.52238, + "arena_elo": 1430.79, + "arena_rank": 36, + "arena_votes": 31128 + }, + { + "name": "claude-opus-4-5-20251101", + "lb_name": "claude-opus-4-5-20251101-high-effort", + "lb_global": 0.5654247826086957, + "lb_reasoning": 0.5490375000000001, + "lb_coding": 0.6812059999999999, + "lb_math": 0.66445, + "lb_language": 0.7709166666666668, + "lb_if": 0.26591750000000003, + "lb_data_analysis": 0.4561233333333334, + "arena_elo": 1467.13, + "arena_rank": 11, + "arena_votes": 35476 + }, + { + "name": "deepseek-v3.2", + "lb_name": "deepseek-v3.2", + "lb_global": 0.49750652173913046, + "lb_reasoning": 0.4425, + "lb_coding": 0.582756, + "lb_math": 0.6395425, + "lb_language": 0.6423933333333333, + "lb_if": 0.230625, + "lb_data_analysis": 0.45034, + "arena_elo": 1419.83, + "arena_rank": 47, + "arena_votes": 30709 + }, + { + "name": "deepseek-v3.2-speciale", + "lb_name": "deepseek-v3.2-speciale", + "lb_global": 0.6270335, + "lb_reasoning": 0.8144866666666667, + "lb_coding": 0.32281, + "lb_math": 0.9251366666666666, + "lb_language": 0.7452933333333333, + "lb_if": 0.5025825, + "lb_data_analysis": 0.73077 + }, + { + "name": "devstral-2512", + "lb_name": "devstral-2512", + "lb_global": 0.38828, + "lb_reasoning": 0.277405, + "lb_coding": 0.527176, + "lb_math": 0.5251575, + "lb_language": 0.45669000000000004, + "lb_if": 0.135, + "lb_data_analysis": 0.39141333333333334 + }, + { + "name": "gemini-3-flash-preview-high", + "lb_name": "gemini-3-flash-preview-high", + "lb_global": 0.7304982608695653, + "lb_reasoning": 0.7454799999999999, + "lb_coding": 0.5355900000000001, + "lb_math": 0.8417425000000001, + "lb_language": 0.8456466666666667, + "lb_if": 0.7486250000000001, + "lb_data_analysis": 0.7477266666666665 + }, + { + "name": "gemini-3-flash-preview-minimal", + "lb_name": "gemini-3-flash-preview-minimal", + "lb_global": 0.5436265217391305, + "lb_reasoning": 0.49168249999999997, + "lb_coding": 0.574268, + "lb_math": 0.6809875000000001, + "lb_language": 0.7864633333333334, + "lb_if": 0.2832075, + "lb_data_analysis": 0.4830566666666667 + }, + { + "name": "glm-4.6v", + "lb_name": "glm-4.6v", + "lb_global": 0.388711304347826, + "lb_reasoning": 0.37216250000000006, + "lb_coding": 0.276952, + "lb_math": 0.6250024999999999, + "lb_language": 0.49737666666666663, + "lb_if": 0.1706225, + "lb_data_analysis": 0.46410666666666667, + "arena_elo": 1377.44, + "arena_rank": 98, + "arena_votes": 2785 + }, + { + "name": "gpt-5.1-2025-11-13-low", + "lb_name": "gpt-5.1-2025-11-13-low", + "lb_global": 0.5888434782608697, + "lb_reasoning": 0.5964425, + "lb_coding": 0.561188, + "lb_math": 0.6837825, + "lb_language": 0.7785166666666666, + "lb_if": 0.4762475, + "lb_data_analysis": 0.4586733333333333 + }, + { + "name": "gpt-5.1-2025-11-13-medium", + "lb_name": "gpt-5.1-2025-11-13-medium", + "lb_global": 0.6913921739130435, + "lb_reasoning": 0.7398075, + "lb_coding": 0.622756, + "lb_math": 0.7901050000000001, + "lb_language": 0.7865699999999999, + "lb_if": 0.6030424999999999, + "lb_data_analysis": 0.6322366666666667 + }, + { + "name": "gpt-5.1-codex-max", + "lb_name": "gpt-5.1-codex-max", + "lb_global": 0.7239121739130434, + "lb_reasoning": 0.84572, + "lb_coding": 0.665536, + "lb_math": 0.8365499999999999, + "lb_language": 0.7538499999999999, + "lb_if": 0.67125, + "lb_data_analysis": 0.54889 + }, + { + "name": "gpt-5.1-codex-max-high", + "lb_name": "gpt-5.1-codex-max-high", + "lb_global": 0.7435586956521739, + "lb_reasoning": 0.8365375, + "lb_coding": 0.64272, + "lb_math": 0.8321625, + "lb_language": 0.7648033333333334, + "lb_if": 0.7038350000000001, + "lb_data_analysis": 0.7012333333333333 + }, + { + "name": "gpt-5.1-codex-max-xhigh", + "lb_name": "gpt-5.1-codex-max-xhigh", + "lb_global": 0.761212857142857, + "lb_reasoning": 0.8459625000000001, + "lb_coding": 0.639938, + "lb_math": 0.9087900000000001, + "lb_language": 0.7605666666666666, + "lb_if": 0.7390424999999999, + "lb_data_analysis": 0.718845 + }, + { + "name": "gpt-5.2-2025-12-11-high", + "lb_name": "gpt-5.2-2025-12-11-high", + "lb_global": 0.7537621739130433, + "lb_reasoning": 0.8321149999999999, + "lb_coding": 0.6142860000000001, + "lb_math": 0.93166, + "lb_language": 0.7980899999999999, + "lb_if": 0.6177075, + "lb_data_analysis": 0.7816333333333333 + }, + { + "name": "gpt-5.2-2025-12-11-low", + "lb_name": "gpt-5.2-2025-12-11-low", + "lb_global": 0.6558760869565218, + "lb_reasoning": 0.7123075000000001, + "lb_coding": 0.59559, + "lb_math": 0.8471425, + "lb_language": 0.7018066666666667, + "lb_if": 0.5454574999999999, + "lb_data_analysis": 0.5273833333333333 + }, + { + "name": "gpt-5.2-2025-12-11-medium", + "lb_name": "gpt-5.2-2025-12-11-medium", + "lb_global": 0.7261826086956522, + "lb_reasoning": 0.8417299999999999, + "lb_coding": 0.598426, + "lb_math": 0.92069, + "lb_language": 0.7494333333333333, + "lb_if": 0.5752524999999999, + "lb_data_analysis": 0.7036933333333333 + }, + { + "name": "gpt-5.2-2025-12-11", + "lb_name": "gpt-5.2-2025-12-11-nothinking", + "lb_global": 0.4690639130434783, + "lb_reasoning": 0.42798, + "lb_coding": 0.5458179999999999, + "lb_math": 0.5825075, + "lb_language": 0.4996666666666667, + "lb_if": 0.272, + "lb_data_analysis": 0.47681 + }, + { + "name": "glm-4.7", + "lb_name": "glm-4.7", + "lb_global": 0.5730817391304348, + "lb_reasoning": 0.5973075, + "lb_coding": 0.542528, + "lb_math": 0.7601875, + "lb_language": 0.6522633333333333, + "lb_if": 0.3565825, + "lb_data_analysis": 0.5517133333333334, + "arena_elo": 1440.77, + "arena_rank": 29, + "arena_votes": 11937 + }, + { + "name": "arcee-trinity-large-preview", + "lb_name": "arcee-trinity-large-preview", + "lb_global": 0.3041926086956522, + "lb_reasoning": 0.2060575, + "lb_coding": 0.28258600000000006, + "lb_math": 0.44932, + "lb_language": 0.4215, + "lb_if": 0.1219175, + "lb_data_analysis": 0.4032733333333333 + }, + { + "name": "claude-opus-4-6", + "lb_name": "claude-opus-4-6-thinking-auto-high-effort", + "lb_global": 0.7678552173913045, + "lb_reasoning": 0.88673, + "lb_coding": 0.682738, + "lb_math": 0.8931699999999999, + "lb_language": 0.8326966666666666, + "lb_if": 0.633125, + "lb_data_analysis": 0.6989299999999999, + "arena_elo": 1502.91, + "arena_rank": 2, + "arena_votes": 7454 + }, + { + "name": "claude-sonnet-4-6", + "lb_name": "claude-sonnet-4-6-thinking-auto-high-effort", + "lb_global": 0.7558704347826086, + "lb_reasoning": 0.86375, + "lb_coding": 0.659902, + "lb_math": 0.8653375, + "lb_language": 0.7769333333333334, + "lb_if": 0.639165, + "lb_data_analysis": 0.7605666666666667, + "arena_elo": 1458.05, + "arena_rank": 14, + "arena_votes": 3470 + }, + { + "name": "gemini-3.1-pro-preview-high", + "lb_name": "gemini-3.1-pro-preview-high", + "lb_global": 0.8070695652173913, + "lb_reasoning": 0.8400475000000001, + "lb_coding": 0.6958179999999999, + "lb_math": 0.91045, + "lb_language": 0.85376, + "lb_if": 0.791, + "lb_data_analysis": 0.7854133333333334 + }, + { + "name": "glm-5", + "lb_name": "glm-5", + "lb_global": 0.6870265217391303, + "lb_reasoning": 0.691105, + "lb_coding": 0.6245499999999999, + "lb_math": 0.83464, + "lb_language": 0.7752800000000001, + "lb_if": 0.5532900000000001, + "lb_data_analysis": 0.67896, + "arena_elo": 1455.43, + "arena_rank": 16, + "arena_votes": 6466 + }, + { + "name": "gpt-5.2-codex", + "lb_name": "gpt-5.2-codex", + "lb_global": 0.743308695652174, + "lb_reasoning": 0.777115, + "lb_coding": 0.644478, + "lb_math": 0.8877375, + "lb_language": 0.7367833333333333, + "lb_if": 0.6644575, + "lb_data_analysis": 0.78204 + }, + { + "name": "gpt-5.3-codex-high", + "lb_name": "gpt-5.3-codex-high", + "lb_global": 0.7318047826086957, + "lb_reasoning": 0.8015375, + "lb_coding": 0.642738, + "lb_math": 0.8783575000000001, + "lb_language": 0.8008666666666667, + "lb_if": 0.6537499999999999, + "lb_data_analysis": 0.62688 + }, + { + "name": "gpt-5.3-codex-xhigh", + "lb_name": "gpt-5.3-codex-xhigh", + "lb_global": 0.7197360869565216, + "lb_reasoning": 0.71423, + "lb_coding": 0.70992, + "lb_math": 0.8569950000000001, + "lb_language": 0.7918, + "lb_if": 0.713415, + "lb_data_analysis": 0.49679 + }, + { + "name": "kimi-k2.5", + "lb_name": "kimi-k2.5-thinking", + "lb_global": 0.6916286956521739, + "lb_reasoning": 0.7595675000000001, + "lb_coding": 0.601452, + "lb_math": 0.8486799999999999, + "lb_language": 0.77666, + "lb_if": 0.574125, + "lb_data_analysis": 0.6135766666666667, + "arena_elo": 1451.66, + "arena_rank": 19, + "arena_votes": 11075 + }, + { + "name": "minimax-m2.5", + "lb_name": "minimax-m2.5", + "lb_global": 0.6027330434782608, + "lb_reasoning": 0.5930300000000001, + "lb_coding": 0.592792, + "lb_math": 0.7740675, + "lb_language": 0.5510100000000001, + "lb_if": 0.5723325, + "lb_data_analysis": 0.49605000000000005, + "arena_elo": 1401.47, + "arena_rank": 72, + "arena_votes": 6065 + }, + { + "name": "claude-opus-4-6-thinking", + "arena_name": "claude-opus-4-6-thinking", + "arena_org": "Anthropic", + "arena_elo": 1503.45, + "arena_rank": 1, + "arena_votes": 6583 + }, + { + "name": "gemini-3.1-pro-preview", + "arena_name": "gemini-3.1-pro-preview", + "arena_org": "Google", + "arena_elo": 1500.36, + "arena_rank": 3, + "arena_votes": 4052 + }, + { + "name": "grok-4.20-beta1", + "arena_name": "grok-4.20-beta1", + "arena_org": "xAI", + "arena_elo": 1495.42, + "arena_rank": 4, + "arena_votes": 3818 + }, + { + "name": "gemini-3-pro", + "arena_name": "gemini-3-pro", + "arena_org": "Google", + "arena_elo": 1486.23, + "arena_rank": 5, + "arena_votes": 38248 + }, + { + "name": "gpt-5.2-chat-latest-20260210", + "arena_name": "gpt-5.2-chat-latest-20260210", + "arena_org": "OpenAI", + "arena_elo": 1481.31, + "arena_rank": 6, + "arena_votes": 3605 + }, + { + "name": "gemini-3-flash", + "arena_name": "gemini-3-flash", + "arena_org": "Google", + "arena_elo": 1473.23, + "arena_rank": 7, + "arena_votes": 29334 + }, + { + "name": "grok-4.1-thinking", + "arena_name": "grok-4.1-thinking", + "arena_org": "xAI", + "arena_elo": 1472.97, + "arena_rank": 8, + "arena_votes": 37474 + }, + { + "name": "claude-opus-4-5-20251101-thinking-32k", + "arena_name": "claude-opus-4-5-20251101-thinking-32k", + "arena_org": "Anthropic", + "arena_elo": 1470.96, + "arena_rank": 9, + "arena_votes": 30541 + }, + { + "name": "dola-seed-2.0-preview", + "arena_name": "dola-seed-2.0-preview", + "arena_org": "Bytedance", + "arena_elo": 1469.86, + "arena_rank": 10, + "arena_votes": 4620 + }, + { + "name": "grok-4.1", + "arena_name": "grok-4.1", + "arena_org": "xAI", + "arena_elo": 1462.43, + "arena_rank": 12, + "arena_votes": 41700 + }, + { + "name": "gemini-3-flash (thinking-minimal)", + "arena_name": "gemini-3-flash (thinking-minimal)", + "arena_org": "Google", + "arena_elo": 1461.15, + "arena_rank": 13, + "arena_votes": 20672 + }, + { + "name": "gpt-5.1-high", + "arena_name": "gpt-5.1-high", + "arena_org": "OpenAI", + "arena_elo": 1456.77, + "arena_rank": 15, + "arena_votes": 34379 + }, + { + "name": "qwen3.5-397b-a17b", + "arena_name": "qwen3.5-397b-a17b", + "arena_org": "Alibaba", + "arena_elo": 1453.57, + "arena_rank": 17, + "arena_votes": 4958 + }, + { + "name": "ernie-5.0-0110", + "arena_name": "ernie-5.0-0110", + "arena_org": "Baidu", + "arena_elo": 1452.82, + "arena_rank": 18, + "arena_votes": 13833 + }, + { + "name": "claude-sonnet-4-5-20250929-thinking-32k", + "arena_name": "claude-sonnet-4-5-20250929-thinking-32k", + "arena_org": "Anthropic", + "arena_elo": 1449.92, + "arena_rank": 20, + "arena_votes": 48912 + }, + { + "name": "gemini-2.5-pro", + "arena_name": "gemini-2.5-pro", + "arena_org": "Google", + "arena_elo": 1449.24, + "arena_rank": 22, + "arena_votes": 97296 + }, + { + "name": "ernie-5.0-preview-1203", + "arena_name": "ernie-5.0-preview-1203", + "arena_org": "Baidu", + "arena_elo": 1449.09, + "arena_rank": 23, + "arena_votes": 9725 + }, + { + "name": "claude-opus-4-1-20250805-thinking-16k", + "arena_name": "claude-opus-4-1-20250805-thinking-16k", + "arena_org": "Anthropic", + "arena_elo": 1448.65, + "arena_rank": 24, + "arena_votes": 49597 + }, + { + "name": "claude-opus-4-1-20250805", + "arena_name": "claude-opus-4-1-20250805", + "arena_org": "Anthropic", + "arena_elo": 1446.23, + "arena_rank": 25, + "arena_votes": 77218 + }, + { + "name": "chatgpt-4o-latest-20250326", + "arena_name": "chatgpt-4o-latest-20250326", + "arena_org": "OpenAI", + "arena_elo": 1442.76, + "arena_rank": 27, + "arena_votes": 82938 + }, + { + "name": "gpt-5.2-high", + "arena_name": "gpt-5.2-high", + "arena_org": "OpenAI", + "arena_elo": 1440.87, + "arena_rank": 28, + "arena_votes": 19253 + }, + { + "name": "gpt-5.1", + "arena_name": "gpt-5.1", + "arena_org": "OpenAI", + "arena_elo": 1436.95, + "arena_rank": 30, + "arena_votes": 36738 + }, + { + "name": "gpt-5.2", + "arena_name": "gpt-5.2", + "arena_org": "OpenAI", + "arena_elo": 1436.54, + "arena_rank": 31, + "arena_votes": 16113 + }, + { + "name": "qwen3-max-preview", + "arena_name": "qwen3-max-preview", + "arena_org": "Alibaba", + "arena_elo": 1434.2, + "arena_rank": 32, + "arena_votes": 27642 + }, + { + "name": "kimi-k2.5-instant", + "arena_name": "kimi-k2.5-instant", + "arena_org": "Moonshot", + "arena_elo": 1434.17, + "arena_rank": 33, + "arena_votes": 7125 + }, + { + "name": "o3-2025-04-16", + "arena_name": "o3-2025-04-16", + "arena_org": "OpenAI", + "arena_elo": 1432.45, + "arena_rank": 35, + "arena_votes": 60957 + }, + { + "name": "kimi-k2-thinking-turbo", + "arena_name": "kimi-k2-thinking-turbo", + "arena_org": "Moonshot", + "arena_elo": 1428.7, + "arena_rank": 37, + "arena_votes": 36099 + }, + { + "name": "claude-opus-4-20250514-thinking-16k", + "arena_name": "claude-opus-4-20250514-thinking-16k", + "arena_org": "Anthropic", + "arena_elo": 1423.71, + "arena_rank": 41, + "arena_votes": 37722 + }, + { + "name": "deepseek-v3.2-exp-thinking", + "arena_name": "deepseek-v3.2-exp-thinking", + "arena_org": "DeepSeek", + "arena_elo": 1423.32, + "arena_rank": 43, + "arena_votes": 8944 + }, + { + "name": "grok-4-fast-chat", + "arena_name": "grok-4-fast-chat", + "arena_org": "xAI", + "arena_elo": 1421.81, + "arena_rank": 45, + "arena_votes": 6962 + }, + { + "name": "deepseek-v3.2-thinking", + "arena_name": "deepseek-v3.2-thinking", + "arena_org": "DeepSeek", + "arena_elo": 1420.12, + "arena_rank": 46, + "arena_votes": 25692 + }, + { + "name": "ernie-5.0-preview-1022", + "arena_name": "ernie-5.0-preview-1022", + "arena_org": "Baidu", + "arena_elo": 1418.52, + "arena_rank": 49, + "arena_votes": 4561 + }, + { + "name": "deepseek-v3.1", + "arena_name": "deepseek-v3.1", + "arena_org": "DeepSeek", + "arena_elo": 1418.24, + "arena_rank": 50, + "arena_votes": 15194 + }, + { + "name": "deepseek-v3.1-thinking", + "arena_name": "deepseek-v3.1-thinking", + "arena_org": "DeepSeek", + "arena_elo": 1417.23, + "arena_rank": 51, + "arena_votes": 11918 + }, + { + "name": "kimi-k2-0905-preview", + "arena_name": "kimi-k2-0905-preview", + "arena_org": "Moonshot", + "arena_elo": 1417.2, + "arena_rank": 52, + "arena_votes": 11912 + }, + { + "name": "kimi-k2-0711-preview", + "arena_name": "kimi-k2-0711-preview", + "arena_org": "Moonshot", + "arena_elo": 1416.8, + "arena_rank": 53, + "arena_votes": 28440 + }, + { + "name": "deepseek-v3.1-terminus-thinking", + "arena_name": "deepseek-v3.1-terminus-thinking", + "arena_org": "DeepSeek", + "arena_elo": 1415.77, + "arena_rank": 55, + "arena_votes": 3536 + }, + { + "name": "amazon-nova-experimental-chat-26-01-10", + "arena_name": "amazon-nova-experimental-chat-26-01-10", + "arena_org": "Amazon", + "arena_elo": 1415.62, + "arena_rank": 56, + "arena_votes": 3421 + }, + { + "name": "mistral-large-3", + "arena_name": "mistral-large-3", + "arena_org": "Mistral", + "arena_elo": 1414.85, + "arena_rank": 57, + "arena_votes": 27128 + }, + { + "name": "qwen3-vl-235b-a22b-instruct", + "arena_name": "qwen3-vl-235b-a22b-instruct", + "arena_org": "Alibaba", + "arena_elo": 1414.74, + "arena_rank": 58, + "arena_votes": 11598 + }, + { + "name": "claude-opus-4-20250514", + "arena_name": "claude-opus-4-20250514", + "arena_org": "Anthropic", + "arena_elo": 1412.96, + "arena_rank": 60, + "arena_votes": 45304 + }, + { + "name": "mistral-medium-2508", + "arena_name": "mistral-medium-2508", + "arena_org": "Mistral", + "arena_elo": 1411.39, + "arena_rank": 61, + "arena_votes": 65627 + }, + { + "name": "grok-3-preview-02-24", + "arena_name": "grok-3-preview-02-24", + "arena_org": "xAI", + "arena_elo": 1411.33, + "arena_rank": 62, + "arena_votes": 33843 + }, + { + "name": "gemini-2.5-flash", + "arena_name": "gemini-2.5-flash", + "arena_org": "Google", + "arena_elo": 1410.86, + "arena_rank": 63, + "arena_votes": 96569 + }, + { + "name": "grok-4-fast-reasoning", + "arena_name": "grok-4-fast-reasoning", + "arena_org": "xAI", + "arena_elo": 1403.59, + "arena_rank": 68, + "arena_votes": 18440 + }, + { + "name": "qwen3-235b-a22b-no-thinking", + "arena_name": "qwen3-235b-a22b-no-thinking", + "arena_org": "Alibaba", + "arena_elo": 1401.95, + "arena_rank": 69, + "arena_votes": 39295 + }, + { + "name": "longcat-flash-chat", + "arena_name": "longcat-flash-chat", + "arena_org": "Meituan", + "arena_elo": 1399.96, + "arena_rank": 73, + "arena_votes": 11486 + }, + { + "name": "claude-sonnet-4-20250514-thinking-32k", + "arena_name": "claude-sonnet-4-20250514-thinking-32k", + "arena_org": "Anthropic", + "arena_elo": 1399.79, + "arena_rank": 74, + "arena_votes": 35975 + }, + { + "name": "qwen3-vl-235b-a22b-thinking", + "arena_name": "qwen3-vl-235b-a22b-thinking", + "arena_org": "Alibaba", + "arena_elo": 1395.02, + "arena_rank": 77, + "arena_votes": 7924 + }, + { + "name": "amazon-nova-experimental-chat-12-10", + "arena_name": "amazon-nova-experimental-chat-12-10", + "arena_org": "Amazon", + "arena_elo": 1394.64, + "arena_rank": 78, + "arena_votes": 3699 + }, + { + "name": "hunyuan-vision-1.5-thinking", + "arena_name": "hunyuan-vision-1.5-thinking", + "arena_org": "Tencent", + "arena_elo": 1393.73, + "arena_rank": 80, + "arena_votes": 2216 + }, + { + "name": "mai-1-preview", + "arena_name": "mai-1-preview", + "arena_org": "Microsoft AI", + "arena_elo": 1392, + "arena_rank": 81, + "arena_votes": 18018 + }, + { + "name": "o4-mini-2025-04-16", + "arena_name": "o4-mini-2025-04-16", + "arena_org": "OpenAI", + "arena_elo": 1390.98, + "arena_rank": 82, + "arena_votes": 46375 + }, + { + "name": "mimo-v2-flash (non-thinking)", + "arena_name": "mimo-v2-flash (non-thinking)", + "arena_org": "Xiaomi", + "arena_elo": 1390.49, + "arena_rank": 83, + "arena_votes": 19661 + }, + { + "name": "step-3.5-flash", + "arena_name": "step-3.5-flash", + "arena_org": "StepFun", + "arena_elo": 1389.7, + "arena_rank": 85, + "arena_votes": 8624 + }, + { + "name": "claude-sonnet-4-20250514", + "arena_name": "claude-sonnet-4-20250514", + "arena_org": "Anthropic", + "arena_elo": 1389.56, + "arena_rank": 86, + "arena_votes": 41365 + }, + { + "name": "claude-3-7-sonnet-20250219-thinking-32k", + "arena_name": "claude-3-7-sonnet-20250219-thinking-32k", + "arena_org": "Anthropic", + "arena_elo": 1388.19, + "arena_rank": 87, + "arena_votes": 39731 + }, + { + "name": "mimo-v2-flash (thinking)", + "arena_name": "mimo-v2-flash (thinking)", + "arena_org": "Xiaomi", + "arena_elo": 1386.91, + "arena_rank": 89, + "arena_votes": 10870 + }, + { + "name": "hunyuan-t1-20250711", + "arena_name": "hunyuan-t1-20250711", + "arena_org": "Tencent", + "arena_elo": 1386.69, + "arena_rank": 90, + "arena_votes": 4767 + }, + { + "name": "minimax-m2.1-preview", + "arena_name": "minimax-m2.1-preview", + "arena_org": "MiniMax", + "arena_elo": 1385.37, + "arena_rank": 92, + "arena_votes": 17092 + }, + { + "name": "qwen3-30b-a3b-instruct-2507", + "arena_name": "qwen3-30b-a3b-instruct-2507", + "arena_org": "Alibaba", + "arena_elo": 1383.75, + "arena_rank": 94, + "arena_votes": 23940 + }, + { + "name": "hunyuan-turbos-20250416", + "arena_name": "hunyuan-turbos-20250416", + "arena_org": "Tencent", + "arena_elo": 1382.57, + "arena_rank": 95, + "arena_votes": 11000 + }, + { + "name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking", + "arena_name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking", + "arena_org": "Google", + "arena_elo": 1379.57, + "arena_rank": 97, + "arena_votes": 46858 + }, + { + "name": "trinity-large", + "arena_name": "trinity-large", + "arena_org": "Arcee AI", + "arena_elo": 1375.1, + "arena_rank": 99, + "arena_votes": 2166 + }, + { + "name": "gemini-2.5-flash-lite-preview-06-17-thinking", + "arena_name": "gemini-2.5-flash-lite-preview-06-17-thinking", + "arena_org": "Google", + "arena_elo": 1374.68, + "arena_rank": 101, + "arena_votes": 33674 + }, + { + "name": "minimax-m1", + "arena_name": "minimax-m1", + "arena_org": "MiniMax", + "arena_elo": 1367.33, + "arena_rank": 107, + "arena_votes": 36564 + }, + { + "name": "glm-4.7-flash", + "arena_name": "glm-4.7-flash", + "arena_org": "Z.ai", + "arena_elo": 1365.72, + "arena_rank": 108, + "arena_votes": 10660 + }, + { + "name": "amazon-nova-experimental-chat-11-10", + "arena_name": "amazon-nova-experimental-chat-11-10", + "arena_org": "Amazon", + "arena_elo": 1365.22, + "arena_rank": 109, + "arena_votes": 19082 + }, + { + "name": "o3-mini-high", + "arena_name": "o3-mini-high", + "arena_org": "OpenAI", + "arena_elo": 1363.97, + "arena_rank": 111, + "arena_votes": 18584 + }, + { + "name": "grok-3-mini-high", + "arena_name": "grok-3-mini-high", + "arena_org": "xAI", + "arena_elo": 1362.9, + "arena_rank": 112, + "arena_votes": 17413 + }, + { + "name": "grok-3-mini-beta", + "arena_name": "grok-3-mini-beta", + "arena_org": "xAI", + "arena_elo": 1356.75, + "arena_rank": 115, + "arena_votes": 23615 + }, + { + "name": "intellect-3", + "arena_name": "intellect-3", + "arena_org": "Prime Intellect", + "arena_elo": 1356.04, + "arena_rank": 116, + "arena_votes": 5290 + }, + { + "name": "mistral-small-2506", + "arena_name": "mistral-small-2506", + "arena_org": "Mistral", + "arena_elo": 1356.02, + "arena_rank": 117, + "arena_votes": 18237 + }, + { + "name": "glm-4.5v", + "arena_name": "glm-4.5v", + "arena_org": "Z.ai", + "arena_elo": 1353.13, + "arena_rank": 120, + "arena_votes": 4950 + }, + { + "name": "amazon-nova-experimental-chat-10-20", + "arena_name": "amazon-nova-experimental-chat-10-20", + "arena_org": "Amazon", + "arena_elo": 1350.29, + "arena_rank": 123, + "arena_votes": 11338 + }, + { + "name": "hunyuan-turbos-20250226", + "arena_name": "hunyuan-turbos-20250226", + "arena_org": "Tencent", + "arena_elo": 1348.79, + "arena_rank": 124, + "arena_votes": 2226 + }, + { + "name": "amazon-nova-experimental-chat-10-09", + "arena_name": "amazon-nova-experimental-chat-10-09", + "arena_org": "Amazon", + "arena_elo": 1347.44, + "arena_rank": 126, + "arena_votes": 2874 + }, + { + "name": "llama-3.1-nemotron-ultra-253b-v1", + "arena_name": "llama-3.1-nemotron-ultra-253b-v1", + "arena_org": "Nvidia", + "arena_elo": 1347.23, + "arena_rank": 127, + "arena_votes": 2546 + }, + { + "name": "ling-flash-2.0", + "arena_name": "ling-flash-2.0", + "arena_org": "Ant Group", + "arena_elo": 1346.92, + "arena_rank": 130, + "arena_votes": 6995 + }, + { + "name": "step-3", + "arena_name": "step-3", + "arena_org": "StepFun", + "arena_elo": 1346.54, + "arena_rank": 131, + "arena_votes": 6567 + }, + { + "name": "qwen-plus-0125", + "arena_name": "qwen-plus-0125", + "arena_org": "Alibaba", + "arena_elo": 1346.19, + "arena_rank": 132, + "arena_votes": 5823 + }, + { + "name": "glm-4-plus-0111", + "arena_name": "glm-4-plus-0111", + "arena_org": "Zhipu", + "arena_elo": 1343.16, + "arena_rank": 134, + "arena_votes": 5760 + }, + { + "name": "nvidia-llama-3.3-nemotron-super-49b-v1.5", + "arena_name": "nvidia-llama-3.3-nemotron-super-49b-v1.5", + "arena_org": "Nvidia", + "arena_elo": 1341.3, + "arena_rank": 137, + "arena_votes": 3400 + }, + { + "name": "hunyuan-turbo-0110", + "arena_name": "hunyuan-turbo-0110", + "arena_org": "Tencent", + "arena_elo": 1340.43, + "arena_rank": 138, + "arena_votes": 2295 + }, + { + "name": "nova-2-lite", + "arena_name": "nova-2-lite", + "arena_org": "Amazon", + "arena_elo": 1337.17, + "arena_rank": 140, + "arena_votes": 12111 + }, + { + "name": "llama-3.1-405b-instruct-bf16", + "arena_name": "llama-3.1-405b-instruct-bf16", + "arena_org": "Meta", + "arena_elo": 1335.21, + "arena_rank": 143, + "arena_votes": 41392 + }, + { + "name": "grok-2-2024-08-13", + "arena_name": "grok-2-2024-08-13", + "arena_org": "xAI", + "arena_elo": 1335.09, + "arena_rank": 144, + "arena_votes": 63495 + }, + { + "name": "gemini-advanced-0514", + "arena_name": "gemini-advanced-0514", + "arena_org": "Google", + "arena_elo": 1334.77, + "arena_rank": 146, + "arena_votes": 50142 + }, + { + "name": "step-2-16k-exp-202412", + "arena_name": "step-2-16k-exp-202412", + "arena_org": "StepFun", + "arena_elo": 1334.03, + "arena_rank": 147, + "arena_votes": 4829 + }, + { + "name": "llama-3.1-405b-instruct-fp8", + "arena_name": "llama-3.1-405b-instruct-fp8", + "arena_org": "Meta", + "arena_elo": 1333.39, + "arena_rank": 148, + "arena_votes": 59655 + }, + { + "name": "olmo-3.1-32b-instruct", + "arena_name": "olmo-3.1-32b-instruct", + "arena_org": "Ai2", + "arena_elo": 1330.45, + "arena_rank": 149, + "arena_votes": 12252 + }, + { + "name": "molmo-2-8b", + "arena_name": "molmo-2-8b", + "arena_org": "Ai2", + "arena_elo": 1329.22, + "arena_rank": 150, + "arena_votes": 816 + }, + { + "name": "yi-lightning", + "arena_name": "yi-lightning", + "arena_org": "01 AI", + "arena_elo": 1328.48, + "arena_rank": 151, + "arena_votes": 27340, + "aider_pass_rate": 0.496 + }, + { + "name": "llama-3.3-nemotron-49b-super-v1", + "arena_name": "llama-3.3-nemotron-49b-super-v1", + "arena_org": "Nvidia", + "arena_elo": 1327.07, + "arena_rank": 154, + "arena_votes": 2230 + }, + { + "name": "hunyuan-large-2025-02-10", + "arena_name": "hunyuan-large-2025-02-10", + "arena_org": "Tencent", + "arena_elo": 1326.42, + "arena_rank": 155, + "arena_votes": 3738 + }, + { + "name": "deepseek-v2.5-1210", + "arena_name": "deepseek-v2.5-1210", + "arena_org": "DeepSeek", + "arena_elo": 1323.24, + "arena_rank": 158, + "arena_votes": 6793, + "aider_pass_rate": 0.586 + }, + { + "name": "gemini-1.5-pro-001", + "arena_name": "gemini-1.5-pro-001", + "arena_org": "Google", + "arena_elo": 1323.01, + "arena_rank": 159, + "arena_votes": 79132, + "aider_pass_rate": 0.45899999999999996 + }, + { + "name": "llama-4-scout-17b-16e-instruct", + "arena_name": "llama-4-scout-17b-16e-instruct", + "arena_org": "Meta", + "arena_elo": 1322.39, + "arena_rank": 160, + "arena_votes": 31053 + }, + { + "name": "step-1o-turbo-202506", + "arena_name": "step-1o-turbo-202506", + "arena_org": "StepFun", + "arena_elo": 1321.97, + "arena_rank": 161, + "arena_votes": 9622 + }, + { + "name": "ring-flash-2.0", + "arena_name": "ring-flash-2.0", + "arena_org": "Ant Group", + "arena_elo": 1320.45, + "arena_rank": 164, + "arena_votes": 7148 + }, + { + "name": "glm-4-plus", + "arena_name": "glm-4-plus", + "arena_org": "Zhipu AI", + "arena_elo": 1319.33, + "arena_rank": 165, + "arena_votes": 26134 + }, + { + "name": "gemma-3n-e4b-it", + "arena_name": "gemma-3n-e4b-it", + "arena_org": "Google", + "arena_elo": 1319.29, + "arena_rank": 167, + "arena_votes": 23193 + }, + { + "name": "qwen-max-0919", + "arena_name": "qwen-max-0919", + "arena_org": "Alibaba", + "arena_elo": 1318.05, + "arena_rank": 168, + "arena_votes": 16479 + }, + { + "name": "gpt-oss-20b", + "arena_name": "gpt-oss-20b", + "arena_org": "OpenAI", + "arena_elo": 1317.02, + "arena_rank": 170, + "arena_votes": 10758 + }, + { + "name": "nvidia-nemotron-3-nano-30b-a3b-bf16", + "arena_name": "nvidia-nemotron-3-nano-30b-a3b-bf16", + "arena_org": "Nvidia", + "arena_elo": 1317, + "arena_rank": 171, + "arena_votes": 15408 + }, + { + "name": "qwen2.5-plus-1127", + "arena_name": "qwen2.5-plus-1127", + "arena_org": "Alibaba", + "arena_elo": 1315.38, + "arena_rank": 172, + "arena_votes": 10179 + }, + { + "name": "athene-v2-chat", + "arena_name": "athene-v2-chat", + "arena_org": "NexusFlow", + "arena_elo": 1314.44, + "arena_rank": 173, + "arena_votes": 24746 + }, + { + "name": "gpt-4-1106-preview", + "arena_name": "gpt-4-1106-preview", + "arena_org": "OpenAI", + "arena_elo": 1313.07, + "arena_rank": 176, + "arena_votes": 100107, + "aider_pass_rate": 0.519 + }, + { + "name": "hunyuan-standard-2025-02-10", + "arena_name": "hunyuan-standard-2025-02-10", + "arena_org": "Tencent", + "arena_elo": 1311.55, + "arena_rank": 177, + "arena_votes": 3905 + }, + { + "name": "mercury", + "arena_name": "mercury", + "arena_org": "Inception AI", + "arena_elo": 1308.55, + "arena_rank": 179, + "arena_votes": 1886 + }, + { + "name": "grok-2-mini-2024-08-13", + "arena_name": "grok-2-mini-2024-08-13", + "arena_org": "xAI", + "arena_elo": 1307.96, + "arena_rank": 180, + "arena_votes": 52574 + }, + { + "name": "athene-70b-0725", + "arena_name": "athene-70b-0725", + "arena_org": "NexusFlow", + "arena_elo": 1305.96, + "arena_rank": 182, + "arena_votes": 19622 + }, + { + "name": "olmo-3-32b-think", + "arena_name": "olmo-3-32b-think", + "arena_org": "Ai2", + "arena_elo": 1305.64, + "arena_rank": 183, + "arena_votes": 5868 + }, + { + "name": "magistral-medium-2506", + "arena_name": "magistral-medium-2506", + "arena_org": "Mistral", + "arena_elo": 1304.79, + "arena_rank": 185, + "arena_votes": 11985 + }, + { + "name": "mistral-small-3.1-24b-instruct-2503", + "arena_name": "mistral-small-3.1-24b-instruct-2503", + "arena_org": "Mistral", + "arena_elo": 1304.36, + "arena_rank": 186, + "arena_votes": 33897 + }, + { + "name": "hunyuan-large-vision", + "arena_name": "hunyuan-large-vision", + "arena_org": "Tencent", + "arena_elo": 1295.96, + "arena_rank": 190, + "arena_votes": 5565 + }, + { + "name": "amazon-nova-pro-v1.0", + "arena_name": "amazon-nova-pro-v1.0", + "arena_org": "Amazon", + "arena_elo": 1290.18, + "arena_rank": 192, + "arena_votes": 24753 + }, + { + "name": "reka-core-20240904", + "arena_name": "reka-core-20240904", + "arena_org": "Reka AI", + "arena_elo": 1287.81, + "arena_rank": 195, + "arena_votes": 7309 + }, + { + "name": "ibm-granite-h-small", + "arena_name": "ibm-granite-h-small", + "arena_org": "IBM", + "arena_elo": 1287.04, + "arena_rank": 196, + "arena_votes": 5622 + }, + { + "name": "gpt-4-0314", + "arena_name": "gpt-4-0314", + "arena_org": "OpenAI", + "arena_elo": 1286.93, + "arena_rank": 197, + "arena_votes": 54167, + "aider_pass_rate": 0.504 + }, + { + "name": "llama-3.1-nemotron-51b-instruct", + "arena_name": "llama-3.1-nemotron-51b-instruct", + "arena_org": "Nvidia", + "arena_elo": 1286.25, + "arena_rank": 199, + "arena_votes": 3749 + }, + { + "name": "gemini-1.5-flash-001", + "arena_name": "gemini-1.5-flash-001", + "arena_org": "Google", + "arena_elo": 1285.55, + "arena_rank": 200, + "arena_votes": 62823 + }, + { + "name": "olmo-3.1-32b-think", + "arena_name": "olmo-3.1-32b-think", + "arena_org": "Ai2", + "arena_elo": 1285.07, + "arena_rank": 201, + "arena_votes": 8442 + }, + { + "name": "nemotron-4-340b-instruct", + "arena_name": "nemotron-4-340b-instruct", + "arena_org": "Nvidia", + "arena_elo": 1277.38, + "arena_rank": 204, + "arena_votes": 19661 + }, + { + "name": "llama-3-70b-instruct", + "arena_name": "llama-3-70b-instruct", + "arena_org": "Meta", + "arena_elo": 1275.95, + "arena_rank": 206, + "arena_votes": 156880 + }, + { + "name": "mistral-small-24b-instruct-2501", + "arena_name": "mistral-small-24b-instruct-2501", + "arena_org": "Mistral", + "arena_elo": 1273.85, + "arena_rank": 208, + "arena_votes": 14677 + }, + { + "name": "glm-4-0520", + "arena_name": "glm-4-0520", + "arena_org": "Zhipu AI", + "arena_elo": 1273.36, + "arena_rank": 209, + "arena_votes": 9788 + }, + { + "name": "reka-flash-20240904", + "arena_name": "reka-flash-20240904", + "arena_org": "Reka AI", + "arena_elo": 1272.12, + "arena_rank": 210, + "arena_votes": 7537 + }, + { + "name": "c4ai-aya-expanse-32b", + "arena_name": "c4ai-aya-expanse-32b", + "arena_org": "Cohere", + "arena_elo": 1267.03, + "arena_rank": 212, + "arena_votes": 27123 + }, + { + "name": "amazon-nova-lite-v1.0", + "arena_name": "amazon-nova-lite-v1.0", + "arena_org": "Amazon", + "arena_elo": 1260.66, + "arena_rank": 218, + "arena_votes": 19376 + }, + { + "name": "olmo-2-0325-32b-instruct", + "arena_name": "olmo-2-0325-32b-instruct", + "arena_org": "Ai2", + "arena_elo": 1251.98, + "arena_rank": 221, + "arena_votes": 3335 + }, + { + "name": "amazon-nova-micro-v1.0", + "arena_name": "amazon-nova-micro-v1.0", + "arena_org": "Amazon", + "arena_elo": 1240.88, + "arena_rank": 224, + "arena_votes": 19355 + }, + { + "name": "ministral-8b-2410", + "arena_name": "ministral-8b-2410", + "arena_org": "Mistral", + "arena_elo": 1237.03, + "arena_rank": 226, + "arena_votes": 4780 + }, + { + "name": "gemini-pro-dev-api", + "arena_name": "gemini-pro-dev-api", + "arena_org": "Google", + "arena_elo": 1235, + "arena_rank": 227, + "arena_votes": 18352 + }, + { + "name": "hunyuan-standard-256k", + "arena_name": "hunyuan-standard-256k", + "arena_org": "Tencent", + "arena_elo": 1233.36, + "arena_rank": 229, + "arena_votes": 2729 + }, + { + "name": "reka-flash-21b-20240226-online", + "arena_name": "reka-flash-21b-20240226-online", + "arena_org": "Reka AI", + "arena_elo": 1233.27, + "arena_rank": 230, + "arena_votes": 15451 + }, + { + "name": "reka-flash-21b-20240226", + "arena_name": "reka-flash-21b-20240226", + "arena_org": "Reka AI", + "arena_elo": 1226.51, + "arena_rank": 234, + "arena_votes": 24806 + }, + { + "name": "c4ai-aya-expanse-8b", + "arena_name": "c4ai-aya-expanse-8b", + "arena_org": "Cohere", + "arena_elo": 1223, + "arena_rank": 237, + "arena_votes": 9827 + }, + { + "name": "mistral-medium", + "arena_name": "mistral-medium", + "arena_org": "Mistral", + "arena_elo": 1222.96, + "arena_rank": 238, + "arena_votes": 34552 + }, + { + "name": "gemini-pro", + "arena_name": "gemini-pro", + "arena_org": "Google", + "arena_elo": 1221.71, + "arena_rank": 239, + "arena_votes": 6390 + }, + { + "name": "gpt-3.5-turbo-1106", + "arena_name": "gpt-3.5-turbo-1106", + "arena_org": "OpenAI", + "arena_elo": 1202.4, + "arena_rank": 246, + "arena_votes": 16616, + "aider_pass_rate": 0.455 + }, + { + "name": "dbrx-instruct-preview", + "arena_name": "dbrx-instruct-preview", + "arena_org": "Databricks", + "arena_elo": 1194.93, + "arena_rank": 250, + "arena_votes": 32196 + }, + { + "name": "wizardlm-70b", + "arena_name": "wizardlm-70b", + "arena_org": "Microsoft", + "arena_elo": 1184.64, + "arena_rank": 253, + "arena_votes": 8214 + }, + { + "name": "snowflake-arctic-instruct", + "arena_name": "snowflake-arctic-instruct", + "arena_org": "Snowflake", + "arena_elo": 1179.46, + "arena_rank": 260, + "arena_votes": 32836 + }, + { + "name": "tulu-2-dpo-70b", + "arena_name": "tulu-2-dpo-70b", + "arena_org": "AllenAI/UW", + "arena_elo": 1178.01, + "arena_rank": 262, + "arena_votes": 6534 + }, + { + "name": "vicuna-33b", + "arena_name": "vicuna-33b", + "arena_org": "LMSYS", + "arena_elo": 1172.78, + "arena_rank": 264, + "arena_votes": 22479 + }, + { + "name": "llama-2-70b-chat", + "arena_name": "llama-2-70b-chat", + "arena_org": "Meta", + "arena_elo": 1170.79, + "arena_rank": 267, + "arena_votes": 38491 + }, + { + "name": "llama2-70b-steerlm-chat", + "arena_name": "llama2-70b-steerlm-chat", + "arena_org": "Nvidia", + "arena_elo": 1155.29, + "arena_rank": 273, + "arena_votes": 3584 + }, + { + "name": "dolphin-2.2.1-mistral-7b", + "arena_name": "dolphin-2.2.1-mistral-7b", + "arena_org": "Cognitive Computations", + "arena_elo": 1151.95, + "arena_rank": 275, + "arena_votes": 1679 + }, + { + "name": "mpt-30b-chat", + "arena_name": "mpt-30b-chat", + "arena_org": "MosaicML", + "arena_elo": 1150.09, + "arena_rank": 276, + "arena_votes": 2571 + }, + { + "name": "wizardlm-13b", + "arena_name": "wizardlm-13b", + "arena_org": "Microsoft", + "arena_elo": 1149.12, + "arena_rank": 278, + "arena_votes": 7046 + }, + { + "name": "falcon-180b-chat", + "arena_name": "falcon-180b-chat", + "arena_org": "TII", + "arena_elo": 1146.96, + "arena_rank": 279, + "arena_votes": 1295 + }, + { + "name": "phi-3-mini-4k-instruct-june-2024", + "arena_name": "phi-3-mini-4k-instruct-june-2024", + "arena_org": "Microsoft", + "arena_elo": 1142.99, + "arena_rank": 281, + "arena_votes": 12296 + }, + { + "name": "llama-2-13b-chat", + "arena_name": "llama-2-13b-chat", + "arena_org": "Meta", + "arena_elo": 1141.46, + "arena_rank": 282, + "arena_votes": 19171 + }, + { + "name": "vicuna-13b", + "arena_name": "vicuna-13b", + "arena_org": "LMSYS", + "arena_elo": 1140.86, + "arena_rank": 283, + "arena_votes": 19366 + }, + { + "name": "qwen-14b-chat", + "arena_name": "qwen-14b-chat", + "arena_org": "Alibaba", + "arena_elo": 1138.51, + "arena_rank": 284, + "arena_votes": 4964 + }, + { + "name": "palm-2", + "arena_name": "palm-2", + "arena_org": "Google", + "arena_elo": 1137.12, + "arena_rank": 285, + "arena_votes": 8554 + }, + { + "name": "codellama-34b-instruct", + "arena_name": "codellama-34b-instruct", + "arena_org": "Meta", + "arena_elo": 1136.55, + "arena_rank": 286, + "arena_votes": 7363 + }, + { + "name": "guanaco-33b", + "arena_name": "guanaco-33b", + "arena_org": "UW", + "arena_elo": 1127.26, + "arena_rank": 291, + "arena_votes": 2921 + }, + { + "name": "stripedhyena-nous-7b", + "arena_name": "stripedhyena-nous-7b", + "arena_org": "Together AI", + "arena_elo": 1120.9, + "arena_rank": 293, + "arena_votes": 5184 + }, + { + "name": "codellama-70b-instruct", + "arena_name": "codellama-70b-instruct", + "arena_org": "Meta", + "arena_elo": 1118.94, + "arena_rank": 294, + "arena_votes": 1143 + }, + { + "name": "vicuna-7b", + "arena_name": "vicuna-7b", + "arena_org": "LMSYS", + "arena_elo": 1114.57, + "arena_rank": 295, + "arena_votes": 6923 + }, + { + "name": "mistral-7b-instruct", + "arena_name": "mistral-7b-instruct", + "arena_org": "Mistral", + "arena_elo": 1109.58, + "arena_rank": 299, + "arena_votes": 8977 + }, + { + "name": "llama-2-7b-chat", + "arena_name": "llama-2-7b-chat", + "arena_org": "Meta", + "arena_elo": 1108.14, + "arena_rank": 300, + "arena_votes": 14148 + }, + { + "name": "olmo-7b-instruct", + "arena_name": "olmo-7b-instruct", + "arena_org": "Ai2", + "arena_elo": 1074.57, + "arena_rank": 303, + "arena_votes": 6329 + }, + { + "name": "koala-13b", + "arena_name": "koala-13b", + "arena_org": "UC Berkeley", + "arena_elo": 1070.35, + "arena_rank": 304, + "arena_votes": 6964 + }, + { + "name": "alpaca-13b", + "arena_name": "alpaca-13b", + "arena_org": "Stanford", + "arena_elo": 1067.28, + "arena_rank": 305, + "arena_votes": 5745 + }, + { + "name": "gpt4all-13b-snoozy", + "arena_name": "gpt4all-13b-snoozy", + "arena_org": "Nomic AI", + "arena_elo": 1065.85, + "arena_rank": 306, + "arena_votes": 1743 + }, + { + "name": "mpt-7b-chat", + "arena_name": "mpt-7b-chat", + "arena_org": "MosaicML", + "arena_elo": 1061.73, + "arena_rank": 307, + "arena_votes": 3925 + }, + { + "name": "chatglm3-6b", + "arena_name": "chatglm3-6b", + "arena_org": "Tsinghua", + "arena_elo": 1055.97, + "arena_rank": 308, + "arena_votes": 4658 + }, + { + "name": "RWKV-4-Raven-14B", + "arena_name": "RWKV-4-Raven-14B", + "arena_org": "RWKV", + "arena_elo": 1041.2, + "arena_rank": 309, + "arena_votes": 4845 + }, + { + "name": "chatglm2-6b", + "arena_name": "chatglm2-6b", + "arena_org": "Tsinghua", + "arena_elo": 1024.11, + "arena_rank": 310, + "arena_votes": 2657 + }, + { + "name": "oasst-pythia-12b", + "arena_name": "oasst-pythia-12b", + "arena_org": "OpenAssistant", + "arena_elo": 1022, + "arena_rank": 311, + "arena_votes": 6311 + }, + { + "name": "chatglm-6b", + "arena_name": "chatglm-6b", + "arena_org": "Tsinghua", + "arena_elo": 995.492, + "arena_rank": 312, + "arena_votes": 4914 + }, + { + "name": "fastchat-t5-3b", + "arena_name": "fastchat-t5-3b", + "arena_org": "LMSYS", + "arena_elo": 991.198, + "arena_rank": 313, + "arena_votes": 4203 + }, + { + "name": "stablelm-tuned-alpha-7b", + "arena_name": "stablelm-tuned-alpha-7b", + "arena_org": "Stability AI", + "arena_elo": 952.506, + "arena_rank": 316, + "arena_votes": 3287 + }, + { + "name": "gpt-3.5-turbo-0301", + "aider_pass_rate": 0.504 + }, + { + "name": "gpt-3.5-turbo-0613", + "aider_pass_rate": 0.38299999999999995 + }, + { + "name": "gpt-4-turbo-2024-04-09 (udiff)", + "aider_pass_rate": 0.48100000000000004 + }, + { + "name": "llama3-70b-8192", + "aider_pass_rate": 0.386 + }, + { + "name": "gpt-4-turbo-2024-04-09 (diff)", + "aider_pass_rate": 0.485 + }, + { + "name": "gemini-1.5-flash-latest", + "aider_pass_rate": 0.33799999999999997 + }, + { + "name": "codestral-2405", + "aider_pass_rate": 0.353 + }, + { + "name": "codeqwen:7b-chat-v1.5-q8_0", + "aider_pass_rate": 0.32299999999999995 + }, + { + "name": "codestral:22b-v0.1-q8_0", + "aider_pass_rate": 0.353 + }, + { + "name": "qwen2:72b-instruct-q8_0", + "aider_pass_rate": 0.436 + }, + { + "name": "gemma2:27b-instruct-q8_0", + "aider_pass_rate": 0.316 + }, + { + "name": "DeepSeek Chat V2 0628", + "aider_pass_rate": 0.609 + }, + { + "name": "llama-3.1-405b-instruct (diff)", + "aider_pass_rate": 0.466 + }, + { + "name": "llama-3.1-405b-instruct (whole)", + "aider_pass_rate": 0.489 + }, + { + "name": "DeepSeek Coder V2 0724", + "aider_pass_rate": 0.579 + }, + { + "name": "nousresearch/hermes-3-llama-3.1-405b", + "aider_pass_rate": 0.511 + }, + { + "name": "yi-coder:9b-chat-q4_0", + "aider_pass_rate": 0.414 + }, + { + "name": "Reflection-70B", + "aider_pass_rate": 0.331 + }, + { + "name": "Command R (08-24)", + "aider_pass_rate": 0.301 + }, + { + "name": "o1-mini (whole)", + "aider_pass_rate": 0.496 + }, + { + "name": "qwen2.5-coder:7b-instruct-q8_0", + "aider_pass_rate": 0.451 + }, + { + "name": "qwen-2.5-72b-instruct (bf16)", + "aider_pass_rate": 0.534 + }, + { + "name": "Codestral-22B-v0.1-Q4_K_M", + "aider_pass_rate": 0.361 + }, + { + "name": "gemini-1.5-flash-002 (0924)", + "aider_pass_rate": 0.376 + }, + { + "name": "gemini-1.5-flash-8b-exp-0924", + "aider_pass_rate": 0.331 + }, + { + "name": "ollama/codestral", + "aider_pass_rate": 0.33799999999999997 + }, + { + "name": "ollama/codegeex4", + "aider_pass_rate": 0.28600000000000003 + }, + { + "name": "ollama/wojtek/opencodeinterpreter:6.7b", + "aider_pass_rate": 0.263 + }, + { + "name": "ollama/mistral-nemo:12b-instruct-2407-q4_K_M", + "aider_pass_rate": 0.226 + }, + { + "name": "ollama/qwen2.5:32b", + "aider_pass_rate": 0.444 + }, + { + "name": "ollama/llama3.2:3b-instruct-fp16", + "aider_pass_rate": 0.203 + }, + { + "name": "ollama/hermes3:8b-llama3.1-fp16", + "aider_pass_rate": 0.24100000000000002 + }, + { + "name": "ollama/mistral-small", + "aider_pass_rate": 0.301 + }, + { + "name": "ollama/yi-coder:9b-chat-fp16", + "aider_pass_rate": 0.39799999999999996 + }, + { + "name": "ollama/hermes3", + "aider_pass_rate": 0.21100000000000002 + }, + { + "name": "openai/chatgpt-4o-latest", + "aider_pass_rate": 0.564 + }, + { + "name": "ollama/Qwen2.5.1-Coder-7B-Instruct-GGUF:Q8_0-32k", + "aider_pass_rate": 0.526 + }, + { + "name": "Qwen2.5-Coder-0.5B-Instruct", + "aider_pass_rate": 0.14300000000000002 + }, + { + "name": "Qwen2.5-Coder-1.5B-Instruct", + "aider_pass_rate": 0.28600000000000003 + }, + { + "name": "Qwen2.5-Coder-3B-Instruct", + "aider_pass_rate": 0.33799999999999997 + }, + { + "name": "ollama/qwen2.5:32b-instruct-q8_0", + "aider_pass_rate": 0.466 + }, + { + "name": "openrouter/qwen/qwen-2.5-coder-32b-instruct", + "aider_pass_rate": 0.496 + }, + { + "name": "ollama/qwen2.5-coder:14b", + "aider_pass_rate": 0.534 + }, + { + "name": "ollama/qwen2.5-coder:32b", + "aider_pass_rate": 0.586 + }, + { + "name": "ollama/tulu3", + "aider_pass_rate": 0.218 + }, + { + "name": "ollama/granite3-dense:8b", + "aider_pass_rate": 0.17300000000000001 + }, + { + "name": "gemini-exp-1206 (diff)", + "aider_pass_rate": 0.556 + }, + { + "name": "gemini-exp-1206 (whole)", + "aider_pass_rate": 0.609 + } +] \ No newline at end of file