| { |
| "RelicEnv": { |
| "qwen2.5-3b-instruct": 0.18, |
| "qwen2.5-7b-instruct": 0.396, |
| "qwen2.5-14b-instruct": 0.8, |
| "qwen2.5-32b-instruct": 0.8560000000000001, |
| "qwen2.5-72b-instruct": 0.892, |
| "llama-3.1-8b-instruct": 0.21600000000000003, |
| "llama-3.1-70b-instruct": 0.6639999999999999, |
| "llama-3.2-3b-instruct": 0.164, |
| "llama-3.3-70b-instruct": 0.836, |
| "mistral-large-instruct-2411": 0.8560000000000001, |
| "gemma-2-27b-it": 0.544, |
| "gemma-2-9b-it": 0.36400000000000005, |
| "deepseek-v3": 0.9359999999999999, |
| "deepseek-r1": 0.916, |
| "qwq-32b": 0.9560000000000001, |
| "Average": 0.6384 |
| }, |
| "HerbEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.304, |
| "qwen2.5-14b-instruct": 0.784, |
| "qwen2.5-32b-instruct": 0.8400000000000001, |
| "qwen2.5-72b-instruct": 0.8039999999999999, |
| "llama-3.1-8b-instruct": 0.30000000000000004, |
| "llama-3.1-70b-instruct": 0.568, |
| "llama-3.2-3b-instruct": 0.128, |
| "llama-3.3-70b-instruct": 0.612, |
| "mistral-large-instruct-2411": 0.76, |
| "gemma-2-27b-it": 0.504, |
| "gemma-2-9b-it": 0.18000000000000002, |
| "deepseek-v3": 0.968, |
| "deepseek-r1": 0.9359999999999999, |
| "qwq-32b": 0.924, |
| "Average": 0.5863999999999999 |
| }, |
| "TransdimensionalEnv": { |
| "qwen2.5-3b-instruct": 0.156, |
| "qwen2.5-7b-instruct": 0.38400000000000006, |
| "qwen2.5-14b-instruct": 0.836, |
| "qwen2.5-32b-instruct": 0.876, |
| "qwen2.5-72b-instruct": 0.836, |
| "llama-3.1-8b-instruct": 0.44399999999999995, |
| "llama-3.1-70b-instruct": 0.828, |
| "llama-3.2-3b-instruct": 0.172, |
| "llama-3.3-70b-instruct": 0.86, |
| "mistral-large-instruct-2411": 0.86, |
| "gemma-2-27b-it": 0.5599999999999999, |
| "gemma-2-9b-it": 0.516, |
| "deepseek-v3": 0.968, |
| "deepseek-r1": 0.9359999999999999, |
| "qwq-32b": 0.968, |
| "Average": 0.6799999999999999 |
| }, |
| "SorcererEnv": { |
| "qwen2.5-3b-instruct": 0.16, |
| "qwen2.5-7b-instruct": 0.32400000000000007, |
| "qwen2.5-14b-instruct": 0.8039999999999999, |
| "qwen2.5-32b-instruct": 0.8240000000000001, |
| "qwen2.5-72b-instruct": 0.8320000000000001, |
| "llama-3.1-8b-instruct": 0.276, |
| "llama-3.1-70b-instruct": 0.6639999999999999, |
| "llama-3.2-3b-instruct": 0.196, |
| "llama-3.3-70b-instruct": 0.7360000000000001, |
| "mistral-large-instruct-2411": 0.8, |
| "gemma-2-27b-it": 0.5640000000000001, |
| "gemma-2-9b-it": 0.28800000000000003, |
| "deepseek-v3": 0.8640000000000001, |
| "deepseek-r1": 0.8240000000000001, |
| "qwq-32b": 0.8400000000000001, |
| "Average": 0.5997333333333333 |
| }, |
| "QuantumEnv": { |
| "qwen2.5-3b-instruct": 0.196, |
| "qwen2.5-7b-instruct": 0.532, |
| "qwen2.5-14b-instruct": 0.8720000000000001, |
| "qwen2.5-32b-instruct": 0.9039999999999999, |
| "qwen2.5-72b-instruct": 0.916, |
| "llama-3.1-8b-instruct": 0.45600000000000007, |
| "llama-3.1-70b-instruct": 0.7999999999999999, |
| "llama-3.2-3b-instruct": 0.168, |
| "llama-3.3-70b-instruct": 0.8480000000000001, |
| "mistral-large-instruct-2411": 0.8720000000000001, |
| "gemma-2-27b-it": 0.744, |
| "gemma-2-9b-it": 0.544, |
| "deepseek-v3": 0.884, |
| "deepseek-r1": 0.8640000000000001, |
| "qwq-32b": 0.868, |
| "Average": 0.6978666666666666 |
| }, |
| "AstronomyEnv": { |
| "qwen2.5-3b-instruct": 0.172, |
| "qwen2.5-7b-instruct": 0.42800000000000005, |
| "qwen2.5-14b-instruct": 0.716, |
| "qwen2.5-32b-instruct": 0.676, |
| "qwen2.5-72b-instruct": 0.748, |
| "llama-3.1-8b-instruct": 0.336, |
| "llama-3.1-70b-instruct": 0.692, |
| "llama-3.2-3b-instruct": 0.176, |
| "llama-3.3-70b-instruct": 0.6519999999999999, |
| "mistral-large-instruct-2411": 0.7999999999999999, |
| "gemma-2-27b-it": 0.508, |
| "gemma-2-9b-it": 0.372, |
| "deepseek-v3": 0.748, |
| "deepseek-r1": 0.8200000000000001, |
| "qwq-32b": 0.852, |
| "Average": 0.5797333333333333 |
| }, |
| "MusicGenresEnv": { |
| "qwen2.5-3b-instruct": 0.22000000000000003, |
| "qwen2.5-7b-instruct": 0.42000000000000004, |
| "qwen2.5-14b-instruct": 0.72, |
| "qwen2.5-32b-instruct": 0.716, |
| "qwen2.5-72b-instruct": 0.696, |
| "llama-3.1-8b-instruct": 0.35200000000000004, |
| "llama-3.1-70b-instruct": 0.6280000000000001, |
| "llama-3.2-3b-instruct": 0.136, |
| "llama-3.3-70b-instruct": 0.592, |
| "mistral-large-instruct-2411": 0.732, |
| "gemma-2-27b-it": 0.44800000000000006, |
| "gemma-2-9b-it": 0.332, |
| "deepseek-v3": 0.748, |
| "deepseek-r1": 0.792, |
| "qwq-32b": 0.876, |
| "Average": 0.5605333333333334 |
| }, |
| "CloudEnv": { |
| "qwen2.5-3b-instruct": 0.21199999999999997, |
| "qwen2.5-7b-instruct": 0.42000000000000004, |
| "qwen2.5-14b-instruct": 0.76, |
| "qwen2.5-32b-instruct": 0.656, |
| "qwen2.5-72b-instruct": 0.712, |
| "llama-3.1-8b-instruct": 0.42000000000000004, |
| "llama-3.1-70b-instruct": 0.664, |
| "llama-3.2-3b-instruct": 0.22800000000000004, |
| "llama-3.3-70b-instruct": 0.696, |
| "mistral-large-instruct-2411": 0.8360000000000001, |
| "gemma-2-27b-it": 0.6, |
| "gemma-2-9b-it": 0.4, |
| "deepseek-v3": 0.8200000000000001, |
| "deepseek-r1": 0.908, |
| "qwq-32b": 0.9120000000000001, |
| "Average": 0.6162666666666667 |
| }, |
| "CuisineEnv": { |
| "qwen2.5-3b-instruct": 0.21600000000000003, |
| "qwen2.5-7b-instruct": 0.316, |
| "qwen2.5-14b-instruct": 0.6960000000000001, |
| "qwen2.5-32b-instruct": 0.664, |
| "qwen2.5-72b-instruct": 0.656, |
| "llama-3.1-8b-instruct": 0.22799999999999998, |
| "llama-3.1-70b-instruct": 0.476, |
| "llama-3.2-3b-instruct": 0.152, |
| "llama-3.3-70b-instruct": 0.44400000000000006, |
| "mistral-large-instruct-2411": 0.644, |
| "gemma-2-27b-it": 0.27599999999999997, |
| "gemma-2-9b-it": 0.156, |
| "deepseek-v3": 0.8400000000000001, |
| "deepseek-r1": 0.7959999999999999, |
| "qwq-32b": 0.8800000000000001, |
| "Average": 0.49599999999999994 |
| }, |
| "PlantEnv": { |
| "qwen2.5-3b-instruct": 0.168, |
| "qwen2.5-7b-instruct": 0.236, |
| "qwen2.5-14b-instruct": 0.34, |
| "qwen2.5-32b-instruct": 0.22000000000000003, |
| "qwen2.5-72b-instruct": 0.22799999999999998, |
| "llama-3.1-8b-instruct": 0.148, |
| "llama-3.1-70b-instruct": 0.16, |
| "llama-3.2-3b-instruct": 0.084, |
| "llama-3.3-70b-instruct": 0.07599999999999998, |
| "mistral-large-instruct-2411": 0.264, |
| "gemma-2-27b-it": 0.14400000000000002, |
| "gemma-2-9b-it": 0.092, |
| "deepseek-v3": 0.512, |
| "deepseek-r1": 0.5, |
| "qwq-32b": 0.548, |
| "Average": 0.24800000000000003 |
| }, |
| "HistoricalEnv": { |
| "qwen2.5-3b-instruct": 0.24, |
| "qwen2.5-7b-instruct": 0.368, |
| "qwen2.5-14b-instruct": 0.5800000000000001, |
| "qwen2.5-32b-instruct": 0.476, |
| "qwen2.5-72b-instruct": 0.512, |
| "llama-3.1-8b-instruct": 0.332, |
| "llama-3.1-70b-instruct": 0.616, |
| "llama-3.2-3b-instruct": 0.2, |
| "llama-3.3-70b-instruct": 0.652, |
| "mistral-large-instruct-2411": 0.6880000000000001, |
| "gemma-2-27b-it": 0.5, |
| "gemma-2-9b-it": 0.376, |
| "deepseek-v3": 0.748, |
| "deepseek-r1": 0.828, |
| "qwq-32b": 0.884, |
| "Average": 0.5333333333333334 |
| }, |
| "GadgetEnv": { |
| "qwen2.5-3b-instruct": 0.124, |
| "qwen2.5-7b-instruct": 0.312, |
| "qwen2.5-14b-instruct": 0.852, |
| "qwen2.5-32b-instruct": 0.8640000000000001, |
| "qwen2.5-72b-instruct": 0.892, |
| "llama-3.1-8b-instruct": 0.284, |
| "llama-3.1-70b-instruct": 0.692, |
| "llama-3.2-3b-instruct": 0.11200000000000002, |
| "llama-3.3-70b-instruct": 0.7360000000000001, |
| "mistral-large-instruct-2411": 0.884, |
| "gemma-2-27b-it": 0.32799999999999996, |
| "gemma-2-9b-it": 0.184, |
| "deepseek-v3": 0.9640000000000001, |
| "deepseek-r1": 0.932, |
| "qwq-32b": 0.932, |
| "Average": 0.6061333333333334 |
| }, |
| "TimeTravelEnv": { |
| "qwen2.5-3b-instruct": 0.128, |
| "qwen2.5-7b-instruct": 0.292, |
| "qwen2.5-14b-instruct": 0.808, |
| "qwen2.5-32b-instruct": 0.828, |
| "qwen2.5-72b-instruct": 0.8039999999999999, |
| "llama-3.1-8b-instruct": 0.376, |
| "llama-3.1-70b-instruct": 0.684, |
| "llama-3.2-3b-instruct": 0.124, |
| "llama-3.3-70b-instruct": 0.716, |
| "mistral-large-instruct-2411": 0.884, |
| "gemma-2-27b-it": 0.32799999999999996, |
| "gemma-2-9b-it": 0.21600000000000003, |
| "deepseek-v3": 0.9399999999999998, |
| "deepseek-r1": 0.932, |
| "qwq-32b": 0.924, |
| "Average": 0.5989333333333333 |
| }, |
| "PollutionEnv": { |
| "qwen2.5-3b-instruct": 0.136, |
| "qwen2.5-7b-instruct": 0.328, |
| "qwen2.5-14b-instruct": 0.792, |
| "qwen2.5-32b-instruct": 0.7120000000000001, |
| "qwen2.5-72b-instruct": 0.704, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.664, |
| "llama-3.2-3b-instruct": 0.124, |
| "llama-3.3-70b-instruct": 0.6960000000000001, |
| "mistral-large-instruct-2411": 0.784, |
| "gemma-2-27b-it": 0.336, |
| "gemma-2-9b-it": 0.252, |
| "deepseek-v3": 0.8640000000000001, |
| "deepseek-r1": 0.8560000000000001, |
| "qwq-32b": 0.852, |
| "Average": 0.5610666666666666 |
| }, |
| "DemographicEnv": { |
| "qwen2.5-3b-instruct": 0.072, |
| "qwen2.5-7b-instruct": 0.42800000000000005, |
| "qwen2.5-14b-instruct": 0.68, |
| "qwen2.5-32b-instruct": 0.7799999999999999, |
| "qwen2.5-72b-instruct": 0.7719999999999999, |
| "llama-3.1-8b-instruct": 0.272, |
| "llama-3.1-70b-instruct": 0.6239999999999999, |
| "llama-3.2-3b-instruct": 0.176, |
| "llama-3.3-70b-instruct": 0.748, |
| "mistral-large-instruct-2411": 0.8200000000000001, |
| "gemma-2-27b-it": 0.356, |
| "gemma-2-9b-it": 0.156, |
| "deepseek-v3": 0.8960000000000001, |
| "deepseek-r1": 0.876, |
| "qwq-32b": 0.8960000000000001, |
| "Average": 0.5701333333333333 |
| }, |
| "GeneticEnv": { |
| "qwen2.5-3b-instruct": 0.084, |
| "qwen2.5-7b-instruct": 0.392, |
| "qwen2.5-14b-instruct": 0.884, |
| "qwen2.5-32b-instruct": 0.9279999999999999, |
| "qwen2.5-72b-instruct": 0.9400000000000001, |
| "llama-3.1-8b-instruct": 0.45999999999999996, |
| "llama-3.1-70b-instruct": 0.9, |
| "llama-3.2-3b-instruct": 0.192, |
| "llama-3.3-70b-instruct": 0.916, |
| "mistral-large-instruct-2411": 0.9040000000000001, |
| "gemma-2-27b-it": 0.776, |
| "gemma-2-9b-it": 0.548, |
| "deepseek-v3": 0.984, |
| "deepseek-r1": 0.952, |
| "qwq-32b": 0.932, |
| "Average": 0.7194666666666667 |
| }, |
| "CraftsmanEnv": { |
| "qwen2.5-3b-instruct": 0.14400000000000002, |
| "qwen2.5-7b-instruct": 0.256, |
| "qwen2.5-14b-instruct": 0.624, |
| "qwen2.5-32b-instruct": 0.736, |
| "qwen2.5-72b-instruct": 0.664, |
| "llama-3.1-8b-instruct": 0.22000000000000003, |
| "llama-3.1-70b-instruct": 0.524, |
| "llama-3.2-3b-instruct": 0.10800000000000001, |
| "llama-3.3-70b-instruct": 0.41600000000000004, |
| "mistral-large-instruct-2411": 0.7080000000000001, |
| "gemma-2-27b-it": 0.324, |
| "gemma-2-9b-it": 0.096, |
| "deepseek-v3": 0.9, |
| "deepseek-r1": 0.7879999999999999, |
| "qwq-32b": 0.8160000000000001, |
| "Average": 0.4882666666666667 |
| }, |
| "StarConstellationEnv": { |
| "qwen2.5-3b-instruct": 0.1, |
| "qwen2.5-7b-instruct": 0.332, |
| "qwen2.5-14b-instruct": 0.5960000000000001, |
| "qwen2.5-32b-instruct": 0.572, |
| "qwen2.5-72b-instruct": 0.5840000000000001, |
| "llama-3.1-8b-instruct": 0.376, |
| "llama-3.1-70b-instruct": 0.4640000000000001, |
| "llama-3.2-3b-instruct": 0.136, |
| "llama-3.3-70b-instruct": 0.41200000000000003, |
| "mistral-large-instruct-2411": 0.6120000000000001, |
| "gemma-2-27b-it": 0.472, |
| "gemma-2-9b-it": 0.22799999999999998, |
| "deepseek-v3": 0.744, |
| "deepseek-r1": 0.748, |
| "qwq-32b": 0.736, |
| "Average": 0.47413333333333335 |
| }, |
| "MythicalCreatureEnv": { |
| "qwen2.5-3b-instruct": 0.2, |
| "qwen2.5-7b-instruct": 0.324, |
| "qwen2.5-14b-instruct": 0.632, |
| "qwen2.5-32b-instruct": 0.712, |
| "qwen2.5-72b-instruct": 0.668, |
| "llama-3.1-8b-instruct": 0.31200000000000006, |
| "llama-3.1-70b-instruct": 0.62, |
| "llama-3.2-3b-instruct": 0.11200000000000002, |
| "llama-3.3-70b-instruct": 0.648, |
| "mistral-large-instruct-2411": 0.7480000000000001, |
| "gemma-2-27b-it": 0.42799999999999994, |
| "gemma-2-9b-it": 0.268, |
| "deepseek-v3": 0.8400000000000001, |
| "deepseek-r1": 0.8400000000000001, |
| "qwq-32b": 0.852, |
| "Average": 0.5469333333333333 |
| }, |
| "ArtStyleEnv": { |
| "qwen2.5-3b-instruct": 0.136, |
| "qwen2.5-7b-instruct": 0.332, |
| "qwen2.5-14b-instruct": 0.78, |
| "qwen2.5-32b-instruct": 0.8320000000000001, |
| "qwen2.5-72b-instruct": 0.748, |
| "llama-3.1-8b-instruct": 0.356, |
| "llama-3.1-70b-instruct": 0.616, |
| "llama-3.2-3b-instruct": 0.17200000000000001, |
| "llama-3.3-70b-instruct": 0.6199999999999999, |
| "mistral-large-instruct-2411": 0.828, |
| "gemma-2-27b-it": 0.43200000000000005, |
| "gemma-2-9b-it": 0.256, |
| "deepseek-v3": 0.876, |
| "deepseek-r1": 0.8200000000000001, |
| "qwq-32b": 0.868, |
| "Average": 0.5781333333333335 |
| }, |
| "CookingEnv": { |
| "qwen2.5-3b-instruct": 0.13999999999999999, |
| "qwen2.5-7b-instruct": 0.44799999999999995, |
| "qwen2.5-14b-instruct": 0.76, |
| "qwen2.5-32b-instruct": 0.7440000000000001, |
| "qwen2.5-72b-instruct": 0.7, |
| "llama-3.1-8b-instruct": 0.364, |
| "llama-3.1-70b-instruct": 0.6839999999999999, |
| "llama-3.2-3b-instruct": 0.156, |
| "llama-3.3-70b-instruct": 0.656, |
| "mistral-large-instruct-2411": 0.74, |
| "gemma-2-27b-it": 0.48, |
| "gemma-2-9b-it": 0.364, |
| "deepseek-v3": 0.8640000000000001, |
| "deepseek-r1": 0.812, |
| "qwq-32b": 0.9, |
| "Average": 0.5874666666666666 |
| }, |
| "HistoricalBattleEnv": { |
| "qwen2.5-3b-instruct": 0.256, |
| "qwen2.5-7b-instruct": 0.292, |
| "qwen2.5-14b-instruct": 0.45999999999999996, |
| "qwen2.5-32b-instruct": 0.476, |
| "qwen2.5-72b-instruct": 0.42400000000000004, |
| "llama-3.1-8b-instruct": 0.28400000000000003, |
| "llama-3.1-70b-instruct": 0.492, |
| "llama-3.2-3b-instruct": 0.148, |
| "llama-3.3-70b-instruct": 0.62, |
| "mistral-large-instruct-2411": 0.608, |
| "gemma-2-27b-it": 0.388, |
| "gemma-2-9b-it": 0.34, |
| "deepseek-v3": 0.724, |
| "deepseek-r1": 0.788, |
| "qwq-32b": 0.8560000000000001, |
| "Average": 0.47706666666666664 |
| }, |
| "FungalEnv": { |
| "qwen2.5-3b-instruct": 0.15999999999999998, |
| "qwen2.5-7b-instruct": 0.46399999999999997, |
| "qwen2.5-14b-instruct": 0.664, |
| "qwen2.5-32b-instruct": 0.728, |
| "qwen2.5-72b-instruct": 0.6839999999999999, |
| "llama-3.1-8b-instruct": 0.41600000000000004, |
| "llama-3.1-70b-instruct": 0.5840000000000001, |
| "llama-3.2-3b-instruct": 0.14, |
| "llama-3.3-70b-instruct": 0.644, |
| "mistral-large-instruct-2411": 0.7440000000000001, |
| "gemma-2-27b-it": 0.536, |
| "gemma-2-9b-it": 0.184, |
| "deepseek-v3": 0.844, |
| "deepseek-r1": 0.764, |
| "qwq-32b": 0.7879999999999999, |
| "Average": 0.5562666666666666 |
| }, |
| "CryptographyEnv": { |
| "qwen2.5-3b-instruct": 0.24000000000000005, |
| "qwen2.5-7b-instruct": 0.23199999999999998, |
| "qwen2.5-14b-instruct": 0.508, |
| "qwen2.5-32b-instruct": 0.5760000000000001, |
| "qwen2.5-72b-instruct": 0.528, |
| "llama-3.1-8b-instruct": 0.29600000000000004, |
| "llama-3.1-70b-instruct": 0.524, |
| "llama-3.2-3b-instruct": 0.11600000000000002, |
| "llama-3.3-70b-instruct": 0.512, |
| "mistral-large-instruct-2411": 0.6799999999999999, |
| "gemma-2-27b-it": 0.328, |
| "gemma-2-9b-it": 0.192, |
| "deepseek-v3": 0.784, |
| "deepseek-r1": 0.74, |
| "qwq-32b": 0.8480000000000001, |
| "Average": 0.4736 |
| }, |
| "StorageEnv": { |
| "qwen2.5-3b-instruct": 0.22800000000000004, |
| "qwen2.5-7b-instruct": 0.44000000000000006, |
| "qwen2.5-14b-instruct": 0.852, |
| "qwen2.5-32b-instruct": 0.884, |
| "qwen2.5-72b-instruct": 0.8119999999999999, |
| "llama-3.1-8b-instruct": 0.34800000000000003, |
| "llama-3.1-70b-instruct": 0.724, |
| "llama-3.2-3b-instruct": 0.21600000000000003, |
| "llama-3.3-70b-instruct": 0.796, |
| "mistral-large-instruct-2411": 0.8880000000000001, |
| "gemma-2-27b-it": 0.596, |
| "gemma-2-9b-it": 0.392, |
| "deepseek-v3": 0.9640000000000001, |
| "deepseek-r1": 0.9119999999999999, |
| "qwq-32b": 0.944, |
| "Average": 0.6663999999999999 |
| }, |
| "RoverEnv": { |
| "qwen2.5-3b-instruct": 0.14400000000000002, |
| "qwen2.5-7b-instruct": 0.236, |
| "qwen2.5-14b-instruct": 0.8480000000000001, |
| "qwen2.5-32b-instruct": 0.8360000000000001, |
| "qwen2.5-72b-instruct": 0.796, |
| "llama-3.1-8b-instruct": 0.28400000000000003, |
| "llama-3.1-70b-instruct": 0.612, |
| "llama-3.2-3b-instruct": 0.148, |
| "llama-3.3-70b-instruct": 0.724, |
| "mistral-large-instruct-2411": 0.828, |
| "gemma-2-27b-it": 0.4600000000000001, |
| "gemma-2-9b-it": 0.072, |
| "deepseek-v3": 0.9200000000000002, |
| "deepseek-r1": 0.9, |
| "qwq-32b": 0.8720000000000001, |
| "Average": 0.5786666666666668 |
| }, |
| "FashionEnv": { |
| "qwen2.5-3b-instruct": 0.17200000000000001, |
| "qwen2.5-7b-instruct": 0.304, |
| "qwen2.5-14b-instruct": 0.8240000000000001, |
| "qwen2.5-32b-instruct": 0.808, |
| "qwen2.5-72b-instruct": 0.768, |
| "llama-3.1-8b-instruct": 0.32, |
| "llama-3.1-70b-instruct": 0.6, |
| "llama-3.2-3b-instruct": 0.16399999999999998, |
| "llama-3.3-70b-instruct": 0.6160000000000001, |
| "mistral-large-instruct-2411": 0.756, |
| "gemma-2-27b-it": 0.524, |
| "gemma-2-9b-it": 0.292, |
| "deepseek-v3": 0.86, |
| "deepseek-r1": 0.756, |
| "qwq-32b": 0.86, |
| "Average": 0.5749333333333334 |
| }, |
| "LicenseEnv": { |
| "qwen2.5-3b-instruct": 0.196, |
| "qwen2.5-7b-instruct": 0.29200000000000004, |
| "qwen2.5-14b-instruct": 0.556, |
| "qwen2.5-32b-instruct": 0.44000000000000006, |
| "qwen2.5-72b-instruct": 0.484, |
| "llama-3.1-8b-instruct": 0.26, |
| "llama-3.1-70b-instruct": 0.496, |
| "llama-3.2-3b-instruct": 0.072, |
| "llama-3.3-70b-instruct": 0.45999999999999996, |
| "mistral-large-instruct-2411": 0.504, |
| "gemma-2-27b-it": 0.37600000000000006, |
| "gemma-2-9b-it": 0.296, |
| "deepseek-v3": 0.556, |
| "deepseek-r1": 0.52, |
| "qwq-32b": 0.5800000000000001, |
| "Average": 0.4058666666666667 |
| }, |
| "VirusClassificationEnv": { |
| "qwen2.5-3b-instruct": 0.22000000000000003, |
| "qwen2.5-7b-instruct": 0.28, |
| "qwen2.5-14b-instruct": 0.384, |
| "qwen2.5-32b-instruct": 0.38, |
| "qwen2.5-72b-instruct": 0.42800000000000005, |
| "llama-3.1-8b-instruct": 0.256, |
| "llama-3.1-70b-instruct": 0.332, |
| "llama-3.2-3b-instruct": 0.156, |
| "llama-3.3-70b-instruct": 0.396, |
| "mistral-large-instruct-2411": 0.532, |
| "gemma-2-27b-it": 0.34, |
| "gemma-2-9b-it": 0.31200000000000006, |
| "deepseek-v3": 0.536, |
| "deepseek-r1": 0.64, |
| "qwq-32b": 0.684, |
| "Average": 0.3917333333333333 |
| }, |
| "TestingEnv": { |
| "qwen2.5-3b-instruct": 0.19200000000000003, |
| "qwen2.5-7b-instruct": 0.22000000000000003, |
| "qwen2.5-14b-instruct": 0.608, |
| "qwen2.5-32b-instruct": 0.648, |
| "qwen2.5-72b-instruct": 0.708, |
| "llama-3.1-8b-instruct": 0.332, |
| "llama-3.1-70b-instruct": 0.68, |
| "llama-3.2-3b-instruct": 0.17200000000000001, |
| "llama-3.3-70b-instruct": 0.7040000000000001, |
| "mistral-large-instruct-2411": 0.764, |
| "gemma-2-27b-it": 0.22799999999999998, |
| "gemma-2-9b-it": 0.26, |
| "deepseek-v3": 0.8880000000000001, |
| "deepseek-r1": 0.764, |
| "qwq-32b": 0.7999999999999999, |
| "Average": 0.5312 |
| }, |
| "NarrativeDetectEnv": { |
| "qwen2.5-3b-instruct": 0.148, |
| "qwen2.5-7b-instruct": 0.30000000000000004, |
| "qwen2.5-14b-instruct": 0.552, |
| "qwen2.5-32b-instruct": 0.8440000000000001, |
| "qwen2.5-72b-instruct": 0.76, |
| "llama-3.1-8b-instruct": 0.28800000000000003, |
| "llama-3.1-70b-instruct": 0.6279999999999999, |
| "llama-3.2-3b-instruct": 0.10400000000000001, |
| "llama-3.3-70b-instruct": 0.704, |
| "mistral-large-instruct-2411": 0.7919999999999999, |
| "gemma-2-27b-it": 0.328, |
| "gemma-2-9b-it": 0.192, |
| "deepseek-v3": 0.8560000000000001, |
| "deepseek-r1": 0.748, |
| "qwq-32b": 0.784, |
| "Average": 0.5352 |
| }, |
| "RenewableEnergyEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.44399999999999995, |
| "qwen2.5-14b-instruct": 0.648, |
| "qwen2.5-32b-instruct": 0.932, |
| "qwen2.5-72b-instruct": 0.8880000000000001, |
| "llama-3.1-8b-instruct": 0.396, |
| "llama-3.1-70b-instruct": 0.812, |
| "llama-3.2-3b-instruct": 0.2, |
| "llama-3.3-70b-instruct": 0.8240000000000001, |
| "mistral-large-instruct-2411": 0.8560000000000001, |
| "gemma-2-27b-it": 0.348, |
| "gemma-2-9b-it": 0.188, |
| "deepseek-v3": 0.96, |
| "deepseek-r1": 0.9800000000000001, |
| "qwq-32b": 0.9800000000000001, |
| "Average": 0.6426666666666667 |
| }, |
| "CelestialEnv": { |
| "qwen2.5-3b-instruct": 0.20400000000000001, |
| "qwen2.5-7b-instruct": 0.252, |
| "qwen2.5-14b-instruct": 0.728, |
| "qwen2.5-32b-instruct": 0.792, |
| "qwen2.5-72b-instruct": 0.7239999999999999, |
| "llama-3.1-8b-instruct": 0.256, |
| "llama-3.1-70b-instruct": 0.6920000000000001, |
| "llama-3.2-3b-instruct": 0.192, |
| "llama-3.3-70b-instruct": 0.744, |
| "mistral-large-instruct-2411": 0.82, |
| "gemma-2-27b-it": 0.528, |
| "gemma-2-9b-it": 0.344, |
| "deepseek-v3": 0.8480000000000001, |
| "deepseek-r1": 0.8360000000000001, |
| "qwq-32b": 0.8879999999999999, |
| "Average": 0.5898666666666668 |
| }, |
| "SpiceEnv": { |
| "qwen2.5-3b-instruct": 0.21199999999999997, |
| "qwen2.5-7b-instruct": 0.332, |
| "qwen2.5-14b-instruct": 0.672, |
| "qwen2.5-32b-instruct": 0.476, |
| "qwen2.5-72b-instruct": 0.5880000000000001, |
| "llama-3.1-8b-instruct": 0.32799999999999996, |
| "llama-3.1-70b-instruct": 0.40800000000000003, |
| "llama-3.2-3b-instruct": 0.22000000000000003, |
| "llama-3.3-70b-instruct": 0.336, |
| "mistral-large-instruct-2411": 0.5800000000000001, |
| "gemma-2-27b-it": 0.28400000000000003, |
| "gemma-2-9b-it": 0.172, |
| "deepseek-v3": 0.908, |
| "deepseek-r1": 0.7679999999999999, |
| "qwq-32b": 0.8720000000000001, |
| "Average": 0.47706666666666664 |
| }, |
| "WildlifeEnv": { |
| "qwen2.5-3b-instruct": 0.21600000000000003, |
| "qwen2.5-7b-instruct": 0.352, |
| "qwen2.5-14b-instruct": 0.644, |
| "qwen2.5-32b-instruct": 0.592, |
| "qwen2.5-72b-instruct": 0.616, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.544, |
| "llama-3.2-3b-instruct": 0.23199999999999998, |
| "llama-3.3-70b-instruct": 0.616, |
| "mistral-large-instruct-2411": 0.628, |
| "gemma-2-27b-it": 0.45199999999999996, |
| "gemma-2-9b-it": 0.344, |
| "deepseek-v3": 0.736, |
| "deepseek-r1": 0.6040000000000001, |
| "qwq-32b": 0.716, |
| "Average": 0.5072 |
| }, |
| "VehicleEnv": { |
| "qwen2.5-3b-instruct": 0.172, |
| "qwen2.5-7b-instruct": 0.308, |
| "qwen2.5-14b-instruct": 0.54, |
| "qwen2.5-32b-instruct": 0.776, |
| "qwen2.5-72b-instruct": 0.78, |
| "llama-3.1-8b-instruct": 0.248, |
| "llama-3.1-70b-instruct": 0.62, |
| "llama-3.2-3b-instruct": 0.152, |
| "llama-3.3-70b-instruct": 0.6960000000000001, |
| "mistral-large-instruct-2411": 0.8800000000000001, |
| "gemma-2-27b-it": 0.44799999999999995, |
| "gemma-2-9b-it": 0.248, |
| "deepseek-v3": 0.9199999999999999, |
| "deepseek-r1": 0.9199999999999999, |
| "qwq-32b": 0.916, |
| "Average": 0.5749333333333333 |
| }, |
| "BeverageEnv": { |
| "qwen2.5-3b-instruct": 0.128, |
| "qwen2.5-7b-instruct": 0.296, |
| "qwen2.5-14b-instruct": 0.792, |
| "qwen2.5-32b-instruct": 0.6880000000000001, |
| "qwen2.5-72b-instruct": 0.724, |
| "llama-3.1-8b-instruct": 0.41200000000000003, |
| "llama-3.1-70b-instruct": 0.6199999999999999, |
| "llama-3.2-3b-instruct": 0.16399999999999998, |
| "llama-3.3-70b-instruct": 0.5800000000000001, |
| "mistral-large-instruct-2411": 0.748, |
| "gemma-2-27b-it": 0.40800000000000003, |
| "gemma-2-9b-it": 0.296, |
| "deepseek-v3": 0.8800000000000001, |
| "deepseek-r1": 0.7520000000000001, |
| "qwq-32b": 0.844, |
| "Average": 0.5554666666666667 |
| }, |
| "ControlEnv": { |
| "qwen2.5-3b-instruct": 0.12800000000000003, |
| "qwen2.5-7b-instruct": 0.364, |
| "qwen2.5-14b-instruct": 0.68, |
| "qwen2.5-32b-instruct": 0.8320000000000001, |
| "qwen2.5-72b-instruct": 0.8400000000000001, |
| "llama-3.1-8b-instruct": 0.364, |
| "llama-3.1-70b-instruct": 0.656, |
| "llama-3.2-3b-instruct": 0.15599999999999997, |
| "llama-3.3-70b-instruct": 0.6320000000000001, |
| "mistral-large-instruct-2411": 0.784, |
| "gemma-2-27b-it": 0.4640000000000001, |
| "gemma-2-9b-it": 0.18, |
| "deepseek-v3": 0.9119999999999999, |
| "deepseek-r1": 0.9119999999999999, |
| "qwq-32b": 0.932, |
| "Average": 0.5890666666666665 |
| }, |
| "CurrencyEnv": { |
| "qwen2.5-3b-instruct": 0.252, |
| "qwen2.5-7b-instruct": 0.392, |
| "qwen2.5-14b-instruct": 0.8560000000000001, |
| "qwen2.5-32b-instruct": 0.884, |
| "qwen2.5-72b-instruct": 0.836, |
| "llama-3.1-8b-instruct": 0.476, |
| "llama-3.1-70b-instruct": 0.7520000000000001, |
| "llama-3.2-3b-instruct": 0.22400000000000003, |
| "llama-3.3-70b-instruct": 0.7000000000000001, |
| "mistral-large-instruct-2411": 0.8960000000000001, |
| "gemma-2-27b-it": 0.68, |
| "gemma-2-9b-it": 0.196, |
| "deepseek-v3": 0.9800000000000001, |
| "deepseek-r1": 0.932, |
| "qwq-32b": 0.9640000000000001, |
| "Average": 0.668 |
| }, |
| "MarketingEnv": { |
| "qwen2.5-3b-instruct": 0.12, |
| "qwen2.5-7b-instruct": 0.34400000000000003, |
| "qwen2.5-14b-instruct": 0.524, |
| "qwen2.5-32b-instruct": 0.7479999999999999, |
| "qwen2.5-72b-instruct": 0.732, |
| "llama-3.1-8b-instruct": 0.30800000000000005, |
| "llama-3.1-70b-instruct": 0.7040000000000001, |
| "llama-3.2-3b-instruct": 0.14400000000000002, |
| "llama-3.3-70b-instruct": 0.7639999999999999, |
| "mistral-large-instruct-2411": 0.7600000000000001, |
| "gemma-2-27b-it": 0.32400000000000007, |
| "gemma-2-9b-it": 0.184, |
| "deepseek-v3": 0.812, |
| "deepseek-r1": 0.7959999999999999, |
| "qwq-32b": 0.8320000000000001, |
| "Average": 0.5397333333333333 |
| }, |
| "BotanicalEnv": { |
| "qwen2.5-3b-instruct": 0.18800000000000003, |
| "qwen2.5-7b-instruct": 0.316, |
| "qwen2.5-14b-instruct": 0.9119999999999999, |
| "qwen2.5-32b-instruct": 0.884, |
| "qwen2.5-72b-instruct": 0.9039999999999999, |
| "llama-3.1-8b-instruct": 0.4119999999999999, |
| "llama-3.1-70b-instruct": 0.836, |
| "llama-3.2-3b-instruct": 0.23600000000000004, |
| "llama-3.3-70b-instruct": 0.8480000000000001, |
| "mistral-large-instruct-2411": 0.8640000000000001, |
| "gemma-2-27b-it": 0.604, |
| "gemma-2-9b-it": 0.264, |
| "deepseek-v3": 0.9040000000000001, |
| "deepseek-r1": 0.9399999999999998, |
| "qwq-32b": 0.968, |
| "Average": 0.672 |
| }, |
| "CircusActEnv": { |
| "qwen2.5-3b-instruct": 0.17200000000000001, |
| "qwen2.5-7b-instruct": 0.32399999999999995, |
| "qwen2.5-14b-instruct": 0.64, |
| "qwen2.5-32b-instruct": 0.712, |
| "qwen2.5-72b-instruct": 0.768, |
| "llama-3.1-8b-instruct": 0.276, |
| "llama-3.1-70b-instruct": 0.648, |
| "llama-3.2-3b-instruct": 0.176, |
| "llama-3.3-70b-instruct": 0.62, |
| "mistral-large-instruct-2411": 0.748, |
| "gemma-2-27b-it": 0.384, |
| "gemma-2-9b-it": 0.29600000000000004, |
| "deepseek-v3": 0.8640000000000001, |
| "deepseek-r1": 0.82, |
| "qwq-32b": 0.8720000000000001, |
| "Average": 0.5546666666666668 |
| }, |
| "AudioDialectEnv": { |
| "qwen2.5-3b-instruct": 0.128, |
| "qwen2.5-7b-instruct": 0.312, |
| "qwen2.5-14b-instruct": 0.5800000000000001, |
| "qwen2.5-32b-instruct": 0.6, |
| "qwen2.5-72b-instruct": 0.528, |
| "llama-3.1-8b-instruct": 0.21600000000000003, |
| "llama-3.1-70b-instruct": 0.4, |
| "llama-3.2-3b-instruct": 0.132, |
| "llama-3.3-70b-instruct": 0.32399999999999995, |
| "mistral-large-instruct-2411": 0.68, |
| "gemma-2-27b-it": 0.28, |
| "gemma-2-9b-it": 0.11600000000000002, |
| "deepseek-v3": 0.7520000000000001, |
| "deepseek-r1": 0.7919999999999999, |
| "qwq-32b": 0.8119999999999999, |
| "Average": 0.4434666666666666 |
| }, |
| "LeadershipEnv": { |
| "qwen2.5-3b-instruct": 0.164, |
| "qwen2.5-7b-instruct": 0.372, |
| "qwen2.5-14b-instruct": 0.7, |
| "qwen2.5-32b-instruct": 0.732, |
| "qwen2.5-72b-instruct": 0.7639999999999999, |
| "llama-3.1-8b-instruct": 0.364, |
| "llama-3.1-70b-instruct": 0.708, |
| "llama-3.2-3b-instruct": 0.128, |
| "llama-3.3-70b-instruct": 0.6920000000000001, |
| "mistral-large-instruct-2411": 0.728, |
| "gemma-2-27b-it": 0.46799999999999997, |
| "gemma-2-9b-it": 0.20400000000000001, |
| "deepseek-v3": 0.8200000000000001, |
| "deepseek-r1": 0.748, |
| "qwq-32b": 0.828, |
| "Average": 0.5613333333333334 |
| }, |
| "TransportEnv": { |
| "qwen2.5-3b-instruct": 0.196, |
| "qwen2.5-7b-instruct": 0.372, |
| "qwen2.5-14b-instruct": 0.716, |
| "qwen2.5-32b-instruct": 0.732, |
| "qwen2.5-72b-instruct": 0.8, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.648, |
| "llama-3.2-3b-instruct": 0.15200000000000002, |
| "llama-3.3-70b-instruct": 0.6000000000000001, |
| "mistral-large-instruct-2411": 0.7879999999999999, |
| "gemma-2-27b-it": 0.44399999999999995, |
| "gemma-2-9b-it": 0.364, |
| "deepseek-v3": 0.8640000000000001, |
| "deepseek-r1": 0.8240000000000001, |
| "qwq-32b": 0.9199999999999999, |
| "Average": 0.5824 |
| }, |
| "EcologicalEnv": { |
| "qwen2.5-3b-instruct": 0.152, |
| "qwen2.5-7b-instruct": 0.45600000000000007, |
| "qwen2.5-14b-instruct": 0.748, |
| "qwen2.5-32b-instruct": 0.82, |
| "qwen2.5-72b-instruct": 0.792, |
| "llama-3.1-8b-instruct": 0.42000000000000004, |
| "llama-3.1-70b-instruct": 0.692, |
| "llama-3.2-3b-instruct": 0.21600000000000003, |
| "llama-3.3-70b-instruct": 0.64, |
| "mistral-large-instruct-2411": 0.772, |
| "gemma-2-27b-it": 0.5680000000000001, |
| "gemma-2-9b-it": 0.46799999999999997, |
| "deepseek-v3": 0.868, |
| "deepseek-r1": 0.8720000000000001, |
| "qwq-32b": 0.8879999999999999, |
| "Average": 0.6248 |
| }, |
| "MythicEnv": { |
| "qwen2.5-3b-instruct": 0.132, |
| "qwen2.5-7b-instruct": 0.36, |
| "qwen2.5-14b-instruct": 0.744, |
| "qwen2.5-32b-instruct": 0.74, |
| "qwen2.5-72b-instruct": 0.672, |
| "llama-3.1-8b-instruct": 0.236, |
| "llama-3.1-70b-instruct": 0.596, |
| "llama-3.2-3b-instruct": 0.12, |
| "llama-3.3-70b-instruct": 0.576, |
| "mistral-large-instruct-2411": 0.6960000000000001, |
| "gemma-2-27b-it": 0.45599999999999996, |
| "gemma-2-9b-it": 0.136, |
| "deepseek-v3": 0.8960000000000001, |
| "deepseek-r1": 0.8720000000000001, |
| "qwq-32b": 0.8400000000000001, |
| "Average": 0.5381333333333332 |
| }, |
| "EnzymeEnv": { |
| "qwen2.5-3b-instruct": 0.252, |
| "qwen2.5-7b-instruct": 0.43200000000000005, |
| "qwen2.5-14b-instruct": 0.636, |
| "qwen2.5-32b-instruct": 0.676, |
| "qwen2.5-72b-instruct": 0.676, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.552, |
| "llama-3.2-3b-instruct": 0.192, |
| "llama-3.3-70b-instruct": 0.5640000000000001, |
| "mistral-large-instruct-2411": 0.732, |
| "gemma-2-27b-it": 0.43600000000000005, |
| "gemma-2-9b-it": 0.264, |
| "deepseek-v3": 0.8400000000000001, |
| "deepseek-r1": 0.76, |
| "qwq-32b": 0.804, |
| "Average": 0.5421333333333334 |
| }, |
| "OSKernelEnv": { |
| "qwen2.5-3b-instruct": 0.192, |
| "qwen2.5-7b-instruct": 0.28400000000000003, |
| "qwen2.5-14b-instruct": 0.8119999999999999, |
| "qwen2.5-32b-instruct": 0.784, |
| "qwen2.5-72b-instruct": 0.788, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.6920000000000001, |
| "llama-3.2-3b-instruct": 0.128, |
| "llama-3.3-70b-instruct": 0.74, |
| "mistral-large-instruct-2411": 0.8559999999999999, |
| "gemma-2-27b-it": 0.46399999999999997, |
| "gemma-2-9b-it": 0.2, |
| "deepseek-v3": 0.9480000000000001, |
| "deepseek-r1": 0.96, |
| "qwq-32b": 0.984, |
| "Average": 0.6098666666666668 |
| }, |
| "MineralClassificationEnv": { |
| "qwen2.5-3b-instruct": 0.11600000000000002, |
| "qwen2.5-7b-instruct": 0.248, |
| "qwen2.5-14b-instruct": 0.8320000000000001, |
| "qwen2.5-32b-instruct": 0.9040000000000001, |
| "qwen2.5-72b-instruct": 0.884, |
| "llama-3.1-8b-instruct": 0.384, |
| "llama-3.1-70b-instruct": 0.8240000000000001, |
| "llama-3.2-3b-instruct": 0.14800000000000002, |
| "llama-3.3-70b-instruct": 0.8960000000000001, |
| "mistral-large-instruct-2411": 0.908, |
| "gemma-2-27b-it": 0.508, |
| "gemma-2-9b-it": 0.268, |
| "deepseek-v3": 0.984, |
| "deepseek-r1": 0.9199999999999999, |
| "qwq-32b": 0.9640000000000001, |
| "Average": 0.6525333333333333 |
| }, |
| "EconomicEnv": { |
| "qwen2.5-3b-instruct": 0.136, |
| "qwen2.5-7b-instruct": 0.24, |
| "qwen2.5-14b-instruct": 0.8560000000000001, |
| "qwen2.5-32b-instruct": 0.9199999999999999, |
| "qwen2.5-72b-instruct": 0.8960000000000001, |
| "llama-3.1-8b-instruct": 0.43600000000000005, |
| "llama-3.1-70b-instruct": 0.808, |
| "llama-3.2-3b-instruct": 0.152, |
| "llama-3.3-70b-instruct": 0.8240000000000001, |
| "mistral-large-instruct-2411": 0.924, |
| "gemma-2-27b-it": 0.45199999999999996, |
| "gemma-2-9b-it": 0.36, |
| "deepseek-v3": 0.9559999999999998, |
| "deepseek-r1": 0.9359999999999999, |
| "qwq-32b": 0.9719999999999999, |
| "Average": 0.6578666666666667 |
| }, |
| "DetectiveEnv": { |
| "qwen2.5-3b-instruct": 0.168, |
| "qwen2.5-7b-instruct": 0.38, |
| "qwen2.5-14b-instruct": 0.836, |
| "qwen2.5-32b-instruct": 0.884, |
| "qwen2.5-72b-instruct": 0.8480000000000001, |
| "llama-3.1-8b-instruct": 0.34800000000000003, |
| "llama-3.1-70b-instruct": 0.74, |
| "llama-3.2-3b-instruct": 0.248, |
| "llama-3.3-70b-instruct": 0.792, |
| "mistral-large-instruct-2411": 0.8960000000000001, |
| "gemma-2-27b-it": 0.512, |
| "gemma-2-9b-it": 0.33199999999999996, |
| "deepseek-v3": 0.976, |
| "deepseek-r1": 0.9640000000000001, |
| "qwq-32b": 0.984, |
| "Average": 0.6605333333333333 |
| }, |
| "ChessEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.27999999999999997, |
| "qwen2.5-14b-instruct": 0.592, |
| "qwen2.5-32b-instruct": 0.616, |
| "qwen2.5-72b-instruct": 0.5720000000000001, |
| "llama-3.1-8b-instruct": 0.188, |
| "llama-3.1-70b-instruct": 0.6639999999999999, |
| "llama-3.2-3b-instruct": 0.084, |
| "llama-3.3-70b-instruct": 0.6280000000000001, |
| "mistral-large-instruct-2411": 0.744, |
| "gemma-2-27b-it": 0.30000000000000004, |
| "gemma-2-9b-it": 0.096, |
| "deepseek-v3": 0.696, |
| "deepseek-r1": 0.6519999999999999, |
| "qwq-32b": 0.664, |
| "Average": 0.4639999999999999 |
| }, |
| "MythicalEnv": { |
| "qwen2.5-3b-instruct": 0.2, |
| "qwen2.5-7b-instruct": 0.336, |
| "qwen2.5-14b-instruct": 0.8039999999999999, |
| "qwen2.5-32b-instruct": 0.712, |
| "qwen2.5-72b-instruct": 0.632, |
| "llama-3.1-8b-instruct": 0.356, |
| "llama-3.1-70b-instruct": 0.54, |
| "llama-3.2-3b-instruct": 0.16, |
| "llama-3.3-70b-instruct": 0.556, |
| "mistral-large-instruct-2411": 0.728, |
| "gemma-2-27b-it": 0.54, |
| "gemma-2-9b-it": 0.404, |
| "deepseek-v3": 0.9279999999999999, |
| "deepseek-r1": 0.8959999999999999, |
| "qwq-32b": 0.876, |
| "Average": 0.5778666666666666 |
| }, |
| "ChemicalCompoundsEnv": { |
| "qwen2.5-3b-instruct": 0.18, |
| "qwen2.5-7b-instruct": 0.252, |
| "qwen2.5-14b-instruct": 0.40800000000000003, |
| "qwen2.5-32b-instruct": 0.30000000000000004, |
| "qwen2.5-72b-instruct": 0.28400000000000003, |
| "llama-3.1-8b-instruct": 0.148, |
| "llama-3.1-70b-instruct": 0.28, |
| "llama-3.2-3b-instruct": 0.14, |
| "llama-3.3-70b-instruct": 0.18000000000000002, |
| "mistral-large-instruct-2411": 0.43200000000000005, |
| "gemma-2-27b-it": 0.23200000000000004, |
| "gemma-2-9b-it": 0.13599999999999998, |
| "deepseek-v3": 0.46799999999999997, |
| "deepseek-r1": 0.624, |
| "qwq-32b": 0.752, |
| "Average": 0.32106666666666667 |
| }, |
| "ArchitecturalEnv": { |
| "qwen2.5-3b-instruct": 0.20400000000000001, |
| "qwen2.5-7b-instruct": 0.316, |
| "qwen2.5-14b-instruct": 0.72, |
| "qwen2.5-32b-instruct": 0.66, |
| "qwen2.5-72b-instruct": 0.7120000000000001, |
| "llama-3.1-8b-instruct": 0.256, |
| "llama-3.1-70b-instruct": 0.556, |
| "llama-3.2-3b-instruct": 0.132, |
| "llama-3.3-70b-instruct": 0.508, |
| "mistral-large-instruct-2411": 0.724, |
| "gemma-2-27b-it": 0.488, |
| "gemma-2-9b-it": 0.236, |
| "deepseek-v3": 0.82, |
| "deepseek-r1": 0.744, |
| "qwq-32b": 0.8240000000000001, |
| "Average": 0.5266666666666666 |
| }, |
| "ComputationEnv": { |
| "qwen2.5-3b-instruct": 0.152, |
| "qwen2.5-7b-instruct": 0.248, |
| "qwen2.5-14b-instruct": 0.76, |
| "qwen2.5-32b-instruct": 0.884, |
| "qwen2.5-72b-instruct": 0.8560000000000001, |
| "llama-3.1-8b-instruct": 0.32799999999999996, |
| "llama-3.1-70b-instruct": 0.788, |
| "llama-3.2-3b-instruct": 0.13999999999999999, |
| "llama-3.3-70b-instruct": 0.8560000000000001, |
| "mistral-large-instruct-2411": 0.828, |
| "gemma-2-27b-it": 0.45199999999999996, |
| "gemma-2-9b-it": 0.252, |
| "deepseek-v3": 0.96, |
| "deepseek-r1": 0.9399999999999998, |
| "qwq-32b": 0.908, |
| "Average": 0.6234666666666667 |
| }, |
| "MachinePartEnv": { |
| "qwen2.5-3b-instruct": 0.14, |
| "qwen2.5-7b-instruct": 0.32, |
| "qwen2.5-14b-instruct": 0.8240000000000001, |
| "qwen2.5-32b-instruct": 0.8800000000000001, |
| "qwen2.5-72b-instruct": 0.828, |
| "llama-3.1-8b-instruct": 0.376, |
| "llama-3.1-70b-instruct": 0.8200000000000001, |
| "llama-3.2-3b-instruct": 0.168, |
| "llama-3.3-70b-instruct": 0.8960000000000001, |
| "mistral-large-instruct-2411": 0.876, |
| "gemma-2-27b-it": 0.508, |
| "gemma-2-9b-it": 0.268, |
| "deepseek-v3": 0.9719999999999999, |
| "deepseek-r1": 0.952, |
| "qwq-32b": 0.916, |
| "Average": 0.6496 |
| }, |
| "LiteraryEnv": { |
| "qwen2.5-3b-instruct": 0.10400000000000001, |
| "qwen2.5-7b-instruct": 0.328, |
| "qwen2.5-14b-instruct": 0.8800000000000001, |
| "qwen2.5-32b-instruct": 0.9279999999999999, |
| "qwen2.5-72b-instruct": 0.9, |
| "llama-3.1-8b-instruct": 0.336, |
| "llama-3.1-70b-instruct": 0.664, |
| "llama-3.2-3b-instruct": 0.13999999999999999, |
| "llama-3.3-70b-instruct": 0.664, |
| "mistral-large-instruct-2411": 0.884, |
| "gemma-2-27b-it": 0.44399999999999995, |
| "gemma-2-9b-it": 0.13999999999999999, |
| "deepseek-v3": 0.984, |
| "deepseek-r1": 0.9119999999999999, |
| "qwq-32b": 0.968, |
| "Average": 0.6184 |
| }, |
| "MarineEnv": { |
| "qwen2.5-3b-instruct": 0.144, |
| "qwen2.5-7b-instruct": 0.384, |
| "qwen2.5-14b-instruct": 0.8720000000000001, |
| "qwen2.5-32b-instruct": 0.844, |
| "qwen2.5-72b-instruct": 0.8320000000000001, |
| "llama-3.1-8b-instruct": 0.308, |
| "llama-3.1-70b-instruct": 0.636, |
| "llama-3.2-3b-instruct": 0.12000000000000002, |
| "llama-3.3-70b-instruct": 0.704, |
| "mistral-large-instruct-2411": 0.7879999999999999, |
| "gemma-2-27b-it": 0.484, |
| "gemma-2-9b-it": 0.23199999999999998, |
| "deepseek-v3": 0.884, |
| "deepseek-r1": 0.9, |
| "qwq-32b": 0.8880000000000001, |
| "Average": 0.6013333333333334 |
| }, |
| "PhilosophyEnv": { |
| "qwen2.5-3b-instruct": 0.144, |
| "qwen2.5-7b-instruct": 0.3, |
| "qwen2.5-14b-instruct": 0.7280000000000001, |
| "qwen2.5-32b-instruct": 0.82, |
| "qwen2.5-72b-instruct": 0.8719999999999999, |
| "llama-3.1-8b-instruct": 0.32799999999999996, |
| "llama-3.1-70b-instruct": 0.764, |
| "llama-3.2-3b-instruct": 0.036000000000000004, |
| "llama-3.3-70b-instruct": 0.796, |
| "mistral-large-instruct-2411": 0.7879999999999999, |
| "gemma-2-27b-it": 0.372, |
| "gemma-2-9b-it": 0.28, |
| "deepseek-v3": 0.844, |
| "deepseek-r1": 0.78, |
| "qwq-32b": 0.8320000000000001, |
| "Average": 0.5789333333333334 |
| }, |
| "ArchaeologicalEnv": { |
| "qwen2.5-3b-instruct": 0.18, |
| "qwen2.5-7b-instruct": 0.38, |
| "qwen2.5-14b-instruct": 0.58, |
| "qwen2.5-32b-instruct": 0.608, |
| "qwen2.5-72b-instruct": 0.5640000000000001, |
| "llama-3.1-8b-instruct": 0.26, |
| "llama-3.1-70b-instruct": 0.608, |
| "llama-3.2-3b-instruct": 0.192, |
| "llama-3.3-70b-instruct": 0.548, |
| "mistral-large-instruct-2411": 0.64, |
| "gemma-2-27b-it": 0.476, |
| "gemma-2-9b-it": 0.30000000000000004, |
| "deepseek-v3": 0.916, |
| "deepseek-r1": 0.7040000000000001, |
| "qwq-32b": 0.7559999999999999, |
| "Average": 0.5141333333333333 |
| }, |
| "GemstoneEnv": { |
| "qwen2.5-3b-instruct": 0.192, |
| "qwen2.5-7b-instruct": 0.264, |
| "qwen2.5-14b-instruct": 0.492, |
| "qwen2.5-32b-instruct": 0.45599999999999996, |
| "qwen2.5-72b-instruct": 0.44000000000000006, |
| "llama-3.1-8b-instruct": 0.192, |
| "llama-3.1-70b-instruct": 0.40800000000000003, |
| "llama-3.2-3b-instruct": 0.15200000000000002, |
| "llama-3.3-70b-instruct": 0.45599999999999996, |
| "mistral-large-instruct-2411": 0.528, |
| "gemma-2-27b-it": 0.33999999999999997, |
| "gemma-2-9b-it": 0.256, |
| "deepseek-v3": 0.5680000000000001, |
| "deepseek-r1": 0.5680000000000001, |
| "qwq-32b": 0.636, |
| "Average": 0.3965333333333333 |
| }, |
| "MicrobiologyEnv": { |
| "qwen2.5-3b-instruct": 0.14400000000000002, |
| "qwen2.5-7b-instruct": 0.38400000000000006, |
| "qwen2.5-14b-instruct": 0.752, |
| "qwen2.5-32b-instruct": 0.7, |
| "qwen2.5-72b-instruct": 0.844, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.512, |
| "llama-3.2-3b-instruct": 0.12000000000000002, |
| "llama-3.3-70b-instruct": 0.496, |
| "mistral-large-instruct-2411": 0.764, |
| "gemma-2-27b-it": 0.504, |
| "gemma-2-9b-it": 0.172, |
| "deepseek-v3": 0.9279999999999999, |
| "deepseek-r1": 0.952, |
| "qwq-32b": 0.932, |
| "Average": 0.568 |
| }, |
| "SciFiEnv": { |
| "qwen2.5-3b-instruct": 0.192, |
| "qwen2.5-7b-instruct": 0.384, |
| "qwen2.5-14b-instruct": 0.7879999999999999, |
| "qwen2.5-32b-instruct": 0.776, |
| "qwen2.5-72b-instruct": 0.7879999999999999, |
| "llama-3.1-8b-instruct": 0.35200000000000004, |
| "llama-3.1-70b-instruct": 0.664, |
| "llama-3.2-3b-instruct": 0.164, |
| "llama-3.3-70b-instruct": 0.588, |
| "mistral-large-instruct-2411": 0.736, |
| "gemma-2-27b-it": 0.52, |
| "gemma-2-9b-it": 0.33599999999999997, |
| "deepseek-v3": 0.9279999999999999, |
| "deepseek-r1": 0.9199999999999999, |
| "qwq-32b": 0.9, |
| "Average": 0.6023999999999999 |
| }, |
| "HormoneEnv": { |
| "qwen2.5-3b-instruct": 0.152, |
| "qwen2.5-7b-instruct": 0.40800000000000003, |
| "qwen2.5-14b-instruct": 0.7999999999999999, |
| "qwen2.5-32b-instruct": 0.784, |
| "qwen2.5-72b-instruct": 0.764, |
| "llama-3.1-8b-instruct": 0.336, |
| "llama-3.1-70b-instruct": 0.76, |
| "llama-3.2-3b-instruct": 0.184, |
| "llama-3.3-70b-instruct": 0.8480000000000001, |
| "mistral-large-instruct-2411": 0.8, |
| "gemma-2-27b-it": 0.524, |
| "gemma-2-9b-it": 0.312, |
| "deepseek-v3": 0.9480000000000001, |
| "deepseek-r1": 0.944, |
| "qwq-32b": 0.852, |
| "Average": 0.6277333333333334 |
| }, |
| "SculptorEnv": { |
| "qwen2.5-3b-instruct": 0.23200000000000004, |
| "qwen2.5-7b-instruct": 0.4159999999999999, |
| "qwen2.5-14b-instruct": 0.7079999999999999, |
| "qwen2.5-32b-instruct": 0.636, |
| "qwen2.5-72b-instruct": 0.6, |
| "llama-3.1-8b-instruct": 0.22799999999999998, |
| "llama-3.1-70b-instruct": 0.484, |
| "llama-3.2-3b-instruct": 0.188, |
| "llama-3.3-70b-instruct": 0.532, |
| "mistral-large-instruct-2411": 0.684, |
| "gemma-2-27b-it": 0.30000000000000004, |
| "gemma-2-9b-it": 0.156, |
| "deepseek-v3": 0.788, |
| "deepseek-r1": 0.7479999999999999, |
| "qwq-32b": 0.8119999999999999, |
| "Average": 0.5008 |
| }, |
| "NeuroEnv": { |
| "qwen2.5-3b-instruct": 0.10800000000000001, |
| "qwen2.5-7b-instruct": 0.24400000000000005, |
| "qwen2.5-14b-instruct": 0.8960000000000001, |
| "qwen2.5-32b-instruct": 0.892, |
| "qwen2.5-72b-instruct": 0.8879999999999999, |
| "llama-3.1-8b-instruct": 0.512, |
| "llama-3.1-70b-instruct": 0.8880000000000001, |
| "llama-3.2-3b-instruct": 0.20400000000000001, |
| "llama-3.3-70b-instruct": 0.9279999999999999, |
| "mistral-large-instruct-2411": 0.8880000000000001, |
| "gemma-2-27b-it": 0.72, |
| "gemma-2-9b-it": 0.42800000000000005, |
| "deepseek-v3": 0.952, |
| "deepseek-r1": 0.932, |
| "qwq-32b": 0.852, |
| "Average": 0.6888000000000001 |
| }, |
| "OceanEnv": { |
| "qwen2.5-3b-instruct": 0.2, |
| "qwen2.5-7b-instruct": 0.45999999999999996, |
| "qwen2.5-14b-instruct": 0.6160000000000001, |
| "qwen2.5-32b-instruct": 0.6000000000000001, |
| "qwen2.5-72b-instruct": 0.62, |
| "llama-3.1-8b-instruct": 0.36400000000000005, |
| "llama-3.1-70b-instruct": 0.5680000000000001, |
| "llama-3.2-3b-instruct": 0.156, |
| "llama-3.3-70b-instruct": 0.476, |
| "mistral-large-instruct-2411": 0.656, |
| "gemma-2-27b-it": 0.43200000000000005, |
| "gemma-2-9b-it": 0.248, |
| "deepseek-v3": 0.852, |
| "deepseek-r1": 0.836, |
| "qwq-32b": 0.8240000000000001, |
| "Average": 0.5272000000000001 |
| }, |
| "MineralEnv": { |
| "qwen2.5-3b-instruct": 0.14400000000000002, |
| "qwen2.5-7b-instruct": 0.38, |
| "qwen2.5-14b-instruct": 0.768, |
| "qwen2.5-32b-instruct": 0.6960000000000001, |
| "qwen2.5-72b-instruct": 0.684, |
| "llama-3.1-8b-instruct": 0.29600000000000004, |
| "llama-3.1-70b-instruct": 0.556, |
| "llama-3.2-3b-instruct": 0.16, |
| "llama-3.3-70b-instruct": 0.56, |
| "mistral-large-instruct-2411": 0.66, |
| "gemma-2-27b-it": 0.384, |
| "gemma-2-9b-it": 0.17200000000000001, |
| "deepseek-v3": 0.8480000000000001, |
| "deepseek-r1": 0.82, |
| "qwq-32b": 0.8720000000000001, |
| "Average": 0.5333333333333333 |
| }, |
| "FishEnv": { |
| "qwen2.5-3b-instruct": 0.188, |
| "qwen2.5-7b-instruct": 0.38, |
| "qwen2.5-14b-instruct": 0.732, |
| "qwen2.5-32b-instruct": 0.668, |
| "qwen2.5-72b-instruct": 0.7200000000000001, |
| "llama-3.1-8b-instruct": 0.392, |
| "llama-3.1-70b-instruct": 0.624, |
| "llama-3.2-3b-instruct": 0.13599999999999998, |
| "llama-3.3-70b-instruct": 0.616, |
| "mistral-large-instruct-2411": 0.736, |
| "gemma-2-27b-it": 0.508, |
| "gemma-2-9b-it": 0.268, |
| "deepseek-v3": 0.86, |
| "deepseek-r1": 0.868, |
| "qwq-32b": 0.924, |
| "Average": 0.5746666666666667 |
| }, |
| "MartialArtsEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.43200000000000005, |
| "qwen2.5-14b-instruct": 0.672, |
| "qwen2.5-32b-instruct": 0.5640000000000001, |
| "qwen2.5-72b-instruct": 0.56, |
| "llama-3.1-8b-instruct": 0.276, |
| "llama-3.1-70b-instruct": 0.54, |
| "llama-3.2-3b-instruct": 0.2, |
| "llama-3.3-70b-instruct": 0.52, |
| "mistral-large-instruct-2411": 0.568, |
| "gemma-2-27b-it": 0.4, |
| "gemma-2-9b-it": 0.22400000000000003, |
| "deepseek-v3": 0.784, |
| "deepseek-r1": 0.716, |
| "qwq-32b": 0.752, |
| "Average": 0.4928 |
| }, |
| "RocketFuelEnv": { |
| "qwen2.5-3b-instruct": 0.22800000000000004, |
| "qwen2.5-7b-instruct": 0.41600000000000004, |
| "qwen2.5-14b-instruct": 0.852, |
| "qwen2.5-32b-instruct": 0.7879999999999999, |
| "qwen2.5-72b-instruct": 0.8160000000000001, |
| "llama-3.1-8b-instruct": 0.36, |
| "llama-3.1-70b-instruct": 0.6799999999999999, |
| "llama-3.2-3b-instruct": 0.184, |
| "llama-3.3-70b-instruct": 0.7239999999999999, |
| "mistral-large-instruct-2411": 0.828, |
| "gemma-2-27b-it": 0.6279999999999999, |
| "gemma-2-9b-it": 0.248, |
| "deepseek-v3": 0.916, |
| "deepseek-r1": 0.8960000000000001, |
| "qwq-32b": 0.9040000000000001, |
| "Average": 0.6312000000000001 |
| }, |
| "MLEnv": { |
| "qwen2.5-3b-instruct": 0.088, |
| "qwen2.5-7b-instruct": 0.392, |
| "qwen2.5-14b-instruct": 0.6, |
| "qwen2.5-32b-instruct": 0.748, |
| "qwen2.5-72b-instruct": 0.792, |
| "llama-3.1-8b-instruct": 0.304, |
| "llama-3.1-70b-instruct": 0.672, |
| "llama-3.2-3b-instruct": 0.10799999999999998, |
| "llama-3.3-70b-instruct": 0.5960000000000001, |
| "mistral-large-instruct-2411": 0.7639999999999999, |
| "gemma-2-27b-it": 0.264, |
| "gemma-2-9b-it": 0.156, |
| "deepseek-v3": 0.808, |
| "deepseek-r1": 0.652, |
| "qwq-32b": 0.772, |
| "Average": 0.5144 |
| }, |
| "PoliticalManifestoEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.312, |
| "qwen2.5-14b-instruct": 0.76, |
| "qwen2.5-32b-instruct": 0.852, |
| "qwen2.5-72b-instruct": 0.7839999999999999, |
| "llama-3.1-8b-instruct": 0.42400000000000004, |
| "llama-3.1-70b-instruct": 0.62, |
| "llama-3.2-3b-instruct": 0.128, |
| "llama-3.3-70b-instruct": 0.692, |
| "mistral-large-instruct-2411": 0.796, |
| "gemma-2-27b-it": 0.45200000000000007, |
| "gemma-2-9b-it": 0.152, |
| "deepseek-v3": 0.86, |
| "deepseek-r1": 0.792, |
| "qwq-32b": 0.8800000000000001, |
| "Average": 0.5792 |
| }, |
| "CoffeeEnv": { |
| "qwen2.5-3b-instruct": 0.20400000000000001, |
| "qwen2.5-7b-instruct": 0.38, |
| "qwen2.5-14b-instruct": 0.7799999999999999, |
| "qwen2.5-32b-instruct": 0.8039999999999999, |
| "qwen2.5-72b-instruct": 0.764, |
| "llama-3.1-8b-instruct": 0.31599999999999995, |
| "llama-3.1-70b-instruct": 0.552, |
| "llama-3.2-3b-instruct": 0.17200000000000001, |
| "llama-3.3-70b-instruct": 0.6599999999999999, |
| "mistral-large-instruct-2411": 0.828, |
| "gemma-2-27b-it": 0.592, |
| "gemma-2-9b-it": 0.364, |
| "deepseek-v3": 0.9120000000000001, |
| "deepseek-r1": 0.9279999999999999, |
| "qwq-32b": 0.9359999999999999, |
| "Average": 0.6128 |
| }, |
| "MotifAnalysisEnv": { |
| "qwen2.5-3b-instruct": 0.096, |
| "qwen2.5-7b-instruct": 0.332, |
| "qwen2.5-14b-instruct": 0.5680000000000001, |
| "qwen2.5-32b-instruct": 0.496, |
| "qwen2.5-72b-instruct": 0.5920000000000001, |
| "llama-3.1-8b-instruct": 0.244, |
| "llama-3.1-70b-instruct": 0.36000000000000004, |
| "llama-3.2-3b-instruct": 0.13999999999999999, |
| "llama-3.3-70b-instruct": 0.22400000000000003, |
| "mistral-large-instruct-2411": 0.46399999999999997, |
| "gemma-2-27b-it": 0.18, |
| "gemma-2-9b-it": 0.128, |
| "deepseek-v3": 0.752, |
| "deepseek-r1": 0.8240000000000001, |
| "qwq-32b": 0.8640000000000001, |
| "Average": 0.4176 |
| }, |
| "NutritionEnv": { |
| "qwen2.5-3b-instruct": 0.132, |
| "qwen2.5-7b-instruct": 0.22000000000000003, |
| "qwen2.5-14b-instruct": 0.7920000000000001, |
| "qwen2.5-32b-instruct": 0.8400000000000001, |
| "qwen2.5-72b-instruct": 0.876, |
| "llama-3.1-8b-instruct": 0.264, |
| "llama-3.1-70b-instruct": 0.64, |
| "llama-3.2-3b-instruct": 0.128, |
| "llama-3.3-70b-instruct": 0.7040000000000001, |
| "mistral-large-instruct-2411": 0.8320000000000001, |
| "gemma-2-27b-it": 0.38, |
| "gemma-2-9b-it": 0.20800000000000002, |
| "deepseek-v3": 0.944, |
| "deepseek-r1": 0.944, |
| "qwq-32b": 0.9120000000000001, |
| "Average": 0.5877333333333333 |
| }, |
| "MalwareEnv": { |
| "qwen2.5-3b-instruct": 0.16, |
| "qwen2.5-7b-instruct": 0.316, |
| "qwen2.5-14b-instruct": 0.728, |
| "qwen2.5-32b-instruct": 0.756, |
| "qwen2.5-72b-instruct": 0.7200000000000001, |
| "llama-3.1-8b-instruct": 0.268, |
| "llama-3.1-70b-instruct": 0.5840000000000001, |
| "llama-3.2-3b-instruct": 0.10800000000000001, |
| "llama-3.3-70b-instruct": 0.548, |
| "mistral-large-instruct-2411": 0.752, |
| "gemma-2-27b-it": 0.252, |
| "gemma-2-9b-it": 0.12, |
| "deepseek-v3": 0.916, |
| "deepseek-r1": 0.9, |
| "qwq-32b": 0.916, |
| "Average": 0.5362666666666667 |
| }, |
| "GeologicalEnv": { |
| "qwen2.5-3b-instruct": 0.132, |
| "qwen2.5-7b-instruct": 0.336, |
| "qwen2.5-14b-instruct": 0.7639999999999999, |
| "qwen2.5-32b-instruct": 0.748, |
| "qwen2.5-72b-instruct": 0.676, |
| "llama-3.1-8b-instruct": 0.28800000000000003, |
| "llama-3.1-70b-instruct": 0.552, |
| "llama-3.2-3b-instruct": 0.13999999999999999, |
| "llama-3.3-70b-instruct": 0.508, |
| "mistral-large-instruct-2411": 0.812, |
| "gemma-2-27b-it": 0.41600000000000004, |
| "gemma-2-9b-it": 0.164, |
| "deepseek-v3": 0.9119999999999999, |
| "deepseek-r1": 0.8480000000000001, |
| "qwq-32b": 0.8880000000000001, |
| "Average": 0.5456000000000001 |
| }, |
| "TheatricalEnv": { |
| "qwen2.5-3b-instruct": 0.14400000000000002, |
| "qwen2.5-7b-instruct": 0.42400000000000004, |
| "qwen2.5-14b-instruct": 0.676, |
| "qwen2.5-32b-instruct": 0.78, |
| "qwen2.5-72b-instruct": 0.808, |
| "llama-3.1-8b-instruct": 0.41200000000000003, |
| "llama-3.1-70b-instruct": 0.7959999999999999, |
| "llama-3.2-3b-instruct": 0.1, |
| "llama-3.3-70b-instruct": 0.768, |
| "mistral-large-instruct-2411": 0.844, |
| "gemma-2-27b-it": 0.528, |
| "gemma-2-9b-it": 0.28, |
| "deepseek-v3": 0.884, |
| "deepseek-r1": 0.8240000000000001, |
| "qwq-32b": 0.908, |
| "Average": 0.6117333333333335 |
| }, |
| "PrintingTechniqueEnv": { |
| "qwen2.5-3b-instruct": 0.144, |
| "qwen2.5-7b-instruct": 0.252, |
| "qwen2.5-14b-instruct": 0.736, |
| "qwen2.5-32b-instruct": 0.7200000000000001, |
| "qwen2.5-72b-instruct": 0.776, |
| "llama-3.1-8b-instruct": 0.4, |
| "llama-3.1-70b-instruct": 0.54, |
| "llama-3.2-3b-instruct": 0.16, |
| "llama-3.3-70b-instruct": 0.548, |
| "mistral-large-instruct-2411": 0.7040000000000001, |
| "gemma-2-27b-it": 0.44000000000000006, |
| "gemma-2-9b-it": 0.192, |
| "deepseek-v3": 0.916, |
| "deepseek-r1": 0.852, |
| "qwq-32b": 0.9279999999999999, |
| "Average": 0.5538666666666666 |
| }, |
| "StellarEnv": { |
| "qwen2.5-3b-instruct": 0.132, |
| "qwen2.5-7b-instruct": 0.388, |
| "qwen2.5-14b-instruct": 0.6759999999999999, |
| "qwen2.5-32b-instruct": 0.724, |
| "qwen2.5-72b-instruct": 0.6960000000000001, |
| "llama-3.1-8b-instruct": 0.30000000000000004, |
| "llama-3.1-70b-instruct": 0.6040000000000001, |
| "llama-3.2-3b-instruct": 0.16, |
| "llama-3.3-70b-instruct": 0.6240000000000001, |
| "mistral-large-instruct-2411": 0.732, |
| "gemma-2-27b-it": 0.364, |
| "gemma-2-9b-it": 0.23199999999999998, |
| "deepseek-v3": 0.82, |
| "deepseek-r1": 0.648, |
| "qwq-32b": 0.776, |
| "Average": 0.5250666666666667 |
| }, |
| "SoilEnv": { |
| "qwen2.5-3b-instruct": 0.172, |
| "qwen2.5-7b-instruct": 0.48, |
| "qwen2.5-14b-instruct": 0.8320000000000001, |
| "qwen2.5-32b-instruct": 0.788, |
| "qwen2.5-72b-instruct": 0.8240000000000001, |
| "llama-3.1-8b-instruct": 0.42400000000000004, |
| "llama-3.1-70b-instruct": 0.64, |
| "llama-3.2-3b-instruct": 0.22799999999999998, |
| "llama-3.3-70b-instruct": 0.664, |
| "mistral-large-instruct-2411": 0.76, |
| "gemma-2-27b-it": 0.628, |
| "gemma-2-9b-it": 0.44000000000000006, |
| "deepseek-v3": 0.884, |
| "deepseek-r1": 0.8039999999999999, |
| "qwq-32b": 0.8480000000000001, |
| "Average": 0.6277333333333334 |
| }, |
| "SoftwareEnv": { |
| "qwen2.5-3b-instruct": 0.14800000000000002, |
| "qwen2.5-7b-instruct": 0.40800000000000003, |
| "qwen2.5-14b-instruct": 0.744, |
| "qwen2.5-32b-instruct": 0.86, |
| "qwen2.5-72b-instruct": 0.8400000000000001, |
| "llama-3.1-8b-instruct": 0.4159999999999999, |
| "llama-3.1-70b-instruct": 0.72, |
| "llama-3.2-3b-instruct": 0.16799999999999998, |
| "llama-3.3-70b-instruct": 0.784, |
| "mistral-large-instruct-2411": 0.804, |
| "gemma-2-27b-it": 0.528, |
| "gemma-2-9b-it": 0.308, |
| "deepseek-v3": 0.836, |
| "deepseek-r1": 0.8360000000000001, |
| "qwq-32b": 0.8800000000000001, |
| "Average": 0.6186666666666667 |
| }, |
| "CarIdentificationEnv": { |
| "qwen2.5-3b-instruct": 0.272, |
| "qwen2.5-7b-instruct": 0.4, |
| "qwen2.5-14b-instruct": 0.9120000000000001, |
| "qwen2.5-32b-instruct": 0.916, |
| "qwen2.5-72b-instruct": 0.9359999999999999, |
| "llama-3.1-8b-instruct": 0.544, |
| "llama-3.1-70b-instruct": 0.8400000000000001, |
| "llama-3.2-3b-instruct": 0.124, |
| "llama-3.3-70b-instruct": 0.852, |
| "mistral-large-instruct-2411": 0.9119999999999999, |
| "gemma-2-27b-it": 0.672, |
| "gemma-2-9b-it": 0.376, |
| "deepseek-v3": 0.992, |
| "deepseek-r1": 0.952, |
| "qwq-32b": 0.9879999999999999, |
| "Average": 0.7125333333333334 |
| }, |
| "PharmaceuticalEnv": { |
| "qwen2.5-3b-instruct": 0.156, |
| "qwen2.5-7b-instruct": 0.32, |
| "qwen2.5-14b-instruct": 0.7600000000000001, |
| "qwen2.5-32b-instruct": 0.752, |
| "qwen2.5-72b-instruct": 0.7559999999999999, |
| "llama-3.1-8b-instruct": 0.28400000000000003, |
| "llama-3.1-70b-instruct": 0.508, |
| "llama-3.2-3b-instruct": 0.148, |
| "llama-3.3-70b-instruct": 0.472, |
| "mistral-large-instruct-2411": 0.756, |
| "gemma-2-27b-it": 0.336, |
| "gemma-2-9b-it": 0.128, |
| "deepseek-v3": 0.8800000000000001, |
| "deepseek-r1": 0.8640000000000001, |
| "qwq-32b": 0.8, |
| "Average": 0.528 |
| }, |
| "NetworkEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.36, |
| "qwen2.5-14b-instruct": 0.66, |
| "qwen2.5-32b-instruct": 0.716, |
| "qwen2.5-72b-instruct": 0.716, |
| "llama-3.1-8b-instruct": 0.43199999999999994, |
| "llama-3.1-70b-instruct": 0.68, |
| "llama-3.2-3b-instruct": 0.14400000000000002, |
| "llama-3.3-70b-instruct": 0.7040000000000001, |
| "mistral-large-instruct-2411": 0.78, |
| "gemma-2-27b-it": 0.492, |
| "gemma-2-9b-it": 0.392, |
| "deepseek-v3": 0.8400000000000001, |
| "deepseek-r1": 0.736, |
| "qwq-32b": 0.828, |
| "Average": 0.5776 |
| }, |
| "BirdNestEnv": { |
| "qwen2.5-3b-instruct": 0.148, |
| "qwen2.5-7b-instruct": 0.21200000000000002, |
| "qwen2.5-14b-instruct": 0.48, |
| "qwen2.5-32b-instruct": 0.33999999999999997, |
| "qwen2.5-72b-instruct": 0.42400000000000004, |
| "llama-3.1-8b-instruct": 0.16799999999999998, |
| "llama-3.1-70b-instruct": 0.22400000000000003, |
| "llama-3.2-3b-instruct": 0.084, |
| "llama-3.3-70b-instruct": 0.20800000000000002, |
| "mistral-large-instruct-2411": 0.492, |
| "gemma-2-27b-it": 0.176, |
| "gemma-2-9b-it": 0.128, |
| "deepseek-v3": 0.764, |
| "deepseek-r1": 0.756, |
| "qwq-32b": 0.8119999999999999, |
| "Average": 0.36106666666666676 |
| }, |
| "EnergyEnv": { |
| "qwen2.5-3b-instruct": 0.15999999999999998, |
| "qwen2.5-7b-instruct": 0.42000000000000004, |
| "qwen2.5-14b-instruct": 0.7999999999999999, |
| "qwen2.5-32b-instruct": 0.7, |
| "qwen2.5-72b-instruct": 0.5880000000000001, |
| "llama-3.1-8b-instruct": 0.29600000000000004, |
| "llama-3.1-70b-instruct": 0.46799999999999997, |
| "llama-3.2-3b-instruct": 0.18, |
| "llama-3.3-70b-instruct": 0.396, |
| "mistral-large-instruct-2411": 0.78, |
| "gemma-2-27b-it": 0.35200000000000004, |
| "gemma-2-9b-it": 0.196, |
| "deepseek-v3": 0.916, |
| "deepseek-r1": 0.8720000000000001, |
| "qwq-32b": 0.8880000000000001, |
| "Average": 0.5341333333333333 |
| }, |
| "LanguageEnv": { |
| "qwen2.5-3b-instruct": 0.196, |
| "qwen2.5-7b-instruct": 0.304, |
| "qwen2.5-14b-instruct": 0.388, |
| "qwen2.5-32b-instruct": 0.512, |
| "qwen2.5-72b-instruct": 0.5599999999999999, |
| "llama-3.1-8b-instruct": 0.23200000000000004, |
| "llama-3.1-70b-instruct": 0.40800000000000003, |
| "llama-3.2-3b-instruct": 0.144, |
| "llama-3.3-70b-instruct": 0.336, |
| "mistral-large-instruct-2411": 0.536, |
| "gemma-2-27b-it": 0.20800000000000002, |
| "gemma-2-9b-it": 0.172, |
| "deepseek-v3": 0.724, |
| "deepseek-r1": 0.716, |
| "qwq-32b": 0.8119999999999999, |
| "Average": 0.41653333333333337 |
| }, |
| "AlgorithmEnv": { |
| "qwen2.5-3b-instruct": 0.1, |
| "qwen2.5-7b-instruct": 0.28400000000000003, |
| "qwen2.5-14b-instruct": 0.688, |
| "qwen2.5-32b-instruct": 0.6960000000000001, |
| "qwen2.5-72b-instruct": 0.66, |
| "llama-3.1-8b-instruct": 0.35200000000000004, |
| "llama-3.1-70b-instruct": 0.512, |
| "llama-3.2-3b-instruct": 0.22399999999999998, |
| "llama-3.3-70b-instruct": 0.484, |
| "mistral-large-instruct-2411": 0.788, |
| "gemma-2-27b-it": 0.268, |
| "gemma-2-9b-it": 0.164, |
| "deepseek-v3": 0.792, |
| "deepseek-r1": 0.724, |
| "qwq-32b": 0.812, |
| "Average": 0.5032 |
| }, |
| "MathematicalEnv": { |
| "qwen2.5-3b-instruct": 0.048, |
| "qwen2.5-7b-instruct": 0.42800000000000005, |
| "qwen2.5-14b-instruct": 0.7000000000000001, |
| "qwen2.5-32b-instruct": 0.8119999999999999, |
| "qwen2.5-72b-instruct": 0.792, |
| "llama-3.1-8b-instruct": 0.316, |
| "llama-3.1-70b-instruct": 0.8, |
| "llama-3.2-3b-instruct": 0.12800000000000003, |
| "llama-3.3-70b-instruct": 0.8400000000000001, |
| "mistral-large-instruct-2411": 0.884, |
| "gemma-2-27b-it": 0.268, |
| "gemma-2-9b-it": 0.068, |
| "deepseek-v3": 0.9119999999999999, |
| "deepseek-r1": 0.876, |
| "qwq-32b": 0.8160000000000001, |
| "Average": 0.5792 |
| }, |
| "MusicalEnv": { |
| "qwen2.5-3b-instruct": 0.04, |
| "qwen2.5-7b-instruct": 0.336, |
| "qwen2.5-14b-instruct": 0.8039999999999999, |
| "qwen2.5-32b-instruct": 0.8560000000000001, |
| "qwen2.5-72b-instruct": 0.8400000000000001, |
| "llama-3.1-8b-instruct": 0.34400000000000003, |
| "llama-3.1-70b-instruct": 0.68, |
| "llama-3.2-3b-instruct": 0.088, |
| "llama-3.3-70b-instruct": 0.8240000000000001, |
| "mistral-large-instruct-2411": 0.884, |
| "gemma-2-27b-it": 0.28, |
| "gemma-2-9b-it": 0.11599999999999999, |
| "deepseek-v3": 0.9480000000000001, |
| "deepseek-r1": 0.892, |
| "qwq-32b": 0.9039999999999999, |
| "Average": 0.5890666666666668 |
| }, |
| "InventorEnv": { |
| "qwen2.5-3b-instruct": 0.14800000000000002, |
| "qwen2.5-7b-instruct": 0.43200000000000005, |
| "qwen2.5-14b-instruct": 0.776, |
| "qwen2.5-32b-instruct": 0.7999999999999999, |
| "qwen2.5-72b-instruct": 0.772, |
| "llama-3.1-8b-instruct": 0.4, |
| "llama-3.1-70b-instruct": 0.7, |
| "llama-3.2-3b-instruct": 0.188, |
| "llama-3.3-70b-instruct": 0.616, |
| "mistral-large-instruct-2411": 0.8039999999999999, |
| "gemma-2-27b-it": 0.552, |
| "gemma-2-9b-it": 0.364, |
| "deepseek-v3": 0.9399999999999998, |
| "deepseek-r1": 0.908, |
| "qwq-32b": 0.9, |
| "Average": 0.62 |
| }, |
| "MedicalEnv": { |
| "qwen2.5-3b-instruct": 0.22000000000000003, |
| "qwen2.5-7b-instruct": 0.544, |
| "qwen2.5-14b-instruct": 0.8320000000000001, |
| "qwen2.5-32b-instruct": 0.8800000000000001, |
| "qwen2.5-72b-instruct": 0.8960000000000001, |
| "llama-3.1-8b-instruct": 0.52, |
| "llama-3.1-70b-instruct": 0.82, |
| "llama-3.2-3b-instruct": 0.23200000000000004, |
| "llama-3.3-70b-instruct": 0.8960000000000001, |
| "mistral-large-instruct-2411": 0.8960000000000001, |
| "gemma-2-27b-it": 0.692, |
| "gemma-2-9b-it": 0.5760000000000001, |
| "deepseek-v3": 0.9039999999999999, |
| "deepseek-r1": 0.9359999999999999, |
| "qwq-32b": 0.9199999999999999, |
| "Average": 0.7175999999999999 |
| }, |
| "MusicEnv": { |
| "qwen2.5-3b-instruct": 0.184, |
| "qwen2.5-7b-instruct": 0.26, |
| "qwen2.5-14b-instruct": 0.656, |
| "qwen2.5-32b-instruct": 0.664, |
| "qwen2.5-72b-instruct": 0.7559999999999999, |
| "llama-3.1-8b-instruct": 0.356, |
| "llama-3.1-70b-instruct": 0.596, |
| "llama-3.2-3b-instruct": 0.10800000000000001, |
| "llama-3.3-70b-instruct": 0.596, |
| "mistral-large-instruct-2411": 0.6639999999999999, |
| "gemma-2-27b-it": 0.45600000000000007, |
| "gemma-2-9b-it": 0.28400000000000003, |
| "deepseek-v3": 0.8119999999999999, |
| "deepseek-r1": 0.868, |
| "qwq-32b": 0.868, |
| "Average": 0.5418666666666667 |
| }, |
| "FantasyEnv": { |
| "qwen2.5-3b-instruct": 0.148, |
| "qwen2.5-7b-instruct": 0.32, |
| "qwen2.5-14b-instruct": 0.74, |
| "qwen2.5-32b-instruct": 0.7879999999999999, |
| "qwen2.5-72b-instruct": 0.5720000000000001, |
| "llama-3.1-8b-instruct": 0.40800000000000003, |
| "llama-3.1-70b-instruct": 0.676, |
| "llama-3.2-3b-instruct": 0.152, |
| "llama-3.3-70b-instruct": 0.704, |
| "mistral-large-instruct-2411": 0.8240000000000001, |
| "gemma-2-27b-it": 0.524, |
| "gemma-2-9b-it": 0.324, |
| "deepseek-v3": 0.9199999999999999, |
| "deepseek-r1": 0.9719999999999999, |
| "qwq-32b": 0.9719999999999999, |
| "Average": 0.6029333333333332 |
| }, |
| "EducationEnv": { |
| "qwen2.5-3b-instruct": 0.10400000000000001, |
| "qwen2.5-7b-instruct": 0.268, |
| "qwen2.5-14b-instruct": 0.828, |
| "qwen2.5-32b-instruct": 0.9039999999999999, |
| "qwen2.5-72b-instruct": 0.8480000000000001, |
| "llama-3.1-8b-instruct": 0.5680000000000001, |
| "llama-3.1-70b-instruct": 0.768, |
| "llama-3.2-3b-instruct": 0.192, |
| "llama-3.3-70b-instruct": 0.9039999999999999, |
| "mistral-large-instruct-2411": 0.876, |
| "gemma-2-27b-it": 0.624, |
| "gemma-2-9b-it": 0.45999999999999996, |
| "deepseek-v3": 0.9480000000000001, |
| "deepseek-r1": 0.9, |
| "qwq-32b": 0.9359999999999999, |
| "Average": 0.6752 |
| }, |
| "ChemicalEnv": { |
| "qwen2.5-3b-instruct": 0.264, |
| "qwen2.5-7b-instruct": 0.44000000000000006, |
| "qwen2.5-14b-instruct": 0.724, |
| "qwen2.5-32b-instruct": 0.7040000000000001, |
| "qwen2.5-72b-instruct": 0.72, |
| "llama-3.1-8b-instruct": 0.36, |
| "llama-3.1-70b-instruct": 0.62, |
| "llama-3.2-3b-instruct": 0.16399999999999998, |
| "llama-3.3-70b-instruct": 0.45999999999999996, |
| "mistral-large-instruct-2411": 0.68, |
| "gemma-2-27b-it": 0.44399999999999995, |
| "gemma-2-9b-it": 0.316, |
| "deepseek-v3": 0.8799999999999999, |
| "deepseek-r1": 0.6799999999999999, |
| "qwq-32b": 0.8200000000000001, |
| "Average": 0.5517333333333333 |
| }, |
| "Average": { |
| "qwen2.5-3b-instruct": 0.1655841584158416, |
| "qwen2.5-7b-instruct": 0.34736633663366323, |
| "qwen2.5-14b-instruct": 0.7148514851485149, |
| "qwen2.5-32b-instruct": 0.7330693069306928, |
| "qwen2.5-72b-instruct": 0.7272079207920793, |
| "llama-3.1-8b-instruct": 0.3334653465346535, |
| "llama-3.1-70b-instruct": 0.6271287128712871, |
| "llama-3.2-3b-instruct": 0.15599999999999997, |
| "llama-3.3-70b-instruct": 0.6372277227722771, |
| "mistral-large-instruct-2411": 0.7573861386138615, |
| "gemma-2-27b-it": 0.44522772277227735, |
| "gemma-2-9b-it": 0.264, |
| "deepseek-v3": 0.8605148514851484, |
| "deepseek-r1": 0.8304554455445546, |
| "qwq-32b": 0.8630891089108911 |
| } |
| } |