diff --git "a/leaderboard_data.json" "b/leaderboard_data.json" new file mode 100644--- /dev/null +++ "b/leaderboard_data.json" @@ -0,0 +1,28730 @@ +{ + "metadata": { + "generated_at": "2026-03-16T21:12:50Z", + "scoring_dimensions": [ + "semantic_relevance", + "factual_accuracy", + "freshness", + "objectivity_tone", + "layout_ad_density", + "accountability", + "transparency", + "authority" + ], + "weighted_score_formula": { + "weights": { + "semantic_relevance": 3, + "factual_accuracy": 3, + "objectivity_tone": 3, + "freshness": 2, + "transparency": 2, + "authority": 2, + "layout_ad_density": 2, + "accountability": 2 + }, + "scale_factor": 1.0526315789473684 + }, + "runs": [ + { + "model_name": "Gemini-2.5-Flash-Preview", + "score_file": "data/content-scores/gemini-2.5-flash/qwen_scoring_for_gemini_2.5_flash_few_shot.json", + "rank_file": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "score_file": "data/content-scores/gemini-3-flash-preview/qwen_scoring_for_gemini-3-flash-preview_fewshot.json", + "rank_file": "data/rank-scores/gemini-3-flash-preview/geo_scores.json" + }, + { + "model_name": "Gemini-3-Pro-Preview", + "score_file": "data/content-scores/gemini-3-pro-preview/qwen_scoring_for_gemini-3-pro-preview_fewshot.json", + "rank_file": "data/rank-scores/gemini-3-pro-preview/geo_scores.json" + }, + { + "model_name": "Perplexity-Sonar-Pro", + "score_file": "data/content-scores/perplexity/qwen_scoring_for_perplexity_few_shot.json", + "rank_file": "data/rank-scores/perplexity/geo_scores.json" + }, + { + "model_name": "claude", + "score_file": "data/content-scores/claude/qwen_scoring_for_claude-sonnet-4.5_fewshot.json", + "rank_file": "data/rank-scores/claude/geo_scores.json" + }, + { + "model_name": "deepseek-chat-gensee", + "score_file": "data/content-scores/deepseek-chat-gensee/qwen_scoring_deepseek_chat_gensee.json", + "rank_file": "data/rank-scores/deepseek-chat-gensee/geo_scores.json" + }, + { + "model_name": "deepseek-chat-tavily", + "score_file": "data/content-scores/deepseek-chat-tavily/qwen_scoring_deepseek_chat_tavily.json", + "rank_file": "data/rank-scores/deepseek-chat-tavily/geo_scores.json" + }, + { + "model_name": "deepseek-reasoning-gensee", + "score_file": "data/content-scores/deepseek-reasoning-gensee/qwen_scoring_deepseek_reasoner_gensee.json", + "rank_file": "data/rank-scores/deepseek-reasoning-gensee/geo_scores.json" + }, + { + "model_name": "deepseek-reasoning-tavily", + "score_file": "data/content-scores/deepseek-reasoning-tavily/qwen_scoring_deepseek_reasoner_tavily.json", + "rank_file": "data/rank-scores/deepseek-reasoning-tavily/geo_scores.json" + }, + { + "model_name": "exa", + "score_file": "data/content-scores/exa/qwen_scoring_for_exa_few_shot.json", + "rank_file": "data/rank-scores/exa/geo_scores.json" + }, + { + "model_name": "gensee", + "score_file": "data/content-scores/gensee/qwen_scoring_for_gensee_few_shot.json", + "rank_file": "data/rank-scores/gensee/geo_scores.json" + }, + { + "model_name": "google-search", + "score_file": "data/content-scores/google-search/qwen_scoring_for_search_engine.json", + "rank_file": null + }, + { + "model_name": "gpt-4o", + "score_file": "data/content-scores/gpt-4o/qwen_scoring_for_gpt4o_few_shot.json", + "rank_file": "data/rank-scores/gpt-4o/geo_scores.json" + }, + { + "model_name": "gpt-5", + "score_file": "data/content-scores/gpt-5/qwen_scoring_for_gpt-5_fewshot.json", + "rank_file": "data/rank-scores/gpt-5/geo_scores.json" + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "score_file": "data/content-scores/grok/qwen_scoring_for_grok_4.1_fast_non_reasoning_few_shot.json", + "rank_file": "data/rank-scores/grok/geo_scores.json" + }, + { + "model_name": "tavily", + "score_file": "data/content-scores/tavily/qwen_scoring_for_tavily_few_shot.json", + "rank_file": "data/rank-scores/tavily/geo_scores.json" + } + ] + }, + "overall": [ + { + "model_name": "gpt-5", + "num_sources": 316, + "num_queries": 93, + "num_complete_scores": 308, + "unweighted_mean_score": 4.464240506329115, + "weighted_total_content_score": 88.15456362425039, + "semantic_relevance": 3.922829581993569, + "factual_accuracy": 4.771704180064309, + "freshness": 4.490445859872612, + "objectivity_tone": 4.546623794212219, + "layout_ad_density": 4.015923566878981, + "accountability": 4.43312101910828, + "transparency": 4.792993630573249, + "authority": 4.735668789808917, + "avg_ge_freq": 0.6740462025316463, + "relative_se_rank": 2.0599499775368484, + "normalized_reciprocal_se_rank": 0.07444993783835872, + "reciprocal_se_rank": 0.027598407393197893, + "percentage_ge_sources_not_in_se_sources": 87.34177215189875, + "percentage_ge_sources_in_se_sources": 12.658227848101266 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "num_sources": 308, + "num_queries": 80, + "num_complete_scores": 307, + "unweighted_mean_score": 4.152687296416938, + "weighted_total_content_score": 83.11004784688996, + "semantic_relevance": 4.263843648208469, + "factual_accuracy": 4.501628664495114, + "freshness": 4.192182410423452, + "objectivity_tone": 4.003257328990228, + "layout_ad_density": 3.6319218241042344, + "accountability": 4.0, + "transparency": 4.299674267100977, + "authority": 4.328990228013029, + "avg_ge_freq": 0.7316051948051939, + "relative_se_rank": 1.7286246441730713, + "normalized_reciprocal_se_rank": 0.14088939196167136, + "reciprocal_se_rank": 0.043563227680110374, + "percentage_ge_sources_not_in_se_sources": 69.15584415584416, + "percentage_ge_sources_in_se_sources": 30.844155844155843 + }, + { + "model_name": "gpt-4o", + "num_sources": 294, + "num_queries": 88, + "num_complete_scores": 294, + "unweighted_mean_score": 4.066751700680272, + "weighted_total_content_score": 81.5180809165772, + "semantic_relevance": 4.241496598639456, + "factual_accuracy": 4.207482993197279, + "freshness": 4.523809523809524, + "objectivity_tone": 3.925170068027211, + "layout_ad_density": 3.3435374149659864, + "accountability": 3.9625850340136055, + "transparency": 4.149659863945578, + "authority": 4.180272108843537, + "avg_ge_freq": 0.46483707482993103, + "relative_se_rank": 1.8249937074261993, + "normalized_reciprocal_se_rank": 0.12262328761538778, + "reciprocal_se_rank": 0.03917404241243542, + "percentage_ge_sources_not_in_se_sources": 74.82993197278913, + "percentage_ge_sources_in_se_sources": 25.170068027210874 + }, + { + "model_name": "gensee", + "num_sources": 382, + "num_queries": 93, + "num_complete_scores": 377, + "unweighted_mean_score": 4.066835395575553, + "weighted_total_content_score": 81.48250206668509, + "semantic_relevance": 4.431578947368421, + "factual_accuracy": 4.426315789473684, + "freshness": 4.343832020997375, + "objectivity_tone": 3.8947368421052633, + "layout_ad_density": 3.339522546419098, + "accountability": 3.9658792650918637, + "transparency": 4.020997375328084, + "authority": 4.091863517060368, + "avg_ge_freq": 0.5340209424083775, + "relative_se_rank": 1.7669587654960388, + "normalized_reciprocal_se_rank": 0.1299587450378004, + "reciprocal_se_rank": 0.040936688734811204, + "percentage_ge_sources_not_in_se_sources": 71.46596858638743, + "percentage_ge_sources_in_se_sources": 28.53403141361256 + }, + { + "model_name": "deepseek-chat-gensee", + "num_sources": 82, + "num_queries": 19, + "num_complete_scores": 76, + "unweighted_mean_score": 4.26378842676311, + "weighted_total_content_score": 81.1168164313222, + "semantic_relevance": 4.243589743589744, + "factual_accuracy": 4.564102564102564, + "freshness": 4.423076923076923, + "objectivity_tone": 4.153846153846154, + "layout_ad_density": 3.9220779220779223, + "accountability": 3.9871794871794872, + "transparency": 4.32051282051282, + "authority": 4.461538461538462, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9224627653967212, + "normalized_reciprocal_se_rank": 0.12477656633162594, + "reciprocal_se_rank": 0.03969145647289068, + "percentage_ge_sources_not_in_se_sources": 74.390243902439, + "percentage_ge_sources_in_se_sources": 25.609756097560982 + }, + { + "model_name": "deepseek-reasoning-tavily", + "num_sources": 62, + "num_queries": 19, + "num_complete_scores": 58, + "unweighted_mean_score": 4.282327586206897, + "weighted_total_content_score": 80.1018675721562, + "semantic_relevance": 4.189655172413793, + "factual_accuracy": 4.603448275862069, + "freshness": 4.396551724137931, + "objectivity_tone": 4.0344827586206895, + "layout_ad_density": 3.7241379310344827, + "accountability": 4.103448275862069, + "transparency": 4.551724137931035, + "authority": 4.655172413793103, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4716965265158806, + "normalized_reciprocal_se_rank": 0.2609989378665101, + "reciprocal_se_rank": 0.07242450206015655, + "percentage_ge_sources_not_in_se_sources": 56.45161290322581, + "percentage_ge_sources_in_se_sources": 43.54838709677419 + }, + { + "model_name": "exa", + "num_sources": 425, + "num_queries": 99, + "num_complete_scores": 421, + "unweighted_mean_score": 4.0269747899159665, + "weighted_total_content_score": 80.06439628482967, + "semantic_relevance": 3.6485849056603774, + "factual_accuracy": 4.120283018867925, + "freshness": 4.345882352941176, + "objectivity_tone": 4.023584905660377, + "layout_ad_density": 3.390995260663507, + "accountability": 4.124705882352941, + "transparency": 4.305882352941176, + "authority": 4.24, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4974151993912352, + "normalized_reciprocal_se_rank": 0.20452762803005117, + "reciprocal_se_rank": 0.058854939745084926, + "percentage_ge_sources_not_in_se_sources": 57.176470588235304, + "percentage_ge_sources_in_se_sources": 42.8235294117647 + }, + { + "model_name": "google-search", + "num_sources": 406, + "num_queries": 99, + "num_complete_scores": 403, + "unweighted_mean_score": 4.000307881773399, + "weighted_total_content_score": 79.88073632356752, + "semantic_relevance": 3.9482758620689653, + "factual_accuracy": 4.221674876847291, + "freshness": 4.059113300492611, + "objectivity_tone": 3.7758620689655173, + "layout_ad_density": 3.7493796526054592, + "accountability": 3.8793103448275863, + "transparency": 4.1330049261083746, + "authority": 4.231527093596059, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "num_sources": 259, + "num_queries": 84, + "num_complete_scores": 253, + "unweighted_mean_score": 4.0607707509881426, + "weighted_total_content_score": 79.39849624060149, + "semantic_relevance": 4.209486166007905, + "factual_accuracy": 4.217391304347826, + "freshness": 4.446640316205533, + "objectivity_tone": 3.8181818181818183, + "layout_ad_density": 3.33596837944664, + "accountability": 4.201581027667984, + "transparency": 4.177865612648222, + "authority": 4.07905138339921, + "avg_ge_freq": 0.8146749034749037, + "relative_se_rank": 1.587931757339867, + "normalized_reciprocal_se_rank": 0.1641520228549373, + "reciprocal_se_rank": 0.04915303461805529, + "percentage_ge_sources_not_in_se_sources": 62.934362934362916, + "percentage_ge_sources_in_se_sources": 37.06563706563707 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "num_sources": 444, + "num_queries": 97, + "num_complete_scores": 427, + "unweighted_mean_score": 3.985254756530152, + "weighted_total_content_score": 78.20056899004268, + "semantic_relevance": 3.63302752293578, + "factual_accuracy": 4.13302752293578, + "freshness": 4.742596810933941, + "objectivity_tone": 3.8509174311926606, + "layout_ad_density": 3.502283105022831, + "accountability": 3.8906605922551254, + "transparency": 4.14123006833713, + "authority": 3.9931662870159452, + "avg_ge_freq": 0.5082466216216226, + "relative_se_rank": 1.9513731817138333, + "normalized_reciprocal_se_rank": 0.08733801529571, + "reciprocal_se_rank": 0.03069529979193034, + "percentage_ge_sources_not_in_se_sources": 79.50450450450452, + "percentage_ge_sources_in_se_sources": 20.495495495495497 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "num_sources": 444, + "num_queries": 98, + "num_complete_scores": 441, + "unweighted_mean_score": 3.931531531531532, + "weighted_total_content_score": 78.0298719772404, + "semantic_relevance": 3.520361990950226, + "factual_accuracy": 3.995475113122172, + "freshness": 4.444695259593679, + "objectivity_tone": 3.8981900452488687, + "layout_ad_density": 3.3355855855855854, + "accountability": 4.054176072234763, + "transparency": 4.162528216704289, + "authority": 4.060948081264108, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "num_sources": 379, + "num_queries": 98, + "num_complete_scores": 373, + "unweighted_mean_score": 3.950208412277378, + "weighted_total_content_score": 77.85307596167198, + "semantic_relevance": 3.6906666666666665, + "factual_accuracy": 4.074666666666666, + "freshness": 4.358090185676392, + "objectivity_tone": 3.6186666666666665, + "layout_ad_density": 3.6426666666666665, + "accountability": 3.806366047745358, + "transparency": 4.220159151193634, + "authority": 4.183023872679045, + "avg_ge_freq": 0.8135451187335091, + "relative_se_rank": 1.6003800744631216, + "normalized_reciprocal_se_rank": 0.16776928125494017, + "reciprocal_se_rank": 0.050022230204463676, + "percentage_ge_sources_not_in_se_sources": 60.949868073878655, + "percentage_ge_sources_in_se_sources": 39.05013192612139 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "num_sources": 456, + "num_queries": 99, + "num_complete_scores": 438, + "unweighted_mean_score": 3.9539324960753532, + "weighted_total_content_score": 77.48153277931671, + "semantic_relevance": 3.6334841628959276, + "factual_accuracy": 4.099547511312217, + "freshness": 4.459161147902869, + "objectivity_tone": 3.925339366515837, + "layout_ad_density": 3.415929203539823, + "accountability": 4.057395143487859, + "transparency": 4.097130242825607, + "authority": 3.9624724061810155, + "avg_ge_freq": 0.5219184210526322, + "relative_se_rank": 1.907622173881119, + "normalized_reciprocal_se_rank": 0.10236888641393292, + "reciprocal_se_rank": 0.03430708678393044, + "percentage_ge_sources_not_in_se_sources": 76.7543859649123, + "percentage_ge_sources_in_se_sources": 23.245614035087723 + }, + { + "model_name": "tavily", + "num_sources": 395, + "num_queries": 97, + "num_complete_scores": 389, + "unweighted_mean_score": 3.9243911304980004, + "weighted_total_content_score": 77.37774816788804, + "semantic_relevance": 3.544757033248082, + "factual_accuracy": 4.0664961636828645, + "freshness": 4.447570332480819, + "objectivity_tone": 3.8005115089514065, + "layout_ad_density": 3.3324808184143224, + "accountability": 4.043478260869565, + "transparency": 4.033248081841432, + "authority": 4.156010230179028, + "avg_ge_freq": 0.9864979746835443, + "relative_se_rank": 1.2450772341845837, + "normalized_reciprocal_se_rank": 0.2743286477154668, + "reciprocal_se_rank": 0.07562751486366782, + "percentage_ge_sources_not_in_se_sources": 45.316455696202524, + "percentage_ge_sources_in_se_sources": 54.683544303797476 + }, + { + "model_name": "deepseek-reasoning-gensee", + "num_sources": 81, + "num_queries": 19, + "num_complete_scores": 74, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 76.71215074723848, + "semantic_relevance": 4.04, + "factual_accuracy": 4.466666666666667, + "freshness": 4.351351351351352, + "objectivity_tone": 3.986666666666667, + "layout_ad_density": 3.7866666666666666, + "accountability": 4.081081081081081, + "transparency": 4.22972972972973, + "authority": 4.391891891891892, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9087325248138731, + "normalized_reciprocal_se_rank": 0.14292879978187134, + "reciprocal_se_rank": 0.044053279559236075, + "percentage_ge_sources_not_in_se_sources": 74.07407407407408, + "percentage_ge_sources_in_se_sources": 25.925925925925927 + }, + { + "model_name": "deepseek-chat-tavily", + "num_sources": 75, + "num_queries": 19, + "num_complete_scores": 69, + "unweighted_mean_score": 3.963768115942029, + "weighted_total_content_score": 72.70175438596492, + "semantic_relevance": 3.782608695652174, + "factual_accuracy": 4.086956521739131, + "freshness": 4.217391304347826, + "objectivity_tone": 3.782608695652174, + "layout_ad_density": 3.5072463768115942, + "accountability": 3.8550724637681157, + "transparency": 4.231884057971015, + "authority": 4.246376811594203, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.648791164687895, + "normalized_reciprocal_se_rank": 0.19786996874326315, + "reciprocal_se_rank": 0.05725516239219187, + "percentage_ge_sources_not_in_se_sources": 61.333333333333336, + "percentage_ge_sources_in_se_sources": 38.666666666666664 + } + ], + "by_query_type": [ + { + "model_name": "deepseek-chat-gensee", + "query_type": "DebateQA", + "num_sources": 20, + "num_queries": 4, + "num_complete_scores": 20, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 92.05263157894737, + "semantic_relevance": 4.85, + "factual_accuracy": 5.0, + "freshness": 3.95, + "objectivity_tone": 4.6, + "layout_ad_density": 4.4, + "accountability": 4.3, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4475099206349205, + "normalized_reciprocal_se_rank": 0.13340620098782763, + "reciprocal_se_rank": 0.0417650822762013, + "percentage_ge_sources_not_in_se_sources": 55.0, + "percentage_ge_sources_in_se_sources": 45.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_type": "DebateQA", + "num_sources": 36, + "num_queries": 8, + "num_complete_scores": 36, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.35087719298242, + "semantic_relevance": 4.888888888888889, + "factual_accuracy": 4.722222222222222, + "freshness": 4.333333333333333, + "objectivity_tone": 4.222222222222222, + "layout_ad_density": 4.111111111111111, + "accountability": 4.388888888888889, + "transparency": 4.722222222222222, + "authority": 4.611111111111111, + "avg_ge_freq": 0.7500055555555557, + "relative_se_rank": 0.8543317246023244, + "normalized_reciprocal_se_rank": 0.29328182035967976, + "reciprocal_se_rank": 0.08018179663982594, + "percentage_ge_sources_not_in_se_sources": 30.555555555555557, + "percentage_ge_sources_in_se_sources": 69.44444444444446 + }, + { + "model_name": "gpt-5", + "query_type": "DebateQA", + "num_sources": 85, + "num_queries": 20, + "num_complete_scores": 79, + "unweighted_mean_score": 4.655, + "weighted_total_content_score": 90.19195046439627, + "semantic_relevance": 4.271604938271605, + "factual_accuracy": 4.851851851851852, + "freshness": 4.261904761904762, + "objectivity_tone": 4.765432098765432, + "layout_ad_density": 4.518072289156627, + "accountability": 4.714285714285714, + "transparency": 4.892857142857143, + "authority": 4.928571428571429, + "avg_ge_freq": 0.6274458823529414, + "relative_se_rank": 2.0396269615741502, + "normalized_reciprocal_se_rank": 0.024647681783897292, + "reciprocal_se_rank": 0.015631360428654928, + "percentage_ge_sources_not_in_se_sources": 89.41176470588235, + "percentage_ge_sources_in_se_sources": 10.588235294117647 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_type": "DebateQA", + "num_sources": 20, + "num_queries": 4, + "num_complete_scores": 20, + "unweighted_mean_score": 4.45625, + "weighted_total_content_score": 89.63157894736842, + "semantic_relevance": 4.7, + "factual_accuracy": 4.9, + "freshness": 4.0, + "objectivity_tone": 4.25, + "layout_ad_density": 4.3, + "accountability": 4.35, + "transparency": 4.5, + "authority": 4.65, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3464484126984126, + "normalized_reciprocal_se_rank": 0.23220261224941802, + "reciprocal_se_rank": 0.06550499663274852, + "percentage_ge_sources_not_in_se_sources": 55.0, + "percentage_ge_sources_in_se_sources": 45.0 + }, + { + "model_name": "gpt-5", + "query_type": "Pinocchios", + "num_sources": 55, + "num_queries": 20, + "num_complete_scores": 55, + "unweighted_mean_score": 4.4340909090909095, + "weighted_total_content_score": 88.99521531100477, + "semantic_relevance": 4.290909090909091, + "factual_accuracy": 4.6909090909090905, + "freshness": 3.7636363636363637, + "objectivity_tone": 4.618181818181818, + "layout_ad_density": 3.9454545454545453, + "accountability": 4.672727272727273, + "transparency": 4.890909090909091, + "authority": 4.6, + "avg_ge_freq": 0.6605890909090913, + "relative_se_rank": 1.8746770791895104, + "normalized_reciprocal_se_rank": 0.16050373868555687, + "reciprocal_se_rank": 0.048276383810364386, + "percentage_ge_sources_not_in_se_sources": 74.54545454545455, + "percentage_ge_sources_in_se_sources": 25.454545454545453 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_type": "DebateQA", + "num_sources": 17, + "num_queries": 4, + "num_complete_scores": 17, + "unweighted_mean_score": 4.397058823529412, + "weighted_total_content_score": 88.42105263157896, + "semantic_relevance": 4.647058823529412, + "factual_accuracy": 4.647058823529412, + "freshness": 3.8823529411764706, + "objectivity_tone": 4.352941176470588, + "layout_ad_density": 4.0, + "accountability": 4.235294117647059, + "transparency": 4.705882352941177, + "authority": 4.705882352941177, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.013410364145658, + "normalized_reciprocal_se_rank": 0.380931631912024, + "reciprocal_se_rank": 0.10124328048371453, + "percentage_ge_sources_not_in_se_sources": 41.1764705882353, + "percentage_ge_sources_in_se_sources": 58.8235294117647 + }, + { + "model_name": "gpt-5", + "query_type": "HotpotQA", + "num_sources": 38, + "num_queries": 19, + "num_complete_scores": 38, + "unweighted_mean_score": 4.4144736842105265, + "weighted_total_content_score": 88.33795013850413, + "semantic_relevance": 3.6842105263157894, + "factual_accuracy": 4.7894736842105265, + "freshness": 4.842105263157895, + "objectivity_tone": 4.815789473684211, + "layout_ad_density": 4.157894736842105, + "accountability": 3.5, + "transparency": 4.7631578947368425, + "authority": 4.7631578947368425, + "avg_ge_freq": 0.5526184210526318, + "relative_se_rank": 1.6329072044063837, + "normalized_reciprocal_se_rank": 0.24588844317711944, + "reciprocal_se_rank": 0.06879358221974471, + "percentage_ge_sources_not_in_se_sources": 71.05263157894737, + "percentage_ge_sources_in_se_sources": 28.94736842105263 + }, + { + "model_name": "gpt-5", + "query_type": "QuoraQuestions", + "num_sources": 58, + "num_queries": 14, + "num_complete_scores": 56, + "unweighted_mean_score": 4.478879310344827, + "weighted_total_content_score": 87.84029038112523, + "semantic_relevance": 3.508771929824561, + "factual_accuracy": 4.719298245614035, + "freshness": 4.684210526315789, + "objectivity_tone": 4.684210526315789, + "layout_ad_density": 4.172413793103448, + "accountability": 4.473684210526316, + "transparency": 4.842105263157895, + "authority": 4.842105263157895, + "avg_ge_freq": 0.6379275862068965, + "relative_se_rank": 2.1757824170978175, + "normalized_reciprocal_se_rank": 0.04254727146439185, + "reciprocal_se_rank": 0.019932475424696096, + "percentage_ge_sources_not_in_se_sources": 91.37931034482759, + "percentage_ge_sources_in_se_sources": 8.620689655172415 + }, + { + "model_name": "gpt-4o", + "query_type": "Pinocchios", + "num_sources": 40, + "num_queries": 18, + "num_complete_scores": 40, + "unweighted_mean_score": 4.29375, + "weighted_total_content_score": 86.60526315789473, + "semantic_relevance": 4.625, + "factual_accuracy": 4.75, + "freshness": 3.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.875, + "accountability": 4.275, + "transparency": 4.55, + "authority": 4.675, + "avg_ge_freq": 0.44164499999999995, + "relative_se_rank": 1.5765700005314691, + "normalized_reciprocal_se_rank": 0.28054167213258124, + "reciprocal_se_rank": 0.07712045034253773, + "percentage_ge_sources_not_in_se_sources": 62.5, + "percentage_ge_sources_in_se_sources": 37.5 + }, + { + "model_name": "exa", + "query_type": "DebateQA", + "num_sources": 89, + "num_queries": 20, + "num_complete_scores": 87, + "unweighted_mean_score": 4.3292536115569815, + "weighted_total_content_score": 86.44589000591367, + "semantic_relevance": 4.49438202247191, + "factual_accuracy": 4.426966292134831, + "freshness": 4.415730337078652, + "objectivity_tone": 4.146067415730337, + "layout_ad_density": 3.7701149425287355, + "accountability": 4.415730337078652, + "transparency": 4.426966292134831, + "authority": 4.51685393258427, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.010284064126628, + "normalized_reciprocal_se_rank": 0.28063309301928224, + "reciprocal_se_rank": 0.07714241798278879, + "percentage_ge_sources_not_in_se_sources": 38.20224719101124, + "percentage_ge_sources_in_se_sources": 61.79775280898876 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_type": "Pinocchios", + "num_sources": 67, + "num_queries": 18, + "num_complete_scores": 67, + "unweighted_mean_score": 4.270522388059701, + "weighted_total_content_score": 86.00157109190886, + "semantic_relevance": 4.462686567164179, + "factual_accuracy": 4.731343283582089, + "freshness": 3.1194029850746268, + "objectivity_tone": 4.17910447761194, + "layout_ad_density": 4.0, + "accountability": 4.402985074626866, + "transparency": 4.611940298507463, + "authority": 4.656716417910448, + "avg_ge_freq": 0.7860761194029849, + "relative_se_rank": 1.7392122002022354, + "normalized_reciprocal_se_rank": 0.2045813768315125, + "reciprocal_se_rank": 0.058867855112426565, + "percentage_ge_sources_not_in_se_sources": 67.16417910447761, + "percentage_ge_sources_in_se_sources": 32.83582089552239 + }, + { + "model_name": "gensee", + "query_type": "Pinocchios", + "num_sources": 77, + "num_queries": 20, + "num_complete_scores": 75, + "unweighted_mean_score": 4.286873840445268, + "weighted_total_content_score": 85.94668489405333, + "semantic_relevance": 4.402597402597403, + "factual_accuracy": 4.753246753246753, + "freshness": 3.857142857142857, + "objectivity_tone": 4.1558441558441555, + "layout_ad_density": 3.8133333333333335, + "accountability": 4.298701298701299, + "transparency": 4.4935064935064934, + "authority": 4.4935064935064934, + "avg_ge_freq": 0.4934883116883122, + "relative_se_rank": 1.7999418903256372, + "normalized_reciprocal_se_rank": 0.19747215967140244, + "reciprocal_se_rank": 0.05715957234822537, + "percentage_ge_sources_not_in_se_sources": 70.12987012987011, + "percentage_ge_sources_in_se_sources": 29.870129870129865 + }, + { + "model_name": "claude", + "query_type": "Pinocchios", + "num_sources": 39, + "num_queries": 20, + "num_complete_scores": 39, + "unweighted_mean_score": 4.262820512820513, + "weighted_total_content_score": 85.56005398110662, + "semantic_relevance": 4.435897435897436, + "factual_accuracy": 4.435897435897436, + "freshness": 3.4358974358974357, + "objectivity_tone": 4.205128205128205, + "layout_ad_density": 3.8205128205128207, + "accountability": 4.666666666666667, + "transparency": 4.6923076923076925, + "authority": 4.410256410256411, + "avg_ge_freq": 0.8205102564102565, + "relative_se_rank": 1.4724542616408207, + "normalized_reciprocal_se_rank": 0.28256007847697834, + "reciprocal_se_rank": 0.07760545575053605, + "percentage_ge_sources_not_in_se_sources": 53.84615384615383, + "percentage_ge_sources_in_se_sources": 46.153846153846175 + }, + { + "model_name": "gpt-5", + "query_type": "VACOS", + "num_sources": 80, + "num_queries": 20, + "num_complete_scores": 80, + "unweighted_mean_score": 4.2953125, + "weighted_total_content_score": 85.55263157894731, + "semantic_relevance": 3.725, + "factual_accuracy": 4.775, + "freshness": 4.925, + "objectivity_tone": 4.05, + "layout_ad_density": 3.3625, + "accountability": 4.3875, + "transparency": 4.6, + "authority": 4.5375, + "avg_ge_freq": 0.8166749999999997, + "relative_se_rank": 2.327785098166267, + "normalized_reciprocal_se_rank": 0.0098989898989899, + "reciprocal_se_rank": 0.012087378640776695, + "percentage_ge_sources_not_in_se_sources": 98.75, + "percentage_ge_sources_in_se_sources": 1.2499999999999998 + }, + { + "model_name": "gensee", + "query_type": "DebateQA", + "num_sources": 89, + "num_queries": 20, + "num_complete_scores": 87, + "unweighted_mean_score": 4.282303370786517, + "weighted_total_content_score": 85.36960378474275, + "semantic_relevance": 4.7727272727272725, + "factual_accuracy": 4.5, + "freshness": 4.393258426966292, + "objectivity_tone": 4.0, + "layout_ad_density": 3.7126436781609193, + "accountability": 4.247191011235955, + "transparency": 4.224719101123595, + "authority": 4.370786516853933, + "avg_ge_freq": 0.5730269662921349, + "relative_se_rank": 1.455758534903284, + "normalized_reciprocal_se_rank": 0.17012042957025347, + "reciprocal_se_rank": 0.05058719060061921, + "percentage_ge_sources_not_in_se_sources": 58.42696629213483, + "percentage_ge_sources_in_se_sources": 41.57303370786517 + }, + { + "model_name": "google-search", + "query_type": "DebateQA", + "num_sources": 89, + "num_queries": 20, + "num_complete_scores": 89, + "unweighted_mean_score": 4.262640449438202, + "weighted_total_content_score": 85.2749852158486, + "semantic_relevance": 4.595505617977528, + "factual_accuracy": 4.415730337078652, + "freshness": 4.292134831460674, + "objectivity_tone": 3.797752808988764, + "layout_ad_density": 4.089887640449438, + "accountability": 4.067415730337078, + "transparency": 4.449438202247191, + "authority": 4.393258426966292, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_type": "VACOS", + "num_sources": 84, + "num_queries": 19, + "num_complete_scores": 84, + "unweighted_mean_score": 4.205357142857143, + "weighted_total_content_score": 84.48621553884713, + "semantic_relevance": 4.392857142857143, + "factual_accuracy": 4.738095238095238, + "freshness": 4.845238095238095, + "objectivity_tone": 3.8452380952380953, + "layout_ad_density": 3.2261904761904763, + "accountability": 4.011904761904762, + "transparency": 4.226190476190476, + "authority": 4.357142857142857, + "avg_ge_freq": 0.6825476190476188, + "relative_se_rank": 2.0432068047879337, + "normalized_reciprocal_se_rank": 0.0649730073602254, + "reciprocal_se_rank": 0.025321183807432805, + "percentage_ge_sources_not_in_se_sources": 84.52380952380952, + "percentage_ge_sources_in_se_sources": 15.476190476190476 + }, + { + "model_name": "deepseek-chat-gensee", + "query_type": "Pinocchios", + "num_sources": 16, + "num_queries": 4, + "num_complete_scores": 15, + "unweighted_mean_score": 4.458333333333333, + "weighted_total_content_score": 83.75, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 4.066666666666666, + "objectivity_tone": 4.533333333333333, + "layout_ad_density": 4.4, + "accountability": 4.4, + "transparency": 4.533333333333333, + "authority": 4.733333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8245541272980292, + "normalized_reciprocal_se_rank": 0.17878186628186626, + "reciprocal_se_rank": 0.052668458159963016, + "percentage_ge_sources_not_in_se_sources": 74.99999999999999, + "percentage_ge_sources_in_se_sources": 24.999999999999993 + }, + { + "model_name": "exa", + "query_type": "Pinocchios", + "num_sources": 87, + "num_queries": 20, + "num_complete_scores": 86, + "unweighted_mean_score": 4.200123152709359, + "weighted_total_content_score": 83.5934664246824, + "semantic_relevance": 3.7011494252873565, + "factual_accuracy": 4.264367816091954, + "freshness": 3.7126436781609193, + "objectivity_tone": 4.344827586206897, + "layout_ad_density": 3.6627906976744184, + "accountability": 4.632183908045977, + "transparency": 4.689655172413793, + "authority": 4.586206896551724, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.80952333519013, + "normalized_reciprocal_se_rank": 0.2313966587355651, + "reciprocal_se_rank": 0.06531133304568201, + "percentage_ge_sources_not_in_se_sources": 62.06896551724138, + "percentage_ge_sources_in_se_sources": 37.93103448275862 + }, + { + "model_name": "tavily", + "query_type": "DebateQA", + "num_sources": 76, + "num_queries": 20, + "num_complete_scores": 76, + "unweighted_mean_score": 4.184210526315789, + "weighted_total_content_score": 83.55955678670357, + "semantic_relevance": 4.223684210526316, + "factual_accuracy": 4.315789473684211, + "freshness": 4.473684210526316, + "objectivity_tone": 3.8947368421052633, + "layout_ad_density": 3.776315789473684, + "accountability": 4.315789473684211, + "transparency": 4.157894736842105, + "authority": 4.315789473684211, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9508661126222719, + "normalized_reciprocal_se_rank": 0.3434220529106368, + "reciprocal_se_rank": 0.09223005640328419, + "percentage_ge_sources_not_in_se_sources": 36.8421052631579, + "percentage_ge_sources_in_se_sources": 63.1578947368421 + }, + { + "model_name": "google-search", + "query_type": "Pinocchios", + "num_sources": 93, + "num_queries": 20, + "num_complete_scores": 91, + "unweighted_mean_score": 4.177611367127496, + "weighted_total_content_score": 83.53140916808147, + "semantic_relevance": 4.161290322580645, + "factual_accuracy": 4.580645161290323, + "freshness": 3.3225806451612905, + "objectivity_tone": 3.967741935483871, + "layout_ad_density": 4.0, + "accountability": 4.268817204301075, + "transparency": 4.483870967741935, + "authority": 4.623655913978495, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-chat-tavily", + "query_type": "DebateQA", + "num_sources": 20, + "num_queries": 4, + "num_complete_scores": 20, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.84210526315789, + "semantic_relevance": 4.5, + "factual_accuracy": 4.3, + "freshness": 3.85, + "objectivity_tone": 3.9, + "layout_ad_density": 3.6, + "accountability": 4.05, + "transparency": 4.5, + "authority": 4.3, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1437896825396825, + "normalized_reciprocal_se_rank": 0.2723387312276201, + "reciprocal_se_rank": 0.07514935531925823, + "percentage_ge_sources_not_in_se_sources": 45.0, + "percentage_ge_sources_in_se_sources": 55.0 + }, + { + "model_name": "gpt-4o", + "query_type": "DebateQA", + "num_sources": 78, + "num_queries": 18, + "num_complete_scores": 78, + "unweighted_mean_score": 4.1201923076923075, + "weighted_total_content_score": 82.60458839406209, + "semantic_relevance": 4.487179487179487, + "factual_accuracy": 4.102564102564102, + "freshness": 4.564102564102564, + "objectivity_tone": 3.9615384615384617, + "layout_ad_density": 3.7051282051282053, + "accountability": 4.0, + "transparency": 4.089743589743589, + "authority": 4.051282051282051, + "avg_ge_freq": 0.4999897435897439, + "relative_se_rank": 1.7141289179822279, + "normalized_reciprocal_se_rank": 0.12281919725976252, + "reciprocal_se_rank": 0.03922111778814682, + "percentage_ge_sources_not_in_se_sources": 71.7948717948718, + "percentage_ge_sources_in_se_sources": 28.205128205128204 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_type": "DebateQA", + "num_sources": 82, + "num_queries": 20, + "num_complete_scores": 79, + "unweighted_mean_score": 4.237874779541446, + "weighted_total_content_score": 82.51604621309367, + "semantic_relevance": 4.1875, + "factual_accuracy": 4.325, + "freshness": 4.320987654320987, + "objectivity_tone": 3.6375, + "layout_ad_density": 4.1375, + "accountability": 4.246913580246914, + "transparency": 4.518518518518518, + "authority": 4.506172839506172, + "avg_ge_freq": 0.7804853658536585, + "relative_se_rank": 1.185618672325219, + "normalized_reciprocal_se_rank": 0.2274442114543135, + "reciprocal_se_rank": 0.0643615944999443, + "percentage_ge_sources_not_in_se_sources": 46.34146341463415, + "percentage_ge_sources_in_se_sources": 53.65853658536585 + }, + { + "model_name": "tavily", + "query_type": "Pinocchios", + "num_sources": 81, + "num_queries": 20, + "num_complete_scores": 79, + "unweighted_mean_score": 4.188712522045855, + "weighted_total_content_score": 82.27420402859, + "semantic_relevance": 3.55, + "factual_accuracy": 4.325, + "freshness": 4.075, + "objectivity_tone": 4.1875, + "layout_ad_density": 3.8, + "accountability": 4.5125, + "transparency": 4.5625, + "authority": 4.525, + "avg_ge_freq": 0.9958851851851852, + "relative_se_rank": 1.4422121160476211, + "normalized_reciprocal_se_rank": 0.3128941728047197, + "reciprocal_se_rank": 0.08489447356229925, + "percentage_ge_sources_not_in_se_sources": 45.67901234567901, + "percentage_ge_sources_in_se_sources": 54.32098765432099 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_type": "DebateQA", + "num_sources": 100, + "num_queries": 20, + "num_complete_scores": 100, + "unweighted_mean_score": 4.1325, + "weighted_total_content_score": 82.12631578947368, + "semantic_relevance": 3.92, + "factual_accuracy": 4.0, + "freshness": 4.54, + "objectivity_tone": 3.98, + "layout_ad_density": 3.7, + "accountability": 4.19, + "transparency": 4.4, + "authority": 4.33, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_type": "VACOS", + "num_sources": 81, + "num_queries": 20, + "num_complete_scores": 80, + "unweighted_mean_score": 4.138888888888889, + "weighted_total_content_score": 81.97530864197529, + "semantic_relevance": 3.6875, + "factual_accuracy": 4.625, + "freshness": 4.91358024691358, + "objectivity_tone": 3.8625, + "layout_ad_density": 2.950617283950617, + "accountability": 4.08641975308642, + "transparency": 4.54320987654321, + "authority": 4.407407407407407, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.003012313759094, + "normalized_reciprocal_se_rank": 0.07692643713869617, + "reciprocal_se_rank": 0.028193488535754666, + "percentage_ge_sources_not_in_se_sources": 82.71604938271606, + "percentage_ge_sources_in_se_sources": 17.28395061728395 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_type": "Pinocchios", + "num_sources": 86, + "num_queries": 20, + "num_complete_scores": 84, + "unweighted_mean_score": 4.162873754152824, + "weighted_total_content_score": 81.87270501835988, + "semantic_relevance": 3.5294117647058822, + "factual_accuracy": 4.117647058823529, + "freshness": 4.523255813953488, + "objectivity_tone": 4.08235294117647, + "layout_ad_density": 3.764705882352941, + "accountability": 4.465116279069767, + "transparency": 4.534883720930233, + "authority": 4.255813953488372, + "avg_ge_freq": 0.5736325581395352, + "relative_se_rank": 1.941468415075042, + "normalized_reciprocal_se_rank": 0.15368587669053982, + "reciprocal_se_rank": 0.04663811114651323, + "percentage_ge_sources_not_in_se_sources": 73.25581395348837, + "percentage_ge_sources_in_se_sources": 26.74418604651163 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_type": "Pinocchios", + "num_sources": 10, + "num_queries": 4, + "num_complete_scores": 9, + "unweighted_mean_score": 4.513888888888889, + "weighted_total_content_score": 81.57894736842105, + "semantic_relevance": 4.555555555555555, + "factual_accuracy": 4.777777777777778, + "freshness": 3.5555555555555554, + "objectivity_tone": 4.555555555555555, + "layout_ad_density": 4.555555555555555, + "accountability": 4.555555555555555, + "transparency": 4.777777777777778, + "authority": 4.777777777777778, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8599648428916721, + "normalized_reciprocal_se_rank": 0.4548140553403711, + "reciprocal_se_rank": 0.11899658125897268, + "percentage_ge_sources_not_in_se_sources": 30.0, + "percentage_ge_sources_in_se_sources": 70.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_type": "DebateQA", + "num_sources": 100, + "num_queries": 20, + "num_complete_scores": 96, + "unweighted_mean_score": 4.178210678210679, + "weighted_total_content_score": 81.42105263157897, + "semantic_relevance": 4.214285714285714, + "factual_accuracy": 4.255102040816326, + "freshness": 4.73469387755102, + "objectivity_tone": 3.683673469387755, + "layout_ad_density": 3.9285714285714284, + "accountability": 3.979591836734694, + "transparency": 4.357142857142857, + "authority": 4.23469387755102, + "avg_ge_freq": 0.4833250000000001, + "relative_se_rank": 1.7292861678201157, + "normalized_reciprocal_se_rank": 0.10523315112286376, + "reciprocal_se_rank": 0.03499534456593085, + "percentage_ge_sources_not_in_se_sources": 72.0, + "percentage_ge_sources_in_se_sources": 28.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_type": "VACOS", + "num_sources": 94, + "num_queries": 20, + "num_complete_scores": 94, + "unweighted_mean_score": 4.059840425531915, + "weighted_total_content_score": 81.35498320268756, + "semantic_relevance": 3.904255319148936, + "factual_accuracy": 4.617021276595745, + "freshness": 4.829787234042553, + "objectivity_tone": 3.8085106382978724, + "layout_ad_density": 3.1702127659574466, + "accountability": 3.734042553191489, + "transparency": 4.212765957446808, + "authority": 4.202127659574468, + "avg_ge_freq": 0.4148723404255325, + "relative_se_rank": 2.3167240998153185, + "normalized_reciprocal_se_rank": 0.007184894289987778, + "reciprocal_se_rank": 0.01143520518133201, + "percentage_ge_sources_not_in_se_sources": 96.80851063829788, + "percentage_ge_sources_in_se_sources": 3.1914893617021276 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_type": "DebateQA", + "num_sources": 100, + "num_queries": 20, + "num_complete_scores": 95, + "unweighted_mean_score": 4.1310714285714285, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 4.11340206185567, + "factual_accuracy": 4.195876288659794, + "freshness": 4.5353535353535355, + "objectivity_tone": 3.8556701030927836, + "layout_ad_density": 3.8181818181818183, + "accountability": 4.181818181818182, + "transparency": 4.303030303030303, + "authority": 4.171717171717172, + "avg_ge_freq": 0.4899890000000004, + "relative_se_rank": 1.6757336210963478, + "normalized_reciprocal_se_rank": 0.12549266202008144, + "reciprocal_se_rank": 0.039863528009679766, + "percentage_ge_sources_not_in_se_sources": 71.0, + "percentage_ge_sources_in_se_sources": 29.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_type": "VACOS", + "num_sources": 91, + "num_queries": 20, + "num_complete_scores": 91, + "unweighted_mean_score": 4.0467032967032965, + "weighted_total_content_score": 81.01792943898207, + "semantic_relevance": 3.923076923076923, + "factual_accuracy": 4.428571428571429, + "freshness": 4.934065934065934, + "objectivity_tone": 3.868131868131868, + "layout_ad_density": 2.8131868131868134, + "accountability": 4.208791208791209, + "transparency": 4.131868131868132, + "authority": 4.065934065934066, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_type": "Pinocchios", + "num_sources": 72, + "num_queries": 19, + "num_complete_scores": 72, + "unweighted_mean_score": 4.086805555555555, + "weighted_total_content_score": 80.99415204678363, + "semantic_relevance": 3.5972222222222223, + "factual_accuracy": 4.236111111111111, + "freshness": 3.986111111111111, + "objectivity_tone": 3.7222222222222223, + "layout_ad_density": 4.069444444444445, + "accountability": 4.111111111111111, + "transparency": 4.5, + "authority": 4.472222222222222, + "avg_ge_freq": 0.8148138888888888, + "relative_se_rank": 1.9321450595878684, + "normalized_reciprocal_se_rank": 0.20414708640646176, + "reciprocal_se_rank": 0.05876349891805757, + "percentage_ge_sources_not_in_se_sources": 62.5, + "percentage_ge_sources_in_se_sources": 37.5 + }, + { + "model_name": "gpt-4o", + "query_type": "VACOS", + "num_sources": 81, + "num_queries": 19, + "num_complete_scores": 81, + "unweighted_mean_score": 4.032407407407407, + "weighted_total_content_score": 80.88369070825209, + "semantic_relevance": 4.172839506172839, + "factual_accuracy": 4.296296296296297, + "freshness": 4.9753086419753085, + "objectivity_tone": 3.8518518518518516, + "layout_ad_density": 2.740740740740741, + "accountability": 3.8518518518518516, + "transparency": 4.098765432098766, + "authority": 4.271604938271605, + "avg_ge_freq": 0.49793086419753113, + "relative_se_rank": 2.029381072384311, + "normalized_reciprocal_se_rank": 0.07521622430371759, + "reciprocal_se_rank": 0.027782539335116603, + "percentage_ge_sources_not_in_se_sources": 83.95061728395062, + "percentage_ge_sources_in_se_sources": 16.049382716049383 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_type": "Pinocchios", + "num_sources": 89, + "num_queries": 20, + "num_complete_scores": 88, + "unweighted_mean_score": 4.073863636363637, + "weighted_total_content_score": 80.40212891780013, + "semantic_relevance": 3.7386363636363638, + "factual_accuracy": 4.204545454545454, + "freshness": 3.8863636363636362, + "objectivity_tone": 4.125, + "layout_ad_density": 3.7386363636363638, + "accountability": 4.363636363636363, + "transparency": 4.375, + "authority": 4.159090909090909, + "avg_ge_freq": 0.5842617977528091, + "relative_se_rank": 2.0481831598330293, + "normalized_reciprocal_se_rank": 0.14693960626866004, + "reciprocal_se_rank": 0.04501704131212949, + "percentage_ge_sources_not_in_se_sources": 73.03370786516854, + "percentage_ge_sources_in_se_sources": 26.96629213483146 + }, + { + "model_name": "claude", + "query_type": "VACOS", + "num_sources": 81, + "num_queries": 19, + "num_complete_scores": 81, + "unweighted_mean_score": 3.9891975308641974, + "weighted_total_content_score": 80.10396361273551, + "semantic_relevance": 4.2592592592592595, + "factual_accuracy": 4.345679012345679, + "freshness": 4.938271604938271, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 2.740740740740741, + "accountability": 4.197530864197531, + "transparency": 3.9012345679012346, + "authority": 3.8641975308641974, + "avg_ge_freq": 0.7819024691358022, + "relative_se_rank": 2.0549062284325044, + "normalized_reciprocal_se_rank": 0.056933641949831956, + "reciprocal_se_rank": 0.02338939454619748, + "percentage_ge_sources_not_in_se_sources": 83.95061728395062, + "percentage_ge_sources_in_se_sources": 16.049382716049383 + }, + { + "model_name": "gensee", + "query_type": "VACOS", + "num_sources": 88, + "num_queries": 19, + "num_complete_scores": 88, + "unweighted_mean_score": 3.9332386363636362, + "weighted_total_content_score": 79.43779904306221, + "semantic_relevance": 4.534090909090909, + "factual_accuracy": 4.363636363636363, + "freshness": 4.818181818181818, + "objectivity_tone": 3.6363636363636362, + "layout_ad_density": 2.4886363636363638, + "accountability": 4.011363636363637, + "transparency": 3.7954545454545454, + "authority": 3.8181818181818183, + "avg_ge_freq": 0.5340818181818183, + "relative_se_rank": 2.115456142814551, + "normalized_reciprocal_se_rank": 0.07056832757590334, + "reciprocal_se_rank": 0.02666569036411269, + "percentage_ge_sources_not_in_se_sources": 87.5, + "percentage_ge_sources_in_se_sources": 12.5 + }, + { + "model_name": "claude", + "query_type": "DebateQA", + "num_sources": 70, + "num_queries": 20, + "num_complete_scores": 65, + "unweighted_mean_score": 4.280769230769231, + "weighted_total_content_score": 79.39849624060146, + "semantic_relevance": 4.3076923076923075, + "factual_accuracy": 4.323076923076923, + "freshness": 4.430769230769231, + "objectivity_tone": 4.107692307692307, + "layout_ad_density": 3.9384615384615387, + "accountability": 4.323076923076923, + "transparency": 4.384615384615385, + "authority": 4.430769230769231, + "avg_ge_freq": 0.8523828571428572, + "relative_se_rank": 1.3421362086210757, + "normalized_reciprocal_se_rank": 0.21041652104583275, + "reciprocal_se_rank": 0.06026998928043071, + "percentage_ge_sources_not_in_se_sources": 54.28571428571426, + "percentage_ge_sources_in_se_sources": 45.71428571428574 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_type": "VACOS", + "num_sources": 64, + "num_queries": 20, + "num_complete_scores": 62, + "unweighted_mean_score": 4.046428571428572, + "weighted_total_content_score": 79.39144736842107, + "semantic_relevance": 4.193548387096774, + "factual_accuracy": 4.5, + "freshness": 4.984126984126984, + "objectivity_tone": 3.7419354838709675, + "layout_ad_density": 3.142857142857143, + "accountability": 3.619047619047619, + "transparency": 4.095238095238095, + "authority": 4.111111111111111, + "avg_ge_freq": 0.8385406249999999, + "relative_se_rank": 2.067510788296202, + "normalized_reciprocal_se_rank": 0.051796852838519515, + "reciprocal_se_rank": 0.02215506900731415, + "percentage_ge_sources_not_in_se_sources": 85.93750000000001, + "percentage_ge_sources_in_se_sources": 14.0625 + }, + { + "model_name": "gpt-4o", + "query_type": "QuoraQuestions", + "num_sources": 76, + "num_queries": 19, + "num_complete_scores": 76, + "unweighted_mean_score": 3.9654605263157894, + "weighted_total_content_score": 79.21052631578947, + "semantic_relevance": 4.065789473684211, + "factual_accuracy": 4.0394736842105265, + "freshness": 4.631578947368421, + "objectivity_tone": 3.6973684210526314, + "layout_ad_density": 3.210526315789474, + "accountability": 4.0, + "transparency": 4.118421052631579, + "authority": 3.960526315789474, + "avg_ge_freq": 0.4298052631578951, + "relative_se_rank": 1.7644798178150203, + "normalized_reciprocal_se_rank": 0.0996490754594708, + "reciprocal_se_rank": 0.03365353997691167, + "percentage_ge_sources_not_in_se_sources": 71.05263157894738, + "percentage_ge_sources_in_se_sources": 28.947368421052637 + }, + { + "model_name": "claude", + "query_type": "HotpotQA", + "num_sources": 10, + "num_queries": 7, + "num_complete_scores": 10, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 79.15789473684211, + "semantic_relevance": 3.7, + "factual_accuracy": 4.1, + "freshness": 4.1, + "objectivity_tone": 4.6, + "layout_ad_density": 3.6, + "accountability": 3.5, + "transparency": 3.9, + "authority": 3.9, + "avg_ge_freq": 0.8333400000000001, + "relative_se_rank": 1.8379612104849017, + "normalized_reciprocal_se_rank": 0.1327922077922078, + "reciprocal_se_rank": 0.04161754507628294, + "percentage_ge_sources_not_in_se_sources": 70.00000000000001, + "percentage_ge_sources_in_se_sources": 30.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_type": "QuoraQuestions", + "num_sources": 58, + "num_queries": 15, + "num_complete_scores": 57, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 78.82032667876587, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.175438596491228, + "freshness": 4.43859649122807, + "objectivity_tone": 3.6842105263157894, + "layout_ad_density": 3.245614035087719, + "accountability": 3.8947368421052633, + "transparency": 4.157894736842105, + "authority": 4.0701754385964914, + "avg_ge_freq": 0.8563258620689654, + "relative_se_rank": 1.5173397953190046, + "normalized_reciprocal_se_rank": 0.15419400685217452, + "reciprocal_se_rank": 0.04676021038438175, + "percentage_ge_sources_not_in_se_sources": 58.62068965517241, + "percentage_ge_sources_in_se_sources": 41.37931034482759 + }, + { + "model_name": "tavily", + "query_type": "VACOS", + "num_sources": 83, + "num_queries": 20, + "num_complete_scores": 83, + "unweighted_mean_score": 3.9548192771084336, + "weighted_total_content_score": 78.80786303107162, + "semantic_relevance": 3.7590361445783134, + "factual_accuracy": 4.313253012048193, + "freshness": 4.9156626506024095, + "objectivity_tone": 3.5180722891566263, + "layout_ad_density": 2.6144578313253013, + "accountability": 4.0602409638554215, + "transparency": 4.144578313253012, + "authority": 4.313253012048193, + "avg_ge_freq": 0.9397590361445783, + "relative_se_rank": 1.7133062132347119, + "normalized_reciprocal_se_rank": 0.1335959024960005, + "reciprocal_se_rank": 0.04181066589102925, + "percentage_ge_sources_not_in_se_sources": 68.67469879518072, + "percentage_ge_sources_in_se_sources": 31.325301204819276 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_type": "VACOS", + "num_sources": 92, + "num_queries": 20, + "num_complete_scores": 88, + "unweighted_mean_score": 4.015489130434783, + "weighted_total_content_score": 78.72997711670477, + "semantic_relevance": 3.741573033707865, + "factual_accuracy": 4.393258426966292, + "freshness": 4.802197802197802, + "objectivity_tone": 3.853932584269663, + "layout_ad_density": 2.760869565217391, + "accountability": 4.373626373626373, + "transparency": 4.164835164835165, + "authority": 4.087912087912088, + "avg_ge_freq": 0.45288152173913093, + "relative_se_rank": 2.168925621074675, + "normalized_reciprocal_se_rank": 0.032034724656595535, + "reciprocal_se_rank": 0.017406402283987762, + "percentage_ge_sources_not_in_se_sources": 89.1304347826087, + "percentage_ge_sources_in_se_sources": 10.869565217391303 + }, + { + "model_name": "gpt-4o", + "query_type": "HotpotQA", + "num_sources": 19, + "num_queries": 14, + "num_complete_scores": 19, + "unweighted_mean_score": 3.9210526315789473, + "weighted_total_content_score": 78.28254847645428, + "semantic_relevance": 3.4210526315789473, + "factual_accuracy": 3.789473684210526, + "freshness": 4.368421052631579, + "objectivity_tone": 4.421052631578948, + "layout_ad_density": 3.8421052631578947, + "accountability": 3.473684210526316, + "transparency": 3.8947368421052633, + "authority": 4.157894736842105, + "avg_ge_freq": 0.3683947368421053, + "relative_se_rank": 2.1738400706504932, + "normalized_reciprocal_se_rank": 0.08335991493886231, + "reciprocal_se_rank": 0.029739397036280018, + "percentage_ge_sources_not_in_se_sources": 89.47368421052632, + "percentage_ge_sources_in_se_sources": 10.526315789473685 + }, + { + "model_name": "deepseek-chat-tavily", + "query_type": "Pinocchios", + "num_sources": 14, + "num_queries": 4, + "num_complete_scores": 14, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.27067669172932, + "semantic_relevance": 3.642857142857143, + "factual_accuracy": 3.9285714285714284, + "freshness": 3.5714285714285716, + "objectivity_tone": 3.7857142857142856, + "layout_ad_density": 3.9285714285714284, + "accountability": 4.142857142857143, + "transparency": 4.142857142857143, + "authority": 4.357142857142857, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.668905839637547, + "normalized_reciprocal_se_rank": 0.19718958290386862, + "reciprocal_se_rank": 0.057091671620104346, + "percentage_ge_sources_not_in_se_sources": 64.28571428571429, + "percentage_ge_sources_in_se_sources": 35.71428571428571 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_type": "HotpotQA", + "num_sources": 63, + "num_queries": 20, + "num_complete_scores": 63, + "unweighted_mean_score": 3.8968253968253967, + "weighted_total_content_score": 78.01169590643275, + "semantic_relevance": 3.4603174603174605, + "factual_accuracy": 4.111111111111111, + "freshness": 4.158730158730159, + "objectivity_tone": 4.190476190476191, + "layout_ad_density": 3.857142857142857, + "accountability": 3.4285714285714284, + "transparency": 3.9523809523809526, + "authority": 4.015873015873016, + "avg_ge_freq": 0.6137492063492065, + "relative_se_rank": 1.9920341456410042, + "normalized_reciprocal_se_rank": 0.07504523694999884, + "reciprocal_se_rank": 0.027741452568082244, + "percentage_ge_sources_not_in_se_sources": 82.53968253968254, + "percentage_ge_sources_in_se_sources": 17.46031746031746 + }, + { + "model_name": "gensee", + "query_type": "QuoraQuestions", + "num_sources": 83, + "num_queries": 18, + "num_complete_scores": 82, + "unweighted_mean_score": 3.923780487804878, + "weighted_total_content_score": 77.94546607482562, + "semantic_relevance": 4.304878048780488, + "factual_accuracy": 4.182926829268292, + "freshness": 4.390243902439025, + "objectivity_tone": 3.682926829268293, + "layout_ad_density": 3.2560975609756095, + "accountability": 3.7195121951219514, + "transparency": 3.902439024390244, + "authority": 3.951219512195122, + "avg_ge_freq": 0.5542120481927711, + "relative_se_rank": 1.6111832468899239, + "normalized_reciprocal_se_rank": 0.12268532386073742, + "reciprocal_se_rank": 0.03918894918012865, + "percentage_ge_sources_not_in_se_sources": 62.650602409638545, + "percentage_ge_sources_in_se_sources": 37.34939759036144 + }, + { + "model_name": "deepseek-chat-gensee", + "query_type": "HotpotQA", + "num_sources": 11, + "num_queries": 3, + "num_complete_scores": 9, + "unweighted_mean_score": 4.308928571428572, + "weighted_total_content_score": 77.70334928229664, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.4, + "layout_ad_density": 4.666666666666667, + "accountability": 3.3, + "transparency": 4.1, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.629286988225921, + "normalized_reciprocal_se_rank": 0.14128295946477765, + "reciprocal_se_rank": 0.04365779851216744, + "percentage_ge_sources_not_in_se_sources": 81.81818181818181, + "percentage_ge_sources_in_se_sources": 18.181818181818183 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_type": "Pinocchios", + "num_sources": 86, + "num_queries": 20, + "num_complete_scores": 85, + "unweighted_mean_score": 3.9194767441860465, + "weighted_total_content_score": 77.67441860465114, + "semantic_relevance": 3.388235294117647, + "factual_accuracy": 3.9647058823529413, + "freshness": 3.7093023255813953, + "objectivity_tone": 3.9411764705882355, + "layout_ad_density": 3.5813953488372094, + "accountability": 4.325581395348837, + "transparency": 4.3604651162790695, + "authority": 4.174418604651163, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_type": "QuoraQuestions", + "num_sources": 14, + "num_queries": 4, + "num_complete_scores": 13, + "unweighted_mean_score": 4.201923076923077, + "weighted_total_content_score": 77.36842105263158, + "semantic_relevance": 3.769230769230769, + "factual_accuracy": 4.461538461538462, + "freshness": 5.0, + "objectivity_tone": 3.6923076923076925, + "layout_ad_density": 3.6153846153846154, + "accountability": 4.230769230769231, + "transparency": 4.384615384615385, + "authority": 4.461538461538462, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3760488515963876, + "normalized_reciprocal_se_rank": 0.20085172184169675, + "reciprocal_se_rank": 0.05797165160759218, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "google-search", + "query_type": "VACOS", + "num_sources": 74, + "num_queries": 20, + "num_complete_scores": 74, + "unweighted_mean_score": 3.8462837837837838, + "weighted_total_content_score": 77.0554765291607, + "semantic_relevance": 3.7567567567567566, + "factual_accuracy": 4.216216216216216, + "freshness": 4.472972972972973, + "objectivity_tone": 3.689189189189189, + "layout_ad_density": 3.1216216216216215, + "accountability": 3.6216216216216215, + "transparency": 3.7837837837837838, + "authority": 4.108108108108108, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_type": "QuoraQuestions", + "num_sources": 85, + "num_queries": 19, + "num_complete_scores": 85, + "unweighted_mean_score": 3.8705882352941177, + "weighted_total_content_score": 76.9659442724458, + "semantic_relevance": 3.8, + "factual_accuracy": 3.823529411764706, + "freshness": 4.564705882352941, + "objectivity_tone": 3.5647058823529414, + "layout_ad_density": 3.176470588235294, + "accountability": 4.0588235294117645, + "transparency": 4.070588235294117, + "authority": 3.9058823529411764, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.329000189685051, + "normalized_reciprocal_se_rank": 0.1994493307755928, + "reciprocal_se_rank": 0.05763466928830994, + "percentage_ge_sources_not_in_se_sources": 50.588235294117645, + "percentage_ge_sources_in_se_sources": 49.411764705882355 + }, + { + "model_name": "gensee", + "query_type": "HotpotQA", + "num_sources": 45, + "num_queries": 16, + "num_complete_scores": 45, + "unweighted_mean_score": 3.786111111111111, + "weighted_total_content_score": 76.67836257309939, + "semantic_relevance": 3.8444444444444446, + "factual_accuracy": 4.288888888888889, + "freshness": 4.066666666666666, + "objectivity_tone": 4.133333333333334, + "layout_ad_density": 3.6444444444444444, + "accountability": 3.2, + "transparency": 3.466666666666667, + "authority": 3.6444444444444444, + "avg_ge_freq": 0.4888711111111113, + "relative_se_rank": 1.9318191821883404, + "normalized_reciprocal_se_rank": 0.06456158601930041, + "reciprocal_se_rank": 0.025222322854152286, + "percentage_ge_sources_not_in_se_sources": 84.44444444444443, + "percentage_ge_sources_in_se_sources": 15.555555555555555 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_type": "Pinocchios", + "num_sources": 16, + "num_queries": 4, + "num_complete_scores": 15, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 76.57894736842105, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 4.133333333333334, + "freshness": 3.8, + "objectivity_tone": 3.8, + "layout_ad_density": 4.266666666666667, + "accountability": 4.333333333333333, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.283513696623452, + "normalized_reciprocal_se_rank": 0.0625, + "reciprocal_se_rank": 0.02472694174757281, + "percentage_ge_sources_not_in_se_sources": 93.75, + "percentage_ge_sources_in_se_sources": 6.25 + }, + { + "model_name": "google-search", + "query_type": "QuoraQuestions", + "num_sources": 80, + "num_queries": 19, + "num_complete_scores": 80, + "unweighted_mean_score": 3.809375, + "weighted_total_content_score": 76.22368421052633, + "semantic_relevance": 4.05, + "factual_accuracy": 3.975, + "freshness": 4.275, + "objectivity_tone": 3.4375, + "layout_ad_density": 3.3875, + "accountability": 3.6375, + "transparency": 3.9125, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_type": "VACOS", + "num_sources": 15, + "num_queries": 4, + "num_complete_scores": 14, + "unweighted_mean_score": 4.080357142857143, + "weighted_total_content_score": 76.07017543859648, + "semantic_relevance": 4.142857142857143, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5714285714285716, + "accountability": 4.0, + "transparency": 4.285714285714286, + "authority": 4.642857142857143, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1803071364046978, + "normalized_reciprocal_se_rank": 0.052794612794612804, + "reciprocal_se_rank": 0.022394822006472487, + "percentage_ge_sources_not_in_se_sources": 93.33333333333333, + "percentage_ge_sources_in_se_sources": 6.666666666666666 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_type": "QuoraQuestions", + "num_sources": 82, + "num_queries": 19, + "num_complete_scores": 82, + "unweighted_mean_score": 3.8185975609756095, + "weighted_total_content_score": 75.69961489088577, + "semantic_relevance": 3.6219512195121952, + "factual_accuracy": 3.8292682926829267, + "freshness": 4.512195121951219, + "objectivity_tone": 3.3658536585365852, + "layout_ad_density": 3.317073170731707, + "accountability": 3.7195121951219514, + "transparency": 4.195121951219512, + "authority": 3.9878048780487805, + "avg_ge_freq": 0.8252060975609754, + "relative_se_rank": 1.3424344412739029, + "normalized_reciprocal_se_rank": 0.2024394067077523, + "reciprocal_se_rank": 0.05835315840793079, + "percentage_ge_sources_not_in_se_sources": 52.4390243902439, + "percentage_ge_sources_in_se_sources": 47.5609756097561 + }, + { + "model_name": "google-search", + "query_type": "HotpotQA", + "num_sources": 70, + "num_queries": 20, + "num_complete_scores": 69, + "unweighted_mean_score": 3.812244897959184, + "weighted_total_content_score": 75.33834586466166, + "semantic_relevance": 2.9285714285714284, + "factual_accuracy": 3.7857142857142856, + "freshness": 4.057142857142857, + "objectivity_tone": 3.9714285714285715, + "layout_ad_density": 4.072463768115942, + "accountability": 3.6714285714285713, + "transparency": 3.8857142857142857, + "authority": 4.128571428571429, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-chat-gensee", + "query_type": "VACOS", + "num_sources": 18, + "num_queries": 4, + "num_complete_scores": 16, + "unweighted_mean_score": 4.110294117647059, + "weighted_total_content_score": 74.85380116959064, + "semantic_relevance": 4.3125, + "factual_accuracy": 4.25, + "freshness": 4.882352941176471, + "objectivity_tone": 3.8125, + "layout_ad_density": 3.1875, + "accountability": 3.6470588235294117, + "transparency": 4.235294117647059, + "authority": 4.411764705882353, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.211382113821138, + "normalized_reciprocal_se_rank": 0.043995510662177335, + "reciprocal_se_rank": 0.020280474649406684, + "percentage_ge_sources_not_in_se_sources": 94.44444444444444, + "percentage_ge_sources_in_se_sources": 5.555555555555555 + }, + { + "model_name": "deepseek-chat-tavily", + "query_type": "HotpotQA", + "num_sources": 9, + "num_queries": 3, + "num_complete_scores": 8, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 74.85380116959064, + "semantic_relevance": 3.375, + "factual_accuracy": 4.75, + "freshness": 4.625, + "objectivity_tone": 4.375, + "layout_ad_density": 4.5, + "accountability": 3.125, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.740759746838774, + "normalized_reciprocal_se_rank": 0.17267917267917265, + "reciprocal_se_rank": 0.05120203421174294, + "percentage_ge_sources_not_in_se_sources": 77.77777777777777, + "percentage_ge_sources_in_se_sources": 22.22222222222222 + }, + { + "model_name": "deepseek-chat-gensee", + "query_type": "QuoraQuestions", + "num_sources": 17, + "num_queries": 4, + "num_complete_scores": 16, + "unweighted_mean_score": 3.8676470588235294, + "weighted_total_content_score": 74.61300309597522, + "semantic_relevance": 3.5294117647058822, + "factual_accuracy": 4.117647058823529, + "freshness": 4.625, + "objectivity_tone": 3.4705882352941178, + "layout_ad_density": 3.235294117647059, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.875, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.810108670223448, + "normalized_reciprocal_se_rank": 0.13864781252324507, + "reciprocal_se_rank": 0.043024595727672955, + "percentage_ge_sources_not_in_se_sources": 70.58823529411765, + "percentage_ge_sources_in_se_sources": 29.41176470588235 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_type": "QuoraQuestions", + "num_sources": 83, + "num_queries": 18, + "num_complete_scores": 81, + "unweighted_mean_score": 3.789457831325301, + "weighted_total_content_score": 74.40710209258081, + "semantic_relevance": 3.5853658536585367, + "factual_accuracy": 3.768292682926829, + "freshness": 4.463414634146342, + "objectivity_tone": 3.4878048780487805, + "layout_ad_density": 3.036144578313253, + "accountability": 3.975609756097561, + "transparency": 4.109756097560975, + "authority": 3.8902439024390243, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_type": "QuoraQuestions", + "num_sources": 59, + "num_queries": 18, + "num_complete_scores": 58, + "unweighted_mean_score": 3.8017241379310347, + "weighted_total_content_score": 74.39785905441569, + "semantic_relevance": 3.9655172413793105, + "factual_accuracy": 3.793103448275862, + "freshness": 4.517241379310345, + "objectivity_tone": 3.310344827586207, + "layout_ad_density": 3.1206896551724137, + "accountability": 3.8793103448275863, + "transparency": 4.0344827586206895, + "authority": 3.793103448275862, + "avg_ge_freq": 0.8079084745762712, + "relative_se_rank": 1.2724077582318005, + "normalized_reciprocal_se_rank": 0.18350554762304847, + "reciprocal_se_rank": 0.0538035175113636, + "percentage_ge_sources_not_in_se_sources": 49.15254237288135, + "percentage_ge_sources_in_se_sources": 50.84745762711865 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_type": "HotpotQA", + "num_sources": 8, + "num_queries": 3, + "num_complete_scores": 7, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 74.34210526315789, + "semantic_relevance": 3.4285714285714284, + "factual_accuracy": 4.714285714285714, + "freshness": 5.0, + "objectivity_tone": 4.571428571428571, + "layout_ad_density": 4.714285714285714, + "accountability": 3.142857142857143, + "transparency": 3.857142857142857, + "authority": 4.571428571428571, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.8173972683851103, + "normalized_reciprocal_se_rank": 0.19426406926406925, + "reciprocal_se_rank": 0.056388696255201105, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_type": "QuoraQuestions", + "num_sources": 90, + "num_queries": 19, + "num_complete_scores": 86, + "unweighted_mean_score": 3.828611111111111, + "weighted_total_content_score": 74.25730994152043, + "semantic_relevance": 3.558139534883721, + "factual_accuracy": 3.7790697674418605, + "freshness": 4.822222222222222, + "objectivity_tone": 3.5813953488372094, + "layout_ad_density": 3.1797752808988764, + "accountability": 3.911111111111111, + "transparency": 3.9444444444444446, + "authority": 3.8, + "avg_ge_freq": 0.49258000000000024, + "relative_se_rank": 1.7659524786645306, + "normalized_reciprocal_se_rank": 0.14047144175948556, + "reciprocal_se_rank": 0.04346279789851716, + "percentage_ge_sources_not_in_se_sources": 72.22222222222223, + "percentage_ge_sources_in_se_sources": 27.77777777777778 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_type": "HotpotQA", + "num_sources": 84, + "num_queries": 20, + "num_complete_scores": 84, + "unweighted_mean_score": 3.7202380952380953, + "weighted_total_content_score": 73.859649122807, + "semantic_relevance": 2.6785714285714284, + "factual_accuracy": 3.7738095238095237, + "freshness": 4.535714285714286, + "objectivity_tone": 4.190476190476191, + "layout_ad_density": 3.511904761904762, + "accountability": 3.5238095238095237, + "transparency": 3.761904761904762, + "authority": 3.7857142857142856, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_type": "QuoraQuestions", + "num_sources": 78, + "num_queries": 19, + "num_complete_scores": 78, + "unweighted_mean_score": 3.7115384615384617, + "weighted_total_content_score": 73.6707152496626, + "semantic_relevance": 3.5128205128205128, + "factual_accuracy": 3.769230769230769, + "freshness": 4.576923076923077, + "objectivity_tone": 3.3205128205128207, + "layout_ad_density": 3.051282051282051, + "accountability": 3.8846153846153846, + "transparency": 3.7564102564102564, + "authority": 3.8205128205128207, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9222314853068678, + "normalized_reciprocal_se_rank": 0.31851954838074426, + "reciprocal_se_rank": 0.08624620215945074, + "percentage_ge_sources_not_in_se_sources": 32.05128205128205, + "percentage_ge_sources_in_se_sources": 67.94871794871796 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_type": "QuoraQuestions", + "num_sources": 17, + "num_queries": 4, + "num_complete_scores": 16, + "unweighted_mean_score": 3.9296875, + "weighted_total_content_score": 73.49845201238391, + "semantic_relevance": 3.4375, + "factual_accuracy": 4.1875, + "freshness": 4.75, + "objectivity_tone": 3.6875, + "layout_ad_density": 3.25, + "accountability": 4.0625, + "transparency": 4.0, + "authority": 4.0625, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.5443321452749619, + "normalized_reciprocal_se_rank": 0.16442527055248699, + "reciprocal_se_rank": 0.049218693652175266, + "percentage_ge_sources_not_in_se_sources": 58.8235294117647, + "percentage_ge_sources_in_se_sources": 41.1764705882353 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_type": "HotpotQA", + "num_sources": 83, + "num_queries": 20, + "num_complete_scores": 77, + "unweighted_mean_score": 3.71617900172117, + "weighted_total_content_score": 72.63157894736844, + "semantic_relevance": 2.9753086419753085, + "factual_accuracy": 3.765432098765432, + "freshness": 4.719512195121951, + "objectivity_tone": 4.135802469135802, + "layout_ad_density": 3.5, + "accountability": 3.451219512195122, + "transparency": 3.6951219512195124, + "authority": 3.524390243902439, + "avg_ge_freq": 0.57830843373494, + "relative_se_rank": 1.9786482310103346, + "normalized_reciprocal_se_rank": 0.056782624848369385, + "reciprocal_se_rank": 0.02335310645628293, + "percentage_ge_sources_not_in_se_sources": 83.13253012048195, + "percentage_ge_sources_in_se_sources": 16.86746987951807 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_type": "QuoraQuestions", + "num_sources": 81, + "num_queries": 17, + "num_complete_scores": 76, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 72.37166991552957, + "semantic_relevance": 3.371794871794872, + "factual_accuracy": 3.7948717948717947, + "freshness": 4.9113924050632916, + "objectivity_tone": 3.5641025641025643, + "layout_ad_density": 3.0987654320987654, + "accountability": 3.7974683544303796, + "transparency": 3.8227848101265822, + "authority": 3.6455696202531644, + "avg_ge_freq": 0.5061604938271607, + "relative_se_rank": 1.7841348598268654, + "normalized_reciprocal_se_rank": 0.11912889330801175, + "reciprocal_se_rank": 0.038334369993915436, + "percentage_ge_sources_not_in_se_sources": 71.60493827160494, + "percentage_ge_sources_in_se_sources": 28.395061728395063 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_type": "HotpotQA", + "num_sources": 85, + "num_queries": 20, + "num_complete_scores": 81, + "unweighted_mean_score": 3.6874369747899154, + "weighted_total_content_score": 72.28482972136224, + "semantic_relevance": 2.9146341463414633, + "factual_accuracy": 3.8902439024390243, + "freshness": 4.211764705882353, + "objectivity_tone": 4.2317073170731705, + "layout_ad_density": 3.5714285714285716, + "accountability": 3.411764705882353, + "transparency": 3.6588235294117646, + "authority": 3.552941176470588, + "avg_ge_freq": 0.5999917647058826, + "relative_se_rank": 1.9004372674862968, + "normalized_reciprocal_se_rank": 0.06427878374204764, + "reciprocal_se_rank": 0.0251543679380163, + "percentage_ge_sources_not_in_se_sources": 78.82352941176471, + "percentage_ge_sources_in_se_sources": 21.176470588235293 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_type": "HotpotQA", + "num_sources": 79, + "num_queries": 20, + "num_complete_scores": 78, + "unweighted_mean_score": 3.590641952983725, + "weighted_total_content_score": 71.13924050632909, + "semantic_relevance": 2.949367088607595, + "factual_accuracy": 3.5949367088607596, + "freshness": 4.075949367088608, + "objectivity_tone": 3.670886075949367, + "layout_ad_density": 3.4871794871794872, + "accountability": 3.3164556962025316, + "transparency": 3.7848101265822787, + "authority": 3.848101265822785, + "avg_ge_freq": 0.8143506329113921, + "relative_se_rank": 1.61782985019127, + "normalized_reciprocal_se_rank": 0.13063939371395492, + "reciprocal_se_rank": 0.04110024266427558, + "percentage_ge_sources_not_in_se_sources": 63.29113924050633, + "percentage_ge_sources_in_se_sources": 36.70886075949367 + }, + { + "model_name": "exa", + "query_type": "HotpotQA", + "num_sources": 83, + "num_queries": 20, + "num_complete_scores": 83, + "unweighted_mean_score": 3.572289156626506, + "weighted_total_content_score": 70.83069118579579, + "semantic_relevance": 2.4939759036144578, + "factual_accuracy": 3.4578313253012047, + "freshness": 4.156626506024097, + "objectivity_tone": 4.180722891566265, + "layout_ad_density": 3.36144578313253, + "accountability": 3.3855421686746987, + "transparency": 3.783132530120482, + "authority": 3.7590361445783134, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.371670293189712, + "normalized_reciprocal_se_rank": 0.22448376867351463, + "reciprocal_se_rank": 0.06365022596766494, + "percentage_ge_sources_not_in_se_sources": 54.21686746987952, + "percentage_ge_sources_in_se_sources": 45.78313253012048 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_type": "HotpotQA", + "num_sources": 6, + "num_queries": 3, + "num_complete_scores": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 70.52631578947368, + "semantic_relevance": 3.2, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 4.4, + "layout_ad_density": 4.8, + "accountability": 2.8, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.241378176028632, + "normalized_reciprocal_se_rank": 0.259018759018759, + "reciprocal_se_rank": 0.07194868238557559, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.333333333333336 + }, + { + "model_name": "deepseek-chat-tavily", + "query_type": "QuoraQuestions", + "num_sources": 19, + "num_queries": 4, + "num_complete_scores": 17, + "unweighted_mean_score": 3.9338235294117645, + "weighted_total_content_score": 69.58448753462604, + "semantic_relevance": 3.411764705882353, + "factual_accuracy": 4.0, + "freshness": 4.764705882352941, + "objectivity_tone": 3.5294117647058822, + "layout_ad_density": 3.411764705882353, + "accountability": 4.117647058823529, + "transparency": 4.117647058823529, + "authority": 4.117647058823529, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2778629209492955, + "normalized_reciprocal_se_rank": 0.2256203749740223, + "reciprocal_se_rank": 0.06392334253016554, + "percentage_ge_sources_not_in_se_sources": 47.36842105263158, + "percentage_ge_sources_in_se_sources": 52.63157894736842 + }, + { + "model_name": "tavily", + "query_type": "HotpotQA", + "num_sources": 77, + "num_queries": 18, + "num_complete_scores": 73, + "unweighted_mean_score": 3.5633333333333335, + "weighted_total_content_score": 68.33902939166094, + "semantic_relevance": 2.635135135135135, + "factual_accuracy": 3.5675675675675675, + "freshness": 4.162162162162162, + "objectivity_tone": 4.108108108108108, + "layout_ad_density": 3.472972972972973, + "accountability": 3.4054054054054053, + "transparency": 3.5, + "authority": 3.77027027027027, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.150415973913549, + "normalized_reciprocal_se_rank": 0.27249775517340985, + "reciprocal_se_rank": 0.07518756738390195, + "percentage_ge_sources_not_in_se_sources": 41.55844155844156, + "percentage_ge_sources_in_se_sources": 58.44155844155844 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_type": "VACOS", + "num_sources": 20, + "num_queries": 4, + "num_complete_scores": 16, + "unweighted_mean_score": 4.051470588235294, + "weighted_total_content_score": 67.57894736842104, + "semantic_relevance": 4.411764705882353, + "factual_accuracy": 4.411764705882353, + "freshness": 4.625, + "objectivity_tone": 3.8823529411764706, + "layout_ad_density": 2.8823529411764706, + "accountability": 3.9375, + "transparency": 3.9375, + "authority": 4.3125, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1174661246612465, + "normalized_reciprocal_se_rank": 0.0791919191919192, + "reciprocal_se_rank": 0.0287378640776699, + "percentage_ge_sources_not_in_se_sources": 90.0, + "percentage_ge_sources_in_se_sources": 10.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_type": "VACOS", + "num_sources": 13, + "num_queries": 4, + "num_complete_scores": 10, + "unweighted_mean_score": 3.525, + "weighted_total_content_score": 54.17004048582995, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 4.6, + "objectivity_tone": 3.5, + "layout_ad_density": 2.1, + "accountability": 3.2, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1902022097144047, + "normalized_reciprocal_se_rank": 0.060916860916860924, + "reciprocal_se_rank": 0.024346527259148616, + "percentage_ge_sources_not_in_se_sources": 92.3076923076923, + "percentage_ge_sources_in_se_sources": 7.692307692307692 + } + ], + "queries": [ + { + "model_name": "claude", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 5.0, + "weighted_total_content_score": 100.0, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 1, + "unweighted_mean_score": 5.0, + "weighted_total_content_score": 100.0, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.2127659574468085, + "normalized_reciprocal_se_rank": 0.27972027972027974, + "reciprocal_se_rank": 0.07692307692307693, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 5.0, + "weighted_total_content_score": 100.0, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 5.0, + "weighted_total_content_score": 100.0, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.8332999999999999, + "relative_se_rank": 0.031914893617021274, + "normalized_reciprocal_se_rank": 0.8959595959595961, + "reciprocal_se_rank": 0.225, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.95, + "weighted_total_content_score": 99.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.6, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.9333200000000001, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.95, + "weighted_total_content_score": 99.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.8, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.8666600000000001, + "relative_se_rank": 1.3041666666666667, + "normalized_reciprocal_se_rank": 0.1721019721019721, + "reciprocal_se_rank": 0.05106333795654183, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.9375, + "weighted_total_content_score": 98.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.75, + "accountability": 5.0, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5744680851063829, + "normalized_reciprocal_se_rank": 0.567929292929293, + "reciprocal_se_rank": 0.1461771844660194, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gpt-5", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.916666666666667, + "weighted_total_content_score": 98.59649122807018, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.666666666666667, + "accountability": 4.666666666666667, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.925, + "weighted_total_content_score": 98.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.6, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.9333199999999999, + "relative_se_rank": 1.5391304347826087, + "normalized_reciprocal_se_rank": 0.040330910919146215, + "reciprocal_se_rank": 0.019399903351930765, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.333333333333333, + "accountability": 4.666666666666667, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.8889, + "relative_se_rank": 0.8936170212765958, + "normalized_reciprocal_se_rank": 0.3694083694083694, + "reciprocal_se_rank": 0.09847434119278779, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8414634146341463, + "normalized_reciprocal_se_rank": 0.46525784157363104, + "reciprocal_se_rank": 0.12150613183444048, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "exa", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.0851063829787233, + "normalized_reciprocal_se_rank": 0.395959595959596, + "reciprocal_se_rank": 0.10485436893203884, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-4o", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 4.75, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.1329787234042552, + "normalized_reciprocal_se_rank": 0.23322973322973323, + "reciprocal_se_rank": 0.06575180482947474, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06097560975609756, + "normalized_reciprocal_se_rank": 0.7225589225589226, + "reciprocal_se_rank": 0.18333333333333335, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.875, + "weighted_total_content_score": 97.36842105263158, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.85, + "weighted_total_content_score": 97.05263157894737, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 4.6, + "objectivity_tone": 5.0, + "layout_ad_density": 4.8, + "accountability": 5.0, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.85, + "weighted_total_content_score": 97.05263157894736, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.93334, + "relative_se_rank": 0.9083333333333334, + "normalized_reciprocal_se_rank": 0.26806156806156806, + "reciprocal_se_rank": 0.0741215903837263, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.825, + "weighted_total_content_score": 96.84210526315789, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 4.8, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.3041666666666667, + "normalized_reciprocal_se_rank": 0.1721019721019721, + "reciprocal_se_rank": 0.05106333795654183, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "claude", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.833333333333333, + "weighted_total_content_score": 96.84210526315788, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 4.666666666666667, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.8889, + "relative_se_rank": 0.8222222222222223, + "normalized_reciprocal_se_rank": 0.3661054994388328, + "reciprocal_se_rank": 0.097680690399137, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "exa", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.825, + "weighted_total_content_score": 96.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.2, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 0.6958056758056758, + "reciprocal_se_rank": 0.1769047619047619, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.8, + "weighted_total_content_score": 96.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.2, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.10833333333333335, + "normalized_reciprocal_se_rank": 0.5108449575116241, + "reciprocal_se_rank": 0.13246031746031744, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.8125, + "weighted_total_content_score": 96.57894736842105, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.75, + "avg_ge_freq": 0.916675, + "relative_se_rank": 0.9085365853658537, + "normalized_reciprocal_se_rank": 0.1628658283716935, + "reciprocal_se_rank": 0.0488439733223244, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "exa", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.84375, + "weighted_total_content_score": 96.57894736842104, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.8125, + "weighted_total_content_score": 96.57894736842104, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 3.75, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.8, + "weighted_total_content_score": 96.42105263157895, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.4, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5191489361702127, + "normalized_reciprocal_se_rank": 0.39336589336589334, + "reciprocal_se_rank": 0.10423112486219281, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.8, + "weighted_total_content_score": 96.42105263157893, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.6, + "objectivity_tone": 4.8, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.93334, + "relative_se_rank": 0.9702127659574469, + "normalized_reciprocal_se_rank": 0.38002886002886005, + "reciprocal_se_rank": 0.10102635228848822, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "claude", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.8, + "weighted_total_content_score": 96.21052631578948, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 4.8, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 4.4, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.86668, + "relative_se_rank": 0.14666666666666667, + "normalized_reciprocal_se_rank": 0.5259164746884045, + "reciprocal_se_rank": 0.13608187134502925, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.825, + "weighted_total_content_score": 96.21052631578947, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.4, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.9951219512195124, + "normalized_reciprocal_se_rank": 0.06127946127946128, + "reciprocal_se_rank": 0.0244336569579288, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.023809523809523808, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.02702702702702703, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.127659574468085, + "normalized_reciprocal_se_rank": 0.21099887766554432, + "reciprocal_se_rank": 0.060409924487594385, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "exa", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.6, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.08936170212765956, + "normalized_reciprocal_se_rank": 0.6264454064454065, + "reciprocal_se_rank": 0.16023809523809524, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 0.03333333333333333, + "normalized_reciprocal_se_rank": 0.8959595959595961, + "reciprocal_se_rank": 0.225, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 95.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.8, + "objectivity_tone": 4.4, + "layout_ad_density": 4.8, + "accountability": 4.2, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.813953488372093, + "normalized_reciprocal_se_rank": 0.23975511209657557, + "reciprocal_se_rank": 0.06731979635330335, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "google-search", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 95.36842105263159, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.6, + "accountability": 4.4, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 95.36842105263158, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5022222222222222, + "normalized_reciprocal_se_rank": 0.553605900272567, + "reciprocal_se_rank": 0.14273539836646632, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "exa", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.26315789473685, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.75, + "accountability": 4.25, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0548780487804876, + "normalized_reciprocal_se_rank": 0.14183654729109277, + "reciprocal_se_rank": 0.04379082082965578, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.5, + "layout_ad_density": 5.0, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.23255813953488375, + "normalized_reciprocal_se_rank": 0.2816257816257816, + "reciprocal_se_rank": 0.07738095238095238, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 95.26315789473684, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 4.25, + "objectivity_tone": 4.75, + "layout_ad_density": 4.5, + "accountability": 4.75, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 0.6951219512195121, + "normalized_reciprocal_se_rank": 0.13046231546231546, + "reciprocal_se_rank": 0.04105769230769231, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.725, + "weighted_total_content_score": 95.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.6, + "objectivity_tone": 4.8, + "layout_ad_density": 4.4, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 0.6936170212765957, + "normalized_reciprocal_se_rank": 0.28368377734459604, + "reciprocal_se_rank": 0.07787547076969661, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.725, + "weighted_total_content_score": 95.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.8, + "layout_ad_density": 5.0, + "accountability": 4.2, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.231111111111111, + "normalized_reciprocal_se_rank": 0.22700758374382662, + "reciprocal_se_rank": 0.06425667667630786, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 95.15789473684211, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.8744186046511628, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.8, + "weighted_total_content_score": 95.15789473684211, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.8, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.73332, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.708333333333333, + "weighted_total_content_score": 95.0877192982456, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.6666666666666665, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7629629629629631, + "normalized_reciprocal_se_rank": 0.5973063973063973, + "reciprocal_se_rank": 0.15323624595469257, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 94.94736842105263, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.6, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-5", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.775, + "weighted_total_content_score": 94.73684210526315, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.6666599999999998, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 94.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.7, + "weighted_total_content_score": 94.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.8, + "layout_ad_density": 4.6, + "accountability": 4.6, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.86668, + "relative_se_rank": 0.9125, + "normalized_reciprocal_se_rank": 0.25650152316818986, + "reciprocal_se_rank": 0.07134381260594852, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "exa", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 94.52631578947368, + "semantic_relevance": 3.8, + "factual_accuracy": 5.0, + "freshness": 4.6, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3466666666666667, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.675, + "weighted_total_content_score": 94.52631578947367, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 5.0, + "layout_ad_density": 3.8, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.7106382978723402, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.675, + "weighted_total_content_score": 94.52631578947367, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.2, + "objectivity_tone": 5.0, + "layout_ad_density": 4.2, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8622222222222224, + "normalized_reciprocal_se_rank": 0.029752066115702486, + "reciprocal_se_rank": 0.016857899382171224, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.725, + "weighted_total_content_score": 94.52631578947367, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.86668, + "relative_se_rank": 1.422222222222222, + "normalized_reciprocal_se_rank": 0.13411896745230079, + "reciprocal_se_rank": 0.04193635382955771, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.725, + "weighted_total_content_score": 94.52631578947367, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 4.2, + "layout_ad_density": 4.2, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6829268292682926, + "normalized_reciprocal_se_rank": 0.07454027454027454, + "reciprocal_se_rank": 0.027620114513318396, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.7, + "weighted_total_content_score": 94.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 0.9957446808510637, + "normalized_reciprocal_se_rank": 0.22842712842712842, + "reciprocal_se_rank": 0.06459778085991677, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.7, + "weighted_total_content_score": 94.3157894736842, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 4.2, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.00002, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.71875, + "weighted_total_content_score": 94.21052631578948, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 4.75, + "objectivity_tone": 4.25, + "layout_ad_density": 4.25, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6976744186046512, + "normalized_reciprocal_se_rank": 0.39652939652939656, + "reciprocal_se_rank": 0.10499128703012198, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "claude", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.65625, + "weighted_total_content_score": 94.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.25, + "avg_ge_freq": 0.583325, + "relative_se_rank": 1.6063829787234043, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-5", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.8125, + "weighted_total_content_score": 94.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.75, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.75, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.750025, + "relative_se_rank": 1.4388888888888889, + "normalized_reciprocal_se_rank": 0.048523856450685715, + "reciprocal_se_rank": 0.02136859657431526, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-5", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 94.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 4.0, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 94.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.5, + "relative_se_rank": 0.03125, + "normalized_reciprocal_se_rank": 0.8959595959595961, + "reciprocal_se_rank": 0.225, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "tavily", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 94.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06097560975609756, + "normalized_reciprocal_se_rank": 0.777056277056277, + "reciprocal_se_rank": 0.19642857142857142, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 94.10526315789474, + "semantic_relevance": 3.4, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.6, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.902325581395349, + "normalized_reciprocal_se_rank": 0.06127946127946128, + "reciprocal_se_rank": 0.0244336569579288, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.725, + "weighted_total_content_score": 93.6842105263158, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.6, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.0583333333333333, + "normalized_reciprocal_se_rank": 0.18293760111941929, + "reciprocal_se_rank": 0.053667044929180854, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "claude", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 1, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.20454545454545456, + "normalized_reciprocal_se_rank": 0.3063973063973064, + "reciprocal_se_rank": 0.08333333333333333, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 4.75, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 4.25, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.08333333333333334, + "normalized_reciprocal_se_rank": 0.6457671957671958, + "reciprocal_se_rank": 0.16488095238095238, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 0.03125, + "normalized_reciprocal_se_rank": 0.8959595959595961, + "reciprocal_se_rank": 0.225, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "google-search", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.675, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 4.8, + "objectivity_tone": 4.6, + "layout_ad_density": 4.2, + "accountability": 4.2, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-5", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.3297872340425532, + "normalized_reciprocal_se_rank": 0.05411255411255411, + "reciprocal_se_rank": 0.02271151178918169, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.65625, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.75, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.750025, + "relative_se_rank": 1.9878048780487805, + "normalized_reciprocal_se_rank": 0.025774991292232673, + "reciprocal_se_rank": 0.015902243053230663, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "tavily", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 1, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 93.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.23255813953488372, + "normalized_reciprocal_se_rank": 0.27972027972027974, + "reciprocal_se_rank": 0.07692307692307693, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 93.47368421052632, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 4.6, + "objectivity_tone": 4.8, + "layout_ad_density": 4.6, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.675, + "weighted_total_content_score": 93.47368421052632, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 0.9333333333333332, + "normalized_reciprocal_se_rank": 0.4335353535353536, + "reciprocal_se_rank": 0.11388349514563108, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "google-search", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 93.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 3.6, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 5.0, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.675, + "weighted_total_content_score": 93.4736842105263, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.3822222222222222, + "normalized_reciprocal_se_rank": 0.25594405594405595, + "reciprocal_se_rank": 0.07120985810306199, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 93.33333333333333, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4166666666666667, + "normalized_reciprocal_se_rank": 0.1847041847041847, + "reciprocal_se_rank": 0.05409153952843273, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-5", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 93.26315789473685, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 5.0, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 0.73332, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.65625, + "weighted_total_content_score": 93.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 4.75, + "objectivity_tone": 4.25, + "layout_ad_density": 4.25, + "accountability": 4.5, + "transparency": 5.0, + "authority": 4.75, + "avg_ge_freq": 0.833325, + "relative_se_rank": 0.38372093023255816, + "normalized_reciprocal_se_rank": 0.41673934466617396, + "reciprocal_se_rank": 0.10984756097560976, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 93.15789473684211, + "semantic_relevance": 3.5, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 93.1578947368421, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.14444444444444443, + "normalized_reciprocal_se_rank": 0.5243867243867244, + "reciprocal_se_rank": 0.13571428571428573, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.65625, + "weighted_total_content_score": 93.1578947368421, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 4.25, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.49999999999999994, + "relative_se_rank": 1.7159090909090913, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-5", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.708333333333333, + "weighted_total_content_score": 92.98245614035089, + "semantic_relevance": 3.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.666666666666667, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.666666666666667, + "weighted_total_content_score": 92.98245614035086, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.0, + "accountability": 4.666666666666667, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 0.2681159420289855, + "normalized_reciprocal_se_rank": 0.31783031783031784, + "reciprocal_se_rank": 0.08608058608058607, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "tavily", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.98245614035086, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.09629629629629628, + "normalized_reciprocal_se_rank": 0.6905464905464905, + "reciprocal_se_rank": 0.17564102564102566, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.84210526315789, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 4.8, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.3659574468085107, + "normalized_reciprocal_se_rank": 0.10731490731490731, + "reciprocal_se_rank": 0.03549557238877627, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.6, + "weighted_total_content_score": 92.84210526315789, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.59998, + "relative_se_rank": 1.3555555555555556, + "normalized_reciprocal_se_rank": 0.3108225108225108, + "reciprocal_se_rank": 0.08439667128987517, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.675, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 3.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.4, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 5.0, + "layout_ad_density": 3.4, + "accountability": 5.0, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7148936170212763, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 4.8, + "objectivity_tone": 5.0, + "layout_ad_density": 4.8, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.3414634146341464, + "normalized_reciprocal_se_rank": 0.11282898919262556, + "reciprocal_se_rank": 0.03682055808269401, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 0.93334, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.125, + "normalized_reciprocal_se_rank": 0.16896235078053262, + "reciprocal_se_rank": 0.05030891438658429, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 0.6531986531986531, + "reciprocal_se_rank": 0.16666666666666666, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.6315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 5.0, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7777777777777778, + "normalized_reciprocal_se_rank": 0.4817059483726151, + "reciprocal_se_rank": 0.12545846817691478, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 92.42105263157895, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.8, + "layout_ad_density": 4.4, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3208333333333335, + "normalized_reciprocal_se_rank": 0.19654320987654322, + "reciprocal_se_rank": 0.05693635382955771, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.28070175438596, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 4.666666666666667, + "objectivity_tone": 4.0, + "layout_ad_density": 4.666666666666667, + "accountability": 5.0, + "transparency": 4.333333333333333, + "authority": 4.666666666666667, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 1.5555555555555556, + "normalized_reciprocal_se_rank": 0.09324009324009325, + "reciprocal_se_rank": 0.03211351755041075, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gensee", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.541666666666667, + "weighted_total_content_score": 92.28070175438596, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.6666666666666665, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 0.4444333333333333, + "relative_se_rank": 1.3252032520325203, + "normalized_reciprocal_se_rank": 0.05557877626843144, + "reciprocal_se_rank": 0.02306383216158911, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.6, + "weighted_total_content_score": 92.21052631578947, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 3.8, + "objectivity_tone": 4.8, + "layout_ad_density": 4.4, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 92.21052631578947, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 4.8, + "accountability": 4.6, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9608695652173912, + "normalized_reciprocal_se_rank": 0.3149923477196205, + "reciprocal_se_rank": 0.08539864666078259, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "google-search", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.6, + "weighted_total_content_score": 92.21052631578947, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 4.6, + "layout_ad_density": 4.6, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 92.21052631578947, + "semantic_relevance": 3.4, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.59998, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 92.10526315789474, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 92.10526315789474, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 0.03333333333333333, + "normalized_reciprocal_se_rank": 0.8959595959595961, + "reciprocal_se_rank": 0.225, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 92.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.1951219512195122, + "normalized_reciprocal_se_rank": 0.3955747955747956, + "reciprocal_se_rank": 0.10476190476190475, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 92.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.25, + "avg_ge_freq": 0.749975, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 92.0, + "semantic_relevance": 3.4, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.8, + "accountability": 3.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.6, + "weighted_total_content_score": 92.0, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.2, + "accountability": 4.2, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.6667, + "relative_se_rank": 1.538095238095238, + "normalized_reciprocal_se_rank": 0.0998834498834499, + "reciprocal_se_rank": 0.033709858103061985, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.59375, + "weighted_total_content_score": 91.8421052631579, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 4.25, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 91.84210526315789, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 3.25, + "objectivity_tone": 4.5, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7333333333333334, + "normalized_reciprocal_se_rank": 0.4703914141414142, + "reciprocal_se_rank": 0.12273968446601942, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gensee", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.59375, + "weighted_total_content_score": 91.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 4.25, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 1.152173913043478, + "normalized_reciprocal_se_rank": 0.2138888888888889, + "reciprocal_se_rank": 0.061104368932038834, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 91.78947368421053, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.8, + "layout_ad_density": 4.6, + "accountability": 4.2, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.9658536585365856, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 91.78947368421052, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4044444444444444, + "normalized_reciprocal_se_rank": 0.19926322043969105, + "reciprocal_se_rank": 0.057589948600799544, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 91.57894736842105, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.75, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 3.75, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 0.75, + "relative_se_rank": 0.3469387755102041, + "normalized_reciprocal_se_rank": 0.40063173730586943, + "reciprocal_se_rank": 0.1059770436730123, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.65, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 3.2, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.8, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.9609756097560975, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "claude", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.66665, + "relative_se_rank": 1.5365853658536586, + "normalized_reciprocal_se_rank": 0.051549982584465345, + "reciprocal_se_rank": 0.02209574824238366, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "claude", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 0.0975609756097561, + "normalized_reciprocal_se_rank": 0.5541125541125541, + "reciprocal_se_rank": 0.14285714285714285, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.125, + "normalized_reciprocal_se_rank": 0.42199775533108863, + "reciprocal_se_rank": 0.1111111111111111, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.0510204081632653, + "normalized_reciprocal_se_rank": 0.777056277056277, + "reciprocal_se_rank": 0.19642857142857142, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.07317073170731707, + "normalized_reciprocal_se_rank": 0.6531986531986531, + "reciprocal_se_rank": 0.16666666666666666, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 3.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.02702702702702703, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.329268292682927, + "normalized_reciprocal_se_rank": 0.1531986531986532, + "reciprocal_se_rank": 0.0465210355987055, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-5", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 3.0, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.1744186046511629, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-5", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 3.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.044444444444444446, + "normalized_reciprocal_se_rank": 0.791919191919192, + "reciprocal_se_rank": 0.2, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "tavily", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.6666666666666665, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8571428571428571, + "normalized_reciprocal_se_rank": 0.4046389824167602, + "reciprocal_se_rank": 0.10693994965839626, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.55, + "weighted_total_content_score": 91.36842105263159, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 5.0, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.09583333333333335, + "normalized_reciprocal_se_rank": 0.6000224466891134, + "reciprocal_se_rank": 0.15388888888888888, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "google-search", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.55, + "weighted_total_content_score": 91.36842105263159, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 5.0, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 91.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.2, + "objectivity_tone": 4.4, + "layout_ad_density": 4.6, + "accountability": 4.2, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 91.3157894736842, + "semantic_relevance": 4.75, + "factual_accuracy": 4.75, + "freshness": 4.25, + "objectivity_tone": 4.75, + "layout_ad_density": 4.25, + "accountability": 4.0, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 91.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.25, + "objectivity_tone": 4.25, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 91.15789473684211, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 3.6, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9224489795918368, + "normalized_reciprocal_se_rank": 0.23052318052318052, + "reciprocal_se_rank": 0.06510144386357979, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-4o", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 91.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.03333333333333333, + "normalized_reciprocal_se_rank": 0.8959595959595961, + "reciprocal_se_rank": 0.225, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "google-search", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 91.05263157894736, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 4.25, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 90.94736842105263, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 4.4, + "layout_ad_density": 4.0, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1333333333333333, + "normalized_reciprocal_se_rank": 0.09965874658234361, + "reciprocal_se_rank": 0.03365586386323305, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-4o", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.52, + "normalized_reciprocal_se_rank": 0.2108356290174472, + "reciprocal_se_rank": 0.06037069726390114, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.87719298245612, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.8699186991869918, + "normalized_reciprocal_se_rank": 0.02856851341699827, + "reciprocal_se_rank": 0.016573502010395213, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 90.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.75, + "accountability": 4.0, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.73684210526316, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 3.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.4, + "accountability": 4.2, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 90.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.2, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.4428571428571426, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.0510204081632653, + "normalized_reciprocal_se_rank": 0.777056277056277, + "reciprocal_se_rank": 0.19642857142857142, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0533333333333332, + "normalized_reciprocal_se_rank": 0.2660915032679739, + "reciprocal_se_rank": 0.073648201027984, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 3.6, + "objectivity_tone": 4.6, + "layout_ad_density": 4.4, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 0.6, + "relative_se_rank": 0.8816326530612244, + "normalized_reciprocal_se_rank": 0.345679012345679, + "reciprocal_se_rank": 0.09277238403451996, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 3.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 3.6, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947368, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9268292682926829, + "normalized_reciprocal_se_rank": 0.34298540965207636, + "reciprocal_se_rank": 0.09212513484358144, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "exa", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 90.52631578947367, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.25, + "objectivity_tone": 5.0, + "layout_ad_density": 3.25, + "accountability": 4.75, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6648936170212765, + "normalized_reciprocal_se_rank": 0.05492424242424243, + "reciprocal_se_rank": 0.02290655339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gensee", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 90.52631578947367, + "semantic_relevance": 3.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.52631578947367, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 4.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.66668, + "relative_se_rank": 1.3466666666666667, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.1609756097560977, + "normalized_reciprocal_se_rank": 0.010013175230566534, + "reciprocal_se_rank": 0.012114816378218657, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 4.2, + "layout_ad_density": 4.8, + "accountability": 3.8, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.961904761904762, + "normalized_reciprocal_se_rank": 0.04740740740740741, + "reciprocal_se_rank": 0.02110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 3.2, + "objectivity_tone": 4.4, + "layout_ad_density": 4.8, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.462222222222222, + "normalized_reciprocal_se_rank": 0.21876832844574778, + "reciprocal_se_rank": 0.0622768556216724, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 4.2, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9955555555555555, + "normalized_reciprocal_se_rank": 0.38498316498316504, + "reciprocal_se_rank": 0.1022168284789644, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 3.6, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.59998, + "relative_se_rank": 1.302127659574468, + "normalized_reciprocal_se_rank": 0.295959595959596, + "reciprocal_se_rank": 0.0808252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 90.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.4, + "accountability": 4.4, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.1024390243902438, + "normalized_reciprocal_se_rank": 0.31562289562289564, + "reciprocal_se_rank": 0.08555016181229774, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "exa", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 90.26315789473684, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.25, + "accountability": 5.0, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.755813953488372, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-5", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 90.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.25, + "objectivity_tone": 4.75, + "layout_ad_density": 3.75, + "accountability": 3.75, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.49997499999999995, + "relative_se_rank": 1.622340425531915, + "normalized_reciprocal_se_rank": 0.11994949494949496, + "reciprocal_se_rank": 0.03853155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-5", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 90.10526315789474, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 4.4, + "accountability": 5.0, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 0.73332, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.10526315789473, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.6, + "accountability": 4.2, + "transparency": 5.0, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 90.10526315789473, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.4, + "accountability": 4.2, + "transparency": 4.4, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.104761904761905, + "normalized_reciprocal_se_rank": 0.010415263748597083, + "reciprocal_se_rank": 0.012211434735706577, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "google-search", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 90.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 3.8, + "layout_ad_density": 4.6, + "accountability": 4.2, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 90.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.80002, + "relative_se_rank": 0.2622222222222222, + "normalized_reciprocal_se_rank": 0.4556088913044182, + "reciprocal_se_rank": 0.11918757339596456, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 90.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.9608695652173914, + "normalized_reciprocal_se_rank": 0.2823416235180941, + "reciprocal_se_rank": 0.07755296293274591, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "exa", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 90.0, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.25, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.75625, + "normalized_reciprocal_se_rank": 0.39081289081289083, + "reciprocal_se_rank": 0.10361766065649561, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gpt-4o", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 90.0, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 89.99999999999999, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.25, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.46875, + "weighted_total_content_score": 89.99999999999999, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.6777777777777778, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 89.89473684210527, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 89.89473684210527, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 4.6, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.2, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8, + "normalized_reciprocal_se_rank": 0.26535710017558534, + "reciprocal_se_rank": 0.07347173038199746, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "exa", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.571428571428571, + "weighted_total_content_score": 89.89473684210526, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.4, + "layout_ad_density": 4.25, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1632653061224492, + "normalized_reciprocal_se_rank": 0.06369686527789295, + "reciprocal_se_rank": 0.02501453801580437, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 89.89473684210525, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 5.0, + "authority": 4.6, + "avg_ge_freq": 0.73334, + "relative_se_rank": 0.5511111111111111, + "normalized_reciprocal_se_rank": 0.4900691121743754, + "reciprocal_se_rank": 0.12746806336228922, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.416666666666667, + "weighted_total_content_score": 89.82456140350877, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 1.4397163120567376, + "normalized_reciprocal_se_rank": 0.21773288439955105, + "reciprocal_se_rank": 0.062028047464940665, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "exa", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 89.73684210526315, + "semantic_relevance": 3.75, + "factual_accuracy": 4.0, + "freshness": 4.25, + "objectivity_tone": 5.0, + "layout_ad_density": 4.75, + "accountability": 4.75, + "transparency": 5.0, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.46875, + "weighted_total_content_score": 89.73684210526315, + "semantic_relevance": 4.75, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.75, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.75, + "relative_se_rank": 1.7000000000000002, + "normalized_reciprocal_se_rank": 0.10549943883277216, + "reciprocal_se_rank": 0.035059331175836025, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 89.6842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.8, + "accountability": 4.0, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.9627906976744185, + "normalized_reciprocal_se_rank": 0.025212121212121213, + "reciprocal_se_rank": 0.015766990291262134, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 89.6842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.8, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.104761904761905, + "normalized_reciprocal_se_rank": 0.010415263748597083, + "reciprocal_se_rank": 0.012211434735706581, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 89.6842105263158, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.86668, + "relative_se_rank": 1.1142857142857143, + "normalized_reciprocal_se_rank": 0.26926103136629453, + "reciprocal_se_rank": 0.07440981093510475, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 89.47368421052633, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.8, + "accountability": 3.8, + "transparency": 5.0, + "authority": 4.4, + "avg_ge_freq": 0.66668, + "relative_se_rank": 1.3511111111111112, + "normalized_reciprocal_se_rank": 0.3306397306397306, + "reciprocal_se_rank": 0.08915857605177993, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "claude", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.2708333333333333, + "normalized_reciprocal_se_rank": 0.21969696969696972, + "reciprocal_se_rank": 0.0625, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 2.5, + "accountability": 4.5, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2560975609756098, + "normalized_reciprocal_se_rank": 0.32659932659932656, + "reciprocal_se_rank": 0.08818770226537216, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "exa", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2146341463414634, + "normalized_reciprocal_se_rank": 0.23188305822452165, + "reciprocal_se_rank": 0.06542821059278554, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.416666666666667, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 5.0, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 3.3333333333333335, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 3.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.020833333333333332, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 89.47368421052632, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.16665, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 89.4736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.364864864864865, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.5625, + "weighted_total_content_score": 89.4736842105263, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.38636363636363635, + "normalized_reciprocal_se_rank": 0.1724663514944845, + "reciprocal_se_rank": 0.05115089514066496, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "google-search", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 89.26315789473685, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 4.6, + "accountability": 3.8, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 89.26315789473684, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 4.2, + "objectivity_tone": 4.6, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.097560975609756, + "normalized_reciprocal_se_rank": 0.01714110805019896, + "reciprocal_se_rank": 0.013827596351868195, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 89.26315789473684, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.2, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.48888888888888893, + "normalized_reciprocal_se_rank": 0.5998460798460798, + "reciprocal_se_rank": 0.15384650947757744, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "gpt-4o", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.416666666666667, + "weighted_total_content_score": 89.12280701754385, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 4.666666666666667, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.333333333333333, + "transparency": 4.333333333333333, + "authority": 4.666666666666667, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.5777777777777777, + "normalized_reciprocal_se_rank": 0.12367853620378873, + "reciprocal_se_rank": 0.03942760942760943, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 89.12280701754385, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.666666666666667, + "accountability": 4.0, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.458333333333333, + "weighted_total_content_score": 89.12280701754385, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 89.05263157894737, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 4.4, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.488888888888889, + "normalized_reciprocal_se_rank": 0.10232884399551065, + "reciprocal_se_rank": 0.03429746494066882, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 89.05263157894736, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.46875, + "weighted_total_content_score": 88.94736842105263, + "semantic_relevance": 3.25, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.5, + "accountability": 3.75, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.292682926829268, + "normalized_reciprocal_se_rank": 0.22300931391840484, + "reciprocal_se_rank": 0.06329592737359728, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 5.0, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.2, + "layout_ad_density": 4.4, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.93334, + "relative_se_rank": 0.9511111111111112, + "normalized_reciprocal_se_rank": 0.3721019721019721, + "reciprocal_se_rank": 0.0991215903837263, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 4.6, + "objectivity_tone": 4.2, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.8651162790697675, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 3.4, + "objectivity_tone": 4.6, + "layout_ad_density": 4.2, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.502439024390244, + "normalized_reciprocal_se_rank": 0.22164502164502164, + "reciprocal_se_rank": 0.06296809986130374, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 88.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 0.86668, + "relative_se_rank": 1.7106382978723402, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.416666666666667, + "weighted_total_content_score": 88.77192982456141, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 4.333333333333333, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.0, + "accountability": 3.6666666666666665, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8108108108108107, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 88.77192982456138, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.666666666666667, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.6444444444444446, + "normalized_reciprocal_se_rank": 0.04202020202020202, + "reciprocal_se_rank": 0.019805825242718445, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-5", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.77192982456138, + "semantic_relevance": 5.0, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4565217391304348, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.53125, + "weighted_total_content_score": 88.6842105263158, + "semantic_relevance": 2.0, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.475, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 3.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.45, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.8, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 3.2, + "objectivity_tone": 4.4, + "layout_ad_density": 4.4, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.791111111111111, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.525, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 2.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.8, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 4.4, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.7822222222222224, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.55, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 2.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.2, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 3.4, + "objectivity_tone": 3.8, + "layout_ad_density": 4.4, + "accountability": 4.6, + "transparency": 5.0, + "authority": 4.6, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.9142857142857141, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.42105263157896, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 2.6666666666666665, + "objectivity_tone": 5.0, + "layout_ad_density": 4.333333333333333, + "accountability": 4.666666666666667, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 0.4444333333333333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.9411764705882355, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.02702702702702703, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.416666666666667, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4565217391304348, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 4.25, + "objectivity_tone": 4.75, + "layout_ad_density": 4.75, + "accountability": 3.5, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8353658536585367, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gensee", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 2.6, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.66668, + "relative_se_rank": 1.5761904761904764, + "normalized_reciprocal_se_rank": 0.0754930254930255, + "reciprocal_se_rank": 0.027849052242256124, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.333333333333333, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 4.333333333333333, + "authority": 4.666666666666667, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4397163120567376, + "normalized_reciprocal_se_rank": 0.21773288439955105, + "reciprocal_se_rank": 0.062028047464940665, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-5", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 2.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.42105263157895, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 4.333333333333333, + "accountability": 4.333333333333333, + "transparency": 5.0, + "authority": 4.333333333333333, + "avg_ge_freq": 0.4444333333333333, + "relative_se_rank": 1.4305555555555556, + "normalized_reciprocal_se_rank": 0.14066591844369622, + "reciprocal_se_rank": 0.04350952894642215, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 88.42105263157893, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 4.5, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 88.42105263157893, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.25, + "accountability": 3.75, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 0.66665, + "relative_se_rank": 1.7159090909090913, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "exa", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.42105263157893, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.1, + "normalized_reciprocal_se_rank": 0.5663331729998397, + "reciprocal_se_rank": 0.1457936507936508, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 88.42105263157893, + "semantic_relevance": 3.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.42105263157893, + "semantic_relevance": 4.4, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.15, + "normalized_reciprocal_se_rank": 0.45304473304473303, + "reciprocal_se_rank": 0.11857142857142855, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 88.21052631578947, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.5073170731707317, + "normalized_reciprocal_se_rank": 0.2675849403122131, + "reciprocal_se_rank": 0.07400706090026479, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 4.5, + "objectivity_tone": 4.25, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7045454545454546, + "normalized_reciprocal_se_rank": 0.3295033670033671, + "reciprocal_se_rank": 0.08888551779935275, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 88.0701754385965, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 4.0, + "accountability": 4.666666666666667, + "transparency": 5.0, + "authority": 4.666666666666667, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 88.0, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.4, + "accountability": 5.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 87.99999999999999, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 87.99999999999999, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 2.4, + "accountability": 5.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 50, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 87.99999999999999, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.8, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.0680851063829788, + "normalized_reciprocal_se_rank": 0.10487719298245615, + "reciprocal_se_rank": 0.03490981093510475, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "exa", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 87.99999999999999, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 3.2, + "objectivity_tone": 4.8, + "layout_ad_density": 3.8, + "accountability": 4.8, + "transparency": 4.8, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7106382978723402, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 87.89473684210527, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.25, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 3.75, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8214285714285716, + "normalized_reciprocal_se_rank": 0.10549943883277216, + "reciprocal_se_rank": 0.035059331175836025, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 3.5, + "authority": 3.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.2872340425531914, + "normalized_reciprocal_se_rank": 0.06649831649831649, + "reciprocal_se_rank": 0.02568770226537217, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 2.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.0510204081632653, + "normalized_reciprocal_se_rank": 0.777056277056277, + "reciprocal_se_rank": 0.19642857142857142, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 2.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.0510204081632653, + "normalized_reciprocal_se_rank": 0.777056277056277, + "reciprocal_se_rank": 0.19642857142857142, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 3.75, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7134146341463415, + "normalized_reciprocal_se_rank": 0.37640036730945825, + "reciprocal_se_rank": 0.10015445719329213, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gensee", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 4.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1222222222222222, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-4o", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 4.0, + "accountability": 3.75, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 1.1333333333333333, + "normalized_reciprocal_se_rank": 0.4132996632996633, + "reciprocal_se_rank": 0.10902103559870549, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-5", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.25, + "accountability": 4.5, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 0.750025, + "relative_se_rank": 2.2867647058823533, + "normalized_reciprocal_se_rank": 0.06421356421356421, + "reciprocal_se_rank": 0.02513869625520111, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "tavily", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 3.75, + "objectivity_tone": 4.5, + "layout_ad_density": 4.25, + "accountability": 4.0, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1277777777777778, + "normalized_reciprocal_se_rank": 0.44797979797979803, + "reciprocal_se_rank": 0.11735436893203884, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 87.89473684210526, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6576086956521738, + "normalized_reciprocal_se_rank": 0.3937404346495256, + "reciprocal_se_rank": 0.10432112385995881, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "exa", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.78947368421053, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7789473684210528, + "normalized_reciprocal_se_rank": 0.06013468013468013, + "reciprocal_se_rank": 0.024158576051779936, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 87.78947368421052, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 5.0, + "layout_ad_density": 4.2, + "accountability": 4.2, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.78947368421052, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.2, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 87.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 2.4, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.425, + "weighted_total_content_score": 87.78947368421052, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 5.0, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.5380952380952384, + "normalized_reciprocal_se_rank": 0.0998834498834499, + "reciprocal_se_rank": 0.033709858103061985, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.333333333333333, + "weighted_total_content_score": 87.71929824561403, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 4.666666666666667, + "transparency": 3.6666666666666665, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.12403100775193798, + "normalized_reciprocal_se_rank": 0.48365132809577244, + "reciprocal_se_rank": 0.1259259259259259, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.34375, + "weighted_total_content_score": 87.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.25, + "accountability": 4.25, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.75, + "relative_se_rank": 1.826086956521739, + "normalized_reciprocal_se_rank": 0.016576016576016577, + "reciprocal_se_rank": 0.013691809808314661, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 87.57894736842105, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 2.4, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 87.57894736842105, + "semantic_relevance": 3.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 4.6, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 87.57894736842105, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.4, + "objectivity_tone": 4.4, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.24255319148936166, + "normalized_reciprocal_se_rank": 0.4561132057906251, + "reciprocal_se_rank": 0.11930875576036866, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.57894736842104, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 4.2, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.8, + "transparency": 4.8, + "authority": 4.2, + "avg_ge_freq": 0.6, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.57894736842104, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 4.4, + "accountability": 3.8, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 3.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9083333333333334, + "normalized_reciprocal_se_rank": 0.39654320987654323, + "reciprocal_se_rank": 0.10499460625674217, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 3.75, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.5, + "accountability": 4.75, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.08536585365853658, + "normalized_reciprocal_se_rank": 0.6036556036556036, + "reciprocal_se_rank": 0.15476190476190477, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.34375, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.75, + "accountability": 5.0, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2867647058823533, + "normalized_reciprocal_se_rank": 0.06421356421356421, + "reciprocal_se_rank": 0.025138696255201105, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "exa", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.416666666666667, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.575757575757576, + "normalized_reciprocal_se_rank": 0.11264156718702174, + "reciprocal_se_rank": 0.03677552221241542, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gensee", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.8, + "accountability": 3.4, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.95609756097561, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 87.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.2023809523809523, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "claude", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.36842105263156, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 4.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.25, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7045454545454546, + "normalized_reciprocal_se_rank": 0.3295033670033671, + "reciprocal_se_rank": 0.08888551779935275, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 87.15789473684211, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.64, + "normalized_reciprocal_se_rank": 0.05594405594405595, + "reciprocal_se_rank": 0.02315160567587752, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.15789473684211, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 3.8, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.80002, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 87.1578947368421, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 87.1578947368421, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.1534883720930234, + "normalized_reciprocal_se_rank": 0.13141864109606044, + "reciprocal_se_rank": 0.041287489001237826, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.1578947368421, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.73336, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.10526315789474, + "semantic_relevance": 3.25, + "factual_accuracy": 5.0, + "freshness": 4.75, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 4.75, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6428571428571429, + "normalized_reciprocal_se_rank": 0.28815397565397566, + "reciprocal_se_rank": 0.07894962036345532, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gensee", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 87.10526315789474, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 4.5, + "objectivity_tone": 4.25, + "layout_ad_density": 4.5, + "accountability": 3.5, + "transparency": 3.75, + "authority": 4.5, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 2.048780487804878, + "normalized_reciprocal_se_rank": 0.016576016576016577, + "reciprocal_se_rank": 0.013691809808314661, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 87.01754385964914, + "semantic_relevance": 4.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 1.6991869918699187, + "normalized_reciprocal_se_rank": 0.10213243546576879, + "reciprocal_se_rank": 0.03425026968716289, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-4o", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.333333333333333, + "weighted_total_content_score": 87.01754385964911, + "semantic_relevance": 5.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 0.8840579710144927, + "normalized_reciprocal_se_rank": 0.17225243891910558, + "reciprocal_se_rank": 0.05109949381794041, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.291666666666667, + "weighted_total_content_score": 87.01754385964911, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 3.6666666666666665, + "accountability": 3.6666666666666665, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.333333333333333, + "weighted_total_content_score": 87.01754385964911, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.3412698412698412, + "normalized_reciprocal_se_rank": 0.35959089643300174, + "reciprocal_se_rank": 0.09611528822055138, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 70, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 5.0, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9674418604651163, + "normalized_reciprocal_se_rank": 0.023931623931623933, + "reciprocal_se_rank": 0.015459297983569828, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 3.4, + "factual_accuracy": 4.6, + "freshness": 4.2, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 4.8, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 3.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5777777777777777, + "normalized_reciprocal_se_rank": 0.37251696734950324, + "reciprocal_se_rank": 0.09922131011553598, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "gensee", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.80002, + "relative_se_rank": 0.7911111111111111, + "normalized_reciprocal_se_rank": 0.2769271322854211, + "reciprocal_se_rank": 0.07625190800062301, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "gpt-4o", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.4, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.5761904761904764, + "normalized_reciprocal_se_rank": 0.0754930254930255, + "reciprocal_se_rank": 0.027849052242256124, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 3.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.6, + "accountability": 3.8, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.95609756097561, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.94736842105263, + "semantic_relevance": 3.6, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.0, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.8, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 86.94736842105262, + "semantic_relevance": 4.8, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.6, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.8, + "relative_se_rank": 0.780952380952381, + "normalized_reciprocal_se_rank": 0.22026374859708192, + "reciprocal_se_rank": 0.06263619201725998, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 86.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.1702127659574468, + "normalized_reciprocal_se_rank": 0.3955747955747956, + "reciprocal_se_rank": 0.10476190476190475, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 86.84210526315789, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 86.84210526315789, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 3.75, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-5", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 86.84210526315789, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.75, + "accountability": 3.75, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 0.83335, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.73684210526315, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.6, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 86.73684210526315, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.4558139534883723, + "normalized_reciprocal_se_rank": 0.20975468975468975, + "reciprocal_se_rank": 0.06011095700416089, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.0545454545454547, + "normalized_reciprocal_se_rank": 0.19395559606085921, + "reciprocal_se_rank": 0.05631457283986665, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-4o", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.73684210526315, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 86.73684210526315, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.0, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.9411764705882355, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 86.73684210526315, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.291666666666667, + "weighted_total_content_score": 86.66666666666667, + "semantic_relevance": 5.0, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6046511627906976, + "normalized_reciprocal_se_rank": 0.12525252525252525, + "reciprocal_se_rank": 0.039805825242718446, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 86.57894736842105, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.25, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.34375, + "weighted_total_content_score": 86.57894736842104, + "semantic_relevance": 4.0, + "factual_accuracy": 4.25, + "freshness": 4.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.75, + "accountability": 3.5, + "transparency": 4.25, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1988636363636365, + "normalized_reciprocal_se_rank": 0.2745791245791246, + "reciprocal_se_rank": 0.07568770226537216, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 86.52631578947368, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.8, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": 0.73336, + "relative_se_rank": 1.0266666666666668, + "normalized_reciprocal_se_rank": 0.2766050598867936, + "reciprocal_se_rank": 0.0761745168174577, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "claude", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.52631578947368, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.9705882352941178, + "normalized_reciprocal_se_rank": 0.0775890775890776, + "reciprocal_se_rank": 0.02835271524591913, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 86.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.8697674418604653, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 86.52631578947367, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 86.52631578947367, + "semantic_relevance": 4.2, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 4.2, + "accountability": 4.8, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.8666600000000001, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.35, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 5.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.4375, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 1.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 3.5, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.49999999999999994, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 3.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 2.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 2.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 2.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.074468085106383, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-5", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 86.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 2.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 86.31578947368419, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.269767441860465, + "normalized_reciprocal_se_rank": 0.11585517311745105, + "reciprocal_se_rank": 0.03754772363744576, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 86.31578947368419, + "semantic_relevance": 4.8, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.8, + "accountability": 3.4, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.80002, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 86.10526315789474, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 4.4, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.5348837209302324, + "normalized_reciprocal_se_rank": 0.0990831390831391, + "reciprocal_se_rank": 0.03351755041075429, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.307142857142857, + "weighted_total_content_score": 86.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 1.8, + "objectivity_tone": 4.6, + "layout_ad_density": 3.5, + "accountability": 4.4, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.49361702127659574, + "normalized_reciprocal_se_rank": 0.5017508417508417, + "reciprocal_se_rank": 0.13027508090614887, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 86.10526315789473, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 0.73336, + "relative_se_rank": 1.8800000000000001, + "normalized_reciprocal_se_rank": 0.023931623931623933, + "reciprocal_se_rank": 0.015459297983569828, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 86.05263157894737, + "semantic_relevance": 3.75, + "factual_accuracy": 4.75, + "freshness": 3.75, + "objectivity_tone": 4.75, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 4.75, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 86.05263157894737, + "semantic_relevance": 4.5, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 86.05263157894736, + "semantic_relevance": 4.75, + "factual_accuracy": 4.5, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.833325, + "relative_se_rank": 0.1125, + "normalized_reciprocal_se_rank": 0.6098484848484849, + "reciprocal_se_rank": 0.15625, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 85.89473684210527, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.488372093023256, + "normalized_reciprocal_se_rank": 0.11265031265031264, + "reciprocal_se_rank": 0.036777623670827556, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.89473684210527, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 3.2, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.7866666666666666, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "google-search", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 85.89473684210527, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 85.89473684210526, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 85.89473684210526, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.89473684210525, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 3.2, + "objectivity_tone": 4.4, + "layout_ad_density": 3.4, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.3708333333333336, + "normalized_reciprocal_se_rank": 0.10833117499784166, + "reciprocal_se_rank": 0.035739772632976514, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 85.89473684210525, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.89473684210525, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 3.6, + "objectivity_tone": 4.4, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1675675675675676, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 85.78947368421053, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 4.0, + "objectivity_tone": 3.5, + "layout_ad_density": 4.5, + "accountability": 4.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 0.666675, + "relative_se_rank": 0.7708333333333334, + "normalized_reciprocal_se_rank": 0.17337395115172893, + "reciprocal_se_rank": 0.051368983407818355, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.78947368421052, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.09523809523809523, + "normalized_reciprocal_se_rank": 0.6069584736251403, + "reciprocal_se_rank": 0.15555555555555556, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 85.78947368421052, + "semantic_relevance": 3.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.78947368421052, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 2.5, + "accountability": 3.75, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.78947368421052, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 3.75, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 4.25, + "transparency": 3.75, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.601063829787234, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 85.6842105263158, + "semantic_relevance": 3.0, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.502439024390244, + "normalized_reciprocal_se_rank": 0.27515151515151515, + "reciprocal_se_rank": 0.0758252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 85.68421052631578, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 85.68421052631578, + "semantic_relevance": 4.8, + "factual_accuracy": 4.4, + "freshness": 4.2, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.8, + "authority": 4.2, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.1272727272727274, + "normalized_reciprocal_se_rank": 0.21736251402918075, + "reciprocal_se_rank": 0.061939050701186625, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "tavily", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 85.68421052631578, + "semantic_relevance": 4.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8333333333333333, + "normalized_reciprocal_se_rank": 0.21852974186307517, + "reciprocal_se_rank": 0.06221952535059331, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.208333333333333, + "weighted_total_content_score": 85.6140350877193, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 5.0, + "freshness": 2.3333333333333335, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 4.0, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8108108108108107, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.208333333333333, + "weighted_total_content_score": 85.61403508771929, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.666666666666667, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 85.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.75, + "accountability": 4.75, + "transparency": 3.75, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7916666666666666, + "normalized_reciprocal_se_rank": 0.2903361096343553, + "reciprocal_se_rank": 0.0794739680917504, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "exa", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 85.52631578947368, + "semantic_relevance": 2.75, + "factual_accuracy": 5.0, + "freshness": 4.25, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.52631578947368, + "semantic_relevance": 4.75, + "factual_accuracy": 4.5, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.0, + "transparency": 4.25, + "authority": 4.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 85.52631578947367, + "semantic_relevance": 4.0, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 3.0, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.52631578947367, + "semantic_relevance": 4.5, + "factual_accuracy": 4.75, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.75, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 0.833325, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 85.47368421052633, + "semantic_relevance": 2.6, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.4, + "accountability": 5.0, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5795918367346939, + "normalized_reciprocal_se_rank": 0.2662488583541215, + "reciprocal_se_rank": 0.07368601208023794, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 85.47368421052632, + "semantic_relevance": 3.4, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 5.0, + "accountability": 3.2, + "transparency": 4.4, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6097560975609757, + "normalized_reciprocal_se_rank": 0.13144250385629697, + "reciprocal_se_rank": 0.0412932230140131, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.47368421052632, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.4, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.9411764705882355, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.325, + "weighted_total_content_score": 85.47368421052632, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.6, + "accountability": 5.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.4736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.2, + "freshness": 4.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.2975609756097561, + "normalized_reciprocal_se_rank": 0.10456950456950458, + "reciprocal_se_rank": 0.03483587609801202, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 85.26315789473685, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.5957446808510638, + "normalized_reciprocal_se_rank": 0.14258901067411706, + "reciprocal_se_rank": 0.04397163120567376, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.3, + "weighted_total_content_score": 85.26315789473685, + "semantic_relevance": 3.6, + "factual_accuracy": 3.8, + "freshness": 4.6, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.8272727272727276, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.34375, + "weighted_total_content_score": 85.26315789473685, + "semantic_relevance": 2.75, + "factual_accuracy": 4.5, + "freshness": 4.75, + "objectivity_tone": 4.25, + "layout_ad_density": 4.0, + "accountability": 4.75, + "transparency": 5.0, + "authority": 4.75, + "avg_ge_freq": 0.833325, + "relative_se_rank": 1.1581632653061225, + "normalized_reciprocal_se_rank": 0.10602346999405823, + "reciprocal_se_rank": 0.03518525128498001, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "exa", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 85.26315789473685, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.4, + "accountability": 5.0, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8444444444444446, + "normalized_reciprocal_se_rank": 0.038159371492704826, + "reciprocal_se_rank": 0.018878101402373244, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 4.4, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.774468085106383, + "normalized_reciprocal_se_rank": 0.03353535353535354, + "reciprocal_se_rank": 0.017766990291262132, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.4, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.60002, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.421428571428572, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 2.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 4.666666666666667, + "freshness": 3.3333333333333335, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 2.6666666666666665, + "accountability": 4.666666666666667, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 0.041666666666666664, + "normalized_reciprocal_se_rank": 0.8150392817059484, + "reciprocal_se_rank": 0.20555555555555557, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 3.75, + "objectivity_tone": 4.25, + "layout_ad_density": 2.75, + "accountability": 4.75, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.5520833333333334, + "normalized_reciprocal_se_rank": 0.6112794612794613, + "reciprocal_se_rank": 0.1565938511326861, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gpt-4o", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.5, + "factual_accuracy": 4.75, + "freshness": 3.0, + "objectivity_tone": 4.75, + "layout_ad_density": 3.5, + "accountability": 4.25, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 1.7976190476190474, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-4o", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 0.375, + "normalized_reciprocal_se_rank": 0.19079685746352412, + "reciprocal_se_rank": 0.05555555555555555, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.6, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.6, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.390909090909091, + "normalized_reciprocal_se_rank": 0.2692063492063492, + "reciprocal_se_rank": 0.07439667128987518, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 2.8, + "accountability": 4.8, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.565, + "normalized_reciprocal_se_rank": 0.24740740740740738, + "reciprocal_se_rank": 0.06915857605177993, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 5.0, + "freshness": 4.666666666666667, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 2.6666666666666665, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0852713178294573, + "normalized_reciprocal_se_rank": 0.16468994246772026, + "reciprocal_se_rank": 0.0492822920007386, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "tavily", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 2.5, + "accountability": 4.0, + "transparency": 4.5, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.291666666666667, + "weighted_total_content_score": 85.26315789473684, + "semantic_relevance": 4.0, + "factual_accuracy": 4.333333333333333, + "freshness": 4.333333333333333, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6666666666666665, + "accountability": 5.0, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.24822695035460993, + "normalized_reciprocal_se_rank": 0.3405519844913784, + "reciprocal_se_rank": 0.09154040404040403, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 31, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.05263157894737, + "semantic_relevance": 4.6, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 4.0, + "layout_ad_density": 4.2, + "accountability": 3.6, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9555555555555557, + "normalized_reciprocal_se_rank": 0.382010582010582, + "reciprocal_se_rank": 0.10150254276467867, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.05263157894737, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 3.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.2, + "accountability": 4.2, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3866666666666667, + "normalized_reciprocal_se_rank": 0.25137085137085136, + "reciprocal_se_rank": 0.0701109570041609, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 85.05263157894737, + "semantic_relevance": 3.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.4, + "accountability": 3.8, + "transparency": 4.8, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 85.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.6, + "normalized_reciprocal_se_rank": 0.17900383141762455, + "reciprocal_se_rank": 0.052721794442584534, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 85.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.05263157894737, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.05263157894736, + "semantic_relevance": 4.2, + "factual_accuracy": 4.2, + "freshness": 3.0, + "objectivity_tone": 4.8, + "layout_ad_density": 3.6, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 85.05263157894736, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-5", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 85.05263157894736, + "semantic_relevance": 3.4, + "factual_accuracy": 4.4, + "freshness": 4.6, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 85.0, + "semantic_relevance": 4.5, + "factual_accuracy": 4.75, + "freshness": 2.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 4.25, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 85.0, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6063829787234043, + "normalized_reciprocal_se_rank": 0.38874859708193044, + "reciprocal_se_rank": 0.10312162891046385, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 85.0, + "semantic_relevance": 4.5, + "factual_accuracy": 4.25, + "freshness": 4.5, + "objectivity_tone": 3.5, + "layout_ad_density": 4.25, + "accountability": 4.0, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 0.83335, + "relative_se_rank": 0.63, + "normalized_reciprocal_se_rank": 0.26771746771746774, + "reciprocal_se_rank": 0.07403890607774102, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "claude", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 84.91228070175438, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 1.7249999999999999, + "normalized_reciprocal_se_rank": 0.12525252525252525, + "reciprocal_se_rank": 0.039805825242718446, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 84.84210526315789, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 4.8, + "layout_ad_density": 4.0, + "accountability": 3.2, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.84210526315789, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.9411764705882355, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 3.2, + "objectivity_tone": 4.4, + "layout_ad_density": 3.0, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.1675675675675676, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.84210526315789, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 2.4, + "objectivity_tone": 4.0, + "layout_ad_density": 4.4, + "accountability": 4.6, + "transparency": 5.0, + "authority": 4.4, + "avg_ge_freq": 0.93334, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 84.73684210526316, + "semantic_relevance": 4.75, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 2.75, + "accountability": 3.75, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 0.666675, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 84.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 3.0, + "authority": 4.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.4024390243902438, + "normalized_reciprocal_se_rank": 0.09539842873176206, + "reciprocal_se_rank": 0.03263214670981661, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-4o", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 84.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 3.5, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.83335, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 84.73684210526315, + "semantic_relevance": 2.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 3.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.030612244897959, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 84.73684210526315, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 2.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 84.73684210526315, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.25, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.235294117647059, + "normalized_reciprocal_se_rank": 0.13852813852813853, + "reciprocal_se_rank": 0.04299583911234396, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 3.8, + "objectivity_tone": 4.0, + "layout_ad_density": 4.6, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.8044444444444445, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.296428571428572, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 4.6, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.408888888888889, + "normalized_reciprocal_se_rank": 0.1357704024370691, + "reciprocal_se_rank": 0.042333179226383105, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 3.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.73332, + "relative_se_rank": 1.6666666666666667, + "normalized_reciprocal_se_rank": 0.04524826659214011, + "reciprocal_se_rank": 0.020581500952965702, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 3.6, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4829268292682927, + "normalized_reciprocal_se_rank": 0.3306397306397306, + "reciprocal_se_rank": 0.08915857605177993, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.2, + "accountability": 4.0, + "transparency": 3.6, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.6, + "freshness": 4.8, + "objectivity_tone": 3.6, + "layout_ad_density": 2.0, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.6, + "normalized_reciprocal_se_rank": 0.22975206611570248, + "reciprocal_se_rank": 0.0649161518093557, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-5", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.80002, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 2.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.6, + "accountability": 3.6, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.63157894736841, + "semantic_relevance": 4.8, + "factual_accuracy": 4.4, + "freshness": 4.2, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3695652173913042, + "normalized_reciprocal_se_rank": 0.17804713804713804, + "reciprocal_se_rank": 0.052491909385113264, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.208333333333333, + "weighted_total_content_score": 84.56140350877193, + "semantic_relevance": 4.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.666666666666667, + "transparency": 3.6666666666666665, + "authority": 3.6666666666666665, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 84.56140350877193, + "semantic_relevance": 5.0, + "factual_accuracy": 4.666666666666667, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.6666666666666665, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6341463414634145, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.208333333333333, + "weighted_total_content_score": 84.56140350877193, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.333333333333333, + "freshness": 4.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 4.333333333333333, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8699186991869919, + "normalized_reciprocal_se_rank": 0.4024370691037357, + "reciprocal_se_rank": 0.10641084912929573, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.65625, + "weighted_total_content_score": 84.4736842105263, + "semantic_relevance": 4.75, + "factual_accuracy": 4.75, + "freshness": 4.333333333333333, + "objectivity_tone": 4.75, + "layout_ad_density": 4.25, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 84.4736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 3.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.5, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.109375, + "normalized_reciprocal_se_rank": 0.6589786756453423, + "reciprocal_se_rank": 0.16805555555555557, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 84.4736842105263, + "semantic_relevance": 4.25, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.75, + "accountability": 3.25, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.13068181818181818, + "normalized_reciprocal_se_rank": 0.4884680134680135, + "reciprocal_se_rank": 0.12708333333333333, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 26, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 84.4736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.25, + "freshness": 4.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 3.5, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.49997499999999995, + "relative_se_rank": 1.75, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-5", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 84.4736842105263, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 3.25, + "accountability": 4.25, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.42105263157895, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.4, + "accountability": 4.8, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 0.8, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.42105263157895, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 0.26666666666666666, + "normalized_reciprocal_se_rank": 0.5253434177572108, + "reciprocal_se_rank": 0.13594417077175697, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.42105263157895, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.73334, + "relative_se_rank": 0.9782608695652174, + "normalized_reciprocal_se_rank": 0.21095194367921644, + "reciprocal_se_rank": 0.06039864666078258, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.665, + "weighted_total_content_score": 84.42105263157895, + "semantic_relevance": 3.25, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 4.8, + "transparency": 5.0, + "authority": 4.8, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.42105263157893, + "semantic_relevance": 3.6, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.42105263157893, + "semantic_relevance": 4.4, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.8666600000000001, + "relative_se_rank": 0.22000000000000003, + "normalized_reciprocal_se_rank": 0.5411971936109867, + "reciprocal_se_rank": 0.13975369458128079, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 84.42105263157893, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 3.8, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 0.73332, + "relative_se_rank": 0.9120000000000001, + "normalized_reciprocal_se_rank": 0.18775101441768108, + "reciprocal_se_rank": 0.054823666085802004, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 84.42105263157893, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 3.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.431818181818182, + "normalized_reciprocal_se_rank": 0.20232323232323238, + "reciprocal_se_rank": 0.0583252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.42105263157893, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 3.4, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 84.42105263157893, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 4.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.6, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9422222222222223, + "normalized_reciprocal_se_rank": 0.3374218374218374, + "reciprocal_se_rank": 0.09078825705039298, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.21052631578948, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.657142857142857, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 84.21052631578948, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 4.666666666666667, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6666666666666665, + "accountability": 3.6666666666666665, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.0, + "accountability": 3.3333333333333335, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 4.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 3.75, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 0.833325, + "relative_se_rank": 0.8351063829787234, + "normalized_reciprocal_se_rank": 0.21147407884996539, + "reciprocal_se_rank": 0.06052411117996741, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.14893617021276595, + "normalized_reciprocal_se_rank": 0.3757575757575758, + "reciprocal_se_rank": 0.1, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 3.8, + "authority": 4.4, + "avg_ge_freq": 0.73334, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.2, + "normalized_reciprocal_se_rank": 0.16896235078053262, + "reciprocal_se_rank": 0.05030891438658429, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gensee", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.254464285714286, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.25, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 3.8, + "objectivity_tone": 4.2, + "layout_ad_density": 4.4, + "accountability": 3.2, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 84.21052631578947, + "semantic_relevance": 4.5, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4875, + "normalized_reciprocal_se_rank": 0.07438016528925621, + "reciprocal_se_rank": 0.02758164165931156, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 84.21052631578945, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 3.8, + "objectivity_tone": 4.2, + "layout_ad_density": 5.0, + "accountability": 3.6, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5414634146341464, + "normalized_reciprocal_se_rank": 0.5849831649831649, + "reciprocal_se_rank": 0.15027508090614888, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 84.0, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 3.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.7234042553191489, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 84.0, + "semantic_relevance": 4.8, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.10434782608695652, + "normalized_reciprocal_se_rank": 0.5247170113836781, + "reciprocal_se_rank": 0.13579365079365077, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 84.0, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.66668, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 84.0, + "semantic_relevance": 1.8, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.5317073170731708, + "normalized_reciprocal_se_rank": 0.16676656676656676, + "reciprocal_se_rank": 0.049781286674490555, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 83.99999999999997, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 4.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.8, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 83.94736842105263, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.28125, + "weighted_total_content_score": 83.94736842105263, + "semantic_relevance": 3.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 2.75, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 83.94736842105263, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.75, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3095238095238095, + "normalized_reciprocal_se_rank": 0.2871900826446281, + "reciprocal_se_rank": 0.0787180052956752, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-4o", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 83.94736842105263, + "semantic_relevance": 4.75, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.25, + "accountability": 3.25, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 1.8641304347826086, + "normalized_reciprocal_se_rank": 0.012516469038208168, + "reciprocal_se_rank": 0.012716336006753905, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "tavily", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 83.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 2.75, + "accountability": 4.75, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6888888888888889, + "normalized_reciprocal_se_rank": 0.13852813852813853, + "reciprocal_se_rank": 0.04299583911234396, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "exa", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.208333333333333, + "weighted_total_content_score": 83.85964912280701, + "semantic_relevance": 3.0, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 2.6666666666666665, + "accountability": 3.6666666666666665, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 83.85964912280701, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.333333333333333, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.6666666666666665, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 83.78947368421053, + "semantic_relevance": 4.2, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 4.6, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.097560975609756, + "normalized_reciprocal_se_rank": 0.01714110805019896, + "reciprocal_se_rank": 0.013827596351868195, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 3.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.5170731707317073, + "normalized_reciprocal_se_rank": 0.21966329966329967, + "reciprocal_se_rank": 0.062491909385113266, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.6, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.63, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 3.5, + "factual_accuracy": 5.0, + "freshness": 4.4, + "objectivity_tone": 5.0, + "layout_ad_density": 4.4, + "accountability": 4.8, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 4.0, + "objectivity_tone": 4.2, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 83.78947368421052, + "semantic_relevance": 3.4, + "factual_accuracy": 4.6, + "freshness": 2.8, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.6, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3617021276595744, + "normalized_reciprocal_se_rank": 0.18993746993746996, + "reciprocal_se_rank": 0.05534905224225613, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.09375, + "weighted_total_content_score": 83.6842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 4.25, + "authority": 5.0, + "avg_ge_freq": 0.583325, + "relative_se_rank": 0.6170212765957447, + "normalized_reciprocal_se_rank": 0.4284205693296603, + "reciprocal_se_rank": 0.11265445719329215, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "claude", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 83.68421052631578, + "semantic_relevance": 4.75, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.25, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 83.68421052631578, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.625, + "normalized_reciprocal_se_rank": 0.1186552239183818, + "reciprocal_se_rank": 0.03822055137844611, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 83.68421052631578, + "semantic_relevance": 4.5, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 2.75, + "accountability": 4.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.7790697674418605, + "normalized_reciprocal_se_rank": 0.10549943883277216, + "reciprocal_se_rank": 0.035059331175836025, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gensee", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.428571428571429, + "weighted_total_content_score": 83.68421052631578, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 4.25, + "objectivity_tone": 4.5, + "layout_ad_density": 2.0, + "accountability": 5.0, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 1.6063829787234043, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "google-search", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 83.68421052631578, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.25, + "transparency": 3.75, + "authority": 4.25, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 83.68421052631578, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 83.57894736842105, + "semantic_relevance": 4.4, + "factual_accuracy": 4.4, + "freshness": 3.4, + "objectivity_tone": 3.4, + "layout_ad_density": 4.0, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 83.57894736842104, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 3.6, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.2833333333333337, + "normalized_reciprocal_se_rank": 0.24278338945005612, + "reciprocal_se_rank": 0.06804746494066882, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.225, + "weighted_total_content_score": 83.57894736842104, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.4, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.008695652173913, + "normalized_reciprocal_se_rank": 0.3165795889933821, + "reciprocal_se_rank": 0.085780046869769, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-4o", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 83.57894736842104, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 4.2, + "accountability": 3.6, + "transparency": 3.8, + "authority": 4.2, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.4217391304347826, + "normalized_reciprocal_se_rank": 0.22061999303378613, + "reciprocal_se_rank": 0.06272179444258454, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 4.75, + "objectivity_tone": 3.75, + "layout_ad_density": 2.25, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 4.8, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 3.6, + "transparency": 3.4, + "authority": 3.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.9904761904761905, + "normalized_reciprocal_se_rank": 0.031553631553631556, + "reciprocal_se_rank": 0.01729079981507166, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 3.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-5", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.6, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.75, + "factual_accuracy": 5.0, + "freshness": 4.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.0, + "accountability": 4.8, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 2.2, + "accountability": 4.6, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8930232558139537, + "normalized_reciprocal_se_rank": 0.07515151515151516, + "reciprocal_se_rank": 0.027766990291262134, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.36842105263158, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.4, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.8790697674418606, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 77, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.0, + "accountability": 4.8, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 4.0, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 4.4, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.6487804878048782, + "normalized_reciprocal_se_rank": 0.06802740044119356, + "reciprocal_se_rank": 0.026055127775917862, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.73334, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.272727272727273, + "normalized_reciprocal_se_rank": 0.11851851851851852, + "reciprocal_se_rank": 0.038187702265372166, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "google-search", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 3.2, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 3.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 2.5, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3375, + "normalized_reciprocal_se_rank": 0.1878787878787879, + "reciprocal_se_rank": 0.054854368932038836, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 83.15789473684211, + "semantic_relevance": 3.75, + "factual_accuracy": 4.0, + "freshness": 4.5, + "objectivity_tone": 3.75, + "layout_ad_density": 4.25, + "accountability": 4.75, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.90625, + "normalized_reciprocal_se_rank": 0.11994949494949496, + "reciprocal_se_rank": 0.03853155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "claude", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 83.1578947368421, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 2.0, + "authority": 2.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 83.1578947368421, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.75, + "relative_se_rank": 0.6276595744680851, + "normalized_reciprocal_se_rank": 0.3417202326293235, + "reciprocal_se_rank": 0.0918211238599588, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "google-search", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.166666666666667, + "weighted_total_content_score": 83.1578947368421, + "semantic_relevance": 4.0, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 2.6666666666666665, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 83.1578947368421, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 83.1578947368421, + "semantic_relevance": 1.5, + "factual_accuracy": 5.0, + "freshness": 4.25, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 83.1578947368421, + "semantic_relevance": 4.5, + "factual_accuracy": 4.0, + "freshness": 2.5, + "objectivity_tone": 3.5, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.94736842105263, + "semantic_relevance": 4.4, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 3.4, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.774468085106383, + "normalized_reciprocal_se_rank": 0.03353535353535354, + "reciprocal_se_rank": 0.017766990291262132, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 82.94736842105263, + "semantic_relevance": 4.2, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.2, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.7565217391304349, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.94736842105263, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.078048780487805, + "normalized_reciprocal_se_rank": 0.23239057239057243, + "reciprocal_se_rank": 0.06555016181229774, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.94736842105263, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.0863636363636364, + "normalized_reciprocal_se_rank": 0.18216154216154218, + "reciprocal_se_rank": 0.053480564742700666, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "tavily", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 82.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0181818181818183, + "normalized_reciprocal_se_rank": 0.20792854732248672, + "reciprocal_se_rank": 0.05967215093428686, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "tavily", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.94736842105263, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.12000000000000002, + "normalized_reciprocal_se_rank": 0.5887926887926888, + "reciprocal_se_rank": 0.15119047619047618, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 82.89473684210526, + "semantic_relevance": 4.0, + "factual_accuracy": 4.25, + "freshness": 3.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.75, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.833325, + "relative_se_rank": 0.7111111111111111, + "normalized_reciprocal_se_rank": 0.32910927456382005, + "reciprocal_se_rank": 0.0887908208296558, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "google-search", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.09375, + "weighted_total_content_score": 82.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.25, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 3.5, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.89473684210525, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.75, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.208333333333333, + "weighted_total_content_score": 82.80701754385966, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.666666666666667, + "transparency": 4.0, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7, + "normalized_reciprocal_se_rank": 0.1847041847041847, + "reciprocal_se_rank": 0.05409153952843273, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "claude", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.041666666666667, + "weighted_total_content_score": 82.80701754385963, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 1.3333333333333333, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 0.3049645390070922, + "normalized_reciprocal_se_rank": 0.26917463984912665, + "reciprocal_se_rank": 0.07438905180840664, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 3.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 3.8, + "factual_accuracy": 4.8, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.575, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.75, + "freshness": 4.4, + "objectivity_tone": 4.5, + "layout_ad_density": 4.2, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.4382978723404256, + "normalized_reciprocal_se_rank": 0.14446216551479707, + "reciprocal_se_rank": 0.04442173394651677, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 96, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 3.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 3.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.8, + "relative_se_rank": 0.9727272727272729, + "normalized_reciprocal_se_rank": 0.32949494949494956, + "reciprocal_se_rank": 0.08888349514563107, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 4.4, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.078048780487805, + "normalized_reciprocal_se_rank": 0.020619993033786137, + "reciprocal_se_rank": 0.014663542015400067, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 4.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.4, + "authority": 3.4, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.4375000000000002, + "normalized_reciprocal_se_rank": 0.05365497076023392, + "reciprocal_se_rank": 0.022601558507920283, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.73684210526315, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 4.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4325581395348839, + "normalized_reciprocal_se_rank": 0.24278338945005612, + "reciprocal_se_rank": 0.06804746494066882, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.15625, + "weighted_total_content_score": 82.63157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 3.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.5, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0572916666666667, + "normalized_reciprocal_se_rank": 0.44797979797979803, + "reciprocal_se_rank": 0.11735436893203884, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.1875, + "weighted_total_content_score": 82.63157894736841, + "semantic_relevance": 3.5, + "factual_accuracy": 4.0, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 0.5957446808510638, + "normalized_reciprocal_se_rank": 0.14258901067411706, + "reciprocal_se_rank": 0.04397163120567376, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.75, + "accountability": 4.75, + "transparency": 3.75, + "authority": 4.25, + "avg_ge_freq": 0.583325, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.63157894736841, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 3.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7127659574468086, + "normalized_reciprocal_se_rank": 0.03151515151515152, + "reciprocal_se_rank": 0.01728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.63157894736841, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.0, + "accountability": 4.25, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 0.6667, + "relative_se_rank": 1.6777777777777778, + "normalized_reciprocal_se_rank": 0.197979797979798, + "reciprocal_se_rank": 0.05728155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 82.52631578947368, + "semantic_relevance": 3.6, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.53332, + "relative_se_rank": 5.346666666666667, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 3.2, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.73334, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.52631578947368, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 3.6, + "objectivity_tone": 3.6, + "layout_ad_density": 4.8, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 82.52631578947367, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 1.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 3.4, + "transparency": 4.2, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.2127659574468085, + "normalized_reciprocal_se_rank": 0.44590433497569365, + "reciprocal_se_rank": 0.11685565330726622, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.45614035087719, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 4.0, + "accountability": 4.333333333333333, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 0.7851851851851852, + "normalized_reciprocal_se_rank": 0.44867724867724873, + "reciprocal_se_rank": 0.11752196024040684, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 3.0, + "objectivity_tone": 3.6, + "layout_ad_density": 4.0, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2625000000000002, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 3.8, + "factual_accuracy": 4.0, + "freshness": 3.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 4.8, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.73332, + "relative_se_rank": 1.297872340425532, + "normalized_reciprocal_se_rank": 0.28902356902356907, + "reciprocal_se_rank": 0.07915857605177994, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 4.2, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4500000000000004, + "normalized_reciprocal_se_rank": 0.1663654084706716, + "reciprocal_se_rank": 0.04968489184125362, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 3.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.25, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 4.4, + "freshness": 4.6, + "objectivity_tone": 4.2, + "layout_ad_density": 3.8, + "accountability": 3.4, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 1.2, + "factual_accuracy": 4.6, + "freshness": 3.8, + "objectivity_tone": 4.4, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.80002, + "relative_se_rank": 2.0, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 82.3157894736842, + "semantic_relevance": 3.4, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 3.0, + "layout_ad_density": 4.6, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.872727272727273, + "normalized_reciprocal_se_rank": 0.04740740740740741, + "reciprocal_se_rank": 0.021100323624595466, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.10526315789474, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.083333333333333, + "weighted_total_content_score": 82.10526315789474, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.3333333333333335, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.5968992248062015, + "normalized_reciprocal_se_rank": 0.14066591844369622, + "reciprocal_se_rank": 0.04350952894642215, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 82.10526315789474, + "semantic_relevance": 4.2, + "factual_accuracy": 4.8, + "freshness": 2.6, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.4, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 82.10526315789474, + "semantic_relevance": 5.0, + "factual_accuracy": 4.6, + "freshness": 4.8, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 4.2, + "transparency": 3.6, + "authority": 4.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 2.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 4.0, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 32, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 4.0, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.25, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.7000000000000002, + "normalized_reciprocal_se_rank": 0.10549943883277216, + "reciprocal_se_rank": 0.035059331175836025, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.6, + "freshness": 3.4, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.19166666666666668, + "normalized_reciprocal_se_rank": 0.5488279621612955, + "reciprocal_se_rank": 0.14158730158730157, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.083333333333333, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 4.0, + "factual_accuracy": 4.333333333333333, + "freshness": 3.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 4.333333333333333, + "accountability": 3.6666666666666665, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8148148148148149, + "normalized_reciprocal_se_rank": 0.3429854096520763, + "reciprocal_se_rank": 0.09212513484358144, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "google-search", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 4.8, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 4.8, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 4.2, + "transparency": 3.6, + "authority": 4.0, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.9411764705882355, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.04878048780487805, + "normalized_reciprocal_se_rank": 0.791919191919192, + "reciprocal_se_rank": 0.2, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.9476190476190478, + "normalized_reciprocal_se_rank": 0.06127946127946128, + "reciprocal_se_rank": 0.024433656957928797, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 3.4, + "factual_accuracy": 3.2, + "freshness": 4.6, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 5.0, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.09375, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.5, + "accountability": 3.75, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6861702127659575, + "normalized_reciprocal_se_rank": 0.041919191919191925, + "reciprocal_se_rank": 0.019781553398058253, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 82.10526315789473, + "semantic_relevance": 3.2, + "factual_accuracy": 4.4, + "freshness": 4.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 4.0, + "transparency": 4.4, + "authority": 5.0, + "avg_ge_freq": 0.59998, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.89473684210527, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 4.2, + "layout_ad_density": 4.6, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 81.89473684210526, + "semantic_relevance": 3.0, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.6, + "accountability": 4.8, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.89473684210526, + "semantic_relevance": 3.6, + "factual_accuracy": 4.6, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.89473684210526, + "semantic_relevance": 4.0, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.60002, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.89473684210526, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 4.2, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7866666666666666, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 81.89473684210526, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 1.6, + "objectivity_tone": 4.4, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9095238095238094, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": 0.6667, + "relative_se_rank": 1.4465116279069767, + "normalized_reciprocal_se_rank": 0.21966329966329967, + "reciprocal_se_rank": 0.062491909385113266, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.89473684210525, + "semantic_relevance": 3.8, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.8, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.9411764705882355, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.89473684210525, + "semantic_relevance": 3.8, + "factual_accuracy": 4.8, + "freshness": 4.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.89473684210525, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.89473684210525, + "semantic_relevance": 4.6, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5434782608695652, + "normalized_reciprocal_se_rank": 0.3732691999358666, + "reciprocal_se_rank": 0.099402065033133, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "gpt-5", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.89473684210525, + "semantic_relevance": 4.2, + "factual_accuracy": 4.2, + "freshness": 3.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.89473684210525, + "semantic_relevance": 4.2, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 3.4, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.11020408163265305, + "normalized_reciprocal_se_rank": 0.5693057559724226, + "reciprocal_se_rank": 0.1465079365079365, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.09375, + "weighted_total_content_score": 81.84210526315789, + "semantic_relevance": 3.75, + "factual_accuracy": 4.5, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.75, + "avg_ge_freq": 0.666675, + "relative_se_rank": 2.0337837837837838, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.041666666666667, + "weighted_total_content_score": 81.75438596491227, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.6666666666666665, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 0.8889, + "relative_se_rank": 0.9481481481481482, + "normalized_reciprocal_se_rank": 0.19981869981869982, + "reciprocal_se_rank": 0.05772342544187204, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "claude", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.083333333333333, + "weighted_total_content_score": 81.75438596491227, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7101449275362317, + "normalized_reciprocal_se_rank": 0.02210135543468877, + "reciprocal_se_rank": 0.015019500456393659, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.083333333333333, + "weighted_total_content_score": 81.75438596491227, + "semantic_relevance": 5.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6422764227642277, + "normalized_reciprocal_se_rank": 0.263973063973064, + "reciprocal_se_rank": 0.07313915857605179, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-4o", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.041666666666667, + "weighted_total_content_score": 81.75438596491227, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.666666666666667, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.6666666666666665, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.7235772357723576, + "normalized_reciprocal_se_rank": 0.07901234567901234, + "reciprocal_se_rank": 0.028694714131607332, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 81.6842105263158, + "semantic_relevance": 3.8, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 3.4, + "layout_ad_density": 4.4, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.025, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 81.6842105263158, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 3.8, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.59998, + "relative_se_rank": 1.9333333333333331, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "claude", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.6842105263158, + "semantic_relevance": 4.4, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4428571428571426, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 81.6842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 5.0, + "freshness": 2.2, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.15, + "weighted_total_content_score": 81.6842105263158, + "semantic_relevance": 2.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.19591836734693877, + "normalized_reciprocal_se_rank": 0.3506484094203392, + "reciprocal_se_rank": 0.09396648672964461, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.68421052631578, + "semantic_relevance": 3.8, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.68421052631578, + "semantic_relevance": 3.2, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.6, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8363636363636366, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 4.8, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 3.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.7434782608695651, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 3.4, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 3.6, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.6, + "layout_ad_density": 3.8, + "accountability": 3.6, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.7183673469387752, + "normalized_reciprocal_se_rank": 0.026599326599326595, + "reciprocal_se_rank": 0.016100323624595465, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 3.8, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 2.8, + "layout_ad_density": 2.4, + "accountability": 4.4, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 2.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.7234042553191489, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.47368421052632, + "semantic_relevance": 3.4, + "factual_accuracy": 4.6, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.9285714285714284, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 81.4736842105263, + "semantic_relevance": 4.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.8837209302325584, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.40350877192982, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.0, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 1.73015873015873, + "normalized_reciprocal_se_rank": 0.05258938592271926, + "reciprocal_se_rank": 0.02234550778240098, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gensee", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.09375, + "weighted_total_content_score": 81.3157894736842, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 3.75, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 4.75, + "authority": 4.25, + "avg_ge_freq": 0.583325, + "relative_se_rank": 1.25, + "normalized_reciprocal_se_rank": 0.3885281385281385, + "reciprocal_se_rank": 0.10306865464632455, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 81.26315789473684, + "semantic_relevance": 3.6, + "factual_accuracy": 4.2, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.1675675675675676, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.235714285714286, + "weighted_total_content_score": 81.26315789473684, + "semantic_relevance": 1.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.75, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.66668, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 81.26315789473684, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 3.2, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 81.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 4.4, + "freshness": 3.6, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 2.0, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 4.2, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 3.8, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0904761904761906, + "normalized_reciprocal_se_rank": 0.011736411736411736, + "reciprocal_se_rank": 0.012528895053166896, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 3.6, + "authority": 3.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 3.0, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 2.6, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.073469387755102, + "normalized_reciprocal_se_rank": 0.17599360533291103, + "reciprocal_se_rank": 0.051998463417374255, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gpt-5", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 3.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 3.5, + "transparency": 4.75, + "authority": 4.25, + "avg_ge_freq": 0.916675, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.5, + "accountability": 4.5, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.03125, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 4.5, + "factual_accuracy": 4.25, + "freshness": 4.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.75, + "accountability": 4.25, + "transparency": 4.0, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.545918367346939, + "normalized_reciprocal_se_rank": 0.16329966329966328, + "reciprocal_se_rank": 0.048948220064724914, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.2625, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 81.05263157894737, + "semantic_relevance": 4.6, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 4.4, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1400000000000001, + "normalized_reciprocal_se_rank": 0.29733700642791555, + "reciprocal_se_rank": 0.08115622241835835, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 81.05263157894736, + "semantic_relevance": 2.6, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.128571428571428, + "weighted_total_content_score": 81.05263157894736, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 3.75, + "accountability": 3.8, + "transparency": 4.6, + "authority": 4.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.5217391304347827, + "normalized_reciprocal_se_rank": 0.11972789115646258, + "reciprocal_se_rank": 0.038478303942936394, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 81.05263157894736, + "semantic_relevance": 4.0, + "factual_accuracy": 4.333333333333333, + "freshness": 3.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.5555333333333333, + "relative_se_rank": 1.8108108108108107, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.041666666666667, + "weighted_total_content_score": 81.05263157894736, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 4.666666666666667, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.666666666666667, + "accountability": 4.0, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 81.05263157894736, + "semantic_relevance": 3.4, + "factual_accuracy": 4.4, + "freshness": 3.0, + "objectivity_tone": 3.6, + "layout_ad_density": 4.8, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 80.84210526315789, + "semantic_relevance": 2.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 5.0, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.26122448979591834, + "normalized_reciprocal_se_rank": 0.29146097956624273, + "reciprocal_se_rank": 0.0797442645074224, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 80.84210526315789, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.6, + "accountability": 3.6, + "transparency": 4.4, + "authority": 3.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.5155555555555555, + "normalized_reciprocal_se_rank": 0.060542681719152315, + "reciprocal_se_rank": 0.024256615267466208, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.84210526315789, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 3.2, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.6667, + "relative_se_rank": 0.1625, + "normalized_reciprocal_se_rank": 0.5742258933063531, + "reciprocal_se_rank": 0.1476902025177887, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.09375, + "weighted_total_content_score": 80.78947368421052, + "semantic_relevance": 2.5, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.75, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 0.75, + "relative_se_rank": 1.1020408163265305, + "normalized_reciprocal_se_rank": 0.2182239057239057, + "reciprocal_se_rank": 0.0621460355987055, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gensee", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.9583333333333335, + "weighted_total_content_score": 80.70175438596492, + "semantic_relevance": 5.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.7588652482269502, + "normalized_reciprocal_se_rank": 0.013732092163464714, + "reciprocal_se_rank": 0.013008439621803413, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "tavily", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 80.70175438596492, + "semantic_relevance": 2.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.333333333333333, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7000000000000001, + "normalized_reciprocal_se_rank": 0.518037518037518, + "reciprocal_se_rank": 0.1341886269070735, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "gpt-4o", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 80.63157894736842, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 3.4, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.8044444444444445, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 4.4, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 2.8, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 5.0, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.8954545454545457, + "normalized_reciprocal_se_rank": 0.03353535353535354, + "reciprocal_se_rank": 0.017766990291262136, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.4232558139534883, + "normalized_reciprocal_se_rank": 0.2692063492063492, + "reciprocal_se_rank": 0.07439667128987518, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 3.6, + "authority": 3.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.4978723404255319, + "normalized_reciprocal_se_rank": 0.11906176612058965, + "reciprocal_se_rank": 0.03831823991732615, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 2.0, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.8, + "layout_ad_density": 3.4, + "accountability": 3.8, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3954545454545457, + "normalized_reciprocal_se_rank": 0.24146224146224143, + "reciprocal_se_rank": 0.06773000462320851, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 3.8, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 3.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 4.2, + "accountability": 3.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.872727272727273, + "normalized_reciprocal_se_rank": 0.04740740740740741, + "reciprocal_se_rank": 0.021100323624595466, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 4.8, + "factual_accuracy": 3.8, + "freshness": 4.6, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.4, + "transparency": 3.6, + "authority": 3.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.6090909090909093, + "normalized_reciprocal_se_rank": 0.05782267115600449, + "reciprocal_se_rank": 0.023603020496224376, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.52631578947368, + "semantic_relevance": 3.75, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.25, + "accountability": 3.25, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 75, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.52631578947368, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.3404255319148936, + "normalized_reciprocal_se_rank": 0.1842286501377411, + "reciprocal_se_rank": 0.05397727272727273, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.03125, + "weighted_total_content_score": 80.52631578947368, + "semantic_relevance": 3.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.75, + "accountability": 2.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.03125, + "weighted_total_content_score": 80.52631578947368, + "semantic_relevance": 3.25, + "factual_accuracy": 4.5, + "freshness": 4.25, + "objectivity_tone": 4.25, + "layout_ad_density": 4.25, + "accountability": 2.75, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.21875, + "weighted_total_content_score": 80.52631578947368, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.75, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.024390243902439, + "normalized_reciprocal_se_rank": 0.019624819624819625, + "reciprocal_se_rank": 0.014424410540915394, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-4o", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.52631578947368, + "semantic_relevance": 5.0, + "factual_accuracy": 4.25, + "freshness": 4.5, + "objectivity_tone": 3.25, + "layout_ad_density": 3.0, + "accountability": 4.5, + "transparency": 4.0, + "authority": 3.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.42105263157893, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 4.2, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.4, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.4978723404255319, + "normalized_reciprocal_se_rank": 0.11906176612058965, + "reciprocal_se_rank": 0.03831823991732615, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.9583333333333335, + "weighted_total_content_score": 80.35087719298245, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.5813953488372094, + "normalized_reciprocal_se_rank": 0.1847041847041847, + "reciprocal_se_rank": 0.05409153952843273, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-4o", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.9583333333333335, + "weighted_total_content_score": 80.35087719298245, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 3.6666666666666665, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.9166666666666665, + "weighted_total_content_score": 80.35087719298245, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 2.0, + "accountability": 2.6666666666666665, + "transparency": 3.6666666666666665, + "authority": 4.333333333333333, + "avg_ge_freq": 0.7778, + "relative_se_rank": 1.6422764227642277, + "normalized_reciprocal_se_rank": 0.263973063973064, + "reciprocal_se_rank": 0.07313915857605179, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.26315789473685, + "semantic_relevance": 3.75, + "factual_accuracy": 4.25, + "freshness": 3.0, + "objectivity_tone": 4.25, + "layout_ad_density": 3.5, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.49997499999999995, + "relative_se_rank": 1.3841463414634145, + "normalized_reciprocal_se_rank": 0.11184926184926186, + "reciprocal_se_rank": 0.03658513816280806, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 80.26315789473685, + "semantic_relevance": 3.0, + "factual_accuracy": 4.25, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0337837837837838, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 80.21052631578947, + "semantic_relevance": 4.8, + "factual_accuracy": 4.8, + "freshness": 2.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 3.8, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.076595744680851, + "normalized_reciprocal_se_rank": 0.17082661237699998, + "reciprocal_se_rank": 0.05075688015855097, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 80.21052631578947, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 4.8, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.6, + "avg_ge_freq": 0.66668, + "relative_se_rank": 0.47111111111111115, + "normalized_reciprocal_se_rank": 0.3087696286536866, + "reciprocal_se_rank": 0.08390338164251207, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 80.21052631578947, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 4.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 3.4, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.888372093023256, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 80.21052631578945, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.8, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 3.8, + "authority": 3.0, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.048780487804878, + "normalized_reciprocal_se_rank": 0.028107158541941152, + "reciprocal_se_rank": 0.016462642465175176, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.03125, + "weighted_total_content_score": 80.0, + "semantic_relevance": 3.5, + "factual_accuracy": 4.25, + "freshness": 4.25, + "objectivity_tone": 3.75, + "layout_ad_density": 4.25, + "accountability": 4.25, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.03125, + "weighted_total_content_score": 80.0, + "semantic_relevance": 3.0, + "factual_accuracy": 4.75, + "freshness": 4.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 1.7916666666666665, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gensee", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 80.0, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.25, + "normalized_reciprocal_se_rank": 0.09539842873176206, + "reciprocal_se_rank": 0.03263214670981661, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-4o", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.0, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 2.0, + "transparency": 3.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 4.125, + "weighted_total_content_score": 80.0, + "semantic_relevance": 1.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 80.0, + "semantic_relevance": 4.2, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.6, + "accountability": 3.6, + "transparency": 4.8, + "authority": 4.0, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.05, + "weighted_total_content_score": 79.99999999999999, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 4.4, + "objectivity_tone": 3.4, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.02, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "claude", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 79.99999999999999, + "semantic_relevance": 4.6, + "factual_accuracy": 4.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 2.2, + "accountability": 2.2, + "transparency": 3.4, + "authority": 4.2, + "avg_ge_freq": 0.93334, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.78947368421053, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.86668, + "relative_se_rank": 1.6975609756097563, + "normalized_reciprocal_se_rank": 0.073015873015873, + "reciprocal_se_rank": 0.02725381414701803, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 3.6, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 64, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 3.6, + "factual_accuracy": 4.0, + "freshness": 3.2, + "objectivity_tone": 4.2, + "layout_ad_density": 3.6, + "accountability": 4.4, + "transparency": 4.8, + "authority": 4.2, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.29, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 4.75, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 4.0, + "accountability": 4.4, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.73336, + "relative_se_rank": 1.525, + "normalized_reciprocal_se_rank": 0.3108225108225108, + "reciprocal_se_rank": 0.08439667128987517, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 3.0, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 5.346666666666667, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 4.2, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 4.2, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.774468085106383, + "normalized_reciprocal_se_rank": 0.03353535353535354, + "reciprocal_se_rank": 0.017766990291262132, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.78947368421052, + "semantic_relevance": 4.0, + "factual_accuracy": 4.6, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 4.4, + "accountability": 3.4, + "transparency": 3.4, + "authority": 4.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.5590909090909093, + "normalized_reciprocal_se_rank": 0.0814073751872795, + "reciprocal_se_rank": 0.029270218795001624, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.7894736842105, + "semantic_relevance": 4.2, + "factual_accuracy": 3.8, + "freshness": 4.6, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 79.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 3.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.0, + "accountability": 4.5, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 0.75, + "relative_se_rank": 0.5520833333333334, + "normalized_reciprocal_se_rank": 0.6112794612794613, + "reciprocal_se_rank": 0.1565938511326861, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "tavily", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 79.73684210526315, + "semantic_relevance": 2.75, + "factual_accuracy": 3.75, + "freshness": 4.25, + "objectivity_tone": 4.25, + "layout_ad_density": 3.75, + "accountability": 4.25, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6489361702127658, + "normalized_reciprocal_se_rank": 0.06993006993006994, + "reciprocal_se_rank": 0.026512322628827484, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.041666666666667, + "weighted_total_content_score": 79.64912280701755, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.0, + "transparency": 4.666666666666667, + "authority": 4.333333333333333, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.9583333333333335, + "weighted_total_content_score": 79.64912280701753, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 2.3333333333333335, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.64912280701753, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 4.333333333333333, + "freshness": 4.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6341463414634145, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "exa", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.9583333333333335, + "weighted_total_content_score": 79.64912280701753, + "semantic_relevance": 4.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 2.6666666666666665, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 79.57894736842105, + "semantic_relevance": 4.6, + "factual_accuracy": 4.0, + "freshness": 2.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 3.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 0.9000000000000001, + "normalized_reciprocal_se_rank": 0.3103683297622692, + "reciprocal_se_rank": 0.08428753554967147, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.57894736842105, + "semantic_relevance": 4.0, + "factual_accuracy": 4.4, + "freshness": 3.8, + "objectivity_tone": 3.6, + "layout_ad_density": 3.6, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.019047619047619, + "normalized_reciprocal_se_rank": 0.022745978301533857, + "reciprocal_se_rank": 0.015174397698669542, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 79.57894736842105, + "semantic_relevance": 2.4, + "factual_accuracy": 4.0, + "freshness": 3.2, + "objectivity_tone": 4.8, + "layout_ad_density": 4.4, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 79.57894736842105, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.8, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 67, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 79.57894736842104, + "semantic_relevance": 5.0, + "factual_accuracy": 4.6, + "freshness": 2.2, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gensee", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.57894736842104, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.5422222222222222, + "normalized_reciprocal_se_rank": 0.04777045820524082, + "reciprocal_se_rank": 0.021187561559026312, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.96875, + "weighted_total_content_score": 79.47368421052632, + "semantic_relevance": 3.25, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 3.0, + "accountability": 3.25, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.96875, + "weighted_total_content_score": 79.47368421052632, + "semantic_relevance": 3.75, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.47368421052632, + "semantic_relevance": 2.75, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 4.5, + "accountability": 3.25, + "transparency": 3.5, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.4736842105263, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.4736842105263, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 4.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.277777777777778, + "normalized_reciprocal_se_rank": 0.09539842873176206, + "reciprocal_se_rank": 0.03263214670981661, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.36842105263159, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.8, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.331914893617021, + "normalized_reciprocal_se_rank": 0.1595510662177329, + "reciprocal_se_rank": 0.04804746494066882, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "claude", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.36842105263159, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 3.6, + "objectivity_tone": 3.8, + "layout_ad_density": 4.0, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6816326530612244, + "normalized_reciprocal_se_rank": 0.04740740740740741, + "reciprocal_se_rank": 0.021100323624595466, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-5", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 79.36842105263158, + "semantic_relevance": 4.4, + "factual_accuracy": 4.6, + "freshness": 4.0, + "objectivity_tone": 4.4, + "layout_ad_density": 4.4, + "accountability": 4.0, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.5463414634146342, + "normalized_reciprocal_se_rank": 0.23572567783094098, + "reciprocal_se_rank": 0.06635155850792028, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 5, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 79.36842105263158, + "semantic_relevance": 4.0, + "factual_accuracy": 4.8, + "freshness": 4.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 3.8, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 0.60002, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 4.041666666666667, + "weighted_total_content_score": 79.29824561403508, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 3.6666666666666665, + "freshness": 4.333333333333333, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 5.0, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.8889, + "relative_se_rank": 1.825, + "normalized_reciprocal_se_rank": 0.04958677685950414, + "reciprocal_se_rank": 0.02162400706090026, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "exa", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 79.21052631578948, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 2.25, + "accountability": 4.0, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1170212765957448, + "normalized_reciprocal_se_rank": 0.24402757736091069, + "reciprocal_se_rank": 0.06834643242410232, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "exa", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.96875, + "weighted_total_content_score": 79.21052631578948, + "semantic_relevance": 3.5, + "factual_accuracy": 4.25, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1833333333333336, + "normalized_reciprocal_se_rank": 0.19943883277216612, + "reciprocal_se_rank": 0.05763214670981661, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 79.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 79.15789473684211, + "semantic_relevance": 3.8, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.4, + "accountability": 5.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.005, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 79.15789473684211, + "semantic_relevance": 4.4, + "factual_accuracy": 3.8, + "freshness": 4.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 4.2, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8454545454545457, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 79.15789473684211, + "semantic_relevance": 5.0, + "factual_accuracy": 4.2, + "freshness": 3.8, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 3.2, + "transparency": 3.4, + "authority": 3.6, + "avg_ge_freq": 0.60002, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 79.1578947368421, + "semantic_relevance": 2.8, + "factual_accuracy": 4.6, + "freshness": 3.2, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 4.8, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.1675675675675676, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 79.1578947368421, + "semantic_relevance": 2.4, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 4.8, + "accountability": 4.0, + "transparency": 4.8, + "authority": 4.6, + "avg_ge_freq": 0.73332, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 35, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 4.2, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 4.6, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 3.6666666666666665, + "transparency": 4.666666666666667, + "authority": 4.666666666666667, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.0625, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 1.5, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 3.5, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 3.6, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 3.6, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.06666666666666667, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 3.5, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 2.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 2.75, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 3.75, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1666666666666667, + "normalized_reciprocal_se_rank": 0.2572390572390572, + "reciprocal_se_rank": 0.0715210355987055, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "google-search", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 3.5, + "transparency": 3.0, + "authority": 3.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 3.8, + "authority": 4.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.822727272727273, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.96875, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 3.75, + "factual_accuracy": 3.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.6861702127659575, + "normalized_reciprocal_se_rank": 0.041919191919191925, + "reciprocal_se_rank": 0.01978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gpt-4o", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 4.2, + "accountability": 3.6, + "transparency": 3.4, + "authority": 3.2, + "avg_ge_freq": 0.80002, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.25, + "freshness": 4.75, + "objectivity_tone": 3.75, + "layout_ad_density": 3.25, + "accountability": 4.25, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": 0.583325, + "relative_se_rank": 1.0872093023255813, + "normalized_reciprocal_se_rank": 0.1331890331890332, + "reciprocal_se_rank": 0.0417128987517337, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.5, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.94736842105263, + "semantic_relevance": 3.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 5.0, + "accountability": 3.5, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 6, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.9166666666666665, + "weighted_total_content_score": 78.94736842105262, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6666666666666665, + "accountability": 3.6666666666666665, + "transparency": 3.6666666666666665, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 78.73684210526315, + "semantic_relevance": 2.6, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 4.2, + "layout_ad_density": 4.4, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.64, + "normalized_reciprocal_se_rank": 0.05594405594405595, + "reciprocal_se_rank": 0.02315160567587752, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 78.73684210526315, + "semantic_relevance": 4.4, + "factual_accuracy": 3.6, + "freshness": 4.4, + "objectivity_tone": 3.2, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 3.9166666666666665, + "weighted_total_content_score": 78.59649122807018, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 4.333333333333333, + "freshness": 4.333333333333333, + "objectivity_tone": 3.0, + "layout_ad_density": 3.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 3.6666666666666665, + "authority": 3.3333333333333335, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.1259259259259259, + "normalized_reciprocal_se_rank": 0.5342953342953344, + "reciprocal_se_rank": 0.13809523809523808, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 78.52631578947368, + "semantic_relevance": 2.6, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 78.52631578947367, + "semantic_relevance": 4.2, + "factual_accuracy": 4.0, + "freshness": 2.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 78.52631578947367, + "semantic_relevance": 3.4, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 78.52631578947367, + "semantic_relevance": 4.8, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4272727272727272, + "normalized_reciprocal_se_rank": 0.16676656676656676, + "reciprocal_se_rank": 0.049781286674490555, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 78.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.0, + "accountability": 3.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.1818181818181819, + "normalized_reciprocal_se_rank": 0.27705627705627706, + "reciprocal_se_rank": 0.07628294036061026, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 78.42105263157893, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.5, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 4.5, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1521739130434783, + "normalized_reciprocal_se_rank": 0.21099887766554432, + "reciprocal_se_rank": 0.060409924487594385, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "claude", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 78.42105263157893, + "semantic_relevance": 4.0, + "factual_accuracy": 3.5, + "freshness": 2.5, + "objectivity_tone": 3.0, + "layout_ad_density": 4.5, + "accountability": 5.0, + "transparency": 5.0, + "authority": 4.5, + "avg_ge_freq": 0.83335, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 78.42105263157893, + "semantic_relevance": 2.5, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2804878048780488, + "normalized_reciprocal_se_rank": 0.3265993265993266, + "reciprocal_se_rank": 0.08818770226537216, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-5", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.42105263157893, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 2.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.16665, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 78.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 0.990909090909091, + "normalized_reciprocal_se_rank": 0.2928330928330928, + "reciprocal_se_rank": 0.08007397133610725, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 78.3157894736842, + "semantic_relevance": 4.8, + "factual_accuracy": 4.0, + "freshness": 4.6, + "objectivity_tone": 3.2, + "layout_ad_density": 2.6, + "accountability": 3.8, + "transparency": 4.4, + "authority": 3.8, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.2875, + "normalized_reciprocal_se_rank": 0.2067821067821068, + "reciprocal_se_rank": 0.05939667128987517, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 78.3157894736842, + "semantic_relevance": 4.2, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.0390243902439025, + "normalized_reciprocal_se_rank": 0.031553631553631556, + "reciprocal_se_rank": 0.01729079981507166, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.55, + "weighted_total_content_score": 78.3157894736842, + "semantic_relevance": 3.5, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.0949999999999998, + "normalized_reciprocal_se_rank": 0.029752066115702486, + "reciprocal_se_rank": 0.016857899382171228, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 78.3157894736842, + "semantic_relevance": 4.6, + "factual_accuracy": 4.2, + "freshness": 4.6, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 8, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 78.3157894736842, + "semantic_relevance": 3.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8697674418604653, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.15789473684211, + "semantic_relevance": 3.75, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 4.5, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 78.15789473684211, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 3.75, + "objectivity_tone": 3.25, + "layout_ad_density": 4.5, + "accountability": 4.25, + "transparency": 3.75, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 78.10526315789474, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.6714285714285715, + "normalized_reciprocal_se_rank": 0.07169472502805836, + "reciprocal_se_rank": 0.026936353829557714, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 2.4, + "transparency": 3.0, + "authority": 2.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.4, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 4.75, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 3.5, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.86668, + "relative_se_rank": 1.52, + "normalized_reciprocal_se_rank": 0.3306397306397306, + "reciprocal_se_rank": 0.08915857605177993, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 3.8, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 3.2, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 3.5, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 0.6933333333333335, + "normalized_reciprocal_se_rank": 0.27141377141377143, + "reciprocal_se_rank": 0.07492709555816351, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "google-search", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 3.6, + "factual_accuracy": 4.0, + "freshness": 2.8, + "objectivity_tone": 3.4, + "layout_ad_density": 3.8, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 2.8, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 4.6, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.1288888888888888, + "normalized_reciprocal_se_rank": 0.2929652076318743, + "reciprocal_se_rank": 0.0801057173678533, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "tavily", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 3.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7826086956521738, + "normalized_reciprocal_se_rank": 0.05594405594405595, + "reciprocal_se_rank": 0.02315160567587752, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 78.10526315789473, + "semantic_relevance": 2.8, + "factual_accuracy": 3.6, + "freshness": 4.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.2, + "accountability": 4.8, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.93334, + "relative_se_rank": 0.48936170212765956, + "normalized_reciprocal_se_rank": 0.48424563091229755, + "reciprocal_se_rank": 0.12606873169979965, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "gensee", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.89473684210527, + "semantic_relevance": 4.6, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.8697674418604653, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 4.25, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 2.5, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.2790697674418605, + "normalized_reciprocal_se_rank": 0.13986013986013987, + "reciprocal_se_rank": 0.043315907393577296, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 2.8, + "factual_accuracy": 4.4, + "freshness": 4.6, + "objectivity_tone": 4.4, + "layout_ad_density": 4.0, + "accountability": 3.2, + "transparency": 4.2, + "authority": 3.6, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.9658536585365856, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 2.5, + "factual_accuracy": 4.25, + "freshness": 4.0, + "objectivity_tone": 4.25, + "layout_ad_density": 4.0, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 0.916675, + "relative_se_rank": 1.6326530612244898, + "normalized_reciprocal_se_rank": 0.03513394817742644, + "reciprocal_se_rank": 0.018151118615449553, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gensee", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 2.0, + "transparency": 2.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.8333333333333335, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.666666666666667, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 2.6666666666666665, + "accountability": 3.6666666666666665, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 1.05, + "normalized_reciprocal_se_rank": 0.14921703810592699, + "reciprocal_se_rank": 0.04556428828273488, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "gpt-5", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 1.5, + "accountability": 4.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 1.1041666666666667, + "normalized_reciprocal_se_rank": 0.21099887766554432, + "reciprocal_se_rank": 0.060409924487594385, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.90625, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 3.75, + "factual_accuracy": 4.25, + "freshness": 4.75, + "objectivity_tone": 3.5, + "layout_ad_density": 2.75, + "accountability": 3.75, + "transparency": 4.5, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.19791666666666666, + "normalized_reciprocal_se_rank": 0.415913521176679, + "reciprocal_se_rank": 0.10964912280701754, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "tavily", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 77.89473684210526, + "semantic_relevance": 2.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0510204081632653, + "normalized_reciprocal_se_rank": 0.32659932659932656, + "reciprocal_se_rank": 0.08818770226537216, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.89473684210525, + "semantic_relevance": 4.4, + "factual_accuracy": 3.8, + "freshness": 4.6, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 3.4, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.73332, + "relative_se_rank": 0.1782608695652174, + "normalized_reciprocal_se_rank": 0.40796055796055797, + "reciprocal_se_rank": 0.10773809523809523, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-4o", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.89473684210525, + "semantic_relevance": 4.6, + "factual_accuracy": 3.8, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 3.8, + "transparency": 3.6, + "authority": 3.4, + "avg_ge_freq": 0.73334, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.89473684210525, + "semantic_relevance": 3.4, + "factual_accuracy": 4.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4577777777777778, + "normalized_reciprocal_se_rank": 0.1198912198912199, + "reciprocal_se_rank": 0.03851755041075429, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.6842105263158, + "semantic_relevance": 4.6, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.2, + "accountability": 3.8, + "transparency": 4.6, + "authority": 3.6, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.0166666666666668, + "normalized_reciprocal_se_rank": 0.22060454165717322, + "reciprocal_se_rank": 0.06271808161179648, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "google-search", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.6842105263158, + "semantic_relevance": 4.8, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.2, + "accountability": 2.6, + "transparency": 4.2, + "authority": 3.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.6842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.2, + "accountability": 4.0, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 77.6842105263158, + "semantic_relevance": 4.2, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.8, + "accountability": 4.4, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5434782608695652, + "normalized_reciprocal_se_rank": 0.38615039281705954, + "reciprocal_se_rank": 0.10249730312837109, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.68421052631578, + "semantic_relevance": 3.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 2.4, + "accountability": 3.6, + "transparency": 4.0, + "authority": 4.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.63157894736842, + "semantic_relevance": 4.25, + "factual_accuracy": 3.75, + "freshness": 4.75, + "objectivity_tone": 3.75, + "layout_ad_density": 3.5, + "accountability": 3.75, + "transparency": 3.25, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9146341463414633, + "normalized_reciprocal_se_rank": 0.2965554060691482, + "reciprocal_se_rank": 0.08096841068166424, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.90625, + "weighted_total_content_score": 77.63157894736841, + "semantic_relevance": 3.75, + "factual_accuracy": 4.0, + "freshness": 4.25, + "objectivity_tone": 3.5, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 0.666675, + "relative_se_rank": 1.622340425531915, + "normalized_reciprocal_se_rank": 0.11994949494949496, + "reciprocal_se_rank": 0.03853155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 77.54385964912281, + "semantic_relevance": 3.0, + "factual_accuracy": 3.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 4.0, + "accountability": 4.666666666666667, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 0.5555333333333333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 77.54385964912281, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 4.333333333333333, + "freshness": 3.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 2.6666666666666665, + "transparency": 3.0, + "authority": 3.3333333333333335, + "avg_ge_freq": 0.4444333333333333, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 38, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 3.6, + "factual_accuracy": 4.0, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 3.2, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 3.0, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 3.4, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 3.0, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.8, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.6, + "accountability": 3.6, + "transparency": 3.4, + "authority": 3.2, + "avg_ge_freq": 0.60002, + "relative_se_rank": 1.0739130434782609, + "normalized_reciprocal_se_rank": 0.22083052749719417, + "reciprocal_se_rank": 0.06277238403451996, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 3.2, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.4, + "accountability": 3.4, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.6133333333333333, + "normalized_reciprocal_se_rank": 0.044291334396597555, + "reciprocal_se_rank": 0.020351558507920284, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 3.2, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.6, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 0.8734693877551021, + "normalized_reciprocal_se_rank": 0.3166137566137566, + "reciprocal_se_rank": 0.08578825705039297, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "exa", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 4.6, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3791666666666669, + "normalized_reciprocal_se_rank": 0.17631313131313134, + "reciprocal_se_rank": 0.0520752427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 3.2, + "transparency": 3.2, + "authority": 3.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 4.6, + "factual_accuracy": 4.0, + "freshness": 3.8, + "objectivity_tone": 3.4, + "layout_ad_density": 2.8, + "accountability": 4.6, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 77.47368421052632, + "semantic_relevance": 3.4, + "factual_accuracy": 3.6, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0095238095238095, + "normalized_reciprocal_se_rank": 0.4196632996632997, + "reciprocal_se_rank": 0.11055016181229774, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.4736842105263, + "semantic_relevance": 4.6, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.2, + "accountability": 4.4, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.7866666666666666, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.4736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 3.6, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4844444444444445, + "normalized_reciprocal_se_rank": 0.21503928170594838, + "reciprocal_se_rank": 0.061380798274002155, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.26315789473685, + "semantic_relevance": 4.4, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.6, + "accountability": 3.8, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7100000000000002, + "normalized_reciprocal_se_rank": 0.07164391164391165, + "reciprocal_se_rank": 0.0269241438173477, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 77.26315789473685, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 2.8, + "objectivity_tone": 3.0, + "layout_ad_density": 3.6, + "accountability": 4.6, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.9, + "weighted_total_content_score": 77.26315789473684, + "semantic_relevance": 2.6, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.6, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 77.26315789473684, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 3.2, + "layout_ad_density": 2.2, + "accountability": 4.2, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6833333333333331, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "claude", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.19298245614034, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 3.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 2.3333333333333335, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 3.3333333333333335, + "avg_ge_freq": 0.8889, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 3.9166666666666665, + "weighted_total_content_score": 77.19298245614034, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.6666666666666665, + "accountability": 5.0, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.875, + "normalized_reciprocal_se_rank": 0.03607503607503607, + "reciprocal_se_rank": 0.018377253814147013, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 3.8333333333333335, + "weighted_total_content_score": 77.19298245614034, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 3.6666666666666665, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 0.8536585365853658, + "normalized_reciprocal_se_rank": 0.518037518037518, + "reciprocal_se_rank": 0.1341886269070735, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "exa", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.90625, + "weighted_total_content_score": 77.10526315789474, + "semantic_relevance": 3.75, + "factual_accuracy": 3.5, + "freshness": 3.75, + "objectivity_tone": 3.5, + "layout_ad_density": 3.5, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.14500000000000002, + "normalized_reciprocal_se_rank": 0.4846887680221013, + "reciprocal_se_rank": 0.12617521367521367, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.84375, + "weighted_total_content_score": 77.10526315789473, + "semantic_relevance": 4.25, + "factual_accuracy": 3.5, + "freshness": 4.75, + "objectivity_tone": 4.0, + "layout_ad_density": 2.75, + "accountability": 3.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.833325, + "relative_se_rank": 0.585, + "normalized_reciprocal_se_rank": 0.4254295087628421, + "reciprocal_se_rank": 0.11193573147456642, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.05263157894737, + "semantic_relevance": 3.8, + "factual_accuracy": 3.8, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.05263157894737, + "semantic_relevance": 3.8, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.2, + "accountability": 2.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 77.05263157894737, + "semantic_relevance": 3.4, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 3.6, + "layout_ad_density": 4.0, + "accountability": 3.8, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": 0.59998, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 24, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.78125, + "weighted_total_content_score": 77.05263157894737, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.5, + "objectivity_tone": 5.0, + "layout_ad_density": 4.75, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.2936170212765956, + "normalized_reciprocal_se_rank": 0.3306397306397306, + "reciprocal_se_rank": 0.08915857605177993, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-5", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 77.05263157894737, + "semantic_relevance": 2.6, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 3.4, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.2, + "accountability": 4.6, + "transparency": 3.8, + "authority": 3.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.6448979591836732, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 2.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.8125, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.25, + "layout_ad_density": 2.5, + "accountability": 3.25, + "transparency": 3.75, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 2.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 3.4, + "transparency": 3.4, + "authority": 3.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.0, + "authority": 2.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.9375, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 2.5, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 2.0, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.4, + "accountability": 3.4, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.8444444444444446, + "normalized_reciprocal_se_rank": 0.038159371492704826, + "reciprocal_se_rank": 0.018878101402373244, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 2.0, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.71875, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 3.75, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.25, + "transparency": 2.5, + "authority": 2.75, + "avg_ge_freq": 0.666675, + "relative_se_rank": 1.2604166666666667, + "normalized_reciprocal_se_rank": 0.13585858585858587, + "reciprocal_se_rank": 0.04235436893203884, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 0.8837209302325582, + "normalized_reciprocal_se_rank": 0.061098792806109886, + "reciprocal_se_rank": 0.024390243902439025, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 76.84210526315789, + "semantic_relevance": 4.4, + "factual_accuracy": 3.8, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 3.0, + "transparency": 4.0, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.168888888888889, + "normalized_reciprocal_se_rank": 0.11910213243546577, + "reciprocal_se_rank": 0.03832793959007551, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "tavily", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 3.9166666666666665, + "weighted_total_content_score": 76.84210526315788, + "semantic_relevance": 3.0, + "factual_accuracy": 3.6666666666666665, + "freshness": 4.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 4.666666666666667, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 76.63157894736841, + "semantic_relevance": 3.2, + "factual_accuracy": 4.2, + "freshness": 3.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 76.63157894736841, + "semantic_relevance": 4.6, + "factual_accuracy": 4.2, + "freshness": 3.4, + "objectivity_tone": 3.6, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 0.73332, + "relative_se_rank": 1.2916666666666665, + "normalized_reciprocal_se_rank": 0.19522206188872854, + "reciprocal_se_rank": 0.05661889351209739, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gensee", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 76.63157894736841, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.73334, + "relative_se_rank": 1.822727272727273, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 76.63157894736841, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 3.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 3.2, + "authority": 3.6, + "avg_ge_freq": 0.66668, + "relative_se_rank": 1.1375, + "normalized_reciprocal_se_rank": 0.13924963924963923, + "reciprocal_se_rank": 0.04316920943134535, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "tavily", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 76.63157894736841, + "semantic_relevance": 3.8, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 4.0, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5772727272727274, + "normalized_reciprocal_se_rank": 0.4294179894179894, + "reciprocal_se_rank": 0.11289412852519647, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "google-search", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.8125, + "weighted_total_content_score": 76.57894736842104, + "semantic_relevance": 4.0, + "factual_accuracy": 4.25, + "freshness": 4.0, + "objectivity_tone": 3.5, + "layout_ad_density": 4.0, + "accountability": 3.25, + "transparency": 3.75, + "authority": 3.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.9166666666666665, + "weighted_total_content_score": 76.49122807017544, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 2.6666666666666665, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.333333333333333, + "transparency": 4.666666666666667, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.575757575757576, + "normalized_reciprocal_se_rank": 0.11264156718702174, + "reciprocal_se_rank": 0.03677552221241542, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.49122807017544, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6341463414634145, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 76.42105263157895, + "semantic_relevance": 3.8, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 3.6, + "layout_ad_density": 3.8, + "accountability": 4.0, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.42105263157893, + "semantic_relevance": 2.4, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 3.4, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.175, + "weighted_total_content_score": 76.42105263157893, + "semantic_relevance": 4.6, + "factual_accuracy": 4.4, + "freshness": 3.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.2, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.84375, + "weighted_total_content_score": 76.3157894736842, + "semantic_relevance": 3.0, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.75, + "accountability": 2.75, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 0.49999999999999994, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.3157894736842, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.12244897959183673, + "normalized_reciprocal_se_rank": 0.4277777777777778, + "reciprocal_se_rank": 0.1125, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gpt-5", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.21052631578948, + "semantic_relevance": 2.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.4, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": 0.8, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 76.21052631578947, + "semantic_relevance": 3.6, + "factual_accuracy": 4.2, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 2.2, + "accountability": 4.6, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 37, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 76.21052631578947, + "semantic_relevance": 3.2, + "factual_accuracy": 4.4, + "freshness": 4.4, + "objectivity_tone": 3.2, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 76.21052631578947, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 2.0, + "accountability": 4.4, + "transparency": 3.6, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4878048780487805, + "normalized_reciprocal_se_rank": 0.28902356902356907, + "reciprocal_se_rank": 0.07915857605177994, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 76.14035087719299, + "semantic_relevance": 4.0, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.3333333333333335, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.78125, + "weighted_total_content_score": 76.05263157894737, + "semantic_relevance": 3.75, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.25, + "layout_ad_density": 2.5, + "accountability": 2.75, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.78125, + "weighted_total_content_score": 76.05263157894737, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 4.75, + "objectivity_tone": 3.5, + "layout_ad_density": 2.25, + "accountability": 4.25, + "transparency": 4.0, + "authority": 3.25, + "avg_ge_freq": 0.750025, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.925, + "weighted_total_content_score": 76.0, + "semantic_relevance": 3.8, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 2.4, + "layout_ad_density": 3.8, + "accountability": 4.8, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.275, + "weighted_total_content_score": 76.0, + "semantic_relevance": 3.75, + "factual_accuracy": 4.25, + "freshness": 4.4, + "objectivity_tone": 4.25, + "layout_ad_density": 4.2, + "accountability": 3.8, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.3853658536585365, + "normalized_reciprocal_se_rank": 0.07152667494913485, + "reciprocal_se_rank": 0.026895972863991136, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 76.0, + "semantic_relevance": 4.6, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 4.0, + "layout_ad_density": 2.2, + "accountability": 4.0, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.7209302325581395, + "normalized_reciprocal_se_rank": 0.03345137651020004, + "reciprocal_se_rank": 0.017746811345897583, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "google-search", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 76.0, + "semantic_relevance": 1.8, + "factual_accuracy": 4.4, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 3.8, + "transparency": 4.4, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 75.78947368421053, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.333333333333333, + "freshness": 2.3333333333333335, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 4.0, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 75.78947368421053, + "semantic_relevance": 3.6, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 3.8, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.7916666666666665, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 4.0, + "freshness": 2.6666666666666665, + "objectivity_tone": 4.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.666666666666667, + "avg_ge_freq": 0.4444333333333333, + "relative_se_rank": 1.7317073170731707, + "normalized_reciprocal_se_rank": 0.07323232323232325, + "reciprocal_se_rank": 0.027305825242718445, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 4.75, + "factual_accuracy": 4.25, + "freshness": 4.75, + "objectivity_tone": 3.0, + "layout_ad_density": 3.25, + "accountability": 2.75, + "transparency": 3.75, + "authority": 3.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 3.8, + "factual_accuracy": 3.8, + "freshness": 4.2, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 3.8, + "transparency": 4.6, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 80, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.6875, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 4.75, + "factual_accuracy": 4.25, + "freshness": 3.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.75, + "transparency": 3.0, + "authority": 3.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.3333333333333335, + "accountability": 3.3333333333333335, + "transparency": 3.6666666666666665, + "authority": 3.6666666666666665, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-5", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.24375, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.25, + "accountability": 3.75, + "transparency": 4.75, + "authority": 4.75, + "avg_ge_freq": 0.583325, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.975, + "weighted_total_content_score": 75.78947368421052, + "semantic_relevance": 2.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 2.6, + "layout_ad_density": 4.4, + "accountability": 5.0, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4590909090909092, + "normalized_reciprocal_se_rank": 0.10868686868686868, + "reciprocal_se_rank": 0.0358252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 75.7894736842105, + "semantic_relevance": 3.8, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.0, + "accountability": 3.2, + "transparency": 3.8, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 75.57894736842105, + "semantic_relevance": 3.4, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.2, + "accountability": 3.4, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": 0.73336, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 75.57894736842104, + "semantic_relevance": 3.6, + "factual_accuracy": 3.4, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 75.57894736842104, + "semantic_relevance": 1.8, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.78125, + "weighted_total_content_score": 75.52631578947368, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 3.25, + "objectivity_tone": 3.25, + "layout_ad_density": 3.25, + "accountability": 4.0, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.13333333333333333, + "normalized_reciprocal_se_rank": 0.49466089466089463, + "reciprocal_se_rank": 0.1285714285714286, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.8125, + "weighted_total_content_score": 75.52631578947368, + "semantic_relevance": 3.25, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 3.25, + "layout_ad_density": 2.25, + "accountability": 4.5, + "transparency": 4.25, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.53125, + "normalized_reciprocal_se_rank": 0.10039026629935723, + "reciprocal_se_rank": 0.03383164165931156, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.78125, + "weighted_total_content_score": 75.52631578947367, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.75, + "accountability": 4.0, + "transparency": 3.75, + "authority": 3.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.5562500000000001, + "normalized_reciprocal_se_rank": 0.07948826241509169, + "reciprocal_se_rank": 0.028809072764791444, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.75, + "weighted_total_content_score": 75.36842105263159, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.75, + "accountability": 5.0, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 75.36842105263159, + "semantic_relevance": 4.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 2.6, + "accountability": 3.25, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9609756097560975, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.864285714285714, + "weighted_total_content_score": 75.36842105263159, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.065, + "normalized_reciprocal_se_rank": 0.043939393939393945, + "reciprocal_se_rank": 0.02026699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 75.36842105263158, + "semantic_relevance": 2.8, + "factual_accuracy": 4.2, + "freshness": 4.6, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.8125, + "weighted_total_content_score": 75.26315789473684, + "semantic_relevance": 3.5, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.5, + "accountability": 3.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 3.3666666666666667, + "normalized_reciprocal_se_rank": 0.5, + "reciprocal_se_rank": 0.12985436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "claude", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 75.26315789473684, + "semantic_relevance": 4.5, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 3.75, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": 0.833325, + "relative_se_rank": 1.2272727272727275, + "normalized_reciprocal_se_rank": 0.1977873977873978, + "reciprocal_se_rank": 0.05723532131299121, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "google-search", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 75.26315789473684, + "semantic_relevance": 1.5, + "factual_accuracy": 3.0, + "freshness": 3.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.5, + "accountability": 4.5, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 75.1578947368421, + "semantic_relevance": 2.4, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 3.8, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.8744186046511628, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 75.1578947368421, + "semantic_relevance": 2.8, + "factual_accuracy": 4.2, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.2, + "accountability": 3.2, + "transparency": 3.2, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2958333333333336, + "normalized_reciprocal_se_rank": 0.19822467095194368, + "reciprocal_se_rank": 0.057340394233598116, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 75.0877192982456, + "semantic_relevance": 4.0, + "factual_accuracy": 3.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.0, + "accountability": 3.3333333333333335, + "transparency": 3.6666666666666665, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3414634146341464, + "normalized_reciprocal_se_rank": 0.06686548019881354, + "reciprocal_se_rank": 0.025775928494375096, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.78125, + "weighted_total_content_score": 75.0, + "semantic_relevance": 3.25, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.5, + "accountability": 2.5, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.8125, + "weighted_total_content_score": 74.99999999999999, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 3.5, + "objectivity_tone": 4.25, + "layout_ad_density": 3.75, + "accountability": 4.25, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.875, + "weighted_total_content_score": 74.94736842105263, + "semantic_relevance": 2.2, + "factual_accuracy": 3.2, + "freshness": 4.0, + "objectivity_tone": 3.8, + "layout_ad_density": 3.6, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.4878048780487805, + "normalized_reciprocal_se_rank": 0.3108225108225108, + "reciprocal_se_rank": 0.08439667128987517, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.825, + "weighted_total_content_score": 74.94736842105263, + "semantic_relevance": 2.4, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 4.0, + "transparency": 4.6, + "authority": 3.6, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.9658536585365856, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 74.94736842105263, + "semantic_relevance": 2.4, + "factual_accuracy": 3.8, + "freshness": 4.4, + "objectivity_tone": 4.2, + "layout_ad_density": 3.4, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.807142857142857, + "weighted_total_content_score": 74.94736842105263, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.6, + "layout_ad_density": 4.5, + "accountability": 3.4, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.86668, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 74.94736842105263, + "semantic_relevance": 4.4, + "factual_accuracy": 4.2, + "freshness": 4.8, + "objectivity_tone": 3.0, + "layout_ad_density": 2.8, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": 0.6, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.725, + "weighted_total_content_score": 74.94736842105263, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 2.75, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.0714285714285716, + "normalized_reciprocal_se_rank": 0.013822434875066453, + "reciprocal_se_rank": 0.013030148185998977, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 3.6, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.2, + "accountability": 3.8, + "transparency": 4.4, + "authority": 3.8, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.831818181818182, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "claude", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 4.5, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.1875, + "normalized_reciprocal_se_rank": 0.40548340548340545, + "reciprocal_se_rank": 0.10714285714285714, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.59375, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.5, + "objectivity_tone": 4.5, + "layout_ad_density": 3.5, + "accountability": 2.5, + "transparency": 2.5, + "authority": 2.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.4166666666666667, + "normalized_reciprocal_se_rank": 0.35364014292585727, + "reciprocal_se_rank": 0.09468537414965986, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 4.4, + "factual_accuracy": 4.0, + "freshness": 4.6, + "objectivity_tone": 3.4, + "layout_ad_density": 2.2, + "accountability": 4.2, + "transparency": 3.6, + "authority": 3.2, + "avg_ge_freq": 0.6, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.6666666666666665, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 4.0, + "accountability": 3.6666666666666665, + "transparency": 2.6666666666666665, + "authority": 2.6666666666666665, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 1.6, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.2, + "accountability": 3.6, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.8125, + "weighted_total_content_score": 74.73684210526315, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 3.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1888888888888889, + "normalized_reciprocal_se_rank": 0.1878787878787879, + "reciprocal_se_rank": 0.054854368932038836, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 74.52631578947368, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 2.8, + "layout_ad_density": 2.2, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.1400000000000001, + "normalized_reciprocal_se_rank": 0.29733700642791555, + "reciprocal_se_rank": 0.08115622241835833, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "google-search", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 74.52631578947368, + "semantic_relevance": 3.4, + "factual_accuracy": 3.6, + "freshness": 2.8, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 4.4, + "transparency": 4.6, + "authority": 4.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 74.52631578947367, + "semantic_relevance": 4.2, + "factual_accuracy": 3.8, + "freshness": 4.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.6, + "authority": 3.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 74.52631578947367, + "semantic_relevance": 3.8, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.6, + "accountability": 4.2, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.8, + "normalized_reciprocal_se_rank": 0.09595959595959597, + "reciprocal_se_rank": 0.032766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.075, + "weighted_total_content_score": 74.52631578947367, + "semantic_relevance": 4.2, + "factual_accuracy": 4.6, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 3.2, + "accountability": 4.25, + "transparency": 3.75, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.881818181818182, + "normalized_reciprocal_se_rank": 0.04087938205585265, + "reciprocal_se_rank": 0.019531696173615075, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 74.52631578947367, + "semantic_relevance": 2.4, + "factual_accuracy": 4.4, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 3.2, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 74.52631578947367, + "semantic_relevance": 3.4, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 2.6, + "layout_ad_density": 2.2, + "accountability": 3.8, + "transparency": 4.6, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.6315789473684212, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 3.7083333333333335, + "weighted_total_content_score": 74.3859649122807, + "semantic_relevance": 4.0, + "factual_accuracy": 3.6666666666666665, + "freshness": 3.6666666666666665, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.3333333333333335, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 0.7152777777777778, + "normalized_reciprocal_se_rank": 0.5973063973063973, + "reciprocal_se_rank": 0.15323624595469257, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.85, + "weighted_total_content_score": 74.3157894736842, + "semantic_relevance": 1.8, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 4.0, + "accountability": 4.6, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 30, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 74.3157894736842, + "semantic_relevance": 3.8, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4782608695652173, + "normalized_reciprocal_se_rank": 0.1706035969450604, + "reciprocal_se_rank": 0.05070329149893441, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 74.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.8, + "accountability": 3.2, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.73334, + "relative_se_rank": 0.6444444444444445, + "normalized_reciprocal_se_rank": 0.24761653805132067, + "reciprocal_se_rank": 0.06920882831815714, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "google-search", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 74.3157894736842, + "semantic_relevance": 4.6, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 3.2, + "layout_ad_density": 3.6, + "accountability": 3.4, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.6875, + "weighted_total_content_score": 74.21052631578948, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 3.5, + "transparency": 3.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 74.21052631578947, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 3.0, + "objectivity_tone": 3.5, + "layout_ad_density": 4.5, + "accountability": 3.5, + "transparency": 3.0, + "authority": 2.5, + "avg_ge_freq": 0.83335, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.71875, + "weighted_total_content_score": 74.21052631578947, + "semantic_relevance": 2.75, + "factual_accuracy": 3.5, + "freshness": 4.5, + "objectivity_tone": 4.75, + "layout_ad_density": 4.0, + "accountability": 3.75, + "transparency": 3.0, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6032608695652173, + "normalized_reciprocal_se_rank": 0.46677890011223344, + "reciprocal_se_rank": 0.12187162891046385, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 74.10526315789474, + "semantic_relevance": 2.8, + "factual_accuracy": 3.6, + "freshness": 4.2, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 3.8, + "transparency": 3.8, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 25, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.055, + "weighted_total_content_score": 74.10526315789473, + "semantic_relevance": 3.75, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 3.8, + "accountability": 3.8, + "transparency": 4.4, + "authority": 3.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 1.7951219512195125, + "normalized_reciprocal_se_rank": 0.037677474041110416, + "reciprocal_se_rank": 0.018762305655509535, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 74.10526315789473, + "semantic_relevance": 4.2, + "factual_accuracy": 3.8, + "freshness": 4.8, + "objectivity_tone": 3.2, + "layout_ad_density": 2.8, + "accountability": 3.8, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 74.10526315789473, + "semantic_relevance": 3.2, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.2, + "transparency": 3.2, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4232558139534883, + "normalized_reciprocal_se_rank": 0.2692063492063492, + "reciprocal_se_rank": 0.07439667128987518, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 74.03508771929823, + "semantic_relevance": 1.6666666666666667, + "factual_accuracy": 3.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 3.6666666666666665, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gensee", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 73.89473684210526, + "semantic_relevance": 4.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.2, + "accountability": 4.2, + "transparency": 3.8, + "authority": 3.4, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 73.89473684210525, + "semantic_relevance": 3.6, + "factual_accuracy": 3.4, + "freshness": 4.8, + "objectivity_tone": 2.8, + "layout_ad_density": 3.8, + "accountability": 4.4, + "transparency": 3.6, + "authority": 3.8, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.0949999999999998, + "normalized_reciprocal_se_rank": 0.029752066115702486, + "reciprocal_se_rank": 0.016857899382171224, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.6666666666666665, + "weighted_total_content_score": 73.6842105263158, + "semantic_relevance": 2.0, + "factual_accuracy": 4.333333333333333, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.8889, + "relative_se_rank": 1.4444444444444446, + "normalized_reciprocal_se_rank": 0.11264156718702174, + "reciprocal_se_rank": 0.03677552221241542, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 73.6842105263158, + "semantic_relevance": 4.5, + "factual_accuracy": 3.75, + "freshness": 4.75, + "objectivity_tone": 3.75, + "layout_ad_density": 3.75, + "accountability": 3.0, + "transparency": 2.5, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 73.68421052631578, + "semantic_relevance": 3.2, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.4, + "accountability": 4.2, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 33, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 73.68421052631578, + "semantic_relevance": 3.8, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.8, + "accountability": 3.8, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.8666600000000001, + "relative_se_rank": 1.4697674418604652, + "normalized_reciprocal_se_rank": 0.14733044733044734, + "reciprocal_se_rank": 0.04511095700416089, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.6666666666666665, + "weighted_total_content_score": 73.68421052631578, + "semantic_relevance": 4.0, + "factual_accuracy": 3.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.6666666666666665, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4962962962962962, + "normalized_reciprocal_se_rank": 0.263973063973064, + "reciprocal_se_rank": 0.07313915857605179, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.78125, + "weighted_total_content_score": 73.68421052631578, + "semantic_relevance": 1.75, + "factual_accuracy": 3.75, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 73.68421052631578, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 73.68421052631578, + "semantic_relevance": 5.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.2, + "weighted_total_content_score": 73.47368421052633, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 4.0, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.2, + "avg_ge_freq": 0.8666600000000001, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 98, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.95, + "weighted_total_content_score": 73.47368421052632, + "semantic_relevance": 4.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.2, + "accountability": 4.0, + "transparency": 3.6, + "authority": 3.8, + "avg_ge_freq": 0.66668, + "relative_se_rank": 1.625, + "normalized_reciprocal_se_rank": 0.13742183742183742, + "reciprocal_se_rank": 0.0427300046232085, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.775, + "weighted_total_content_score": 73.47368421052632, + "semantic_relevance": 3.8, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.6, + "layout_ad_density": 3.2, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.4, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.231818181818182, + "normalized_reciprocal_se_rank": 0.11638973029347362, + "reciprocal_se_rank": 0.03767617305595603, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "google-search", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 73.47368421052632, + "semantic_relevance": 3.4, + "factual_accuracy": 4.0, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 73.26315789473685, + "semantic_relevance": 3.4, + "factual_accuracy": 3.6, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 2.6, + "accountability": 3.6, + "transparency": 4.4, + "authority": 3.8, + "avg_ge_freq": 0.66668, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 19, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.09, + "weighted_total_content_score": 73.26315789473684, + "semantic_relevance": 3.75, + "factual_accuracy": 3.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.8, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.9333333333333336, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.65, + "weighted_total_content_score": 73.26315789473684, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 4.6, + "objectivity_tone": 3.2, + "layout_ad_density": 4.6, + "accountability": 3.6, + "transparency": 2.4, + "authority": 2.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 73.15789473684211, + "semantic_relevance": 2.0, + "factual_accuracy": 3.5, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 0.3414634146341463, + "normalized_reciprocal_se_rank": 0.20524691358024694, + "reciprocal_se_rank": 0.059027777777777776, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 73.1578947368421, + "semantic_relevance": 1.25, + "factual_accuracy": 3.75, + "freshness": 3.25, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.6875, + "weighted_total_content_score": 73.1578947368421, + "semantic_relevance": 2.5, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 1.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.8, + "weighted_total_content_score": 73.05263157894737, + "semantic_relevance": 1.8, + "factual_accuracy": 3.4, + "freshness": 4.6, + "objectivity_tone": 3.4, + "layout_ad_density": 4.2, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9707317073170731, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.6666666666666665, + "weighted_total_content_score": 72.98245614035088, + "semantic_relevance": 2.3333333333333335, + "factual_accuracy": 4.333333333333333, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6666666666666665, + "accountability": 3.6666666666666665, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.7083333333333335, + "weighted_total_content_score": 72.98245614035088, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 3.6666666666666665, + "freshness": 4.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 4.0, + "accountability": 3.6666666666666665, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.6875, + "weighted_total_content_score": 72.89473684210526, + "semantic_relevance": 2.0, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.11979166666666666, + "normalized_reciprocal_se_rank": 0.4616697275788185, + "reciprocal_se_rank": 0.12064393939393939, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 72.84210526315789, + "semantic_relevance": 3.4, + "factual_accuracy": 4.2, + "freshness": 3.6, + "objectivity_tone": 4.0, + "layout_ad_density": 2.4, + "accountability": 3.0, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.195, + "normalized_reciprocal_se_rank": 0.011736411736411736, + "reciprocal_se_rank": 0.012528895053166894, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.8714285714285714, + "weighted_total_content_score": 72.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 4.2, + "freshness": 4.0, + "objectivity_tone": 3.8, + "layout_ad_density": 4.0, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 1.8266666666666667, + "normalized_reciprocal_se_rank": 0.051370851370851366, + "reciprocal_se_rank": 0.02205270457697642, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.1450000000000005, + "weighted_total_content_score": 72.84210526315789, + "semantic_relevance": 4.0, + "factual_accuracy": 3.25, + "freshness": 5.0, + "objectivity_tone": 3.25, + "layout_ad_density": 4.0, + "accountability": 4.4, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.7, + "weighted_total_content_score": 72.84210526315789, + "semantic_relevance": 2.4, + "factual_accuracy": 3.4, + "freshness": 3.6, + "objectivity_tone": 4.2, + "layout_ad_density": 4.2, + "accountability": 4.0, + "transparency": 3.6, + "authority": 4.2, + "avg_ge_freq": 0.46663999999999994, + "relative_se_rank": 2.0146341463414634, + "normalized_reciprocal_se_rank": 0.043939393939393945, + "reciprocal_se_rank": 0.02026699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.88, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 3.5, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.2, + "accountability": 4.4, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.94, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 4.75, + "factual_accuracy": 4.25, + "freshness": 5.0, + "objectivity_tone": 4.75, + "layout_ad_density": 3.8, + "accountability": 3.0, + "transparency": 3.2, + "authority": 3.0, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 0.9041666666666668, + "normalized_reciprocal_se_rank": 0.3102874902874903, + "reciprocal_se_rank": 0.08426811053024645, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "claude", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 1.2560975609756098, + "normalized_reciprocal_se_rank": 0.32659932659932656, + "reciprocal_se_rank": 0.08818770226537216, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "exa", + "query_id": 1, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 2.6, + "factual_accuracy": 4.2, + "freshness": 4.2, + "objectivity_tone": 3.4, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.7955555555555556, + "normalized_reciprocal_se_rank": 0.11082251082251082, + "reciprocal_se_rank": 0.0363384188626907, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 2.4, + "factual_accuracy": 3.0, + "freshness": 3.2, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 4.6, + "transparency": 4.8, + "authority": 4.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1729729729729725, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gpt-4o", + "query_id": 57, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 2.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 68, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 4.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.5, + "accountability": 4.5, + "transparency": 5.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 72.63157894736841, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 3.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 72.42105263157895, + "semantic_relevance": 2.4, + "factual_accuracy": 4.0, + "freshness": 3.8, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 72.42105263157895, + "semantic_relevance": 3.2, + "factual_accuracy": 3.4, + "freshness": 4.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.8, + "accountability": 3.4, + "transparency": 3.6, + "authority": 3.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.269387755102041, + "normalized_reciprocal_se_rank": 0.19822467095194368, + "reciprocal_se_rank": 0.057340394233598116, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 72.42105263157893, + "semantic_relevance": 3.6, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 72.42105263157893, + "semantic_relevance": 3.8, + "factual_accuracy": 3.4, + "freshness": 4.4, + "objectivity_tone": 2.8, + "layout_ad_density": 2.6, + "accountability": 3.8, + "transparency": 4.0, + "authority": 4.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.7777777777777777, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 28, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.6785714285714284, + "weighted_total_content_score": 72.42105263157893, + "semantic_relevance": 4.6, + "factual_accuracy": 3.8, + "freshness": 3.6, + "objectivity_tone": 2.8, + "layout_ad_density": 3.5, + "accountability": 3.8, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.93334, + "relative_se_rank": 0.9555555555555555, + "normalized_reciprocal_se_rank": 0.32192837465564744, + "reciprocal_se_rank": 0.08706531332744924, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "claude", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.65, + "weighted_total_content_score": 72.42105263157893, + "semantic_relevance": 3.2, + "factual_accuracy": 3.6, + "freshness": 4.8, + "objectivity_tone": 3.6, + "layout_ad_density": 2.6, + "accountability": 4.4, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 72.36842105263159, + "semantic_relevance": 2.75, + "factual_accuracy": 4.0, + "freshness": 3.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.25, + "accountability": 3.25, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 0.583325, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.6875, + "weighted_total_content_score": 72.36842105263158, + "semantic_relevance": 2.5, + "factual_accuracy": 3.25, + "freshness": 3.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.25, + "avg_ge_freq": 0.75, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 72.36842105263158, + "semantic_relevance": 3.25, + "factual_accuracy": 3.75, + "freshness": 4.0, + "objectivity_tone": 3.75, + "layout_ad_density": 4.25, + "accountability": 2.5, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 3, + "unweighted_mean_score": 3.7083333333333335, + "weighted_total_content_score": 72.28070175438596, + "semantic_relevance": 3.0, + "factual_accuracy": 3.3333333333333335, + "freshness": 4.333333333333333, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 72.28070175438596, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 3.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.6666666666666665, + "accountability": 3.3333333333333335, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 86, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 72.21052631578947, + "semantic_relevance": 4.2, + "factual_accuracy": 3.4, + "freshness": 4.6, + "objectivity_tone": 3.0, + "layout_ad_density": 2.8, + "accountability": 3.6, + "transparency": 4.0, + "authority": 3.4, + "avg_ge_freq": 0.80002, + "relative_se_rank": 1.3166666666666669, + "normalized_reciprocal_se_rank": 0.14733044733044734, + "reciprocal_se_rank": 0.04511095700416089, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 72.21052631578947, + "semantic_relevance": 4.0, + "factual_accuracy": 3.6, + "freshness": 3.8, + "objectivity_tone": 3.4, + "layout_ad_density": 2.8, + "accountability": 4.2, + "transparency": 3.2, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.4791666666666667, + "normalized_reciprocal_se_rank": 0.5367912895185623, + "reciprocal_se_rank": 0.1386949943260623, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "gensee", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 72.10526315789474, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.65625, + "weighted_total_content_score": 72.10526315789473, + "semantic_relevance": 2.5, + "factual_accuracy": 3.5, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.2621951219512195, + "normalized_reciprocal_se_rank": 0.3018278018278018, + "reciprocal_se_rank": 0.08223532131299122, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 72.0, + "semantic_relevance": 3.2, + "factual_accuracy": 3.8, + "freshness": 3.6, + "objectivity_tone": 3.8, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.66668, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 36, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 3.5833333333333335, + "weighted_total_content_score": 71.9298245614035, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 4.0, + "freshness": 3.6666666666666665, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 3.6666666666666665, + "accountability": 3.3333333333333335, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.65, + "weighted_total_content_score": 71.78947368421053, + "semantic_relevance": 2.6, + "factual_accuracy": 3.6, + "freshness": 4.4, + "objectivity_tone": 3.6, + "layout_ad_density": 2.8, + "accountability": 4.6, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5510204081632654, + "normalized_reciprocal_se_rank": 0.293534916262189, + "reciprocal_se_rank": 0.08024261337368133, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.725, + "weighted_total_content_score": 71.78947368421052, + "semantic_relevance": 2.8, + "factual_accuracy": 2.6, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.6, + "accountability": 4.0, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 71.78947368421052, + "semantic_relevance": 3.8, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 3.4, + "authority": 3.0, + "avg_ge_freq": 0.8, + "relative_se_rank": 1.2625, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.65, + "weighted_total_content_score": 71.78947368421052, + "semantic_relevance": 3.6, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 2.8, + "layout_ad_density": 3.6, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.4, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.021276595744681, + "normalized_reciprocal_se_rank": 0.23759259259259263, + "reciprocal_se_rank": 0.06680016181229773, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 71.57894736842107, + "semantic_relevance": 3.6, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.2, + "accountability": 3.8, + "transparency": 4.0, + "authority": 3.4, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.1, + "weighted_total_content_score": 71.57894736842107, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 2.5, + "accountability": 3.6, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9609756097560975, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 71.57894736842105, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 3.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 2.6666666666666665, + "accountability": 4.333333333333333, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.008130081300813, + "normalized_reciprocal_se_rank": 0.19325530436641547, + "reciprocal_se_rank": 0.05614629886474546, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.5833333333333335, + "weighted_total_content_score": 71.57894736842105, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 4.0, + "freshness": 3.6666666666666665, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.6666666666666665, + "transparency": 3.3333333333333335, + "authority": 3.3333333333333335, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 16, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.7083333333333335, + "weighted_total_content_score": 71.57894736842105, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.333333333333333, + "accountability": 4.0, + "transparency": 3.6666666666666665, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 71.57894736842104, + "semantic_relevance": 1.6, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 4.4, + "layout_ad_density": 3.8, + "accountability": 3.0, + "transparency": 4.4, + "authority": 4.0, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.4418604651162792, + "normalized_reciprocal_se_rank": 0.22596877869605145, + "reciprocal_se_rank": 0.06400706090026478, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.525, + "weighted_total_content_score": 71.57894736842104, + "semantic_relevance": 4.2, + "factual_accuracy": 3.8, + "freshness": 2.8, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 3.8, + "transparency": 3.6, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8833333333333334, + "normalized_reciprocal_se_rank": 0.4196632996632997, + "reciprocal_se_rank": 0.11055016181229774, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "exa", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 71.57894736842104, + "semantic_relevance": 3.8, + "factual_accuracy": 3.2, + "freshness": 4.4, + "objectivity_tone": 3.4, + "layout_ad_density": 2.2, + "accountability": 4.0, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8097560975609757, + "normalized_reciprocal_se_rank": 0.28455799828348843, + "reciprocal_se_rank": 0.07808553842248873, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "google-search", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.65625, + "weighted_total_content_score": 71.57894736842104, + "semantic_relevance": 2.5, + "factual_accuracy": 4.0, + "freshness": 4.75, + "objectivity_tone": 3.0, + "layout_ad_density": 3.5, + "accountability": 3.5, + "transparency": 4.25, + "authority": 3.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 71.57894736842104, + "semantic_relevance": 1.75, + "factual_accuracy": 4.0, + "freshness": 4.25, + "objectivity_tone": 4.25, + "layout_ad_density": 3.25, + "accountability": 3.75, + "transparency": 3.5, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.78125, + "normalized_reciprocal_se_rank": 0.16163357715081855, + "reciprocal_se_rank": 0.04854787412119184, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 71.36842105263159, + "semantic_relevance": 4.2, + "factual_accuracy": 3.4, + "freshness": 4.4, + "objectivity_tone": 3.4, + "layout_ad_density": 3.6, + "accountability": 2.8, + "transparency": 3.2, + "authority": 3.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0875000000000001, + "normalized_reciprocal_se_rank": 0.21122861265718412, + "reciprocal_se_rank": 0.060465127798692286, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "gensee", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.46875, + "weighted_total_content_score": 71.3157894736842, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 2.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.25, + "accountability": 3.5, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.49999999999999994, + "relative_se_rank": 1.2447916666666667, + "normalized_reciprocal_se_rank": 0.2138888888888889, + "reciprocal_se_rank": 0.061104368932038834, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gpt-4o", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 71.3157894736842, + "semantic_relevance": 3.75, + "factual_accuracy": 3.5, + "freshness": 4.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 3.25, + "transparency": 3.75, + "authority": 4.25, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.59375, + "weighted_total_content_score": 71.3157894736842, + "semantic_relevance": 3.25, + "factual_accuracy": 3.5, + "freshness": 4.0, + "objectivity_tone": 3.5, + "layout_ad_density": 4.0, + "accountability": 3.25, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 1.1875, + "normalized_reciprocal_se_rank": 0.2745791245791246, + "reciprocal_se_rank": 0.07568770226537216, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "tavily", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 71.3157894736842, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 2.75, + "layout_ad_density": 2.25, + "accountability": 3.75, + "transparency": 4.0, + "authority": 4.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0121951219512195, + "normalized_reciprocal_se_rank": 0.0214263850627487, + "reciprocal_se_rank": 0.014857310973815825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "google-search", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 71.15789473684211, + "semantic_relevance": 2.2, + "factual_accuracy": 3.6, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 4.4, + "accountability": 3.4, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 79, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 71.05263157894737, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 2.5, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 71.05263157894737, + "semantic_relevance": 3.0, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.5930232558139534, + "normalized_reciprocal_se_rank": 0.34719746484452363, + "reciprocal_se_rank": 0.09313725490196079, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "gensee", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.46875, + "weighted_total_content_score": 71.05263157894737, + "semantic_relevance": 3.75, + "factual_accuracy": 4.25, + "freshness": 3.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.75, + "accountability": 2.5, + "transparency": 3.0, + "authority": 3.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 71.05263157894737, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 3.5, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 15, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 71.05263157894736, + "semantic_relevance": 3.5, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.75, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 18, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 70.87719298245614, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 1.6666666666666667, + "accountability": 3.3333333333333335, + "transparency": 3.3333333333333335, + "authority": 3.3333333333333335, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 3.7083333333333335, + "weighted_total_content_score": 70.87719298245614, + "semantic_relevance": 3.0, + "factual_accuracy": 3.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 1.6666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 4, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.5833333333333335, + "weighted_total_content_score": 70.87719298245612, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 3.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 2.6666666666666665, + "accountability": 2.6666666666666665, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 70.78947368421052, + "semantic_relevance": 2.75, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 1.5, + "layout_ad_density": 4.25, + "accountability": 4.25, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 0.41664999999999996, + "relative_se_rank": 1.61875, + "normalized_reciprocal_se_rank": 0.044203944203944204, + "reciprocal_se_rank": 0.02033055940822931, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 70.73684210526315, + "semantic_relevance": 2.4, + "factual_accuracy": 3.4, + "freshness": 4.8, + "objectivity_tone": 3.4, + "layout_ad_density": 3.6, + "accountability": 3.2, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.8625000000000003, + "normalized_reciprocal_se_rank": 0.008565656565656566, + "reciprocal_se_rank": 0.011766990291262134, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 70.73684210526315, + "semantic_relevance": 3.8, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 3.6, + "transparency": 3.4, + "authority": 2.6, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.2625, + "normalized_reciprocal_se_rank": 0.3583838383838384, + "reciprocal_se_rank": 0.0958252427184466, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "gpt-4o", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 70.52631578947368, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 2.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 2.5, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.71875, + "weighted_total_content_score": 70.52631578947367, + "semantic_relevance": 2.25, + "factual_accuracy": 2.75, + "freshness": 4.5, + "objectivity_tone": 2.5, + "layout_ad_density": 4.25, + "accountability": 4.25, + "transparency": 4.75, + "authority": 4.5, + "avg_ge_freq": 0.583325, + "relative_se_rank": 1.25, + "normalized_reciprocal_se_rank": 0.3885281385281385, + "reciprocal_se_rank": 0.10306865464632455, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gensee", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.475, + "weighted_total_content_score": 70.52631578947367, + "semantic_relevance": 4.2, + "factual_accuracy": 4.0, + "freshness": 3.6, + "objectivity_tone": 3.2, + "layout_ad_density": 2.2, + "accountability": 3.6, + "transparency": 3.4, + "authority": 3.6, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.4878048780487805, + "normalized_reciprocal_se_rank": 0.28902356902356907, + "reciprocal_se_rank": 0.07915857605177994, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "tavily", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 70.52631578947367, + "semantic_relevance": 4.0, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.0, + "accountability": 2.0, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.575, + "weighted_total_content_score": 70.3157894736842, + "semantic_relevance": 3.4, + "factual_accuracy": 3.6, + "freshness": 4.4, + "objectivity_tone": 2.6, + "layout_ad_density": 3.2, + "accountability": 3.2, + "transparency": 4.2, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8, + "normalized_reciprocal_se_rank": 0.19385730319063652, + "reciprocal_se_rank": 0.05629095392202188, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 4.491071428571429, + "weighted_total_content_score": 70.3157894736842, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 4.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.333333333333333, + "accountability": 4.25, + "transparency": 4.25, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 70.3157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 2.6, + "layout_ad_density": 2.4, + "accountability": 3.4, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6355555555555557, + "normalized_reciprocal_se_rank": 0.2737392369012922, + "reciprocal_se_rank": 0.07548588459521341, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 70.26315789473684, + "semantic_relevance": 2.75, + "factual_accuracy": 3.25, + "freshness": 3.75, + "objectivity_tone": 4.75, + "layout_ad_density": 3.75, + "accountability": 2.75, + "transparency": 3.25, + "authority": 3.75, + "avg_ge_freq": 0.75, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 70.10526315789474, + "semantic_relevance": 2.4, + "factual_accuracy": 3.2, + "freshness": 3.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.8, + "accountability": 3.6, + "transparency": 3.4, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 14, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 70.10526315789473, + "semantic_relevance": 4.75, + "factual_accuracy": 4.75, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 4.0, + "accountability": 3.75, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.93334, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.525, + "weighted_total_content_score": 70.10526315789473, + "semantic_relevance": 3.6, + "factual_accuracy": 3.4, + "freshness": 4.2, + "objectivity_tone": 3.2, + "layout_ad_density": 2.2, + "accountability": 3.8, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": 0.93334, + "relative_se_rank": 2.0390243902439025, + "normalized_reciprocal_se_rank": 0.031553631553631556, + "reciprocal_se_rank": 0.017290799815071655, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 70.0, + "semantic_relevance": 3.0, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.0, + "accountability": 4.5, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 21, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.575, + "weighted_total_content_score": 69.89473684210526, + "semantic_relevance": 3.6, + "factual_accuracy": 2.8, + "freshness": 4.6, + "objectivity_tone": 2.8, + "layout_ad_density": 3.0, + "accountability": 4.4, + "transparency": 3.6, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 69.89473684210525, + "semantic_relevance": 1.8, + "factual_accuracy": 3.2, + "freshness": 3.2, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 69.82456140350877, + "semantic_relevance": 1.3333333333333333, + "factual_accuracy": 3.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.3333333333333335, + "accountability": 3.6666666666666665, + "transparency": 4.666666666666667, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 69.82456140350877, + "semantic_relevance": 1.3333333333333333, + "factual_accuracy": 3.3333333333333335, + "freshness": 2.6666666666666665, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.5833333333333335, + "weighted_total_content_score": 69.82456140350877, + "semantic_relevance": 2.3333333333333335, + "factual_accuracy": 3.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.0, + "accountability": 4.333333333333333, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.4108527131782946, + "normalized_reciprocal_se_rank": 0.4954380405360797, + "reciprocal_se_rank": 0.12875816993464054, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "google-search", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.53125, + "weighted_total_content_score": 69.73684210526315, + "semantic_relevance": 2.75, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.25, + "accountability": 3.75, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.8049999999999997, + "weighted_total_content_score": 69.6842105263158, + "semantic_relevance": 4.25, + "factual_accuracy": 3.75, + "freshness": 4.4, + "objectivity_tone": 3.75, + "layout_ad_density": 3.2, + "accountability": 3.8, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gensee", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.4, + "weighted_total_content_score": 69.68421052631578, + "semantic_relevance": 4.6, + "factual_accuracy": 4.2, + "freshness": 3.6, + "objectivity_tone": 3.0, + "layout_ad_density": 2.0, + "accountability": 3.2, + "transparency": 3.2, + "authority": 3.4, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 13, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.525, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 3.2, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.4, + "accountability": 4.6, + "transparency": 3.6, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 2.4, + "factual_accuracy": 3.0, + "freshness": 3.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.2, + "accountability": 4.4, + "transparency": 4.0, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 3.5, + "factual_accuracy": 3.25, + "freshness": 2.25, + "objectivity_tone": 3.25, + "layout_ad_density": 3.0, + "accountability": 4.5, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 0.75, + "relative_se_rank": 0.7111111111111111, + "normalized_reciprocal_se_rank": 0.32910927456382005, + "reciprocal_se_rank": 0.08879082082965578, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 4.46875, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 1.75, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.25, + "layout_ad_density": 5.0, + "accountability": 5.0, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.0, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5348837209302325, + "normalized_reciprocal_se_rank": 0.11965811965811966, + "reciprocal_se_rank": 0.038461538461538464, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 5.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 2.0, + "transparency": 3.0, + "authority": 2.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 62, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.40625, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 4.75, + "transparency": 4.75, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.575, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 1.8, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.2, + "accountability": 2.8, + "transparency": 4.2, + "authority": 3.6, + "avg_ge_freq": 0.6, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 69.47368421052632, + "semantic_relevance": 4.2, + "factual_accuracy": 3.8, + "freshness": 4.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 2.6, + "transparency": 2.4, + "authority": 2.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.2541666666666667, + "normalized_reciprocal_se_rank": 0.35882551000198065, + "reciprocal_se_rank": 0.09593137254901961, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.525, + "weighted_total_content_score": 69.26315789473684, + "semantic_relevance": 2.6, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.4, + "accountability": 3.0, + "transparency": 3.8, + "authority": 4.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.53125, + "weighted_total_content_score": 69.21052631578948, + "semantic_relevance": 3.5, + "factual_accuracy": 3.5, + "freshness": 5.0, + "objectivity_tone": 2.25, + "layout_ad_density": 3.5, + "accountability": 3.25, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.1249999999999996, + "normalized_reciprocal_se_rank": 0.10549943883277216, + "reciprocal_se_rank": 0.035059331175836025, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.6, + "weighted_total_content_score": 69.05263157894737, + "semantic_relevance": 3.0, + "factual_accuracy": 2.4, + "freshness": 5.0, + "objectivity_tone": 2.6, + "layout_ad_density": 2.8, + "accountability": 4.0, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.475, + "weighted_total_content_score": 69.05263157894736, + "semantic_relevance": 3.0, + "factual_accuracy": 3.8, + "freshness": 2.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 4.4, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 10, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.45, + "weighted_total_content_score": 69.05263157894736, + "semantic_relevance": 3.6, + "factual_accuracy": 3.8, + "freshness": 3.8, + "objectivity_tone": 3.0, + "layout_ad_density": 2.6, + "accountability": 3.4, + "transparency": 3.6, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 68.84210526315789, + "semantic_relevance": 2.0, + "factual_accuracy": 4.2, + "freshness": 4.4, + "objectivity_tone": 4.4, + "layout_ad_density": 4.0, + "accountability": 3.2, + "transparency": 2.2, + "authority": 3.0, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 68.63157894736841, + "semantic_relevance": 3.0, + "factual_accuracy": 3.8, + "freshness": 4.6, + "objectivity_tone": 3.6, + "layout_ad_density": 2.2, + "accountability": 2.6, + "transparency": 4.0, + "authority": 3.6, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 34, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.625, + "weighted_total_content_score": 68.63157894736841, + "semantic_relevance": 2.2, + "factual_accuracy": 2.4, + "freshness": 4.8, + "objectivity_tone": 2.6, + "layout_ad_density": 4.4, + "accountability": 3.6, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.6480000000000001, + "normalized_reciprocal_se_rank": 0.04740740740740741, + "reciprocal_se_rank": 0.02110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 3.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 2.3333333333333335, + "transparency": 3.3333333333333335, + "authority": 3.3333333333333335, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 84, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 3.4, + "factual_accuracy": 2.8, + "freshness": 5.0, + "objectivity_tone": 2.8, + "layout_ad_density": 2.2, + "accountability": 3.8, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.4863636363636368, + "normalized_reciprocal_se_rank": 0.13475413475413475, + "reciprocal_se_rank": 0.04208897898218286, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 76, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 3.2, + "factual_accuracy": 3.0, + "freshness": 3.8, + "objectivity_tone": 2.0, + "layout_ad_density": 4.6, + "accountability": 3.4, + "transparency": 4.0, + "authority": 4.4, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.9666666666666668, + "normalized_reciprocal_se_rank": 0.043939393939393945, + "reciprocal_se_rank": 0.02026699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.3125, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 3.5, + "objectivity_tone": 3.75, + "layout_ad_density": 2.0, + "accountability": 3.25, + "transparency": 3.0, + "authority": 2.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6686046511627908, + "normalized_reciprocal_se_rank": 0.3974186307519641, + "reciprocal_se_rank": 0.10520496224379719, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gensee", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 2.0, + "factual_accuracy": 5.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.4375, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 3.0, + "factual_accuracy": 3.75, + "freshness": 4.0, + "objectivity_tone": 3.25, + "layout_ad_density": 3.25, + "accountability": 3.5, + "transparency": 3.25, + "authority": 3.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 1.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 68.42105263157895, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 2.0, + "transparency": 2.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.041666666666666664, + "normalized_reciprocal_se_rank": 0.791919191919192, + "reciprocal_se_rank": 0.2, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 99, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.45, + "weighted_total_content_score": 68.21052631578948, + "semantic_relevance": 3.4, + "factual_accuracy": 3.4, + "freshness": 4.8, + "objectivity_tone": 2.8, + "layout_ad_density": 3.0, + "accountability": 3.8, + "transparency": 3.4, + "authority": 3.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.45, + "weighted_total_content_score": 68.21052631578948, + "semantic_relevance": 2.2, + "factual_accuracy": 3.6, + "freshness": 3.8, + "objectivity_tone": 3.8, + "layout_ad_density": 3.8, + "accountability": 3.4, + "transparency": 3.6, + "authority": 3.4, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.6708333333333336, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.475, + "weighted_total_content_score": 68.21052631578947, + "semantic_relevance": 2.6, + "factual_accuracy": 3.6, + "freshness": 3.2, + "objectivity_tone": 3.0, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 4.2, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0558139534883721, + "normalized_reciprocal_se_rank": 0.31423569023569026, + "reciprocal_se_rank": 0.0852168284789644, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "google-search", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.40625, + "weighted_total_content_score": 68.1578947368421, + "semantic_relevance": 2.0, + "factual_accuracy": 4.25, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 4.25, + "accountability": 2.75, + "transparency": 2.5, + "authority": 3.25, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 58, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.655, + "weighted_total_content_score": 68.0, + "semantic_relevance": 4.25, + "factual_accuracy": 4.0, + "freshness": 4.2, + "objectivity_tone": 4.5, + "layout_ad_density": 3.8, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.73334, + "relative_se_rank": 0.8375, + "normalized_reciprocal_se_rank": 0.16153164296021438, + "reciprocal_se_rank": 0.04852338022587675, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.475, + "weighted_total_content_score": 67.99999999999999, + "semantic_relevance": 2.0, + "factual_accuracy": 3.4, + "freshness": 4.8, + "objectivity_tone": 3.6, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 3.6, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 67.89473684210526, + "semantic_relevance": 1.5, + "factual_accuracy": 2.5, + "freshness": 3.5, + "objectivity_tone": 4.5, + "layout_ad_density": 4.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.16304347826086957, + "normalized_reciprocal_se_rank": 0.4451178451178451, + "reciprocal_se_rank": 0.11666666666666667, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "exa", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.4375, + "weighted_total_content_score": 67.89473684210526, + "semantic_relevance": 1.75, + "factual_accuracy": 3.5, + "freshness": 4.25, + "objectivity_tone": 4.25, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.75, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 93, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 67.78947368421052, + "semantic_relevance": 3.2, + "factual_accuracy": 3.2, + "freshness": 3.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 3.8, + "transparency": 3.8, + "authority": 3.8, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.55, + "weighted_total_content_score": 67.78947368421052, + "semantic_relevance": 1.2, + "factual_accuracy": 2.0, + "freshness": 4.2, + "objectivity_tone": 4.4, + "layout_ad_density": 4.2, + "accountability": 3.4, + "transparency": 4.4, + "authority": 4.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.327659574468085, + "normalized_reciprocal_se_rank": 0.17111111111111113, + "reciprocal_se_rank": 0.050825242718446594, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 55, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 67.71929824561404, + "semantic_relevance": 1.6666666666666667, + "factual_accuracy": 3.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.3333333333333335, + "accountability": 3.6666666666666665, + "transparency": 4.333333333333333, + "authority": 3.3333333333333335, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 72, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.34375, + "weighted_total_content_score": 67.63157894736841, + "semantic_relevance": 3.25, + "factual_accuracy": 3.75, + "freshness": 3.5, + "objectivity_tone": 3.75, + "layout_ad_density": 2.75, + "accountability": 3.25, + "transparency": 3.25, + "authority": 3.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8475609756097562, + "normalized_reciprocal_se_rank": 0.16329966329966328, + "reciprocal_se_rank": 0.048948220064724914, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 67.57894736842105, + "semantic_relevance": 2.8, + "factual_accuracy": 3.2, + "freshness": 3.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.6, + "accountability": 2.8, + "transparency": 4.4, + "authority": 4.2, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.9951219512195124, + "normalized_reciprocal_se_rank": 0.06127946127946128, + "reciprocal_se_rank": 0.0244336569579288, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.45, + "weighted_total_content_score": 67.57894736842104, + "semantic_relevance": 1.6, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 3.6, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.46665999999999996, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 67.57894736842104, + "semantic_relevance": 3.4, + "factual_accuracy": 3.6, + "freshness": 4.0, + "objectivity_tone": 3.2, + "layout_ad_density": 2.8, + "accountability": 2.8, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": 0.86668, + "relative_se_rank": 0.14693877551020407, + "normalized_reciprocal_se_rank": 0.4250780789818223, + "reciprocal_se_rank": 0.11185128596893304, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.4583333333333335, + "weighted_total_content_score": 67.36842105263158, + "semantic_relevance": 1.6666666666666667, + "factual_accuracy": 3.6666666666666665, + "freshness": 2.3333333333333335, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 4.666666666666667, + "accountability": 3.6666666666666665, + "transparency": 4.333333333333333, + "authority": 4.0, + "avg_ge_freq": 0.8889, + "relative_se_rank": 1.983739837398374, + "normalized_reciprocal_se_rank": 0.016046994770399028, + "reciprocal_se_rank": 0.013564690490945394, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gensee", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 67.36842105263158, + "semantic_relevance": 5.0, + "factual_accuracy": 4.0, + "freshness": 4.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.0, + "transparency": 2.3333333333333335, + "authority": 2.3333333333333335, + "avg_ge_freq": 0.4444333333333333, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gensee", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.3333333333333335, + "weighted_total_content_score": 67.36842105263158, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 4.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.333333333333333, + "accountability": 2.6666666666666665, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 67.36842105263158, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 3.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.45, + "weighted_total_content_score": 67.15789473684211, + "semantic_relevance": 3.2, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.4, + "layout_ad_density": 3.2, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 67.1578947368421, + "semantic_relevance": 2.8, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 2.8, + "layout_ad_density": 3.2, + "accountability": 3.8, + "transparency": 3.2, + "authority": 3.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.6325581395348838, + "normalized_reciprocal_se_rank": 0.13887898593780945, + "reciprocal_se_rank": 0.043080144679230915, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.8299999999999996, + "weighted_total_content_score": 66.94736842105263, + "semantic_relevance": 1.75, + "factual_accuracy": 3.75, + "freshness": 3.6, + "objectivity_tone": 4.0, + "layout_ad_density": 3.6, + "accountability": 4.2, + "transparency": 4.2, + "authority": 4.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.024390243902439, + "normalized_reciprocal_se_rank": 0.038159371492704826, + "reciprocal_se_rank": 0.018878101402373244, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 66.94736842105263, + "semantic_relevance": 2.2, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.2, + "accountability": 3.4, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": 0.93334, + "relative_se_rank": 1.7822222222222224, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "gensee", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.425, + "weighted_total_content_score": 66.94736842105263, + "semantic_relevance": 1.8, + "factual_accuracy": 4.0, + "freshness": 4.8, + "objectivity_tone": 3.0, + "layout_ad_density": 2.6, + "accountability": 3.2, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 1.7822222222222224, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 71, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.45, + "weighted_total_content_score": 66.73684210526315, + "semantic_relevance": 1.6, + "factual_accuracy": 3.0, + "freshness": 2.6, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 4.6, + "transparency": 4.4, + "authority": 4.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 66.73684210526315, + "semantic_relevance": 3.0, + "factual_accuracy": 3.4, + "freshness": 4.6, + "objectivity_tone": 3.0, + "layout_ad_density": 2.4, + "accountability": 4.0, + "transparency": 3.4, + "authority": 3.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.7023809523809526, + "weighted_total_content_score": 66.66666666666667, + "semantic_relevance": 1.0, + "factual_accuracy": 1.6666666666666667, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 5.0, + "accountability": 3.6666666666666665, + "transparency": 4.666666666666667, + "authority": 5.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "google-search", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 66.57894736842105, + "semantic_relevance": 1.0, + "factual_accuracy": 2.25, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 3.5, + "transparency": 4.0, + "authority": 3.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.53125, + "weighted_total_content_score": 66.57894736842104, + "semantic_relevance": 1.0, + "factual_accuracy": 1.5, + "freshness": 4.5, + "objectivity_tone": 4.25, + "layout_ad_density": 3.75, + "accountability": 3.75, + "transparency": 4.5, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6329787234042552, + "normalized_reciprocal_se_rank": 0.09393939393939395, + "reciprocal_se_rank": 0.03228155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.8549999999999995, + "weighted_total_content_score": 66.52631578947367, + "semantic_relevance": 2.25, + "factual_accuracy": 2.25, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 3.8, + "accountability": 4.2, + "transparency": 4.6, + "authority": 4.4, + "avg_ge_freq": 0.53332, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.9799999999999995, + "weighted_total_content_score": 66.52631578947367, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 4.75, + "objectivity_tone": 4.25, + "layout_ad_density": 3.0, + "accountability": 4.0, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.055, + "normalized_reciprocal_se_rank": 0.051370851370851366, + "reciprocal_se_rank": 0.02205270457697642, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.34375, + "weighted_total_content_score": 66.3157894736842, + "semantic_relevance": 1.5, + "factual_accuracy": 3.0, + "freshness": 3.75, + "objectivity_tone": 5.0, + "layout_ad_density": 3.5, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.833325, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.3928571428571432, + "weighted_total_content_score": 66.3157894736842, + "semantic_relevance": 3.8, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 3.3333333333333335, + "accountability": 2.6, + "transparency": 2.6, + "authority": 2.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 66.3157894736842, + "semantic_relevance": 2.5, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 3.0, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2926829268292683, + "normalized_reciprocal_se_rank": 0.21099887766554432, + "reciprocal_se_rank": 0.060409924487594385, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "gensee", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 66.3157894736842, + "semantic_relevance": 3.0, + "factual_accuracy": 4.0, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 2.0, + "transparency": 3.0, + "authority": 4.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.35, + "weighted_total_content_score": 66.3157894736842, + "semantic_relevance": 2.8, + "factual_accuracy": 3.6, + "freshness": 4.4, + "objectivity_tone": 3.0, + "layout_ad_density": 2.6, + "accountability": 4.0, + "transparency": 3.0, + "authority": 3.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.390909090909091, + "normalized_reciprocal_se_rank": 0.2612794612794612, + "reciprocal_se_rank": 0.07249190938511327, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.325, + "weighted_total_content_score": 66.31578947368419, + "semantic_relevance": 3.6, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.4, + "accountability": 3.4, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.275, + "weighted_total_content_score": 66.10526315789474, + "semantic_relevance": 1.8, + "factual_accuracy": 4.0, + "freshness": 4.4, + "objectivity_tone": 4.6, + "layout_ad_density": 4.0, + "accountability": 2.6, + "transparency": 2.2, + "authority": 2.6, + "avg_ge_freq": 0.6, + "relative_se_rank": 1.3083333333333336, + "normalized_reciprocal_se_rank": 0.182010582010582, + "reciprocal_se_rank": 0.05344429033749422, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.275, + "weighted_total_content_score": 66.10526315789474, + "semantic_relevance": 2.0, + "factual_accuracy": 3.8, + "freshness": 5.0, + "objectivity_tone": 4.6, + "layout_ad_density": 3.4, + "accountability": 2.2, + "transparency": 3.0, + "authority": 2.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.325, + "weighted_total_content_score": 66.10526315789473, + "semantic_relevance": 3.2, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.4, + "accountability": 3.4, + "transparency": 3.4, + "authority": 2.8, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.068292682926829, + "normalized_reciprocal_se_rank": 0.022745978301533857, + "reciprocal_se_rank": 0.015174397698669542, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 27, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.4, + "weighted_total_content_score": 66.10526315789473, + "semantic_relevance": 2.8, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.6, + "layout_ad_density": 3.2, + "accountability": 3.2, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 95, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.325, + "weighted_total_content_score": 66.10526315789473, + "semantic_relevance": 3.0, + "factual_accuracy": 3.4, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.2, + "accountability": 3.0, + "transparency": 3.0, + "authority": 2.8, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 17, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.3, + "weighted_total_content_score": 65.89473684210526, + "semantic_relevance": 3.4, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 2.8, + "layout_ad_density": 2.4, + "accountability": 3.6, + "transparency": 2.8, + "authority": 2.8, + "avg_ge_freq": 0.93334, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.475, + "weighted_total_content_score": 65.89473684210526, + "semantic_relevance": 1.6, + "factual_accuracy": 2.2, + "freshness": 5.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.0, + "accountability": 3.2, + "transparency": 4.6, + "authority": 5.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 65.78947368421052, + "semantic_relevance": 4.5, + "factual_accuracy": 3.5, + "freshness": 3.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 2.5, + "transparency": 3.0, + "authority": 2.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.333333333333333, + "weighted_total_content_score": 65.78947368421052, + "semantic_relevance": 4.666666666666667, + "factual_accuracy": 4.666666666666667, + "freshness": 4.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 4.333333333333333, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.333333333333333, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 1.8353658536585367, + "normalized_reciprocal_se_rank": 0.25, + "reciprocal_se_rank": 0.06978155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "tavily", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.28125, + "weighted_total_content_score": 65.78947368421052, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 2.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 2.75, + "transparency": 3.25, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.6420454545454546, + "normalized_reciprocal_se_rank": 0.4073272406605739, + "reciprocal_se_rank": 0.10758591462474958, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gensee", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.325, + "weighted_total_content_score": 65.6842105263158, + "semantic_relevance": 2.0, + "factual_accuracy": 3.6, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 2.8, + "transparency": 3.0, + "authority": 3.2, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 2.3255813953488373, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.325, + "weighted_total_content_score": 65.68421052631578, + "semantic_relevance": 2.8, + "factual_accuracy": 3.2, + "freshness": 4.6, + "objectivity_tone": 3.2, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.6, + "authority": 3.2, + "avg_ge_freq": 0.8666600000000001, + "relative_se_rank": 1.4045454545454548, + "normalized_reciprocal_se_rank": 0.21503928170594833, + "reciprocal_se_rank": 0.061380798274002155, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.3333333333333335, + "weighted_total_content_score": 65.6140350877193, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.6666666666666665, + "accountability": 3.0, + "transparency": 3.6666666666666665, + "authority": 3.3333333333333335, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6422764227642277, + "normalized_reciprocal_se_rank": 0.263973063973064, + "reciprocal_se_rank": 0.07313915857605179, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "google-search", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.28125, + "weighted_total_content_score": 65.52631578947368, + "semantic_relevance": 3.25, + "factual_accuracy": 3.5, + "freshness": 2.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.25, + "accountability": 4.0, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.35, + "weighted_total_content_score": 65.47368421052632, + "semantic_relevance": 1.8, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.6, + "accountability": 3.2, + "transparency": 3.2, + "authority": 3.2, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 9, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 4.04, + "weighted_total_content_score": 65.47368421052632, + "semantic_relevance": 3.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 3.75, + "layout_ad_density": 3.0, + "accountability": 4.25, + "transparency": 4.5, + "authority": 4.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 92, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.3125, + "weighted_total_content_score": 65.26315789473685, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 3.25, + "objectivity_tone": 3.0, + "layout_ad_density": 3.5, + "accountability": 3.75, + "transparency": 3.75, + "authority": 3.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7346938775510204, + "normalized_reciprocal_se_rank": 0.16931994772903866, + "reciprocal_se_rank": 0.05039484180867676, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 65.26315789473684, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 2.0, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 65.26315789473684, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 2, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 65.26315789473684, + "semantic_relevance": 3.3333333333333335, + "factual_accuracy": 3.6666666666666665, + "freshness": 3.6666666666666665, + "objectivity_tone": 3.0, + "layout_ad_density": 2.0, + "accountability": 3.6666666666666665, + "transparency": 3.3333333333333335, + "authority": 3.3333333333333335, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.4375, + "weighted_total_content_score": 65.26315789473684, + "semantic_relevance": 1.25, + "factual_accuracy": 2.0, + "freshness": 4.5, + "objectivity_tone": 3.75, + "layout_ad_density": 4.0, + "accountability": 3.75, + "transparency": 4.25, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5714285714285714, + "normalized_reciprocal_se_rank": 0.47544893378226716, + "reciprocal_se_rank": 0.12395496224379719, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "tavily", + "query_id": 20, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.15625, + "weighted_total_content_score": 65.26315789473684, + "semantic_relevance": 3.75, + "factual_accuracy": 3.75, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 1.25, + "accountability": 2.25, + "transparency": 2.5, + "authority": 2.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.225, + "weighted_total_content_score": 65.05263157894737, + "semantic_relevance": 2.0, + "factual_accuracy": 3.8, + "freshness": 4.6, + "objectivity_tone": 4.4, + "layout_ad_density": 3.0, + "accountability": 2.2, + "transparency": 2.8, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.325, + "weighted_total_content_score": 65.05263157894737, + "semantic_relevance": 3.2, + "factual_accuracy": 2.8, + "freshness": 3.8, + "objectivity_tone": 2.6, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 3.4, + "authority": 3.4, + "avg_ge_freq": 0.66666, + "relative_se_rank": 1.4266666666666667, + "normalized_reciprocal_se_rank": 0.1881359044995409, + "reciprocal_se_rank": 0.0549161518093557, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "exa", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.275, + "weighted_total_content_score": 65.05263157894737, + "semantic_relevance": 2.0, + "factual_accuracy": 3.4, + "freshness": 4.4, + "objectivity_tone": 4.0, + "layout_ad_density": 2.6, + "accountability": 2.4, + "transparency": 4.2, + "authority": 3.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.15, + "weighted_total_content_score": 64.84210526315789, + "semantic_relevance": 3.8, + "factual_accuracy": 3.4, + "freshness": 3.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.4, + "accountability": 2.8, + "transparency": 2.6, + "authority": 2.2, + "avg_ge_freq": 0.66666, + "relative_se_rank": 2.1739130434782608, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.3, + "weighted_total_content_score": 64.84210526315789, + "semantic_relevance": 3.6, + "factual_accuracy": 2.8, + "freshness": 5.0, + "objectivity_tone": 2.4, + "layout_ad_density": 2.6, + "accountability": 3.0, + "transparency": 4.0, + "authority": 3.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5866666666666667, + "normalized_reciprocal_se_rank": 0.30691153963881235, + "reciprocal_se_rank": 0.08345689908796705, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 48, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.3125, + "weighted_total_content_score": 64.73684210526315, + "semantic_relevance": 1.5, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 4.0, + "transparency": 3.5, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.272727272727273, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.225, + "weighted_total_content_score": 64.63157894736841, + "semantic_relevance": 3.4, + "factual_accuracy": 3.0, + "freshness": 4.2, + "objectivity_tone": 3.4, + "layout_ad_density": 2.6, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5466666666666666, + "normalized_reciprocal_se_rank": 0.3722783389450056, + "reciprocal_se_rank": 0.09916396979503775, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 40, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 3.40625, + "weighted_total_content_score": 64.47368421052632, + "semantic_relevance": 2.5, + "factual_accuracy": 2.5, + "freshness": 5.0, + "objectivity_tone": 1.75, + "layout_ad_density": 3.25, + "accountability": 4.25, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4, + "normalized_reciprocal_se_rank": 0.15713957759412306, + "reciprocal_se_rank": 0.0474680052956752, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "exa", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.3125, + "weighted_total_content_score": 64.47368421052632, + "semantic_relevance": 3.5, + "factual_accuracy": 2.75, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 2.75, + "accountability": 4.0, + "transparency": 3.75, + "authority": 2.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1861702127659575, + "normalized_reciprocal_se_rank": 0.12769855416914241, + "reciprocal_se_rank": 0.04039358461831334, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.3, + "weighted_total_content_score": 64.42105263157895, + "semantic_relevance": 3.2, + "factual_accuracy": 2.6, + "freshness": 3.2, + "objectivity_tone": 2.6, + "layout_ad_density": 3.6, + "accountability": 4.0, + "transparency": 3.6, + "authority": 3.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 82, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 64.42105263157893, + "semantic_relevance": 4.25, + "factual_accuracy": 4.5, + "freshness": 4.75, + "objectivity_tone": 3.75, + "layout_ad_density": 3.25, + "accountability": 4.25, + "transparency": 3.75, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8545454545454547, + "normalized_reciprocal_se_rank": 0.06758494031221304, + "reciprocal_se_rank": 0.025948808473080322, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.21875, + "weighted_total_content_score": 64.21052631578948, + "semantic_relevance": 2.5, + "factual_accuracy": 3.0, + "freshness": 2.75, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.916675, + "relative_se_rank": 1.6467391304347825, + "normalized_reciprocal_se_rank": 0.16329966329966328, + "reciprocal_se_rank": 0.048948220064724914, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 64.21052631578947, + "semantic_relevance": 1.3333333333333333, + "factual_accuracy": 3.0, + "freshness": 3.0, + "objectivity_tone": 4.666666666666667, + "layout_ad_density": 3.0, + "accountability": 3.6666666666666665, + "transparency": 3.6666666666666665, + "authority": 3.6666666666666665, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "exa", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.3, + "weighted_total_content_score": 64.21052631578947, + "semantic_relevance": 4.0, + "factual_accuracy": 2.0, + "freshness": 5.0, + "objectivity_tone": 2.2, + "layout_ad_density": 2.2, + "accountability": 3.6, + "transparency": 3.8, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.7777777777777777, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 64.21052631578947, + "semantic_relevance": 2.0, + "factual_accuracy": 3.5, + "freshness": 3.75, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 3.5, + "transparency": 3.25, + "authority": 3.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 63.94736842105263, + "semantic_relevance": 3.25, + "factual_accuracy": 3.25, + "freshness": 3.5, + "objectivity_tone": 4.25, + "layout_ad_density": 2.5, + "accountability": 2.75, + "transparency": 2.5, + "authority": 3.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.36046511627906974, + "normalized_reciprocal_se_rank": 0.2987863451631567, + "reciprocal_se_rank": 0.08150448585231193, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "tavily", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.59375, + "weighted_total_content_score": 63.94736842105262, + "semantic_relevance": 1.5, + "factual_accuracy": 2.75, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.75, + "accountability": 4.0, + "transparency": 4.333333333333333, + "authority": 4.666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.5612244897959184, + "normalized_reciprocal_se_rank": 0.10549943883277216, + "reciprocal_se_rank": 0.035059331175836025, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.225, + "weighted_total_content_score": 63.78947368421052, + "semantic_relevance": 2.4, + "factual_accuracy": 3.4, + "freshness": 4.4, + "objectivity_tone": 3.2, + "layout_ad_density": 3.2, + "accountability": 3.0, + "transparency": 3.2, + "authority": 3.0, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.7791666666666668, + "normalized_reciprocal_se_rank": 0.019663299663299664, + "reciprocal_se_rank": 0.0144336569579288, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 63.78947368421052, + "semantic_relevance": 3.8, + "factual_accuracy": 2.4, + "freshness": 5.0, + "objectivity_tone": 2.4, + "layout_ad_density": 2.6, + "accountability": 3.4, + "transparency": 3.6, + "authority": 2.8, + "avg_ge_freq": 0.86668, + "relative_se_rank": 2.7777777777777777, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 73, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 63.78947368421052, + "semantic_relevance": 3.2, + "factual_accuracy": 2.8, + "freshness": 2.8, + "objectivity_tone": 2.6, + "layout_ad_density": 3.2, + "accountability": 4.2, + "transparency": 3.8, + "authority": 3.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.8875, + "normalized_reciprocal_se_rank": 0.41432789432789435, + "reciprocal_se_rank": 0.10926811053024646, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.21875, + "weighted_total_content_score": 63.68421052631578, + "semantic_relevance": 1.75, + "factual_accuracy": 3.5, + "freshness": 4.0, + "objectivity_tone": 3.75, + "layout_ad_density": 4.5, + "accountability": 2.75, + "transparency": 2.5, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.21875, + "weighted_total_content_score": 63.68421052631578, + "semantic_relevance": 2.25, + "factual_accuracy": 3.25, + "freshness": 4.0, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.25, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.3724489795918366, + "normalized_reciprocal_se_rank": 0.04354822737175678, + "reciprocal_se_rank": 0.020172996383019226, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 63.578947368421055, + "semantic_relevance": 3.4, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 2.6, + "accountability": 3.2, + "transparency": 3.6, + "authority": 3.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.7777777777777777, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.1666666666666665, + "weighted_total_content_score": 63.508771929824555, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 3.0, + "freshness": 4.0, + "objectivity_tone": 3.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.3333333333333335, + "avg_ge_freq": 0.6666666666666666, + "relative_se_rank": 0.9767441860465116, + "normalized_reciprocal_se_rank": 0.1875111826802648, + "reciprocal_se_rank": 0.054766036614917996, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.275, + "weighted_total_content_score": 63.368421052631575, + "semantic_relevance": 1.2, + "factual_accuracy": 2.8, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 2.4, + "accountability": 3.6, + "transparency": 4.2, + "authority": 3.8, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 78, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 4.03125, + "weighted_total_content_score": 63.368421052631575, + "semantic_relevance": 3.0, + "factual_accuracy": 3.75, + "freshness": 4.25, + "objectivity_tone": 4.0, + "layout_ad_density": 3.25, + "accountability": 5.0, + "transparency": 4.75, + "authority": 4.25, + "avg_ge_freq": 0.39997999999999995, + "relative_se_rank": 1.7106382978723402, + "normalized_reciprocal_se_rank": 0.1583838383838384, + "reciprocal_se_rank": 0.047766990291262135, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 63, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 63.15789473684211, + "semantic_relevance": 2.5, + "factual_accuracy": 3.0, + "freshness": 2.25, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 4.25, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.75, + "relative_se_rank": 4.761904761904762, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.675, + "weighted_total_content_score": 63.1578947368421, + "semantic_relevance": 1.8, + "factual_accuracy": 3.2, + "freshness": 5.0, + "objectivity_tone": 4.2, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 3.25, + "authority": 3.5, + "avg_ge_freq": 0.53332, + "relative_se_rank": 1.95609756097561, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "google-search", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 63.1578947368421, + "semantic_relevance": 2.0, + "factual_accuracy": 3.5, + "freshness": 3.5, + "objectivity_tone": 3.5, + "layout_ad_density": 3.0, + "accountability": 3.5, + "transparency": 3.5, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gpt-4o", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.2083333333333335, + "weighted_total_content_score": 63.1578947368421, + "semantic_relevance": 4.0, + "factual_accuracy": 2.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.0, + "transparency": 3.6666666666666665, + "authority": 3.0, + "avg_ge_freq": 0.5555666666666667, + "relative_se_rank": 2.7777777777777772, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 63.157894736842096, + "semantic_relevance": 3.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 3.0, + "accountability": 3.5, + "transparency": 3.5, + "authority": 3.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 1.8333333333333333, + "normalized_reciprocal_se_rank": 0.03924963924963925, + "reciprocal_se_rank": 0.01914008321775312, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "google-search", + "query_id": 88, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.21875, + "weighted_total_content_score": 62.89473684210526, + "semantic_relevance": 1.75, + "factual_accuracy": 3.75, + "freshness": 3.75, + "objectivity_tone": 2.75, + "layout_ad_density": 3.0, + "accountability": 3.5, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 3, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 4.025, + "weighted_total_content_score": 62.63157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.5, + "accountability": 4.0, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1931818181818183, + "normalized_reciprocal_se_rank": 0.23989898989898992, + "reciprocal_se_rank": 0.06735436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "claude", + "query_id": 74, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 62.63157894736841, + "semantic_relevance": 3.5, + "factual_accuracy": 3.0, + "freshness": 2.5, + "objectivity_tone": 3.0, + "layout_ad_density": 3.0, + "accountability": 3.5, + "transparency": 3.5, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.2, + "weighted_total_content_score": 62.52631578947368, + "semantic_relevance": 3.4, + "factual_accuracy": 2.8, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.2, + "authority": 3.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.255555555555555, + "normalized_reciprocal_se_rank": 0.08439955106621773, + "reciprocal_se_rank": 0.029989212513484353, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "tavily", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 3.2916666666666665, + "weighted_total_content_score": 62.456140350877185, + "semantic_relevance": 3.0, + "factual_accuracy": 2.3333333333333335, + "freshness": 5.0, + "objectivity_tone": 1.3333333333333333, + "layout_ad_density": 3.3333333333333335, + "accountability": 4.666666666666667, + "transparency": 3.3333333333333335, + "authority": 3.3333333333333335, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.8916666666666666, + "normalized_reciprocal_se_rank": 0.03277216610549944, + "reciprocal_se_rank": 0.017583603020496223, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 62.315789473684205, + "semantic_relevance": 1.2, + "factual_accuracy": 2.4, + "freshness": 5.0, + "objectivity_tone": 3.6, + "layout_ad_density": 3.4, + "accountability": 3.8, + "transparency": 3.4, + "authority": 3.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 60, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.175, + "weighted_total_content_score": 62.315789473684205, + "semantic_relevance": 1.8, + "factual_accuracy": 3.4, + "freshness": 4.0, + "objectivity_tone": 3.2, + "layout_ad_density": 3.4, + "accountability": 3.0, + "transparency": 3.2, + "authority": 3.4, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 2.0833333333333335, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.15625, + "weighted_total_content_score": 62.10526315789474, + "semantic_relevance": 3.25, + "factual_accuracy": 3.25, + "freshness": 4.25, + "objectivity_tone": 2.0, + "layout_ad_density": 3.5, + "accountability": 3.0, + "transparency": 3.25, + "authority": 2.75, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "claude", + "query_id": 89, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 62.10526315789473, + "semantic_relevance": 4.0, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 2.0, + "accountability": 4.0, + "transparency": 3.0, + "authority": 2.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 62.10526315789473, + "semantic_relevance": 1.25, + "factual_accuracy": 2.5, + "freshness": 3.75, + "objectivity_tone": 4.25, + "layout_ad_density": 3.25, + "accountability": 3.0, + "transparency": 4.0, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.2034883720930234, + "normalized_reciprocal_se_rank": 0.31792929292929295, + "reciprocal_se_rank": 0.08610436893203884, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 62.105263157894726, + "semantic_relevance": 3.0, + "factual_accuracy": 2.5, + "freshness": 3.5, + "objectivity_tone": 2.5, + "layout_ad_density": 4.0, + "accountability": 2.5, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 0.83335, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 54, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 61.84210526315789, + "semantic_relevance": 2.25, + "factual_accuracy": 3.0, + "freshness": 3.25, + "objectivity_tone": 3.5, + "layout_ad_density": 2.5, + "accountability": 3.25, + "transparency": 3.5, + "authority": 3.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0969387755102042, + "normalized_reciprocal_se_rank": 0.17842056932966027, + "reciprocal_se_rank": 0.05258164165931156, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 52, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 61.754385964912274, + "semantic_relevance": 2.3333333333333335, + "factual_accuracy": 3.0, + "freshness": 4.0, + "objectivity_tone": 3.3333333333333335, + "layout_ad_density": 3.0, + "accountability": 2.6666666666666665, + "transparency": 3.0, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0816326530612246, + "normalized_reciprocal_se_rank": 0.05861269238878347, + "reciprocal_se_rank": 0.02379285569536302, + "percentage_ge_sources_not_in_se_sources": 33.333333333333336, + "percentage_ge_sources_in_se_sources": 66.66666666666666 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.225, + "weighted_total_content_score": 61.684210526315795, + "semantic_relevance": 1.0, + "factual_accuracy": 3.4, + "freshness": 4.2, + "objectivity_tone": 2.6, + "layout_ad_density": 3.4, + "accountability": 3.2, + "transparency": 4.4, + "authority": 3.6, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 6.666666666666667, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 59, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.175, + "weighted_total_content_score": 61.684210526315795, + "semantic_relevance": 1.4, + "factual_accuracy": 2.8, + "freshness": 4.2, + "objectivity_tone": 3.6, + "layout_ad_density": 3.0, + "accountability": 2.6, + "transparency": 4.2, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.586046511627907, + "normalized_reciprocal_se_rank": 0.3732992260264988, + "reciprocal_se_rank": 0.099409280040348, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.05, + "weighted_total_content_score": 61.473684210526315, + "semantic_relevance": 2.0, + "factual_accuracy": 3.8, + "freshness": 4.4, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 2.2, + "transparency": 3.0, + "authority": 2.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 61.473684210526315, + "semantic_relevance": 1.8, + "factual_accuracy": 2.8, + "freshness": 3.4, + "objectivity_tone": 3.8, + "layout_ad_density": 3.4, + "accountability": 2.8, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": 0.53332, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 61.315789473684205, + "semantic_relevance": 3.25, + "factual_accuracy": 2.5, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.5, + "accountability": 3.5, + "transparency": 3.0, + "authority": 2.75, + "avg_ge_freq": 0.49999999999999994, + "relative_se_rank": 1.6329787234042552, + "normalized_reciprocal_se_rank": 0.09393939393939395, + "reciprocal_se_rank": 0.03228155339805825, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "gensee", + "query_id": 97, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 61.315789473684205, + "semantic_relevance": 5.0, + "factual_accuracy": 4.666666666666667, + "freshness": 4.333333333333333, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.3333333333333335, + "transparency": 3.6666666666666665, + "authority": 4.0, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 0.89375, + "normalized_reciprocal_se_rank": 0.361217437533227, + "reciprocal_se_rank": 0.09650613183444046, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "tavily", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 61.26315789473684, + "semantic_relevance": 2.2, + "factual_accuracy": 3.4, + "freshness": 4.6, + "objectivity_tone": 2.6, + "layout_ad_density": 2.2, + "accountability": 3.2, + "transparency": 3.2, + "authority": 3.6, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.9555555555555557, + "normalized_reciprocal_se_rank": 0.382010582010582, + "reciprocal_se_rank": 0.10150254276467867, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 61.05263157894736, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.6341463414634145, + "normalized_reciprocal_se_rank": 0.3333333333333333, + "reciprocal_se_rank": 0.08980582524271845, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "gpt-4o", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 61.05263157894736, + "semantic_relevance": 1.0, + "factual_accuracy": 1.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 5.0, + "transparency": 4.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 3.0, + "weighted_total_content_score": 61.05263157894736, + "semantic_relevance": 4.0, + "factual_accuracy": 3.0, + "freshness": 2.0, + "objectivity_tone": 3.0, + "layout_ad_density": 3.5, + "accountability": 3.5, + "transparency": 2.5, + "authority": 2.5, + "avg_ge_freq": 0.5, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-gensee", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.0, + "weighted_total_content_score": 60.78947368421053, + "semantic_relevance": 5.0, + "factual_accuracy": 4.333333333333333, + "freshness": 4.333333333333333, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 3.3333333333333335, + "accountability": 2.6666666666666665, + "transparency": 4.333333333333333, + "authority": 4.333333333333333, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.09375, + "weighted_total_content_score": 60.526315789473685, + "semantic_relevance": 1.75, + "factual_accuracy": 3.0, + "freshness": 3.5, + "objectivity_tone": 3.25, + "layout_ad_density": 2.75, + "accountability": 3.25, + "transparency": 3.75, + "authority": 3.5, + "avg_ge_freq": 0.75, + "relative_se_rank": 1.5918367346938775, + "normalized_reciprocal_se_rank": 0.05925925925925926, + "reciprocal_se_rank": 0.023948220064724916, + "percentage_ge_sources_not_in_se_sources": 75.0, + "percentage_ge_sources_in_se_sources": 25.0 + }, + { + "model_name": "exa", + "query_id": 43, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.0, + "weighted_total_content_score": 60.526315789473685, + "semantic_relevance": 2.25, + "factual_accuracy": 3.25, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 2.5, + "accountability": 3.0, + "transparency": 2.75, + "authority": 2.75, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1458333333333335, + "normalized_reciprocal_se_rank": 0.14886363636363636, + "reciprocal_se_rank": 0.045479368932038834, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.3600000000000003, + "weighted_total_content_score": 60.421052631578945, + "semantic_relevance": 2.25, + "factual_accuracy": 3.75, + "freshness": 4.4, + "objectivity_tone": 4.25, + "layout_ad_density": 3.6, + "accountability": 2.4, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.73334, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.4200000000000004, + "weighted_total_content_score": 60.421052631578945, + "semantic_relevance": 2.0, + "factual_accuracy": 3.25, + "freshness": 5.0, + "objectivity_tone": 3.5, + "layout_ad_density": 2.8, + "accountability": 3.6, + "transparency": 3.6, + "authority": 3.2, + "avg_ge_freq": 0.3333, + "relative_se_rank": 1.7148936170212763, + "normalized_reciprocal_se_rank": 0.1306397306397306, + "reciprocal_se_rank": 0.04110032362459547, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 4, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 60.263157894736835, + "semantic_relevance": 2.0, + "factual_accuracy": 2.0, + "freshness": 3.0, + "objectivity_tone": 2.25, + "layout_ad_density": 4.0, + "accountability": 4.25, + "transparency": 4.5, + "authority": 3.5, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 3.075, + "weighted_total_content_score": 60.21052631578948, + "semantic_relevance": 3.2, + "factual_accuracy": 2.6, + "freshness": 5.0, + "objectivity_tone": 2.2, + "layout_ad_density": 1.8, + "accountability": 2.8, + "transparency": 3.8, + "authority": 3.2, + "avg_ge_freq": 0.5333399999999999, + "relative_se_rank": 0.37333333333333335, + "normalized_reciprocal_se_rank": 0.23152989876697544, + "reciprocal_se_rank": 0.06534334946099651, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "claude", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.0416666666666665, + "weighted_total_content_score": 60.0, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 2.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 0.7777666666666666, + "relative_se_rank": 1.9074074074074074, + "normalized_reciprocal_se_rank": 0.14066591844369622, + "reciprocal_se_rank": 0.04350952894642215, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.2950000000000004, + "weighted_total_content_score": 59.78947368421052, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 3.0, + "accountability": 2.2, + "transparency": 3.2, + "authority": 2.4, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 2.9583333333333335, + "weighted_total_content_score": 59.29824561403509, + "semantic_relevance": 3.6666666666666665, + "factual_accuracy": 3.0, + "freshness": 3.6666666666666665, + "objectivity_tone": 2.3333333333333335, + "layout_ad_density": 2.0, + "accountability": 3.6666666666666665, + "transparency": 3.3333333333333335, + "authority": 2.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "google-search", + "query_id": 46, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 58.94736842105263, + "semantic_relevance": 1.0, + "factual_accuracy": 1.0, + "freshness": 3.5, + "objectivity_tone": 4.0, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "grok-4.1-fast-non-reasoning", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 2.875, + "weighted_total_content_score": 58.94736842105263, + "semantic_relevance": 2.0, + "factual_accuracy": 4.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 2.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 4.375, + "weighted_total_content_score": 58.59649122807017, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 4.5, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 2.9375, + "weighted_total_content_score": 58.421052631578945, + "semantic_relevance": 1.5, + "factual_accuracy": 3.0, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 3.8333333333333335, + "weighted_total_content_score": 58.421052631578945, + "semantic_relevance": 4.333333333333333, + "factual_accuracy": 4.666666666666667, + "freshness": 5.0, + "objectivity_tone": 3.6666666666666665, + "layout_ad_density": 1.6666666666666667, + "accountability": 2.0, + "transparency": 4.333333333333333, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 2.925, + "weighted_total_content_score": 57.684210526315795, + "semantic_relevance": 3.4, + "factual_accuracy": 2.6, + "freshness": 4.4, + "objectivity_tone": 2.0, + "layout_ad_density": 2.0, + "accountability": 2.6, + "transparency": 4.0, + "authority": 2.4, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "gensee", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 2.90625, + "weighted_total_content_score": 57.631578947368425, + "semantic_relevance": 3.5, + "factual_accuracy": 3.0, + "freshness": 4.25, + "objectivity_tone": 1.75, + "layout_ad_density": 2.0, + "accountability": 2.75, + "transparency": 3.25, + "authority": 2.75, + "avg_ge_freq": 0.49999999999999994, + "relative_se_rank": 2.7777777777777777, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.0, + "weighted_total_content_score": 57.63157894736842, + "semantic_relevance": 1.5, + "factual_accuracy": 2.25, + "freshness": 4.25, + "objectivity_tone": 3.0, + "layout_ad_density": 3.75, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.25, + "avg_ge_freq": 0.833325, + "relative_se_rank": 1.112244897959184, + "normalized_reciprocal_se_rank": 0.15441124532033623, + "reciprocal_se_rank": 0.04681241089008079, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 3.1875, + "weighted_total_content_score": 57.368421052631575, + "semantic_relevance": 1.25, + "factual_accuracy": 2.5, + "freshness": 4.0, + "objectivity_tone": 4.75, + "layout_ad_density": 3.25, + "accountability": 3.6666666666666665, + "transparency": 3.6666666666666665, + "authority": 3.6666666666666665, + "avg_ge_freq": 0.5833499999999999, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 23, + "query_type": "DebateQA", + "num_sources": 5, + "unweighted_mean_score": 3.65625, + "weighted_total_content_score": 56.8421052631579, + "semantic_relevance": 3.25, + "factual_accuracy": 3.25, + "freshness": 4.75, + "objectivity_tone": 2.5, + "layout_ad_density": 3.75, + "accountability": 3.75, + "transparency": 3.75, + "authority": 4.25, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-5", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 1, + "unweighted_mean_score": 2.875, + "weighted_total_content_score": 56.84210526315789, + "semantic_relevance": 2.0, + "factual_accuracy": 2.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 2.0, + "transparency": 4.0, + "authority": 2.0, + "avg_ge_freq": 0.6667, + "relative_se_rank": 2.857142857142857, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 2.9166666666666665, + "weighted_total_content_score": 56.49122807017543, + "semantic_relevance": 2.6666666666666665, + "factual_accuracy": 2.6666666666666665, + "freshness": 5.0, + "objectivity_tone": 1.6666666666666667, + "layout_ad_density": 2.3333333333333335, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.7777777777777772, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 69, + "query_type": "Pinocchios", + "num_sources": 5, + "unweighted_mean_score": 3.6071428571428568, + "weighted_total_content_score": 56.421052631578945, + "semantic_relevance": 2.5, + "factual_accuracy": 3.75, + "freshness": 2.75, + "objectivity_tone": 4.25, + "layout_ad_density": 3.25, + "accountability": 3.5, + "transparency": 3.75, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 5.346666666666667, + "normalized_reciprocal_se_rank": 0.2, + "reciprocal_se_rank": 0.05776699029126213, + "percentage_ge_sources_not_in_se_sources": 80.0, + "percentage_ge_sources_in_se_sources": 20.0 + }, + { + "model_name": "exa", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 2.8, + "weighted_total_content_score": 56.0, + "semantic_relevance": 1.8, + "factual_accuracy": 2.8, + "freshness": 3.6, + "objectivity_tone": 3.8, + "layout_ad_density": 2.8, + "accountability": 2.2, + "transparency": 2.6, + "authority": 2.8, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 65, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 3.125, + "weighted_total_content_score": 55.78947368421052, + "semantic_relevance": 1.0, + "factual_accuracy": 1.0, + "freshness": 5.0, + "objectivity_tone": 1.0, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 4.0, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 49, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 2.75, + "weighted_total_content_score": 54.52631578947368, + "semantic_relevance": 2.2, + "factual_accuracy": 2.2, + "freshness": 4.0, + "objectivity_tone": 3.4, + "layout_ad_density": 3.4, + "accountability": 2.4, + "transparency": 1.8, + "authority": 2.6, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "tavily", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 2.8, + "weighted_total_content_score": 54.52631578947368, + "semantic_relevance": 2.0, + "factual_accuracy": 2.4, + "freshness": 4.4, + "objectivity_tone": 2.6, + "layout_ad_density": 1.8, + "accountability": 3.0, + "transparency": 3.0, + "authority": 3.2, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.897872340425532, + "normalized_reciprocal_se_rank": 0.36516594516594514, + "reciprocal_se_rank": 0.09745492371705963, + "percentage_ge_sources_not_in_se_sources": 40.0, + "percentage_ge_sources_in_se_sources": 60.0 + }, + { + "model_name": "Perplexity-Sonar-Pro", + "query_id": 56, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 2.75, + "weighted_total_content_score": 54.38596491228069, + "semantic_relevance": 1.6666666666666667, + "factual_accuracy": 3.0, + "freshness": 3.6666666666666665, + "objectivity_tone": 3.0, + "layout_ad_density": 1.6666666666666667, + "accountability": 2.0, + "transparency": 3.3333333333333335, + "authority": 3.6666666666666665, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "exa", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 2.78125, + "weighted_total_content_score": 53.94736842105263, + "semantic_relevance": 1.5, + "factual_accuracy": 2.0, + "freshness": 4.25, + "objectivity_tone": 3.25, + "layout_ad_density": 2.25, + "accountability": 2.25, + "transparency": 3.5, + "authority": 3.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "Gemini-2.5-Flash-Preview", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 2.75, + "weighted_total_content_score": 53.89473684210526, + "semantic_relevance": 3.0, + "factual_accuracy": 2.4, + "freshness": 3.6, + "objectivity_tone": 1.8, + "layout_ad_density": 2.0, + "accountability": 3.2, + "transparency": 3.0, + "authority": 3.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 42, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 2.75, + "weighted_total_content_score": 53.68421052631579, + "semantic_relevance": 2.0, + "factual_accuracy": 2.5, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.5, + "accountability": 2.0, + "transparency": 2.5, + "authority": 3.0, + "avg_ge_freq": 0.66665, + "relative_se_rank": 2.0408163265306123, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "gpt-4o", + "query_id": 66, + "query_type": "Pinocchios", + "num_sources": 1, + "unweighted_mean_score": 3.0, + "weighted_total_content_score": 53.68421052631579, + "semantic_relevance": 1.0, + "factual_accuracy": 1.0, + "freshness": 5.0, + "objectivity_tone": 1.0, + "layout_ad_density": 5.0, + "accountability": 2.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.024390243902439025, + "normalized_reciprocal_se_rank": 1.0, + "reciprocal_se_rank": 0.25, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 90, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 2.995, + "weighted_total_content_score": 53.68421052631578, + "semantic_relevance": 3.0, + "factual_accuracy": 2.75, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 1.8, + "accountability": 2.6, + "transparency": 4.0, + "authority": 2.8, + "avg_ge_freq": 0.80002, + "relative_se_rank": 0.6088888888888889, + "normalized_reciprocal_se_rank": 0.2841694253458959, + "reciprocal_se_rank": 0.07799216774088276, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "Gemini-3-Pro-Preview", + "query_id": 44, + "query_type": "HotpotQA", + "num_sources": 3, + "unweighted_mean_score": 2.7083333333333335, + "weighted_total_content_score": 52.63157894736842, + "semantic_relevance": 2.3333333333333335, + "factual_accuracy": 2.0, + "freshness": 5.0, + "objectivity_tone": 2.3333333333333335, + "layout_ad_density": 2.3333333333333335, + "accountability": 2.6666666666666665, + "transparency": 2.3333333333333335, + "authority": 2.6666666666666665, + "avg_ge_freq": 0.8889, + "relative_se_rank": 1.798449612403101, + "normalized_reciprocal_se_rank": 0.026166426166426168, + "reciprocal_se_rank": 0.015996301433194637, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "tavily", + "query_id": 41, + "query_type": "HotpotQA", + "num_sources": 5, + "unweighted_mean_score": 3.25, + "weighted_total_content_score": 52.42105263157894, + "semantic_relevance": 2.0, + "factual_accuracy": 3.75, + "freshness": 4.25, + "objectivity_tone": 4.5, + "layout_ad_density": 3.25, + "accountability": 2.5, + "transparency": 2.5, + "authority": 3.25, + "avg_ge_freq": 1.0, + "relative_se_rank": 4.545454545454546, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 85, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 2.725, + "weighted_total_content_score": 52.0, + "semantic_relevance": 2.2, + "factual_accuracy": 2.0, + "freshness": 3.8, + "objectivity_tone": 1.6, + "layout_ad_density": 3.0, + "accountability": 3.0, + "transparency": 3.2, + "authority": 3.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.5148936170212765, + "normalized_reciprocal_se_rank": 0.40632792572186516, + "reciprocal_se_rank": 0.10734578797685594, + "percentage_ge_sources_not_in_se_sources": 20.0, + "percentage_ge_sources_in_se_sources": 80.0 + }, + { + "model_name": "claude", + "query_id": 47, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 2.5625, + "weighted_total_content_score": 51.578947368421055, + "semantic_relevance": 1.5, + "factual_accuracy": 2.5, + "freshness": 2.0, + "objectivity_tone": 4.0, + "layout_ad_density": 2.0, + "accountability": 2.0, + "transparency": 3.0, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 91, + "query_type": "QuoraQuestions", + "num_sources": 5, + "unweighted_mean_score": 2.575, + "weighted_total_content_score": 49.473684210526315, + "semantic_relevance": 2.2, + "factual_accuracy": 2.0, + "freshness": 4.4, + "objectivity_tone": 1.6, + "layout_ad_density": 2.2, + "accountability": 3.0, + "transparency": 3.2, + "authority": 2.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.9444444444444442, + "normalized_reciprocal_se_rank": 0.09402774792845714, + "reciprocal_se_rank": 0.03230278408960499, + "percentage_ge_sources_not_in_se_sources": 60.0, + "percentage_ge_sources_in_se_sources": 40.0 + }, + { + "model_name": "Gemini-3-Flash-Preview", + "query_id": 83, + "query_type": "QuoraQuestions", + "num_sources": 2, + "unweighted_mean_score": 2.5625, + "weighted_total_content_score": 47.89473684210526, + "semantic_relevance": 1.0, + "factual_accuracy": 1.5, + "freshness": 5.0, + "objectivity_tone": 2.0, + "layout_ad_density": 3.0, + "accountability": 2.5, + "transparency": 3.5, + "authority": 2.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.625, + "weighted_total_content_score": 46.84210526315789, + "semantic_relevance": 5.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 22, + "query_type": "DebateQA", + "num_sources": 4, + "unweighted_mean_score": 4.6875, + "weighted_total_content_score": 46.57894736842105, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 5.0, + "layout_ad_density": 4.0, + "accountability": 5.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 0.916675, + "relative_se_rank": 2.380952380952381, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 29, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 3.5625, + "weighted_total_content_score": 45.96491228070175, + "semantic_relevance": 2.5, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 3.5, + "transparency": 3.5, + "authority": 4.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.2222222222222223, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 51, + "query_type": "HotpotQA", + "num_sources": 2, + "unweighted_mean_score": 4.5, + "weighted_total_content_score": 44.73684210526316, + "semantic_relevance": 4.0, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.0, + "layout_ad_density": 5.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 94, + "query_type": "QuoraQuestions", + "num_sources": 3, + "unweighted_mean_score": 3.375, + "weighted_total_content_score": 44.21052631578947, + "semantic_relevance": 3.5, + "factual_accuracy": 3.0, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.0, + "accountability": 4.5, + "transparency": 3.5, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 0.375, + "normalized_reciprocal_se_rank": 0.46807625595504393, + "reciprocal_se_rank": 0.1221833721833722, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "google-search", + "query_id": 7, + "query_type": "VACOS", + "num_sources": 2, + "unweighted_mean_score": 2.25, + "weighted_total_content_score": 43.68421052631578, + "semantic_relevance": 1.0, + "factual_accuracy": 2.0, + "freshness": 3.0, + "objectivity_tone": 2.5, + "layout_ad_density": 3.0, + "accountability": 2.5, + "transparency": 2.0, + "authority": 2.0, + "avg_ge_freq": null, + "relative_se_rank": null, + "normalized_reciprocal_se_rank": null, + "reciprocal_se_rank": null, + "percentage_ge_sources_not_in_se_sources": null, + "percentage_ge_sources_in_se_sources": null + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 4, + "unweighted_mean_score": 4.25, + "weighted_total_content_score": 43.1578947368421, + "semantic_relevance": 4.5, + "factual_accuracy": 5.0, + "freshness": 5.0, + "objectivity_tone": 4.5, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 81, + "query_type": "QuoraQuestions", + "num_sources": 4, + "unweighted_mean_score": 4.3125, + "weighted_total_content_score": 42.63157894736842, + "semantic_relevance": 4.0, + "factual_accuracy": 4.5, + "freshness": 3.5, + "objectivity_tone": 3.5, + "layout_ad_density": 5.0, + "accountability": 4.0, + "transparency": 5.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.7134146341463414, + "normalized_reciprocal_se_rank": 0.42542950876284213, + "reciprocal_se_rank": 0.11193573147456642, + "percentage_ge_sources_not_in_se_sources": 25.0, + "percentage_ge_sources_in_se_sources": 75.0 + }, + { + "model_name": "gpt-4o", + "query_id": 53, + "query_type": "HotpotQA", + "num_sources": 1, + "unweighted_mean_score": 2.0, + "weighted_total_content_score": 41.05263157894736, + "semantic_relevance": 1.0, + "factual_accuracy": 1.0, + "freshness": 1.0, + "objectivity_tone": 5.0, + "layout_ad_density": 1.0, + "accountability": 2.0, + "transparency": 2.0, + "authority": 3.0, + "avg_ge_freq": 0.3333, + "relative_se_rank": 2.127659574468085, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 87, + "query_type": "QuoraQuestions", + "num_sources": 1, + "unweighted_mean_score": 2.0, + "weighted_total_content_score": 40.0, + "semantic_relevance": 2.0, + "factual_accuracy": 2.0, + "freshness": 1.0, + "objectivity_tone": 2.0, + "layout_ad_density": 3.0, + "accountability": 2.0, + "transparency": 2.0, + "authority": 2.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.0731707317073171, + "normalized_reciprocal_se_rank": 0.04814098431119708, + "reciprocal_se_rank": 0.02127659574468085, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + }, + { + "model_name": "deepseek-chat-tavily", + "query_id": 11, + "query_type": "VACOS", + "num_sources": 3, + "unweighted_mean_score": 3.0, + "weighted_total_content_score": 38.24561403508772, + "semantic_relevance": 2.0, + "factual_accuracy": 2.0, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.0, + "accountability": 2.0, + "transparency": 4.0, + "authority": 4.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.4390243902439024, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "claude", + "query_id": 39, + "query_type": "DebateQA", + "num_sources": 3, + "unweighted_mean_score": 2.9375, + "weighted_total_content_score": 37.89473684210526, + "semantic_relevance": 2.5, + "factual_accuracy": 2.5, + "freshness": 3.0, + "objectivity_tone": 2.0, + "layout_ad_density": 4.0, + "accountability": 2.5, + "transparency": 3.5, + "authority": 3.5, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.4305555555555556, + "normalized_reciprocal_se_rank": 0.14066591844369622, + "reciprocal_se_rank": 0.04350952894642215, + "percentage_ge_sources_not_in_se_sources": 66.66666666666667, + "percentage_ge_sources_in_se_sources": 33.33333333333333 + }, + { + "model_name": "deepseek-reasoning-tavily", + "query_id": 61, + "query_type": "Pinocchios", + "num_sources": 2, + "unweighted_mean_score": 3.5, + "weighted_total_content_score": 35.26315789473684, + "semantic_relevance": 4.0, + "factual_accuracy": 4.0, + "freshness": 2.0, + "objectivity_tone": 3.0, + "layout_ad_density": 4.0, + "accountability": 2.0, + "transparency": 4.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 1.1666666666666667, + "normalized_reciprocal_se_rank": 0.23989898989898992, + "reciprocal_se_rank": 0.06735436893203883, + "percentage_ge_sources_not_in_se_sources": 50.0, + "percentage_ge_sources_in_se_sources": 50.0 + }, + { + "model_name": "deepseek-reasoning-gensee", + "query_id": 12, + "query_type": "VACOS", + "num_sources": 5, + "unweighted_mean_score": 3.75, + "weighted_total_content_score": 30.31578947368421, + "semantic_relevance": 5.0, + "factual_accuracy": 4.5, + "freshness": 5.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.0, + "accountability": 3.0, + "transparency": 3.0, + "authority": 5.0, + "avg_ge_freq": 1.0, + "relative_se_rank": 2.5, + "normalized_reciprocal_se_rank": 0.0, + "reciprocal_se_rank": 0.009708737864077669, + "percentage_ge_sources_not_in_se_sources": 100.0, + "percentage_ge_sources_in_se_sources": 0.0 + }, + { + "model_name": "tavily", + "query_id": 45, + "query_type": "HotpotQA", + "num_sources": 4, + "unweighted_mean_score": 2.2916666666666665, + "weighted_total_content_score": 27.105263157894736, + "semantic_relevance": 1.5, + "factual_accuracy": 1.5, + "freshness": 4.0, + "objectivity_tone": 2.5, + "layout_ad_density": 2.5, + "accountability": 2.6666666666666665, + "transparency": 1.6666666666666667, + "authority": 1.6666666666666667, + "avg_ge_freq": 1.0, + "relative_se_rank": 0.16326530612244897, + "normalized_reciprocal_se_rank": 0.45578865578865585, + "reciprocal_se_rank": 0.11923076923076922, + "percentage_ge_sources_not_in_se_sources": 0.0, + "percentage_ge_sources_in_se_sources": 100.0 + } + ] +}