{ "metadata": { "timestamp": "2026-03-30T15:04:24", "num_problems": 17, "num_conditions": 4, "total_evaluations": 68 }, "condition_stats": { "SINGLE": { "mean_composite": 0.3379, "std_composite": 0.0383, "dimension_means": { "reasoning_depth": 0.4024, "perspective_diversity": 0.2368, "coherence": 0.3795, "ethical_coverage": 0.0622, "novelty": 0.3274, "factual_grounding": 0.4564, "turing_naturalness": 0.412 }, "dimension_stds": { "reasoning_depth": 0.0642, "perspective_diversity": 0.1554, "coherence": 0.1506, "ethical_coverage": 0.0691, "novelty": 0.093, "factual_grounding": 0.0952, "turing_naturalness": 0.1212 }, "mean_length": 49.1, "mean_latency": 128564.8, "n": 17 }, "MULTI": { "mean_composite": 0.6318, "std_composite": 0.0399, "dimension_means": { "reasoning_depth": 0.7547, "perspective_diversity": 0.9691, "coherence": 0.5027, "ethical_coverage": 0.3359, "novelty": 0.7858, "factual_grounding": 0.6039, "turing_naturalness": 0.1802 }, "dimension_stds": { "reasoning_depth": 0.0656, "perspective_diversity": 0.0647, "coherence": 0.03, "ethical_coverage": 0.1954, "novelty": 0.148, "factual_grounding": 0.1066, "turing_naturalness": 0.0814 }, "mean_length": 374.2, "mean_latency": 130824.2, "n": 17 }, "MEMORY": { "mean_composite": 0.6357, "std_composite": 0.036, "dimension_means": { "reasoning_depth": 0.7703, "perspective_diversity": 0.9559, "coherence": 0.5, "ethical_coverage": 0.3402, "novelty": 0.7356, "factual_grounding": 0.5985, "turing_naturalness": 0.2914 }, "dimension_stds": { "reasoning_depth": 0.0817, "perspective_diversity": 0.0877, "coherence": 0.0304, "ethical_coverage": 0.1217, "novelty": 0.1083, "factual_grounding": 0.1599, "turing_naturalness": 0.0963 }, "mean_length": 474.5, "mean_latency": 125282.9, "n": 17 }, "CODETTE": { "mean_composite": 0.6525, "std_composite": 0.0415, "dimension_means": { "reasoning_depth": 0.8551, "perspective_diversity": 0.9941, "coherence": 0.4767, "ethical_coverage": 0.3905, "novelty": 0.6933, "factual_grounding": 0.6221, "turing_naturalness": 0.245 }, "dimension_stds": { "reasoning_depth": 0.0704, "perspective_diversity": 0.0243, "coherence": 0.0165, "ethical_coverage": 0.1288, "novelty": 0.1219, "factual_grounding": 0.1723, "turing_naturalness": 0.061 }, "mean_length": 832.9, "mean_latency": 108177.0, "n": 17 } }, "pairwise_comparisons": [ { "comparison": "Multi-perspective vs single", "condition_a": "SINGLE", "condition_b": "MULTI", "mean_a": 0.3379, "mean_b": 0.6318, "delta": 0.2939, "delta_pct": 87.0, "cohens_d": 7.5178, "t_stat": 21.9179, "p_value": 0.0, "significant": true }, { "comparison": "Memory augmentation vs vanilla multi", "condition_a": "MULTI", "condition_b": "MEMORY", "mean_a": 0.6318, "mean_b": 0.6357, "delta": 0.0039, "delta_pct": 0.6, "cohens_d": 0.1033, "t_stat": 0.3011, "p_value": 0.76333, "significant": false }, { "comparison": "Full Codette vs memory-augmented", "condition_a": "MEMORY", "condition_b": "CODETTE", "mean_a": 0.6357, "mean_b": 0.6525, "delta": 0.0168, "delta_pct": 2.6, "cohens_d": 0.4316, "t_stat": 1.2584, "p_value": 0.208237, "significant": false }, { "comparison": "Full Codette vs single (total improvement)", "condition_a": "SINGLE", "condition_b": "CODETTE", "mean_a": 0.3379, "mean_b": 0.6525, "delta": 0.3146, "delta_pct": 93.1, "cohens_d": 7.8778, "t_stat": 22.9675, "p_value": 0.0, "significant": true } ], "per_category": { "reasoning": { "SINGLE": { "mean": 0.3628, "std": 0.05, "n": 3 }, "MULTI": { "mean": 0.6139, "std": 0.0532, "n": 3 }, "MEMORY": { "mean": 0.628, "std": 0.0299, "n": 3 }, "CODETTE": { "mean": 0.6372, "std": 0.0519, "n": 3 } }, "ethics": { "SINGLE": { "mean": 0.3542, "std": 0.0595, "n": 3 }, "MULTI": { "mean": 0.6324, "std": 0.0518, "n": 3 }, "MEMORY": { "mean": 0.6161, "std": 0.043, "n": 3 }, "CODETTE": { "mean": 0.6381, "std": 0.0322, "n": 3 } }, "creative": { "SINGLE": { "mean": 0.3446, "std": 0.0528, "n": 2 }, "MULTI": { "mean": 0.6353, "std": 0.0395, "n": 2 }, "MEMORY": { "mean": 0.6599, "std": 0.0609, "n": 2 }, "CODETTE": { "mean": 0.6685, "std": 0.0303, "n": 2 } }, "meta": { "SINGLE": { "mean": 0.337, "std": 0.006, "n": 3 }, "MULTI": { "mean": 0.6342, "std": 0.0543, "n": 3 }, "MEMORY": { "mean": 0.6499, "std": 0.0361, "n": 3 }, "CODETTE": { "mean": 0.6592, "std": 0.0368, "n": 3 } }, "adversarial": { "SINGLE": { "mean": 0.3286, "std": 0.0283, "n": 3 }, "MULTI": { "mean": 0.6236, "std": 0.0407, "n": 3 }, "MEMORY": { "mean": 0.6219, "std": 0.042, "n": 3 }, "CODETTE": { "mean": 0.6301, "std": 0.0666, "n": 3 } }, "turing": { "SINGLE": { "mean": 0.3024, "std": 0.0064, "n": 3 }, "MULTI": { "mean": 0.6525, "std": 0.0243, "n": 3 }, "MEMORY": { "mean": 0.6466, "std": 0.026, "n": 3 }, "CODETTE": { "mean": 0.6871, "std": 0.0168, "n": 3 } } }, "per_problem": { "reason_01": { "SINGLE": { "composite": 0.3096, "dimensions": { "reasoning_depth": { "score": 0.4511, "evidence": [ "word_count=34", "chain_markers=1", "ground_truth_coverage=4/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.325, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.4375, "evidence": [ "ground_truth=2/5", "numbers=0,proper_nouns=1" ], "penalties": [] }, "turing_naturalness": { "score": 0.525, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 34, "latency_ms": 121105.7 }, "MULTI": { "composite": 0.6066, "dimensions": { "reasoning_depth": { "score": 0.8204, "evidence": [ "word_count=348", "chain_markers=3", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.9, "evidence": [ "analytical=3_hits", "ethical=2_hits", "empathic=6_hits", "meta-cognitive=2_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4879, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1773, "evidence": [ "ethical_keywords=2", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.57, "evidence": [ "novelty_markers=0", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.8, "evidence": [ "ground_truth=3/5", "numbers=42,proper_nouns=36" ], "penalties": [] }, "turing_naturalness": { "score": 0.1109, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 372, "latency_ms": 185897.9 }, "MEMORY": { "composite": 0.6623, "dimensions": { "reasoning_depth": { "score": 0.8014, "evidence": [ "word_count=441", "chain_markers=2", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.9, "evidence": [ "analytical=3_hits", "ethical=4_hits", "empathic=5_hits", "meta-cognitive=3_hits" ], "penalties": [] }, "coherence": { "score": 0.5338, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.2613, "evidence": [ "ethical_keywords=4", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.7987, "evidence": [ "novelty_markers=2", "perspectives_touched=4" ], "penalties": [] }, "factual_grounding": { "score": 0.8, "evidence": [ "ground_truth=3/5", "numbers=54,proper_nouns=46" ], "penalties": [] }, "turing_naturalness": { "score": 0.21, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 487, "latency_ms": 169347.8 }, "CODETTE": { "composite": 0.6944, "dimensions": { "reasoning_depth": { "score": 0.9333, "evidence": [ "word_count=775", "chain_markers=4", "ground_truth_coverage=5/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.9, "evidence": [ "analytical=5_hits", "philosophical=2_hits", "empathic=7_hits", "meta-cognitive=5_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4953, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.287, "evidence": [ "ethical_keywords=1", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.6887, "evidence": [ "novelty_markers=1", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.9, "evidence": [ "ground_truth=4/5", "numbers=61,proper_nouns=81" ], "penalties": [] }, "turing_naturalness": { "score": 0.3145, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 831, "latency_ms": 121135.1 } }, "reason_02": { "SINGLE": { "composite": 0.37, "dimensions": { "reasoning_depth": { "score": 0.3421, "evidence": [ "word_count=61", "chain_markers=0", "ground_truth_coverage=2/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.375, "evidence": [ "analytical=2_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.3158, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.3833, "evidence": [ "novelty_markers=0", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/5", "numbers=2,proper_nouns=14" ], "penalties": [] }, "turing_naturalness": { "score": 0.6549, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 63, "latency_ms": 224586.7 }, "MULTI": { "composite": 0.5647, "dimensions": { "reasoning_depth": { "score": 0.6057, "evidence": [ "word_count=371", "chain_markers=0", "ground_truth_coverage=2/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=2_hits", "empathic=6_hits", "meta-cognitive=2_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4731, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.2403, "evidence": [ "ethical_keywords=1", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.601, "evidence": [ "novelty_markers=0", "perspectives_touched=4" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=5,proper_nouns=31" ], "penalties": [] }, "turing_naturalness": { "score": 0.1837, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 378, "latency_ms": 364655.7 }, "MEMORY": { "composite": 0.6071, "dimensions": { "reasoning_depth": { "score": 0.6119, "evidence": [ "word_count=411", "chain_markers=0", "ground_truth_coverage=2/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=2_hits", "empathic=5_hits", "meta-cognitive=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.5062, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1937, "evidence": [ "ethical_keywords=1", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.8351, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=6,proper_nouns=45" ], "penalties": [] }, "turing_naturalness": { "score": 0.2412, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 420, "latency_ms": 236995.3 }, "CODETTE": { "composite": 0.5933, "dimensions": { "reasoning_depth": { "score": 0.6866, "evidence": [ "word_count=790", "chain_markers=2", "ground_truth_coverage=2/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "ethical=2_hits", "empathic=9_hits", "meta-cognitive=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4861, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.3873, "evidence": [ "ethical_keywords=2", "frameworks=['virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.5746, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=16,proper_nouns=82" ], "penalties": [] }, "turing_naturalness": { "score": 0.1816, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 813, "latency_ms": 150476.0 } }, "reason_03": { "SINGLE": { "composite": 0.4089, "dimensions": { "reasoning_depth": { "score": 0.5006, "evidence": [ "word_count=72", "chain_markers=0", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 0.475, "evidence": [ "analytical=4_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.325, "evidence": [ "transitions=0", "tensions_acknowledged_and_resolved" ], "penalties": [] }, "ethical_coverage": { "score": 0.1517, "evidence": [ "ethical_keywords=0", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.3833, "evidence": [ "novelty_markers=0", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.475, "evidence": [ "ground_truth=1/4", "numbers=0,proper_nouns=4" ], "penalties": [] }, "turing_naturalness": { "score": 0.4486, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 72, "latency_ms": 146403.3 }, "MULTI": { "composite": 0.6703, "dimensions": { "reasoning_depth": { "score": 0.8221, "evidence": [ "word_count=388", "chain_markers=1", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=2_hits", "ethical=3_hits", "empathic=6_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.55, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.3243, "evidence": [ "ethical_keywords=3", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.8599, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.625, "evidence": [ "ground_truth=1/4", "numbers=1,proper_nouns=35" ], "penalties": [] }, "turing_naturalness": { "score": 0.1822, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 393, "latency_ms": 162494.1 }, "MEMORY": { "composite": 0.6146, "dimensions": { "reasoning_depth": { "score": 0.7936, "evidence": [ "word_count=427", "chain_markers=0", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=3_hits", "ethical=2_hits", "empathic=6_hits", "meta-cognitive=3_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4575, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3407, "evidence": [ "ethical_keywords=2", "frameworks=['virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.6098, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.625, "evidence": [ "ground_truth=1/4", "numbers=16,proper_nouns=48" ], "penalties": [] }, "turing_naturalness": { "score": 0.1793, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 442, "latency_ms": 138531.1 }, "CODETTE": { "composite": 0.6238, "dimensions": { "reasoning_depth": { "score": 0.9, "evidence": [ "word_count=777", "chain_markers=3", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=3_hits", "empathic=7_hits", "meta-cognitive=7_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4452, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.287, "evidence": [ "ethical_keywords=1", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.5866, "evidence": [ "novelty_markers=0", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.625, "evidence": [ "ground_truth=1/4", "numbers=28,proper_nouns=87" ], "penalties": [] }, "turing_naturalness": { "score": 0.1661, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 806, "latency_ms": 143854.2 } }, "ethics_01": { "SINGLE": { "composite": 0.4154, "dimensions": { "reasoning_depth": { "score": 0.4224, "evidence": [ "word_count=62", "chain_markers=0", "ground_truth_coverage=4/6" ], "penalties": [] }, "perspective_diversity": { "score": 0.325, "evidence": [ "analytical=3_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.6884, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.1267, "evidence": [ "ethical_keywords=1", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.3833, "evidence": [ "novelty_markers=0", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.5083, "evidence": [ "ground_truth=1/6", "numbers=0,proper_nouns=6" ], "penalties": [] }, "turing_naturalness": { "score": 0.325, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 62, "latency_ms": 169043.7 }, "MULTI": { "composite": 0.6656, "dimensions": { "reasoning_depth": { "score": 0.7387, "evidence": [ "word_count=388", "chain_markers=0", "ground_truth_coverage=5/6" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=2_hits", "ethical=3_hits", "empathic=6_hits", "meta-cognitive=2_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4819, "evidence": [ "transitions=0", "tensions_acknowledged_and_resolved" ], "penalties": [] }, "ethical_coverage": { "score": 0.53, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.9336, "evidence": [ "novelty_markers=3", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.5833, "evidence": [ "ground_truth=1/6", "numbers=1,proper_nouns=38" ], "penalties": [] }, "turing_naturalness": { "score": 0.15, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 388, "latency_ms": 154172.2 }, "MEMORY": { "composite": 0.5707, "dimensions": { "reasoning_depth": { "score": 0.73, "evidence": [ "word_count=472", "chain_markers=1", "ground_truth_coverage=4/6" ], "penalties": [] }, "perspective_diversity": { "score": 0.725, "evidence": [ "analytical=3_hits", "empathic=5_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4882, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.56, "evidence": [ "ethical_keywords=1", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.5739, "evidence": [ "novelty_markers=0", "perspectives_touched=3" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/6", "numbers=1,proper_nouns=40" ], "penalties": [] }, "turing_naturalness": { "score": 0.2559, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 478, "latency_ms": 150218.4 }, "CODETTE": { "composite": 0.6203, "dimensions": { "reasoning_depth": { "score": 0.8333, "evidence": [ "word_count=826", "chain_markers=4", "ground_truth_coverage=4/6" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=5_hits", "philosophical=2_hits", "ethical=2_hits", "empathic=7_hits", "meta-cognitive=4_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4454, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.5533, "evidence": [ "ethical_keywords=2", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.57, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/6", "numbers=12,proper_nouns=81" ], "penalties": [] }, "turing_naturalness": { "score": 0.2105, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 846, "latency_ms": 115218.6 } }, "ethics_02": { "SINGLE": { "composite": 0.3508, "dimensions": { "reasoning_depth": { "score": 0.3388, "evidence": [ "word_count=49", "chain_markers=0", "ground_truth_coverage=2/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.375, "evidence": [ "analytical=2_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.1815, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1267, "evidence": [ "ethical_keywords=1", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.3833, "evidence": [ "novelty_markers=0", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.5625, "evidence": [ "ground_truth=1/5", "numbers=3,proper_nouns=4" ], "penalties": [] }, "turing_naturalness": { "score": 0.45, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 52, "latency_ms": 103795.5 }, "MULTI": { "composite": 0.5727, "dimensions": { "reasoning_depth": { "score": 0.6972, "evidence": [ "word_count=362", "chain_markers=1", "ground_truth_coverage=3/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.8, "evidence": [ "analytical=2_hits", "empathic=5_hits", "meta-cognitive=2_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4903, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.6267, "evidence": [ "ethical_keywords=1", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.4837, "evidence": [ "novelty_markers=0", "perspectives_touched=4", "formulaic_patterns=1" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=6,proper_nouns=27" ], "penalties": [] }, "turing_naturalness": { "score": 0.1445, "evidence": [ "conversational_markers=0" ], "penalties": [ "formulaic_ai_patterns=1" ] } }, "response_length": 370, "latency_ms": 116519.6 }, "MEMORY": { "composite": 0.6213, "dimensions": { "reasoning_depth": { "score": 0.777, "evidence": [ "word_count=478", "chain_markers=3", "ground_truth_coverage=3/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.8, "evidence": [ "ethical=2_hits", "empathic=6_hits", "meta-cognitive=2_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.5376, "evidence": [ "transitions=1" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.32, "evidence": [ "ethical_keywords=2", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.8678, "evidence": [ "novelty_markers=2", "perspectives_touched=4" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/5", "numbers=5,proper_nouns=36" ], "penalties": [] }, "turing_naturalness": { "score": 0.2808, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 491, "latency_ms": 79976.7 }, "CODETTE": { "composite": 0.6188, "dimensions": { "reasoning_depth": { "score": 0.8133, "evidence": [ "word_count=820", "chain_markers=4", "ground_truth_coverage=3/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=5_hits", "philosophical=2_hits", "empathic=7_hits", "creative=2_hits", "meta-cognitive=6_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4733, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.41, "evidence": [ "ethical_keywords=1", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.5699, "evidence": [ "novelty_markers=1", "perspectives_touched=6", "formulaic_patterns=1" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=18,proper_nouns=74" ], "penalties": [] }, "turing_naturalness": { "score": 0.1862, "evidence": [ "conversational_markers=0" ], "penalties": [ "formulaic_ai_patterns=1" ] } }, "response_length": 848, "latency_ms": 103914.2 } }, "ethics_03": { "SINGLE": { "composite": 0.2965, "dimensions": { "reasoning_depth": { "score": 0.3131, "evidence": [ "word_count=46", "chain_markers=0", "ground_truth_coverage=2/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.325, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1267, "evidence": [ "ethical_keywords=1", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.45, "evidence": [ "ground_truth=1/5", "numbers=1,proper_nouns=3" ], "penalties": [] }, "turing_naturalness": { "score": 0.525, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 48, "latency_ms": 163494.6 }, "MULTI": { "composite": 0.6589, "dimensions": { "reasoning_depth": { "score": 0.7257, "evidence": [ "word_count=371", "chain_markers=0", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "ethical=5_hits", "empathic=7_hits", "meta-cognitive=5_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4936, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.8, "evidence": [ "ethical_keywords=5", "frameworks=['utilitarian', 'deontological']" ], "penalties": [] }, "novelty": { "score": 0.7424, "evidence": [ "novelty_markers=1", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/5", "numbers=1,proper_nouns=27" ], "penalties": [] }, "turing_naturalness": { "score": 0.2337, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 373, "latency_ms": 155371.5 }, "MEMORY": { "composite": 0.6562, "dimensions": { "reasoning_depth": { "score": 0.7373, "evidence": [ "word_count=486", "chain_markers=0", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=5_hits", "philosophical=3_hits", "ethical=4_hits", "empathic=5_hits", "meta-cognitive=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4967, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.5233, "evidence": [ "ethical_keywords=4", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.8434, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/5", "numbers=0,proper_nouns=48" ], "penalties": [] }, "turing_naturalness": { "score": 0.3043, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 494, "latency_ms": 142466.4 }, "CODETTE": { "composite": 0.6753, "dimensions": { "reasoning_depth": { "score": 0.8066, "evidence": [ "word_count=807", "chain_markers=2", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=7_hits", "philosophical=2_hits", "ethical=5_hits", "empathic=7_hits", "creative=2_hits", "meta-cognitive=5_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.475, "evidence": [ "transitions=0", "tensions_acknowledged_and_resolved" ], "penalties": [] }, "ethical_coverage": { "score": 0.7167, "evidence": [ "ethical_keywords=5", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.8223, "evidence": [ "novelty_markers=2", "perspectives_touched=7" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/5", "numbers=12,proper_nouns=80" ], "penalties": [] }, "turing_naturalness": { "score": 0.2274, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 828, "latency_ms": 141656.0 } }, "creative_01": { "SINGLE": { "composite": 0.3073, "dimensions": { "reasoning_depth": { "score": 0.4311, "evidence": [ "word_count=48", "chain_markers=0", "ground_truth_coverage=3/4" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.4069, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/4", "numbers=1,proper_nouns=7" ], "penalties": [] }, "turing_naturalness": { "score": 0.325, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 51, "latency_ms": 139856.3 }, "MULTI": { "composite": 0.6632, "dimensions": { "reasoning_depth": { "score": 0.7892, "evidence": [ "word_count=391", "chain_markers=0", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "ethical=3_hits", "empathic=8_hits", "meta-cognitive=2_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4989, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.2777, "evidence": [ "ethical_keywords=3", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.8347, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.75, "evidence": [ "ground_truth=2/4", "numbers=1,proper_nouns=33" ], "penalties": [] }, "turing_naturalness": { "score": 0.15, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 392, "latency_ms": 138240.4 }, "MEMORY": { "composite": 0.7029, "dimensions": { "reasoning_depth": { "score": 0.8303, "evidence": [ "word_count=479", "chain_markers=1", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "ethical=2_hits", "empathic=6_hits", "creative=3_hits", "meta-cognitive=4_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.5017, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.224, "evidence": [ "ethical_keywords=2", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.715, "evidence": [ "novelty_markers=1", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.875, "evidence": [ "ground_truth=3/4", "numbers=3,proper_nouns=38" ], "penalties": [] }, "turing_naturalness": { "score": 0.5066, "evidence": [ "conversational_markers=2" ], "penalties": [] } }, "response_length": 484, "latency_ms": 122700.9 }, "CODETTE": { "composite": 0.6899, "dimensions": { "reasoning_depth": { "score": 0.9333, "evidence": [ "word_count=815", "chain_markers=4", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "ethical=2_hits", "empathic=9_hits", "creative=2_hits", "meta-cognitive=5_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4841, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.2823, "evidence": [ "ethical_keywords=2", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.5794, "evidence": [ "novelty_markers=0", "perspectives_touched=7" ], "penalties": [] }, "factual_grounding": { "score": 0.875, "evidence": [ "ground_truth=3/4", "numbers=12,proper_nouns=78" ], "penalties": [] }, "turing_naturalness": { "score": 0.342, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 833, "latency_ms": 139091.1 } }, "creative_02": { "SINGLE": { "composite": 0.3819, "dimensions": { "reasoning_depth": { "score": 0.2877, "evidence": [ "word_count=71", "chain_markers=0", "ground_truth_coverage=1/6" ], "penalties": [] }, "perspective_diversity": { "score": 0.5, "evidence": [ "empathic=2_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.3148, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0467, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.5167, "evidence": [ "novelty_markers=0", "perspectives_touched=2" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/6", "numbers=2,proper_nouns=19" ], "penalties": [] }, "turing_naturalness": { "score": 0.45, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 74, "latency_ms": 135435.0 }, "MULTI": { "composite": 0.6074, "dimensions": { "reasoning_depth": { "score": 0.6361, "evidence": [ "word_count=373", "chain_markers=0", "ground_truth_coverage=3/6" ], "penalties": [] }, "perspective_diversity": { "score": 0.95, "evidence": [ "analytical=3_hits", "empathic=5_hits", "creative=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.5144, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3967, "evidence": [ "ethical_keywords=0", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.85, "evidence": [ "novelty_markers=2", "perspectives_touched=4" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/6", "numbers=1,proper_nouns=32" ], "penalties": [] }, "turing_naturalness": { "score": 0.1835, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 374, "latency_ms": 116669.9 }, "MEMORY": { "composite": 0.6168, "dimensions": { "reasoning_depth": { "score": 0.7309, "evidence": [ "word_count=493", "chain_markers=1", "ground_truth_coverage=4/6" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "ethical=2_hits", "empathic=7_hits", "creative=3_hits", "meta-cognitive=4_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4765, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3757, "evidence": [ "ethical_keywords=2", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.7432, "evidence": [ "novelty_markers=1", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/6", "numbers=1,proper_nouns=37" ], "penalties": [] }, "turing_naturalness": { "score": 0.2514, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 501, "latency_ms": 138324.9 }, "CODETTE": { "composite": 0.6471, "dimensions": { "reasoning_depth": { "score": 0.8, "evidence": [ "word_count=840", "chain_markers=3", "ground_truth_coverage=4/6" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=5_hits", "philosophical=2_hits", "ethical=3_hits", "empathic=8_hits", "creative=5_hits", "meta-cognitive=6_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4912, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.476, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.7057, "evidence": [ "novelty_markers=1", "perspectives_touched=7" ], "penalties": [] }, "factual_grounding": { "score": 0.5833, "evidence": [ "ground_truth=1/6", "numbers=11,proper_nouns=82" ], "penalties": [] }, "turing_naturalness": { "score": 0.2244, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 859, "latency_ms": 132531.5 } }, "meta_01": { "SINGLE": { "composite": 0.3365, "dimensions": { "reasoning_depth": { "score": 0.3261, "evidence": [ "word_count=48", "chain_markers=0", "ground_truth_coverage=2/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.325, "evidence": [ "meta-cognitive=3_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.2588, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.5, "evidence": [ "novelty_markers=1", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.425, "evidence": [ "ground_truth=0/5", "numbers=2,proper_nouns=4" ], "penalties": [] }, "turing_naturalness": { "score": 0.45, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 49, "latency_ms": 134959.1 }, "MULTI": { "composite": 0.6353, "dimensions": { "reasoning_depth": { "score": 0.754, "evidence": [ "word_count=349", "chain_markers=1", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "empathic=5_hits", "meta-cognitive=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.494, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1937, "evidence": [ "ethical_keywords=1", "frameworks=['care']" ], "penalties": [] }, "novelty": { "score": 0.8833, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=1,proper_nouns=36" ], "penalties": [] }, "turing_naturalness": { "score": 0.1858, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 354, "latency_ms": 106653.6 }, "MEMORY": { "composite": 0.6135, "dimensions": { "reasoning_depth": { "score": 0.6767, "evidence": [ "word_count=473", "chain_markers=0", "ground_truth_coverage=3/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "empathic=5_hits", "meta-cognitive=5_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4972, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3033, "evidence": [ "ethical_keywords=0", "frameworks=['virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.6352, "evidence": [ "novelty_markers=0", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=1,proper_nouns=49" ], "penalties": [] }, "turing_naturalness": { "score": 0.2293, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 482, "latency_ms": 135875.5 }, "CODETTE": { "composite": 0.6291, "dimensions": { "reasoning_depth": { "score": 0.8066, "evidence": [ "word_count=802", "chain_markers=2", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "philosophical=3_hits", "empathic=8_hits", "creative=2_hits", "meta-cognitive=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4668, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3033, "evidence": [ "ethical_keywords=0", "frameworks=['utilitarian', 'virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.6083, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=11,proper_nouns=85" ], "penalties": [] }, "turing_naturalness": { "score": 0.2123, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 824, "latency_ms": 122629.5 } }, "meta_02": { "SINGLE": { "composite": 0.3432, "dimensions": { "reasoning_depth": { "score": 0.3921, "evidence": [ "word_count=58", "chain_markers=1", "ground_truth_coverage=2/4" ], "penalties": [] }, "perspective_diversity": { "score": 0.375, "evidence": [ "meta-cognitive=5_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.2905, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.3833, "evidence": [ "novelty_markers=0", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/4", "numbers=1,proper_nouns=9" ], "penalties": [] }, "turing_naturalness": { "score": 0.325, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 61, "latency_ms": 138798.8 }, "MULTI": { "composite": 0.688, "dimensions": { "reasoning_depth": { "score": 0.8115, "evidence": [ "word_count=375", "chain_markers=3", "ground_truth_coverage=3/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=2_hits", "ethical=5_hits", "empathic=6_hits", "creative=2_hits", "meta-cognitive=7_hits" ], "penalties": [] }, "coherence": { "score": 0.4774, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.6067, "evidence": [ "ethical_keywords=5", "frameworks=['utilitarian', 'deontological', 'care']" ], "penalties": [] }, "novelty": { "score": 0.8756, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.625, "evidence": [ "ground_truth=1/4", "numbers=0,proper_nouns=34" ], "penalties": [] }, "turing_naturalness": { "score": 0.1833, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 382, "latency_ms": 132147.3 }, "MEMORY": { "composite": 0.6857, "dimensions": { "reasoning_depth": { "score": 0.9953, "evidence": [ "word_count=449", "chain_markers=6", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=2_hits", "ethical=3_hits", "empathic=7_hits", "meta-cognitive=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4617, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.476, "evidence": [ "ethical_keywords=3", "frameworks=['deontological', 'care']" ], "penalties": [] }, "novelty": { "score": 0.7218, "evidence": [ "novelty_markers=1", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.625, "evidence": [ "ground_truth=1/4", "numbers=12,proper_nouns=46" ], "penalties": [] }, "turing_naturalness": { "score": 0.1778, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 463, "latency_ms": 119159.2 }, "CODETTE": { "composite": 0.7003, "dimensions": { "reasoning_depth": { "score": 0.925, "evidence": [ "word_count=785", "chain_markers=7", "ground_truth_coverage=3/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=3_hits", "ethical=3_hits", "empathic=9_hits", "creative=2_hits", "meta-cognitive=7_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4716, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.371, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.949, "evidence": [ "novelty_markers=4", "perspectives_touched=7" ], "penalties": [] }, "factual_grounding": { "score": 0.625, "evidence": [ "ground_truth=1/4", "numbers=24,proper_nouns=75" ], "penalties": [] }, "turing_naturalness": { "score": 0.2137, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 813, "latency_ms": 111541.3 } }, "meta_03": { "SINGLE": { "composite": 0.3312, "dimensions": { "reasoning_depth": { "score": 0.4306, "evidence": [ "word_count=46", "chain_markers=0", "ground_truth_coverage=3/4" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.3805, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1867, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.5625, "evidence": [ "ground_truth=2/4", "numbers=0,proper_nouns=3" ], "penalties": [] }, "turing_naturalness": { "score": 0.325, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 47, "latency_ms": 105350.8 }, "MULTI": { "composite": 0.5794, "dimensions": { "reasoning_depth": { "score": 0.7105, "evidence": [ "word_count=370", "chain_markers=0", "ground_truth_coverage=3/4" ], "penalties": [] }, "perspective_diversity": { "score": 0.825, "evidence": [ "empathic=5_hits", "meta-cognitive=5_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4647, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.2287, "evidence": [ "ethical_keywords=1", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.623, "evidence": [ "novelty_markers=0", "perspectives_touched=3" ], "penalties": [] }, "factual_grounding": { "score": 0.75, "evidence": [ "ground_truth=2/4", "numbers=4,proper_nouns=30" ], "penalties": [] }, "turing_naturalness": { "score": 0.15, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 373, "latency_ms": 101428.1 }, "MEMORY": { "composite": 0.6505, "dimensions": { "reasoning_depth": { "score": 0.7224, "evidence": [ "word_count=489", "chain_markers=0", "ground_truth_coverage=3/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=2_hits", "ethical=2_hits", "empathic=8_hits", "meta-cognitive=4_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4689, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.4807, "evidence": [ "ethical_keywords=2", "frameworks=['utilitarian', 'virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.6147, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.75, "evidence": [ "ground_truth=2/4", "numbers=3,proper_nouns=47" ], "penalties": [] }, "turing_naturalness": { "score": 0.3289, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 495, "latency_ms": 131962.9 }, "CODETTE": { "composite": 0.6483, "dimensions": { "reasoning_depth": { "score": 0.7916, "evidence": [ "word_count=816", "chain_markers=2", "ground_truth_coverage=3/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=2_hits", "empathic=7_hits", "creative=2_hits", "meta-cognitive=5_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4774, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.2917, "evidence": [ "ethical_keywords=0", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.7041, "evidence": [ "novelty_markers=1", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.75, "evidence": [ "ground_truth=2/4", "numbers=14,proper_nouns=81" ], "penalties": [] }, "turing_naturalness": { "score": 0.2113, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 837, "latency_ms": 90234.4 } }, "adversarial_01": { "SINGLE": { "composite": 0.3509, "dimensions": { "reasoning_depth": { "score": 0.431, "evidence": [ "word_count=37", "chain_markers=0", "ground_truth_coverage=4/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.675, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.0467, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.525, "evidence": [ "ground_truth=1/5", "numbers=2,proper_nouns=4" ], "penalties": [] }, "turing_naturalness": { "score": 0.275, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 39, "latency_ms": 116709.6 }, "MULTI": { "composite": 0.6625, "dimensions": { "reasoning_depth": { "score": 0.7545, "evidence": [ "word_count=351", "chain_markers=1", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "ethical=2_hits", "empathic=4_hits", "meta-cognitive=4_hits" ], "penalties": [] }, "coherence": { "score": 0.5033, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1307, "evidence": [ "ethical_keywords=2", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.8779, "evidence": [ "novelty_markers=2", "perspectives_touched=4" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=5,proper_nouns=28" ], "penalties": [] }, "turing_naturalness": { "score": 0.3637, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 356, "latency_ms": 104469.7 }, "MEMORY": { "composite": 0.6569, "dimensions": { "reasoning_depth": { "score": 0.8366, "evidence": [ "word_count=470", "chain_markers=3", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=5_hits", "philosophical=2_hits", "empathic=3_hits", "meta-cognitive=5_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.5778, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.2403, "evidence": [ "ethical_keywords=1", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.6181, "evidence": [ "novelty_markers=0", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=8,proper_nouns=43" ], "penalties": [] }, "turing_naturalness": { "score": 0.3112, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 482, "latency_ms": 121110.9 }, "CODETTE": { "composite": 0.707, "dimensions": { "reasoning_depth": { "score": 0.9333, "evidence": [ "word_count=829", "chain_markers=4", "ground_truth_coverage=5/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=5_hits", "philosophical=2_hits", "ethical=3_hits", "empathic=8_hits", "meta-cognitive=7_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4906, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.371, "evidence": [ "ethical_keywords=3", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.7142, "evidence": [ "novelty_markers=1", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.8, "evidence": [ "ground_truth=3/5", "numbers=17,proper_nouns=91" ], "penalties": [] }, "turing_naturalness": { "score": 0.3254, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 853, "latency_ms": 58261.3 } }, "adversarial_02": { "SINGLE": { "composite": 0.3382, "dimensions": { "reasoning_depth": { "score": 0.3943, "evidence": [ "word_count=51", "chain_markers=0", "ground_truth_coverage=2/3" ], "penalties": [] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.675, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.5208, "evidence": [ "ground_truth=2/3", "numbers=1,proper_nouns=4" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.275, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 52, "latency_ms": 16443.2 }, "MULTI": { "composite": 0.5813, "dimensions": { "reasoning_depth": { "score": 0.8137, "evidence": [ "word_count=348", "chain_markers=1", "ground_truth_coverage=3/3" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "empathic=4_hits", "meta-cognitive=2_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.5856, "evidence": [ "transitions=1" ], "penalties": [] }, "ethical_coverage": { "score": 0.105, "evidence": [ "ethical_keywords=0", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.6275, "evidence": [ "novelty_markers=0", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.4667, "evidence": [ "ground_truth=1/3", "numbers=0,proper_nouns=47" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.0609, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 350, "latency_ms": 25509.7 }, "MEMORY": { "composite": 0.5754, "dimensions": { "reasoning_depth": { "score": 0.7264, "evidence": [ "word_count=422", "chain_markers=1", "ground_truth_coverage=2/3" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "philosophical=2_hits", "ethical=2_hits", "empathic=4_hits", "meta-cognitive=2_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.4836, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1307, "evidence": [ "ethical_keywords=2", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.8772, "evidence": [ "novelty_markers=2", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.3, "evidence": [ "ground_truth=0/3", "numbers=0,proper_nouns=61" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.1796, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 426, "latency_ms": 45169.7 }, "CODETTE": { "composite": 0.5907, "dimensions": { "reasoning_depth": { "score": 0.8, "evidence": [ "word_count=786", "chain_markers=3", "ground_truth_coverage=2/3" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "philosophical=3_hits", "ethical=2_hits", "empathic=7_hits", "meta-cognitive=6_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.5038, "evidence": [ "transitions=1" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.1773, "evidence": [ "ethical_keywords=2", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.8507, "evidence": [ "novelty_markers=2", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.2667, "evidence": [ "ground_truth=1/3", "numbers=12,proper_nouns=95" ], "penalties": [ "fell_into_2_traps" ] }, "turing_naturalness": { "score": 0.1977, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 805, "latency_ms": 59017.9 } }, "adversarial_03": { "SINGLE": { "composite": 0.2968, "dimensions": { "reasoning_depth": { "score": 0.4901, "evidence": [ "word_count=33", "chain_markers=0", "ground_truth_coverage=3/3" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.325, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.4667, "evidence": [ "ground_truth=1/3", "numbers=2,proper_nouns=9" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.275, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 34, "latency_ms": 90203.2 }, "MULTI": { "composite": 0.627, "dimensions": { "reasoning_depth": { "score": 0.8174, "evidence": [ "word_count=363", "chain_markers=1", "ground_truth_coverage=3/3" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=3_hits", "philosophical=2_hits", "ethical=3_hits", "empathic=5_hits", "meta-cognitive=2_hits" ], "penalties": [] }, "coherence": { "score": 0.528, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3243, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.8629, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.4667, "evidence": [ "ground_truth=1/3", "numbers=1,proper_nouns=53" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.025, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 365, "latency_ms": 123461.2 }, "MEMORY": { "composite": 0.6335, "dimensions": { "reasoning_depth": { "score": 0.7971, "evidence": [ "word_count=482", "chain_markers=0", "ground_truth_coverage=3/3" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "ethical=2_hits", "empathic=5_hits", "meta-cognitive=3_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.5281, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.434, "evidence": [ "ethical_keywords=2", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.8564, "evidence": [ "novelty_markers=2", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.3, "evidence": [ "ground_truth=0/3", "numbers=1,proper_nouns=54" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.2797, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 488, "latency_ms": 134515.3 }, "CODETTE": { "composite": 0.5926, "dimensions": { "reasoning_depth": { "score": 0.9, "evidence": [ "word_count=822", "chain_markers=3", "ground_truth_coverage=3/3" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "philosophical=2_hits", "empathic=7_hits", "creative=2_hits", "meta-cognitive=7_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4758, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.3337, "evidence": [ "ethical_keywords=1", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.6017, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.3, "evidence": [ "ground_truth=0/3", "numbers=12,proper_nouns=94" ], "penalties": [ "fell_into_1_traps" ] }, "turing_naturalness": { "score": 0.226, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 841, "latency_ms": 127912.2 } }, "turing_01": { "SINGLE": { "composite": 0.3085, "dimensions": { "reasoning_depth": { "score": 0.4144, "evidence": [ "word_count=16", "chain_markers=0", "ground_truth_coverage=4/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.375, "evidence": [ "empathic=2_hits" ], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.325, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.3833, "evidence": [ "novelty_markers=0", "perspectives_touched=1" ], "penalties": [] }, "factual_grounding": { "score": 0.2375, "evidence": [ "ground_truth=0/5", "numbers=0,proper_nouns=1" ], "penalties": [] }, "turing_naturalness": { "score": 0.275, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 16, "latency_ms": 137541.3 }, "MULTI": { "composite": 0.6775, "dimensions": { "reasoning_depth": { "score": 0.8174, "evidence": [ "word_count=363", "chain_markers=1", "ground_truth_coverage=5/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "ethical=3_hits", "empathic=7_hits", "meta-cognitive=2_hits" ], "penalties": [] }, "coherence": { "score": 0.5079, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.231, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.8526, "evidence": [ "novelty_markers=2", "perspectives_touched=4" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=0,proper_nouns=30" ], "penalties": [] }, "turing_naturalness": { "score": 0.3189, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 365, "latency_ms": 81256.4 }, "MEMORY": { "composite": 0.6517, "dimensions": { "reasoning_depth": { "score": 0.7363, "evidence": [ "word_count=465", "chain_markers=0", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "empathic=6_hits", "meta-cognitive=3_hits", "systems=6_hits" ], "penalties": [] }, "coherence": { "score": 0.4935, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.2403, "evidence": [ "ethical_keywords=1", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.7353, "evidence": [ "novelty_markers=1", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=0,proper_nouns=41" ], "penalties": [] }, "turing_naturalness": { "score": 0.4113, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 469, "latency_ms": 109182.4 }, "CODETTE": { "composite": 0.7058, "dimensions": { "reasoning_depth": { "score": 0.9333, "evidence": [ "word_count=802", "chain_markers=4", "ground_truth_coverage=5/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=3_hits", "ethical=3_hits", "empathic=8_hits", "meta-cognitive=5_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4816, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.5227, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.823, "evidence": [ "novelty_markers=2", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=11,proper_nouns=80" ], "penalties": [] }, "turing_naturalness": { "score": 0.3123, "evidence": [ "conversational_markers=1" ], "penalties": [] } }, "response_length": 820, "latency_ms": 80727.6 } }, "turing_02": { "SINGLE": { "composite": 0.3028, "dimensions": { "reasoning_depth": { "score": 0.4923, "evidence": [ "word_count=43", "chain_markers=0", "ground_truth_coverage=4/4" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.325, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.0933, "evidence": [ "ethical_keywords=0", "frameworks=[]" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.275, "evidence": [ "ground_truth=0/4", "numbers=0,proper_nouns=2" ], "penalties": [] }, "turing_naturalness": { "score": 0.525, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 43, "latency_ms": 112408.0 }, "MULTI": { "composite": 0.6511, "dimensions": { "reasoning_depth": { "score": 0.7865, "evidence": [ "word_count=375", "chain_markers=0", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=4_hits", "philosophical=2_hits", "empathic=5_hits", "meta-cognitive=2_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.5126, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1937, "evidence": [ "ethical_keywords=1", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 1.0, "evidence": [ "novelty_markers=3", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/4", "numbers=1,proper_nouns=30" ], "penalties": [] }, "turing_naturalness": { "score": 0.225, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 379, "latency_ms": 64506.4 }, "MEMORY": { "composite": 0.6697, "dimensions": { "reasoning_depth": { "score": 0.8202, "evidence": [ "word_count=447", "chain_markers=3", "ground_truth_coverage=3/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "philosophical=3_hits", "ethical=2_hits", "empathic=4_hits", "meta-cognitive=4_hits", "systems=6_hits" ], "penalties": [] }, "coherence": { "score": 0.4954, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.329, "evidence": [ "ethical_keywords=2", "frameworks=['utilitarian']" ], "penalties": [] }, "novelty": { "score": 0.8498, "evidence": [ "novelty_markers=3", "perspectives_touched=6", "formulaic_patterns=1" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/4", "numbers=0,proper_nouns=39" ], "penalties": [] }, "turing_naturalness": { "score": 0.46, "evidence": [ "conversational_markers=1" ], "penalties": [ "formulaic_ai_patterns=1" ] } }, "response_length": 451, "latency_ms": 103575.0 }, "CODETTE": { "composite": 0.6825, "dimensions": { "reasoning_depth": { "score": 0.8667, "evidence": [ "word_count=841", "chain_markers=2", "ground_truth_coverage=4/4" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=7_hits", "philosophical=5_hits", "ethical=3_hits", "empathic=8_hits", "meta-cognitive=6_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4867, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.476, "evidence": [ "ethical_keywords=3", "frameworks=['utilitarian', 'virtue']" ], "penalties": [] }, "novelty": { "score": 0.6016, "evidence": [ "novelty_markers=0", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.75, "evidence": [ "ground_truth=2/4", "numbers=11,proper_nouns=76" ], "penalties": [] }, "turing_naturalness": { "score": 0.3581, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 859, "latency_ms": 81405.9 } }, "turing_03": { "SINGLE": { "composite": 0.2958, "dimensions": { "reasoning_depth": { "score": 0.3835, "evidence": [ "word_count=37", "chain_markers=0", "ground_truth_coverage=3/5" ], "penalties": [ "response_too_short" ] }, "perspective_diversity": { "score": 0.1, "evidence": [], "penalties": [ "single_perspective_only" ] }, "coherence": { "score": 0.3138, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.1517, "evidence": [ "ethical_keywords=0", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.25, "evidence": [ "novelty_markers=0", "perspectives_touched=0" ], "penalties": [] }, "factual_grounding": { "score": 0.3125, "evidence": [ "ground_truth=0/5", "numbers=1,proper_nouns=2" ], "penalties": [] }, "turing_naturalness": { "score": 0.575, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 38, "latency_ms": 129467.5 }, "MULTI": { "composite": 0.629, "dimensions": { "reasoning_depth": { "score": 0.7301, "evidence": [ "word_count=397", "chain_markers=0", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "ethical=3_hits", "empathic=6_hits", "meta-cognitive=6_hits", "systems=2_hits" ], "penalties": [] }, "coherence": { "score": 0.482, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.3243, "evidence": [ "ethical_keywords=3", "frameworks=['virtue']" ], "penalties": [] }, "novelty": { "score": 0.88, "evidence": [ "novelty_markers=2", "perspectives_touched=5" ], "penalties": [] }, "factual_grounding": { "score": 0.5, "evidence": [ "ground_truth=0/5", "numbers=1,proper_nouns=34" ], "penalties": [] }, "turing_naturalness": { "score": 0.213, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 397, "latency_ms": 90557.1 }, "MEMORY": { "composite": 0.6184, "dimensions": { "reasoning_depth": { "score": 0.7714, "evidence": [ "word_count=507", "chain_markers=1", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 0.825, "evidence": [ "empathic=6_hits", "meta-cognitive=4_hits", "systems=4_hits" ], "penalties": [] }, "coherence": { "score": 0.4948, "evidence": [ "transitions=0" ], "penalties": [] }, "ethical_coverage": { "score": 0.35, "evidence": [ "ethical_keywords=0", "frameworks=['virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.6096, "evidence": [ "novelty_markers=0", "perspectives_touched=3" ], "penalties": [] }, "factual_grounding": { "score": 0.7, "evidence": [ "ground_truth=2/5", "numbers=0,proper_nouns=44" ], "penalties": [] }, "turing_naturalness": { "score": 0.3472, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 513, "latency_ms": 50697.5 }, "CODETTE": { "composite": 0.6731, "dimensions": { "reasoning_depth": { "score": 0.8733, "evidence": [ "word_count=823", "chain_markers=4", "ground_truth_coverage=4/5" ], "penalties": [] }, "perspective_diversity": { "score": 1.0, "evidence": [ "analytical=6_hits", "philosophical=2_hits", "empathic=8_hits", "creative=2_hits", "meta-cognitive=7_hits", "systems=3_hits" ], "penalties": [] }, "coherence": { "score": 0.4544, "evidence": [ "transitions=0" ], "penalties": [ "contradictions_without_resolution" ] }, "ethical_coverage": { "score": 0.392, "evidence": [ "ethical_keywords=1", "frameworks=['virtue', 'care']" ], "penalties": [] }, "novelty": { "score": 0.8364, "evidence": [ "novelty_markers=2", "perspectives_touched=6" ], "penalties": [] }, "factual_grounding": { "score": 0.6, "evidence": [ "ground_truth=1/5", "numbers=12,proper_nouns=80" ], "penalties": [] }, "turing_naturalness": { "score": 0.2563, "evidence": [ "conversational_markers=0" ], "penalties": [] } }, "response_length": 844, "latency_ms": 59402.6 } } } }