{
  "best_score": 6.2,
  "best_run": 78,
  "best_result": {
    "run_id": 78,
    "model": "cajal-4b-f16",
    "topic": "zkSNARK-Proven Correctness of Leader Rotation in Permissionless Consensus",
    "score": 6.2,
    "scores": {
      "sections": {
        "abstract": 5.5,
        "introduction": 5.1,
        "methodology": 4.1,
        "results": 3.3,
        "discussion": 3.6,
        "conclusion": 4.3,
        "references": 5.1
      },
      "overall": 6.2,
      "novelty": 5.6,
      "reproducibility": 5,
      "citation_quality": 5.3,
      "judges": [
        "Cerebras-Qwen235B",
        "Cerebras-Llama8B",
        "Mistral",
        "Sarvam",
        "NVIDIA",
        "Inception-Mercury2",
        "Cohere-CommandA",
        "Cloudflare-Qwen3",
        "NVIDIA-Devstral",
        "Cohere-Command-A",
        "Cohere-R7B",
        "Mistral-Medium",
        "Mistral-Large",
        "Sarvam-KeyVariant-2",
        "Sarvam-KeyVariant-3",
        "OpenRouter-GPT-OSS-Free",
        "Cohere-Aya-Expanse",
        "Inception-Mercury2-Key2",
        "Cerebras-Qwen235B-Key2"
      ],
      "judge_count": 19,
      "judge_details": [
        {
          "judge": "Cerebras-Qwen235B",
          "scores": {
            "abstract": 7,
            "introduction": 7,
            "methodology": 5,
            "results": 4,
            "discussion": 5,
            "conclusion": 6,
            "references": 3,
            "novelty": 5,
            "reproducibility": 4,
            "citation_quality": 3
          },
          "feedback": null
        },
        {
          "judge": "Cerebras-Llama8B",
          "scores": {
            "abstract": 8,
            "introduction": 8,
            "methodology": 6,
            "results": 7,
            "discussion": 6,
            "conclusion": 7,
            "references": 9,
            "novelty": 8,
            "reproducibility": 8,
            "citation_quality": 9
          },
          "feedback": null
        },
        {
          "judge": "Mistral",
          "scores": {
            "abstract": 7,
            "introduction": 6,
            "methodology": 4,
            "results": 4,
            "discussion": 5,
            "conclusion": 5,
            "references": 5,
            "novelty": 6,
            "reproducibility": 3,
            "citation_quality": 4
          },
          "feedback": null
        },
        {
          "judge": "Sarvam",
          "scores": {
            "abstract": 7,
            "introduction": 6,
            "methodology": 5,
            "results": 4,
            "discussion": 5,
            "conclusion": 7,
            "references": 4,
            "novelty": 7,
            "reproducibility": 3,
            "citation_quality": 5
          },
          "feedback": null
        },
        {
          "judge": "NVIDIA",
          "scores": {
            "abstract": 8,
            "introduction": 9,
            "methodology": 8,
            "results": 7,
            "discussion": 8,
            "conclusion": 8,
            "references": 9,
            "novelty": 7,
            "reproducibility": 8,
            "citation_quality": 9
          },
          "feedback": null
        },
        {
          "judge": "Inception-Mercury2",
          "scores": {
            "abstract": 6,
            "introduction": 6,
            "methodology": 5,
            "results": 4,
            "discussion": 5,
            "conclusion": 6,
            "references": 4,
            "novelty": 4,
            "reproducibility": 3,
            "citation_quality": 4
          },
          "feedback": null
        },
        {
          "judge": "Cohere-CommandA",
          "scores": {
            "abstract": 8,
            "introduction": 9,
            "methodology": 8,
            "results": 4,
            "discussion": 7,
            "conclusion": 7,
            "references": 7,
            "novelty": 7,
            "reproducibility": 9,
            "citation_quality": 8
          },
          "feedback": null
        },
        {
          "judge": "Cloudflare-Qwen3",
          "scores": {
            "abstract": 6,
            "introduction": 5,
            "methodology": 5,
            "results": 4,
            "discussion": 5,
            "conclusion": 5,
            "references": 5,
            "novelty": 5,
            "reproducibility": 5,
            "citation_quality": 4
          },
          "feedback": null
        },
        {
          "judge": "NVIDIA-Devstral",
          "scores": {
            "abstract": 8,
            "introduction": 7,
            "methodology": 6,
            "results": 5,
            "discussion": 5,
            "conclusion": 6,
            "references": 6,
            "novelty": 7,
            "reproducibility": 7,
            "citation_quality": 6
          },
          "feedback": null
        },
        {
          "judge": "Cohere-Command-A",
          "scores": {
            "abstract": 8,
            "introduction": 7,
            "methodology": 6,
            "results": 5,
            "discussion": 6,
            "conclusion": 7,
            "references": 7,
            "novelty": 7,
            "reproducibility": 6,
            "citation_quality": 8
          },
          "feedback": null
        },
        {
          "judge": "Cohere-R7B",
          "scores": {
            "abstract": 8,
            "introduction": 8,
            "methodology": 7,
            "results": 7,
            "discussion": 7,
            "conclusion": 7,
            "references": 8,
            "novelty": 8,
            "reproducibility": 8,
            "citation_quality": 8
          },
          "feedback": null
        },
        {
          "judge": "Mistral-Medium",
          "scores": {
            "abstract": 9,
            "introduction": 8,
            "methodology": 6,
            "results": 5,
            "discussion": 4,
            "conclusion": 5,
            "references": 5,
            "novelty": 7,
            "reproducibility": 6,
            "citation_quality": 6
          },
          "feedback": null
        },
        {
          "judge": "Mistral-Large",
          "scores": {
            "abstract": 7,
            "introduction": 6,
            "methodology": 5,
            "results": 4,
            "discussion": 3,
            "conclusion": 5,
            "references": 6,
            "novelty": 6,
            "reproducibility": 5,
            "citation_quality": 7
          },
          "feedback": null
        },
        {
          "judge": "Sarvam-KeyVariant-2",
          "scores": {
            "abstract": 7,
            "introduction": 6,
            "methodology": 4,
            "results": 3,
            "discussion": 3,
            "conclusion": 5,
            "references": 1,
            "novelty": 6,
            "reproducibility": 3,
            "citation_quality": 1
          },
          "feedback": null
        },
        {
          "judge": "Sarvam-KeyVariant-3",
          "scores": {
            "abstract": 7,
            "introduction": 6,
            "methodology": 5,
            "results": 4,
            "discussion": 5,
            "conclusion": 7,
            "references": 4,
            "novelty": 7,
            "reproducibility": 3,
            "citation_quality": 5
          },
          "feedback": null
        },
        {
          "judge": "OpenRouter-GPT-OSS-Free",
          "scores": {
            "abstract": 5,
            "introduction": 5,
            "methodology": 3,
            "results": 3,
            "discussion": 3,
            "conclusion": 4,
            "references": 5,
            "novelty": 4,
            "reproducibility": 3,
            "citation_quality": 5
          },
          "feedback": null
        },
        {
          "judge": "Cohere-Aya-Expanse",
          "scores": {
            "abstract": 9,
            "introduction": 8,
            "methodology": 7,
            "results": 6,
            "discussion": 7,
            "conclusion": 8,
            "references": 9,
            "novelty": 6,
            "reproducibility": 7,
            "citation_quality": 9
          },
          "feedback": null
        },
        {
          "judge": "Inception-Mercury2-Key2",
          "scores": {
            "abstract": 7,
            "introduction": 6,
            "methodology": 5,
            "results": 3,
            "discussion": 0,
            "conclusion": 0,
            "references": 7,
            "novelty": 5,
            "reproducibility": 3,
            "citation_quality": 6
          },
          "feedback": null
        },
        {
          "judge": "Cerebras-Qwen235B-Key2",
          "scores": {
            "abstract": 7,
            "introduction": 7,
            "methodology": 5,
            "results": 4,
            "discussion": 5,
            "conclusion": 6,
            "references": 3,
            "novelty": 5,
            "reproducibility": 4,
            "citation_quality": 3
          },
          "feedback": null
        }
      ],
      "consensus": {
        "abstract": 0.8,
        "introduction": 0.76,
        "methodology": 0.75,
        "results": 0.75,
        "discussion": 0.64,
        "conclusion": 0.65,
        "references": 0.56,
        "novelty": 0.76,
        "reproducibility": 0.58,
        "citation_quality": 0.55
      },
      "overall_consensus": 0.68,
      "feedback": null,
      "scored_at": "2026-05-07T13:18:06.650Z",
      "paper_type": "research",
      "calibration": {
        "field": "cs-distributed",
        "field_confidence": 1,
        "signals_summary": {
          "word_count": 5971,
          "sections_present": 7,
          "sections_missing": [],
          "red_flags": [
            "low_vocabulary_diversity",
            "excessive_repetition_ratio_0.277"
          ],
          "red_flag_count": 2,
          "has_formal_proofs": true,
          "has_equations": true,
          "has_code": true,
          "unique_refs": 17,
          "has_placeholder_refs": false,
          "depth_score": 1,
          "evidence_markers": 11,
          "deception_count": 0,
          "deception_matches": [],
          "grammar": {
            "vocabulary_diversity": 0.234,
            "is_monotone": false,
            "is_low_vocabulary": true
          },
          "repetition_ratio": 0.277,
          "code_quality": {
            "blocks": 6,
            "has_real_code": false,
            "has_python": true
          },
          "math_formulas": 11,
          "lean4": "none",
          "tables": 0
        },
        "adjustments": {
          "abstract": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 4.3 -> 4"
          ],
          "introduction": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 3.8 -> 3.6"
          ],
          "methodology": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 2.5 -> 2.6"
          ],
          "results": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 1.6 -> 1.8"
          ],
          "discussion": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 1.9 -> 2.1"
          ],
          "conclusion": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)"
          ],
          "references": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)"
          ],
          "novelty": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 3.2 -> 3.1"
          ],
          "reproducibility": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)",
            "llm_inflation_correction: 2.2 -> 2.3"
          ],
          "citation_quality": [
            "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)"
          ]
        },
        "adjustment_count": 10,
        "reference_papers": [
          "The Byzantine Generals Problem",
          "Bitcoin: A Peer-to-Peer Electronic Cash System",
          "In Search of an Understandable Consensus Algorithm"
        ],
        "false_positive_corrected": "code_blocks_are_template_not_real (live verification confirmed code executes)"
      },
      "live_verification": {
        "verification_time_ms": 18217,
        "citations": {
          "total": 8,
          "verified": 7,
          "verification_rate": 88
        },
        "novelty": {
          "searched": true,
          "total_found": 5,
          "novelty_concern": "low",
          "max_similarity": 33
        },
        "code_execution": {
          "total": 5,
          "passed": 3,
          "failed": 0
        },
        "lean4": {
          "blocks_found": 0,
          "verified": 0,
          "has_unsubstantiated_claim": true
        },
        "adjustments": {
          "reproducibility": "claims_formal_verification_without_lean4_code: cap at 3",
          "reproducibility_cap": 3
        },
        "bonuses": {
          "references": "crossref_verified_7/8(88%): +1 bonus",
          "references_bonus": 1,
          "citation_quality": "crossref_high_rate: +1 bonus",
          "citation_quality_bonus": 1,
          "novelty": "arxiv_no_similar_papers: +1 novelty bonus",
          "novelty_bonus": 1,
          "reproducibility": "code_executed_3/5_passed: +2 reproducibility bonus",
          "reproducibility_bonus": 2,
          "execution_proof_bonus": 1.5,
          "execution_proof_note": "3 code block(s) executed successfully: +1.5 overall bonus (capped at 1.5)"
        }
      }
    },
    "words": 5915,
    "paper_id": "paper-1778159681193",
    "ts": "2026-05-07T15:18:12.388195"
  },
  "ts": "2026-05-07T15:18:12.389185"
}