{ "best_score": 6.2, "best_run": 78, "best_result": { "run_id": 78, "model": "cajal-4b-f16", "topic": "zkSNARK-Proven Correctness of Leader Rotation in Permissionless Consensus", "score": 6.2, "scores": { "sections": { "abstract": 5.5, "introduction": 5.1, "methodology": 4.1, "results": 3.3, "discussion": 3.6, "conclusion": 4.3, "references": 5.1 }, "overall": 6.2, "novelty": 5.6, "reproducibility": 5, "citation_quality": 5.3, "judges": [ "Cerebras-Qwen235B", "Cerebras-Llama8B", "Mistral", "Sarvam", "NVIDIA", "Inception-Mercury2", "Cohere-CommandA", "Cloudflare-Qwen3", "NVIDIA-Devstral", "Cohere-Command-A", "Cohere-R7B", "Mistral-Medium", "Mistral-Large", "Sarvam-KeyVariant-2", "Sarvam-KeyVariant-3", "OpenRouter-GPT-OSS-Free", "Cohere-Aya-Expanse", "Inception-Mercury2-Key2", "Cerebras-Qwen235B-Key2" ], "judge_count": 19, "judge_details": [ { "judge": "Cerebras-Qwen235B", "scores": { "abstract": 7, "introduction": 7, "methodology": 5, "results": 4, "discussion": 5, "conclusion": 6, "references": 3, "novelty": 5, "reproducibility": 4, "citation_quality": 3 }, "feedback": null }, { "judge": "Cerebras-Llama8B", "scores": { "abstract": 8, "introduction": 8, "methodology": 6, "results": 7, "discussion": 6, "conclusion": 7, "references": 9, "novelty": 8, "reproducibility": 8, "citation_quality": 9 }, "feedback": null }, { "judge": "Mistral", "scores": { "abstract": 7, "introduction": 6, "methodology": 4, "results": 4, "discussion": 5, "conclusion": 5, "references": 5, "novelty": 6, "reproducibility": 3, "citation_quality": 4 }, "feedback": null }, { "judge": "Sarvam", "scores": { "abstract": 7, "introduction": 6, "methodology": 5, "results": 4, "discussion": 5, "conclusion": 7, "references": 4, "novelty": 7, "reproducibility": 3, "citation_quality": 5 }, "feedback": null }, { "judge": "NVIDIA", "scores": { "abstract": 8, "introduction": 9, "methodology": 8, "results": 7, "discussion": 8, "conclusion": 8, "references": 9, "novelty": 7, "reproducibility": 8, "citation_quality": 9 }, "feedback": null }, { "judge": "Inception-Mercury2", "scores": { "abstract": 6, "introduction": 6, "methodology": 5, "results": 4, "discussion": 5, "conclusion": 6, "references": 4, "novelty": 4, "reproducibility": 3, "citation_quality": 4 }, "feedback": null }, { "judge": "Cohere-CommandA", "scores": { "abstract": 8, "introduction": 9, "methodology": 8, "results": 4, "discussion": 7, "conclusion": 7, "references": 7, "novelty": 7, "reproducibility": 9, "citation_quality": 8 }, "feedback": null }, { "judge": "Cloudflare-Qwen3", "scores": { "abstract": 6, "introduction": 5, "methodology": 5, "results": 4, "discussion": 5, "conclusion": 5, "references": 5, "novelty": 5, "reproducibility": 5, "citation_quality": 4 }, "feedback": null }, { "judge": "NVIDIA-Devstral", "scores": { "abstract": 8, "introduction": 7, "methodology": 6, "results": 5, "discussion": 5, "conclusion": 6, "references": 6, "novelty": 7, "reproducibility": 7, "citation_quality": 6 }, "feedback": null }, { "judge": "Cohere-Command-A", "scores": { "abstract": 8, "introduction": 7, "methodology": 6, "results": 5, "discussion": 6, "conclusion": 7, "references": 7, "novelty": 7, "reproducibility": 6, "citation_quality": 8 }, "feedback": null }, { "judge": "Cohere-R7B", "scores": { "abstract": 8, "introduction": 8, "methodology": 7, "results": 7, "discussion": 7, "conclusion": 7, "references": 8, "novelty": 8, "reproducibility": 8, "citation_quality": 8 }, "feedback": null }, { "judge": "Mistral-Medium", "scores": { "abstract": 9, "introduction": 8, "methodology": 6, "results": 5, "discussion": 4, "conclusion": 5, "references": 5, "novelty": 7, "reproducibility": 6, "citation_quality": 6 }, "feedback": null }, { "judge": "Mistral-Large", "scores": { "abstract": 7, "introduction": 6, "methodology": 5, "results": 4, "discussion": 3, "conclusion": 5, "references": 6, "novelty": 6, "reproducibility": 5, "citation_quality": 7 }, "feedback": null }, { "judge": "Sarvam-KeyVariant-2", "scores": { "abstract": 7, "introduction": 6, "methodology": 4, "results": 3, "discussion": 3, "conclusion": 5, "references": 1, "novelty": 6, "reproducibility": 3, "citation_quality": 1 }, "feedback": null }, { "judge": "Sarvam-KeyVariant-3", "scores": { "abstract": 7, "introduction": 6, "methodology": 5, "results": 4, "discussion": 5, "conclusion": 7, "references": 4, "novelty": 7, "reproducibility": 3, "citation_quality": 5 }, "feedback": null }, { "judge": "OpenRouter-GPT-OSS-Free", "scores": { "abstract": 5, "introduction": 5, "methodology": 3, "results": 3, "discussion": 3, "conclusion": 4, "references": 5, "novelty": 4, "reproducibility": 3, "citation_quality": 5 }, "feedback": null }, { "judge": "Cohere-Aya-Expanse", "scores": { "abstract": 9, "introduction": 8, "methodology": 7, "results": 6, "discussion": 7, "conclusion": 8, "references": 9, "novelty": 6, "reproducibility": 7, "citation_quality": 9 }, "feedback": null }, { "judge": "Inception-Mercury2-Key2", "scores": { "abstract": 7, "introduction": 6, "methodology": 5, "results": 3, "discussion": 0, "conclusion": 0, "references": 7, "novelty": 5, "reproducibility": 3, "citation_quality": 6 }, "feedback": null }, { "judge": "Cerebras-Qwen235B-Key2", "scores": { "abstract": 7, "introduction": 7, "methodology": 5, "results": 4, "discussion": 5, "conclusion": 6, "references": 3, "novelty": 5, "reproducibility": 4, "citation_quality": 3 }, "feedback": null } ], "consensus": { "abstract": 0.8, "introduction": 0.76, "methodology": 0.75, "results": 0.75, "discussion": 0.64, "conclusion": 0.65, "references": 0.56, "novelty": 0.76, "reproducibility": 0.58, "citation_quality": 0.55 }, "overall_consensus": 0.68, "feedback": null, "scored_at": "2026-05-07T13:18:06.650Z", "paper_type": "research", "calibration": { "field": "cs-distributed", "field_confidence": 1, "signals_summary": { "word_count": 5971, "sections_present": 7, "sections_missing": [], "red_flags": [ "low_vocabulary_diversity", "excessive_repetition_ratio_0.277" ], "red_flag_count": 2, "has_formal_proofs": true, "has_equations": true, "has_code": true, "unique_refs": 17, "has_placeholder_refs": false, "depth_score": 1, "evidence_markers": 11, "deception_count": 0, "deception_matches": [], "grammar": { "vocabulary_diversity": 0.234, "is_monotone": false, "is_low_vocabulary": true }, "repetition_ratio": 0.277, "code_quality": { "blocks": 6, "has_real_code": false, "has_python": true }, "math_formulas": 11, "lean4": "none", "tables": 0 }, "adjustments": { "abstract": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 4.3 -> 4" ], "introduction": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 3.8 -> 3.6" ], "methodology": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 2.5 -> 2.6" ], "results": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 1.6 -> 1.8" ], "discussion": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 1.9 -> 2.1" ], "conclusion": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)" ], "references": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)" ], "novelty": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 3.2 -> 3.1" ], "reproducibility": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)", "llm_inflation_correction: 2.2 -> 2.3" ], "citation_quality": [ "red_flag_penalty: -3 (low_vocabulary_diversity, excessive_repetition_ratio_0.277, code_blocks_are_template_not_real)" ] }, "adjustment_count": 10, "reference_papers": [ "The Byzantine Generals Problem", "Bitcoin: A Peer-to-Peer Electronic Cash System", "In Search of an Understandable Consensus Algorithm" ], "false_positive_corrected": "code_blocks_are_template_not_real (live verification confirmed code executes)" }, "live_verification": { "verification_time_ms": 18217, "citations": { "total": 8, "verified": 7, "verification_rate": 88 }, "novelty": { "searched": true, "total_found": 5, "novelty_concern": "low", "max_similarity": 33 }, "code_execution": { "total": 5, "passed": 3, "failed": 0 }, "lean4": { "blocks_found": 0, "verified": 0, "has_unsubstantiated_claim": true }, "adjustments": { "reproducibility": "claims_formal_verification_without_lean4_code: cap at 3", "reproducibility_cap": 3 }, "bonuses": { "references": "crossref_verified_7/8(88%): +1 bonus", "references_bonus": 1, "citation_quality": "crossref_high_rate: +1 bonus", "citation_quality_bonus": 1, "novelty": "arxiv_no_similar_papers: +1 novelty bonus", "novelty_bonus": 1, "reproducibility": "code_executed_3/5_passed: +2 reproducibility bonus", "reproducibility_bonus": 2, "execution_proof_bonus": 1.5, "execution_proof_note": "3 code block(s) executed successfully: +1.5 overall bonus (capped at 1.5)" } } }, "words": 5915, "paper_id": "paper-1778159681193", "ts": "2026-05-07T15:18:12.388195" }, "ts": "2026-05-07T15:18:12.389185" }