""" PhD Research OS — AI Model Council Tests ========================================== Tests for the 4-member council pipeline: Query Planner → Extractor → Critic → Chairman """ import os import sys import json import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from phd_research_os.council import ( ModelCouncil, CouncilRound, COUNCIL_PROMPTS ) from phd_research_os.db import init_db, get_db, create_claim from phd_research_os.taxonomy import TaxonomyManager TEST_DB = "test_council.db" SAMPLE_TEXT = """ We investigated the sensitivity of graphene field-effect transistor (GFET) biosensors to cardiac troponin I (cTnI) in phosphate-buffered saline (PBS). Our measurements show that the Dirac point shifts by 45 ± 3 mV upon binding of 1 pM cTnI in 10 mM PBS (n=5, p<0.001). The limit of detection was determined to be 0.8 fM using the 3σ/slope method. We interpret these results as evidence that aptamer-functionalized GFETs can achieve clinically relevant sensitivity for cardiac biomarker detection. However, ionic strength effects at physiological conditions (150 mM) significantly reduce sensitivity, suggesting that a desalting step may be necessary for clinical translation. We hypothesize that a PEG spacer of 5 kDa molecular weight could mitigate Debye screening while maintaining binding affinity, though this remains to be experimentally validated. """ @pytest.fixture(autouse=True) def setup_teardown(): init_db(TEST_DB) TaxonomyManager(db_path=TEST_DB) yield for suffix in ["", "-wal", "-shm"]: p = TEST_DB + suffix if os.path.exists(p): os.remove(p) # ============================================================ # Council Prompt Tests # ============================================================ def test_all_4_council_members_defined(): assert "query_planner" in COUNCIL_PROMPTS assert "extractor" in COUNCIL_PROMPTS assert "critic" in COUNCIL_PROMPTS assert "chairman" in COUNCIL_PROMPTS def test_query_planner_prompt_has_json_instruction(): assert "JSON array" in COUNCIL_PROMPTS["query_planner"] def test_extractor_prompt_has_epistemic_tags(): prompt = COUNCIL_PROMPTS["extractor"] assert "Fact" in prompt assert "Interpretation" in prompt assert "Hypothesis" in prompt assert "Conflict_Hypothesis" in prompt def test_critic_prompt_checks_5_things(): prompt = COUNCIL_PROMPTS["critic"] assert "Missing important claims" in prompt assert "Incorrect epistemic tags" in prompt assert "Overly confident" in prompt assert "Taxonomy correctness" in prompt assert "Missing fields" in prompt def test_chairman_prompt_has_completeness_penalty(): assert "0.7 completeness penalty" in COUNCIL_PROMPTS["chairman"] def test_chairman_output_schema(): prompt = COUNCIL_PROMPTS["chairman"] assert "epistemic_tag" in prompt assert "confidence" in prompt assert "missing_fields" in prompt assert "status" in prompt # ============================================================ # Mock Council Pipeline Tests (no brain) # ============================================================ def test_council_deliberate_without_brain(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) assert isinstance(result, CouncilRound) assert result.round_id.startswith("CNCL_") assert len(result.final_claims) >= 1 assert result.started_at assert result.completed_at def test_council_produces_valid_claims(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) for claim in result.final_claims: assert "text" in claim assert "epistemic_tag" in claim assert "confidence" in claim assert "missing_fields" in claim assert "status" in claim assert claim["epistemic_tag"] in ["Fact", "Interpretation", "Hypothesis", "Conflict_Hypothesis"] assert 0.0 <= claim["confidence"] <= 1.0 assert claim["status"] in ["Complete", "Incomplete"] def test_council_claims_have_consistent_status(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) for claim in result.final_claims: if claim["missing_fields"]: assert claim["status"] == "Incomplete" def test_council_with_query(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT, query="What is the LOD for GFET cTnI sensors?") assert isinstance(result.query_plan, list) assert len(result.final_claims) >= 1 def test_council_query_planner_standalone(): council = ModelCouncil(brain=None, db_path=TEST_DB) queries = council.deliberate_query("What are the ionic strength effects on GFET sensitivity?") assert isinstance(queries, list) assert len(queries) >= 1 def test_council_metadata(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) assert "council_version" in result.metadata assert "taxonomy_domain" in result.metadata assert "extractor_claim_count" in result.metadata assert "final_claim_count" in result.metadata def test_council_critique_structure(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) if result.critique: assert "feedback" in result.critique def test_council_raw_extraction_preserved(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) assert isinstance(result.raw_extraction, list) def test_council_round_logged_to_db(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) history = council.get_council_history() assert len(history) >= 1 assert history[0]["round_id"] == result.round_id def test_council_history_retrieval(): council = ModelCouncil(brain=None, db_path=TEST_DB) council.deliberate(SAMPLE_TEXT) council.deliberate(SAMPLE_TEXT) history = council.get_council_history(limit=10) assert len(history) >= 2 def test_council_confidence_clamped(): council = ModelCouncil(brain=None, db_path=TEST_DB) result = council.deliberate(SAMPLE_TEXT) for claim in result.final_claims: assert 0.0 <= claim["confidence"] <= 1.0 def test_council_invalid_tag_defaulted(): """Claims with invalid epistemic tags should be defaulted to Interpretation.""" council = ModelCouncil(brain=None, db_path=TEST_DB) # Mock produces "Interpretation" by default, so this validates the path result = council.deliberate(SAMPLE_TEXT) for claim in result.final_claims: assert claim["epistemic_tag"] in ["Fact", "Interpretation", "Hypothesis", "Conflict_Hypothesis"] def test_multiple_deliberations_independent(): council = ModelCouncil(brain=None, db_path=TEST_DB) r1 = council.deliberate("Text about graphene sensors.") r2 = council.deliberate("Text about lithium batteries.") assert r1.round_id != r2.round_id if __name__ == "__main__": pytest.main([__file__, "-v"])