| """ |
| PhD Research OS — AI Model Council Tests |
| ========================================== |
| Tests for the 4-member council pipeline: Query Planner → Extractor → Critic → Chairman |
| """ |
|
|
| import os |
| import sys |
| import json |
| import pytest |
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| from phd_research_os.council import ( |
| ModelCouncil, CouncilRound, COUNCIL_PROMPTS |
| ) |
| from phd_research_os.db import init_db, get_db, create_claim |
| from phd_research_os.taxonomy import TaxonomyManager |
|
|
# Throwaway SQLite database each test runs against; the autouse fixture
# below deletes it (and its -wal/-shm sidecar files) after every test.
TEST_DB = "test_council.db"


# Realistic paper excerpt containing measured facts (Dirac shift, LOD),
# an interpretation, and an explicit hypothesis — material for every
# epistemic tag the extractor is expected to assign.
SAMPLE_TEXT = """
We investigated the sensitivity of graphene field-effect transistor (GFET) biosensors
to cardiac troponin I (cTnI) in phosphate-buffered saline (PBS). Our measurements
show that the Dirac point shifts by 45 ± 3 mV upon binding of 1 pM cTnI in 10 mM PBS
(n=5, p<0.001). The limit of detection was determined to be 0.8 fM using the 3σ/slope
method. We interpret these results as evidence that aptamer-functionalized GFETs can
achieve clinically relevant sensitivity for cardiac biomarker detection. However, ionic
strength effects at physiological conditions (150 mM) significantly reduce sensitivity,
suggesting that a desalting step may be necessary for clinical translation. We hypothesize
that a PEG spacer of 5 kDa molecular weight could mitigate Debye screening while
maintaining binding affinity, though this remains to be experimentally validated.
"""
|
|
|
|
@pytest.fixture(autouse=True)
def setup_teardown():
    """Give every test a fresh database, then wipe all SQLite files.

    Removes the main DB plus its WAL/SHM sidecars so no state leaks
    between tests.
    """
    init_db(TEST_DB)
    # Instance is discarded — presumably construction seeds/initializes
    # the taxonomy tables as a side effect; verify against TaxonomyManager.
    TaxonomyManager(db_path=TEST_DB)
    yield
    for path in (TEST_DB, TEST_DB + "-wal", TEST_DB + "-shm"):
        if os.path.exists(path):
            os.remove(path)
|
|
|
|
| |
| |
| |
|
|
def test_all_4_council_members_defined():
    """Every role in the 4-member pipeline has a registered prompt."""
    for role in ("query_planner", "extractor", "critic", "chairman"):
        assert role in COUNCIL_PROMPTS
|
|
def test_query_planner_prompt_has_json_instruction():
    """Planner prompt must instruct the model to emit a JSON array."""
    planner_prompt = COUNCIL_PROMPTS["query_planner"]
    assert "JSON array" in planner_prompt
|
|
def test_extractor_prompt_has_epistemic_tags():
    """Extractor prompt must name all four epistemic tags."""
    extractor = COUNCIL_PROMPTS["extractor"]
    for tag in ("Fact", "Interpretation", "Hypothesis", "Conflict_Hypothesis"):
        assert tag in extractor
|
|
def test_critic_prompt_checks_5_things():
    """Critic prompt must list all five review criteria."""
    criteria = (
        "Missing important claims",
        "Incorrect epistemic tags",
        "Overly confident",
        "Taxonomy correctness",
        "Missing fields",
    )
    critic = COUNCIL_PROMPTS["critic"]
    for phrase in criteria:
        assert phrase in critic
|
|
def test_chairman_prompt_has_completeness_penalty():
    """Chairman prompt must mention the 0.7 completeness penalty rule."""
    chairman = COUNCIL_PROMPTS["chairman"]
    assert "0.7 completeness penalty" in chairman
|
|
def test_chairman_output_schema():
    """Chairman prompt must describe every field of the output schema."""
    chairman = COUNCIL_PROMPTS["chairman"]
    for field in ("epistemic_tag", "confidence", "missing_fields", "status"):
        assert field in chairman
|
|
|
|
| |
| |
| |
|
|
def test_council_deliberate_without_brain():
    """Deliberation still produces a complete round when no brain is attached."""
    result = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT)

    assert isinstance(result, CouncilRound)
    assert result.round_id.startswith("CNCL_")
    assert result.final_claims  # at least one claim emitted
    assert result.started_at
    assert result.completed_at
|
|
def test_council_produces_valid_claims():
    """Every final claim carries the full schema with well-formed values."""
    required_keys = ("text", "epistemic_tag", "confidence", "missing_fields", "status")
    valid_tags = {"Fact", "Interpretation", "Hypothesis", "Conflict_Hypothesis"}

    outcome = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT)
    for claim in outcome.final_claims:
        for key in required_keys:
            assert key in claim
        assert claim["epistemic_tag"] in valid_tags
        assert 0.0 <= claim["confidence"] <= 1.0
        assert claim["status"] in ("Complete", "Incomplete")
|
|
def test_council_claims_have_consistent_status():
    """Any claim that lists missing fields must be flagged Incomplete."""
    outcome = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT)
    flagged = [c for c in outcome.final_claims if c["missing_fields"]]
    for claim in flagged:
        assert claim["status"] == "Incomplete"
|
|
def test_council_with_query():
    """Passing a query yields a query plan alongside the usual claims."""
    council = ModelCouncil(brain=None, db_path=TEST_DB)
    outcome = council.deliberate(
        SAMPLE_TEXT, query="What is the LOD for GFET cTnI sensors?"
    )

    assert isinstance(outcome.query_plan, list)
    assert outcome.final_claims  # at least one claim
|
|
def test_council_query_planner_standalone():
    """deliberate_query on its own returns a non-empty list of sub-queries."""
    planner = ModelCouncil(brain=None, db_path=TEST_DB)
    plan = planner.deliberate_query(
        "What are the ionic strength effects on GFET sensitivity?"
    )
    assert isinstance(plan, list)
    assert plan  # at least one query produced
|
|
def test_council_metadata():
    """A round's metadata records versioning, taxonomy, and claim counts."""
    outcome = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT)

    expected_keys = (
        "council_version",
        "taxonomy_domain",
        "extractor_claim_count",
        "final_claim_count",
    )
    for key in expected_keys:
        assert key in outcome.metadata
|
|
def test_council_critique_structure():
    """If the critic produced output, it must at least contain feedback."""
    critique = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT).critique
    if critique:
        assert "feedback" in critique
|
|
def test_council_raw_extraction_preserved():
    """The extractor's raw (pre-chairman) output is kept on the round."""
    outcome = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT)
    assert isinstance(outcome.raw_extraction, list)
|
|
def test_council_round_logged_to_db():
    """A completed round is persisted and surfaces first in the history."""
    council = ModelCouncil(brain=None, db_path=TEST_DB)
    completed = council.deliberate(SAMPLE_TEXT)

    rounds = council.get_council_history()
    assert rounds  # at least one logged round
    # NOTE(review): assumes history is ordered newest-first — confirm in council.py
    assert rounds[0]["round_id"] == completed.round_id
|
|
def test_council_history_retrieval():
    """History retrieval returns every logged round up to the given limit."""
    council = ModelCouncil(brain=None, db_path=TEST_DB)
    for _ in range(2):
        council.deliberate(SAMPLE_TEXT)

    assert len(council.get_council_history(limit=10)) >= 2
|
|
def test_council_confidence_clamped():
    """Every claim's confidence lies within the closed interval [0, 1]."""
    outcome = ModelCouncil(brain=None, db_path=TEST_DB).deliberate(SAMPLE_TEXT)
    assert all(0.0 <= c["confidence"] <= 1.0 for c in outcome.final_claims)
|
|
| def test_council_invalid_tag_defaulted(): |
| """Claims with invalid epistemic tags should be defaulted to Interpretation.""" |
| council = ModelCouncil(brain=None, db_path=TEST_DB) |
| |
| result = council.deliberate(SAMPLE_TEXT) |
| for claim in result.final_claims: |
| assert claim["epistemic_tag"] in ["Fact", "Interpretation", "Hypothesis", "Conflict_Hypothesis"] |
|
|
def test_multiple_deliberations_independent():
    """Consecutive deliberations on one council get distinct round IDs."""
    council = ModelCouncil(brain=None, db_path=TEST_DB)
    first = council.deliberate("Text about graphene sensors.")
    second = council.deliberate("Text about lithium batteries.")

    assert first.round_id != second.round_id
|
|
|
|
| if __name__ == "__main__": |
| pytest.main([__file__, "-v"]) |
|
|