""" Tests for Foundation Components ================================= Tests for all "strongly implementable" features: - SPECTER2 embedding dedup (with Jaccard fallback) - SciFact benchmark evaluation - Epistemic Trigger Words validator - Low Confidence Quarantine - SciBERT-NLI contradiction pre-filter (with fallback) - Epistemic Velocity tracking - Confidence Decomposition Display """ import pytest import json import os import sys import tempfile # Add project root to path sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # ══════════════════════════════════════════════════════════════════════ # FIXTURES # ══════════════════════════════════════════════════════════════════════ @pytest.fixture def db_path(): """Create a temporary database for testing.""" from phd_research_os_v2.core.database import init_db with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: path = f.name init_db(path) yield path os.unlink(path) @pytest.fixture def sample_claims(): """Sample claims for testing.""" return [ { "claim_id": "CLM_TEST001", "text": "The limit of detection was 0.8 fM in 10 mM PBS buffer.", "epistemic_tag": "Fact", "source_section": "results", "source_doi": "10.1234/paper1", "evidence_strength": 800, "composite_confidence": 750, "qualifiers": json.dumps(["in 10 mM PBS"]), "missing_fields": json.dumps([]), "is_null_result": False, "is_inherited_citation": False, }, { "claim_id": "CLM_TEST002", "text": "A detection limit of 800 attomolar was achieved using the graphene sensor.", "epistemic_tag": "Fact", "source_section": "results", "source_doi": "10.1234/paper2", "evidence_strength": 750, "composite_confidence": 700, "qualifiers": json.dumps([]), "missing_fields": json.dumps([]), "is_null_result": False, "is_inherited_citation": False, }, { "claim_id": "CLM_TEST003", "text": "This approach may potentially reduce diagnostic costs in low-resource settings.", "epistemic_tag": "Hypothesis", "source_section": "discussion", "source_doi": "10.1234/paper1", "evidence_strength": 300, "composite_confidence": 200, "qualifiers": json.dumps(["may", "potentially"]), "missing_fields": json.dumps(["cost_analysis", "field_testing"]), "is_null_result": False, "is_inherited_citation": False, }, { "claim_id": "CLM_TEST004", "text": "The sensor did not show significant improvement over the control group.", "epistemic_tag": "Fact", "source_section": "results", "source_doi": "10.1234/paper3", "evidence_strength": 600, "composite_confidence": 400, "qualifiers": json.dumps(["not significant"]), "missing_fields": json.dumps([]), "is_null_result": True, "is_inherited_citation": False, }, ] # ══════════════════════════════════════════════════════════════════════ # TEST: EMBEDDING DEDUP (Layer 3) # ══════════════════════════════════════════════════════════════════════ class TestEmbeddingDedup: """Tests for phd_research_os_v2.layer3.embedding_dedup""" def test_jaccard_identical_texts(self): from phd_research_os_v2.layer3.embedding_dedup import jaccard_similarity sim = jaccard_similarity("The LOD was 0.8 fM", "The LOD was 0.8 fM") assert sim == 1.0 def test_jaccard_different_texts(self): from phd_research_os_v2.layer3.embedding_dedup import jaccard_similarity sim = jaccard_similarity("The LOD was 0.8 fM", "Completely unrelated text about cooking") assert sim < 0.2 def test_jaccard_similar_texts(self): from phd_research_os_v2.layer3.embedding_dedup import jaccard_similarity sim = jaccard_similarity( "The detection limit was 0.8 femtomolar", "The detection limit was measured at 0.8 fM" ) 
    def test_jaccard_empty_texts(self):
        from phd_research_os_v2.layer3.embedding_dedup import jaccard_similarity
        assert jaccard_similarity("", "") == 0.0
        assert jaccard_similarity("hello", "") == 0.0

    def test_claim_similarity_jaccard_method(self):
        # Renamed from "auto_mode": the call passes method="jaccard" explicitly.
        from phd_research_os_v2.layer3.embedding_dedup import claim_similarity
        sim = claim_similarity("LOD was 0.8 fM", "LOD was 0.8 fM", method="jaccard")
        assert sim == 1.0

    def test_batch_deduplicate_jaccard(self):
        from phd_research_os_v2.layer3.embedding_dedup import batch_deduplicate
        texts = [
            "The LOD was 0.8 fM in PBS buffer",
            "The LOD was 0.8 fM in PBS buffer",  # exact duplicate
            "Completely different topic about weather",
        ]
        result = batch_deduplicate(texts, threshold=0.85, method="jaccard")
        assert len(result["canonical_indices"]) <= 2  # At most 2 unique
        assert 1 in result["duplicates"]  # Index 1 is a duplicate of 0

    def test_batch_deduplicate_empty(self):
        from phd_research_os_v2.layer3.embedding_dedup import batch_deduplicate
        result = batch_deduplicate([], method="jaccard")
        assert result["canonical_indices"] == []

    def test_batch_deduplicate_single(self):
        from phd_research_os_v2.layer3.embedding_dedup import batch_deduplicate
        result = batch_deduplicate(["one claim"], method="jaccard")
        assert result["canonical_indices"] == [0]

    def test_normalize_claim_text(self):
        from phd_research_os_v2.layer3.embedding_dedup import _normalize
        assert _normalize(" The LOD was 0.8 fM ") == "the lod was 0.8 fm"


# ══════════════════════════════════════════════════════════════════════
# TEST: SCIFACT BENCHMARK (Layer 6)
# ══════════════════════════════════════════════════════════════════════

class TestSciFact:
    """Tests for phd_research_os_v2.layer6.scifact_benchmark"""

    def test_baseline_classifier_support(self):
        from phd_research_os_v2.layer6.scifact_benchmark import quick_baseline_classifier
        result = quick_baseline_classifier(
            "Vitamin C helps prevent scurvy",
            "Studies have shown vitamin C is essential for preventing scurvy in sailors"
        )
        assert result in ["SUPPORT", "CONTRADICT", "NOT_ENOUGH_INFO"]

    def test_baseline_classifier_contradict(self):
        from phd_research_os_v2.layer6.scifact_benchmark import quick_baseline_classifier
        result = quick_baseline_classifier(
            "The drug has no side effects",
            "The drug was found to have significant adverse effects including nausea"
        )
        assert result in ["SUPPORT", "CONTRADICT", "NOT_ENOUGH_INFO"]

    def test_evaluate_returns_correct_structure(self):
        from phd_research_os_v2.layer6.scifact_benchmark import evaluate_against_scifact

        def dummy_classifier(claim, evidence):
            return "SUPPORT"

        examples = [
            {"claim": "test claim 1", "evidence": "test evidence 1", "label": "SUPPORT"},
            {"claim": "test claim 2", "evidence": "test evidence 2", "label": "CONTRADICT"},
            {"claim": "test claim 3", "evidence": "test evidence 3", "label": "NOT_ENOUGH_INFO"},
        ]
        result = evaluate_against_scifact(dummy_classifier, examples)
        assert "accuracy" in result
        assert "per_class" in result
        assert "confusion_matrix" in result
        assert "total_examples" in result
        assert result["total_examples"] == 3
        assert 0 <= result["accuracy"] <= 1

    def test_evaluate_perfect_classifier(self):
        from phd_research_os_v2.layer6.scifact_benchmark import evaluate_against_scifact
        examples = [
            {"claim": "c1", "evidence": "e1", "label": "SUPPORT"},
            {"claim": "c2", "evidence": "e2", "label": "CONTRADICT"},
        ]

        def perfect(claim, evidence):
            for ex in examples:
                if ex["claim"] == claim:
                    return ex["label"]
            return "NOT_ENOUGH_INFO"

        result = evaluate_against_scifact(perfect, examples)
        assert result["accuracy"] == 1.0

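    # Hedged extra check: a classifier that always answers "SUPPORT" should
    # score exactly the SUPPORT fraction of the examples. This assumes accuracy
    # is plain correct/total, which the bounds check above is consistent with
    # but does not strictly prove.
    def test_evaluate_constant_classifier(self):
        from phd_research_os_v2.layer6.scifact_benchmark import evaluate_against_scifact
        examples = [
            {"claim": "c1", "evidence": "e1", "label": "SUPPORT"},
            {"claim": "c2", "evidence": "e2", "label": "CONTRADICT"},
        ]
        result = evaluate_against_scifact(lambda claim, evidence: "SUPPORT", examples)
        assert result["accuracy"] == pytest.approx(0.5)
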
result["accuracy"] == 1.0 def test_evaluate_handles_errors(self): from phd_research_os_v2.layer6.scifact_benchmark import evaluate_against_scifact def broken(claim, evidence): raise ValueError("broken") examples = [{"claim": "c", "evidence": "e", "label": "SUPPORT"}] result = evaluate_against_scifact(broken, examples) assert result["total_examples"] == 1 # Should not crash # ══════════════════════════════════════════════════════════════════════ # TEST: EPISTEMIC TRIGGER WORDS (Layer 2) # ══════════════════════════════════════════════════════════════════════ class TestTriggerValidator: """Tests for phd_research_os_v2.layer2.trigger_validator""" def test_fact_detection(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "We measured a detection limit of 0.8 fM with p < 0.001", source_section="results" ) assert result["predicted_tag"] == "Fact" assert result["scores"]["Fact"] > 0.3 def test_hypothesis_detection(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "This may potentially reduce costs and further investigation is needed", source_section="discussion" ) assert result["predicted_tag"] == "Hypothesis" assert result["scores"]["Hypothesis"] > 0.3 def test_interpretation_detection(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "These findings suggest that the mechanism is likely due to charge transfer", source_section="discussion" ) assert result["predicted_tag"] == "Interpretation" def test_conflict_detection(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "Contrary to previous reports, our results show inconsistent findings that refutes the hypothesis" ) assert result["scores"]["Conflict_Hypothesis"] > 0.2 def test_section_prior_results(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "The value was obtained from the experiment", source_section="results" ) assert result["scores"]["Fact"] > 0 # Results prior boosts Fact def test_section_prior_abstract(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "A novel approach was developed", source_section="abstract" ) assert result["scores"]["Interpretation"] > 0 # Abstract prior boosts Interpretation def test_validate_ai_tag_agreement(self): from phd_research_os_v2.layer2.trigger_validator import validate_ai_tag result = validate_ai_tag( "We measured a detection limit of 0.8 fM with p < 0.001", ai_tag="Fact", source_section="results" ) assert result["agreement"] == True assert result["recommendation"] == "accept" def test_validate_ai_tag_disagreement(self): from phd_research_os_v2.layer2.trigger_validator import validate_ai_tag result = validate_ai_tag( "This may potentially reduce costs and further investigation is needed", ai_tag="Fact", source_section="discussion" ) # Trigger words should detect hypothesis language if not result["agreement"]: assert result["disagreement_severity"] in ["mild", "strong"] def test_batch_validate(self): from phd_research_os_v2.layer2.trigger_validator import batch_validate claims = [ {"text": "We measured 0.8 fM with p < 0.001", "epistemic_tag": "Fact", "source_section": "results"}, {"text": "May potentially reduce costs", "epistemic_tag": "Fact", "source_section": "discussion"}, {"text": "Suggests a 
novel mechanism", "epistemic_tag": "Interpretation", "source_section": "discussion"}, ] result = batch_validate(claims) assert result["total"] == 3 assert "agreement_rate" in result def test_empty_text(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores("", source_section="results") assert "predicted_tag" in result def test_scores_bounded(self): from phd_research_os_v2.layer2.trigger_validator import compute_trigger_scores result = compute_trigger_scores( "may possibly might could potentially suggests hypothesize propose speculate", source_section="discussion" ) for score in result["scores"].values(): assert 0 <= score <= 1.0 # ══════════════════════════════════════════════════════════════════════ # TEST: LOW CONFIDENCE QUARANTINE (Layer 4) # ══════════════════════════════════════════════════════════════════════ class TestQuarantine: """Tests for phd_research_os_v2.layer4.quarantine_and_nli.ConfidenceQuarantine""" def test_quarantine_check_low_confidence(self): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine q = ConfidenceQuarantine() result = q.quarantine_check({"composite_confidence": 200}) assert result["quarantined"] == True assert result["reason"] == "confidence_too_low" def test_quarantine_check_high_confidence(self): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine q = ConfidenceQuarantine() result = q.quarantine_check({"composite_confidence": 800}) assert result["quarantined"] == False assert result["reason"] is None def test_quarantine_check_threshold(self): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine q = ConfidenceQuarantine(threshold=500) assert q.quarantine_check({"composite_confidence": 499})["quarantined"] == True assert q.quarantine_check({"composite_confidence": 500})["quarantined"] == False def test_quarantine_claim_in_db(self, db_path): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine from phd_research_os_v2.core.database import get_db, now_iso # Insert a test claim conn = get_db(db_path) conn.execute(""" INSERT INTO claims (claim_id, text, epistemic_tag, composite_confidence, status, created_at, updated_at) VALUES ('CLM_Q1', 'test claim', 'Fact', 200, 'Complete', ?, ?) """, (now_iso(), now_iso())) conn.commit() conn.close() q = ConfidenceQuarantine(db_path=db_path) q.quarantine_claim("CLM_Q1") conn = get_db(db_path) row = conn.execute("SELECT status FROM claims WHERE claim_id = 'CLM_Q1'").fetchone() conn.close() assert dict(row)["status"] == "Quarantined" def test_promote_claim(self, db_path): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine from phd_research_os_v2.core.database import get_db, now_iso conn = get_db(db_path) conn.execute(""" INSERT INTO claims (claim_id, text, epistemic_tag, composite_confidence, status, missing_fields, created_at, updated_at) VALUES ('CLM_Q2', 'test', 'Fact', 200, 'Quarantined', '[]', ?, ?) 
""", (now_iso(), now_iso())) conn.commit() conn.close() q = ConfidenceQuarantine(db_path=db_path) result = q.promote_claim("CLM_Q2") assert result["new_status"] == "Complete" def test_quarantine_sweep(self, db_path): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine from phd_research_os_v2.core.database import get_db, now_iso conn = get_db(db_path) # Insert claims with various confidence levels for i, conf in enumerate([100, 200, 500, 800]): conn.execute(""" INSERT INTO claims (claim_id, text, epistemic_tag, composite_confidence, status, created_at, updated_at) VALUES (?, 'test', 'Fact', ?, 'Complete', ?, ?) """, (f"CLM_SW{i}", conf, now_iso(), now_iso())) conn.commit() conn.close() q = ConfidenceQuarantine(db_path=db_path, threshold=300) result = q.quarantine_sweep() assert result["quarantined_count"] == 2 # 100 and 200 are below 300 def test_quarantine_stats(self, db_path): from phd_research_os_v2.layer4.quarantine_and_nli import ConfidenceQuarantine q = ConfidenceQuarantine(db_path=db_path) stats = q.get_stats() assert "total_claims" in stats assert "quarantined" in stats assert "quarantine_rate" in stats # ══════════════════════════════════════════════════════════════════════ # TEST: NLI PRE-FILTER (Layer 4) # ══════════════════════════════════════════════════════════════════════ class TestNLIPreFilter: """Tests for contradiction pre-filter (keyword fallback only — SciBERT may not be installed)""" def test_nli_classify_fallback(self): from phd_research_os_v2.layer4.quarantine_and_nli import nli_classify result = nli_classify( "The drug reduces inflammation", "The drug has no effect on inflammation contrary to expectations" ) assert result["label"] in ["ENTAILMENT", "CONTRADICTION", "NEUTRAL"] assert "method" in result def test_prefilter_contradictions(self): from phd_research_os_v2.layer4.quarantine_and_nli import prefilter_contradictions claims = [ {"claim_id": "A", "text": "The sensor achieved 0.8 fM detection limit", "source_doi": "d1"}, {"claim_id": "B", "text": "The sensor failed to detect anything below 10 fM contrary to previous claims", "source_doi": "d2"}, {"claim_id": "C", "text": "Weather patterns affect global temperature", "source_doi": "d3"}, ] results = prefilter_contradictions(claims, contradiction_threshold=0.0) assert isinstance(results, list) # Should find at least some pairs def test_prefilter_skips_same_document(self): from phd_research_os_v2.layer4.quarantine_and_nli import prefilter_contradictions claims = [ {"claim_id": "A", "text": "X is true", "source_doi": "same_doi"}, {"claim_id": "B", "text": "X is false", "source_doi": "same_doi"}, ] results = prefilter_contradictions(claims) # Same-document pairs should be skipped for r in results: assert not (r["claim_a_id"] == "A" and r["claim_b_id"] == "B") def test_prefilter_empty_claims(self): from phd_research_os_v2.layer4.quarantine_and_nli import prefilter_contradictions assert prefilter_contradictions([]) == [] assert prefilter_contradictions([{"claim_id": "A", "text": "only one"}]) == [] # ══════════════════════════════════════════════════════════════════════ # TEST: EPISTEMIC VELOCITY (Layer 5) # ══════════════════════════════════════════════════════════════════════ class TestEpistemicVelocity: """Tests for phd_research_os_v2.layer5.velocity_and_decomposition.EpistemicVelocity""" def test_insufficient_data(self, db_path): from phd_research_os_v2.layer5.velocity_and_decomposition import EpistemicVelocity ev = EpistemicVelocity(db_path=db_path) result = 
ev.compute_velocity("NONEXISTENT") assert result["trend"] == "insufficient_data" def test_rising_trend(self, db_path): from phd_research_os_v2.layer5.velocity_and_decomposition import EpistemicVelocity from phd_research_os_v2.core.database import get_db, now_iso, to_fixed # Insert canonical claim with rising version history conn = get_db(db_path) history = [ {"version": 1, "confidence": to_fixed(0.5), "date": "2025-01-01", "source": "paper1"}, {"version": 2, "confidence": to_fixed(0.7), "date": "2025-06-01", "source": "paper2"}, {"version": 3, "confidence": to_fixed(0.9), "date": "2026-01-01", "source": "paper3"}, ] conn.execute(""" INSERT INTO canonical_claims (canonical_id, representative_text, epistemic_tag, composite_confidence, evidence_count, source_dois, aliases, version_history, current_version, schema_version, created_at, updated_at) VALUES ('CANON_RISE', 'test rising claim', 'Fact', ?, 3, '[]', '[]', ?, 3, '2.0', ?, ?) """, (to_fixed(0.9), json.dumps(history), now_iso(), now_iso())) conn.commit() conn.close() ev = EpistemicVelocity(db_path=db_path) result = ev.compute_velocity("CANON_RISE") assert result["trend"] == "rising" assert result["velocity"] > 0 def test_falling_trend(self, db_path): from phd_research_os_v2.layer5.velocity_and_decomposition import EpistemicVelocity from phd_research_os_v2.core.database import get_db, now_iso, to_fixed conn = get_db(db_path) history = [ {"version": 1, "confidence": to_fixed(0.9), "date": "2025-01-01", "source": "p1"}, {"version": 2, "confidence": to_fixed(0.6), "date": "2025-06-01", "source": "p2"}, {"version": 3, "confidence": to_fixed(0.3), "date": "2026-01-01", "source": "p3"}, ] conn.execute(""" INSERT INTO canonical_claims (canonical_id, representative_text, epistemic_tag, composite_confidence, evidence_count, source_dois, aliases, version_history, current_version, schema_version, created_at, updated_at) VALUES ('CANON_FALL', 'test falling claim', 'Fact', ?, 3, '[]', '[]', ?, 3, '2.0', ?, ?) """, (to_fixed(0.3), json.dumps(history), now_iso(), now_iso())) conn.commit() conn.close() ev = EpistemicVelocity(db_path=db_path) result = ev.compute_velocity("CANON_FALL") assert result["trend"] == "falling" assert result["velocity"] < 0 def test_single_version_insufficient(self, db_path): from phd_research_os_v2.layer5.velocity_and_decomposition import EpistemicVelocity from phd_research_os_v2.core.database import get_db, now_iso, to_fixed conn = get_db(db_path) history = [{"version": 1, "confidence": to_fixed(0.7), "date": "2025-01-01", "source": "p1"}] conn.execute(""" INSERT INTO canonical_claims (canonical_id, representative_text, epistemic_tag, composite_confidence, evidence_count, source_dois, aliases, version_history, current_version, schema_version, created_at, updated_at) VALUES ('CANON_SINGLE', 'test single', 'Fact', ?, 1, '[]', '[]', ?, 1, '2.0', ?, ?) 
""", (to_fixed(0.7), json.dumps(history), now_iso(), now_iso())) conn.commit() conn.close() ev = EpistemicVelocity(db_path=db_path) result = ev.compute_velocity("CANON_SINGLE") assert result["trend"] == "insufficient_data" # ══════════════════════════════════════════════════════════════════════ # TEST: CONFIDENCE DECOMPOSITION (Layer 5) # ══════════════════════════════════════════════════════════════════════ class TestConfidenceDecomposition: """Tests for phd_research_os_v2.layer5.velocity_and_decomposition (decomposition)""" def test_basic_decomposition(self): from phd_research_os_v2.layer5.velocity_and_decomposition import decompose_confidence claim = { "evidence_quality": 800, "truth_likelihood": 700, "qualifier_strength_score": 600, "composite_confidence": 700, "evidence_strength": 850, "source_section": "results", "qualifiers": json.dumps(["in PBS"]), "missing_fields": json.dumps([]), "is_null_result": False, "is_inherited_citation": False, "practical_significance": True, "parse_confidence": 950, } result = decompose_confidence(claim, source={"study_type": "in_vitro", "journal_tier": 1}) assert "composite_confidence" in result assert "scores" in result assert "headline" in result assert "warnings" in result assert "action_items" in result assert "evidence_quality" in result["scores"] assert "truth_likelihood" in result["scores"] assert "qualifier_strength" in result["scores"] # Each score should have value, bar, explanation for score_data in result["scores"].values(): assert "value" in score_data assert "bar" in score_data assert "explanation" in score_data def test_decomposition_null_result_warning(self): from phd_research_os_v2.layer5.velocity_and_decomposition import decompose_confidence claim = { "evidence_quality": 400, "truth_likelihood": 300, "qualifier_strength_score": 300, "composite_confidence": 333, "evidence_strength": 500, "source_section": "results", "qualifiers": json.dumps(["not significant"]), "missing_fields": json.dumps([]), "is_null_result": True, "is_inherited_citation": False, "practical_significance": True, } result = decompose_confidence(claim) assert any("null" in w.lower() for w in result["warnings"]) def test_decomposition_abstract_warning(self): from phd_research_os_v2.layer5.velocity_and_decomposition import decompose_confidence claim = { "evidence_quality": 500, "truth_likelihood": 500, "qualifier_strength_score": 500, "composite_confidence": 500, "evidence_strength": 700, "source_section": "abstract", "qualifiers": json.dumps([]), "missing_fields": json.dumps([]), "is_null_result": False, "is_inherited_citation": False, "practical_significance": True, } result = decompose_confidence(claim) assert any("abstract" in w.lower() for w in result["warnings"]) def test_format_text(self): from phd_research_os_v2.layer5.velocity_and_decomposition import ( decompose_confidence, format_decomposition_text ) claim = { "evidence_quality": 800, "truth_likelihood": 700, "qualifier_strength_score": 900, "composite_confidence": 800, "evidence_strength": 850, "source_section": "results", "qualifiers": json.dumps([]), "missing_fields": json.dumps([]), "is_null_result": False, "is_inherited_citation": False, "practical_significance": True, } decomposition = decompose_confidence(claim) text = format_decomposition_text(decomposition) assert isinstance(text, str) assert "Composite Confidence" in text assert "Evidence Quality" in text def test_format_markdown(self): from phd_research_os_v2.layer5.velocity_and_decomposition import ( decompose_confidence, 
    def test_format_markdown(self):
        from phd_research_os_v2.layer5.velocity_and_decomposition import (
            decompose_confidence,
            format_decomposition_markdown,
        )
        claim = {
            "evidence_quality": 800,
            "truth_likelihood": 700,
            "qualifier_strength_score": 900,
            "composite_confidence": 800,
            "evidence_strength": 850,
            "source_section": "results",
            "qualifiers": json.dumps([]),
            "missing_fields": json.dumps([]),
            "is_null_result": False,
            "is_inherited_citation": False,
            "practical_significance": True,
        }
        decomposition = decompose_confidence(claim)
        md = format_decomposition_markdown(decomposition)
        assert isinstance(md, str)
        assert "**Confidence:" in md
        assert "|" in md  # Table format

    def test_low_confidence_headline(self):
        from phd_research_os_v2.layer5.velocity_and_decomposition import decompose_confidence
        claim = {
            "evidence_quality": 100,
            "truth_likelihood": 100,
            "qualifier_strength_score": 100,
            "composite_confidence": 100,
            "evidence_strength": 200,
            "source_section": "discussion",
            "qualifiers": json.dumps(["may", "possibly", "potentially"]),
            "missing_fields": json.dumps(["data", "statistics"]),
            "is_null_result": False,
            "is_inherited_citation": True,
            "practical_significance": True,
        }
        result = decompose_confidence(claim)
        assert "quarantine" in result["headline"].lower() or "low" in result["headline"].lower()


# ══════════════════════════════════════════════════════════════════════
# TEST: SCIRIFF INTEGRATION (Training)
# ══════════════════════════════════════════════════════════════════════

class TestSciRIFFIntegration:
    """Tests for the SciRIFF data integration logic (without actually downloading)."""

    def test_relevant_task_families_defined(self):
        from phd_research_os_v2.training.sciriff_integration import RELEVANT_TASK_FAMILIES
        assert "ie" in RELEVANT_TASK_FAMILIES
        assert "classification" in RELEVANT_TASK_FAMILIES
        assert "entailment" in RELEVANT_TASK_FAMILIES

    def test_system_prompts_exist(self):
        from phd_research_os_v2.training.sciriff_integration import SYSTEM_PROMPTS
        assert "ie" in SYSTEM_PROMPTS
        assert "classification" in SYSTEM_PROMPTS
        assert "qa" in SYSTEM_PROMPTS
        for prompt in SYSTEM_PROMPTS.values():
            assert "PhD Research OS" in prompt

    def test_high_priority_tasks_defined(self):
        from phd_research_os_v2.training.sciriff_integration import HIGH_PRIORITY_TASKS
        assert "scifact" in HIGH_PRIORITY_TASKS
        assert "scierc" in HIGH_PRIORITY_TASKS


# ══════════════════════════════════════════════════════════════════════
# TEST: DATABASE SCHEMA SUPPORTS NEW FEATURES
# ══════════════════════════════════════════════════════════════════════

class TestDatabaseSchema:
    """Verify the database schema supports quarantine and new features."""

    def test_claims_table_has_required_columns(self, db_path):
        from phd_research_os_v2.core.database import get_db
        conn = get_db(db_path)
        # Get column info
        cursor = conn.execute("PRAGMA table_info(claims)")
        columns = {row[1] for row in cursor.fetchall()}
        conn.close()
        required = {
            "claim_id", "text", "epistemic_tag", "composite_confidence",
            "status", "is_null_result", "is_inherited_citation",
            "qualifiers", "missing_fields", "source_section",
            "evidence_quality", "truth_likelihood", "qualifier_strength_score",
        }
        for col in required:
            assert col in columns, f"Missing column: {col}"

    def test_canonical_claims_has_version_history(self, db_path):
        from phd_research_os_v2.core.database import get_db
        conn = get_db(db_path)
        cursor = conn.execute("PRAGMA table_info(canonical_claims)")
        columns = {row[1] for row in cursor.fetchall()}
        conn.close()
        assert "version_history" in columns
        assert "evidence_count" in columns

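    # Hedged extra check, grounded in the INSERT statements used by the
    # quarantine tests in this file: the claims table must also carry the
    # timestamp columns those INSERTs populate.
    def test_claims_table_has_timestamps(self, db_path):
        from phd_research_os_v2.core.database import get_db
        conn = get_db(db_path)
        columns = {row[1] for row in conn.execute("PRAGMA table_info(claims)").fetchall()}
        conn.close()
        assert "created_at" in columns
        assert "updated_at" in columns
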
    def test_eval_runs_table_exists(self, db_path):
        from phd_research_os_v2.core.database import get_db
        conn = get_db(db_path)
        cursor = conn.execute("PRAGMA table_info(eval_runs)")
        columns = {row[1] for row in cursor.fetchall()}
        conn.close()
        assert "run_id" in columns
        assert "metrics" in columns
        assert "passed" in columns


if __name__ == "__main__":
    pytest.main([__file__, "-v", "--tb=short"])