| """ |
| PhD Research OS v2.0 — Integration Tests |
| ========================================== |
| Tests the complete pipeline: Layer 0 → Layer 2 → Layer 4 → Layer 5 |
| """ |
|
|
| import os |
| import sys |
| import json |
| import tempfile |
| import pytest |
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| from phd_research_os_v2.core.database import ( |
| init_db, get_db, get_stats, get_state, set_state, |
| to_fixed, from_fixed, gen_id, now_iso |
| ) |
| from phd_research_os_v2.layer0.parser import ( |
| StructuralParser, detect_section, classify_region_type, |
| extract_cross_references, score_parse_quality |
| ) |
| from phd_research_os_v2.layer2.extractor import ( |
| QualifiedExtractor, SECTION_MODIFIERS, VALID_TAGS |
| ) |
| from phd_research_os_v2.layer4.graph import KnowledgeGraph |
| from phd_research_os_v2.layer5.scorer import CalibratedScorer |
|
|
# Path of the throwaway SQLite database shared by every test in this module;
# created fresh by the autouse fixture and removed (with its WAL/SHM sidecars) afterwards.
TEST_DB = "test_v2_integration.db"
|
|
# Synthetic plain-text "paper" with canonical IMRaD sections, used as the
# ingestion fixture throughout these tests. It deliberately mixes quantitative
# results, a null finding, and hedged discussion language so section detection,
# claim extraction, null-result flagging and qualifier handling all have input.
SAMPLE_PAPER = """Abstract
We investigated graphene field-effect transistor (GFET) biosensors for cardiac troponin detection.

Introduction
Cardiac troponin I (cTnI) is a gold-standard biomarker for myocardial infarction. Current detection methods require laboratory equipment. Point-of-care biosensors could enable faster diagnosis.

Methods
GFETs were fabricated using CVD graphene on SiO2/Si substrates. Aptamer probes were immobilized via pyrene linkers. Measurements were performed in 10 mM PBS at room temperature.

Results
The Dirac point shifted by 45 ± 3 mV upon binding of 1 pM cTnI (n=5, p<0.001). The limit of detection was 0.8 fM using the 3-sigma method. Sensitivity was not maintained at physiological ionic strength (150 mM), showing a 10-fold reduction. We observed no significant change in selectivity when tested against troponin T.

Discussion
These results suggest that aptamer-functionalized GFETs can achieve clinically relevant sensitivity in buffer conditions. However, the ionic strength dependence indicates that a desalting step may be necessary for clinical translation. We hypothesize that PEG spacers could mitigate Debye screening effects.

Conclusion
GFET biosensors show promise for cardiac biomarker detection but require further optimization for physiological conditions.
"""
|
|
|
|
@pytest.fixture(autouse=True)
def setup_teardown():
    """Create a fresh test database before each test and delete it afterwards.

    Also removes any stale database files (including SQLite's -wal/-shm
    sidecars) left behind by a previously crashed run, so every test starts
    from a clean slate instead of inheriting leftover state.
    """
    def _remove_db_files():
        # One helper so pre-test and post-test cleanup cannot drift apart.
        for suffix in ("", "-wal", "-shm"):
            path = TEST_DB + suffix
            if os.path.exists(path):
                os.remove(path)

    _remove_db_files()  # guard against leftovers from an aborted earlier run
    init_db(TEST_DB)
    yield
    _remove_db_files()
|
|
|
|
| |
| |
| |
|
|
def test_database_init():
    """A freshly initialized DB reports stats for all core tables."""
    stats = get_stats(TEST_DB)
    assert isinstance(stats, dict)
    required = ("documents", "regions", "claims", "graph_nodes")
    missing = [name for name in required if name not in stats]
    assert not missing
|
|
def test_system_state():
    """schema_version is seeded at init; set_state/get_state round-trips."""
    version = get_state(TEST_DB, "schema_version")
    assert version == "2.0"

    set_state(TEST_DB, "test_key", "test_value")
    round_tripped = get_state(TEST_DB, "test_key")
    assert round_tripped == "test_value"
|
|
def test_fixed_point_math():
    """to_fixed maps floats onto the 0-1000 integer scale; from_fixed inverts it."""
    conversions = [(0.85, 850), (1.0, 1000), (0.0, 0)]
    for as_float, as_fixed in conversions:
        assert to_fixed(as_float) == as_fixed
    assert from_fixed(850) == 0.85
|
|
|
|
| |
| |
| |
|
|
def test_section_detection():
    """Heading strings map to canonical section names; plain prose maps to None."""
    expected = {
        "Abstract": "abstract",
        "2.1 Methods": "methods",
        "Results and Discussion": "results_discussion",
        "3. Results": "results",
        "References": "references",
    }
    for heading, section in expected.items():
        assert detect_section(heading) == section
    assert detect_section("Random paragraph text") is None
|
|
def test_region_classification():
    """Captions, plain paragraphs and ALL-CAPS headers are distinguished."""
    cases = [
        ("Table 1: Results summary", "caption"),
        ("Figure 3: Scatter plot of sensitivity vs ionic strength", "caption"),
        ("This is a normal paragraph about the experiment.", "body_text"),
        ("METHODS", "header"),
    ]
    for text, expected_type in cases:
        assert classify_region_type(text) == expected_type
|
|
def test_cross_reference_extraction():
    """Figure, table and citation references are all detected in one sentence."""
    sentence = "As shown in Figure 3 and Table 2, the results (Eq. 4) confirm [32]."
    refs = extract_cross_references(sentence)
    found_types = {ref["ref_type"] for ref in refs}
    assert {"figure", "table", "citation"} <= found_types
|
|
def test_parse_quality_scoring():
    """Clean text scores high, mojibake scores low, empty text scores zero."""
    clean = "The limit of detection was determined to be 0.8 fM using the 3-sigma method."
    garbled = "â–¡â–¡ garbled text â– â– â– with bad â–¡ characters"

    assert score_parse_quality(clean, "fitz") > 700
    assert score_parse_quality(garbled, "fitz") < 700
    assert score_parse_quality("", "fitz") == 0
|
|
def test_ingest_text_file():
    """Plain-text ingestion produces regions and detects the Results section."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(SAMPLE_PAPER)
        f.flush()
        temp_path = f.name

    try:
        result = StructuralParser(TEST_DB).ingest_document(
            temp_path, doc_type="main", title="Test Paper"
        )

        assert result.get("doc_id") is not None
        assert result["total_regions"] > 0
        assert result["parse_method"] == "text"
        assert "results" in result.get("sections_found", [])
    finally:
        os.unlink(temp_path)
|
|
def test_section_aware_chunking():
    """An ingested document can be re-read as section-labelled chunks."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(SAMPLE_PAPER)
        temp_path = f.name

    try:
        parser = StructuralParser(TEST_DB)
        doc_id = parser.ingest_document(temp_path)["doc_id"]

        chunks = parser.get_section_chunks(doc_id)
        assert chunks

        # At least one chunk must carry a detected section label.
        assert any(chunk["section"] is not None for chunk in chunks)
    finally:
        os.unlink(temp_path)
|
|
|
|
| |
| |
| |
|
|
def test_extract_from_text_chunk():
    """Claims from a raw chunk carry a valid tag, bounded confidence and a status."""
    extractor = QualifiedExtractor(TEST_DB)
    chunk = {
        "text": "The LOD was 0.8 fM (n=5, p<0.001). Sensitivity may decrease at higher ionic strength. We hypothesize that PEG spacers could help.",
        "section": "results",
        "page": 1,
        "min_confidence": 900,
        "doc_id": None,
        "region_ids": [],
    }

    extracted = extractor.extract_from_chunk(chunk)
    assert extracted

    for claim in extracted:
        assert claim["epistemic_tag"] in VALID_TAGS
        assert 0 <= claim["composite_confidence"] <= 1000
        assert claim["status"] in ("Complete", "Incomplete")
|
|
def test_section_modifier_applied():
    """The same sentence must not score higher in the abstract than in results."""
    extractor = QualifiedExtractor(TEST_DB)
    sentence = "The measured value was 0.8 fM with p<0.001."

    def make_chunk(section):
        # Identical chunk except for its source section.
        return {
            "text": sentence,
            "section": section, "page": 1, "min_confidence": 900,
            "doc_id": None, "region_ids": [],
        }

    from_results = extractor.extract_from_chunk(make_chunk("results"))
    from_abstract = extractor.extract_from_chunk(make_chunk("abstract"))

    if from_results and from_abstract:
        # Abstract claims are discounted relative to results claims.
        assert from_abstract[0]["composite_confidence"] <= from_results[0]["composite_confidence"]
|
|
def test_abstract_fact_downgraded_to_interpretation():
    """Abstract claims tagged as Fact should be forced to Interpretation.

    The original assertion only checked membership in VALID_TAGS, which holds
    for any valid extractor output and therefore never exercised the downgrade
    rule named in this docstring. The extra assertion pins the actual contract:
    no claim sourced from the abstract may remain a bare "Fact".
    """
    extractor = QualifiedExtractor(TEST_DB)
    chunk = {
        "text": "We measured the LOD at 0.8 fM, achieving clinical sensitivity.",
        "section": "abstract", "page": 1, "min_confidence": 900,
        "doc_id": None, "region_ids": [],
    }
    claims = extractor.extract_from_chunk(chunk)

    for claim in claims:
        if claim["source_section"] == "abstract":
            assert claim["epistemic_tag"] in VALID_TAGS
            # Downgrade rule: abstract claims cannot be asserted as Fact.
            assert claim["epistemic_tag"] != "Fact"
|
|
def test_null_result_detection():
    """A 'no significant difference' sentence yields claims, ideally flagged null."""
    extractor = QualifiedExtractor(TEST_DB)
    chunk = {
        "text": "There was no significant difference between treatment and control groups (p=0.43, N=200).",
        "section": "results", "page": 1, "min_confidence": 900,
        "doc_id": None, "region_ids": [],
    }

    extracted = extractor.extract_from_chunk(chunk)
    flagged_null = [c for c in extracted if c["is_null_result"]]

    # NOTE(review): the `or` lets this pass whenever *any* claim is extracted,
    # even if none is flagged as a null result — consider tightening.
    assert flagged_null or extracted
|
|
def test_qualifier_extraction():
    """Hedging language ('may', 'suggests', ...) is surfaced as qualifiers."""
    extractor = QualifiedExtractor(TEST_DB)
    chunk = {
        "text": "Results suggest that the effect may be temperature-dependent under these conditions.",
        "section": "discussion", "page": 1, "min_confidence": 900,
        "doc_id": None, "region_ids": [],
    }
    known_qualifiers = {"may", "suggests", "under these conditions"}

    for claim in extractor.extract_from_chunk(chunk):
        qualifiers = claim.get("qualifiers")
        if qualifiers:
            assert any(q in known_qualifiers for q in qualifiers)
|
|
def test_full_document_extraction():
    """End-to-end: ingest the sample paper, then extract claims from it."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(SAMPLE_PAPER)
        temp_path = f.name

    try:
        parser = StructuralParser(TEST_DB)
        doc_id = parser.ingest_document(temp_path, title="Integration Test Paper")["doc_id"]

        extract_result = QualifiedExtractor(TEST_DB).extract_from_document(doc_id)

        assert extract_result["total_claims"] > 0
        # NOTE(review): the right-hand side of this `or` repeats the assertion
        # above, so the section-distribution check can never fail on its own.
        assert ("results" in extract_result.get("section_distribution", {})
                or extract_result["total_claims"] > 0)
    finally:
        os.unlink(temp_path)
|
|
|
|
| |
| |
| |
|
|
def test_graph_add_node():
    """Adding one claim node is reflected in the node count."""
    graph = KnowledgeGraph(TEST_DB)
    graph.add_claim_node("CLM_TEST001", "Test claim about graphene", {"tag": "Fact"})
    assert graph.get_stats()["total_nodes"] >= 1
|
|
def test_graph_add_edge():
    """New edges get EDGE_-prefixed ids and are counted in graph stats."""
    graph = KnowledgeGraph(TEST_DB)
    for node_id, label in (("CLM_A", "Claim A"), ("CLM_B", "Claim B")):
        graph.add_claim_node(node_id, label)

    edge_id = graph.add_edge("CLM_A", "CLM_B", "supports", 0.85, ["10.1234/test"])

    assert edge_id.startswith("EDGE_")
    assert graph.get_stats()["total_edges"] >= 1
|
|
def test_graph_neighbors():
    """Both outgoing edges of a node show up among its neighbors."""
    graph = KnowledgeGraph(TEST_DB)
    for node_id, label in (("CLM_C", "Claim C"), ("CLM_D", "Claim D"), ("CLM_E", "Claim E")):
        graph.add_claim_node(node_id, label)
    graph.add_edge("CLM_C", "CLM_D", "supports", 0.9)
    graph.add_edge("CLM_C", "CLM_E", "refutes", 0.7)

    assert len(graph.get_neighbors("CLM_C")) >= 2
|
|
def test_graph_inferred_edges_hidden_by_default():
    """Inferred edges are excluded unless include_inferred=True is requested."""
    graph = KnowledgeGraph(TEST_DB)
    graph.add_claim_node("CLM_F", "Claim F")
    graph.add_claim_node("CLM_G", "Claim G")
    graph.add_edge("CLM_F", "CLM_G", "investigative_hypothesis", 0.4, is_inferred=True)

    # Default view: no inferred edge should be visible.
    visible = graph.get_neighbors("CLM_F", include_inferred=False)
    assert not any(neighbor.get("is_inferred") for neighbor in visible)

    # Opting in reveals the inferred edge.
    assert len(graph.get_neighbors("CLM_F", include_inferred=True)) >= 1
|
|
|
|
| |
| |
| |
|
|
def test_scorer_basic():
    """score_claim returns all four score fields, each within fixed-point bounds."""
    scorer = CalibratedScorer(TEST_DB)
    claim = {
        "evidence_strength": 900,
        "source_section": "results",
        "missing_fields": "[]",
        "qualifiers": "[]",
        "parse_confidence": 950,
        "is_null_result": False,
        "is_inherited_citation": False,
    }
    source = {"study_type": "direct_physical_measurement", "journal_tier": 1}

    scores = scorer.score_claim(claim, source)

    for field in ("evidence_quality", "truth_likelihood",
                  "qualifier_strength_score", "composite_confidence"):
        assert field in scores
    for bounded in ("evidence_quality", "composite_confidence"):
        assert 0 <= scores[bounded] <= 1000
|
|
def test_scorer_section_modifier():
    """Identical claims score higher from results than from the abstract."""
    scorer = CalibratedScorer(TEST_DB)

    base_claim = {
        "evidence_strength": 800,
        "missing_fields": "[]", "qualifiers": "[]",
        "parse_confidence": 1000, "is_null_result": False, "is_inherited_citation": False,
    }
    source = {"study_type": "in_vivo", "journal_tier": 1}

    from_results = scorer.score_claim({**base_claim, "source_section": "results"}, source)
    from_abstract = scorer.score_claim({**base_claim, "source_section": "abstract"}, source)

    assert from_results["evidence_quality"] > from_abstract["evidence_quality"]
|
|
def test_scorer_null_result_penalty():
    """A null result never scores a higher truth likelihood than the same positive claim."""
    scorer = CalibratedScorer(TEST_DB)
    source = {"study_type": "in_vivo", "journal_tier": 1}

    def make_claim(is_null):
        # Identical claim except for the null-result flag.
        return {"evidence_strength": 800, "source_section": "results",
                "missing_fields": "[]", "qualifiers": "[]",
                "parse_confidence": 1000, "is_null_result": is_null,
                "is_inherited_citation": False}

    positive = scorer.score_claim(make_claim(False), source)
    null = scorer.score_claim(make_claim(True), source)

    assert null["truth_likelihood"] <= positive["truth_likelihood"]
|
|
def test_scorer_parser_confidence_caps():
    """Low parse confidence caps evidence quality regardless of evidence strength."""
    scorer = CalibratedScorer(TEST_DB)
    claim = {
        "evidence_strength": 900, "source_section": "results",
        "missing_fields": "[]", "qualifiers": "[]",
        "parse_confidence": 400,  # deliberately poor parse
        "is_null_result": False, "is_inherited_citation": False,
    }
    source = {"study_type": "in_vivo", "journal_tier": 1}

    assert scorer.score_claim(claim, source)["evidence_quality"] <= 400
|
|
|
|
| |
| |
| |
|
|
def test_full_pipeline():
    """Test the complete pipeline: ingest → extract → graph → score."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(SAMPLE_PAPER)
        temp_path = f.name

    try:
        # Layer 0: structural parse.
        ingest_result = StructuralParser(TEST_DB).ingest_document(
            temp_path, title="Full Pipeline Test"
        )
        doc_id = ingest_result["doc_id"]
        assert ingest_result["total_regions"] > 0

        # Layer 2: claim extraction.
        extract_result = QualifiedExtractor(TEST_DB).extract_from_document(doc_id)
        assert extract_result["total_claims"] > 0

        # Layer 4: push every extracted claim into the knowledge graph.
        graph = KnowledgeGraph(TEST_DB)
        conn = get_db(TEST_DB)
        rows = conn.execute(
            "SELECT claim_id, text FROM claims WHERE source_doc_id = ?",
            (doc_id,),
        ).fetchall()
        conn.close()

        for row in rows:
            record = dict(row)
            graph.add_claim_node(record["claim_id"], record["text"])

        assert graph.get_stats()["total_nodes"] > 0

        # Layer 5: rescore everything and confirm scores were persisted.
        rescored = CalibratedScorer(TEST_DB).rescore_all_claims()
        assert rescored > 0

        conn = get_db(TEST_DB)
        scored_count = conn.execute(
            "SELECT COUNT(*) FROM claims WHERE evidence_quality IS NOT NULL"
        ).fetchone()[0]
        conn.close()
        assert scored_count > 0

    finally:
        os.unlink(temp_path)
|
|
|
|
# Allow running this file directly (python <file>.py) — hands control to pytest
# with verbose output, same as `pytest -v <file>`.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
|