File size: 8,955 Bytes
04607af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""
PhD Research OS — Taxonomy Tests
==================================
Tests for the Quantum-Bio V2 taxonomy, domain management, and confidence scoring.
"""

import os
import sys
import json
import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from phd_research_os.taxonomy import (
    TaxonomyManager, STUDY_TYPE_WEIGHTS, ALLOWED_STUDY_TYPES,
    TAXONOMY_VERSION, PIPELINE_VERSION, LEGACY_TO_V2_MAP
)
from phd_research_os.db import init_db, get_db, create_claim, create_source, to_fixed, from_fixed

# Path of the throwaway database file shared by every test in this module.
# It is relative, so it is created in the current working directory; the
# autouse fixture removes it (plus its "-wal"/"-shm" siblings) after each test.
TEST_DB = "test_taxonomy.db"


@pytest.fixture(autouse=True)
def setup_teardown():
    """Seed the test database before each test and delete its files afterwards.

    Setup constructs a ``TaxonomyManager`` on ``TEST_DB`` and inserts one
    claim and one source through the ``db`` helpers. Teardown removes the
    database file and its ``-wal`` / ``-shm`` companions if they exist.

    The whole body is wrapped in ``try``/``finally`` so that the files are
    removed even when seeding fails before the ``yield``; otherwise a
    half-built database would leak into the next test.
    """
    try:
        # NOTE(review): constructed before get_db(), presumably so the manager
        # can prepare the database first -- confirm against TaxonomyManager.
        tm = TaxonomyManager(db_path=TEST_DB)
        # Seed test data
        conn = get_db(TEST_DB)
        try:
            create_claim(conn, "Test claim", "Fact", 0.85,
                         study_type="primary_experimental", evidence_strength=0.9)
            create_source(conn, "10.1234/test", "Test Paper", study_type="Simulation")
        finally:
            # Always release the connection, even if seeding raised.
            conn.close()
        yield
    finally:
        for suffix in ("", "-wal", "-shm"):
            p = TEST_DB + suffix
            if os.path.exists(p):
                os.remove(p)


# ============================================================
# Base Taxonomy Tests
# ============================================================

def test_8_study_types_defined():
    """The taxonomy exposes exactly eight allowed study types."""
    expected_count = 8
    assert len(ALLOWED_STUDY_TYPES) == expected_count

def test_weights_are_fixed_point():
    """Every study-type weight is an ``int`` in the inclusive range 0..1000."""
    for name in STUDY_TYPE_WEIGHTS:
        value = STUDY_TYPE_WEIGHTS[name]
        assert isinstance(value, int), f"{name} weight should be int (fixed-point)"
        assert 0 <= value <= 1000, f"{name} weight {value} out of range"

def test_in_vivo_highest():
    """``in_vivo`` carries a weight of exactly 1000."""
    in_vivo_weight = STUDY_TYPE_WEIGHTS["in_vivo"]
    assert in_vivo_weight == 1000

def test_perspective_lowest():
    """``perspective`` carries a weight of exactly 200."""
    perspective_weight = STUDY_TYPE_WEIGHTS["perspective"]
    assert perspective_weight == 200

def test_simulation_split():
    """First-principles simulations outweigh phenomenological ones."""
    first_principles = STUDY_TYPE_WEIGHTS["first_principles_simulation"]
    phenomenological = STUDY_TYPE_WEIGHTS["phenomenological_simulation"]
    assert first_principles > phenomenological


# ============================================================
# Normalization Tests
# ============================================================

def test_normalize_legacy_types():
    """Legacy CamelCase study-type names normalize to the expected identifiers."""
    manager = TaxonomyManager(db_path=TEST_DB)
    legacy_to_expected = (
        ("PrimaryExperimental", "direct_physical_measurement"),
        ("InVitro", "in_vitro"),
        ("Simulation", "phenomenological_simulation"),
        ("Review", "review"),
    )
    for legacy_name, expected in legacy_to_expected:
        assert manager.normalize_study_type(legacy_name) == expected

def test_normalize_aliases():
    """Common alias spellings normalize to the expected study types."""
    manager = TaxonomyManager(db_path=TEST_DB)
    alias_to_expected = (
        ("clinical_trial", "in_vivo"),
        ("meta-analysis", "review"),
        ("case_study", "perspective"),
    )
    for alias, expected in alias_to_expected:
        assert manager.normalize_study_type(alias) == expected

def test_normalize_v2_identity():
    """Normalizing an already-allowed study type returns it unchanged."""
    manager = TaxonomyManager(db_path=TEST_DB)
    assert all(
        manager.normalize_study_type(study_type) == study_type
        for study_type in ALLOWED_STUDY_TYPES
    )

def test_normalize_case_insensitive():
    """Upper-case and mixed-case inputs normalize to the lower-case identifier."""
    manager = TaxonomyManager(db_path=TEST_DB)
    cased_to_expected = (
        ("IN_VIVO", "in_vivo"),
        ("Mathematical_Proof", "mathematical_proof"),
    )
    for cased, expected in cased_to_expected:
        assert manager.normalize_study_type(cased) == expected


# ============================================================
# Confidence Scoring Tests
# ============================================================

def test_confidence_max():
    """The ``in_vivo`` scoring call made here reports a confidence of 1.0."""
    scored = TaxonomyManager(db_path=TEST_DB).score_confidence(1.0, "in_vivo", 1, True)
    assert scored["confidence"] == 1.0

def test_confidence_formula():
    """The ``in_vitro`` score lands in the window around the hand-computed value."""
    manager = TaxonomyManager(db_path=TEST_DB)
    scored = manager.score_confidence(0.8, "in_vitro", 2, True)
    # Hand-computed expectation: 0.8 × 0.85 × 0.85 × 1.0 = 0.578
    confidence = scored["confidence"]
    assert 0.57 <= confidence <= 0.58

def test_confidence_incomplete_penalty():
    """Passing ``False`` as the last argument lowers confidence and reports a 0.7 penalty."""
    manager = TaxonomyManager(db_path=TEST_DB)
    flagged_true = manager.score_confidence(0.9, "in_vivo", 1, True)
    flagged_false = manager.score_confidence(0.9, "in_vivo", 1, False)
    assert flagged_false["confidence"] < flagged_true["confidence"]
    assert flagged_false["completeness_penalty"] == 0.7

def test_confidence_taxonomy_version_tag():
    """A score result is tagged with the current ``TAXONOMY_VERSION``."""
    scored = TaxonomyManager(db_path=TEST_DB).score_confidence(0.5, "review", 3, True)
    assert scored["taxonomy_version"] == TAXONOMY_VERSION

def test_confidence_perspective_low():
    """The ``perspective`` scoring call made here reports a confidence of 0.2."""
    scored = TaxonomyManager(db_path=TEST_DB).score_confidence(1.0, "perspective", 1, True)
    # Hand-computed expectation: 1.0 × 0.2 × 1.0 × 1.0
    assert scored["confidence"] == 0.2


# ============================================================
# Domain Management Tests
# ============================================================

def test_default_domains_seeded():
    """A new manager already lists the three built-in domains."""
    manager = TaxonomyManager(db_path=TEST_DB)
    listed_ids = [entry["domain_id"] for entry in manager.list_domains()]
    for expected_id in ("quantum_bio", "biosensors", "materials_science"):
        assert expected_id in listed_ids

def test_create_custom_domain():
    """A domain created through the manager can be fetched back by its id."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.create_domain("my_field", "My Research Field", "Custom taxonomy")

    created = manager.get_domain("my_field")
    assert created is not None
    assert created["name"] == "My Research Field"

def test_add_custom_study_type():
    """A study type added with weight 0.75 is reported on the domain with weight 750."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.create_domain("test_domain", "Test", "Test domain")
    manager.add_study_type("test_domain", "custom_assay", 0.75, "A custom assay type")

    fetched = manager.get_domain("test_domain")
    assert "custom_assay" in fetched["custom_study_types"]
    stored_weight = fetched["custom_study_types"]["custom_assay"]["weight"]
    assert stored_weight == 750

def test_custom_type_affects_scoring():
    """Scoring with a domain's custom study type uses that type's 0.99 weight."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.create_domain("test_scoring", "Test", "Test")
    manager.add_study_type("test_scoring", "ultra_precise", 0.99, "Ultra-precise measurement")

    scored = manager.score_confidence(1.0, "ultra_precise", 1, True, domain_id="test_scoring")
    confidence = scored["confidence"]
    assert 0.98 <= confidence <= 1.0

def test_remove_study_type():
    """Removing a custom study type returns a truthy result and drops it from the domain."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.create_domain("rm_test", "Test", "Test")
    manager.add_study_type("rm_test", "temp_type", 0.5, "Temporary")

    removed = manager.remove_study_type("rm_test", "temp_type")
    assert removed

    remaining_types = manager.get_domain("rm_test")["custom_study_types"]
    assert "temp_type" not in remaining_types

def test_cannot_delete_base_taxonomy():
    """Deleting the built-in ``quantum_bio`` domain returns a falsy result."""
    manager = TaxonomyManager(db_path=TEST_DB)
    deleted = manager.delete_domain("quantum_bio")
    assert not deleted

def test_soft_delete_domain():
    """A deleted domain leaves the active listing but stays in the full listing."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.create_domain("deletable", "Deletable", "Will be deleted")
    assert manager.delete_domain("deletable")

    # The active-only view must no longer mention the domain.
    active_domains = manager.list_domains(active_only=True)
    assert all(entry["domain_id"] != "deletable" for entry in active_domains)

    # The unfiltered view must still contain it.
    every_domain = manager.list_domains(active_only=False)
    assert any(entry["domain_id"] == "deletable" for entry in every_domain)


# ============================================================
# Migration Tests
# ============================================================

def test_migration_idempotent():
    """A second ``migrate_to_v2`` call reports that the migration already ran."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.migrate_to_v2()  # first run; only the repeat's result is inspected
    second_run = manager.migrate_to_v2()
    assert second_run["already_migrated"]

def test_migration_normalizes_types():
    """After migration, every non-empty claim ``study_type`` is an accepted value.

    Accepted values are the members of ``ALLOWED_STUDY_TYPES`` plus the
    literal ``"primary_experimental"``, which is the value the module's
    fixture seeds on its claim.
    """
    tm = TaxonomyManager(db_path=TEST_DB)
    tm.migrate_to_v2()

    conn = get_db(TEST_DB)
    try:
        claims = conn.execute(
            "SELECT study_type FROM claims WHERE study_type IS NOT NULL"
        ).fetchall()
    finally:
        # Release the connection even if the query raises, so a failure here
        # does not keep the database file open during fixture teardown.
        conn.close()

    for claim in claims:
        st = dict(claim).get("study_type", "")
        if not st:
            continue  # empty strings are skipped, as in the NULL filter above
        assert st in ALLOWED_STUDY_TYPES or st == "primary_experimental", f"Unexpected type: {st}"

def test_rollback():
    """Rolling back after a migration reports a non-negative row count and no errors."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.migrate_to_v2()

    outcome = manager.rollback_to_v1()
    reverted = outcome["rows_reverted"]
    assert reverted >= 0
    assert not outcome["errors"]


# ============================================================
# Audit Log Tests
# ============================================================

def test_audit_log_records_actions():
    """Creating a domain leaves a ``create_domain`` entry in the audit log."""
    manager = TaxonomyManager(db_path=TEST_DB)
    manager.create_domain("audit_test", "Audit Test", "Testing audit")

    entries = manager.get_audit_log()
    assert len(entries) >= 1
    recorded_actions = [entry["action"] for entry in entries]
    assert "create_domain" in recorded_actions


# ============================================================
# Cache Tests
# ============================================================

def test_cache_key_versioned():
    """Cache keys repeat for the same input and differ for different inputs."""
    manager = TaxonomyManager(db_path=TEST_DB)

    first = manager.generate_cache_key("abc123")
    repeat = manager.generate_cache_key("abc123")
    assert first == repeat  # Deterministic

    other = manager.generate_cache_key("different_hash")
    assert first != other

def test_cache_validation():
    """Entries tagged with current versions validate; entries with old versions do not."""
    manager = TaxonomyManager(db_path=TEST_DB)

    current = dict(taxonomy_version=TAXONOMY_VERSION, pipeline_version=PIPELINE_VERSION)
    assert manager.validate_cache_entry(current)

    outdated = dict(taxonomy_version="old_v1", pipeline_version="1.0.0")
    assert not manager.validate_cache_entry(outdated)


if __name__ == "__main__":
    # Propagate pytest's return code so running this file as a script exits
    # non-zero when any test fails (previously the code was discarded).
    sys.exit(pytest.main([__file__, "-v"]))