nkshirsa committed on
Commit
dd48c30
·
verified ·
1 Parent(s): e6104bc

v2.0: phd_research_os_v2/layer5/scorer.py

Browse files
Files changed (1) hide show
  1. phd_research_os_v2/layer5/scorer.py +186 -0
phd_research_os_v2/layer5/scorer.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Layer 5: Code-Computed Calibrated Scoring
3
+ ===========================================
4
+ The LLM provides COMPONENTS. The CODE computes FINAL SCORES.
5
+ Three separate scores: evidence_quality, truth_likelihood, qualifier_strength.
6
+ """
7
+
8
+ import json
9
+ from ..core.database import get_db, to_fixed, from_fixed, now_iso
10
+
11
# All tables below hold fixed-point multipliers (1000 == 1.0); the scorer
# multiplies by a weight and then floor-divides by 1000.

# Study quality weights (Quantum-Bio V2 taxonomy), keyed by source study type.
# Study types not listed here are given a fallback weight by the scorer.
STUDY_QUALITY_WEIGHTS = {
    # Current taxonomy
    "in_vivo": 1000,
    "direct_physical_measurement": 1000,
    "mathematical_proof": 950,
    "in_vitro": 850,
    "first_principles_simulation": 800,
    "phenomenological_simulation": 600,
    "review": 400,
    "perspective": 200,
    # Legacy mappings
    "primary_experimental": 1000,
    "simulation": 600,
    "review_non_systematic": 400,
    "meta_analysis": 1000,
    "case_study": 300,
}

# Journal tier -> weight. Tier 0 denotes a preprint.
JOURNAL_TIER_WEIGHTS = {
    1: 1000,
    2: 850,
    3: 700,
    0: 500,
}

# Weight applied according to the paper section a claim was extracted from.
# Both the literal "unknown" and a missing section (None) map to 850.
SECTION_MODIFIERS = {
    "abstract": 700,
    "introduction": 800,
    "methods": 1000,
    "results": 1000,
    "results_discussion": 900,
    "discussion": 750,
    "conclusion": 800,
    "supplement": 1000,
    "unknown": 850,
    None: 850,
}
43
+
44
+
45
class CalibratedScorer:
    """
    Code-computed scoring engine.

    The LLM NEVER sets final confidence directly.
    This module computes all scores from components.
    """

    def __init__(self, db_path: str = None):
        """Remember the database path that ``rescore_all_claims`` passes to ``get_db``."""
        self.db_path = db_path

    @staticmethod
    def _as_list(value):
        """Normalise a list-valued claim field that may be NULL or JSON text.

        ``None`` and blank strings become ``[]``; any other string is decoded
        with ``json.loads`` (malformed JSON still raises); every other value
        is returned unchanged.
        """
        if value is None:
            return []
        if isinstance(value, str):
            text = value.strip()
            if not text:
                return []
            decoded = json.loads(text)
            return [] if decoded is None else decoded
        return value

    @staticmethod
    def _present_or(mapping: dict, key: str, default):
        """Return ``mapping[key]``, or ``default`` when the key is absent or None."""
        value = mapping.get(key)
        return default if value is None else value

    def score_claim(self, claim: dict, source: dict = None) -> dict:
        """
        Compute three separate scores for a claim.
        All arithmetic uses fixed-point integers (×1000).

        Args:
            claim: Claim record. Keys read: ``evidence_strength``,
                ``source_section``, ``missing_fields``, ``qualifiers``,
                ``parse_confidence``, ``is_null_result``,
                ``is_inherited_citation``, ``stat_effect_size``,
                ``stat_sample_size``. ``missing_fields`` and ``qualifiers``
                may be lists or JSON-encoded strings. Absent keys and
                None values (e.g. SQL NULLs) fall back to defaults.
            source: Optional source record; ``study_type`` and
                ``journal_tier`` are read from it.

        Returns:
            Dict with ``evidence_quality``, ``truth_likelihood``,
            ``qualifier_strength_score`` (each clamped to 0..1000),
            ``composite_confidence`` (their integer mean),
            ``practical_significance`` (bool) and a ``components`` dict of
            the inputs and weights that produced the scores.

        Raises:
            json.JSONDecodeError: If a string-valued ``missing_fields`` or
                ``qualifiers`` is non-empty but not valid JSON.
        """
        # ── Components ──
        # .get(key, default) does not cover keys that are present with a None
        # value, which is what a NULL column looks like after dict(row).
        evidence_strength = self._present_or(claim, "evidence_strength", 500)
        parse_confidence = self._present_or(claim, "parse_confidence", 1000)
        study_type = source.get("study_type", "unknown") if source else "unknown"
        journal_tier = source.get("journal_tier", 2) if source else 2
        section = claim.get("source_section", "unknown")
        missing_fields = self._as_list(claim.get("missing_fields"))
        qualifiers = self._as_list(claim.get("qualifiers"))
        is_null = claim.get("is_null_result", False)
        is_inherited = claim.get("is_inherited_citation", False)

        # ── Score 1: Evidence Quality ──
        sq_weight = STUDY_QUALITY_WEIGHTS.get(study_type, 600)
        jt_weight = JOURNAL_TIER_WEIGHTS.get(journal_tier, 700)
        completeness = 700 if missing_fields else 1000
        section_mod = SECTION_MODIFIERS.get(section, 850)

        # Apply each weight in turn, flooring after every step.
        evidence_quality = evidence_strength
        for weight in (sq_weight, jt_weight, completeness, section_mod):
            evidence_quality = evidence_quality * weight // 1000

        # Parser confidence CAPS evidence quality
        evidence_quality = min(evidence_quality, parse_confidence)

        # Statistical evidence gate: a tiny effect in a very large sample is
        # flagged as lacking practical significance and capped.
        practical_sig = True
        effect_size = claim.get("stat_effect_size")
        sample_size = claim.get("stat_sample_size")
        if effect_size is not None and sample_size is not None:
            if sample_size > 1000 and abs(effect_size) < 0.1:
                evidence_quality = min(evidence_quality, 400)
                practical_sig = False

        # Keep this score in the same 0..1000 range as the other two so an
        # out-of-range input cannot skew the composite.
        evidence_quality = max(0, min(1000, evidence_quality))

        # ── Score 2: Truth Likelihood ──
        # Starts from evidence quality, then applies penalties.
        truth_likelihood = evidence_quality
        if is_null:
            truth_likelihood = min(truth_likelihood, 500)
        if is_inherited:
            truth_likelihood -= 200
        truth_likelihood = max(0, min(1000, truth_likelihood))

        # ── Score 3: Qualifier Strength ──
        # Each qualifier costs 100 points.
        qualifier_strength = 1000
        if qualifiers:
            qualifier_strength -= len(qualifiers) * 100
        if is_null:
            qualifier_strength = min(qualifier_strength, 500)
        if is_inherited:
            qualifier_strength -= 200
        qualifier_strength = max(0, min(1000, qualifier_strength))

        # ── Composite ──
        composite = (evidence_quality + truth_likelihood + qualifier_strength) // 3

        return {
            "evidence_quality": evidence_quality,
            "truth_likelihood": truth_likelihood,
            "qualifier_strength_score": qualifier_strength,
            "composite_confidence": composite,
            "practical_significance": practical_sig,
            "components": {
                "evidence_strength": evidence_strength,
                "study_quality_weight": sq_weight,
                "journal_tier_weight": jt_weight,
                "completeness_penalty": completeness,
                "section_modifier": section_mod,
                "parse_confidence": parse_confidence,
            },
        }

    def rescore_all_claims(self) -> int:
        """Rescore all claims in the database. Returns number rescored.

        Every row of ``claims`` is scored with ``score_claim`` (using the
        matching ``sources`` row when the claim has a ``source_doi``) and the
        results are written back in one transaction. The connection is
        closed even if scoring or an update fails, in which case nothing is
        committed.
        """
        # NOTE(review): assumes get_db returns a sqlite3-style connection whose
        # rows convert with dict(row) — confirm against core.database.
        conn = get_db(self.db_path)
        try:
            rows = conn.execute("SELECT * FROM claims").fetchall()
            sources_by_doi = {}  # avoids re-querying a source shared by many claims
            count = 0

            for row in rows:
                # JSON / NULL list columns are normalised inside score_claim.
                claim = dict(row)

                # Get source info
                source = None
                doi = claim.get("source_doi")
                if doi:
                    if doi not in sources_by_doi:
                        src_row = conn.execute(
                            "SELECT * FROM sources WHERE doi = ?", (doi,)
                        ).fetchone()
                        sources_by_doi[doi] = dict(src_row) if src_row else None
                    source = sources_by_doi[doi]

                scores = self.score_claim(claim, source)

                conn.execute(
                    """
                    UPDATE claims SET
                        evidence_quality = ?,
                        truth_likelihood = ?,
                        qualifier_strength_score = ?,
                        composite_confidence = ?,
                        practical_significance = ?,
                        updated_at = ?
                    WHERE claim_id = ?
                    """,
                    (
                        scores["evidence_quality"],
                        scores["truth_likelihood"],
                        scores["qualifier_strength_score"],
                        scores["composite_confidence"],
                        int(scores["practical_significance"]),
                        now_iso(),
                        claim["claim_id"],
                    ),
                )
                count += 1

            conn.commit()
            return count
        finally:
            conn.close()