nkshirsa committed
Commit a4f0eec · verified · 1 Parent(s): b639058

Add Epistemic Velocity tracking + Confidence Decomposition Display (Layer 5 upgrades)

phd_research_os_v2/layer5/velocity_and_decomposition.py ADDED
@@ -0,0 +1,466 @@
"""
Layer 5: Epistemic Velocity + Confidence Decomposition
=======================================================

Two capabilities:

1. Epistemic Velocity Tracking:
   For every canonical claim, track how confidence has changed over time.
   Rising = being confirmed. Falling = being challenged. Volatile = contested.

   Inspired by: CLAIRE + PaperQA2
   Source: SYSTEM_INSPIRATIONS.md NF-1

2. Confidence Decomposition Display:
   Generate human-readable explanations of WHY a claim has a given score.
   Template-based from the scoring formula's components. No extra AI calls.

   Inspired by: CLUE (arxiv:2505.17855)
   Source: SYSTEM_INSPIRATIONS.md NF-4, IN-7

No ML dependencies. Pure Python + SQLite.
"""

import json
import logging
from typing import Optional
from datetime import datetime, timezone

from ..core.database import get_db, gen_id, now_iso, to_fixed, from_fixed

logger = logging.getLogger(__name__)


# ══════════════════════════════════════════════════════════════════════
# PART 1: EPISTEMIC VELOCITY TRACKING
# ══════════════════════════════════════════════════════════════════════

class EpistemicVelocity:
    """
    Tracks how claim confidence changes over time.

    For each canonical claim, computes:
      - trend: rising / falling / stable
      - stability: stable / volatile
      - velocity: rate of change (confidence units per month)
    """

    def __init__(self, db_path: Optional[str] = None):
        self.db_path = db_path

    def compute_velocity(self, canonical_id: str) -> dict:
        """
        Compute epistemic velocity for a canonical claim.

        Returns:
            {
                "canonical_id": str,
                "current_confidence": float,
                "trend": "rising" | "falling" | "stable" | "insufficient_data",
                "stability": "stable" | "volatile" | "unknown",
                "velocity": float (confidence change per month),
                "history": [{"date": ..., "confidence": ..., "source": ...}, ...],
                "data_points": int,
                "months_tracked": float,
            }
        """
        conn = get_db(self.db_path)

        # Get version history from canonical_claims
        row = conn.execute(
            "SELECT * FROM canonical_claims WHERE canonical_id = ?",
            (canonical_id,)
        ).fetchone()

        if not row:
            conn.close()
            return {
                "canonical_id": canonical_id,
                "current_confidence": 0,
                "trend": "insufficient_data",
                "stability": "unknown",
                "velocity": 0,
                "history": [],
                "data_points": 0,
                "months_tracked": 0,
            }

        canon = dict(row)
        version_history = json.loads(canon.get("version_history") or "[]")
        current_confidence = from_fixed(canon.get("composite_confidence", 0))

        if len(version_history) < 2:
            conn.close()
            return {
                "canonical_id": canonical_id,
                "current_confidence": current_confidence,
                "trend": "insufficient_data",
                "stability": "unknown",
                "velocity": 0,
                "history": version_history,
                "data_points": len(version_history),
                "months_tracked": 0,
            }

        conn.close()

        # Extract time series (fall back to a fixed date for unparseable entries)
        confidences = [from_fixed(v.get("confidence", 500)) for v in version_history]
        dates = []
        for v in version_history:
            try:
                dates.append(datetime.fromisoformat(v.get("date", "2026-01-01")))
            except (TypeError, ValueError):
                dates.append(datetime(2026, 1, 1))

        # Compute months span (at least 2 entries exist past this point)
        span_days = (dates[-1] - dates[0]).days
        months_tracked = max(span_days / 30.0, 0.1)

        # Compute trend: slope of an ordinary least-squares line fit of
        # confidence against days since the first entry
        days_from_start = [(d - dates[0]).days for d in dates]
        n = len(days_from_start)
        mean_x = sum(days_from_start) / n
        mean_y = sum(confidences) / n

        numerator = sum((x - mean_x) * (y - mean_y)
                        for x, y in zip(days_from_start, confidences))
        denominator = sum((x - mean_x) ** 2 for x in days_from_start)

        if denominator > 0:
            slope_per_day = numerator / denominator
            slope_per_month = slope_per_day * 30
        else:
            slope_per_month = 0

        # Determine trend
        if slope_per_month > 0.01:
            trend = "rising"
        elif slope_per_month < -0.01:
            trend = "falling"
        else:
            trend = "stable"

        # Compute stability (population std dev of the last 3 data points)
        recent = confidences[-min(3, len(confidences)):]
        if len(recent) >= 2:
            mean_r = sum(recent) / len(recent)
            variance = sum((x - mean_r) ** 2 for x in recent) / len(recent)
            std_dev = variance ** 0.5
            stability = "stable" if std_dev < 0.05 else "volatile"
        else:
            stability = "unknown"

        return {
            "canonical_id": canonical_id,
            "current_confidence": current_confidence,
            "trend": trend,
            "stability": stability,
            "velocity": round(slope_per_month, 4),
            "history": version_history,
            "data_points": len(version_history),
            "months_tracked": round(months_tracked, 1),
        }

    def compute_all_velocities(self) -> list[dict]:
        """Compute velocity for all canonical claims."""
        conn = get_db(self.db_path)
        rows = conn.execute("SELECT canonical_id FROM canonical_claims").fetchall()
        conn.close()

        results = []
        for row in rows:
            results.append(self.compute_velocity(dict(row)["canonical_id"]))

        return results

    def get_trending(self, direction: str = "rising", limit: int = 20) -> list[dict]:
        """Get claims trending in a specific direction."""
        all_velocities = self.compute_all_velocities()

        filtered = [v for v in all_velocities if v["trend"] == direction]

        # Sort by absolute velocity (strongest trend first)
        filtered.sort(key=lambda v: abs(v["velocity"]), reverse=True)

        return filtered[:limit]

    def get_volatile(self, limit: int = 20) -> list[dict]:
        """Get the most volatile claims (actively contested)."""
        all_velocities = self.compute_all_velocities()

        volatile = [v for v in all_velocities if v["stability"] == "volatile"]
        volatile.sort(key=lambda v: abs(v["velocity"]), reverse=True)

        return volatile[:limit]

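
# Usage sketch (illustrative, not executed on import). Assumes a populated
# canonical_claims table whose version_history column holds a JSON list of
# {"date", "confidence", "source"} entries, as read by compute_velocity above;
# "research.db" and "claim-0001" are made-up placeholders.
#
#     ev = EpistemicVelocity("research.db")
#     v = ev.compute_velocity("claim-0001")
#     print(v["trend"], v["velocity"])        # e.g. "rising" 0.0231
#     for c in ev.get_volatile(limit=5):      # most actively contested claims
#         print(c["canonical_id"], c["stability"])
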

# ══════════════════════════════════════════════════════════════════════
# PART 2: CONFIDENCE DECOMPOSITION DISPLAY
# ══════════════════════════════════════════════════════════════════════

# Human-readable names for score components
COMPONENT_NAMES = {
    "evidence_strength": "AI evidence assessment",
    "study_quality_weight": "study type quality",
    "journal_tier_weight": "journal tier",
    "completeness_penalty": "data completeness",
    "section_modifier": "section reliability",
    "parse_confidence": "parser quality",
}

SECTION_NAMES = {
    "abstract": "Abstract (0.7× - often overstates results)",
    "introduction": "Introduction (0.8×)",
    "methods": "Methods (1.0×)",
    "results": "Results (1.0× - primary evidence)",
    "results_discussion": "Results & Discussion (0.9×)",
    "discussion": "Discussion (0.75× - goes beyond data)",
    "conclusion": "Conclusion (0.8×)",
    "supplement": "Supplement (1.0× - same weight as results)",
}

STUDY_TYPE_NAMES = {
    "in_vivo": "in vivo experiment (highest weight)",
    "direct_physical_measurement": "direct measurement (highest weight)",
    "mathematical_proof": "mathematical proof (0.95×)",
    "in_vitro": "in vitro experiment (0.85×)",
    "first_principles_simulation": "first-principles simulation (0.80×)",
    "phenomenological_simulation": "phenomenological model (0.60×)",
    "review": "literature review (0.40×)",
    "perspective": "perspective/opinion (0.20×)",
}


def decompose_confidence(claim: dict, source: Optional[dict] = None) -> dict:
    """
    Generate a human-readable confidence decomposition for a claim.

    Template-based - no AI calls. Reads the scoring components and
    generates plain-English explanations.

    Args:
        claim: Claim dict from the database
        source: Source/paper dict (optional, for study type and journal tier)

    Returns:
        {
            "composite_confidence": float,
            "scores": {
                "evidence_quality": {"value": float, "bar": "████████░░", "explanation": str},
                "truth_likelihood": {"value": float, "bar": "██████░░░░", "explanation": str},
                "qualifier_strength": {"value": float, "bar": "████░░░░░░", "explanation": str},
            },
            "headline": "Strong evidence, but one contradicting study and hedged language",
            "warnings": ["Abstract claim forced to Interpretation", ...],
            "action_items": ["Review conflict with Kim 2024", ...],
        }
    """
    # Extract components
    ev_quality = from_fixed(claim.get("evidence_quality", 0))
    truth_like = from_fixed(claim.get("truth_likelihood", 0))
    qual_strength = from_fixed(claim.get("qualifier_strength_score", 0))
    composite = from_fixed(claim.get("composite_confidence", 0))

    section = claim.get("source_section", "unknown")
    qualifiers = claim.get("qualifiers", [])
    if isinstance(qualifiers, str):
        qualifiers = json.loads(qualifiers)
    missing = claim.get("missing_fields", [])
    if isinstance(missing, str):
        missing = json.loads(missing)
    is_null = claim.get("is_null_result", False)
    is_inherited = claim.get("is_inherited_citation", False)
    practical_sig = claim.get("practical_significance", True)

    ev_strength = from_fixed(claim.get("evidence_strength", 0))

    # Study type info
    study_type = source.get("study_type", "unknown") if source else "unknown"
    journal_tier = source.get("journal_tier", 2) if source else 2

    # Build bar visualizations (10 chars, clamped so out-of-range values
    # cannot produce malformed bars)
    def bar(value, max_val=1.0):
        filled = max(0, min(10, int((value / max_val) * 10)))
        return "█" * filled + "░" * (10 - filled)

    # Evidence quality explanation
    ev_parts = []
    if ev_strength > 0:
        ev_parts.append(f"AI rated evidence at {ev_strength:.2f}")
    if study_type in STUDY_TYPE_NAMES:
        ev_parts.append(f"study type: {STUDY_TYPE_NAMES[study_type]}")
    ev_parts.append(f"journal tier {journal_tier}")
    if section in SECTION_NAMES:
        ev_parts.append(f"from {SECTION_NAMES[section]}")
    if missing:
        ev_parts.append(f"incomplete ({len(missing)} fields missing)")
    ev_explanation = "; ".join(ev_parts) if ev_parts else "No component data available"

    # Truth likelihood explanation
    truth_parts = [f"based on evidence quality of {ev_quality:.2f}"]
    if is_null:
        truth_parts.append("null result (capped at 0.50)")
    if is_inherited:
        truth_parts.append("inherited citation (-0.20 penalty)")
    if not practical_sig:
        truth_parts.append("⚠️ large sample + tiny effect → capped at 0.40")
    truth_explanation = "; ".join(truth_parts)

    # Qualifier strength explanation
    qual_parts = []
    if qualifiers:
        qual_parts.append(f"{len(qualifiers)} qualifier(s): {', '.join(qualifiers[:5])}")
        qual_parts.append(f"-{len(qualifiers) * 0.1:.1f} penalty applied")
    else:
        qual_parts.append("no hedging language detected (full weight)")
    if is_null:
        qual_parts.append("null result cap (max 0.50)")
    if is_inherited:
        qual_parts.append("inherited citation (-0.20)")
    qual_explanation = "; ".join(qual_parts)

    # Warnings
    warnings = []
    if section == "abstract":
        warnings.append("Abstract claim - forced to Interpretation with 0.7× penalty")
    if not practical_sig:
        warnings.append("Statistically significant but practically meaningless (large N, tiny effect)")
    if is_null:
        warnings.append("This is a null/negative result")
    if is_inherited:
        warnings.append("This finding is cited from another paper, not original to this one")
    if missing:
        warnings.append(f"Missing fields: {', '.join(missing)}")

    raw_parse = claim.get("parse_confidence")
    parse_conf = from_fixed(raw_parse if isinstance(raw_parse, int) else 1000)
    if parse_conf < 0.8:
        warnings.append(f"Parser confidence only {parse_conf:.2f} - source text may be garbled")

    # Headline
    if composite > 0.8:
        headline = "Strong confidence - well-supported claim"
    elif composite > 0.6:
        parts = []
        if ev_quality > 0.7:
            parts.append("good evidence")
        if truth_like < 0.6:
            parts.append("but truth likelihood reduced")
        if qual_strength < 0.6:
            parts.append("hedged language")
        headline = ", ".join(parts) if parts else "Moderate confidence"
    elif composite > 0.3:
        headline = "Low confidence - review recommended"
    else:
        headline = "Very low confidence - quarantine candidate"

    # Action items
    action_items = []
    if ev_quality < 0.5:
        action_items.append("Find additional supporting evidence")
    if qualifiers:
        action_items.append("Verify qualifier scope - are conditions met?")
    if is_inherited:
        action_items.append("Trace to original source paper and verify")
    if parse_conf < 0.8:
        action_items.append("Check original PDF - parser may have misread this region")

    return {
        "composite_confidence": round(composite, 3),
        "scores": {
            "evidence_quality": {
                "value": round(ev_quality, 3),
                "bar": bar(ev_quality),
                "explanation": ev_explanation,
            },
            "truth_likelihood": {
                "value": round(truth_like, 3),
                "bar": bar(truth_like),
                "explanation": truth_explanation,
            },
            "qualifier_strength": {
                "value": round(qual_strength, 3),
                "bar": bar(qual_strength),
                "explanation": qual_explanation,
            },
        },
        "headline": headline,
        "warnings": warnings,
        "action_items": action_items,
    }

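# Usage sketch (illustrative). Field values are synthetic fixed-point ints,
# assuming the project's 1000 == 1.0 scale implied by from_fixed above:
#
#     claim = {"evidence_quality": 720, "truth_likelihood": 650,
#              "qualifier_strength_score": 400, "composite_confidence": 610,
#              "source_section": "discussion", "qualifiers": '["may", "suggests"]'}
#     decompose_confidence(claim)["headline"]   # → "good evidence, hedged language"
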

def format_decomposition_text(decomposition: dict) -> str:
    """
    Format a decomposition dict as human-readable text.
    Suitable for terminal output, Obsidian export, or Gradio display.
    """
    d = decomposition
    lines = []

    lines.append(f"Composite Confidence: {d['composite_confidence']:.3f}")
    lines.append(f"  → {d['headline']}")
    lines.append("")

    for score_name, score_data in d["scores"].items():
        display_name = score_name.replace("_", " ").title()
        lines.append(f"  {display_name:25s} {score_data['value']:.3f}  {score_data['bar']}")
        lines.append(f"      ({score_data['explanation']})")

    if d["warnings"]:
        lines.append("")
        lines.append("  ⚠️ Warnings:")
        for w in d["warnings"]:
            lines.append(f"    • {w}")

    if d["action_items"]:
        lines.append("")
        lines.append("  📋 Action Items:")
        for a in d["action_items"]:
            lines.append(f"    • {a}")

    return "\n".join(lines)


def format_decomposition_markdown(decomposition: dict) -> str:
    """Format for Obsidian/Markdown export."""
    d = decomposition
    lines = []

    lines.append(f"**Confidence: {d['composite_confidence']:.3f}** - {d['headline']}")
    lines.append("")
    lines.append("| Score | Value | Visual |")
    lines.append("|-------|-------|--------|")

    for score_name, score_data in d["scores"].items():
        display_name = score_name.replace("_", " ").title()
        lines.append(f"| {display_name} | {score_data['value']:.3f} | `{score_data['bar']}` |")

    lines.append("")

    for score_name, score_data in d["scores"].items():
        display_name = score_name.replace("_", " ").title()
        lines.append(f"- **{display_name}**: {score_data['explanation']}")

    if d["warnings"]:
        lines.append("")
        lines.append("> [!warning] Warnings")
        for w in d["warnings"]:
            lines.append(f"> - {w}")

    if d["action_items"]:
        lines.append("")
        lines.append("**Action Items:**")
        for a in d["action_items"]:
            lines.append(f"- [ ] {a}")

    return "\n".join(lines)
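

if __name__ == "__main__":
    # Minimal smoke test for the decomposition display (no database needed).
    # All field values below are synthetic and assume the fixed-point scale
    # used by from_fixed above (1000 == 1.0). Run as a module so the relative
    # import resolves, e.g.:
    #   python -m phd_research_os_v2.layer5.velocity_and_decomposition
    sample_claim = {
        "evidence_quality": 720,
        "truth_likelihood": 550,
        "qualifier_strength_score": 400,
        "composite_confidence": 610,
        "evidence_strength": 700,
        "source_section": "abstract",
        "qualifiers": '["may", "suggests"]',
        "missing_fields": '["sample_size"]',
        "is_inherited_citation": True,
        "parse_confidence": 950,
    }
    sample_source = {"study_type": "in_vitro", "journal_tier": 1}

    decomposition = decompose_confidence(sample_claim, sample_source)
    print(format_decomposition_text(decomposition))
    print()
    print(format_decomposition_markdown(decomposition))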