nkshirsa committed
Commit a4f0eec · verified · 1 Parent(s): b639058

Add Epistemic Velocity tracking + Confidence Decomposition Display (Layer 5 upgrades)

phd_research_os_v2/layer5/velocity_and_decomposition.py ADDED
@@ -0,0 +1,466 @@
"""
Layer 5: Epistemic Velocity + Confidence Decomposition
=======================================================

Two capabilities:

1. Epistemic Velocity Tracking:
   For every canonical claim, track how confidence has changed over time.
   Rising = being confirmed. Falling = being challenged. Volatile = contested.

   Inspired by: CLAIRE + PaperQA2
   Source: SYSTEM_INSPIRATIONS.md NF-1

2. Confidence Decomposition Display:
   Generate human-readable explanations of WHY a claim has a given score.
   Template-based from the scoring formula's components. No extra AI calls.

   Inspired by: CLUE (arxiv:2505.17855)
   Source: SYSTEM_INSPIRATIONS.md NF-4, IN-7

No ML dependencies. Pure Python + SQLite.
"""

import json
import logging
from typing import Optional
from datetime import datetime, timezone

from ..core.database import get_db, gen_id, now_iso, to_fixed, from_fixed

logger = logging.getLogger(__name__)


# ══════════════════════════════════════════════════════════════════════
# PART 1: EPISTEMIC VELOCITY TRACKING
# ══════════════════════════════════════════════════════════════════════

class EpistemicVelocity:
    """
    Tracks how claim confidence changes over time.

    For each canonical claim, computes:
      - trend: rising / falling / stable
      - stability: stable / volatile
      - velocity: rate of change (confidence units per month)
    """

    def __init__(self, db_path: Optional[str] = None):
        self.db_path = db_path

    def compute_velocity(self, canonical_id: str) -> dict:
        """
        Compute epistemic velocity for a canonical claim.

        Returns:
            {
                "canonical_id": str,
                "current_confidence": float,
                "trend": "rising" | "falling" | "stable" | "insufficient_data",
                "stability": "stable" | "volatile" | "unknown",
                "velocity": float (confidence change per month),
                "history": [{"date": ..., "confidence": ..., "source": ...}, ...],
                "data_points": int,
                "months_tracked": float,
            }
        """
        conn = get_db(self.db_path)

        # Get version history from canonical_claims
        row = conn.execute(
            "SELECT * FROM canonical_claims WHERE canonical_id = ?",
            (canonical_id,)
        ).fetchone()

        if not row:
            conn.close()
            return {
                "canonical_id": canonical_id,
                "current_confidence": 0,
                "trend": "insufficient_data",
                "stability": "unknown",
                "velocity": 0,
                "history": [],
                "data_points": 0,
                "months_tracked": 0,
            }

        canon = dict(row)
        version_history = json.loads(canon.get("version_history") or "[]")
        current_confidence = from_fixed(canon.get("composite_confidence", 0))

        if len(version_history) < 2:
            conn.close()
            return {
                "canonical_id": canonical_id,
                "current_confidence": current_confidence,
                "trend": "insufficient_data",
                "stability": "unknown",
                "velocity": 0,
                "history": version_history,
                "data_points": len(version_history),
                "months_tracked": 0,
            }

        conn.close()

        # Extract time series (fall back to a fixed date for unparseable entries)
        confidences = [from_fixed(v.get("confidence", 500)) for v in version_history]
        dates = []
        for v in version_history:
            try:
                dates.append(datetime.fromisoformat(v.get("date", "2026-01-01")))
            except (TypeError, ValueError):
                dates.append(datetime(2026, 1, 1))

        # Compute months span (at least 2 entries exist past this point)
        span_days = (dates[-1] - dates[0]).days
        months_tracked = max(span_days / 30.0, 0.1)

        # Compute trend: slope of an ordinary least-squares line fit of
        # confidence against days since the first entry
        days_from_start = [(d - dates[0]).days for d in dates]
        n = len(days_from_start)
        mean_x = sum(days_from_start) / n
        mean_y = sum(confidences) / n

        numerator = sum((x - mean_x) * (y - mean_y)
                        for x, y in zip(days_from_start, confidences))
        denominator = sum((x - mean_x) ** 2 for x in days_from_start)

        if denominator > 0:
            slope_per_day = numerator / denominator
            slope_per_month = slope_per_day * 30
        else:
            slope_per_month = 0

        # Determine trend
        if slope_per_month > 0.01:
            trend = "rising"
        elif slope_per_month < -0.01:
            trend = "falling"
        else:
            trend = "stable"

        # Compute stability (population std dev of the last 3 data points)
        recent = confidences[-min(3, len(confidences)):]
        if len(recent) >= 2:
            mean_r = sum(recent) / len(recent)
            variance = sum((x - mean_r) ** 2 for x in recent) / len(recent)
            std_dev = variance ** 0.5
            stability = "stable" if std_dev < 0.05 else "volatile"
        else:
            stability = "unknown"

        return {
            "canonical_id": canonical_id,
            "current_confidence": current_confidence,
            "trend": trend,
            "stability": stability,
            "velocity": round(slope_per_month, 4),
            "history": version_history,
            "data_points": len(version_history),
            "months_tracked": round(months_tracked, 1),
        }

    def compute_all_velocities(self) -> list[dict]:
        """Compute velocity for all canonical claims."""
        conn = get_db(self.db_path)
        rows = conn.execute("SELECT canonical_id FROM canonical_claims").fetchall()
        conn.close()

        results = []
        for row in rows:
            results.append(self.compute_velocity(dict(row)["canonical_id"]))

        return results

    def get_trending(self, direction: str = "rising", limit: int = 20) -> list[dict]:
        """Get claims trending in a specific direction."""
        all_velocities = self.compute_all_velocities()

        filtered = [v for v in all_velocities if v["trend"] == direction]

        # Sort by absolute velocity (strongest trend first)
        filtered.sort(key=lambda v: abs(v["velocity"]), reverse=True)

        return filtered[:limit]

    def get_volatile(self, limit: int = 20) -> list[dict]:
        """Get the most volatile claims (actively contested)."""
        all_velocities = self.compute_all_velocities()

        volatile = [v for v in all_velocities if v["stability"] == "volatile"]
        volatile.sort(key=lambda v: abs(v["velocity"]), reverse=True)

        return volatile[:limit]

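
# Usage sketch (illustrative, not executed on import). Assumes a populated
# canonical_claims table whose version_history column holds a JSON list of
# {"date", "confidence", "source"} entries, as read by compute_velocity above;
# "research.db" and "claim-0001" are made-up placeholders.
#
#     ev = EpistemicVelocity("research.db")
#     v = ev.compute_velocity("claim-0001")
#     print(v["trend"], v["velocity"])        # e.g. "rising" 0.0231
#     for c in ev.get_volatile(limit=5):      # most actively contested claims
#         print(c["canonical_id"], c["stability"])
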

# ══════════════════════════════════════════════════════════════════════
# PART 2: CONFIDENCE DECOMPOSITION DISPLAY
# ══════════════════════════════════════════════════════════════════════

# Human-readable names for score components
COMPONENT_NAMES = {
    "evidence_strength": "AI evidence assessment",
    "study_quality_weight": "study type quality",
    "journal_tier_weight": "journal tier",
    "completeness_penalty": "data completeness",
    "section_modifier": "section reliability",
    "parse_confidence": "parser quality",
}

SECTION_NAMES = {
    "abstract": "Abstract (0.7× - often overstates results)",
    "introduction": "Introduction (0.8×)",
    "methods": "Methods (1.0×)",
    "results": "Results (1.0× - primary evidence)",
    "results_discussion": "Results & Discussion (0.9×)",
    "discussion": "Discussion (0.75× - goes beyond data)",
    "conclusion": "Conclusion (0.8×)",
    "supplement": "Supplement (1.0× - same weight as results)",
}

STUDY_TYPE_NAMES = {
    "in_vivo": "in vivo experiment (highest weight)",
    "direct_physical_measurement": "direct measurement (highest weight)",
    "mathematical_proof": "mathematical proof (0.95×)",
    "in_vitro": "in vitro experiment (0.85×)",
    "first_principles_simulation": "first-principles simulation (0.80×)",
    "phenomenological_simulation": "phenomenological model (0.60×)",
    "review": "literature review (0.40×)",
    "perspective": "perspective/opinion (0.20×)",
}


def decompose_confidence(claim: dict, source: Optional[dict] = None) -> dict:
    """
    Generate a human-readable confidence decomposition for a claim.

    Template-based - no AI calls. Reads the scoring components and
    generates plain-English explanations.

    Args:
        claim: Claim dict from the database
        source: Source/paper dict (optional, for study type and journal tier)

    Returns:
        {
            "composite_confidence": float,
            "scores": {
                "evidence_quality": {"value": float, "bar": "████████░░", "explanation": str},
                "truth_likelihood": {"value": float, "bar": "██████░░░░", "explanation": str},
                "qualifier_strength": {"value": float, "bar": "████░░░░░░", "explanation": str},
            },
            "headline": "Strong evidence, but one contradicting study and hedged language",
            "warnings": ["Abstract claim forced to Interpretation", ...],
            "action_items": ["Review conflict with Kim 2024", ...],
        }
    """
    # Extract components
    ev_quality = from_fixed(claim.get("evidence_quality", 0))
    truth_like = from_fixed(claim.get("truth_likelihood", 0))
    qual_strength = from_fixed(claim.get("qualifier_strength_score", 0))
    composite = from_fixed(claim.get("composite_confidence", 0))

    section = claim.get("source_section", "unknown")
    qualifiers = claim.get("qualifiers", [])
    if isinstance(qualifiers, str):
        qualifiers = json.loads(qualifiers)
    missing = claim.get("missing_fields", [])
    if isinstance(missing, str):
        missing = json.loads(missing)
    is_null = claim.get("is_null_result", False)
    is_inherited = claim.get("is_inherited_citation", False)
    practical_sig = claim.get("practical_significance", True)

    ev_strength = from_fixed(claim.get("evidence_strength", 0))

    # Study type info
    study_type = source.get("study_type", "unknown") if source else "unknown"
    journal_tier = source.get("journal_tier", 2) if source else 2

    # Build bar visualizations (10 chars, clamped so out-of-range values
    # cannot produce malformed bars)
    def bar(value, max_val=1.0):
        filled = max(0, min(10, int((value / max_val) * 10)))
        return "█" * filled + "░" * (10 - filled)

    # Evidence quality explanation
    ev_parts = []
    if ev_strength > 0:
        ev_parts.append(f"AI rated evidence at {ev_strength:.2f}")
    if study_type in STUDY_TYPE_NAMES:
        ev_parts.append(f"study type: {STUDY_TYPE_NAMES[study_type]}")
    ev_parts.append(f"journal tier {journal_tier}")
    if section in SECTION_NAMES:
        ev_parts.append(f"from {SECTION_NAMES[section]}")
    if missing:
        ev_parts.append(f"incomplete ({len(missing)} fields missing)")
    ev_explanation = "; ".join(ev_parts) if ev_parts else "No component data available"

    # Truth likelihood explanation
    truth_parts = [f"based on evidence quality of {ev_quality:.2f}"]
    if is_null:
        truth_parts.append("null result (capped at 0.50)")
    if is_inherited:
        truth_parts.append("inherited citation (-0.20 penalty)")
    if not practical_sig:
        truth_parts.append("⚠️ large sample + tiny effect → capped at 0.40")
    truth_explanation = "; ".join(truth_parts)

    # Qualifier strength explanation
    qual_parts = []
    if qualifiers:
        qual_parts.append(f"{len(qualifiers)} qualifier(s): {', '.join(qualifiers[:5])}")
        qual_parts.append(f"-{len(qualifiers) * 0.1:.1f} penalty applied")
    else:
        qual_parts.append("no hedging language detected (full weight)")
    if is_null:
        qual_parts.append("null result cap (max 0.50)")
    if is_inherited:
        qual_parts.append("inherited citation (-0.20)")
    qual_explanation = "; ".join(qual_parts)

    # Warnings
    warnings = []
    if section == "abstract":
        warnings.append("Abstract claim - forced to Interpretation with 0.7× penalty")
    if not practical_sig:
        warnings.append("Statistically significant but practically meaningless (large N, tiny effect)")
    if is_null:
        warnings.append("This is a null/negative result")
    if is_inherited:
        warnings.append("This finding is cited from another paper, not original to this one")
    if missing:
        warnings.append(f"Missing fields: {', '.join(missing)}")

    raw_parse = claim.get("parse_confidence")
    parse_conf = from_fixed(raw_parse if isinstance(raw_parse, int) else 1000)
    if parse_conf < 0.8:
        warnings.append(f"Parser confidence only {parse_conf:.2f} - source text may be garbled")

    # Headline
    if composite > 0.8:
        headline = "Strong confidence - well-supported claim"
    elif composite > 0.6:
        parts = []
        if ev_quality > 0.7:
            parts.append("good evidence")
        if truth_like < 0.6:
            parts.append("but truth likelihood reduced")
        if qual_strength < 0.6:
            parts.append("hedged language")
        headline = ", ".join(parts) if parts else "Moderate confidence"
    elif composite > 0.3:
        headline = "Low confidence - review recommended"
    else:
        headline = "Very low confidence - quarantine candidate"

    # Action items
    action_items = []
    if ev_quality < 0.5:
        action_items.append("Find additional supporting evidence")
    if qualifiers:
        action_items.append("Verify qualifier scope - are conditions met?")
    if is_inherited:
        action_items.append("Trace to original source paper and verify")
    if parse_conf < 0.8:
        action_items.append("Check original PDF - parser may have misread this region")

    return {
        "composite_confidence": round(composite, 3),
        "scores": {
            "evidence_quality": {
                "value": round(ev_quality, 3),
                "bar": bar(ev_quality),
                "explanation": ev_explanation,
            },
            "truth_likelihood": {
                "value": round(truth_like, 3),
                "bar": bar(truth_like),
                "explanation": truth_explanation,
            },
            "qualifier_strength": {
                "value": round(qual_strength, 3),
                "bar": bar(qual_strength),
                "explanation": qual_explanation,
            },
        },
        "headline": headline,
        "warnings": warnings,
        "action_items": action_items,
    }

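# Usage sketch (illustrative). Field values are synthetic fixed-point ints,
# assuming the project's 1000 == 1.0 scale implied by from_fixed above:
#
#     claim = {"evidence_quality": 720, "truth_likelihood": 650,
#              "qualifier_strength_score": 400, "composite_confidence": 610,
#              "source_section": "discussion", "qualifiers": '["may", "suggests"]'}
#     decompose_confidence(claim)["headline"]   # → "good evidence, hedged language"
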

def format_decomposition_text(decomposition: dict) -> str:
    """
    Format a decomposition dict as human-readable text.
    Suitable for terminal output, Obsidian export, or Gradio display.
    """
    d = decomposition
    lines = []

    lines.append(f"Composite Confidence: {d['composite_confidence']:.3f}")
    lines.append(f"  → {d['headline']}")
    lines.append("")

    for score_name, score_data in d["scores"].items():
        display_name = score_name.replace("_", " ").title()
        lines.append(f"  {display_name:25s} {score_data['value']:.3f}  {score_data['bar']}")
        lines.append(f"      ({score_data['explanation']})")

    if d["warnings"]:
        lines.append("")
        lines.append("  ⚠️ Warnings:")
        for w in d["warnings"]:
            lines.append(f"    • {w}")

    if d["action_items"]:
        lines.append("")
        lines.append("  📋 Action Items:")
        for a in d["action_items"]:
            lines.append(f"    • {a}")

    return "\n".join(lines)


def format_decomposition_markdown(decomposition: dict) -> str:
    """Format for Obsidian/Markdown export."""
    d = decomposition
    lines = []

    lines.append(f"**Confidence: {d['composite_confidence']:.3f}** - {d['headline']}")
    lines.append("")
    lines.append("| Score | Value | Visual |")
    lines.append("|-------|-------|--------|")

    for score_name, score_data in d["scores"].items():
        display_name = score_name.replace("_", " ").title()
        lines.append(f"| {display_name} | {score_data['value']:.3f} | `{score_data['bar']}` |")

    lines.append("")

    for score_name, score_data in d["scores"].items():
        display_name = score_name.replace("_", " ").title()
        lines.append(f"- **{display_name}**: {score_data['explanation']}")

    if d["warnings"]:
        lines.append("")
        lines.append("> [!warning] Warnings")
        for w in d["warnings"]:
            lines.append(f"> - {w}")

    if d["action_items"]:
        lines.append("")
        lines.append("**Action Items:**")
        for a in d["action_items"]:
            lines.append(f"- [ ] {a}")

    return "\n".join(lines)
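

if __name__ == "__main__":
    # Minimal smoke test for the decomposition display (no database needed).
    # All field values below are synthetic and assume the fixed-point scale
    # used by from_fixed above (1000 == 1.0). Run as a module so the relative
    # import resolves, e.g.:
    #   python -m phd_research_os_v2.layer5.velocity_and_decomposition
    sample_claim = {
        "evidence_quality": 720,
        "truth_likelihood": 550,
        "qualifier_strength_score": 400,
        "composite_confidence": 610,
        "evidence_strength": 700,
        "source_section": "abstract",
        "qualifiers": '["may", "suggests"]',
        "missing_fields": '["sample_size"]',
        "is_inherited_citation": True,
        "parse_confidence": 950,
    }
    sample_source = {"study_type": "in_vitro", "journal_tier": 1}

    decomposition = decompose_confidence(sample_claim, sample_source)
    print(format_decomposition_text(decomposition))
    print()
    print(format_decomposition_markdown(decomposition))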