Add Superpowers Skill Tree + Meta-Improver: phd_research_os/meta_improver.py
Browse files- phd_research_os/meta_improver.py +767 -0
phd_research_os/meta_improver.py
ADDED
|
@@ -0,0 +1,767 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PhD Research OS — Meta-Improver AI
|
| 3 |
+
====================================
|
| 4 |
+
A continuously running self-improvement intelligence that:
|
| 5 |
+
1. Monitors the Research OS for quality degradation, drift, and opportunities
|
| 6 |
+
2. Scans external sources (papers, social media, repos) for improvement ideas
|
| 7 |
+
3. Proposes improvements to BOTH the Research OS AND to itself
|
| 8 |
+
4. Tracks improvement history and learns from what worked
|
| 9 |
+
|
| 10 |
+
The Meta-Improver operates under the ECC Harness and produces Proposals
|
| 11 |
+
that require human approval — it NEVER self-modifies without authorization.
|
| 12 |
+
|
| 13 |
+
Architecture:
|
| 14 |
+
MetaImprover
|
| 15 |
+
├── InternalMonitor — watches DB metrics, eval scores, error rates
|
| 16 |
+
├── ExternalScanner — searches papers, GitHub, social media for ideas
|
| 17 |
+
├── SelfReflector — analyzes own performance and proposes self-upgrades
|
| 18 |
+
└── ImprovementEngine — synthesizes findings into ranked Proposals
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import os
|
| 23 |
+
import time
|
| 24 |
+
import hashlib
|
| 25 |
+
from datetime import datetime, timezone
|
| 26 |
+
from typing import Optional
|
| 27 |
+
from dataclasses import dataclass, field, asdict
|
| 28 |
+
|
| 29 |
+
from .db import get_db, init_db, now_iso, gen_id, to_fixed, from_fixed
|
| 30 |
+
from .skills.registry import skill_registry, SUPERPOWERS_WORKFLOW_CONTEXT
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ============================================================
# Meta-Improver Configuration
# ============================================================

# Semantic version of the meta-improver itself; recorded in every cycle report.
META_IMPROVER_VERSION = "1.0.0"

# What the meta-improver monitors internally.
# Each entry: a human-readable description, the metric name it tracks,
# the value at which an alert should fire, and how often to check.
# NOTE(review): thresholds are heuristics; `alert_threshold` is compared
# differently per monitor (some are upper bounds, `prompt_drift` is a lower
# bound, `companion_agent_health` alerts when *below* the value).
INTERNAL_MONITORS = {
    "extraction_quality": {
        "description": "Track claim extraction precision/recall over time",
        "metric": "hallucination_rate",
        "alert_threshold": 0.12,  # Alert if hallucination > 12%
        "check_interval_hours": 24,
    },
    "confidence_calibration": {
        "description": "Monitor Brier score for systematic miscalibration",
        "metric": "brier_score",
        "alert_threshold": 0.25,  # Alert if Brier > 0.25
        "check_interval_hours": 168,  # Weekly
    },
    "conflict_detection_rate": {
        "description": "Track false positive rate in conflict detection",
        "metric": "false_positive_rate",
        "alert_threshold": 0.30,
        "check_interval_hours": 168,
    },
    "api_cost_efficiency": {
        "description": "Monitor cost per claim extracted",
        "metric": "cost_per_claim_usd",
        "alert_threshold": 0.10,  # Alert if > $0.10/claim
        "check_interval_hours": 24,
    },
    "prompt_drift": {
        "description": "Detect when prompt performance degrades vs baseline",
        "metric": "eval_score_delta",
        "alert_threshold": -0.05,  # Alert if 5% below baseline
        "check_interval_hours": 168,
    },
    "taxonomy_coverage": {
        "description": "Track study types not in taxonomy that appear in data",
        "metric": "unmapped_type_count",
        "alert_threshold": 5,
        "check_interval_hours": 168,
    },
    "companion_agent_health": {
        "description": "Track companion agent task success rate",
        "metric": "task_success_rate",
        "alert_threshold": 0.70,  # Alert if <70% tasks complete
        "check_interval_hours": 168,
    },
}

# External sources to scan for improvement ideas.
# Each entry: a description, the search queries to issue, and the minimum
# interval between re-scans of the same (source, query) pair — enforced by
# ExternalScanner against the `external_scan_results` table.
EXTERNAL_SOURCES = {
    "arxiv_papers": {
        "description": "Search arXiv for papers on scientific NLP, claim extraction, epistemic classification",
        "queries": [
            "scientific claim extraction language model",
            "epistemic classification scientific text",
            "contradiction detection scientific literature",
            "confidence calibration language model",
            "structured output language model fine-tuning",
        ],
        "scan_interval_hours": 168,  # Weekly
    },
    "huggingface_models": {
        "description": "Monitor HF Hub for new models suitable as Research OS brain",
        "queries": [
            "scientific text extraction",
            "structured JSON output",
            "instruction-tuned 3B 7B",
        ],
        "scan_interval_hours": 168,
    },
    "github_repos": {
        "description": "Monitor GitHub for tools/libraries that could improve the pipeline",
        "queries": [
            "scientific information extraction",
            "pdf claim extraction",
            "epistemic tagging NLP",
        ],
        "scan_interval_hours": 336,  # Bi-weekly
    },
    "social_discourse": {
        "description": "Monitor discourse for best practices in research AI systems",
        "queries": [
            "research AI assistant best practices",
            "scientific knowledge base design",
            "LLM structured output techniques",
        ],
        "scan_interval_hours": 168,
    },
}
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# ============================================================
|
| 129 |
+
# Database Extension
|
| 130 |
+
# ============================================================
|
| 131 |
+
|
| 132 |
+
def init_meta_improver_db(db_path: Optional[str] = None):
    """Add meta-improver tables to the database.

    Idempotent: every statement uses CREATE TABLE IF NOT EXISTS, so this is
    safe to call on every startup. Ensures the core schema exists first via
    init_db(), then adds the four meta-improver tables plus a defensive copy
    of the shared `proposals` table (normally created by agent_os, but the
    meta-improver reads and writes it).

    Args:
        db_path: Path to the SQLite database, or None to use the default
            resolved by get_db()/init_db().
    """
    init_db(db_path)
    conn = get_db(db_path)
    # try/finally guarantees the connection is closed even if the DDL fails
    # (the original leaked the handle on an executescript error).
    try:
        conn.executescript("""
        CREATE TABLE IF NOT EXISTS meta_monitor_state (
            monitor_name TEXT PRIMARY KEY,
            last_value REAL,
            last_checked TEXT,
            alert_active INTEGER DEFAULT 0,
            trend TEXT,  -- JSON: recent values for trend analysis
            baseline_value REAL,
            created_at TEXT NOT NULL
        );

        CREATE TABLE IF NOT EXISTS external_scan_results (
            scan_id TEXT PRIMARY KEY,
            source_type TEXT NOT NULL,
            query TEXT NOT NULL,
            results TEXT NOT NULL,  -- JSON: search results
            improvement_ideas TEXT,  -- JSON: extracted ideas
            scanned_at TEXT NOT NULL,
            processed INTEGER DEFAULT 0
        );

        CREATE TABLE IF NOT EXISTS improvement_history (
            improvement_id TEXT PRIMARY KEY,
            category TEXT NOT NULL,  -- internal_monitor, external_scan, self_reflection
            description TEXT NOT NULL,
            proposal_id TEXT,  -- Links to proposals table
            impact_measured TEXT,  -- JSON: before/after metrics
            status TEXT DEFAULT 'proposed',  -- proposed, applied, measured, reverted
            created_at TEXT NOT NULL,
            applied_at TEXT,
            measured_at TEXT
        );

        CREATE TABLE IF NOT EXISTS self_reflection_log (
            reflection_id TEXT PRIMARY KEY,
            trigger TEXT NOT NULL,  -- what triggered this reflection
            findings TEXT NOT NULL,  -- JSON: what was discovered
            self_improvement_proposal TEXT,  -- JSON: how to improve the meta-improver itself
            confidence REAL,
            created_at TEXT NOT NULL
        );

        -- Ensure proposals table exists (normally created by agent_os, but meta-improver reads it)
        CREATE TABLE IF NOT EXISTS proposals (
            proposal_id TEXT PRIMARY KEY,
            agent_id TEXT NOT NULL,
            task_id TEXT,
            proposal_type TEXT NOT NULL,
            description TEXT NOT NULL,
            changes TEXT NOT NULL,
            evidence TEXT,
            estimated_impact TEXT,
            risk_assessment TEXT DEFAULT 'low',
            reversible INTEGER DEFAULT 1,
            status TEXT DEFAULT 'proposed',
            created_at TEXT NOT NULL,
            reviewed_at TEXT,
            reviewed_by TEXT,
            rejection_reason TEXT,
            schema_version TEXT NOT NULL DEFAULT '1.0'
        );
        """)
        conn.commit()
    finally:
        conn.close()
| 201 |
+
|
| 202 |
+
# ============================================================
|
| 203 |
+
# Internal Monitor
|
| 204 |
+
# ============================================================
|
| 205 |
+
|
| 206 |
+
class InternalMonitor:
    """
    Watches Research OS database metrics for quality degradation.
    Runs checks based on configured intervals and thresholds.

    NOTE(review): the checks query tables created elsewhere in the project
    (`claims`, `calibration_log`, `api_usage_log`, `conflicts`) — presumably
    by the core init_db(); confirm they exist before the first cycle, since
    a missing table raises sqlite3.OperationalError here.
    """

    def __init__(self, db_path: str):
        # Path to the SQLite database; a fresh connection is opened per check run.
        self.db_path = db_path

    def run_all_checks(self) -> list:
        """Run all internal monitoring checks. Returns list of alerts.

        Each alert is a dict with keys: monitor, message, severity
        ("high"/"medium"/"low"/"info"), and current_value.
        """
        alerts = []
        conn = get_db(self.db_path)

        # Check 1: Hallucination rate proxy (claims with very low evidence strength).
        # evidence_strength < 200 is treated as "low evidence" — presumably a
        # fixed-point scale (see to_fixed/from_fixed import); TODO confirm units.
        total = conn.execute("SELECT COUNT(*) FROM claims").fetchone()[0]
        if total > 0:
            low_evidence = conn.execute(
                "SELECT COUNT(*) FROM claims WHERE evidence_strength IS NOT NULL AND evidence_strength < 200"
            ).fetchone()[0]
            rate = low_evidence / total
            # Record the value so trend history accumulates even when no alert fires.
            self._update_monitor(conn, "extraction_quality", rate)
            if rate > INTERNAL_MONITORS["extraction_quality"]["alert_threshold"]:
                alerts.append({
                    "monitor": "extraction_quality",
                    "message": f"Low-evidence claim rate at {rate:.1%} (threshold: {INTERNAL_MONITORS['extraction_quality']['alert_threshold']:.0%})",
                    "severity": "high" if rate > 0.20 else "medium",
                    "current_value": rate,
                })

        # Check 2: Confidence calibration (if calibration data exists).
        # Purely informational — fires once >= 20 data points are available;
        # the actual Brier-score computation is not done here.
        cal_count = conn.execute("SELECT COUNT(*) FROM calibration_log").fetchone()[0]
        if cal_count >= 20:
            alerts.append({
                "monitor": "confidence_calibration",
                "message": f"Calibration data available ({cal_count} points). Brier score analysis recommended.",
                "severity": "info",
                "current_value": cal_count,
            })

        # Check 3: API cost tracking over the trailing 7 days.
        cost_row = conn.execute("""
            SELECT SUM(cost_usd) as total_cost, COUNT(DISTINCT task_type) as task_types
            FROM api_usage_log WHERE timestamp >= datetime('now', '-7 days')
        """).fetchone()
        if cost_row and cost_row[0]:
            weekly_cost = cost_row[0]
            # Hard-coded weekly budget; differs from the per-claim threshold
            # configured in INTERNAL_MONITORS["api_cost_efficiency"].
            if weekly_cost > 15.0:  # $15/week threshold
                alerts.append({
                    "monitor": "api_cost_efficiency",
                    "message": f"Weekly API cost: ${weekly_cost:.2f} (threshold: $15.00)",
                    "severity": "medium",
                    "current_value": weekly_cost,
                })

        # Check 4: Unresolved conflicts accumulation.
        unresolved = conn.execute(
            "SELECT COUNT(*) FROM conflicts WHERE resolution_status = 'Unresolved'"
        ).fetchone()[0]
        if unresolved > 20:
            alerts.append({
                "monitor": "conflict_detection_rate",
                "message": f"{unresolved} unresolved conflicts accumulating. Review recommended.",
                "severity": "medium",
                "current_value": unresolved,
            })

        # Check 5: Incomplete claims ratio (reuses `total` from Check 1).
        if total > 0:
            incomplete = conn.execute(
                "SELECT COUNT(*) FROM claims WHERE status = 'Incomplete'"
            ).fetchone()[0]
            incomplete_rate = incomplete / total
            if incomplete_rate > 0.30:
                alerts.append({
                    "monitor": "extraction_quality",
                    "message": f"Incomplete claim rate: {incomplete_rate:.1%}. May indicate extraction issues.",
                    "severity": "low",
                    "current_value": incomplete_rate,
                })

        # Check 6: Taxonomy coverage — study types present in data but absent
        # from the hard-coded allow-list below. NOTE(review): this list
        # duplicates the canonical taxonomy; keep in sync with its source.
        unmapped = conn.execute("""
            SELECT DISTINCT study_type FROM claims
            WHERE study_type IS NOT NULL
            AND study_type NOT IN ('in_vivo', 'direct_physical_measurement', 'mathematical_proof',
                'in_vitro', 'first_principles_simulation', 'phenomenological_simulation',
                'review', 'perspective', 'primary_experimental', 'simulation', 'review_non_systematic')
        """).fetchall()
        if len(unmapped) > 0:
            types = [dict(r)["study_type"] for r in unmapped]
            alerts.append({
                "monitor": "taxonomy_coverage",
                "message": f"Found {len(types)} unmapped study types: {types[:5]}",
                "severity": "low",
                "current_value": len(types),
            })

        conn.close()
        return alerts

    def _update_monitor(self, conn, name: str, value: float):
        """Update monitor state with latest value.

        Appends the value to the rolling trend (capped at 50 points) for an
        existing monitor, or inserts a new row using the first observation as
        the baseline. Commits on the caller's connection.
        """
        existing = conn.execute(
            "SELECT trend FROM meta_monitor_state WHERE monitor_name = ?", (name,)
        ).fetchone()

        if existing:
            trend = json.loads(existing[0] or "[]")
            trend.append({"value": value, "timestamp": now_iso()})
            trend = trend[-50:]  # Keep last 50 data points
            conn.execute("""
                UPDATE meta_monitor_state SET last_value = ?, last_checked = ?, trend = ?
                WHERE monitor_name = ?
            """, (value, now_iso(), json.dumps(trend), name))
        else:
            conn.execute("""
                INSERT INTO meta_monitor_state (monitor_name, last_value, last_checked,
                    trend, baseline_value, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (name, value, now_iso(), json.dumps([{"value": value, "timestamp": now_iso()}]),
                  value, now_iso()))
        conn.commit()
| 330 |
+
|
| 331 |
+
# ============================================================
|
| 332 |
+
# External Scanner
|
| 333 |
+
# ============================================================
|
| 334 |
+
|
| 335 |
+
class ExternalScanner:
    """
    Scans external sources for improvement ideas.
    Uses the Research OS Brain (API) to search and analyze findings.

    Sources: arXiv papers, HF Hub models, GitHub repos, social discourse
    (configured in EXTERNAL_SOURCES).
    """

    def __init__(self, db_path: str, brain=None):
        # Path to the SQLite database holding `external_scan_results`.
        self.db_path = db_path
        # Optional AI brain; when None, scans produce structured placeholders.
        self.brain = brain

    def scan_all_sources(self) -> list:
        """
        Scan all configured external sources for improvement ideas.

        Skips any (source, query) pair scanned within its configured
        `scan_interval_hours`. Persists each new scan to
        `external_scan_results` and returns a summary list of dicts:
        scan_id, source, query, results_count, ideas_count.
        """
        results = []
        conn = get_db(self.db_path)

        for source_type, config in EXTERNAL_SOURCES.items():
            for query in config["queries"]:
                # Throttle: skip if this exact query was scanned recently.
                recent = conn.execute("""
                    SELECT 1 FROM external_scan_results
                    WHERE source_type = ? AND query = ?
                    AND scanned_at >= datetime('now', ?)
                """, (source_type, query, f"-{config['scan_interval_hours']} hours")).fetchone()

                if recent:
                    continue

                # Perform the scan (using brain if available, otherwise generate structured placeholder)
                scan_result = self._scan_source(source_type, query)

                scan_id = gen_id("SCAN")
                conn.execute("""
                    INSERT INTO external_scan_results (scan_id, source_type, query,
                        results, improvement_ideas, scanned_at)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, (scan_id, source_type, query,
                      json.dumps(scan_result.get("results", [])),
                      json.dumps(scan_result.get("ideas", [])),
                      now_iso()))

                results.append({
                    "scan_id": scan_id,
                    "source": source_type,
                    "query": query,
                    "results_count": len(scan_result.get("results", [])),
                    "ideas_count": len(scan_result.get("ideas", [])),
                })

        conn.commit()
        conn.close()
        return results

    def _scan_source(self, source_type: str, query: str) -> dict:
        """Scan a single source. Returns results and extracted ideas.

        Delegates to the brain when configured, otherwise emits a
        placeholder template so downstream processing still has structure.
        """
        if self.brain:
            return self._scan_with_brain(source_type, query)
        else:
            return self._generate_scan_template(source_type, query)

    def _scan_with_brain(self, source_type: str, query: str) -> dict:
        """Use the AI brain to analyze a query and generate improvement ideas.

        Falls back to the placeholder template on any failure (generation
        error, malformed fencing, invalid JSON).
        """
        prompt = f"""You are the External Intelligence Scanner for a PhD Research OS.

Search context: {source_type}
Query: "{query}"

Based on your knowledge, identify:
1. Recent developments (papers, tools, models) relevant to this query
2. Specific improvement ideas for a system that:
   - Extracts scientific claims from papers
   - Classifies claims as Fact/Interpretation/Hypothesis/Conflict
   - Scores confidence using evidence × quality × tier × completeness
   - Detects contradictions between claims
   - Uses Qwen2.5-3B fine-tuned with QLoRA

Output JSON:
{{
  "results": [{{"title": "...", "source": "...", "relevance": "high|medium|low", "summary": "..."}}],
  "ideas": [{{"idea": "...", "expected_impact": "...", "effort": "low|medium|high", "category": "model|data|pipeline|taxonomy|architecture"}}]
}}"""

        try:
            messages = [
                {"role": "system", "content": "You are a research intelligence scanner. Output valid JSON only."},
                {"role": "user", "content": prompt}
            ]
            # NOTE(review): relies on brain's private _generate_* methods and a
            # `backend` attribute — confirm against the brain's interface.
            if self.brain.backend == "local":
                raw = self.brain._generate_local(messages)
            else:
                raw = self.brain._generate_api(messages)

            # Strip a possible ```json ... ``` markdown fence before parsing.
            text = raw.strip()
            if text.startswith("```"):
                text = text.split("```")[1]
                if text.startswith("json"):
                    text = text[4:]
                text = text.strip()
            return json.loads(text)
        except Exception:
            return self._generate_scan_template(source_type, query)

    def _generate_scan_template(self, source_type: str, query: str) -> dict:
        """Generate a structured template when no brain is available."""
        return {
            "results": [{
                "title": f"[Placeholder] Scan for: {query}",
                "source": source_type,
                "relevance": "medium",
                "summary": "Brain not configured — manual scan recommended"
            }],
            "ideas": [{
                "idea": f"Investigate: {query}",
                "expected_impact": "Unknown — requires manual evaluation",
                "effort": "medium",
                "category": "research"
            }]
        }

    def get_unprocessed_ideas(self) -> list:
        """Get all improvement ideas not yet converted to proposals.

        Returns rows from `external_scan_results` with processed = 0,
        newest first, with `improvement_ideas` decoded from JSON.
        """
        conn = get_db(self.db_path)
        rows = conn.execute("""
            SELECT scan_id, source_type, query, improvement_ideas, scanned_at
            FROM external_scan_results WHERE processed = 0
            ORDER BY scanned_at DESC
        """).fetchall()
        conn.close()

        ideas = []
        for row in rows:
            d = dict(row)
            # BUG FIX: the column is nullable, so the key exists with value
            # None and dict.get's default never applies — `or "[]"` prevents
            # json.loads(None) from raising TypeError.
            d["improvement_ideas"] = json.loads(d.get("improvement_ideas") or "[]")
            ideas.append(d)
        return ideas
| 361 |
+
AND scanned_at >= datetime('now', ?)
|
| 362 |
+
""", (source_type, query, f"-{config['scan_interval_hours']} hours")).fetchone()
|
| 363 |
+
|
| 364 |
+
if recent:
|
| 365 |
+
continue
|
| 366 |
+
|
| 367 |
+
# Perform the scan (using brain if available, otherwise generate structured placeholder)
|
| 368 |
+
scan_result = self._scan_source(source_type, query)
|
| 369 |
+
|
| 370 |
+
scan_id = gen_id("SCAN")
|
| 371 |
+
conn.execute("""
|
| 372 |
+
INSERT INTO external_scan_results (scan_id, source_type, query,
|
| 373 |
+
results, improvement_ideas, scanned_at)
|
| 374 |
+
VALUES (?, ?, ?, ?, ?, ?)
|
| 375 |
+
""", (scan_id, source_type, query,
|
| 376 |
+
json.dumps(scan_result.get("results", [])),
|
| 377 |
+
json.dumps(scan_result.get("ideas", [])),
|
| 378 |
+
now_iso()))
|
| 379 |
+
|
| 380 |
+
results.append({
|
| 381 |
+
"scan_id": scan_id,
|
| 382 |
+
"source": source_type,
|
| 383 |
+
"query": query,
|
| 384 |
+
"results_count": len(scan_result.get("results", [])),
|
| 385 |
+
"ideas_count": len(scan_result.get("ideas", [])),
|
| 386 |
+
})
|
| 387 |
+
|
| 388 |
+
conn.commit()
|
| 389 |
+
conn.close()
|
| 390 |
+
return results
|
| 391 |
+
|
| 392 |
+
def _scan_source(self, source_type: str, query: str) -> dict:
|
| 393 |
+
"""Scan a single source. Returns results and extracted ideas."""
|
| 394 |
+
if self.brain:
|
| 395 |
+
return self._scan_with_brain(source_type, query)
|
| 396 |
+
else:
|
| 397 |
+
return self._generate_scan_template(source_type, query)
|
| 398 |
+
|
| 399 |
+
def _scan_with_brain(self, source_type: str, query: str) -> dict:
|
| 400 |
+
"""Use the AI brain to analyze a query and generate improvement ideas."""
|
| 401 |
+
prompt = f"""You are the External Intelligence Scanner for a PhD Research OS.
|
| 402 |
+
|
| 403 |
+
Search context: {source_type}
|
| 404 |
+
Query: "{query}"
|
| 405 |
+
|
| 406 |
+
Based on your knowledge, identify:
|
| 407 |
+
1. Recent developments (papers, tools, models) relevant to this query
|
| 408 |
+
2. Specific improvement ideas for a system that:
|
| 409 |
+
- Extracts scientific claims from papers
|
| 410 |
+
- Classifies claims as Fact/Interpretation/Hypothesis/Conflict
|
| 411 |
+
- Scores confidence using evidence × quality × tier × completeness
|
| 412 |
+
- Detects contradictions between claims
|
| 413 |
+
- Uses Qwen2.5-3B fine-tuned with QLoRA
|
| 414 |
+
|
| 415 |
+
Output JSON:
|
| 416 |
+
{{
|
| 417 |
+
"results": [{{"title": "...", "source": "...", "relevance": "high|medium|low", "summary": "..."}}],
|
| 418 |
+
"ideas": [{{"idea": "...", "expected_impact": "...", "effort": "low|medium|high", "category": "model|data|pipeline|taxonomy|architecture"}}]
|
| 419 |
+
}}"""
|
| 420 |
+
|
| 421 |
+
try:
|
| 422 |
+
messages = [
|
| 423 |
+
{"role": "system", "content": "You are a research intelligence scanner. Output valid JSON only."},
|
| 424 |
+
{"role": "user", "content": prompt}
|
| 425 |
+
]
|
| 426 |
+
if self.brain.backend == "local":
|
| 427 |
+
raw = self.brain._generate_local(messages)
|
| 428 |
+
else:
|
| 429 |
+
raw = self.brain._generate_api(messages)
|
| 430 |
+
|
| 431 |
+
text = raw.strip()
|
| 432 |
+
if text.startswith("```"):
|
| 433 |
+
text = text.split("```")[1]
|
| 434 |
+
if text.startswith("json"):
|
| 435 |
+
text = text[4:]
|
| 436 |
+
text = text.strip()
|
| 437 |
+
return json.loads(text)
|
| 438 |
+
except Exception:
|
| 439 |
+
return self._generate_scan_template(source_type, query)
|
| 440 |
+
|
| 441 |
+
def _generate_scan_template(self, source_type: str, query: str) -> dict:
|
| 442 |
+
"""Generate a structured template when no brain is available."""
|
| 443 |
+
return {
|
| 444 |
+
"results": [{
|
| 445 |
+
"title": f"[Placeholder] Scan for: {query}",
|
| 446 |
+
"source": source_type,
|
| 447 |
+
"relevance": "medium",
|
| 448 |
+
"summary": "Brain not configured — manual scan recommended"
|
| 449 |
+
}],
|
| 450 |
+
"ideas": [{
|
| 451 |
+
"idea": f"Investigate: {query}",
|
| 452 |
+
"expected_impact": "Unknown — requires manual evaluation",
|
| 453 |
+
"effort": "medium",
|
| 454 |
+
"category": "research"
|
| 455 |
+
}]
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
def get_unprocessed_ideas(self) -> list:
|
| 459 |
+
"""Get all improvement ideas not yet converted to proposals."""
|
| 460 |
+
conn = get_db(self.db_path)
|
| 461 |
+
rows = conn.execute("""
|
| 462 |
+
SELECT scan_id, source_type, query, improvement_ideas, scanned_at
|
| 463 |
+
FROM external_scan_results WHERE processed = 0
|
| 464 |
+
ORDER BY scanned_at DESC
|
| 465 |
+
""").fetchall()
|
| 466 |
+
conn.close()
|
| 467 |
+
|
| 468 |
+
ideas = []
|
| 469 |
+
for row in rows:
|
| 470 |
+
d = dict(row)
|
| 471 |
+
d["improvement_ideas"] = json.loads(d.get("improvement_ideas", "[]"))
|
| 472 |
+
ideas.append(d)
|
| 473 |
+
return ideas
|
| 474 |
+
|
| 475 |
+
|
| 476 |
+
# ============================================================
|
| 477 |
+
# Self-Reflector
|
| 478 |
+
# ============================================================
|
| 479 |
+
|
| 480 |
+
class SelfReflector:
    """
    Analyzes the Meta-Improver's own performance and proposes self-upgrades.

    Monitors:
    - Which improvement proposals were accepted vs rejected (learn preferences)
    - Which monitoring alerts were actionable vs noise (tune thresholds)
    - Which external scans produced valuable ideas (focus searches)
    - Overall system improvement trajectory (are we getting better?)
    """

    def __init__(self, db_path: str):
        # Path to the SQLite database; reflect() opens its own connection.
        self.db_path = db_path

    def reflect(self) -> dict:
        """
        Run a self-reflection cycle.

        Gathers proposal acceptance stats, rejection patterns, external-scan
        yield, and improvement trajectory; derives rule-based self-improvement
        proposals; logs everything to `self_reflection_log`.

        Returns:
            dict with keys: reflection_id, findings, self_improvement_proposals.
        """
        conn = get_db(self.db_path)
        findings = {}

        # 1. Proposal acceptance rate
        total_proposals = conn.execute("SELECT COUNT(*) FROM proposals").fetchone()[0]
        approved = conn.execute("SELECT COUNT(*) FROM proposals WHERE status = 'approved'").fetchone()[0]
        rejected = conn.execute("SELECT COUNT(*) FROM proposals WHERE status = 'rejected'").fetchone()[0]

        if total_proposals > 0:
            acceptance_rate = approved / total_proposals
            findings["proposal_acceptance_rate"] = {
                "total": total_proposals,
                "approved": approved,
                "rejected": rejected,
                "rate": acceptance_rate,
                "insight": (
                    "Low acceptance rate — proposals may be too aggressive or poorly targeted"
                    if acceptance_rate < 0.3 and total_proposals > 10
                    else "Acceptance rate healthy" if acceptance_rate > 0.5
                    else "Insufficient data for trend"
                )
            }

        # 2. Rejection reasons analysis
        rejections = conn.execute("""
            SELECT rejection_reason FROM proposals WHERE status = 'rejected' AND rejection_reason != ''
        """).fetchall()
        if rejections:
            reasons = [dict(r)["rejection_reason"] for r in rejections]
            findings["rejection_patterns"] = {
                "total_rejections": len(reasons),
                "sample_reasons": reasons[:5],
                "insight": "Analyze rejection reasons to avoid proposing similar changes"
            }

        # 3. External scan yield
        total_scans = conn.execute("SELECT COUNT(*) FROM external_scan_results").fetchone()[0]
        processed = conn.execute("SELECT COUNT(*) FROM external_scan_results WHERE processed = 1").fetchone()[0]
        findings["external_scan_yield"] = {
            "total_scans": total_scans,
            "processed": processed,
            "insight": "Track which source types produce the most actionable ideas"
        }

        # 4. Improvement trajectory: {status: count} per improvement_history.
        improvements = conn.execute("""
            SELECT status, COUNT(*) as cnt FROM improvement_history GROUP BY status
        """).fetchall()
        # BUG FIX: the original indexed dict(r).values() with [0]/[1] —
        # dict_values is not subscriptable, so this raised TypeError whenever
        # improvement_history had rows. Access columns by name instead.
        if improvements:
            findings["improvement_trajectory"] = {
                dict(r)["status"]: dict(r)["cnt"] for r in improvements
            }
        else:
            findings["improvement_trajectory"] = {"no_data": True}

        # 5. Self-improvement proposals (simple rule-based heuristics)
        self_proposals = []

        if findings.get("proposal_acceptance_rate", {}).get("rate", 1.0) < 0.3:
            self_proposals.append({
                "target": "proposal_generation",
                "change": "Increase evidence requirements before generating proposals",
                "reason": "Low acceptance rate suggests proposals are insufficiently grounded"
            })

        if total_scans > 20 and processed < total_scans * 0.5:
            self_proposals.append({
                "target": "external_scanning",
                "change": "Reduce scan frequency or focus on higher-yield source types",
                "reason": "Many scans unprocessed — scanning faster than consumption"
            })

        # Log reflection (confidence is a fixed heuristic 0.6 for now)
        reflection_id = gen_id("REFL")
        conn.execute("""
            INSERT INTO self_reflection_log (reflection_id, trigger, findings,
                self_improvement_proposal, confidence, created_at)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (reflection_id, "scheduled_reflection", json.dumps(findings),
              json.dumps(self_proposals), 0.6, now_iso()))
        conn.commit()
        conn.close()

        return {
            "reflection_id": reflection_id,
            "findings": findings,
            "self_improvement_proposals": self_proposals,
        }
|
| 585 |
+
|
| 586 |
+
|
| 587 |
+
# ============================================================
|
| 588 |
+
# Meta-Improver — The Unified Intelligence
|
| 589 |
+
# ============================================================
|
| 590 |
+
|
| 591 |
+
class MetaImprover:
    """
    The Meta-Improver AI: continuously monitors, scans, reflects, and proposes improvements.

    This is permanently baked into the companion AI system. It:
    1. Runs internal quality monitors on every check cycle
    2. Scans external sources weekly for new papers, models, tools
    3. Reflects on its own performance monthly
    4. Produces ranked improvement proposals for human review
    5. Tracks what improvements actually worked (closed-loop learning)

    All improvements go through the ECC Harness proposal system.
    The Meta-Improver NEVER self-modifies without human approval.
    """

    def __init__(self, db_path: str = None, brain=None):
        # Resolve the DB path from the argument, the environment, or the default.
        self.db_path = db_path or os.environ.get("RESEARCH_OS_DB", "data/research_os.db")
        init_meta_improver_db(self.db_path)
        self.brain = brain
        # Sub-components: internal quality monitoring, external source scanning,
        # and self-reflection over past performance.
        self.monitor = InternalMonitor(self.db_path)
        self.scanner = ExternalScanner(self.db_path, brain)
        self.reflector = SelfReflector(self.db_path)

    def run_improvement_cycle(self) -> dict:
        """
        Run a full improvement cycle:
        1. Internal monitoring → alerts
        2. External scanning → ideas
        3. Self-reflection → meta-proposals
        4. Synthesis → ranked improvement proposals

        Returns a comprehensive report dict with keys: timestamp, version,
        alerts, scan_results, reflection, improvement_proposals.
        """
        report = {
            "timestamp": now_iso(),
            "version": META_IMPROVER_VERSION,
            "alerts": [],
            "scan_results": [],
            "reflection": {},
            "improvement_proposals": [],
        }

        # Phase 1: Internal monitoring
        print("MetaImprover: Running internal monitors...")
        report["alerts"] = self.monitor.run_all_checks()

        # Phase 2: External scanning
        print("MetaImprover: Scanning external sources...")
        report["scan_results"] = self.scanner.scan_all_sources()

        # Phase 3: Self-reflection
        print("MetaImprover: Running self-reflection...")
        report["reflection"] = self.reflector.reflect()

        # Phase 4: Synthesize improvement proposals
        print("MetaImprover: Synthesizing proposals...")
        report["improvement_proposals"] = self._synthesize_proposals(report)

        return report

    def _synthesize_proposals(self, report: dict) -> list:
        """
        Synthesize findings from all sources into ranked improvement proposals.

        Inputs: internal-monitor alerts (high/medium severity only), unprocessed
        external-scan ideas, and self-reflection meta-proposals. Internal-monitor
        proposals are also persisted to improvement_history. Returns the combined
        list sorted high → medium → low priority.
        """
        proposals = []
        conn = get_db(self.db_path)
        try:
            # From alerts → improvement proposals.
            # NOTE: use .get() throughout — alert dicts originate from multiple
            # monitors and a missing key should not abort the whole cycle.
            for alert in report.get("alerts", []):
                if alert.get("severity") in ("high", "medium"):
                    imp_id = gen_id("IMP")
                    proposals.append({
                        "improvement_id": imp_id,
                        "category": "internal_monitor",
                        "source": alert.get("monitor", "unknown"),
                        "description": f"Address: {alert.get('message', '')}",
                        "priority": "high" if alert.get("severity") == "high" else "medium",
                        "suggested_action": self._suggest_action_for_alert(alert),
                    })
                    conn.execute("""
                        INSERT INTO improvement_history (improvement_id, category, description, created_at)
                        VALUES (?, 'internal_monitor', ?, ?)
                    """, (imp_id, alert.get("message", ""), now_iso()))

            # From external scans → improvement proposals
            unprocessed = self.scanner.get_unprocessed_ideas()
            for scan in unprocessed[:5]:  # Process top 5 scans
                for idea in scan.get("improvement_ideas", [])[:3]:  # Top 3 ideas per scan
                    if isinstance(idea, dict):
                        imp_id = gen_id("IMP")
                        proposals.append({
                            "improvement_id": imp_id,
                            "category": "external_scan",
                            "source": f"{scan.get('source_type', 'unknown')}: {scan.get('query', '')}",
                            "description": idea.get("idea", ""),
                            "priority": "medium",
                            "expected_impact": idea.get("expected_impact", "unknown"),
                            "effort": idea.get("effort", "medium"),
                        })

            # From self-reflection → meta-proposals (always low priority:
            # changes to the improver itself need the most human scrutiny).
            for sp in report.get("reflection", {}).get("self_improvement_proposals", []):
                imp_id = gen_id("IMP")
                proposals.append({
                    "improvement_id": imp_id,
                    "category": "self_reflection",
                    "source": "meta_improver_self_analysis",
                    "description": f"Self-improve: {sp.get('change', '')} (reason: {sp.get('reason', '')})",
                    "priority": "low",
                    "target": sp.get("target", "unknown"),
                })

            conn.commit()
        finally:
            # Always release the connection, even if an insert fails mid-way
            # (the original leaked it on any exception).
            conn.close()

        # Rank by priority; unknown priorities sort last.
        priority_order = {"high": 0, "medium": 1, "low": 2}
        proposals.sort(key=lambda x: priority_order.get(x.get("priority", "low"), 99))

        return proposals

    def _suggest_action_for_alert(self, alert: dict) -> str:
        """Suggest a concrete action based on an internal monitoring alert."""
        actions = {
            "extraction_quality": "Run evaluation harness against golden dataset. If degraded, check recent prompt changes and revert if needed.",
            "confidence_calibration": "Compute Brier score from calibration_log. If overconfident, reduce study_quality_weights by 5%.",
            "conflict_detection_rate": "Review last 20 detected conflicts manually. If >30% false positives, tighten keyword overlap threshold.",
            "api_cost_efficiency": "Check which task types consume most tokens. Enable semantic caching for repeat queries.",
            "prompt_drift": "Run regression gate. Compare current metrics to Phase 2 baseline. Revert prompt if degraded.",
            "taxonomy_coverage": "Add unmapped study types to domain taxonomy or create alias mappings.",
            "companion_agent_health": "Check failed tasks for common errors. Increase iteration budgets if tasks are timing out.",
        }
        return actions.get(alert.get("monitor", ""), "Investigate the alert and determine appropriate response.")

    def get_improvement_history(self, limit: int = 20) -> list:
        """Get improvement history with status, newest first."""
        conn = get_db(self.db_path)
        try:
            rows = conn.execute("""
                SELECT * FROM improvement_history ORDER BY created_at DESC LIMIT ?
            """, (limit,)).fetchall()
        finally:
            conn.close()
        return [dict(r) for r in rows]

    def get_self_reflections(self, limit: int = 10) -> list:
        """Get self-reflection history, newest first, with JSON columns decoded."""
        conn = get_db(self.db_path)
        try:
            rows = conn.execute("""
                SELECT * FROM self_reflection_log ORDER BY created_at DESC LIMIT ?
            """, (limit,)).fetchall()
        finally:
            conn.close()
        results = []
        for r in rows:
            d = dict(r)
            # `or` fallback (not .get default) so a NULL column value does not
            # pass None into json.loads (which would raise TypeError).
            d["findings"] = json.loads(d.get("findings") or "{}")
            d["self_improvement_proposal"] = json.loads(d.get("self_improvement_proposal") or "[]")
            results.append(d)
        return results

    def mark_improvement_applied(self, improvement_id: str, proposal_id: str = None):
        """Mark an improvement as applied (with optional link to proposal)."""
        conn = get_db(self.db_path)
        try:
            conn.execute("""
                UPDATE improvement_history SET status = 'applied', proposal_id = ?, applied_at = ?
                WHERE improvement_id = ?
            """, (proposal_id, now_iso(), improvement_id))
            conn.commit()
        finally:
            conn.close()

    def mark_improvement_measured(self, improvement_id: str, impact: dict):
        """Record the measured impact of an applied improvement (closed-loop learning)."""
        conn = get_db(self.db_path)
        try:
            conn.execute("""
                UPDATE improvement_history SET status = 'measured', impact_measured = ?, measured_at = ?
                WHERE improvement_id = ?
            """, (json.dumps(impact), now_iso(), improvement_id))
            conn.commit()
        finally:
            conn.close()
|