nkshirsa committed on
Commit 3044b2e · verified · 1 Parent(s): aec6d5c

Add Superpowers Skill Tree + Meta-Improver: phd_research_os/meta_improver.py

Files changed (1)
  1. phd_research_os/meta_improver.py +767 -0
phd_research_os/meta_improver.py ADDED
@@ -0,0 +1,767 @@
"""
PhD Research OS — Meta-Improver AI
====================================
A continuously running self-improvement intelligence that:
1. Monitors the Research OS for quality degradation, drift, and opportunities
2. Scans external sources (papers, social media, repos) for improvement ideas
3. Proposes improvements to BOTH the Research OS AND to itself
4. Tracks improvement history and learns from what worked

The Meta-Improver operates under the ECC Harness and produces Proposals
that require human approval — it NEVER self-modifies without authorization.

Architecture:
    MetaImprover
    ├── InternalMonitor    — watches DB metrics, eval scores, error rates
    ├── ExternalScanner    — searches papers, GitHub, social media for ideas
    ├── SelfReflector      — analyzes own performance and proposes self-upgrades
    └── ImprovementEngine  — synthesizes findings into ranked Proposals
"""

import json
import os
import time
import hashlib
from datetime import datetime, timezone
from typing import Optional
from dataclasses import dataclass, field, asdict

from .db import get_db, init_db, now_iso, gen_id, to_fixed, from_fixed
from .skills.registry import skill_registry, SUPERPOWERS_WORKFLOW_CONTEXT


# ============================================================
# Meta-Improver Configuration
# ============================================================

META_IMPROVER_VERSION = "1.0.0"

# What the meta-improver monitors internally
INTERNAL_MONITORS = {
    "extraction_quality": {
        "description": "Track claim extraction precision/recall over time",
        "metric": "hallucination_rate",
        "alert_threshold": 0.12,  # Alert if hallucination > 12%
        "check_interval_hours": 24,
    },
    "confidence_calibration": {
        "description": "Monitor Brier score for systematic miscalibration",
        "metric": "brier_score",
        "alert_threshold": 0.25,  # Alert if Brier > 0.25
        "check_interval_hours": 168,  # Weekly
    },
    "conflict_detection_rate": {
        "description": "Track false positive rate in conflict detection",
        "metric": "false_positive_rate",
        "alert_threshold": 0.30,
        "check_interval_hours": 168,
    },
    "api_cost_efficiency": {
        "description": "Monitor cost per claim extracted",
        "metric": "cost_per_claim_usd",
        "alert_threshold": 0.10,  # Alert if > $0.10/claim
        "check_interval_hours": 24,
    },
    "prompt_drift": {
        "description": "Detect when prompt performance degrades vs baseline",
        "metric": "eval_score_delta",
        "alert_threshold": -0.05,  # Alert if 5% below baseline
        "check_interval_hours": 168,
    },
    "taxonomy_coverage": {
        "description": "Track study types not in taxonomy that appear in data",
        "metric": "unmapped_type_count",
        "alert_threshold": 5,
        "check_interval_hours": 168,
    },
    "companion_agent_health": {
        "description": "Track companion agent task success rate",
        "metric": "task_success_rate",
        "alert_threshold": 0.70,  # Alert if <70% of tasks complete
        "check_interval_hours": 168,
    },
}
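

# A minimal scheduling sketch (not part of the original module) showing how a
# caller could use the check_interval_hours fields above to decide whether a
# monitor is due; monitor_is_due and last_checked_iso are hypothetical names.
def monitor_is_due(monitor_name: str, last_checked_iso: Optional[str]) -> bool:
    """Return True if the monitor's configured check interval has elapsed."""
    if last_checked_iso is None:
        return True  # Never checked — due immediately
    last = datetime.fromisoformat(last_checked_iso.replace("Z", "+00:00"))
    if last.tzinfo is None:
        last = last.replace(tzinfo=timezone.utc)
    elapsed_h = (datetime.now(timezone.utc) - last).total_seconds() / 3600.0
    return elapsed_h >= INTERNAL_MONITORS[monitor_name]["check_interval_hours"]
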

# External sources to scan for improvement ideas
EXTERNAL_SOURCES = {
    "arxiv_papers": {
        "description": "Search arXiv for papers on scientific NLP, claim extraction, epistemic classification",
        "queries": [
            "scientific claim extraction language model",
            "epistemic classification scientific text",
            "contradiction detection scientific literature",
            "confidence calibration language model",
            "structured output language model fine-tuning",
        ],
        "scan_interval_hours": 168,  # Weekly
    },
    "huggingface_models": {
        "description": "Monitor HF Hub for new models suitable as Research OS brain",
        "queries": [
            "scientific text extraction",
            "structured JSON output",
            "instruction-tuned 3B 7B",
        ],
        "scan_interval_hours": 168,
    },
    "github_repos": {
        "description": "Monitor GitHub for tools/libraries that could improve the pipeline",
        "queries": [
            "scientific information extraction",
            "pdf claim extraction",
            "epistemic tagging NLP",
        ],
        "scan_interval_hours": 336,  # Bi-weekly
    },
    "social_discourse": {
        "description": "Monitor discourse for best practices in research AI systems",
        "queries": [
            "research AI assistant best practices",
            "scientific knowledge base design",
            "LLM structured output techniques",
        ],
        "scan_interval_hours": 168,
    },
}


# ============================================================
# Database Extension
# ============================================================

def init_meta_improver_db(db_path: Optional[str] = None):
    """Add meta-improver tables to the database."""
    init_db(db_path)
    conn = get_db(db_path)
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS meta_monitor_state (
            monitor_name TEXT PRIMARY KEY,
            last_value REAL,
            last_checked TEXT,
            alert_active INTEGER DEFAULT 0,
            trend TEXT,                      -- JSON: recent values for trend analysis
            baseline_value REAL,
            created_at TEXT NOT NULL
        );

        CREATE TABLE IF NOT EXISTS external_scan_results (
            scan_id TEXT PRIMARY KEY,
            source_type TEXT NOT NULL,
            query TEXT NOT NULL,
            results TEXT NOT NULL,           -- JSON: search results
            improvement_ideas TEXT,          -- JSON: extracted ideas
            scanned_at TEXT NOT NULL,
            processed INTEGER DEFAULT 0
        );

        CREATE TABLE IF NOT EXISTS improvement_history (
            improvement_id TEXT PRIMARY KEY,
            category TEXT NOT NULL,          -- internal_monitor, external_scan, self_reflection
            description TEXT NOT NULL,
            proposal_id TEXT,                -- Links to proposals table
            impact_measured TEXT,            -- JSON: before/after metrics
            status TEXT DEFAULT 'proposed',  -- proposed, applied, measured, reverted
            created_at TEXT NOT NULL,
            applied_at TEXT,
            measured_at TEXT
        );

        CREATE TABLE IF NOT EXISTS self_reflection_log (
            reflection_id TEXT PRIMARY KEY,
            trigger TEXT NOT NULL,           -- what triggered this reflection
            findings TEXT NOT NULL,          -- JSON: what was discovered
            self_improvement_proposal TEXT,  -- JSON: how to improve the meta-improver itself
            confidence REAL,
            created_at TEXT NOT NULL
        );

        -- Ensure proposals table exists (normally created by agent_os, but the meta-improver reads it)
        CREATE TABLE IF NOT EXISTS proposals (
            proposal_id TEXT PRIMARY KEY,
            agent_id TEXT NOT NULL,
            task_id TEXT,
            proposal_type TEXT NOT NULL,
            description TEXT NOT NULL,
            changes TEXT NOT NULL,
            evidence TEXT,
            estimated_impact TEXT,
            risk_assessment TEXT DEFAULT 'low',
            reversible INTEGER DEFAULT 1,
            status TEXT DEFAULT 'proposed',
            created_at TEXT NOT NULL,
            reviewed_at TEXT,
            reviewed_by TEXT,
            rejection_reason TEXT,
            schema_version TEXT NOT NULL DEFAULT '1.0'
        );
    """)
    conn.commit()
    conn.close()
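

# A small read-back sketch (assuming, as the code below does, that get_db
# returns sqlite3 connections with a Row factory): recover one monitor's
# stored trend for plotting or threshold tuning. load_trend is a hypothetical
# helper; the trend JSON format matches what _update_monitor writes.
def load_trend(db_path: str, monitor_name: str) -> list:
    """Return the recorded (timestamp, value) history for one monitor."""
    conn = get_db(db_path)
    row = conn.execute(
        "SELECT trend FROM meta_monitor_state WHERE monitor_name = ?",
        (monitor_name,),
    ).fetchone()
    conn.close()
    if row is None or row[0] is None:
        return []
    return [(p["timestamp"], p["value"]) for p in json.loads(row[0])]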


# ============================================================
# Internal Monitor
# ============================================================

class InternalMonitor:
    """
    Watches Research OS database metrics for quality degradation.
    Runs checks based on configured intervals and thresholds.
    """

    def __init__(self, db_path: str):
        self.db_path = db_path

    def run_all_checks(self) -> list:
        """Run all internal monitoring checks. Returns a list of alerts."""
        alerts = []
        conn = get_db(self.db_path)

        # Check 1: Hallucination-rate proxy (claims with very low evidence strength)
        total = conn.execute("SELECT COUNT(*) FROM claims").fetchone()[0]
        if total > 0:
            low_evidence = conn.execute(
                "SELECT COUNT(*) FROM claims WHERE evidence_strength IS NOT NULL AND evidence_strength < 200"
            ).fetchone()[0]
            rate = low_evidence / total
            self._update_monitor(conn, "extraction_quality", rate)
            if rate > INTERNAL_MONITORS["extraction_quality"]["alert_threshold"]:
                alerts.append({
                    "monitor": "extraction_quality",
                    "message": f"Low-evidence claim rate at {rate:.1%} (threshold: {INTERNAL_MONITORS['extraction_quality']['alert_threshold']:.0%})",
                    "severity": "high" if rate > 0.20 else "medium",
                    "current_value": rate,
                })

        # Check 2: Confidence calibration (if calibration data exists)
        cal_count = conn.execute("SELECT COUNT(*) FROM calibration_log").fetchone()[0]
        if cal_count >= 20:
            alerts.append({
                "monitor": "confidence_calibration",
                "message": f"Calibration data available ({cal_count} points). Brier score analysis recommended.",
                "severity": "info",
                "current_value": cal_count,
            })

        # Check 3: API cost tracking
        cost_row = conn.execute("""
            SELECT SUM(cost_usd) as total_cost, COUNT(DISTINCT task_type) as task_types
            FROM api_usage_log WHERE timestamp >= datetime('now', '-7 days')
        """).fetchone()
        if cost_row and cost_row[0]:
            weekly_cost = cost_row[0]
            if weekly_cost > 15.0:  # $15/week threshold
                alerts.append({
                    "monitor": "api_cost_efficiency",
                    "message": f"Weekly API cost: ${weekly_cost:.2f} (threshold: $15.00)",
                    "severity": "medium",
                    "current_value": weekly_cost,
                })

        # Check 4: Unresolved conflicts accumulation
        unresolved = conn.execute(
            "SELECT COUNT(*) FROM conflicts WHERE resolution_status = 'Unresolved'"
        ).fetchone()[0]
        if unresolved > 20:
            alerts.append({
                "monitor": "conflict_detection_rate",
                "message": f"{unresolved} unresolved conflicts accumulating. Review recommended.",
                "severity": "medium",
                "current_value": unresolved,
            })

        # Check 5: Incomplete claims ratio
        if total > 0:
            incomplete = conn.execute(
                "SELECT COUNT(*) FROM claims WHERE status = 'Incomplete'"
            ).fetchone()[0]
            incomplete_rate = incomplete / total
            if incomplete_rate > 0.30:
                alerts.append({
                    "monitor": "extraction_quality",
                    "message": f"Incomplete claim rate: {incomplete_rate:.1%}. May indicate extraction issues.",
                    "severity": "low",
                    "current_value": incomplete_rate,
                })

        # Check 6: Taxonomy coverage
        unmapped = conn.execute("""
            SELECT DISTINCT study_type FROM claims
            WHERE study_type IS NOT NULL
              AND study_type NOT IN ('in_vivo', 'direct_physical_measurement', 'mathematical_proof',
                  'in_vitro', 'first_principles_simulation', 'phenomenological_simulation',
                  'review', 'perspective', 'primary_experimental', 'simulation', 'review_non_systematic')
        """).fetchall()
        if len(unmapped) > 0:
            types = [dict(r)["study_type"] for r in unmapped]
            alerts.append({
                "monitor": "taxonomy_coverage",
                "message": f"Found {len(types)} unmapped study types: {types[:5]}",
                "severity": "low",
                "current_value": len(types),
            })

        conn.close()
        return alerts

    def _update_monitor(self, conn, name: str, value: float):
        """Update monitor state with the latest value."""
        existing = conn.execute(
            "SELECT trend FROM meta_monitor_state WHERE monitor_name = ?", (name,)
        ).fetchone()

        if existing:
            trend = json.loads(existing[0] or "[]")
            trend.append({"value": value, "timestamp": now_iso()})
            trend = trend[-50:]  # Keep the last 50 data points
            conn.execute("""
                UPDATE meta_monitor_state SET last_value = ?, last_checked = ?, trend = ?
                WHERE monitor_name = ?
            """, (value, now_iso(), json.dumps(trend), name))
        else:
            conn.execute("""
                INSERT INTO meta_monitor_state (monitor_name, last_value, last_checked,
                                                trend, baseline_value, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (name, value, now_iso(), json.dumps([{"value": value, "timestamp": now_iso()}]),
                  value, now_iso()))
        conn.commit()
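

# A sketch of the Brier-score analysis that the calibration alert above
# recommends. The calibration_log column names (predicted_confidence, outcome)
# are assumptions — adjust them to the actual schema. The Brier score is the
# mean squared gap between a predicted probability and its 0/1 outcome; lower
# is better, and > 0.25 is this module's miscalibration threshold.
def compute_brier_score(db_path: str) -> Optional[float]:
    """Return the Brier score over all logged calibration points, or None."""
    conn = get_db(db_path)
    rows = conn.execute(
        "SELECT predicted_confidence, outcome FROM calibration_log"
    ).fetchall()
    conn.close()
    if not rows:
        return None
    return sum((r[0] - r[1]) ** 2 for r in rows) / len(rows)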


# ============================================================
# External Scanner
# ============================================================

class ExternalScanner:
    """
    Scans external sources for improvement ideas.
    Uses the Research OS Brain (API) to search and analyze findings.

    Sources: arXiv papers, HF Hub models, GitHub repos, social discourse.
    """

    def __init__(self, db_path: str, brain=None):
        self.db_path = db_path
        self.brain = brain

    def scan_all_sources(self) -> list:
        """
        Scan all configured external sources for improvement ideas.
        Returns a list of scan results with extracted ideas.
        """
        results = []
        conn = get_db(self.db_path)

        for source_type, config in EXTERNAL_SOURCES.items():
            for query in config["queries"]:
                # Skip queries already scanned within the configured interval
                recent = conn.execute("""
                    SELECT 1 FROM external_scan_results
                    WHERE source_type = ? AND query = ?
                      AND scanned_at >= datetime('now', ?)
                """, (source_type, query, f"-{config['scan_interval_hours']} hours")).fetchone()

                if recent:
                    continue

                # Perform the scan (using the brain if available, otherwise a structured placeholder)
                scan_result = self._scan_source(source_type, query)

                scan_id = gen_id("SCAN")
                conn.execute("""
                    INSERT INTO external_scan_results (scan_id, source_type, query,
                                                       results, improvement_ideas, scanned_at)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, (scan_id, source_type, query,
                      json.dumps(scan_result.get("results", [])),
                      json.dumps(scan_result.get("ideas", [])),
                      now_iso()))

                results.append({
                    "scan_id": scan_id,
                    "source": source_type,
                    "query": query,
                    "results_count": len(scan_result.get("results", [])),
                    "ideas_count": len(scan_result.get("ideas", [])),
                })

        conn.commit()
        conn.close()
        return results

    def _scan_source(self, source_type: str, query: str) -> dict:
        """Scan a single source. Returns results and extracted ideas."""
        if self.brain:
            return self._scan_with_brain(source_type, query)
        else:
            return self._generate_scan_template(source_type, query)

    def _scan_with_brain(self, source_type: str, query: str) -> dict:
        """Use the AI brain to analyze a query and generate improvement ideas."""
        prompt = f"""You are the External Intelligence Scanner for a PhD Research OS.

Search context: {source_type}
Query: "{query}"

Based on your knowledge, identify:
1. Recent developments (papers, tools, models) relevant to this query
2. Specific improvement ideas for a system that:
   - Extracts scientific claims from papers
   - Classifies claims as Fact/Interpretation/Hypothesis/Conflict
   - Scores confidence using evidence × quality × tier × completeness
   - Detects contradictions between claims
   - Uses Qwen2.5-3B fine-tuned with QLoRA

Output JSON:
{{
  "results": [{{"title": "...", "source": "...", "relevance": "high|medium|low", "summary": "..."}}],
  "ideas": [{{"idea": "...", "expected_impact": "...", "effort": "low|medium|high", "category": "model|data|pipeline|taxonomy|architecture"}}]
}}"""

        try:
            messages = [
                {"role": "system", "content": "You are a research intelligence scanner. Output valid JSON only."},
                {"role": "user", "content": prompt}
            ]
            if self.brain.backend == "local":
                raw = self.brain._generate_local(messages)
            else:
                raw = self.brain._generate_api(messages)

            # Strip a Markdown code fence if the model wrapped its JSON in one
            text = raw.strip()
            if text.startswith("```"):
                text = text.split("```")[1]
                if text.startswith("json"):
                    text = text[4:]
            text = text.strip()
            return json.loads(text)
        except Exception:
            # Fall back to the structured placeholder on any backend or parse failure
            return self._generate_scan_template(source_type, query)

    def _generate_scan_template(self, source_type: str, query: str) -> dict:
        """Generate a structured template when no brain is available."""
        return {
            "results": [{
                "title": f"[Placeholder] Scan for: {query}",
                "source": source_type,
                "relevance": "medium",
                "summary": "Brain not configured — manual scan recommended"
            }],
            "ideas": [{
                "idea": f"Investigate: {query}",
                "expected_impact": "Unknown — requires manual evaluation",
                "effort": "medium",
                "category": "research"
            }]
        }

    def get_unprocessed_ideas(self) -> list:
        """Get all improvement ideas not yet converted to proposals."""
        conn = get_db(self.db_path)
        rows = conn.execute("""
            SELECT scan_id, source_type, query, improvement_ideas, scanned_at
            FROM external_scan_results WHERE processed = 0
            ORDER BY scanned_at DESC
        """).fetchall()
        conn.close()

        ideas = []
        for row in rows:
            d = dict(row)
            # improvement_ideas is a nullable JSON column, so guard against NULL
            d["improvement_ideas"] = json.loads(d.get("improvement_ideas") or "[]")
            ideas.append(d)
        return ideas
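
# A minimal usage sketch: with brain=None the scanner stores placeholder
# templates, which still exercise the dedup-by-interval logic and surface in
# get_unprocessed_ideas() for manual triage (the DB path is illustrative).
#
#     scanner = ExternalScanner("data/research_os.db", brain=None)
#     print(scanner.scan_all_sources())
#     for scan in scanner.get_unprocessed_ideas():
#         print(scan["query"], "->", len(scan["improvement_ideas"]), "ideas")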


# ============================================================
# Self-Reflector
# ============================================================

class SelfReflector:
    """
    Analyzes the Meta-Improver's own performance and proposes self-upgrades.

    Monitors:
    - Which improvement proposals were accepted vs rejected (learn preferences)
    - Which monitoring alerts were actionable vs noise (tune thresholds)
    - Which external scans produced valuable ideas (focus searches)
    - Overall system improvement trajectory (are we getting better?)
    """

    def __init__(self, db_path: str):
        self.db_path = db_path

    def reflect(self) -> dict:
        """
        Run a self-reflection cycle.
        Returns findings and self-improvement proposals.
        """
        conn = get_db(self.db_path)
        findings = {}

        # 1. Proposal acceptance rate
        total_proposals = conn.execute("SELECT COUNT(*) FROM proposals").fetchone()[0]
        approved = conn.execute("SELECT COUNT(*) FROM proposals WHERE status = 'approved'").fetchone()[0]
        rejected = conn.execute("SELECT COUNT(*) FROM proposals WHERE status = 'rejected'").fetchone()[0]

        if total_proposals > 0:
            acceptance_rate = approved / total_proposals
            findings["proposal_acceptance_rate"] = {
                "total": total_proposals,
                "approved": approved,
                "rejected": rejected,
                "rate": acceptance_rate,
                "insight": (
                    "Low acceptance rate — proposals may be too aggressive or poorly targeted"
                    if acceptance_rate < 0.3 and total_proposals > 10
                    else "Acceptance rate healthy" if acceptance_rate > 0.5
                    else "Insufficient data for trend"
                )
            }

        # 2. Rejection reasons analysis
        rejections = conn.execute("""
            SELECT rejection_reason FROM proposals WHERE status = 'rejected' AND rejection_reason != ''
        """).fetchall()
        if rejections:
            reasons = [dict(r)["rejection_reason"] for r in rejections]
            findings["rejection_patterns"] = {
                "total_rejections": len(reasons),
                "sample_reasons": reasons[:5],
                "insight": "Analyze rejection reasons to avoid proposing similar changes"
            }

        # 3. External scan yield
        total_scans = conn.execute("SELECT COUNT(*) FROM external_scan_results").fetchone()[0]
        processed = conn.execute("SELECT COUNT(*) FROM external_scan_results WHERE processed = 1").fetchone()[0]
        findings["external_scan_yield"] = {
            "total_scans": total_scans,
            "processed": processed,
            "insight": "Track which source types produce the most actionable ideas"
        }

        # 4. Improvement trajectory (status -> count)
        improvements = conn.execute("""
            SELECT status, COUNT(*) as cnt FROM improvement_history GROUP BY status
        """).fetchall()
        findings["improvement_trajectory"] = (
            {dict(r)["status"]: dict(r)["cnt"] for r in improvements}
            if improvements else {"no_data": True}
        )

        # 5. Self-improvement proposals
        self_proposals = []

        if findings.get("proposal_acceptance_rate", {}).get("rate", 1.0) < 0.3:
            self_proposals.append({
                "target": "proposal_generation",
                "change": "Increase evidence requirements before generating proposals",
                "reason": "Low acceptance rate suggests proposals are insufficiently grounded"
            })

        if total_scans > 20 and processed < total_scans * 0.5:
            self_proposals.append({
                "target": "external_scanning",
                "change": "Reduce scan frequency or focus on higher-yield source types",
                "reason": "Many scans unprocessed — scanning faster than consumption"
            })

        # Log reflection
        reflection_id = gen_id("REFL")
        conn.execute("""
            INSERT INTO self_reflection_log (reflection_id, trigger, findings,
                                             self_improvement_proposal, confidence, created_at)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (reflection_id, "scheduled_reflection", json.dumps(findings),
              json.dumps(self_proposals), 0.6, now_iso()))
        conn.commit()
        conn.close()

        return {
            "reflection_id": reflection_id,
            "findings": findings,
            "self_improvement_proposals": self_proposals,
        }
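
# For orientation, reflect() returns a dict shaped like this (values are
# illustrative, not real output):
#
#     {"reflection_id": "REFL-...",
#      "findings": {"proposal_acceptance_rate": {"total": 12, "approved": 7, ...},
#                   "external_scan_yield": {"total_scans": 40, "processed": 31, ...},
#                   "improvement_trajectory": {"proposed": 5, "applied": 3}},
#      "self_improvement_proposals": [{"target": "...", "change": "...", "reason": "..."}]}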


# ============================================================
# Meta-Improver — The Unified Intelligence
# ============================================================

class MetaImprover:
    """
    The Meta-Improver AI: continuously monitors, scans, reflects, and proposes improvements.

    This is permanently baked into the companion AI system. It:
    1. Runs internal quality monitors on every check cycle
    2. Scans external sources weekly for new papers, models, tools
    3. Reflects on its own performance monthly
    4. Produces ranked improvement proposals for human review
    5. Tracks what improvements actually worked (closed-loop learning)

    All improvements go through the ECC Harness proposal system.
    The Meta-Improver NEVER self-modifies without human approval.
    """

    def __init__(self, db_path: Optional[str] = None, brain=None):
        self.db_path = db_path or os.environ.get("RESEARCH_OS_DB", "data/research_os.db")
        init_meta_improver_db(self.db_path)
        self.brain = brain
        self.monitor = InternalMonitor(self.db_path)
        self.scanner = ExternalScanner(self.db_path, brain)
        self.reflector = SelfReflector(self.db_path)

    def run_improvement_cycle(self) -> dict:
        """
        Run a full improvement cycle:
        1. Internal monitoring → alerts
        2. External scanning → ideas
        3. Self-reflection → meta-proposals
        4. Synthesis → ranked improvement proposals

        Returns a comprehensive report.
        """
        report = {
            "timestamp": now_iso(),
            "version": META_IMPROVER_VERSION,
            "alerts": [],
            "scan_results": [],
            "reflection": {},
            "improvement_proposals": [],
        }

        # Phase 1: Internal monitoring
        print("MetaImprover: Running internal monitors...")
        report["alerts"] = self.monitor.run_all_checks()

        # Phase 2: External scanning
        print("MetaImprover: Scanning external sources...")
        report["scan_results"] = self.scanner.scan_all_sources()

        # Phase 3: Self-reflection
        print("MetaImprover: Running self-reflection...")
        report["reflection"] = self.reflector.reflect()

        # Phase 4: Synthesize improvement proposals
        print("MetaImprover: Synthesizing proposals...")
        report["improvement_proposals"] = self._synthesize_proposals(report)

        return report

    def _synthesize_proposals(self, report: dict) -> list:
        """
        Synthesize findings from all sources into ranked improvement proposals.
        """
        proposals = []
        conn = get_db(self.db_path)

        # From alerts → improvement proposals
        for alert in report.get("alerts", []):
            if alert.get("severity") in ["high", "medium"]:
                imp_id = gen_id("IMP")
                proposals.append({
                    "improvement_id": imp_id,
                    "category": "internal_monitor",
                    "source": alert["monitor"],
                    "description": f"Address: {alert['message']}",
                    "priority": "high" if alert["severity"] == "high" else "medium",
                    "suggested_action": self._suggest_action_for_alert(alert),
                })
                conn.execute("""
                    INSERT INTO improvement_history (improvement_id, category, description, created_at)
                    VALUES (?, 'internal_monitor', ?, ?)
                """, (imp_id, alert["message"], now_iso()))

        # From external scans → improvement proposals
        unprocessed = self.scanner.get_unprocessed_ideas()
        for scan in unprocessed[:5]:  # Process the 5 most recent scans
            for idea in scan.get("improvement_ideas", [])[:3]:  # Top 3 ideas per scan
                if isinstance(idea, dict):
                    imp_id = gen_id("IMP")
                    proposals.append({
                        "improvement_id": imp_id,
                        "category": "external_scan",
                        "source": f"{scan.get('source_type', 'unknown')}: {scan.get('query', '')}",
                        "description": idea.get("idea", ""),
                        "priority": "medium",
                        "expected_impact": idea.get("expected_impact", "unknown"),
                        "effort": idea.get("effort", "medium"),
                    })
            # Mark the scan consumed so its ideas are not re-proposed next cycle
            conn.execute(
                "UPDATE external_scan_results SET processed = 1 WHERE scan_id = ?",
                (scan["scan_id"],)
            )

        # From self-reflection → meta-proposals
        for sp in report.get("reflection", {}).get("self_improvement_proposals", []):
            imp_id = gen_id("IMP")
            proposals.append({
                "improvement_id": imp_id,
                "category": "self_reflection",
                "source": "meta_improver_self_analysis",
                "description": f"Self-improve: {sp.get('change', '')} (reason: {sp.get('reason', '')})",
                "priority": "low",
                "target": sp.get("target", "unknown"),
            })

        conn.commit()
        conn.close()

        # Rank by priority
        priority_order = {"high": 0, "medium": 1, "low": 2}
        proposals.sort(key=lambda x: priority_order.get(x.get("priority", "low"), 99))

        return proposals

    def _suggest_action_for_alert(self, alert: dict) -> str:
        """Suggest a concrete action based on an internal monitoring alert."""
        actions = {
            "extraction_quality": "Run evaluation harness against golden dataset. If degraded, check recent prompt changes and revert if needed.",
            "confidence_calibration": "Compute Brier score from calibration_log. If overconfident, reduce study_quality_weights by 5%.",
            "conflict_detection_rate": "Review last 20 detected conflicts manually. If >30% false positives, tighten keyword overlap threshold.",
            "api_cost_efficiency": "Check which task types consume most tokens. Enable semantic caching for repeat queries.",
            "prompt_drift": "Run regression gate. Compare current metrics to Phase 2 baseline. Revert prompt if degraded.",
            "taxonomy_coverage": "Add unmapped study types to domain taxonomy or create alias mappings.",
            "companion_agent_health": "Check failed tasks for common errors. Increase iteration budgets if tasks are timing out.",
        }
        return actions.get(alert.get("monitor", ""), "Investigate the alert and determine appropriate response.")

    def get_improvement_history(self, limit: int = 20) -> list:
        """Get improvement history with status."""
        conn = get_db(self.db_path)
        rows = conn.execute("""
            SELECT * FROM improvement_history ORDER BY created_at DESC LIMIT ?
        """, (limit,)).fetchall()
        conn.close()
        return [dict(r) for r in rows]

    def get_self_reflections(self, limit: int = 10) -> list:
        """Get self-reflection history."""
        conn = get_db(self.db_path)
        rows = conn.execute("""
            SELECT * FROM self_reflection_log ORDER BY created_at DESC LIMIT ?
        """, (limit,)).fetchall()
        conn.close()
        results = []
        for r in rows:
            d = dict(r)
            # Guard against NULL JSON columns before parsing
            d["findings"] = json.loads(d.get("findings") or "{}")
            d["self_improvement_proposal"] = json.loads(d.get("self_improvement_proposal") or "[]")
            results.append(d)
        return results

    def mark_improvement_applied(self, improvement_id: str, proposal_id: Optional[str] = None):
        """Mark an improvement as applied (with an optional link to its proposal)."""
        conn = get_db(self.db_path)
        conn.execute("""
            UPDATE improvement_history SET status = 'applied', proposal_id = ?, applied_at = ?
            WHERE improvement_id = ?
        """, (proposal_id, now_iso(), improvement_id))
        conn.commit()
        conn.close()

    def mark_improvement_measured(self, improvement_id: str, impact: dict):
        """Record the measured impact of an applied improvement."""
        conn = get_db(self.db_path)
        conn.execute("""
            UPDATE improvement_history SET status = 'measured', impact_measured = ?, measured_at = ?
            WHERE improvement_id = ?
        """, (json.dumps(impact), now_iso(), improvement_id))
        conn.commit()
        conn.close()
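

# ------------------------------------------------------------
# A minimal end-to-end sketch (not part of the module's public API): run one
# improvement cycle, e.g. via `python -m phd_research_os.meta_improver`, and
# print the ranked proposals. Assumes the Research OS schema has been
# initialized; with no brain configured, scans fall back to placeholders.
if __name__ == "__main__":
    improver = MetaImprover()  # Uses RESEARCH_OS_DB or data/research_os.db
    cycle_report = improver.run_improvement_cycle()
    print(f"\n{len(cycle_report['alerts'])} alerts, "
          f"{len(cycle_report['improvement_proposals'])} proposals:")
    for p in cycle_report["improvement_proposals"]:
        print(f"  [{p['priority']}] ({p['category']}) {p['description']}")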