Add ECC Harness: phd_research_os/agent_os.py

Browse files

Files changed (1) hide show

phd_research_os/agent_os.py +1051 -0

phd_research_os/agent_os.py ADDED Viewed

	@@ -0,0 +1,1051 @@

+"""
+PhD Research OS — ECC Harness Orchestrator (agent_os.py)
+=========================================================
+The meta-system for spawning, managing, and auditing companion AI agents
+that improve the core Research OS brain.
+Implements the ECC Harness: Principal Architect Edition (V-SINGULARITY)
+  - §0: Global Objective Function (correctness > blast radius > simplicity > NFRs > no-op)
+  - §1: Pre-Flight (context loading, knowledge boundaries, assumption logging)
+  - §2: Planning (obviousness test, reversibility, idempotence, confidence signaling)
+  - §3: Execution (JIT verification, cognitive budget, failure modes, scope containment)
+  - §4: Post-Flight (validation, pedagogical handoff, definition of done, meta-learning)
+WAKE-UP ROUTINE: Before any task, this module reads ARCHITECTURE.md and AGENTS.md
+to ground itself in the project map. This is non-negotiable.
+"""
+import json
+import os
+import sqlite3
+import uuid
+import time
+from datetime import datetime, timezone
+from typing import Optional, Callable
+from dataclasses import dataclass, field, asdict
+from pathlib import Path
+from enum import Enum
+from .db import get_db, init_db, now_iso, gen_id, to_fixed, from_fixed
+# ============================================================
+# ECC Lifecycle States
+# ============================================================
+class AgentState(Enum):
+    """
+    ECC Harness lifecycle states for companion agents.
+    Each member's value is the lowercase string form of the state; 'spawned'
+    and 'preflight' match the defaults of the `state` columns in the
+    companion_agents and agent_tasks tables.
+    NOTE(review): the code visible in this module writes these states as raw
+    string literals rather than through this enum — confirm whether the rest
+    of the file uses it.
+    """
+    SPAWNED = "spawned"           # Created, not yet active
+    PREFLIGHT = "preflight"       # §1: Loading context, validating assumptions
+    PLANNING = "planning"         # §2: Building execution plan
+    EXECUTING = "executing"       # §3: Running bounded task
+    POSTFLIGHT = "postflight"     # §4: Validating results, logging decisions
+    COMPLETED = "completed"       # Task done successfully
+    HALTED = "halted"             # Kill heuristic triggered or error
+    RETIRED = "retired"           # Agent decommissioned
+class ProposalStatus(Enum):
+    """
+    Review states a proposal can be in.
+    'proposed' is the value new proposals are stored with (it is the default
+    of both Proposal.status and the proposals.status column).
+    """
+    PROPOSED = "proposed"
+    APPROVED = "approved"
+    REJECTED = "rejected"
+    APPLIED = "applied"
+class RiskLevel(Enum):
+    """
+    Risk labels for a proposal.
+    The values mirror the 'low, medium, high' labels listed for
+    Proposal.risk_assessment.
+    """
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+# ============================================================
+# Data Structures
+# ============================================================
+@dataclass
+class Proposal:
+    """
+    A companion agent's proposed change to the Research OS.
+    ALL companion output goes through proposals — never direct modification.
+    """
+    proposal_id: str
+    agent_id: str
+    proposal_type: str  # prompt_change, training_data, confidence_adjustment, new_claim, architecture_change
+    description: str
+    changes: dict
+    evidence: str
+    estimated_impact: dict  # {"metric": str, "expected_delta": float}
+    risk_assessment: str  # low, medium, high
+    reversible: bool
+    status: str = "proposed"
+    created_at: str = ""
+    reviewed_at: str = ""
+    reviewed_by: str = ""
+    rejection_reason: str = ""
+    def to_dict(self):
+        return asdict(self)
+    def to_json(self):
+        return json.dumps(self.to_dict(), indent=2)
+@dataclass
+class AuditEntry:
+    """
+    Immutable audit log entry. Every agent action is recorded.
+    The fields correspond to columns of the agent_audit_log table.
+    NOTE(review): the dataclass is not declared frozen, so immutability is a
+    convention here rather than something Python enforces.
+    """
+    entry_id: str
+    agent_id: str
+    phase: str          # preflight, planning, executing, postflight
+    action: str         # what was done
+    details: str        # specifics
+    confidence: float   # agent's self-assessed confidence [0,1]
+    timestamp: str
+    deviation: str = "" # if deviating from rules, document why
+@dataclass
+class AgentTask:
+    """
+    A bounded task assigned to a companion agent.
+    The fields and their defaults mirror the columns of the agent_tasks table
+    (state 'preflight', 3 iterations, 3600 seconds).
+    """
+    task_id: str
+    agent_id: str
+    description: str
+    state: str = "preflight"
+    plan: str = ""              # JSON execution plan
+    result: str = ""            # JSON result
+    iterations_used: int = 0
+    max_iterations: int = 3     # §3 iteration budget
+    time_budget_s: int = 3600   # default 1 hour
+    started_at: str = ""
+    completed_at: str = ""
+    kill_reason: str = ""
+# ============================================================
+# Database Extension (adds companion agent tables)
+# ============================================================
+def init_agent_os_db(db_path: str = None):
+    """Extend the Research OS database with companion agent tables."""
+    # First ensure base tables exist
+    init_db(db_path)
+    conn = get_db(db_path)
+    conn.executescript("""
+    CREATE TABLE IF NOT EXISTS companion_agents (
+        agent_id TEXT PRIMARY KEY,
+        agent_type TEXT NOT NULL,
+        purpose TEXT NOT NULL,
+        system_prompt TEXT NOT NULL,
+        state TEXT NOT NULL DEFAULT 'spawned',
+        config TEXT,                  -- JSON: model, temperature, etc.
+        created_at TEXT NOT NULL,
+        retired_at TEXT,
+        total_tasks_completed INTEGER DEFAULT 0,
+        total_proposals_made INTEGER DEFAULT 0,
+        schema_version TEXT NOT NULL DEFAULT '1.0'
+    );
+    CREATE TABLE IF NOT EXISTS agent_tasks (
+        task_id TEXT PRIMARY KEY,
+        agent_id TEXT NOT NULL,
+        description TEXT NOT NULL,
+        state TEXT NOT NULL DEFAULT 'preflight',
+        plan TEXT,                    -- JSON execution plan
+        result TEXT,                  -- JSON result
+        iterations_used INTEGER DEFAULT 0,
+        max_iterations INTEGER DEFAULT 3,
+        time_budget_s INTEGER DEFAULT 3600,
+        started_at TEXT,
+        completed_at TEXT,
+        kill_reason TEXT,
+        schema_version TEXT NOT NULL DEFAULT '1.0',
+        FOREIGN KEY(agent_id) REFERENCES companion_agents(agent_id)
+    );
+    CREATE TABLE IF NOT EXISTS proposals (
+        proposal_id TEXT PRIMARY KEY,
+        agent_id TEXT NOT NULL,
+        task_id TEXT,
+        proposal_type TEXT NOT NULL,
+        description TEXT NOT NULL,
+        changes TEXT NOT NULL,         -- JSON
+        evidence TEXT,
+        estimated_impact TEXT,         -- JSON
+        risk_assessment TEXT DEFAULT 'low',
+        reversible INTEGER DEFAULT 1,
+        status TEXT DEFAULT 'proposed',
+        created_at TEXT NOT NULL,
+        reviewed_at TEXT,
+        reviewed_by TEXT,
+        rejection_reason TEXT,
+        schema_version TEXT NOT NULL DEFAULT '1.0',
+        FOREIGN KEY(agent_id) REFERENCES companion_agents(agent_id),
+        FOREIGN KEY(task_id) REFERENCES agent_tasks(task_id)
+    );
+    CREATE TABLE IF NOT EXISTS agent_audit_log (
+        entry_id TEXT PRIMARY KEY,
+        agent_id TEXT NOT NULL,
+        task_id TEXT,
+        phase TEXT NOT NULL,
+        action TEXT NOT NULL,
+        details TEXT,
+        confidence REAL,
+        deviation TEXT,
+        timestamp TEXT NOT NULL,
+        FOREIGN KEY(agent_id) REFERENCES companion_agents(agent_id)
+    );
+    CREATE TABLE IF NOT EXISTS harness_evolution (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        rule_section TEXT NOT NULL,
+        amendment TEXT NOT NULL,
+        reason TEXT NOT NULL,
+        proposed_by TEXT NOT NULL,
+        timestamp TEXT NOT NULL,
+        approved INTEGER DEFAULT 0
+    );
+    CREATE TABLE IF NOT EXISTS memory_store (
+        key TEXT PRIMARY KEY,
+        value TEXT NOT NULL,
+        last_validated TEXT NOT NULL,
+        category TEXT DEFAULT 'assumption'
+    );
+    """)
+    conn.commit()
+    conn.close()
+# ============================================================
+# Companion Agent Definition
+# ============================================================
+# Pre-built companion types with their system prompts.
+# Maps an agent type name to a dict with two keys: "purpose" (a one-line
+# description) and "system_prompt" (the system message for that agent).
+# spawn_companion() uses these as defaults when the caller does not override them.
+COMPANION_TYPES = {
+    "DataQualityAuditor": {
+        "purpose": "Audit claim extraction quality, detect drift, flag hallucination patterns",
+        "system_prompt": """You are a Data Quality Auditor for a PhD Research OS. Your job is to:
+1. Compare extracted claims against source text to detect hallucinations
+2. Monitor extraction quality metrics over time for drift
+3. Flag claims with suspicious confidence scores (too high for weak evidence)
+4. Propose corrections as Proposal objects — NEVER modify data directly
+Output JSON proposals: {"proposal_type": "confidence_adjustment", "changes": {...}, "evidence": "..."}
+You operate at Provenance Level 5. All your findings require human verification."""
+    },
+    "PromptOptimizer": {
+        "purpose": "Improve system prompts via evaluation against golden dataset",
+        "system_prompt": """You are a Prompt Optimizer for a PhD Research OS. Your job is to:
+1. Analyze current extraction/classification prompts and their metrics
+2. Propose specific prompt modifications with expected impact
+3. Design A/B test criteria for prompt changes
+4. Ensure any change is regression-tested before deployment
+Output JSON proposals: {"proposal_type": "prompt_change", "changes": {"prompt_name": "...", "old": "...", "new": "..."}, "evidence": "..."}
+CRITICAL: Every prompt change MUST pass the regression gate (recall ≥70%, hallucination ≤10%, epistemic accuracy ≥60%)."""
+    },
+    "DomainExpander": {
+        "purpose": "Generate training examples for new STEM domains",
+        "system_prompt": """You are a Domain Expander for a PhD Research OS. Your job is to:
+1. Identify STEM domains not well-covered by current training data
+2. Generate high-quality synthetic training examples in TRL conversational format
+3. Include realistic claim extraction, epistemic tagging, and confidence scoring examples
+4. Ensure examples follow the exact JSON schema used by the core system
+Output JSON proposals: {"proposal_type": "training_data", "changes": {"examples": [...]}, "evidence": "..."}
+Quality requirement: All generated JSON must be valid. Include diverse epistemic tags and study types."""
+    },
+    "CalibrationAnalyst": {
+        "purpose": "Analyze confidence calibration and recommend scoring adjustments",
+        "system_prompt": """You are a Calibration Analyst for a PhD Research OS. Your job is to:
+1. Analyze the calibration_log table for systematic over/under-confidence
+2. Compute Brier scores when sufficient data exists (≥50 data points)
+3. Propose adjustments to study_quality_weight or journal_tier_weight values
+4. Flag specific claim categories where confidence is poorly calibrated
+Output JSON proposals: {"proposal_type": "confidence_adjustment", "changes": {"parameter": "...", "old_value": N, "new_value": N}, "evidence": "Brier score analysis..."}
+Use fixed-point math (×1000) for all proposed values."""
+    },
+    "CitationChaser": {
+        "purpose": "Find papers that cite or contradict current claims in the knowledge base",
+        "system_prompt": """You are a Citation Chaser for a PhD Research OS. Your job is to:
+1. Identify high-impact claims that may have newer supporting or contradicting evidence
+2. Propose new papers for ingestion based on citation chains
+3. Flag claims whose source papers have been retracted or corrected
+4. Suggest claims that need confidence updates based on new evidence
+Output JSON proposals: {"proposal_type": "new_claim", "changes": {"suggested_papers": [...], "reason": "..."}, "evidence": "..."}
+All suggestions are proposals. You cannot add papers to the database directly."""
+    }
+}
+# ============================================================
+# The Agent OS — ECC Harness Orchestrator
+# ============================================================
+class AgentOS:
+    """
+    The meta-system for creating and managing companion AI agents.
+    Implements the full ECC Harness lifecycle:
+      spawn → preflight → plan → execute → postflight → retire
+    Every companion agent:
+    - Reads ARCHITECTURE.md and AGENTS.md before acting (Wake-Up Routine)
+    - Cannot directly modify the Research OS database
+    - Produces Proposals that require human approval
+    - Has bounded iteration budgets (Kill Heuristic)
+    - Logs every action to the audit trail
+    Usage:
+        agent_os = AgentOS()
+        agent = agent_os.spawn_companion("DataQualityAuditor")
+        task = agent_os.assign_task(agent, "Audit last 50 claims for hallucination patterns")
+        agent_os.run_task(task)  # Executes full ECC lifecycle
+        proposals = agent_os.get_proposals(agent)  # Review what the agent found
+        agent_os.approve_proposal(proposals[0])  # Human approves
+    """
+    def __init__(self, db_path: str = None, brain=None):
+        self.db_path = db_path or os.environ.get("RESEARCH_OS_DB", "data/research_os.db")
+        init_agent_os_db(self.db_path)
+        self.brain = brain  # ResearchOSBrain instance for API calls
+        self._architecture = None
+        self._agents_doc = None
+    # ============================================================
+    # §0: Wake-Up Routine — ALWAYS read the map first
+    # ============================================================
+    def _wake_up(self) -> dict:
+        """
+        CRITICAL: Read ARCHITECTURE.md and AGENTS.md before any operation.
+        This is the ground truth for file locations and contracts.
+        """
+        context = {}
+        # Find and read architecture docs
+        for doc_name in ["ARCHITECTURE.md", "AGENTS.md"]:
+            for search_dir in [
+                Path(__file__).parent,
+                Path(__file__).parent.parent,
+                Path.cwd(),
+            ]:
+                doc_path = search_dir / doc_name
+                if doc_path.exists():
+                    context[doc_name] = doc_path.read_text()
+                    break
+            else:
+                context[doc_name] = f"[WARNING: {doc_name} not found — operating without map]"
+        self._architecture = context.get("ARCHITECTURE.md", "")
+        self._agents_doc = context.get("AGENTS.md", "")
+        return context
+    # ============================================================
+    # Spawn: Create a new companion agent
+    # ============================================================
+    def spawn_companion(self, agent_type: str, purpose: str = None,
+                        system_prompt: str = None, config: dict = None) -> str:
+        """
+        Spawn a new companion AI agent.
+        Args:
+            agent_type: One of COMPANION_TYPES keys, or "custom"
+            purpose: Override default purpose (required for "custom")
+            system_prompt: Override default prompt (required for "custom")
+            config: Optional config dict (model, temperature, etc.)
+        Returns:
+            agent_id: Unique identifier for the companion agent
+        """
+        # Wake up first
+        self._wake_up()
+        # Resolve agent definition
+        if agent_type in COMPANION_TYPES:
+            defn = COMPANION_TYPES[agent_type]
+            purpose = purpose or defn["purpose"]
+            system_prompt = system_prompt or defn["system_prompt"]
+        elif agent_type == "custom":
+            if not purpose or not system_prompt:
+                raise ValueError("Custom agents require both 'purpose' and 'system_prompt'")
+        else:
+            raise ValueError(f"Unknown agent type: {agent_type}. "
+                           f"Available: {list(COMPANION_TYPES.keys())} + 'custom'")
+        agent_id = gen_id("COMP")
+        conn = get_db(self.db_path)
+        conn.execute("""
+            INSERT INTO companion_agents (agent_id, agent_type, purpose, system_prompt,
+                state, config, created_at, schema_version)
+            VALUES (?, ?, ?, ?, 'spawned', ?, ?, '1.0')
+        """, (agent_id, agent_type, purpose, system_prompt,
+              json.dumps(config or {}), now_iso()))
+        conn.commit()
+        conn.close()
+        self._audit(agent_id, None, "spawn", "Agent created",
+                   f"type={agent_type}, purpose={purpose[:100]}")
+        return agent_id
+    # ============================================================
+    # Task Assignment & Lifecycle
+    # ============================================================
+    def assign_task(self, agent_id: str, description: str,
+                    max_iterations: int = 3, time_budget_s: int = 3600) -> str:
+        """
+        Assign a bounded task to a companion agent.
+        Args:
+            agent_id: The companion agent to assign to
+            description: What the agent should do
+            max_iterations: Max retry loops (§3 iteration budget)
+            time_budget_s: Max time in seconds (Kill Heuristic)
+        Returns:
+            task_id: Unique identifier for this task
+        """
+        task_id = gen_id("TASK")
+        conn = get_db(self.db_path)
+        conn.execute("""
+            INSERT INTO agent_tasks (task_id, agent_id, description, state,
+                max_iterations, time_budget_s, started_at, schema_version)
+            VALUES (?, ?, ?, 'preflight', ?, ?, ?, '1.0')
+        """, (task_id, agent_id, description, max_iterations, time_budget_s, now_iso()))
+        conn.commit()
+        conn.close()
+        self._audit(agent_id, task_id, "preflight", "Task assigned", description)
+        return task_id
+    def run_task(self, task_id: str) -> dict:
+        """
+        Execute the full ECC lifecycle for a task.
+        Lifecycle: preflight → planning → executing → postflight → completed/halted
+        Returns dict with task result and any proposals generated.
+        """
+        conn = get_db(self.db_path)
+        task_row = conn.execute("SELECT * FROM agent_tasks WHERE task_id = ?",
+                               (task_id,)).fetchone()
+        if not task_row:
+            conn.close()
+            raise ValueError(f"Task {task_id} not found")
+        task = dict(task_row)
+        agent_row = conn.execute("SELECT * FROM companion_agents WHERE agent_id = ?",
+                                (task["agent_id"],)).fetchone()
+        if not agent_row:
+            conn.close()
+            raise ValueError(f"Agent {task['agent_id']} not found")
+        agent = dict(agent_row)
+        conn.close()
+        start_time = time.time()
+        result = {"task_id": task_id, "proposals": [], "status": "unknown", "audit": []}
+        try:
+            # §1: PRE-FLIGHT
+            self._update_task_state(task_id, "preflight")
+            preflight_ok = self._preflight(task, agent)
+            if not preflight_ok:
+                self._halt_task(task_id, "Preflight checks failed")
+                result["status"] = "halted"
+                return result
+            # §2: PLANNING
+            self._update_task_state(task_id, "planning")
+            plan = self._plan(task, agent)
+            # The Obviousness Test (§2): Is there a simple direct solution?
+            if plan.get("obvious_solution"):
+                self._audit(task["agent_id"], task_id, "planning",
+                           "Obviousness test passed", plan["obvious_solution"])
+            # §3: EXECUTION (with iteration budget)
+            self._update_task_state(task_id, "executing")
+            proposals = self._execute(task, agent, plan, start_time)
+            result["proposals"] = proposals
+            # §4: POST-FLIGHT
+            self._update_task_state(task_id, "postflight")
+            postflight_result = self._postflight(task, agent, proposals)
+            result["validation"] = postflight_result
+            # Mark completed
+            self._update_task_state(task_id, "completed")
+            result["status"] = "completed"
+            # Update agent stats
+            conn = get_db(self.db_path)
+            conn.execute("""
+                UPDATE companion_agents
+                SET total_tasks_completed = total_tasks_completed + 1,
+                    total_proposals_made = total_proposals_made + ?
+                WHERE agent_id = ?
+            """, (len(proposals), task["agent_id"]))
+            conn.commit()
+            conn.close()
+        except Exception as e:
+            self._halt_task(task_id, f"Execution error: {str(e)}")
+            result["status"] = "halted"
+            result["error"] = str(e)
+        return result
+    # ============================================================
+    # §1: Pre-Flight Implementation
+    # ============================================================
+    def _preflight(self, task: dict, agent: dict) -> bool:
+        """
+        ECC §1: Context loading, reality validation, assumption logging.
+        Checks:
+        - Architecture docs loaded (Wake-Up Routine)
+        - Database is accessible
+        - Agent is not retired
+        - Task description is non-empty
+        """
+        # Wake-Up: Read architecture docs
+        context = self._wake_up()
+        checks = []
+        # Check architecture docs loaded
+        checks.append(("ARCHITECTURE.md loaded", "WARNING" not in context.get("ARCHITECTURE.md", "WARNING")))
+        checks.append(("AGENTS.md loaded", "WARNING" not in context.get("AGENTS.md", "WARNING")))
+        # Check DB accessible
+        try:
+            conn = get_db(self.db_path)
+            conn.execute("SELECT 1").fetchone()
+            conn.close()
+            checks.append(("Database accessible", True))
+        except Exception:
+            checks.append(("Database accessible", False))
+        # Check agent state
+        checks.append(("Agent not retired", agent["state"] != "retired"))
+        # Check task has content
+        checks.append(("Task description non-empty", bool(task.get("description", "").strip())))
+        # Log all checks
+        all_passed = all(passed for _, passed in checks)
+        details = json.dumps([{"check": name, "passed": passed} for name, passed in checks])
+        self._audit(task["agent_id"], task["task_id"], "preflight",
+                   "Preflight checks" + (" PASSED" if all_passed else " FAILED"), details)
+        return all_passed
+    # ============================================================
+    # §2: Planning Implementation
+    # ============================================================
+    def _plan(self, task: dict, agent: dict) -> dict:
+        """
+        ECC §2: Build execution plan.
+        Includes:
+        - Obviousness test
+        - Reversibility classification
+        - Idempotence verification
+        - Confidence assessment
+        """
+        plan = {
+            "task_description": task["description"],
+            "agent_type": agent["agent_type"],
+            "steps": [],
+            "obvious_solution": None,
+            "reversible": True,
+            "confidence": 0.5,
+        }
+        # Obviousness Test: Can we solve this without a complex plan?
+        simple_tasks = ["audit", "check", "list", "count", "summarize"]
+        if any(word in task["description"].lower() for word in simple_tasks):
+            plan["obvious_solution"] = "Direct query against database — no complex planning needed"
+            plan["confidence"] = 0.8
+        # Build step list based on agent type
+        if agent["agent_type"] == "DataQualityAuditor":
+            plan["steps"] = [
+                "Query recent claims from database",
+                "Check each claim's evidence_strength vs epistemic_tag consistency",
+                "Flag claims where confidence > 0.8 but evidence is indirect",
+                "Generate proposals for flagged claims",
+            ]
+        elif agent["agent_type"] == "PromptOptimizer":
+            plan["steps"] = [
+                "Load current prompts from AGENTS.md",
+                "Run baseline evaluation against golden dataset",
+                "Identify weakest-performing task (lowest metric)",
+                "Generate 2-3 prompt variants",
+                "Propose A/B test with regression gate",
+            ]
+        elif agent["agent_type"] == "DomainExpander":
+            plan["steps"] = [
+                "Analyze current training data domain coverage",
+                "Identify underrepresented STEM fields",
+                "Generate 50-100 synthetic examples per field",
+                "Validate all examples produce valid JSON",
+                "Propose training data addition",
+            ]
+        elif agent["agent_type"] == "CalibrationAnalyst":
+            plan["steps"] = [
+                "Query calibration_log for all entries",
+                "Compute Brier scores per claim category",
+                "Identify systematic miscalibration patterns",
+                "Propose weight adjustments with evidence",
+            ]
+        elif agent["agent_type"] == "CitationChaser":
+            plan["steps"] = [
+                "Identify high-confidence canonical claims",
+                "Search for recent papers citing the same DOIs",
+                "Flag any new contradicting evidence",
+                "Propose new papers for ingestion",
+            ]
+        else:
+            # Custom agent — generic plan
+            plan["steps"] = [
+                "Analyze task requirements",
+                "Gather relevant data from database",
+                "Generate proposals based on findings",
+                "Self-validate proposals for consistency",
+            ]
+        # Save plan to DB
+        conn = get_db(self.db_path)
+        conn.execute("UPDATE agent_tasks SET plan = ? WHERE task_id = ?",
+                    (json.dumps(plan), task["task_id"]))
+        conn.commit()
+        conn.close()
+        self._audit(task["agent_id"], task["task_id"], "planning",
+                   "Plan created", f"{len(plan['steps'])} steps, confidence={plan['confidence']}")
+        return plan
+    # ============================================================
+    # §3: Execution Implementation
+    # ============================================================
+    def _execute(self, task: dict, agent: dict, plan: dict,
+                 start_time: float) -> list:
+        """
+        ECC §3: Bounded execution with iteration budget and kill heuristic.
+        Returns list of Proposal objects.
+        """
+        proposals = []
+        iteration = 0
+        max_iter = task.get("max_iterations", 3)
+        time_budget = task.get("time_budget_s", 3600)
+        while iteration < max_iter:
+            iteration += 1
+            # Kill Heuristic: check time budget
+            elapsed = time.time() - start_time
+            if elapsed > time_budget * 1.5:  # 50% over budget = HALT
+                self._audit(task["agent_id"], task["task_id"], "executing",
+                           "KILL HEURISTIC TRIGGERED",
+                           f"Elapsed {elapsed:.0f}s > budget {time_budget}s × 1.5")
+                break
+            # JIT State Verification (§3): Check DB hasn't been modified externally
+            # (In a full system, this would check file hashes / row versions)
+            # Execute based on agent type using the brain
+            if self.brain:
+                # Use the AI brain to execute the agent's task
+                messages = [
+                    {"role": "system", "content": agent["system_prompt"]},
+                    {"role": "user", "content": self._build_execution_prompt(task, plan, iteration)}
+                ]
+                try:
+                    if self.brain.backend == "local":
+                        raw = self.brain._generate_local(messages)
+                    else:
+                        raw = self.brain._generate_api(messages)
+                    # Parse proposals from response
+                    parsed = self._parse_proposals(raw, task["agent_id"], task["task_id"])
+                    proposals.extend(parsed)
+                    self._audit(task["agent_id"], task["task_id"], "executing",
+                               f"Iteration {iteration}: generated {len(parsed)} proposals",
+                               f"Total proposals: {len(proposals)}")
+                    # If we got results, we can stop iterating
+                    if parsed:
+                        break
+                except Exception as e:
+                    self._audit(task["agent_id"], task["task_id"], "executing",
+                               f"Iteration {iteration}: error", str(e))
+                    if iteration >= max_iter:
+                        break
+                    # Otherwise retry
+            else:
+                # No brain available — generate placeholder proposals from plan
+                self._audit(task["agent_id"], task["task_id"], "executing",
+                           "No brain configured",
+                           "Generating structural proposals without AI inference")
+                proposal = self._create_proposal(
+                    task["agent_id"], task["task_id"],
+                    proposal_type="architecture_change",
+                    description=f"[Placeholder] Agent {agent['agent_type']}: {task['description']}",
+                    changes={"note": "Brain not configured — this is a structural placeholder"},
+                    evidence="Companion agent spawned but requires API key or local model",
+                    estimated_impact={"metric": "system_coverage", "expected_delta": 0.0},
+                    risk="low",
+                    reversible=True,
+                )
+                proposals.append(proposal)
+                break
+        # Update iteration count
+        conn = get_db(self.db_path)
+        conn.execute("UPDATE agent_tasks SET iterations_used = ? WHERE task_id = ?",
+                    (iteration, task["task_id"]))
+        conn.commit()
+        conn.close()
+        return proposals
+    def _build_execution_prompt(self, task: dict, plan: dict, iteration: int) -> str:
+        """Build the user prompt for the agent's execution phase."""
+        # Gather relevant DB context
+        conn = get_db(self.db_path)
+        claim_count = conn.execute("SELECT COUNT(*) FROM claims").fetchone()[0]
+        conflict_count = conn.execute("SELECT COUNT(*) FROM conflicts WHERE resolution_status = 'Unresolved'").fetchone()[0]
+        # Get sample claims for context
+        recent_claims = conn.execute(
+            "SELECT claim_id, text, epistemic_tag, confidence FROM claims ORDER BY created_at DESC LIMIT 10"
+        ).fetchall()
+        conn.close()
+        claims_context = "\n".join(
+            f"  - [{dict(c)['claim_id']}] ({dict(c)['epistemic_tag']}, conf={from_fixed(dict(c)['confidence']):.3f}): {dict(c)['text'][:100]}..."
+            for c in recent_claims
+        )
+        return f"""TASK: {task['description']}
+ITERATION: {iteration}
+PLAN STEPS: {json.dumps(plan.get('steps', []))}
+CURRENT DATABASE STATE:
+- Total claims: {claim_count}
+- Unresolved conflicts: {conflict_count}
+- Recent claims:
+{claims_context}
+Based on your role and the above context, execute your task and output your findings
+as JSON proposals. Each proposal must include: proposal_type, description, changes, evidence,
+estimated_impact, risk_assessment, and reversible flag."""
+    # ============================================================
+    # §4: Post-Flight Implementation
+    # ============================================================
+    def _postflight(self, task: dict, agent: dict, proposals: list) -> dict:
+        """
+        ECC §4: Validate results, check definition of done, log meta-learning.
+        """
+        validation = {
+            "proposals_count": len(proposals),
+            "all_valid_json": True,
+            "invariants_preserved": True,
+            "expert_intuition_check": "pending_human_review",
+            "definition_of_done": {
+                "aligns_with_intent": True,
+                "invariants_hold": True,
+                "no_nfr_regression": True,
+            }
+        }
+        # Validate each proposal
+        for p in proposals:
+            if isinstance(p, dict):
+                # Check required fields
+                required = ["proposal_type", "description", "changes"]
+                if not all(k in p for k in required):
+                    validation["all_valid_json"] = False
+                # Check no proposal directly modifies claims (invariant)
+                changes = p.get("changes", {})
+                if "direct_db_write" in str(changes).lower():
+                    validation["invariants_preserved"] = False
+        self._audit(task["agent_id"], task["task_id"], "postflight",
+                   "Validation complete", json.dumps(validation))
+        return validation
+    # ============================================================
+    # Proposal Management
+    # ============================================================
+    def _create_proposal(self, agent_id: str, task_id: str,
+                         proposal_type: str, description: str,
+                         changes: dict, evidence: str,
+                         estimated_impact: dict, risk: str,
+                         reversible: bool) -> dict:
+        """Create and store a proposal."""
+        proposal_id = gen_id("PROP")
+        conn = get_db(self.db_path)
+        conn.execute("""
+            INSERT INTO proposals (proposal_id, agent_id, task_id, proposal_type,
+                description, changes, evidence, estimated_impact, risk_assessment,
+                reversible, status, created_at, schema_version)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'proposed', ?, '1.0')
+        """, (proposal_id, agent_id, task_id, proposal_type, description,
+              json.dumps(changes), evidence, json.dumps(estimated_impact),
+              risk, int(reversible), now_iso()))
+        conn.commit()
+        conn.close()
+        return {
+            "proposal_id": proposal_id,
+            "agent_id": agent_id,
+            "proposal_type": proposal_type,
+            "description": description,
+            "changes": changes,
+            "evidence": evidence,
+            "estimated_impact": estimated_impact,
+            "risk_assessment": risk,
+            "reversible": reversible,
+            "status": "proposed",
+        }
+    def _parse_proposals(self, raw_output: str, agent_id: str, task_id: str) -> list:
+        """Parse proposals from agent's raw output."""
+        proposals = []
+        # Try to extract JSON
+        text = raw_output.strip()
+        if text.startswith("```"):
+            text = text.split("```")[1]
+            if text.startswith("json"):
+                text = text[4:]
+            text = text.strip()
+        try:
+            data = json.loads(text)
+            # Handle single proposal or list
+            items = data if isinstance(data, list) else [data]
+            for item in items:
+                if isinstance(item, dict) and "proposal_type" in item:
+                    p = self._create_proposal(
+                        agent_id, task_id,
+                        item.get("proposal_type", "unknown"),
+                        item.get("description", ""),
+                        item.get("changes", {}),
+                        item.get("evidence", ""),
+                        item.get("estimated_impact", {}),
+                        item.get("risk_assessment", "low"),
+                        item.get("reversible", True),
+                    )
+                    proposals.append(p)
+        except json.JSONDecodeError:
+            # If JSON parsing fails, create a raw-text proposal
+            proposals.append(self._create_proposal(
+                agent_id, task_id,
+                "raw_finding",
+                raw_output[:500],
+                {"raw_output": raw_output},
+                "Agent output was not parseable JSON",
+                {"metric": "unknown", "expected_delta": 0},
+                "low", True,
+            ))
+        return proposals
+    def get_proposals(self, agent_id: str = None, status: str = None) -> list:
+        """Get proposals, optionally filtered by agent and/or status."""
+        conn = get_db(self.db_path)
+        conditions = []
+        params = []
+        if agent_id:
+            conditions.append("agent_id = ?")
+            params.append(agent_id)
+        if status:
+            conditions.append("status = ?")
+            params.append(status)
+        where = " AND ".join(conditions) if conditions else "1=1"
+        rows = conn.execute(
+            f"SELECT * FROM proposals WHERE {where} ORDER BY created_at DESC",
+            params
+        ).fetchall()
+        conn.close()
+        results = []
+        for row in rows:
+            d = dict(row)
+            d["changes"] = json.loads(d.get("changes", "{}"))
+            d["estimated_impact"] = json.loads(d.get("estimated_impact", "{}"))
+            results.append(d)
+        return results
+    def approve_proposal(self, proposal_id: str, reviewed_by: str = "human") -> bool:
+        """Human approves a proposal."""
+        conn = get_db(self.db_path)
+        conn.execute("""
+            UPDATE proposals SET status = 'approved', reviewed_at = ?, reviewed_by = ?
+            WHERE proposal_id = ?
+        """, (now_iso(), reviewed_by, proposal_id))
+        conn.commit()
+        conn.close()
+        return True
+    def reject_proposal(self, proposal_id: str, reason: str,
+                        reviewed_by: str = "human") -> bool:
+        """Human rejects a proposal with documented reason."""
+        conn = get_db(self.db_path)
+        conn.execute("""
+            UPDATE proposals SET status = 'rejected', reviewed_at = ?,
+                reviewed_by = ?, rejection_reason = ?
+            WHERE proposal_id = ?
+        """, (now_iso(), reviewed_by, reason, proposal_id))
+        conn.commit()
+        conn.close()
+        return True
+    # ============================================================
+    # Agent Management
+    # ============================================================
+    def list_companions(self, include_retired: bool = False) -> list:
+        """List all companion agents."""
+        conn = get_db(self.db_path)
+        if include_retired:
+            rows = conn.execute("SELECT * FROM companion_agents ORDER BY created_at DESC").fetchall()
+        else:
+            rows = conn.execute(
+                "SELECT * FROM companion_agents WHERE state != 'retired' ORDER BY created_at DESC"
+            ).fetchall()
+        conn.close()
+        return [dict(r) for r in rows]
+    def retire_companion(self, agent_id: str) -> bool:
+        """Retire a companion agent. Immutable — cannot be unretired."""
+        conn = get_db(self.db_path)
+        conn.execute("""
+            UPDATE companion_agents SET state = 'retired', retired_at = ?
+            WHERE agent_id = ?
+        """, (now_iso(), agent_id))
+        conn.commit()
+        conn.close()
+        self._audit(agent_id, None, "postflight", "Agent retired", "")
+        return True
+    def get_audit_log(self, agent_id: str = None, limit: int = 50) -> list:
+        """Get audit log entries."""
+        conn = get_db(self.db_path)
+        if agent_id:
+            rows = conn.execute(
+                "SELECT * FROM agent_audit_log WHERE agent_id = ? ORDER BY timestamp DESC LIMIT ?",
+                (agent_id, limit)
+            ).fetchall()
+        else:
+            rows = conn.execute(
+                "SELECT * FROM agent_audit_log ORDER BY timestamp DESC LIMIT ?",
+                (limit,)
+            ).fetchall()
+        conn.close()
+        return [dict(r) for r in rows]
+    # ============================================================
+    # Memory & Harness Evolution
+    # ============================================================
+    def set_memory(self, key: str, value: str, category: str = "assumption"):
+        """Store a persistent memory/assumption with validation timestamp."""
+        conn = get_db(self.db_path)
+        conn.execute("""
+            INSERT OR REPLACE INTO memory_store (key, value, last_validated, category)
+            VALUES (?, ?, ?, ?)
+        """, (key, value, now_iso(), category))
+        conn.commit()
+        conn.close()
+    def get_memory(self, key: str) -> Optional[dict]:
+        """Retrieve a stored memory/assumption."""
+        conn = get_db(self.db_path)
+        row = conn.execute("SELECT * FROM memory_store WHERE key = ?", (key,)).fetchone()
+        conn.close()
+        return dict(row) if row else None
+    def propose_harness_evolution(self, rule_section: str, amendment: str,
+                                  reason: str, proposed_by: str) -> int:
+        """
+        §4 Meta-Learning: Propose an amendment to the ECC Harness rules.
+        Requires human approval before taking effect.
+        """
+        conn = get_db(self.db_path)
+        cursor = conn.execute("""
+            INSERT INTO harness_evolution (rule_section, amendment, reason,
+                proposed_by, timestamp, approved)
+            VALUES (?, ?, ?, ?, ?, 0)
+        """, (rule_section, amendment, reason, proposed_by, now_iso()))
+        conn.commit()
+        evo_id = cursor.lastrowid
+        conn.close()
+        return evo_id
+    # ============================================================
+    # Internal Utilities
+    # ============================================================
+    def _update_task_state(self, task_id: str, state: str):
+        """Update task lifecycle state."""
+        conn = get_db(self.db_path)
+        updates = {"state": state}
+        if state == "completed":
+            conn.execute("UPDATE agent_tasks SET state = ?, completed_at = ? WHERE task_id = ?",
+                        (state, now_iso(), task_id))
+        else:
+            conn.execute("UPDATE agent_tasks SET state = ? WHERE task_id = ?",
+                        (state, task_id))
+        conn.commit()
+        conn.close()
+    def _halt_task(self, task_id: str, reason: str):
+        """Halt a task (kill heuristic or error)."""
+        conn = get_db(self.db_path)
+        conn.execute("""
+            UPDATE agent_tasks SET state = 'halted', kill_reason = ?, completed_at = ?
+            WHERE task_id = ?
+        """, (reason, now_iso(), task_id))
+        conn.commit()
+        conn.close()
+    def _audit(self, agent_id: str, task_id: Optional[str], phase: str,
+               action: str, details: str, confidence: float = 0.5,
+               deviation: str = ""):
+        """Write an immutable audit log entry."""
+        conn = get_db(self.db_path)
+        conn.execute("""
+            INSERT INTO agent_audit_log (entry_id, agent_id, task_id, phase,
+                action, details, confidence, deviation, timestamp)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """, (gen_id("AUDIT"), agent_id, task_id, phase, action, details,
+              confidence, deviation, now_iso()))
+        conn.commit()
+        conn.close()