""" ci.py — Skill Continuous Integration. Every skill mutation must pass CI before activation: 1. Immune scan (no injection/manipulation) 2. Eval tests (skill produces expected behavior) 3. Fitness comparison (new version >= old version) 4. Promote or rollback Darwinian selection: - High-fitness skills get retrieval boost - Low-fitness skills are mutated then re-tested - Failed skills are archived (never deleted — audit trail) """ from __future__ import annotations import logging from typing import Any from purpose_agent.skills.schema import SkillCard, SkillGenome from purpose_agent.immune import scan_memory from purpose_agent.memory import MemoryCard, MemoryKind logger = logging.getLogger(__name__) class SkillCI: """ CI pipeline for skills: scan → test → promote or rollback. Usage: ci = SkillCI() passed = ci.validate(skill_card) if passed: genome.promote(skill_card.id) else: genome.rollback() """ def __init__(self, fitness_threshold: float = 0.4): self.fitness_threshold = fitness_threshold self._scan_log: list[dict[str, Any]] = [] def validate(self, card: SkillCard) -> bool: """ Run full CI on a skill card. Returns True if skill passes all gates. """ # Gate 1: Immune scan memory_proxy = MemoryCard( kind=MemoryKind.SKILL_CARD, pattern=card.trigger, strategy=" ".join(card.procedure), content=card.name, ) scan_result = scan_memory(memory_proxy) if not scan_result.passed: self._log(card, "REJECTED", f"immune scan failed: {scan_result.threats}") card.status = "archived" return False # Gate 2: Fitness threshold if card.fitness_score < self.fitness_threshold: self._log(card, "REJECTED", f"fitness {card.fitness_score:.2f} < threshold {self.fitness_threshold}") card.status = "archived" return False # Gate 3: Non-empty procedure if not card.procedure: self._log(card, "REJECTED", "empty procedure") card.status = "archived" return False # All gates passed card.status = "tested" self._log(card, "PASSED", f"fitness={card.fitness_score:.2f}") return True def compare_fitness(self, new: SkillCard, old: SkillCard | None) -> bool: """Compare new skill version against old. New must be >= old.""" if old is None: return True return new.fitness_score >= old.fitness_score * 0.95 # Allow 5% tolerance def mutate(self, card: SkillCard) -> SkillCard: """ Create a mutated version of a low-fitness skill. Appends "[MUTATED]" marker for tracking. """ mutated = card.evolve( new_procedure=[f"[IMPROVED] {step}" for step in card.procedure] ) mutated.created_by = "mutation" return mutated def _log(self, card: SkillCard, result: str, detail: str) -> None: entry = {"skill_id": card.id, "name": card.name, "version": card.version, "result": result, "detail": detail} self._scan_log.append(entry) logger.info(f"SkillCI: {card.name} v{card.version} → {result}: {detail}") @property def log(self) -> list[dict]: return self._scan_log