Rohan03's picture
Sprint 7: skills/ci.py — skill testing, promotion, rollback, immune scan
08de4bd verified
"""
ci.py — Skill Continuous Integration.
Every skill mutation must pass CI before activation:
1. Immune scan (no injection/manipulation)
2. Eval tests (skill produces expected behavior)
3. Fitness comparison (new version >= old version)
4. Promote or rollback
Darwinian selection:
- High-fitness skills get retrieval boost
- Low-fitness skills are mutated then re-tested
- Failed skills are archived (never deleted — audit trail)
"""
from __future__ import annotations
import logging
from typing import Any
from purpose_agent.skills.schema import SkillCard, SkillGenome
from purpose_agent.immune import scan_memory
from purpose_agent.memory import MemoryCard, MemoryKind
logger = logging.getLogger(__name__)
class SkillCI:
    """
    CI pipeline for skills: scan → test → promote or rollback.

    ``validate`` runs three gates in order (immune scan, fitness threshold,
    non-empty procedure) and records every verdict in an in-memory log.
    Promotion and rollback themselves are left to the caller.

    Usage:
        ci = SkillCI()
        passed = ci.validate(skill_card)
        if passed:
            genome.promote(skill_card.id)
        else:
            genome.rollback()

    Attributes:
        fitness_threshold: Minimum ``fitness_score`` a card needs to pass
            the fitness gate in ``validate``.
        min_fitness_ratio: Fraction of the old version's fitness that a new
            version must reach in ``compare_fitness``.
    """

    def __init__(
        self,
        fitness_threshold: float = 0.4,
        *,
        min_fitness_ratio: float = 0.95,
    ):
        """
        Args:
            fitness_threshold: Cards scoring strictly below this value are
                rejected by ``validate``.
            min_fitness_ratio: Multiplier applied to the old version's
                fitness in ``compare_fitness``. The default 0.95 tolerates
                a 5% regression; pass 1.0 to require no regression at all.
        """
        self.fitness_threshold = fitness_threshold
        self.min_fitness_ratio = min_fitness_ratio
        self._scan_log: list[dict[str, Any]] = []

    def validate(self, card: SkillCard) -> bool:
        """
        Run the CI gates on a skill card.

        Side effects: sets ``card.status`` to ``"tested"`` on success or
        ``"archived"`` on the first failing gate, and appends one entry to
        the CI log either way.

        Returns:
            True if the card passes every gate, False otherwise.
        """
        # Gate 1: immune scan. The scanner operates on memory cards, so the
        # skill is wrapped in a MemoryCard proxy carrying its text fields.
        memory_proxy = MemoryCard(
            kind=MemoryKind.SKILL_CARD,
            pattern=card.trigger,
            strategy=" ".join(card.procedure),
            content=card.name,
        )
        scan_result = scan_memory(memory_proxy)
        if not scan_result.passed:
            return self._reject(card, f"immune scan failed: {scan_result.threats}")

        # Gate 2: fitness threshold (strictly-below fails; equal passes).
        if card.fitness_score < self.fitness_threshold:
            return self._reject(
                card,
                f"fitness {card.fitness_score:.2f} < threshold {self.fitness_threshold}",
            )

        # Gate 3: a skill with no procedure steps has nothing to execute.
        if not card.procedure:
            return self._reject(card, "empty procedure")

        # All gates passed.
        card.status = "tested"
        self._log(card, "PASSED", f"fitness={card.fitness_score:.2f}")
        return True

    def compare_fitness(self, new: SkillCard, old: SkillCard | None) -> bool:
        """
        Compare a new skill version against the previous one.

        The new version is accepted when its fitness is at least
        ``old.fitness_score * min_fitness_ratio`` (5% tolerance by default).
        With no previous version there is nothing to regress against, so
        the new version is always accepted.

        NOTE(review): the multiplicative tolerance assumes non-negative
        fitness scores; for a negative old score it makes the bar stricter
        rather than looser. Confirm the score range against SkillCard.
        """
        if old is None:
            return True
        return new.fitness_score >= old.fitness_score * self.min_fitness_ratio

    def mutate(self, card: SkillCard) -> SkillCard:
        """
        Create a mutated version of a low-fitness skill.

        Every procedure step is prefixed with an "[IMPROVED]" marker for
        tracking, and the evolved card's ``created_by`` is set to
        ``"mutation"``. The evolved card is whatever ``card.evolve``
        returns; mutating an already-mutated card stacks the prefix.
        """
        mutated = card.evolve(
            new_procedure=[f"[IMPROVED] {step}" for step in card.procedure]
        )
        mutated.created_by = "mutation"
        return mutated

    def _reject(self, card: SkillCard, detail: str) -> bool:
        """Log a REJECTED verdict, archive the card, and return False."""
        self._log(card, "REJECTED", detail)
        card.status = "archived"
        return False

    def _log(self, card: SkillCard, result: str, detail: str) -> None:
        """Append a verdict to the in-memory CI log and emit it at INFO level."""
        entry = {
            "skill_id": card.id,
            "name": card.name,
            "version": card.version,
            "result": result,
            "detail": detail,
        }
        self._scan_log.append(entry)
        # Lazy %-style arguments: the message is only formatted when INFO
        # logging is actually enabled.
        logger.info(
            "SkillCI: %s v%s → %s: %s", card.name, card.version, result, detail
        )

    @property
    def log(self) -> list[dict[str, Any]]:
        """CI verdicts recorded so far, oldest first (the live internal list, not a copy)."""
        return self._scan_log