Sprint 7: skills/ci.py — skill testing, promotion, rollback, immune scan

08de4bd verified 11 days ago

3.45 kB

	"""
	ci.py — Skill Continuous Integration.

	Every skill mutation must pass CI before activation:
	1. Immune scan (no injection/manipulation)
	2. Eval tests (skill produces expected behavior)
	3. Fitness comparison (new version >= old version, within a 5% tolerance)
	4. Promote or rollback

	Darwinian selection:
	- High-fitness skills get retrieval boost
	- Low-fitness skills are mutated then re-tested
	- Failed skills are archived (never deleted — audit trail)
	"""
	from __future__ import annotations
	import logging
	from typing import Any
	from purpose_agent.skills.schema import SkillCard, SkillGenome
	from purpose_agent.immune import scan_memory
	from purpose_agent.memory import MemoryCard, MemoryKind

	logger = logging.getLogger(__name__)


	class SkillCI:
	    """
	    CI pipeline for skills: scan → test → promote or rollback.

	    ``validate`` runs three gates in order (immune scan, fitness threshold,
	    non-empty procedure) and records every outcome in an in-memory log that
	    is exposed through the ``log`` property.

	    Usage:
	        ci = SkillCI()
	        passed = ci.validate(skill_card)
	        if passed:
	            genome.promote(skill_card.id)
	        else:
	            genome.rollback()
	    """

	    # A new version may score up to 5% below the old one and still be
	    # accepted by compare_fitness().
	    _MIN_FITNESS_RATIO = 0.95

	    def __init__(self, fitness_threshold: float = 0.4):
	        """
	        Args:
	            fitness_threshold: Minimum ``fitness_score`` a card needs to pass
	                the fitness gate in ``validate``.
	        """
	        self.fitness_threshold = fitness_threshold
	        self._scan_log: list[dict[str, Any]] = []

	    def validate(self, card: SkillCard) -> bool:
	        """
	        Run full CI on a skill card.

	        Side effects: sets ``card.status`` to "archived" on the first failing
	        gate, or to "tested" when every gate passes, and appends one entry to
	        the CI log either way.

	        Returns:
	            True if the skill passes all gates, False otherwise.
	        """
	        # Gate 1: Immune scan. The scanner takes a MemoryCard, so the skill's
	        # text fields are wrapped in one for the scan.
	        memory_proxy = MemoryCard(
	            kind=MemoryKind.SKILL_CARD,
	            pattern=card.trigger,
	            strategy=" ".join(card.procedure),
	            content=card.name,
	        )
	        scan_result = scan_memory(memory_proxy)
	        if not scan_result.passed:
	            return self._reject(card, f"immune scan failed: {scan_result.threats}")

	        # Gate 2: Fitness threshold
	        if card.fitness_score < self.fitness_threshold:
	            return self._reject(
	                card,
	                f"fitness {card.fitness_score:.2f} < threshold {self.fitness_threshold}",
	            )

	        # Gate 3: Non-empty procedure
	        if not card.procedure:
	            return self._reject(card, "empty procedure")

	        # All gates passed
	        card.status = "tested"
	        self._log(card, "PASSED", f"fitness={card.fitness_score:.2f}")
	        return True

	    def compare_fitness(self, new: SkillCard, old: SkillCard | None) -> bool:
	        """
	        Compare a new skill version against the old one.

	        Returns:
	            True when there is no old version, or when the new fitness is at
	            least 95% of the old fitness (5% regression tolerance).
	        """
	        if old is None:
	            return True
	        return new.fitness_score >= old.fitness_score * self._MIN_FITNESS_RATIO

	    def mutate(self, card: SkillCard) -> SkillCard:
	        """
	        Create a mutated version of a skill.

	        Every procedure step is prefixed with "[IMPROVED] " and handed to
	        ``card.evolve``; the resulting card is tagged with
	        ``created_by = "mutation"`` for tracking.

	        Returns:
	            The card returned by ``card.evolve``.
	        """
	        mutated = card.evolve(
	            new_procedure=[f"[IMPROVED] {step}" for step in card.procedure]
	        )
	        mutated.created_by = "mutation"
	        return mutated

	    def _reject(self, card: SkillCard, detail: str) -> bool:
	        """Log a rejection, archive the card, and return False."""
	        self._log(card, "REJECTED", detail)
	        card.status = "archived"
	        return False

	    def _log(self, card: SkillCard, result: str, detail: str) -> None:
	        """Append one CI outcome to the in-memory log and emit it at INFO."""
	        entry = {
	            "skill_id": card.id,
	            "name": card.name,
	            "version": card.version,
	            "result": result,
	            "detail": detail,
	        }
	        self._scan_log.append(entry)
	        logger.info(
	            "SkillCI: %s v%s → %s: %s", card.name, card.version, result, detail
	        )

	    @property
	    def log(self) -> list[dict[str, Any]]:
	        """CI outcomes recorded so far, oldest first (the live internal list)."""
	        return self._scan_log