| """ |
| lora_plan.py + distill.py — Adapter training plans and distillation runner. |
| |
| These are DRY-RUN planning modules. They produce configs and plans |
| but do NOT execute training (that requires purpose-agent[train] extra). |
| |
| Key rule: no distillation without eval data AND ROI check. |
| """ |
| from __future__ import annotations |
| from dataclasses import dataclass, field |
| from typing import Any, Literal |
|
|
|
|
@dataclass
class DistillationPlan:
    """
    Plan for creating a task-native mini-model or adapter.

    Modes:
        none        → no optimization needed (performance is fine)
        prompt_pack → optimize prompts only (cheapest)
        lora        → LoRA/QLoRA adapter on base model
        distill     → full knowledge distillation teacher→student
        pdq         → prune → distill → quantize (maximum compression)
    """
    mode: Literal["none", "prompt_pack", "lora", "distill", "pdq"] = "none"
    teacher_model: str = ""          # model the knowledge comes from
    student_base: str = ""           # base model receiving the adapter/weights
    dataset_path: str = ""           # training data location
    eval_path: str = ""              # held-out eval data location
    target_format: str = "gguf"      # export format for the finished artifact
    acceptance_score: float = 0.9    # minimum eval score to accept the result
    rollback_model: str = ""         # model to fall back to on failure
    estimated_cost_usd: float = 0.0
    estimated_time_hours: float = 0.0
    reason: str = ""                 # human-readable justification for the plan
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def requires_gpu(self) -> bool:
        """True for any mode that updates model weights."""
        return self.mode in {"lora", "distill", "pdq"}

    @property
    def requires_train_extra(self) -> bool:
        """True when executing the plan needs the purpose-agent[train] extra."""
        return self.mode in {"lora", "distill", "pdq"}

    def to_dict(self) -> dict[str, Any]:
        """Serialize the plan to a plain dict (metadata intentionally omitted)."""
        payload: dict[str, Any] = {
            key: getattr(self, key)
            for key in (
                "mode", "teacher_model", "student_base", "dataset_path",
                "eval_path", "target_format", "acceptance_score",
                "rollback_model", "estimated_cost_usd",
                "estimated_time_hours", "reason",
            )
        }
        payload["requires_gpu"] = self.requires_gpu
        return payload

    def summary(self) -> str:
        """Return a short multi-line human-readable description of the plan."""
        if self.mode == "none":
            return "No optimization needed."
        rows = [
            f"Plan: {self.mode}",
            f" Teacher: {self.teacher_model}",
            f" Student: {self.student_base}",
            f" Dataset: {self.dataset_path}",
            f" Acceptance: {self.acceptance_score:.0%}",
            f" Est. cost: ${self.estimated_cost_usd:.2f}",
            f" Est. time: {self.estimated_time_hours:.1f}h",
            f" Reason: {self.reason}",
        ]
        return "\n".join(rows)
|
|
|
|
def plan_distillation(
    fingerprint: dict[str, Any],
    dataset_size: int,
    current_model: str = "",
    target_model: str = "",
    has_gpu: bool = False,
) -> DistillationPlan:
    """
    Create a distillation plan based on available data and resources.

    Decision rules (these mirror the code exactly):
      - dataset_size < 10                    → none (not enough data for anything)
      - dataset_size < 100, or no GPU        → prompt_pack (prompt optimization only)
      - 100 <= dataset_size < 1000, with GPU → lora
      - dataset_size >= 1000, with GPU       → distill

    Args:
        fingerprint: Capability fingerprint of the current model.
            NOTE(review): currently unused by the rules below — presumably
            reserved for a future ROI check; confirm before removing.
        dataset_size: Number of training examples available.
        current_model: Model currently serving the task; used as teacher
            and as the rollback target.
        target_model: Preferred student base. Falls back to current_model
            for LoRA, or to a default 1.5B instruct model for full distillation.
        has_gpu: Whether a GPU is available for weight updates.

    Returns:
        A DistillationPlan describing (not executing) the chosen optimization.
    """
    # Below ~10 examples nothing is worth doing, not even prompt tuning.
    if dataset_size < 10:
        return DistillationPlan(mode="none", reason="Insufficient data for any optimization")

    # Weight updates need both enough data and a GPU; otherwise stay with prompts.
    if dataset_size < 100 or not has_gpu:
        return DistillationPlan(
            mode="prompt_pack",
            teacher_model=current_model,
            reason=f"{'No GPU available' if not has_gpu else 'Dataset too small for weight updates'} → prompt optimization only",
            estimated_cost_usd=0.0,
            estimated_time_hours=0.01,
        )

    # Mid-size dataset: adapter training on (ideally) the same base model.
    if dataset_size < 1000:
        return DistillationPlan(
            mode="lora",
            teacher_model=current_model,
            student_base=target_model or current_model,
            reason=f"Dataset ({dataset_size} examples) suitable for LoRA adapter",
            estimated_cost_usd=2.0,
            estimated_time_hours=1.0,
            acceptance_score=0.9,
            rollback_model=current_model,
        )

    # Large dataset: full teacher→student distillation into a small model.
    # Acceptance is relaxed to 0.85 since the student is much smaller.
    return DistillationPlan(
        mode="distill",
        teacher_model=current_model,
        student_base=target_model or "Qwen/Qwen2.5-1.5B-Instruct",
        reason=f"Large dataset ({dataset_size} examples) → full distillation viable",
        estimated_cost_usd=10.0,
        estimated_time_hours=4.0,
        acceptance_score=0.85,
        rollback_model=current_model,
    )
|
|