Sprint 10C: lora_plan.py + distill.py — adapter/distillation planning (dry-run, no GPU required)
Browse files
purpose_agent/optimization/lora_plan.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
lora_plan.py + distill.py — Adapter training plans and distillation runner.
|
| 3 |
+
|
| 4 |
+
These are DRY-RUN planning modules. They produce configs and plans
|
| 5 |
+
but do NOT execute training (that requires purpose-agent[train] extra).
|
| 6 |
+
|
| 7 |
+
Key rule: no distillation without eval data AND ROI check.
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
from typing import Any, Literal
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass
class DistillationPlan:
    """
    Plan for creating a task-native mini-model or adapter.

    Modes:
        none → no optimization needed (performance is fine)
        prompt_pack → optimize prompts only (cheapest)
        lora → LoRA/QLoRA adapter on base model
        distill → full knowledge distillation teacher→student
        pdq → prune → distill → quantize (maximum compression)
    """

    mode: Literal["none", "prompt_pack", "lora", "distill", "pdq"] = "none"
    teacher_model: str = ""  # Model currently serving the task (the teacher)
    student_base: str = ""  # Base checkpoint the adapter/student starts from
    dataset_path: str = ""  # Training examples
    eval_path: str = ""  # Held-out eval set
    target_format: str = "gguf"  # Export format for the finished artifact
    acceptance_score: float = 0.9  # Candidate must achieve this on eval
    rollback_model: str = ""  # What to revert to if candidate fails
    estimated_cost_usd: float = 0.0
    estimated_time_hours: float = 0.0
    reason: str = ""  # Human-readable justification for the chosen mode
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def requires_gpu(self) -> bool:
        """True for any mode that performs weight updates."""
        return self.mode in ("lora", "distill", "pdq")

    @property
    def requires_train_extra(self) -> bool:
        """True when the purpose-agent[train] extra is needed.

        Currently identical to ``requires_gpu``: exactly the
        weight-updating modes need the training dependencies.
        """
        return self.requires_gpu

    def to_dict(self) -> dict[str, Any]:
        """Serialize the plan to a JSON-friendly dict.

        Includes the derived ``requires_gpu`` flag and the free-form
        ``metadata`` dict (previously omitted, which silently lost
        information on serialization round-trips).
        """
        return {
            "mode": self.mode, "teacher_model": self.teacher_model,
            "student_base": self.student_base, "dataset_path": self.dataset_path,
            "eval_path": self.eval_path, "target_format": self.target_format,
            "acceptance_score": self.acceptance_score, "rollback_model": self.rollback_model,
            "estimated_cost_usd": self.estimated_cost_usd,
            "estimated_time_hours": self.estimated_time_hours,
            "reason": self.reason, "requires_gpu": self.requires_gpu,
            "metadata": self.metadata,
        }

    def summary(self) -> str:
        """Return a short human-readable description of the plan."""
        if self.mode == "none":
            return "No optimization needed."
        return (
            f"Plan: {self.mode}\n"
            f" Teacher: {self.teacher_model}\n"
            f" Student: {self.student_base}\n"
            f" Dataset: {self.dataset_path}\n"
            f" Acceptance: {self.acceptance_score:.0%}\n"
            f" Est. cost: ${self.estimated_cost_usd:.2f}\n"
            f" Est. time: {self.estimated_time_hours:.1f}h\n"
            f" Reason: {self.reason}"
        )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def plan_distillation(
    fingerprint: dict[str, Any],
    dataset_size: int,
    current_model: str = "",
    target_model: str = "",
    has_gpu: bool = False,
) -> DistillationPlan:
    """
    Create a distillation plan based on capability fingerprint and available resources.

    Rules:
    - No dataset → prompt_pack only
    - Dataset < 100 examples → prompt_pack only
    - Dataset 100-1000 + GPU → LoRA
    - Dataset > 1000 + GPU → full distill
    - No GPU → prompt_pack regardless

    NOTE(review): ``fingerprint`` is accepted for interface stability but is
    not consulted by the current threshold logic — confirm intended use.
    """
    # Below ten examples nothing is worth doing, not even prompt tuning.
    if dataset_size < 10:
        return DistillationPlan(mode="none", reason="Insufficient data for any optimization")

    # Weight updates need both a GPU and enough data; otherwise stay prompt-only.
    lacks_data_for_training = dataset_size < 100
    if lacks_data_for_training or not has_gpu:
        blocker = "No GPU available" if not has_gpu else "Dataset too small for weight updates"
        return DistillationPlan(
            mode="prompt_pack",
            teacher_model=current_model,
            estimated_cost_usd=0.0,
            estimated_time_hours=0.01,
            reason=f"{blocker} → prompt optimization only",
        )

    # Mid-size dataset (100–999): a LoRA adapter on the chosen/current base.
    if dataset_size < 1000:
        return DistillationPlan(
            mode="lora",
            teacher_model=current_model,
            student_base=target_model or current_model,
            rollback_model=current_model,
            acceptance_score=0.9,
            estimated_cost_usd=2.0,
            estimated_time_hours=1.0,
            reason=f"Dataset ({dataset_size} examples) suitable for LoRA adapter",
        )

    # Plenty of data (1000+): full teacher→student knowledge distillation.
    return DistillationPlan(
        mode="distill",
        teacher_model=current_model,
        student_base=target_model or "Qwen/Qwen2.5-1.5B-Instruct",
        rollback_model=current_model,
        acceptance_score=0.85,
        estimated_cost_usd=10.0,
        estimated_time_hours=4.0,
        reason=f"Large dataset ({dataset_size} examples) → full distillation viable",
    )
|