SofiTesfay2010
/

aria-llm

Model card Files Files and versions

xet

Community

SofiTesfay2010 committed 9 days ago

Commit

5d94b1c

verified ·

1 Parent(s): b2bebdd

v0.3: core with save/load calibration + auto-tune

Browse files

Files changed (1) hide show

aria_llm/core.py +147 -50

aria_llm/core.py CHANGED Viewed

@@ -1,21 +1,29 @@
 """
-ARIA Core Module v0.2
 ======================
-Key v0.2 changes:
-- Correction budget: at most max_corrections_per_step correctors fire per step.
-  The highest-severity signal wins. This prevents corrector interference.
-- All correctors receive the global correction_scale from config.
-- Extended calibration phase: no corrections during first calibration_steps.
-- Reliability estimation only counts TRIGGERED signals (not every detection).
 Usage:
     from aria_llm import ARIA, ARIAConfig
-    config = ARIAConfig(calibration_steps=20, sensitivity_k=2.5,
-                        max_corrections_per_step=1, correction_scale=0.1, verbose=True)
     aria = ARIA.attach(model, tokenizer, config=config)
     output = model.generate(input_ids, max_new_tokens=500)
-    print(aria.report_text())
     aria.detach()
 """
@@ -25,6 +33,8 @@ from typing import Optional, Dict, List, Tuple, Any
 from collections import deque
 import time
 import json
 from aria_llm.config import ARIAConfig
 from aria_llm.detectors import (
@@ -66,16 +76,12 @@ class ARIAState:
 class ARIA:
-    """Adaptive Reliability & Integrity Attachment v0.2.
     Hooks into a HuggingFace Transformers model to provide real-time
-    detection and correction of four failure modes:
-    1. Compound Error Accumulation
-    2. Semantic Drift
-    3. Logic Looping
-    4. Median Trap (Lack of "Taste")
-    v0.2: Calibration-first, budget-limited, statistically-grounded.
     """
     def __init__(self, model, tokenizer, config: Optional[ARIAConfig] = None):
@@ -114,6 +120,11 @@ class ARIA:
         self._last_median_signal: Optional[DetectionSignal] = None
         self._last_drift_signal: Optional[DetectionSignal] = None
         self._model_info = self._detect_architecture()
     @classmethod
     def attach(cls, model, tokenizer, config: Optional[ARIAConfig] = None) -> 'ARIA':
@@ -146,7 +157,101 @@ class ARIA:
         self._last_loop_signal = None
         self._last_median_signal = None
         self._last_drift_signal = None
     def _can_correct(self) -> bool:
         return self._step_corrections_this_step < self.config.max_corrections_per_step
@@ -155,7 +260,7 @@ class ARIA:
     def _detect_architecture(self) -> Dict:
         info = {"arch": "unknown", "num_layers": 0, "hidden_dim": 0, "layers_attr": None}
-        for attr in ["model.layers", "transformer.h", "gpt_neox.layers",
                       "model.decoder.layers", "encoder.layer"]:
             parts = attr.split(".")
             obj = self.model
@@ -195,8 +300,6 @@ class ARIA:
     def _install_hooks(self):
         layers = self._get_layers_module()
         if layers is None:
-            if self.config.verbose:
-                print("[ARIA] Warning: Could not detect model layers. Logits-only mode.")
             self._install_output_hook()
             self._attached = True
             return
@@ -239,6 +342,12 @@ class ARIA:
                 self._current_step_id = step_id
                 self._step_corrections_this_step = 0
             drift_signal = self.drift_detector.detect(h)
             self._last_drift_signal = drift_signal
             self.state.record_signal(drift_signal)
@@ -246,12 +355,12 @@ class ARIA:
             candidates = []
             if drift_signal.triggered and self._can_correct():
                 candidates.append(("goal_anchor", drift_signal.severity, "drift"))
-            if (self._last_compound_signal is not None and
                 self._last_compound_signal.triggered and self._can_correct()):
                 candidates.append(("steering", self._last_compound_signal.severity, "compound"))
             else:
                 self.steering_corrector.update_good_state(h)
-            if (self._last_loop_signal is not None and
                 self._last_loop_signal.triggered and self._can_correct()):
                 candidates.append(("trajectory_diverger", self._last_loop_signal.severity, "loop"))
@@ -350,14 +459,9 @@ class ARIA:
     def report(self) -> Dict:
         n = self.state.step
-        avg_r_with_aria = sum(self.state.effective_r) / len(self.state.effective_r) if self.state.effective_r else 1.0
-        baseline_r_list = getattr(self.state, 'baseline_r', [])
-        baseline_r = sum(baseline_r_list) / len(baseline_r_list) if baseline_r_list else 0.95
-        import math
         n_steps = max(n, 1)
-        p_s_baseline = baseline_r ** n_steps if baseline_r > 0 else 0
-        p_s_aria = avg_r_with_aria ** n_steps if avg_r_with_aria > 0 else 0
         correction_counts = {}
         for c in self.state.corrections:
@@ -371,27 +475,24 @@ class ARIA:
         return {
             "summary": {
-                "version": "0.2.0", "total_steps": n_steps,
                 "calibration_steps": self.config.calibration_steps,
                 "sensitivity_k": self.config.sensitivity_k,
                 "correction_scale": self.config.correction_scale,
                 "max_corrections_per_step": self.config.max_corrections_per_step,
-                "baseline_R": round(baseline_r, 4), "aria_R": round(avg_r_with_aria, 4),
-                "R_improvement": round(avg_r_with_aria - baseline_r, 4),
-                "baseline_P_success": f"{p_s_baseline:.6e}",
-                "aria_P_success": f"{p_s_aria:.6e}",
-                "improvement_factor": round(p_s_aria / max(p_s_baseline, 1e-300), 2),
                 "total_corrections": len(self.state.corrections),
-                "total_signals_checked": len(self.state.signals),
                 "elapsed_seconds": round(time.time() - self.state.start_time, 2),
             },
             "corrections_by_type": correction_counts,
             "signals_detected": signal_counts,
             "signals_triggered": trigger_counts,
-            "reliability_curve": {
-                "per_step_R": [round(r, 4) for r in self.state.effective_r[-50:]],
-                "cumulative_R": [round(r, 6) for r in self.state.cumulative_r[-50:]],
-            },
             "calibration_info": {
                 "compound_error": {"mean": self.compound_detector.calibration.mean,
                     "std": self.compound_detector.calibration.std,
@@ -410,12 +511,10 @@ class ARIA:
         r = self.report()
         s = r["summary"]
         lines = [
-            "=" * 60, "  ARIA v0.2 RELIABILITY REPORT", "=" * 60, "",
             f"  Steps monitored:        {s['total_steps']}",
-            f"  Calibration steps:      {s['calibration_steps']}",
-            f"  Sensitivity (k):        {s['sensitivity_k']}",
-            f"  Correction scale:       {s['correction_scale']}",
-            f"  Max corrections/step:   {s['max_corrections_per_step']}",
             f"  Time elapsed:           {s['elapsed_seconds']}s", "",
             "  RELIABILITY (R per step):",
             f"    Baseline (no ARIA):    {s['baseline_R']}",
@@ -436,13 +535,11 @@ class ARIA:
             for name, count in r["signals_triggered"].items():
                 total = r["signals_detected"].get(name, count)
                 lines.append(f"    {name}: {count}/{total} ({count/max(total,1)*100:.1f}% of checks)")
-        lines += ["", "  CALIBRATION BASELINES:"]
-        for det_name, cal in r["calibration_info"].items():
-            if cal["mean"] is not None:
-                lines.append(f"    {det_name}: mean={cal['mean']:.4f}, std={cal['std']:.4f}, threshold={cal['threshold']:.4f}")
         lines += ["", "=" * 60]
         return "\n".join(lines)
     def __repr__(self):
         status = "attached" if self._attached else "detached"
-        return f"ARIA(status={status}, v=0.2, layers={len(self._hooks)} hooks, corrections={len(self.state.corrections)})"

 """
+ARIA Core Module v0.3
 ======================
+v0.3 changes:
+- save_calibration() / load_calibration(): Persist calibration profiles as JSON.
+  Skip the calibration phase on subsequent runs with the same model.
+- auto_tune_correction_scale(): After calibration, automatically set correction_scale
+  based on the observed signal variances. High-variance models get gentler corrections.
+- Calibration profile includes model fingerprint (name + hidden_dim + num_layers)
+  for safety checking.
 Usage:
     from aria_llm import ARIA, ARIAConfig
+    # First run: calibrate and save
+    config = ARIAConfig(auto_tune_correction_scale=True, verbose=True)
     aria = ARIA.attach(model, tokenizer, config=config)
     output = model.generate(input_ids, max_new_tokens=500)
+    aria.save_calibration("profiles/my_model.json")
+    aria.detach()
+    # Subsequent runs: load profile (instant, no calibration needed)
+    aria = ARIA.attach(model, tokenizer, config=ARIAConfig(
+        calibration_profile_path="profiles/my_model.json"))
+    output = model.generate(...)
     aria.detach()
 """
 from collections import deque
 import time
 import json
+import os
+import hashlib
 from aria_llm.config import ARIAConfig
 from aria_llm.detectors import (
 class ARIA:
+    """Adaptive Reliability & Integrity Attachment v0.3.
     Hooks into a HuggingFace Transformers model to provide real-time
+    detection and correction of four failure modes.
+    v0.3: Calibration profiles + auto-tune correction_scale.
     """
     def __init__(self, model, tokenizer, config: Optional[ARIAConfig] = None):
         self._last_median_signal: Optional[DetectionSignal] = None
         self._last_drift_signal: Optional[DetectionSignal] = None
         self._model_info = self._detect_architecture()
+        self._calibration_loaded = False
+        self._auto_tuned = False
+        if self.config.calibration_profile_path:
+            self.load_calibration(self.config.calibration_profile_path)
     @classmethod
     def attach(cls, model, tokenizer, config: Optional[ARIAConfig] = None) -> 'ARIA':
         self._last_loop_signal = None
         self._last_median_signal = None
         self._last_drift_signal = None
+        self._auto_tuned = False
+    def _model_fingerprint(self) -> Dict:
         # Build a small dict identifying the attached model: its name, the layer
         # count and hidden size recorded in self._model_info, and a short hash
         # derived from those three values.
+        model_config = getattr(self.model, "config", None)
         # Falls back to "unknown" when the model has no (truthy) `config` or the
         # config has no `_name_or_path` attribute.
+        name = getattr(model_config, "_name_or_path", "unknown") if model_config else "unknown"
+        return {
+            "model_name": name,
+            "num_layers": self._model_info["num_layers"],
+            "hidden_dim": self._model_info["hidden_dim"],
             # First 12 hex characters of the MD5 of "<name>_<num_layers>_<hidden_dim>".
             # NOTE(review): MD5 appears to serve only as a compact identifier here,
             # not as a security measure -- confirm.
+            "fingerprint_hash": hashlib.md5(
+                f"{name}_{self._model_info['num_layers']}_{self._model_info['hidden_dim']}".encode()
+            ).hexdigest()[:12],
+        }
+    def save_calibration(self, path: str):
         # Write the current calibration state to `path` as JSON and return the
         # profile dict that was written.
         #
         # Create the parent directory if needed; a bare filename resolves to ".".
+        os.makedirs(os.path.dirname(path) if os.path.dirname(path) else ".", exist_ok=True)
+        profile = {
+            "aria_version": "0.3.0",
+            "saved_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
+            "model": self._model_fingerprint(),
             # Snapshot of the config values in effect at save time, plus whether
             # correction_scale came from auto-tuning.
+            "config": {
+                "calibration_steps": self.config.calibration_steps,
+                "sensitivity_k": self.config.sensitivity_k,
+                "correction_scale": self.config.correction_scale,
+                "max_corrections_per_step": self.config.max_corrections_per_step,
+                "auto_tuned": self._auto_tuned,
+            },
+            "detectors": {},
+        }
         # Merge each detector's exported calibration into one dict. Because this
         # uses dict.update, a key exported by a later detector would overwrite
         # the same key from an earlier one.
         # NOTE(review): presumably each export_calibration() returns a dict keyed
         # by detector name -- verify against aria_llm.detectors.
+        profile["detectors"].update(self.compound_detector.export_calibration())
+        profile["detectors"].update(self.drift_detector.export_calibration())
+        profile["detectors"].update(self.loop_detector.export_calibration())
+        profile["detectors"].update(self.median_detector.export_calibration())
+        with open(path, "w") as f:
             # default=str converts any value json cannot serialize natively
             # into its str() form instead of raising.
+            json.dump(profile, f, indent=2, default=str)
+        if self.config.verbose:
+            print(f"[ARIA] Calibration profile saved to {path}")
+        return profile
+    def load_calibration(self, path: str):
         # Read a JSON calibration profile from `path` and apply it to the
         # detectors (and, when the profile was auto-tuned, to correction_scale).
         #
         # Raises FileNotFoundError when `path` does not exist and ValueError when
         # the saved layer count or hidden size differs from the current model's.
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"Calibration profile not found: {path}")
+        with open(path, "r") as f:
+            profile = json.load(f)
+        saved_fp = profile.get("model", {})
+        current_fp = self._model_fingerprint()
         # Only num_layers and hidden_dim are compared; model_name and
         # fingerprint_hash from the profile are not checked here.
+        if (saved_fp.get("num_layers") != current_fp["num_layers"] or
+            saved_fp.get("hidden_dim") != current_fp["hidden_dim"]):
+            raise ValueError(
+                f"Calibration profile mismatch! Saved for layers={saved_fp.get('num_layers')}, "
+                f"dim={saved_fp.get('hidden_dim')}. Current: layers={current_fp['num_layers']}, "
+                f"dim={current_fp['hidden_dim']}")
+        detectors = profile.get("detectors", {})
         # A detector is updated only when its key is present in the profile.
         # Each detector is handed the whole `detectors` dict, not just its own
         # entry.
         # NOTE(review): presumably each detector picks out its own key -- verify
         # against aria_llm.detectors.
+        if "compound_error" in detectors:
+            self.compound_detector.load_calibration(detectors)
+        if "semantic_drift" in detectors:
+            self.drift_detector.load_calibration(detectors)
+        if "logic_loop" in detectors:
+            self.loop_detector.load_calibration(detectors)
+        if "median_trap" in detectors:
+            self.median_detector.load_calibration(detectors)
+        saved_config = profile.get("config", {})
         # The saved correction_scale is restored only when the profile says it
         # was auto-tuned; otherwise the current config value is kept.
+        if saved_config.get("auto_tuned") and "correction_scale" in saved_config:
+            self.config.correction_scale = saved_config["correction_scale"]
+            self._update_corrector_scales(self.config.correction_scale)
+            self._auto_tuned = True
+        self._calibration_loaded = True
+        if self.config.verbose:
+            print(f"[ARIA] Calibration profile loaded from {path}")
+    def auto_tune_correction_scale(self) -> float:
         # Derive correction_scale from the calibration statistics, store it on
         # the config and the correctors, and return the scale now in effect.
         #
         # Collect std/|mean| (coefficient of variation) for four calibration
         # records: compound, drift, and the median detector's top1 and
         # inv_entropy. The loop detector is not included.
+        cvs = []
+        for cal in [self.compound_detector.calibration, self.drift_detector.calibration,
+                     self.median_detector.top1_calibration, self.median_detector.inv_entropy_calibration]:
             # Skip records with no statistics yet or a mean too close to zero
             # to divide by.
+            if cal.mean is not None and cal.std is not None and abs(cal.mean) > 1e-8:
+                cvs.append(cal.std / abs(cal.mean))
         # Nothing usable: leave the scale untouched; _auto_tuned is not set.
+        if not cvs:
+            return self.config.correction_scale
+        avg_cv = sum(cvs) / len(cvs)
         # 0.15 / (1 + avg_cv) shrinks as the average variation grows; the result
         # is clamped to [auto_tune_min_scale, auto_tune_max_scale].
+        new_scale = max(self.config.auto_tune_min_scale,
+                       min(self.config.auto_tune_max_scale, 0.15 / (1.0 + avg_cv)))
+        old_scale = self.config.correction_scale
+        self.config.correction_scale = new_scale
+        self._update_corrector_scales(new_scale)
+        self._auto_tuned = True
+        if self.config.verbose:
+            print(f"[ARIA] Auto-tune correction_scale: {old_scale:.4f} -> {new_scale:.4f} (avg_cv={avg_cv:.3f})")
+        return new_scale
+    def _update_corrector_scales(self, scale: float):
         # Assign `scale` to the correction_scale attribute of all four
         # correctors held by this instance.
+        self.steering_corrector.correction_scale = scale
+        self.goal_anchor.correction_scale = scale
+        self.trajectory_diverger.correction_scale = scale
+        self.taste_amplifier.correction_scale = scale
     def _can_correct(self) -> bool:
         # True while the number of corrections counted for the current step is
         # still below config.max_corrections_per_step.
         return self._step_corrections_this_step < self.config.max_corrections_per_step
     def _detect_architecture(self) -> Dict:
         info = {"arch": "unknown", "num_layers": 0, "hidden_dim": 0, "layers_attr": None}
+        for attr in ["model.layers", "transformer.h", "gpt_neox.layers",
                       "model.decoder.layers", "encoder.layer"]:
             parts = attr.split(".")
             obj = self.model
     def _install_hooks(self):
         layers = self._get_layers_module()
         if layers is None:
             self._install_output_hook()
             self._attached = True
             return
                 self._current_step_id = step_id
                 self._step_corrections_this_step = 0
+            # Auto-tune after calibration completes (once)
+            if (self.config.auto_tune_correction_scale and
+                not self._auto_tuned and not self._calibration_loaded and
+                step_id == self.config.calibration_steps + 1):
+                self.auto_tune_correction_scale()
             drift_signal = self.drift_detector.detect(h)
             self._last_drift_signal = drift_signal
             self.state.record_signal(drift_signal)
             candidates = []
             if drift_signal.triggered and self._can_correct():
                 candidates.append(("goal_anchor", drift_signal.severity, "drift"))
+            if (self._last_compound_signal is not None and
                 self._last_compound_signal.triggered and self._can_correct()):
                 candidates.append(("steering", self._last_compound_signal.severity, "compound"))
             else:
                 self.steering_corrector.update_good_state(h)
+            if (self._last_loop_signal is not None and
                 self._last_loop_signal.triggered and self._can_correct()):
                 candidates.append(("trajectory_diverger", self._last_loop_signal.severity, "loop"))
     def report(self) -> Dict:
         n = self.state.step
+        avg_r = sum(self.state.effective_r) / len(self.state.effective_r) if self.state.effective_r else 1.0
+        bl_r = sum(self.state.baseline_r) / len(self.state.baseline_r) if self.state.baseline_r else 0.95
         n_steps = max(n, 1)
         correction_counts = {}
         for c in self.state.corrections:
         return {
             "summary": {
+                "version": "0.3.0", "total_steps": n_steps,
                 "calibration_steps": self.config.calibration_steps,
                 "sensitivity_k": self.config.sensitivity_k,
                 "correction_scale": self.config.correction_scale,
                 "max_corrections_per_step": self.config.max_corrections_per_step,
+                "auto_tuned": self._auto_tuned,
+                "calibration_loaded": self._calibration_loaded,
+                "baseline_R": round(bl_r, 4), "aria_R": round(avg_r, 4),
+                "R_improvement": round(avg_r - bl_r, 4),
+                "baseline_P_success": f"{bl_r ** n_steps:.6e}",
+                "aria_P_success": f"{avg_r ** n_steps:.6e}",
+                "improvement_factor": round((avg_r ** n_steps) / max(bl_r ** n_steps, 1e-300), 2),
                 "total_corrections": len(self.state.corrections),
                 "elapsed_seconds": round(time.time() - self.state.start_time, 2),
             },
             "corrections_by_type": correction_counts,
             "signals_detected": signal_counts,
             "signals_triggered": trigger_counts,
             "calibration_info": {
                 "compound_error": {"mean": self.compound_detector.calibration.mean,
                     "std": self.compound_detector.calibration.std,
         r = self.report()
         s = r["summary"]
         lines = [
+            "=" * 60, "  ARIA v0.3 RELIABILITY REPORT", "=" * 60, "",
             f"  Steps monitored:        {s['total_steps']}",
+            f"  Correction scale:       {s['correction_scale']}" + (" (auto-tuned)" if s['auto_tuned'] else ""),
+            f"  Calibration loaded:     {s['calibration_loaded']}",
             f"  Time elapsed:           {s['elapsed_seconds']}s", "",
             "  RELIABILITY (R per step):",
             f"    Baseline (no ARIA):    {s['baseline_R']}",
             for name, count in r["signals_triggered"].items():
                 total = r["signals_detected"].get(name, count)
                 lines.append(f"    {name}: {count}/{total} ({count/max(total,1)*100:.1f}% of checks)")
         lines += ["", "=" * 60]
         return "\n".join(lines)
     def __repr__(self):
         # One-line summary: attach status, version tag, hook count, number of
         # recorded corrections, and optional flags.
         status = "attached" if self._attached else "detached"
         # Each flag contributes its text (with a leading space) only when set.
+        loaded = " profile-loaded" if self._calibration_loaded else ""
+        tuned = " auto-tuned" if self._auto_tuned else ""
+        return f"ARIA(status={status}, v=0.3, layers={len(self._hooks)} hooks, corrections={len(self.state.corrections)}{loaded}{tuned})"