v0.3: correctors unchanged from v0.2
Browse files — aria_llm/correctors.py (+7, -15)
aria_llm/correctors.py
CHANGED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
-
ARIA Correctors v0.2
|
| 3 |
=====================
|
| 4 |
|
| 5 |
-
v0.2
|
| 6 |
-
|
| 7 |
-
- All correctors accept a correction_scale parameter (set in ARIAConfig), making it easy to dial down aggression.
|
| 8 |
-
- SteeringCorrector now scales corrections proportional to the hidden
|
| 9 |
-
state's own norm (not a fixed magnitude).
|
| 10 |
-
- TasteAmplifier reduces intervention intensity.
|
| 11 |
|
| 12 |
Grounded in:
|
| 13 |
- CAST (arxiv:2409.05907): Conditional activation steering
|
|
@@ -24,8 +20,7 @@ from dataclasses import dataclass, field
|
|
| 24 |
|
| 25 |
|
| 26 |
class SteeringCorrector:
|
| 27 |
-
"""CAST-pattern conditional activation steering for compound errors.
|
| 28 |
-
v0.2: correction magnitude is relative to the hidden state's own L2 norm."""
|
| 29 |
|
| 30 |
def __init__(self, alpha: float = 1.0, momentum: float = 0.95,
|
| 31 |
correction_scale: float = 0.1):
|
|
@@ -62,8 +57,7 @@ class SteeringCorrector:
|
|
| 62 |
|
| 63 |
|
| 64 |
class GoalAnchor:
|
| 65 |
-
"""Prevents semantic drift by reinforcing goal representation.
|
| 66 |
-
v0.2: correction_strength multiplied by correction_scale for gentler blending."""
|
| 67 |
|
| 68 |
def __init__(self, drift_threshold: float = 0.3, correction_strength: float = 0.2,
|
| 69 |
reanchor_interval: int = 50, correction_scale: float = 0.1):
|
|
@@ -96,8 +90,7 @@ class GoalAnchor:
|
|
| 96 |
|
| 97 |
|
| 98 |
class TrajectoryDiverger:
|
| 99 |
-
"""Breaks logic loops via orthogonal perturbation (Gram-Schmidt).
|
| 100 |
-
v0.2: perturbation magnitude normalized to model's own activation scale."""
|
| 101 |
|
| 102 |
def __init__(self, divergence_strength: float = 0.5, max_breaks: int = 3,
|
| 103 |
correction_scale: float = 0.1):
|
|
@@ -138,8 +131,7 @@ class TrajectoryDiverger:
|
|
| 138 |
|
| 139 |
|
| 140 |
class TasteAmplifier:
|
| 141 |
-
"""Combats the Median Trap by amplifying non-obvious token choices.
|
| 142 |
-
v0.2: Much gentler. Only fires at severity > 0.5, scaled by correction_scale."""
|
| 143 |
|
| 144 |
def __init__(self, temperature_boost: float = 1.15, novelty_bonus: float = 0.05,
|
| 145 |
top_k_suppress: int = 3, correction_scale: float = 0.1):
|
|
|
|
| 1 |
"""
|
| 2 |
+
ARIA Correctors v0.3
|
| 3 |
=====================
|
| 4 |
|
| 5 |
+
Unchanged from v0.2 — the correction logic is stable.
|
| 6 |
+
v0.3 only adds profile persistence at the ARIA core level, not per-corrector.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
Grounded in:
|
| 9 |
- CAST (arxiv:2409.05907): Conditional activation steering
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
class SteeringCorrector:
|
| 23 |
+
"""CAST-pattern conditional activation steering for compound errors."""
|
|
|
|
| 24 |
|
| 25 |
def __init__(self, alpha: float = 1.0, momentum: float = 0.95,
|
| 26 |
correction_scale: float = 0.1):
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
class GoalAnchor:
|
| 60 |
+
"""Prevents semantic drift by reinforcing goal representation."""
|
|
|
|
| 61 |
|
| 62 |
def __init__(self, drift_threshold: float = 0.3, correction_strength: float = 0.2,
|
| 63 |
reanchor_interval: int = 50, correction_scale: float = 0.1):
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
class TrajectoryDiverger:
|
| 93 |
+
"""Breaks logic loops via orthogonal perturbation (Gram-Schmidt)."""
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, divergence_strength: float = 0.5, max_breaks: int = 3,
|
| 96 |
correction_scale: float = 0.1):
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
class TasteAmplifier:
|
| 134 |
+
"""Combats the Median Trap by amplifying non-obvious token choices."""
|
|
|
|
| 135 |
|
| 136 |
def __init__(self, temperature_boost: float = 1.15, novelty_bonus: float = 0.05,
|
| 137 |
top_k_suppress: int = 3, correction_scale: float = 0.1):
|