v0.3: correctors unchanged from v0.2
Browse files — aria_llm/correctors.py (+7, -15)
aria_llm/correctors.py
CHANGED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
-
ARIA Correctors v0.2
|
| 3 |
=====================
|
| 4 |
|
| 5 |
-
v0.2
|
| 6 |
-
|
| 7 |
-
- All correctors accept a correction_scale parameter (set in ARIAConfig), making it easy to dial down aggression.
|
| 8 |
-
- SteeringCorrector now scales corrections proportional to the hidden
|
| 9 |
-
state's own norm (not a fixed magnitude).
|
| 10 |
-
- TasteAmplifier reduces intervention intensity.
|
| 11 |
|
| 12 |
Grounded in:
|
| 13 |
- CAST (arxiv:2409.05907): Conditional activation steering
|
|
@@ -24,8 +20,7 @@ from dataclasses import dataclass, field
|
|
| 24 |
|
| 25 |
|
| 26 |
class SteeringCorrector:
|
| 27 |
-
"""CAST-pattern conditional activation steering for compound errors.
|
| 28 |
-
v0.2: correction magnitude is relative to the hidden state's own L2 norm."""
|
| 29 |
|
| 30 |
def __init__(self, alpha: float = 1.0, momentum: float = 0.95,
|
| 31 |
correction_scale: float = 0.1):
|
|
@@ -62,8 +57,7 @@ class SteeringCorrector:
|
|
| 62 |
|
| 63 |
|
| 64 |
class GoalAnchor:
|
| 65 |
-
"""Prevents semantic drift by reinforcing goal representation.
|
| 66 |
-
v0.2: correction_strength multiplied by correction_scale for gentler blending."""
|
| 67 |
|
| 68 |
def __init__(self, drift_threshold: float = 0.3, correction_strength: float = 0.2,
|
| 69 |
reanchor_interval: int = 50, correction_scale: float = 0.1):
|
|
@@ -96,8 +90,7 @@ class GoalAnchor:
|
|
| 96 |
|
| 97 |
|
| 98 |
class TrajectoryDiverger:
|
| 99 |
-
"""Breaks logic loops via orthogonal perturbation (Gram-Schmidt).
|
| 100 |
-
v0.2: perturbation magnitude normalized to model's own activation scale."""
|
| 101 |
|
| 102 |
def __init__(self, divergence_strength: float = 0.5, max_breaks: int = 3,
|
| 103 |
correction_scale: float = 0.1):
|
|
@@ -138,8 +131,7 @@ class TrajectoryDiverger:
|
|
| 138 |
|
| 139 |
|
| 140 |
class TasteAmplifier:
|
| 141 |
-
"""Combats the Median Trap by amplifying non-obvious token choices.
|
| 142 |
-
v0.2: Much gentler. Only fires at severity > 0.5, scaled by correction_scale."""
|
| 143 |
|
| 144 |
def __init__(self, temperature_boost: float = 1.15, novelty_bonus: float = 0.05,
|
| 145 |
top_k_suppress: int = 3, correction_scale: float = 0.1):
|
|
|
|
| 1 |
"""
|
| 2 |
+
ARIA Correctors v0.3
|
| 3 |
=====================
|
| 4 |
|
| 5 |
+
Unchanged from v0.2 — the correction logic is stable.
|
| 6 |
+
v0.3 only adds profile persistence at the ARIA core level, not per-corrector.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
Grounded in:
|
| 9 |
- CAST (arxiv:2409.05907): Conditional activation steering
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
class SteeringCorrector:
|
| 23 |
+
"""CAST-pattern conditional activation steering for compound errors."""
|
|
|
|
| 24 |
|
| 25 |
def __init__(self, alpha: float = 1.0, momentum: float = 0.95,
|
| 26 |
correction_scale: float = 0.1):
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
class GoalAnchor:
|
| 60 |
+
"""Prevents semantic drift by reinforcing goal representation."""
|
|
|
|
| 61 |
|
| 62 |
def __init__(self, drift_threshold: float = 0.3, correction_strength: float = 0.2,
|
| 63 |
reanchor_interval: int = 50, correction_scale: float = 0.1):
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
class TrajectoryDiverger:
|
| 93 |
+
"""Breaks logic loops via orthogonal perturbation (Gram-Schmidt)."""
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, divergence_strength: float = 0.5, max_breaks: int = 3,
|
| 96 |
correction_scale: float = 0.1):
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
class TasteAmplifier:
|
| 134 |
+
"""Combats the Median Trap by amplifying non-obvious token choices."""
|
|
|
|
| 135 |
|
| 136 |
def __init__(self, temperature_boost: float = 1.15, novelty_bonus: float = 0.05,
|
| 137 |
top_k_suppress: int = 3, correction_scale: float = 0.1):
|