SofiTesfay2010 committed on
Commit
b2bebdd
·
verified ·
1 Parent(s): f712b7c

v0.3: correctors unchanged from v0.2

Browse files
Files changed (1) hide show
  1. aria_llm/correctors.py +7 -15
aria_llm/correctors.py CHANGED
@@ -1,13 +1,9 @@
1
  """
2
- ARIA Correctors v0.2
3
  =====================
4
 
5
- v0.2 changes:
6
- - All correction strengths are multiplied by the global correction_scale
7
- (set in ARIAConfig), making it easy to dial down aggression.
8
- - SteeringCorrector now scales corrections proportional to the hidden
9
- state's own norm (not a fixed magnitude).
10
- - TasteAmplifier reduces intervention intensity.
11
 
12
  Grounded in:
13
  - CAST (arxiv:2409.05907): Conditional activation steering
@@ -24,8 +20,7 @@ from dataclasses import dataclass, field
24
 
25
 
26
  class SteeringCorrector:
27
- """CAST-pattern conditional activation steering for compound errors.
28
- v0.2: correction magnitude is relative to the hidden state's own L2 norm."""
29
 
30
  def __init__(self, alpha: float = 1.0, momentum: float = 0.95,
31
  correction_scale: float = 0.1):
@@ -62,8 +57,7 @@ class SteeringCorrector:
62
 
63
 
64
  class GoalAnchor:
65
- """Prevents semantic drift by reinforcing goal representation.
66
- v0.2: correction_strength multiplied by correction_scale for gentler blending."""
67
 
68
  def __init__(self, drift_threshold: float = 0.3, correction_strength: float = 0.2,
69
  reanchor_interval: int = 50, correction_scale: float = 0.1):
@@ -96,8 +90,7 @@ class GoalAnchor:
96
 
97
 
98
  class TrajectoryDiverger:
99
- """Breaks logic loops via orthogonal perturbation (Gram-Schmidt).
100
- v0.2: perturbation magnitude normalized to model's own activation scale."""
101
 
102
  def __init__(self, divergence_strength: float = 0.5, max_breaks: int = 3,
103
  correction_scale: float = 0.1):
@@ -138,8 +131,7 @@ class TrajectoryDiverger:
138
 
139
 
140
  class TasteAmplifier:
141
- """Combats the Median Trap by amplifying non-obvious token choices.
142
- v0.2: Much gentler. Only fires at severity > 0.5, scaled by correction_scale."""
143
 
144
  def __init__(self, temperature_boost: float = 1.15, novelty_bonus: float = 0.05,
145
  top_k_suppress: int = 3, correction_scale: float = 0.1):
 
1
  """
2
+ ARIA Correctors v0.3
3
  =====================
4
 
5
+ Unchanged from v0.2 — the correction logic is stable.
6
+ v0.3 only adds profile persistence at the ARIA core level, not per-corrector.
 
 
 
 
7
 
8
  Grounded in:
9
  - CAST (arxiv:2409.05907): Conditional activation steering
 
20
 
21
 
22
  class SteeringCorrector:
23
+ """CAST-pattern conditional activation steering for compound errors."""
 
24
 
25
  def __init__(self, alpha: float = 1.0, momentum: float = 0.95,
26
  correction_scale: float = 0.1):
 
57
 
58
 
59
  class GoalAnchor:
60
+ """Prevents semantic drift by reinforcing goal representation."""
 
61
 
62
  def __init__(self, drift_threshold: float = 0.3, correction_strength: float = 0.2,
63
  reanchor_interval: int = 50, correction_scale: float = 0.1):
 
90
 
91
 
92
  class TrajectoryDiverger:
93
+ """Breaks logic loops via orthogonal perturbation (Gram-Schmidt)."""
 
94
 
95
  def __init__(self, divergence_strength: float = 0.5, max_breaks: int = 3,
96
  correction_scale: float = 0.1):
 
131
 
132
 
133
  class TasteAmplifier:
134
+ """Combats the Median Trap by amplifying non-obvious token choices."""
 
135
 
136
  def __init__(self, temperature_boost: float = 1.15, novelty_bonus: float = 0.05,
137
  top_k_suppress: int = 3, correction_scale: float = 0.1):