feat: activate dormant paradigms — progressive looping, evolution with loss feedback, no progressive_unfreeze

With STE+AdamW (not MeZO), we can afford multi-loop training.
Progressive loop schedule: 1→2→3 loops as training advances.
Evolution engine now receives previous step loss for surprise
detection and memory writes.
Progressive unfreeze disabled by default (counterproductive with backprop).
Browse files- chimera/training/hyper.py +29 -1
chimera/training/hyper.py
CHANGED
|
@@ -119,10 +119,38 @@ class ProgressiveUnfreezer:
|
|
| 119 |
return self._current
|
| 120 |
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
def patch_training_loops(model, num_loops=1) -> None:
|
|
|
|
| 123 |
if hasattr(model, "loop_controller"):
|
| 124 |
model.loop_controller.loop_default = num_loops
|
| 125 |
model.loop_controller.loop_min = 1
|
| 126 |
-
model.loop_controller.loop_max = max(num_loops,
|
| 127 |
if hasattr(model, "evo_every_n_layers"):
|
| 128 |
model.evo_every_n_layers = max(model.evo_every_n_layers, 8)
|
|
|
|
| 119 |
return self._current
|
| 120 |
|
| 121 |
|
| 122 |
+
class ProgressiveLoopScheduler:
    """Gradually increase Parcae loop depth during training.

    With STE+AdamW (not MeZO), multi-loop training is affordable.
    A progressive schedule avoids instability from deep loops early on.

    Default: loops=1 for the first 20% of steps, loops=2 for the next
    40%, loops=3 for the final 40% — each phase clamped to ``max_loops``.
    """

    def __init__(self, total_steps: int, max_loops: int = 3):
        self._total = total_steps
        self._max_loops = max_loops
        # Schedule: (fraction_done_threshold, num_loops).
        # Every phase is clamped to max_loops — previously only the last
        # phase was, so max_loops=1 still yielded 2 loops mid-training.
        self._schedule = [
            (0.20, min(1, max_loops)),   # First 20%: stabilize weights
            (0.60, min(2, max_loops)),   # Next 40%: learn to iterate
            (1.01, min(3, max_loops)),   # Last 40%: deep refinement
        ]

    def get_loops(self, step: int) -> int:
        """Return the loop count to use at training ``step`` (0-based)."""
        # max(1, ...) guards against total_steps == 0.
        frac = step / max(1, self._total)
        for threshold, loops in self._schedule:
            if frac < threshold:
                return loops
        # step ran past total_steps: stay at the deepest phase.
        return self._schedule[-1][1]
|
| 147 |
+
|
| 148 |
+
|
| 149 |
def patch_training_loops(model, num_loops=1) -> None:
    """Set initial loop config. Use ProgressiveLoopScheduler to change during training."""
    # Configure the loop controller when the model exposes one.
    if hasattr(model, "loop_controller"):
        ctrl = model.loop_controller
        ctrl.loop_default = num_loops
        ctrl.loop_min = 1
        # Keep headroom for the progressive schedule's deepest phase (3 loops).
        ctrl.loop_max = num_loops if num_loops > 3 else 3
    # Ensure evolution hooks fire at most once every 8 layers.
    if hasattr(model, "evo_every_n_layers"):
        model.evo_every_n_layers = max(model.evo_every_n_layers, 8)
|