Upload chimera/model.py
chimera/model.py    +5 -11    CHANGED
@@ -254,9 +254,6 @@ class Chimera51ForCausalLM(nn.Module):
             # Evolution modulation every N layers (lightweight)
             evo_mod = None
             if i % self.evo_every_n_layers == 0 and self.evolution is not None:
-                # Compute modulation from semantic memory
-                # Note: loss parameter requires a scalar loss tensor for TTT/surprise;
-                # pass None during standard forward, compute explicitly for TTT
                 evo_result = self.evolution(
                     hidden_states=x.detach() if not x.requires_grad else x,
                     layer_idx=i,
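Note on the hunk above: the call follows a periodic side-module pattern, where every `evo_every_n_layers`-th layer hands the current hidden states to the evolution module, detached whenever no gradient is flowing so the extra pass stays off the autograd graph. A minimal sketch of that pattern, with the module's dict interface assumed rather than taken from this repo:

import torch
import torch.nn as nn

class EvolutionStub(nn.Module):
    # Stand-in for the repo's evolution module; the returned dict is assumed.
    def __init__(self, hidden_size):
        super().__init__()
        self.gate = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden_states, layer_idx):
        # Per-token multiplicative modulation derived from the hidden states.
        return {'modulation': torch.sigmoid(self.gate(hidden_states)),
                'ttt_delta': None}

evo = EvolutionStub(hidden_size=64)
x = torch.randn(2, 16, 64)  # (batch, seq, hidden)
for i in range(12):
    if i % 4 == 0:  # evo_every_n_layers = 4
        h = x.detach() if not x.requires_grad else x
        evo_result = evo(hidden_states=h, layer_idx=i)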
@@ -270,7 +267,6 @@ class Chimera51ForCausalLM(nn.Module):
             # TTT update for target layers (only in training, no backprop)
             if self.training and evo_result.get('ttt_delta') is not None:
                 with torch.no_grad():
-                    # Apply TTT to MLP down-projection if this is a target layer
                     if hasattr(layer.mlp, 'w_down'):
                         layer.mlp.w_down.data.add_(evo_result['ttt_delta'] * self.evolution.ttt.inner_lr)

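The TTT line kept above is an in-place, no-grad weight edit: the down-projection moves by `ttt_delta * inner_lr`, but no graph is recorded, so the outer training loss never backpropagates through the update. A self-contained sketch of that update rule; the shapes and `inner_lr` value are illustrative, not the repo's:

import torch
import torch.nn as nn

w_down = nn.Parameter(torch.randn(128, 64) * 0.02)  # stand-in for layer.mlp.w_down
ttt_delta = torch.randn_like(w_down)                 # delta from the TTT module
inner_lr = 1e-3                                      # assumed inner learning rate

with torch.no_grad():
    # Mirrors the diff: mutate .data directly so autograd never sees the update.
    w_down.data.add_(ttt_delta * inner_lr)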
@@ -330,11 +326,11 @@ class Chimera51ForCausalLM(nn.Module):
         effective = num_loops
         if effective is None and not self.training and probe_logits is not None:
             effective = self.entropy_valve.get_loop_count(probe_logits)
-        elif effective is None
-            #
-
-
-            effective =
+        elif effective is None:
+            # FIX: During training, use the loop_controller.loop_default directly
+            # instead of running the loop classifier (which calls .item() and is
+            # expensive). The ProgressiveLoopScheduler already sets loop_default.
+            effective = self.loop_controller.loop_default

         # Loop body
         loop_fn = lambda inp: self._run_layers(
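For context on the FIX above: an entropy valve of this kind maps the model's next-token uncertainty on the probe logits to a recurrence depth, and reading that scalar out requires a `.item()` device sync, which is why the training path now skips it in favor of `loop_controller.loop_default`. One plausible shape for such a valve; the function name and thresholds below are assumptions for illustration, not the repo's implementation:

import torch
import torch.nn.functional as F

def entropy_loop_count(probe_logits, min_loops=1, max_loops=4, high_entropy=3.0):
    # Mean next-token entropy over batch and positions.
    log_probs = F.log_softmax(probe_logits, dim=-1)
    entropy = -(log_probs.exp() * log_probs).sum(-1).mean()
    # .item() forces a GPU->CPU sync: cheap once per inference forward,
    # exactly the cost the FIX keeps off the training path.
    frac = min(entropy.item() / high_entropy, 1.0)
    return min_loops + round(frac * (max_loops - min_loops))

probe_logits = torch.randn(2, 16, 32000)  # (batch, seq, vocab)
effective = entropy_loop_count(probe_logits)  # near-uniform logits -> max_loops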
@@ -395,8 +391,6 @@ class Chimera51ForCausalLM(nn.Module):
         # Store episodic case after forward (for inference mode)
         if not self.training and self.evolution is not None:
             last_hidden = x[:, -1, :].detach()
-            # Schedule episodic storage for end of sequence
-            # (In real use, call model.evolution.store_episodic() explicitly)

         return CausalLMOutput(
             loss=loss,
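The two comments deleted in the last hunk pointed to calling `model.evolution.store_episodic()` explicitly after an inference forward. Its real signature is not shown in this diff; assuming it takes the final-position hidden state as the episodic key, usage would look roughly like this sketch:

import torch

class EvolutionStoreStub:
    # Stand-in exposing the store_episodic hook the deleted comment names.
    def __init__(self):
        self.episodic_cases = []

    def store_episodic(self, key):
        # Keep a detached CPU copy of the final hidden state as one "case".
        self.episodic_cases.append(key.detach().cpu())

evolution = EvolutionStoreStub()
x = torch.randn(2, 16, 64)          # hidden states after the forward pass
last_hidden = x[:, -1, :].detach()  # same extraction as the model code above
evolution.store_episodic(last_hidden)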