Lgr54HFi committed
Commit 310c416 (verified) · 1 Parent(s): 6a7521a

Upload chimera/model.py

Files changed (1): chimera/model.py (+5 -11)
chimera/model.py CHANGED
@@ -254,9 +254,6 @@ class Chimera51ForCausalLM(nn.Module):
             # Evolution modulation every N layers (lightweight)
             evo_mod = None
             if i % self.evo_every_n_layers == 0 and self.evolution is not None:
-                # Compute modulation from semantic memory
-                # Note: loss parameter requires a scalar loss tensor for TTT/surprise;
-                # pass None during standard forward, compute explicitly for TTT
                 evo_result = self.evolution(
                     hidden_states=x.detach() if not x.requires_grad else x,
                     layer_idx=i,
@@ -270,7 +267,6 @@ class Chimera51ForCausalLM(nn.Module):
                 # TTT update for target layers (only in training, no backprop)
                 if self.training and evo_result.get('ttt_delta') is not None:
                     with torch.no_grad():
-                        # Apply TTT to MLP down-projection if this is a target layer
                         if hasattr(layer.mlp, 'w_down'):
                             layer.mlp.w_down.data.add_(evo_result['ttt_delta'] * self.evolution.ttt.inner_lr)

@@ -330,11 +326,11 @@ class Chimera51ForCausalLM(nn.Module):
         effective = num_loops
         if effective is None and not self.training and probe_logits is not None:
             effective = self.entropy_valve.get_loop_count(probe_logits)
-        elif effective is None and self.evolution is not None:
-            # Use loop classifier from evolution
-            last_hidden = x[:, -1, :].mean(dim=0, keepdim=True)  # Average over batch
-            effective = self.evolution.loop_classifier(last_hidden).item()
-            effective = max(1, min(effective, 6))
+        elif effective is None:
+            # FIX: During training, use the loop_controller.loop_default directly
+            # instead of running the loop classifier (which calls .item() and is
+            # expensive). The ProgressiveLoopScheduler already sets loop_default.
+            effective = self.loop_controller.loop_default

         # Loop body
         loop_fn = lambda inp: self._run_layers(
@@ -395,8 +391,6 @@ class Chimera51ForCausalLM(nn.Module):
         # Store episodic case after forward (for inference mode)
         if not self.training and self.evolution is not None:
             last_hidden = x[:, -1, :].detach()
-            # Schedule episodic storage for end of sequence
-            # (In real use, call model.evolution.store_episodic() explicitly)

         return CausalLMOutput(
             loss=loss,
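
For readers skimming the diff, the sketch below isolates the behavior change in the loop-count selection. It is a minimal, self-contained illustration with toy stand-ins (EntropyValve, LoopController, and resolve_loop_count here are simplified assumptions, not the classes in this repository): with no explicit num_loops, inference with probe logits still defers to the entropy valve, and every other case now reads loop_controller.loop_default instead of running the evolution loop classifier.

import torch

class EntropyValve:
    """Toy stand-in: more loops when the probe distribution is high-entropy."""
    def get_loop_count(self, probe_logits):
        probs = torch.softmax(probe_logits, dim=-1)
        entropy = -(probs * probs.clamp_min(1e-9).log()).sum(dim=-1).mean()
        return int(max(1, min(6, round(entropy.item()))))

class LoopController:
    """Toy stand-in; a scheduler like ProgressiveLoopScheduler would set loop_default."""
    def __init__(self, loop_default=2):
        self.loop_default = loop_default

def resolve_loop_count(num_loops, training, probe_logits, entropy_valve, loop_controller):
    # Mirrors the post-commit branch structure in the model's forward pass.
    effective = num_loops
    if effective is None and not training and probe_logits is not None:
        # Inference with probe logits: let the entropy valve pick the count.
        effective = entropy_valve.get_loop_count(probe_logits)
    elif effective is None:
        # Training (or no probe): read the scheduler-set default, with no extra
        # classifier forward pass and no .item() sync on the hot path.
        effective = loop_controller.loop_default
    return effective

valve, ctrl = EntropyValve(), LoopController(loop_default=3)
print(resolve_loop_count(None, True, None, valve, ctrl))    # -> 3, training default
print(resolve_loop_count(4, False, None, valve, ctrl))      # -> 4, explicit override
print(resolve_loop_count(None, False, torch.randn(1, 8, 256), valve, ctrl))  # entropy-driven

The removed branch invoked self.evolution.loop_classifier(last_hidden).item() on every forward pass; the replacement is a plain attribute read, which avoids both the extra classifier forward and the .item() host sync during training.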