Lgr54HFi committed on
Commit
f6670ea
·
verified ·
1 Parent(s): dd57d33

fix: re-enable torch.compile in train_hyper_loop (STE graph breaks fixed)

Browse files
Files changed (1) hide show
  1. chimera/training/loops.py +4 -3
chimera/training/loops.py CHANGED
@@ -147,17 +147,19 @@ def train_standard_loop(args, model, config, loader, compute_loss, optimizer, us
147
 
148
 
149
  def train_hyper_loop(args, model, config, dataset, initial_seq, grow, unfreezer):
 
 
150
  model, optimizer, scheduler = chimera_turbo.apply(
151
  model,
152
  max_steps=args.max_steps,
153
  lr=args.lr,
154
  weight_decay=0.05,
155
  warmup_steps=min(500, args.max_steps // 10),
156
- use_compile=False, # ← disabled: 84 graph breaks from STE
157
  use_ipex=True,
158
  )
159
  model.train()
160
- print(f"[P5] Train mode: BitLinear STE path (no invalidate_packed)")
161
  use_bf16 = bool(args.bf16)
162
 
163
  os.makedirs(args.output_dir, exist_ok=True)
@@ -199,7 +201,6 @@ def train_hyper_loop(args, model, config, dataset, initial_seq, grow, unfreezer)
199
  batch = next(data_iter)
200
 
201
  # grad_accum_steps=1: DataLoader already provides eff_batch items.
202
- # The effective batch IS eff_batch. No need to accumulate further.
203
  loss_val = chimera_turbo.training_step(
204
  model,
205
  batch,
 
147
 
148
 
149
  def train_hyper_loop(args, model, config, dataset, initial_seq, grow, unfreezer):
150
+ # use_compile=True now works: STE uses detach() trick = zero graph breaks
151
+ use_compile = getattr(args, "compile", True)
152
  model, optimizer, scheduler = chimera_turbo.apply(
153
  model,
154
  max_steps=args.max_steps,
155
  lr=args.lr,
156
  weight_decay=0.05,
157
  warmup_steps=min(500, args.max_steps // 10),
158
+ use_compile=use_compile,
159
  use_ipex=True,
160
  )
161
  model.train()
162
+ print(f"[P5] Train mode: BitLinear STE path (detach trick, compile-friendly)")
163
  use_bf16 = bool(args.bf16)
164
 
165
  os.makedirs(args.output_dir, exist_ok=True)
 
201
  batch = next(data_iter)
202
 
203
  # grad_accum_steps=1: DataLoader already provides eff_batch items.
 
204
  loss_val = chimera_turbo.training_step(
205
  model,
206
  batch,