pliny-the-prompter committed on
Commit
42c8118
·
verified ·
1 Parent(s): 24570b6

Upload 134 files

Browse files
Files changed (2) hide show
  1. obliteratus/.DS_Store +0 -0
  2. obliteratus/abliterate.py +23 -2
obliteratus/.DS_Store CHANGED
Binary files a/obliteratus/.DS_Store and b/obliteratus/.DS_Store differ
 
obliteratus/abliterate.py CHANGED
@@ -5726,8 +5726,17 @@ class AbliterationPipeline:
5726
  elapsed = time.time() - t0
5727
  self.log(f"[timing] Quick checkpoint saved to {self.output_dir} ({elapsed:.1f}s)")
5728
  except Exception as e:
5729
- # Non-fatal: if quick checkpoint fails, the pipeline continues
5730
- # normally and REBIRTH will save the final model.
 
 
 
 
 
 
 
 
 
5731
  self.log(f"Quick checkpoint save failed (non-fatal): {e}")
5732
 
5733
  def _pipeline_time_remaining(self, budget_secs: float = 300.0) -> float:
@@ -6319,10 +6328,22 @@ class AbliterationPipeline:
6319
  When device_map="auto" offloads weights to disk, model.state_dict()
6320
  returns meta tensors (no data) for those parameters. We resolve them
6321
  here so that save_pretrained gets real tensors.
 
 
 
 
 
6322
  """
6323
  model = self.handle.model
6324
  state_dict = model.state_dict()
6325
 
 
 
 
 
 
 
 
6326
  # Check for meta tensors (= disk-offloaded weights)
6327
  meta_keys = [k for k, v in state_dict.items() if v.device.type == "meta"]
6328
  if not meta_keys:
 
5726
  elapsed = time.time() - t0
5727
  self.log(f"[timing] Quick checkpoint saved to {self.output_dir} ({elapsed:.1f}s)")
5728
  except Exception as e:
5729
+ # In staged ZeroGPU mode, the quick checkpoint is the ONLY way
5730
+ # Stage 3 can recover the excised model (the GPU worker process
5731
+ # boundary discards in-memory state). A failure here is fatal.
5732
+ if getattr(self, "_staged_state_dir", None):
5733
+ self.log(f"Quick checkpoint save FAILED in staged mode: {e}")
5734
+ raise RuntimeError(
5735
+ f"Quick checkpoint save failed during staged ZeroGPU execution. "
5736
+ f"Stage 3 (VERIFY+REBIRTH) cannot proceed without this checkpoint. "
5737
+ f"Original error: {e}"
5738
+ ) from e
5739
+ # Non-staged: model stays in memory, so this is truly non-fatal.
5740
  self.log(f"Quick checkpoint save failed (non-fatal): {e}")
5741
 
5742
  def _pipeline_time_remaining(self, budget_secs: float = 300.0) -> float:
 
6328
  When device_map="auto" offloads weights to disk, model.state_dict()
6329
  returns meta tensors (no data) for those parameters. We resolve them
6330
  here so that save_pretrained gets real tensors.
6331
+
6332
+ All returned tensors are guaranteed to be contiguous, which is required
6333
+ by the safetensors serializer. After EXCISE, weight tensors may be
6334
+ non-contiguous or share underlying storage (e.g. from in-place
6335
+ projection operations), which causes ``SafetensorError`` during save.
6336
  """
6337
  model = self.handle.model
6338
  state_dict = model.state_dict()
6339
 
6340
+ # Ensure all tensors are contiguous — safetensors cannot serialize
6341
+ # non-contiguous tensors or tensors that share underlying storage.
6342
+ state_dict = {
6343
+ k: v.contiguous() if isinstance(v, torch.Tensor) and not v.is_contiguous() else v
6344
+ for k, v in state_dict.items()
6345
+ }
6346
+
6347
  # Check for meta tensors (= disk-offloaded weights)
6348
  meta_keys = [k for k, v in state_dict.items() if v.device.type == "meta"]
6349
  if not meta_keys: