Lgr54HFi committed on
Commit
f1fa72a
·
verified ·
1 Parent(s): bb2d3d5

fix: catch IPEX version mismatch crash (AttributeError from buggy os.exit in IPEX)

Browse files
Files changed (1) hide show
  1. chimera_turbo.py +18 -16
chimera_turbo.py CHANGED
@@ -10,11 +10,10 @@ Paradigmes intégrés:
10
  P-TURBO-5: Invalidate BitLinear packed caches after optimizer step
11
  P-TURBO-6: INT8 ternary forward path (VNNI/AMX dispatch)
12
 
13
- v4 changes:
14
- - torch.compile mode changed from 'reduce-overhead' to 'default'.
15
- reduce-overhead uses CUDA graph capture + memory pool which corrupts
16
- glibc heap on CPU ('corrupted double-linked list' abort).
17
- mode='default' is the stable choice for CPU with graph breaks.
18
  """
19
 
20
  import math
@@ -56,11 +55,14 @@ def detect_cpu_info() -> Dict[str, Any]:
56
  info["has_avx512_bf16"] = "avx512_bf16" in cap or info["has_amx"]
57
  info["has_vnni"] = info["has_avx512"]
58
 
 
 
 
59
  try:
60
  import intel_extension_for_pytorch
61
  info["ipex_available"] = True
62
  info["ipex_version"] = intel_extension_for_pytorch.__version__
63
- except ImportError:
64
  info["ipex_available"] = False
65
 
66
  info["tcmalloc"] = "tcmalloc" in os.environ.get("LD_PRELOAD", "")
@@ -159,10 +161,14 @@ def try_ipex_optimize(
159
  ) -> Tuple[nn.Module, torch.optim.Optimizer]:
160
  """Apply IPEX optimization if available and beneficial."""
161
  if not cpu_info.get("ipex_available"):
162
- print("[TURBO-4] IPEX not available — install: pip install intel-extension-for-pytorch")
163
  return model, optimizer
164
 
165
- import intel_extension_for_pytorch as ipex
 
 
 
 
166
 
167
  if dtype is None:
168
  if cpu_info["has_amx"]:
@@ -190,13 +196,9 @@ def try_compile_model(model: nn.Module, mode: str = "default") -> nn.Module:
190
  Compile model with torch.compile for kernel fusion.
191
 
192
  Uses mode='default' for CPU stability. Do NOT use 'reduce-overhead'
193
- on CPU — it uses CUDA graph capture internals that corrupt the glibc
194
- heap allocator ('corrupted double-linked list' crash).
195
-
196
- mode='default': safe, fuses kernels via Inductor, ~1.3-2x speedup.
197
- mode='max-autotune': slower compile, better code, ~1.5-2.5x speedup.
198
 
199
- Expected: first ~10 steps slow (compilation), then steady speedup.
200
  """
201
  if not hasattr(torch, "compile"):
202
  warnings.warn("torch.compile not available (PyTorch < 2.0)")
@@ -207,7 +209,7 @@ def try_compile_model(model: nn.Module, mode: str = "default") -> nn.Module:
207
  model,
208
  backend="inductor",
209
  mode=mode,
210
- fullgraph=False, # safety net for evolution.py graph breaks
211
  )
212
  print(f"[TURBO-2] torch.compile enabled (backend=inductor, mode={mode})")
213
  print(f" First few steps will be slow (compilation). Then ~1.5-2x speedup.")
@@ -268,7 +270,7 @@ def apply(
268
 
269
  if verbose:
270
  print("=" * 65)
271
- print("CHIMERA TURBO v4 — CPU Acceleration Layer")
272
  print("=" * 65)
273
  print(f" Physical cores: {cpu_info['physical_cores']}")
274
  print(f" CPU capability: {cpu_info['capability']}")
 
10
  P-TURBO-5: Invalidate BitLinear packed caches after optimizer step
11
  P-TURBO-6: INT8 ternary forward path (VNNI/AMX dispatch)
12
 
13
+ v5 changes:
14
+ - Fix IPEX version mismatch crash: IPEX for PyTorch 2.8 installed with
15
+ PyTorch 2.11 calls os.exit(127), which doesn't exist, raising AttributeError.
16
+ Now catches Exception (not just ImportError) on IPEX import.
 
17
  """
18
 
19
  import math
 
55
  info["has_avx512_bf16"] = "avx512_bf16" in cap or info["has_amx"]
56
  info["has_vnni"] = info["has_avx512"]
57
 
58
+ # IPEX import can crash in many ways: ImportError (not installed),
59
+ # SystemExit (version mismatch), AttributeError (buggy os.exit in IPEX),
60
+ # RuntimeError, etc. Catch broadly.
61
  try:
62
  import intel_extension_for_pytorch
63
  info["ipex_available"] = True
64
  info["ipex_version"] = intel_extension_for_pytorch.__version__
65
+ except Exception:
66
  info["ipex_available"] = False
67
 
68
  info["tcmalloc"] = "tcmalloc" in os.environ.get("LD_PRELOAD", "")
 
161
  ) -> Tuple[nn.Module, torch.optim.Optimizer]:
162
  """Apply IPEX optimization if available and beneficial."""
163
  if not cpu_info.get("ipex_available"):
164
+ print("[TURBO-4] IPEX not available — skipping")
165
  return model, optimizer
166
 
167
+ try:
168
+ import intel_extension_for_pytorch as ipex
169
+ except Exception:
170
+ print("[TURBO-4] IPEX import failed — skipping")
171
+ return model, optimizer
172
 
173
  if dtype is None:
174
  if cpu_info["has_amx"]:
 
196
  Compile model with torch.compile for kernel fusion.
197
 
198
  Uses mode='default' for CPU stability. Do NOT use 'reduce-overhead'
199
+ on CPU — it corrupts the glibc heap allocator.
 
 
 
 
200
 
201
+ Expected: first ~10 steps slow (compilation), then ~1.5-2x speedup.
202
  """
203
  if not hasattr(torch, "compile"):
204
  warnings.warn("torch.compile not available (PyTorch < 2.0)")
 
209
  model,
210
  backend="inductor",
211
  mode=mode,
212
+ fullgraph=False,
213
  )
214
  print(f"[TURBO-2] torch.compile enabled (backend=inductor, mode={mode})")
215
  print(f" First few steps will be slow (compilation). Then ~1.5-2x speedup.")
 
270
 
271
  if verbose:
272
  print("=" * 65)
273
+ print("CHIMERA TURBO v5 — CPU Acceleration Layer")
274
  print("=" * 65)
275
  print(f" Physical cores: {cpu_info['physical_cores']}")
276
  print(f" CPU capability: {cpu_info['capability']}")