fix: catch IPEX version mismatch crash (AttributeError from buggy os.exit in IPEX)
Browse files- chimera_turbo.py +18 -16
chimera_turbo.py
CHANGED
|
@@ -10,11 +10,10 @@ Paradigmes intégrés:
|
|
| 10 |
P-TURBO-5: Invalidate BitLinear packed caches after optimizer step
|
| 11 |
P-TURBO-6: INT8 ternary forward path (VNNI/AMX dispatch)
|
| 12 |
|
| 13 |
-
|
| 14 |
-
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
mode='default' is the stable choice for CPU with graph breaks.
|
| 18 |
"""
|
| 19 |
|
| 20 |
import math
|
|
@@ -56,11 +55,14 @@ def detect_cpu_info() -> Dict[str, Any]:
|
|
| 56 |
info["has_avx512_bf16"] = "avx512_bf16" in cap or info["has_amx"]
|
| 57 |
info["has_vnni"] = info["has_avx512"]
|
| 58 |
|
|
|
|
|
|
|
|
|
|
| 59 |
try:
|
| 60 |
import intel_extension_for_pytorch
|
| 61 |
info["ipex_available"] = True
|
| 62 |
info["ipex_version"] = intel_extension_for_pytorch.__version__
|
| 63 |
-
except ImportError:
|
| 64 |
info["ipex_available"] = False
|
| 65 |
|
| 66 |
info["tcmalloc"] = "tcmalloc" in os.environ.get("LD_PRELOAD", "")
|
|
@@ -159,10 +161,14 @@ def try_ipex_optimize(
|
|
| 159 |
) -> Tuple[nn.Module, torch.optim.Optimizer]:
|
| 160 |
"""Apply IPEX optimization if available and beneficial."""
|
| 161 |
if not cpu_info.get("ipex_available"):
|
| 162 |
-
print("[TURBO-4] IPEX not available —
|
| 163 |
return model, optimizer
|
| 164 |
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
if dtype is None:
|
| 168 |
if cpu_info["has_amx"]:
|
|
@@ -190,13 +196,9 @@ def try_compile_model(model: nn.Module, mode: str = "default") -> nn.Module:
|
|
| 190 |
Compile model with torch.compile for kernel fusion.
|
| 191 |
|
| 192 |
Uses mode='default' for CPU stability. Do NOT use 'reduce-overhead'
|
| 193 |
-
on CPU — it corrupts the glibc
|
| 194 |
-
heap allocator ('corrupted double-linked list' crash).
|
| 195 |
-
|
| 196 |
-
mode='default': safe, fuses kernels via Inductor, ~1.3-2x speedup.
|
| 197 |
-
mode='max-autotune': slower compile, better code, ~1.5-2.5x speedup.
|
| 198 |
|
| 199 |
-
Expected: first ~10 steps slow (compilation), then
|
| 200 |
"""
|
| 201 |
if not hasattr(torch, "compile"):
|
| 202 |
warnings.warn("torch.compile not available (PyTorch < 2.0)")
|
|
@@ -207,7 +209,7 @@ def try_compile_model(model: nn.Module, mode: str = "default") -> nn.Module:
|
|
| 207 |
model,
|
| 208 |
backend="inductor",
|
| 209 |
mode=mode,
|
| 210 |
-
fullgraph=False,
|
| 211 |
)
|
| 212 |
print(f"[TURBO-2] torch.compile enabled (backend=inductor, mode={mode})")
|
| 213 |
print(f" First few steps will be slow (compilation). Then ~1.5-2x speedup.")
|
|
@@ -268,7 +270,7 @@ def apply(
|
|
| 268 |
|
| 269 |
if verbose:
|
| 270 |
print("=" * 65)
|
| 271 |
-
print("CHIMERA TURBO v4 — CPU Acceleration Layer")
|
| 272 |
print("=" * 65)
|
| 273 |
print(f" Physical cores: {cpu_info['physical_cores']}")
|
| 274 |
print(f" CPU capability: {cpu_info['capability']}")
|
|
|
|
| 10 |
P-TURBO-5: Invalidate BitLinear packed caches after optimizer step
|
| 11 |
P-TURBO-6: INT8 ternary forward path (VNNI/AMX dispatch)
|
| 12 |
|
| 13 |
+
v5 changes:
|
| 14 |
+
- Fix IPEX version mismatch crash: IPEX for PyTorch 2.8 installed with
|
| 15 |
+
PyTorch 2.11 calls os.exit(127) which doesn't exist → AttributeError.
|
| 16 |
+
Now catches Exception (not just ImportError) on IPEX import.
|
|
|
|
| 17 |
"""
|
| 18 |
|
| 19 |
import math
|
|
|
|
| 55 |
info["has_avx512_bf16"] = "avx512_bf16" in cap or info["has_amx"]
|
| 56 |
info["has_vnni"] = info["has_avx512"]
|
| 57 |
|
| 58 |
+
# IPEX import can crash in many ways: ImportError (not installed),
|
| 59 |
+
# SystemExit (version mismatch), AttributeError (buggy os.exit in IPEX),
|
| 60 |
+
# RuntimeError, etc. Catch broadly.
|
| 61 |
try:
|
| 62 |
import intel_extension_for_pytorch
|
| 63 |
info["ipex_available"] = True
|
| 64 |
info["ipex_version"] = intel_extension_for_pytorch.__version__
|
| 65 |
+
except Exception:
|
| 66 |
info["ipex_available"] = False
|
| 67 |
|
| 68 |
info["tcmalloc"] = "tcmalloc" in os.environ.get("LD_PRELOAD", "")
|
|
|
|
| 161 |
) -> Tuple[nn.Module, torch.optim.Optimizer]:
|
| 162 |
"""Apply IPEX optimization if available and beneficial."""
|
| 163 |
if not cpu_info.get("ipex_available"):
|
| 164 |
+
print("[TURBO-4] IPEX not available — skipping")
|
| 165 |
return model, optimizer
|
| 166 |
|
| 167 |
+
try:
|
| 168 |
+
import intel_extension_for_pytorch as ipex
|
| 169 |
+
except Exception:
|
| 170 |
+
print("[TURBO-4] IPEX import failed — skipping")
|
| 171 |
+
return model, optimizer
|
| 172 |
|
| 173 |
if dtype is None:
|
| 174 |
if cpu_info["has_amx"]:
|
|
|
|
| 196 |
Compile model with torch.compile for kernel fusion.
|
| 197 |
|
| 198 |
Uses mode='default' for CPU stability. Do NOT use 'reduce-overhead'
|
| 199 |
+
on CPU — it corrupts the glibc heap allocator.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
Expected: first ~10 steps slow (compilation), then ~1.5-2x speedup.
|
| 202 |
"""
|
| 203 |
if not hasattr(torch, "compile"):
|
| 204 |
warnings.warn("torch.compile not available (PyTorch < 2.0)")
|
|
|
|
| 209 |
model,
|
| 210 |
backend="inductor",
|
| 211 |
mode=mode,
|
| 212 |
+
fullgraph=False,
|
| 213 |
)
|
| 214 |
print(f"[TURBO-2] torch.compile enabled (backend=inductor, mode={mode})")
|
| 215 |
print(f" First few steps will be slow (compilation). Then ~1.5-2x speedup.")
|
|
|
|
| 270 |
|
| 271 |
if verbose:
|
| 272 |
print("=" * 65)
|
| 273 |
+
print("CHIMERA TURBO v5 — CPU Acceleration Layer")
|
| 274 |
print("=" * 65)
|
| 275 |
print(f" Physical cores: {cpu_info['physical_cores']}")
|
| 276 |
print(f" CPU capability: {cpu_info['capability']}")
|