Kernels
kernels-bot committed on
Commit
5f3e612
·
verified ·
1 Parent(s): 86d4a15

Build uploaded using `kernels` (batch 1/10).

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ build/torch211-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=l
32
  build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
33
  build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
34
  build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
 
 
32
  build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
33
  build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
34
  build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
35
+ build/torch210-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
build/torch210-cxx11-cu126-x86_64-linux/__init__.py CHANGED
@@ -3,12 +3,42 @@ import subprocess
3
  import torch
4
 
5
  # Import the compiled extension
6
- from ._ops import ops
7
  from . import utils
8
 
9
  __version__ = "2.3.0"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Runtime
13
 
14
 
@@ -661,12 +691,14 @@ if "DG_CUTLASS_INCLUDE" not in os.environ:
661
  # Fall back to nvidia-cutlass pip package
662
  try:
663
  import nvidia.cutlass as _nc
 
664
  os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
665
  os.path.dirname(_nc.__file__), "include"
666
  )
667
  except ImportError:
668
  pass
669
 
 
670
  def _ensure_initialized():
671
  global _initialized
672
  if _initialized:
 
3
  import torch
4
 
5
  # Import the compiled extension
6
+ from ._ops import ops, add_op_namespace_prefix
7
  from . import utils
8
 
9
  __version__ = "2.3.0"
10
 
11
 
12
+ # ── Register fake tensor implementations for torch.compile ──────────────────
13
+ # All GEMM ops mutate the output tensor `d` in-place and return void.
14
+ # The fake implementations are no-ops since `d` is pre-allocated by the caller.
15
+
16
+
17
+ for _op in [
18
+ "fp8_fp4_gemm_nt",
19
+ "fp8_fp4_gemm_nn",
20
+ "fp8_fp4_gemm_tn",
21
+ "fp8_fp4_gemm_tt",
22
+ "m_grouped_fp8_fp4_gemm_nt_contiguous",
23
+ "m_grouped_fp8_fp4_gemm_nn_contiguous",
24
+ "m_grouped_fp8_fp4_gemm_nt_masked",
25
+ "k_grouped_fp8_gemm_nt_contiguous",
26
+ "k_grouped_fp8_gemm_tn_contiguous",
27
+ "bf16_gemm_nt",
28
+ "bf16_gemm_nn",
29
+ "bf16_gemm_tn",
30
+ "bf16_gemm_tt",
31
+ "m_grouped_bf16_gemm_nt_contiguous",
32
+ "m_grouped_bf16_gemm_nn_contiguous",
33
+ "m_grouped_bf16_gemm_nt_masked",
34
+ "fp8_gemm_nt_skip_head_mid",
35
+ ]:
36
+
37
+ @torch.library.register_fake(add_op_namespace_prefix(_op))
38
+ def _fake(*args, **kwargs):
39
+ pass
40
+
41
+
42
  # Runtime
43
 
44
 
 
691
  # Fall back to nvidia-cutlass pip package
692
  try:
693
  import nvidia.cutlass as _nc
694
+
695
  os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
696
  os.path.dirname(_nc.__file__), "include"
697
  )
698
  except ImportError:
699
  pass
700
 
701
+
702
  def _ensure_initialized():
703
  global _initialized
704
  if _initialized:
build/torch210-cxx11-cu126-x86_64-linux/{_deep_gemm_cuda_bf91a5a.abi3.so β†’ _deep_gemm_cuda_8546a43.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e74230c00b1f3f6a5a7ea53dcd183ea1933837da2321293e015d8bd288ad3d2
3
  size 2825240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5716c2500d5dfa25bfbbffaf3469481b0ba9a0f682ae22aa6526a744cc05b59b
3
  size 2825240
build/torch210-cxx11-cu126-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _deep_gemm_cuda_bf91a5a
3
- ops = torch.ops._deep_gemm_cuda_bf91a5a
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_deep_gemm_cuda_bf91a5a::{op_name}"
 
1
  import torch
2
+ from . import _deep_gemm_cuda_8546a43
3
+ ops = torch.ops._deep_gemm_cuda_8546a43
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_deep_gemm_cuda_8546a43::{op_name}"
build/torch210-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:085822267e9222797c3ed466e1987c5d8daa861882d62edeb4578a4b99edf39c
3
- size 2888040
 
 
 
 
build/torch210-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6477c3d1134a3fdb754ab7fc4a8d9aa98576896e6ae4613debaab483a985254
3
- size 2897240
 
 
 
 
build/torch211-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce90c573da5a8f2348156c8d18a0ff521f679bcff905ad700cb5e9742fe81d1c
3
- size 2825240
 
 
 
 
build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d1e6ceaf24536a9202d47f4cc633af33c6873bb5af4ca0c9bbb788dc71985d
3
- size 2888040
 
 
 
 
build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f97dd08a22f024b9b3d610031d14092518ed6b8a72422e1ea482e5059a58a326
3
- size 2897240
 
 
 
 
build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd66ce2110c54ac6fa347088f2a9f566c5d0b32601729f2288b35906330613f2
3
- size 2870952