Build uploaded using `kernels` (batch 1/10).

Files changed (10)

.gitattributes CHANGED Viewed

@@ -39,3 +39,4 @@ build/torch211-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=l
 build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
 build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
 build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text

 build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
 build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
 build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cu126-aarch64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text

build/torch210-cxx11-cu126-aarch64-linux/__init__.py CHANGED Viewed

@@ -3,12 +3,42 @@ import subprocess
 import torch
 # Import the compiled extension
-from ._ops import ops
 from . import utils
 __version__ = "2.3.0"
 # Runtime
@@ -661,12 +691,14 @@ if "DG_CUTLASS_INCLUDE" not in os.environ:
         # Fall back to nvidia-cutlass pip package
         try:
             import nvidia.cutlass as _nc
             os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
                 os.path.dirname(_nc.__file__), "include"
             )
         except ImportError:
             pass
 def _ensure_initialized():
     global _initialized
     if _initialized:

 import torch
 # Import the compiled extension
+from ._ops import ops, add_op_namespace_prefix
 from . import utils
 __version__ = "2.3.0"
+# ── Register fake tensor implementations for torch.compile ──────────────────
+# All GEMM ops mutate the output tensor `d` in-place and return void.
+# The fake implementations are no-ops since `d` is pre-allocated by the caller.
+for _op in [
+    "fp8_fp4_gemm_nt",
+    "fp8_fp4_gemm_nn",
+    "fp8_fp4_gemm_tn",
+    "fp8_fp4_gemm_tt",
+    "m_grouped_fp8_fp4_gemm_nt_contiguous",
+    "m_grouped_fp8_fp4_gemm_nn_contiguous",
+    "m_grouped_fp8_fp4_gemm_nt_masked",
+    "k_grouped_fp8_gemm_nt_contiguous",
+    "k_grouped_fp8_gemm_tn_contiguous",
+    "bf16_gemm_nt",
+    "bf16_gemm_nn",
+    "bf16_gemm_tn",
+    "bf16_gemm_tt",
+    "m_grouped_bf16_gemm_nt_contiguous",
+    "m_grouped_bf16_gemm_nn_contiguous",
+    "m_grouped_bf16_gemm_nt_masked",
+    "fp8_gemm_nt_skip_head_mid",
+]:
+    @torch.library.register_fake(add_op_namespace_prefix(_op))
+    def _fake(*args, **kwargs):
+        pass
 # Runtime
         # Fall back to nvidia-cutlass pip package
         try:
             import nvidia.cutlass as _nc
             os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
                 os.path.dirname(_nc.__file__), "include"
             )
         except ImportError:
             pass
 def _ensure_initialized():
     global _initialized
     if _initialized:

build/torch210-cxx11-cu126-aarch64-linux/{_deep_gemm_cuda_bf91a5a.abi3.so → _deep_gemm_cuda_8546a43.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:464160196612927accf9bff3229b025e28b5600182451d8b1c99a91ded85eeed
 size 2828112

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f126dba5c327a4e1bf36e218c694d4d718c8587ebcdeafecb75061836e6900b
 size 2828112

build/torch210-cxx11-cu126-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _deep_gemm_cuda_bf91a5a
-ops = torch.ops._deep_gemm_cuda_bf91a5a
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_deep_gemm_cuda_bf91a5a::{op_name}"

 import torch
+from . import _deep_gemm_cuda_8546a43
+ops = torch.ops._deep_gemm_cuda_8546a43
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_deep_gemm_cuda_8546a43::{op_name}"

build/torch210-cxx11-cu128-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1fb6dff7108b7d58c5fec43f44d1ebde57c44e301220ac024f3669b64de2b39b
-size 2892888

build/torch210-cxx11-cu130-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a85f42e78c9bba766a58ed7fdcf55e252895ae8f2ecad723b6772c6bf16f50dd
-size 2894336

build/torch211-cxx11-cu126-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:410a6edef82c909ab5406ea016407b8a31da0b07d9fc1b19940676fe75ce8f82
-size 2828112

build/torch211-cxx11-cu128-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:146cb3999a8dda660bb48ed5a70c20b88d413c98a80ec6b92bf28bbfbdc7a7d0
-size 2892888

build/torch211-cxx11-cu130-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b82529fc980cda5e620aa383d5848832c27d493ee6aba778b773fe11988f05c9
-size 2894336

build/torch29-cxx11-cu129-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4127075b0ce56f91d6799076dc1bd8dd49fe2d06199b6e182c62a6896a58aa3
-size 2891904