Kernels
kernels-bot committed on
Commit
cc71acd
·
verified ·
1 Parent(s): 6a74fae

Build uploaded using `kernels` (batch 1/10).

Browse files
.gitattributes CHANGED
@@ -39,3 +39,4 @@ build/torch211-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=l
39
  build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
40
  build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
41
  build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
 
 
39
  build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
40
  build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
41
  build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
42
+ build/torch210-cxx11-cu126-aarch64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
build/torch210-cxx11-cu126-aarch64-linux/__init__.py CHANGED
@@ -3,12 +3,42 @@ import subprocess
3
  import torch
4
 
5
  # Import the compiled extension
6
- from ._ops import ops
7
  from . import utils
8
 
9
  __version__ = "2.3.0"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Runtime
13
 
14
 
@@ -661,12 +691,14 @@ if "DG_CUTLASS_INCLUDE" not in os.environ:
661
  # Fall back to nvidia-cutlass pip package
662
  try:
663
  import nvidia.cutlass as _nc
 
664
  os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
665
  os.path.dirname(_nc.__file__), "include"
666
  )
667
  except ImportError:
668
  pass
669
 
 
670
  def _ensure_initialized():
671
  global _initialized
672
  if _initialized:
 
3
  import torch
4
 
5
  # Import the compiled extension
6
+ from ._ops import ops, add_op_namespace_prefix
7
  from . import utils
8
 
9
  __version__ = "2.3.0"
10
 
11
 
12
+ # ── Register fake tensor implementations for torch.compile ──────────────────
13
+ # All GEMM ops mutate the output tensor `d` in-place and return void.
14
+ # The fake implementations are no-ops since `d` is pre-allocated by the caller.
15
+
16
+
17
+ for _op in [
18
+ "fp8_fp4_gemm_nt",
19
+ "fp8_fp4_gemm_nn",
20
+ "fp8_fp4_gemm_tn",
21
+ "fp8_fp4_gemm_tt",
22
+ "m_grouped_fp8_fp4_gemm_nt_contiguous",
23
+ "m_grouped_fp8_fp4_gemm_nn_contiguous",
24
+ "m_grouped_fp8_fp4_gemm_nt_masked",
25
+ "k_grouped_fp8_gemm_nt_contiguous",
26
+ "k_grouped_fp8_gemm_tn_contiguous",
27
+ "bf16_gemm_nt",
28
+ "bf16_gemm_nn",
29
+ "bf16_gemm_tn",
30
+ "bf16_gemm_tt",
31
+ "m_grouped_bf16_gemm_nt_contiguous",
32
+ "m_grouped_bf16_gemm_nn_contiguous",
33
+ "m_grouped_bf16_gemm_nt_masked",
34
+ "fp8_gemm_nt_skip_head_mid",
35
+ ]:
36
+
37
+ @torch.library.register_fake(add_op_namespace_prefix(_op))
38
+ def _fake(*args, **kwargs):
39
+ pass
40
+
41
+
42
  # Runtime
43
 
44
 
 
691
  # Fall back to nvidia-cutlass pip package
692
  try:
693
  import nvidia.cutlass as _nc
694
+
695
  os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
696
  os.path.dirname(_nc.__file__), "include"
697
  )
698
  except ImportError:
699
  pass
700
 
701
+
702
  def _ensure_initialized():
703
  global _initialized
704
  if _initialized:
build/torch210-cxx11-cu126-aarch64-linux/{_deep_gemm_cuda_bf91a5a.abi3.so → _deep_gemm_cuda_8546a43.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:464160196612927accf9bff3229b025e28b5600182451d8b1c99a91ded85eeed
3
  size 2828112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f126dba5c327a4e1bf36e218c694d4d718c8587ebcdeafecb75061836e6900b
3
  size 2828112
build/torch210-cxx11-cu126-aarch64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _deep_gemm_cuda_bf91a5a
3
- ops = torch.ops._deep_gemm_cuda_bf91a5a
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_deep_gemm_cuda_bf91a5a::{op_name}"
 
1
  import torch
2
+ from . import _deep_gemm_cuda_8546a43
3
+ ops = torch.ops._deep_gemm_cuda_8546a43
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_deep_gemm_cuda_8546a43::{op_name}"
build/torch210-cxx11-cu128-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fb6dff7108b7d58c5fec43f44d1ebde57c44e301220ac024f3669b64de2b39b
3
- size 2892888
 
 
 
 
build/torch210-cxx11-cu130-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a85f42e78c9bba766a58ed7fdcf55e252895ae8f2ecad723b6772c6bf16f50dd
3
- size 2894336
 
 
 
 
build/torch211-cxx11-cu126-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:410a6edef82c909ab5406ea016407b8a31da0b07d9fc1b19940676fe75ce8f82
3
- size 2828112
 
 
 
 
build/torch211-cxx11-cu128-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:146cb3999a8dda660bb48ed5a70c20b88d413c98a80ec6b92bf28bbfbdc7a7d0
3
- size 2892888
 
 
 
 
build/torch211-cxx11-cu130-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b82529fc980cda5e620aa383d5848832c27d493ee6aba778b773fe11988f05c9
3
- size 2894336
 
 
 
 
build/torch29-cxx11-cu129-aarch64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4127075b0ce56f91d6799076dc1bd8dd49fe2d06199b6e182c62a6896a58aa3
3
- size 2891904