Kernels
kernels-bot committed on
Commit
5f3e612
·
verified ·
1 Parent(s): 86d4a15

Build uploaded using `kernels` (batch 1/10).

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ build/torch211-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=l
32
  build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
33
  build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
34
  build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
 
 
32
  build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
33
  build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
34
  build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so filter=lfs diff=lfs merge=lfs -text
35
+ build/torch210-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_8546a43.abi3.so filter=lfs diff=lfs merge=lfs -text
build/torch210-cxx11-cu126-x86_64-linux/__init__.py CHANGED
@@ -3,12 +3,42 @@ import subprocess
3
  import torch
4
 
5
  # Import the compiled extension
6
- from ._ops import ops
7
  from . import utils
8
 
9
  __version__ = "2.3.0"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Runtime
13
 
14
 
@@ -661,12 +691,14 @@ if "DG_CUTLASS_INCLUDE" not in os.environ:
661
  # Fall back to nvidia-cutlass pip package
662
  try:
663
  import nvidia.cutlass as _nc
 
664
  os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
665
  os.path.dirname(_nc.__file__), "include"
666
  )
667
  except ImportError:
668
  pass
669
 
 
670
  def _ensure_initialized():
671
  global _initialized
672
  if _initialized:
 
3
  import torch
4
 
5
  # Import the compiled extension
6
+ from ._ops import ops, add_op_namespace_prefix
7
  from . import utils
8
 
9
  __version__ = "2.3.0"
10
 
11
 
12
+ # ── Register fake tensor implementations for torch.compile ──────────────────
13
+ # All GEMM ops mutate the output tensor `d` in-place and return void.
14
+ # The fake implementations are no-ops since `d` is pre-allocated by the caller.
15
+
16
+
17
+ for _op in [
18
+ "fp8_fp4_gemm_nt",
19
+ "fp8_fp4_gemm_nn",
20
+ "fp8_fp4_gemm_tn",
21
+ "fp8_fp4_gemm_tt",
22
+ "m_grouped_fp8_fp4_gemm_nt_contiguous",
23
+ "m_grouped_fp8_fp4_gemm_nn_contiguous",
24
+ "m_grouped_fp8_fp4_gemm_nt_masked",
25
+ "k_grouped_fp8_gemm_nt_contiguous",
26
+ "k_grouped_fp8_gemm_tn_contiguous",
27
+ "bf16_gemm_nt",
28
+ "bf16_gemm_nn",
29
+ "bf16_gemm_tn",
30
+ "bf16_gemm_tt",
31
+ "m_grouped_bf16_gemm_nt_contiguous",
32
+ "m_grouped_bf16_gemm_nn_contiguous",
33
+ "m_grouped_bf16_gemm_nt_masked",
34
+ "fp8_gemm_nt_skip_head_mid",
35
+ ]:
36
+
37
+ @torch.library.register_fake(add_op_namespace_prefix(_op))
38
+ def _fake(*args, **kwargs):
39
+ pass
40
+
41
+
42
  # Runtime
43
 
44
 
 
691
  # Fall back to nvidia-cutlass pip package
692
  try:
693
  import nvidia.cutlass as _nc
694
+
695
  os.environ["DG_CUTLASS_INCLUDE"] = os.path.join(
696
  os.path.dirname(_nc.__file__), "include"
697
  )
698
  except ImportError:
699
  pass
700
 
701
+
702
  def _ensure_initialized():
703
  global _initialized
704
  if _initialized:
build/torch210-cxx11-cu126-x86_64-linux/{_deep_gemm_cuda_bf91a5a.abi3.so β†’ _deep_gemm_cuda_8546a43.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e74230c00b1f3f6a5a7ea53dcd183ea1933837da2321293e015d8bd288ad3d2
3
  size 2825240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5716c2500d5dfa25bfbbffaf3469481b0ba9a0f682ae22aa6526a744cc05b59b
3
  size 2825240
build/torch210-cxx11-cu126-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _deep_gemm_cuda_bf91a5a
3
- ops = torch.ops._deep_gemm_cuda_bf91a5a
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_deep_gemm_cuda_bf91a5a::{op_name}"
 
1
  import torch
2
+ from . import _deep_gemm_cuda_8546a43
3
+ ops = torch.ops._deep_gemm_cuda_8546a43
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_deep_gemm_cuda_8546a43::{op_name}"
build/torch210-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:085822267e9222797c3ed466e1987c5d8daa861882d62edeb4578a4b99edf39c
3
- size 2888040
 
 
 
 
build/torch210-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6477c3d1134a3fdb754ab7fc4a8d9aa98576896e6ae4613debaab483a985254
3
- size 2897240
 
 
 
 
build/torch211-cxx11-cu126-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce90c573da5a8f2348156c8d18a0ff521f679bcff905ad700cb5e9742fe81d1c
3
- size 2825240
 
 
 
 
build/torch211-cxx11-cu128-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2d1e6ceaf24536a9202d47f4cc633af33c6873bb5af4ca0c9bbb788dc71985d
3
- size 2888040
 
 
 
 
build/torch211-cxx11-cu130-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f97dd08a22f024b9b3d610031d14092518ed6b8a72422e1ea482e5059a58a326
3
- size 2897240
 
 
 
 
build/torch29-cxx11-cu129-x86_64-linux/_deep_gemm_cuda_bf91a5a.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd66ce2110c54ac6fa347088f2a9f566c5d0b32601729f2288b35906330613f2
3
- size 2870952