danieldk HF Staff committed on Mar 13

Commit

1c9cbe1

verified ·

1 Parent(s): 645c13c

Build uploaded using `kernels`.

Browse files

Files changed (18) hide show

build/torch210-cxx11-cu126-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} +1 -1
build/torch210-cxx11-cu126-aarch64-linux/_ops.py +3 -3
build/torch210-cxx11-cu126-aarch64-linux/flash_attn_interface.py +5 -1
build/torch210-cxx11-cu128-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} +1 -1
build/torch210-cxx11-cu128-aarch64-linux/_ops.py +3 -3
build/torch210-cxx11-cu128-aarch64-linux/flash_attn_interface.py +5 -1
build/torch210-cxx11-cu130-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} +1 -1
build/torch210-cxx11-cu130-aarch64-linux/_ops.py +3 -3
build/torch210-cxx11-cu130-aarch64-linux/flash_attn_interface.py +5 -1
build/torch29-cxx11-cu126-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} +1 -1
build/torch29-cxx11-cu126-aarch64-linux/_ops.py +3 -3
build/torch29-cxx11-cu126-aarch64-linux/flash_attn_interface.py +5 -1
build/torch29-cxx11-cu128-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} +1 -1
build/torch29-cxx11-cu128-aarch64-linux/_ops.py +3 -3
build/torch29-cxx11-cu128-aarch64-linux/flash_attn_interface.py +5 -1
build/torch29-cxx11-cu130-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} +1 -1
build/torch29-cxx11-cu130-aarch64-linux/_ops.py +3 -3
build/torch29-cxx11-cu130-aarch64-linux/flash_attn_interface.py +5 -1

build/torch210-cxx11-cu126-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7507abaa188e0334b03057f4fd4a5a50fe8378765edd644d0867fed70444403
 size 448533496

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ae4aff3e31a85f7126ce66f329308221464637b4aa3bb4f0db454747509f913
 size 448533496

build/torch210-cxx11-cu126-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_cuda_9f0ed09
-ops = torch.ops._flash_attn2_cuda_9f0ed09
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_cuda_9f0ed09::{op_name}"

 import torch
+from . import _flash_attn2_cuda_abda3a0
+ops = torch.ops._flash_attn2_cuda_abda3a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_cuda_abda3a0::{op_name}"

build/torch210-cxx11-cu126-aarch64-linux/flash_attn_interface.py CHANGED Viewed

@@ -33,8 +33,12 @@ def _get_device():
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
-    # This should match the block sizes in the CUDA kernel
     assert head_dim <= 256
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
+    if device.type == "xpu":
+        return 64
+    # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

build/torch210-cxx11-cu128-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58331e99449068ce623bb0d837d6a761092278872c0ffcde29691794b554d5da
 size 1037990944

 version https://git-lfs.github.com/spec/v1
+oid sha256:b00f987bd969cfdfc618e12a09631d6ad7dd067f9f3d3b346836bd3e205af5a4
 size 1037990944

build/torch210-cxx11-cu128-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_cuda_9f0ed09
-ops = torch.ops._flash_attn2_cuda_9f0ed09
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_cuda_9f0ed09::{op_name}"

 import torch
+from . import _flash_attn2_cuda_abda3a0
+ops = torch.ops._flash_attn2_cuda_abda3a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_cuda_abda3a0::{op_name}"

build/torch210-cxx11-cu128-aarch64-linux/flash_attn_interface.py CHANGED Viewed

@@ -33,8 +33,12 @@ def _get_device():
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
-    # This should match the block sizes in the CUDA kernel
     assert head_dim <= 256
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
+    if device.type == "xpu":
+        return 64
+    # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

build/torch210-cxx11-cu130-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b6f8697387e3cc2d5a17e6a9ce0da37e58d6b70193a0f2e9623ab5fbcd3cea4
 size 1008644344

 version https://git-lfs.github.com/spec/v1
+oid sha256:fed0b327e31440e1e9b53a58733d699df86d4c54b22b2c949d6b5815a643aca7
 size 1008644344

build/torch210-cxx11-cu130-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_cuda_9f0ed09
-ops = torch.ops._flash_attn2_cuda_9f0ed09
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_cuda_9f0ed09::{op_name}"

 import torch
+from . import _flash_attn2_cuda_abda3a0
+ops = torch.ops._flash_attn2_cuda_abda3a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_cuda_abda3a0::{op_name}"

build/torch210-cxx11-cu130-aarch64-linux/flash_attn_interface.py CHANGED Viewed

@@ -33,8 +33,12 @@ def _get_device():
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
-    # This should match the block sizes in the CUDA kernel
     assert head_dim <= 256
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
+    if device.type == "xpu":
+        return 64
+    # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

build/torch29-cxx11-cu126-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce0ad12fec47de04a14310237b258c8f756ba2645ec91fbcc63c7d289f64b2ac
 size 448464272

 version https://git-lfs.github.com/spec/v1
+oid sha256:597f8c8e32518898fd0439b8e9341474d536692e0d68e3083501bbf0fb35cf6f
 size 448464272

build/torch29-cxx11-cu126-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_cuda_9f0ed09
-ops = torch.ops._flash_attn2_cuda_9f0ed09
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_cuda_9f0ed09::{op_name}"

 import torch
+from . import _flash_attn2_cuda_abda3a0
+ops = torch.ops._flash_attn2_cuda_abda3a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_cuda_abda3a0::{op_name}"

build/torch29-cxx11-cu126-aarch64-linux/flash_attn_interface.py CHANGED Viewed

@@ -33,8 +33,12 @@ def _get_device():
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
-    # This should match the block sizes in the CUDA kernel
     assert head_dim <= 256
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
+    if device.type == "xpu":
+        return 64
+    # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

build/torch29-cxx11-cu128-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:faa4e7f179cc4e8c14ef3d3471458f9563072d21b79707c238227adeffe34ad0
 size 1037856008

 version https://git-lfs.github.com/spec/v1
+oid sha256:79fbcbef35dfc8d2c35e0e4c7587ffa332ae32c8c98d9727cfa376b51ad644b2
 size 1037856008

build/torch29-cxx11-cu128-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_cuda_9f0ed09
-ops = torch.ops._flash_attn2_cuda_9f0ed09
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_cuda_9f0ed09::{op_name}"

 import torch
+from . import _flash_attn2_cuda_abda3a0
+ops = torch.ops._flash_attn2_cuda_abda3a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_cuda_abda3a0::{op_name}"

build/torch29-cxx11-cu128-aarch64-linux/flash_attn_interface.py CHANGED Viewed

@@ -33,8 +33,12 @@ def _get_device():
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
-    # This should match the block sizes in the CUDA kernel
     assert head_dim <= 256
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
+    if device.type == "xpu":
+        return 64
+    # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

build/torch29-cxx11-cu130-aarch64-linux/{_flash_attn2_cuda_9f0ed09.abi3.so → _flash_attn2_cuda_abda3a0.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a69d566b56938ac50d625d504fea3c8c781598e4ebb3908b80397879b3cde372
 size 1008640480

 version https://git-lfs.github.com/spec/v1
+oid sha256:8feba56f589a7bd5d90cd68d62668029e2d06f8918c85cefd0112caae3c92c04
 size 1008640480

build/torch29-cxx11-cu130-aarch64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_cuda_9f0ed09
-ops = torch.ops._flash_attn2_cuda_9f0ed09
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_cuda_9f0ed09::{op_name}"

 import torch
+from . import _flash_attn2_cuda_abda3a0
+ops = torch.ops._flash_attn2_cuda_abda3a0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flash_attn2_cuda_abda3a0::{op_name}"

build/torch29-cxx11-cu130-aarch64-linux/flash_attn_interface.py CHANGED Viewed

@@ -33,8 +33,12 @@ def _get_device():
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
-    # This should match the block sizes in the CUDA kernel
     assert head_dim <= 256
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0

 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     assert head_dim <= 256
+    if device.type == "xpu":
+        return 64
+    # This should match the block sizes in the CUDA kernel
     major, minor = torch.cuda.get_device_capability(device)
     is_sm8x = major == 8 and minor > 0  # Only include sm86 and sm89, exclude sm80 (A100)
     is_sm80 = major == 8 and minor == 0