diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..d53cd9b27bef454d0290ae7bde6d6a1470b246b6 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +*.so filter=lfs diff=lfs merge=lfs -text +build/torch210-cu128-x86_64-windows/activation/_activation_e1b4b08.pyd filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..06cb917ab1eb4e645c04cdb314aa41bce05391d6 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +--- +tags: + - kernels +--- + +![Status](https://hubwebhook.dholtz.com/shield?repo=kernels-community/activation) + +## Activation + +Activation kernels from [vLLM](https://github.com/vllm-project/vllm/blob/main/csrc/activation_kernels.cu). + +Kernel source: https://github.com/huggingface/kernels-community/tree/main/activation + diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..aff9934dce70f0ba644482bc1408fcf27dbe5f01 --- /dev/null +++ b/benchmark.py @@ -0,0 +1,66 @@ +import torch +from kernels.benchmark import Benchmark + + +def setup_silu_tensors(self, num_tokens: int, hidden_dim: int, dtype=torch.float16): + self.x = torch.randn(num_tokens, 2 * hidden_dim, device="cuda", dtype=dtype) + self.out = torch.empty(num_tokens, hidden_dim, device="cuda", dtype=dtype) + + +def verify_silu(self): + d = self.x.shape[-1] // 2 + ref = torch.nn.functional.silu(self.x[..., :d]) * self.x[..., d:] + return torch.allclose(self.out, ref, atol=1e-3, rtol=1e-3) + + +class SiluWorkloads(Benchmark): + kernel_id = "kernels-community/activation" + seed = 42 + x: torch.Tensor # kernel specific input var + out: torch.Tensor # kernel specific output var + + # Workload 1 + def setup_small(self): + setup_silu_tensors(self, num_tokens=32, hidden_dim=256) + + def benchmark_small(self): + self.kernel.silu_and_mul(self.out, self.x) # type: ignore + + def verify_small(self): + return verify_silu(self) + + # Workload 2 + def setup_medium(self): + setup_silu_tensors(self, num_tokens=1024, hidden_dim=2048) + + def benchmark_medium(self): + self.kernel.silu_and_mul(self.out, self.x) # type: ignore + + def verify_medium(self): + return verify_silu(self) + + +class SiluWorkloads2(Benchmark): + kernel_id = "kernels-community/activation" + seed = 42 + x: torch.Tensor # kernel specific input var + out: torch.Tensor # 
kernel specific output var + + # Workload 1 + def setup_small(self): + setup_silu_tensors(self, num_tokens=32, hidden_dim=256) + + def benchmark_small(self): + self.kernel.silu_and_mul(self.out, self.x) # type: ignore + + def verify_small(self): + return verify_silu(self) + + # Workload 2 + def setup_medium(self): + setup_silu_tensors(self, num_tokens=1024, hidden_dim=2048) + + def benchmark_medium(self): + self.kernel.silu_and_mul(self.out, self.x) # type: ignore + + # Note: show case without a verify diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..0522bbfdbf8749d671faa8fe91b169dbd2cafa8d --- /dev/null +++ b/benchmarks/benchmark.py @@ -0,0 +1,5 @@ +from kernels.benchmarks import SiluAndMulBenchmark + + +class SiluWorkloads(SiluAndMulBenchmark): + pass diff --git a/build/torch210-cu128-x86_64-windows/activation/__init__.py b/build/torch210-cu128-x86_64-windows/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..379e245ef7515d04bfe4e680e2549fcf8790cc15 --- /dev/null +++ b/build/torch210-cu128-x86_64-windows/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cu128-x86_64-windows/activation/_activation_e1b4b08.pyd b/build/torch210-cu128-x86_64-windows/activation/_activation_e1b4b08.pyd new file mode 100644 index 0000000000000000000000000000000000000000..ca0f54ee392befa7fa8a084bcc730e416a912f23 --- /dev/null +++ b/build/torch210-cu128-x86_64-windows/activation/_activation_e1b4b08.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d741006dd4fe8a85ed461fa3727d4d9f1b438083d2f1075ae54650bbdd2dc179 +size 2463744 diff --git a/build/torch210-cu128-x86_64-windows/activation/_ops.py 
b/build/torch210-cu128-x86_64-windows/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..110a36d47839efd80d8d58e5cce311e50d684990 --- /dev/null +++ b/build/torch210-cu128-x86_64-windows/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_e1b4b08 +ops = torch.ops._activation_e1b4b08 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_e1b4b08::{op_name}" \ No newline at end of file diff --git a/build/torch210-cu128-x86_64-windows/activation/layers.py b/build/torch210-cu128-x86_64-windows/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..3dbfa19f89f2514b94e7b35d528a1e76ec4da7a3 --- /dev/null +++ b/build/torch210-cu128-x86_64-windows/activation/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cu128-x86_64-windows/metadata.json b/build/torch210-cu128-x86_64-windows/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch210-cu128-x86_64-windows/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch210-cxx11-cu126-aarch64-linux/__init__.py b/build/torch210-cxx11-cu126-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch210-cxx11-cu126-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> 
None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cxx11-cu126-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch210-cxx11-cu126-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..5eb55ae25541a66de1833033597fe66562223b3b --- /dev/null +++ b/build/torch210-cxx11-cu126-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53cb1adecf7bb00650edb28c861b149c48729739ca1c2a6bae39fe52e22657bb +size 3228128 diff --git a/build/torch210-cxx11-cu126-aarch64-linux/_ops.py b/build/torch210-cxx11-cu126-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch210-cxx11-cu126-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch210-cxx11-cu126-aarch64-linux/activation/__init__.py b/build/torch210-cxx11-cu126-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu126-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu126-aarch64-linux/layers.py b/build/torch210-cxx11-cu126-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-cxx11-cu126-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cxx11-cu126-aarch64-linux/metadata.json b/build/torch210-cxx11-cu126-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f --- /dev/null +++ b/build/torch210-cxx11-cu126-aarch64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch210-cxx11-cu126-x86_64-linux/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 
--- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch210-cxx11-cu126-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..cb33ce25de90d3d8af2e226331edccc94e8090c4 --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f8a0be3112850b924da942b3913629a7ab0681277b29b23e34bfd79e24d16b2f +size 3126848 diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch210-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu126-x86_64-linux/layers.py b/build/torch210-cxx11-cu126-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch210-cxx11-cu128-aarch64-linux/__init__.py b/build/torch210-cxx11-cu128-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cxx11-cu128-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch210-cxx11-cu128-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..447c7cb31abe735ab16a86c75f3ac6f10115e4e0 --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eac4ae53a85546234a483c48c9bfcb717d16bb434b1f9723909fb838d366cb3 +size 4538960 diff --git a/build/torch210-cxx11-cu128-aarch64-linux/_ops.py 
b/build/torch210-cxx11-cu128-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch210-cxx11-cu128-aarch64-linux/activation/__init__.py b/build/torch210-cxx11-cu128-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu128-aarch64-linux/layers.py b/build/torch210-cxx11-cu128-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cxx11-cu128-aarch64-linux/metadata.json b/build/torch210-cxx11-cu128-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda --- /dev/null +++ b/build/torch210-cxx11-cu128-aarch64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-cxx11-cu128-x86_64-linux/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch210-cxx11-cu128-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..b2727c565b0e820e7e6151ee7e74fe9a3e84f6d8 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5d89134ddac2eee668ec060a853c99a6a3099a05b01e6a372cfa89b25c9a4d5 +size 4406632 diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py 
b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch210-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu128-x86_64-linux/layers.py b/build/torch210-cxx11-cu128-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-cxx11-cu130-aarch64-linux/__init__.py b/build/torch210-cxx11-cu130-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cxx11-cu130-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch210-cxx11-cu130-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..a7eb49e08987ab8f58ec64430a07df7b50784c73 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be21db6774214a4e1b58290f71e2e5cd0146af7d0646220ed4c0873d959b7e2 +size 4293520 diff --git a/build/torch210-cxx11-cu130-aarch64-linux/_ops.py 
b/build/torch210-cxx11-cu130-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch210-cxx11-cu130-aarch64-linux/activation/__init__.py b/build/torch210-cxx11-cu130-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu130-aarch64-linux/layers.py b/build/torch210-cxx11-cu130-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cxx11-cu130-aarch64-linux/metadata.json b/build/torch210-cxx11-cu130-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..66651b7d3f95ac9e5ce5fc2a641b6f0f50788f87 --- /dev/null +++ b/build/torch210-cxx11-cu130-aarch64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-cxx11-cu130-x86_64-linux/__init__.py b/build/torch210-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch210-cxx11-cu130-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ac170933452130bbc40b403e8cc476811ccdc62d --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7f7097fa67bb40a27a26e2a6bdeec262eb878307336e9fb350388899e09a89 +size 4190176 diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_ops.py 
b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch210-cxx11-cu130-x86_64-linux/activation/__init__.py b/build/torch210-cxx11-cu130-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-cxx11-cu130-x86_64-linux/layers.py b/build/torch210-cxx11-cu130-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-cxx11-cu130-x86_64-linux/metadata.json b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..66651b7d3f95ac9e5ce5fc2a641b6f0f50788f87 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch210-metal-aarch64-darwin/__init__.py b/build/torch210-metal-aarch64-darwin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch210-metal-aarch64-darwin/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch210-metal-aarch64-darwin/_activation_63b875f.abi3.so b/build/torch210-metal-aarch64-darwin/_activation_63b875f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ba3b331d8ec8fbebaa26c880f2be4824ae26de15 --- /dev/null +++ b/build/torch210-metal-aarch64-darwin/_activation_63b875f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b08339eb57c5db3a676d69eafc6d1be7cf14e71e57a544289e8922ab7c118c +size 221272 diff --git a/build/torch210-metal-aarch64-darwin/_ops.py b/build/torch210-metal-aarch64-darwin/_ops.py new file mode 
100644 index 0000000000000000000000000000000000000000..602229319b5ec8bd38c2cd107da58e1e9e968b8d --- /dev/null +++ b/build/torch210-metal-aarch64-darwin/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_63b875f +ops = torch.ops._activation_63b875f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_63b875f::{op_name}" \ No newline at end of file diff --git a/build/torch210-metal-aarch64-darwin/activation/__init__.py b/build/torch210-metal-aarch64-darwin/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch210-metal-aarch64-darwin/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch210-metal-aarch64-darwin/layers.py b/build/torch210-metal-aarch64-darwin/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch210-metal-aarch64-darwin/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch210-metal-aarch64-darwin/metadata.json b/build/torch210-metal-aarch64-darwin/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch210-metal-aarch64-darwin/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch211-cxx11-cu126-aarch64-linux/__init__.py b/build/torch211-cxx11-cu126-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> 
None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch211-cxx11-cu126-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch211-cxx11-cu126-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..b327bf7b9437c45d66e36e22e870c43bd975c0ef --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7015eb787a6fbbc2142ae85a8e169de810f27e09650870845d317305fa668eda +size 3224336 diff --git a/build/torch211-cxx11-cu126-aarch64-linux/_ops.py b/build/torch211-cxx11-cu126-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch211-cxx11-cu126-aarch64-linux/activation/__init__.py b/build/torch211-cxx11-cu126-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu126-aarch64-linux/layers.py b/build/torch211-cxx11-cu126-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch211-cxx11-cu126-aarch64-linux/metadata.json b/build/torch211-cxx11-cu126-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f --- /dev/null +++ b/build/torch211-cxx11-cu126-aarch64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch211-cxx11-cu126-x86_64-linux/__init__.py b/build/torch211-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 
--- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch211-cxx11-cu126-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..134342e0f66f86e950091ca47942e03d57012e47 --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c56d80bd2bbfb93ad648f3ef81e414d62a41c7b28d5221f51c5659ba1dd316b0 +size 3119768 diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_ops.py b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch211-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch211-cxx11-cu126-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu126-x86_64-linux/layers.py b/build/torch211-cxx11-cu126-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch211-cxx11-cu126-x86_64-linux/metadata.json b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch211-cxx11-cu128-aarch64-linux/__init__.py b/build/torch211-cxx11-cu128-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch211-cxx11-cu128-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch211-cxx11-cu128-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..2745fbfab07edaa414c31c9253d18471b5379a88 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074f2ccbabf93779cef6f1c81167dbc6076a6787e059354128f4c84993d17b6b +size 4535168 diff --git a/build/torch211-cxx11-cu128-aarch64-linux/_ops.py 
b/build/torch211-cxx11-cu128-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch211-cxx11-cu128-aarch64-linux/activation/__init__.py b/build/torch211-cxx11-cu128-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu128-aarch64-linux/layers.py b/build/torch211-cxx11-cu128-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch211-cxx11-cu128-aarch64-linux/metadata.json b/build/torch211-cxx11-cu128-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda --- /dev/null +++ b/build/torch211-cxx11-cu128-aarch64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu128-x86_64-linux/__init__.py b/build/torch211-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch211-cxx11-cu128-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ac0456c8ad3c21d35e256011415655bdb7598cb5 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf04bec1ab934e3f8f0f60e94968067c85c4539daa4d2ffb345446debddf437a +size 4395464 diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_ops.py 
b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch211-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch211-cxx11-cu128-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu128-x86_64-linux/layers.py b/build/torch211-cxx11-cu128-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch211-cxx11-cu128-x86_64-linux/metadata.json b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu130-aarch64-linux/__init__.py b/build/torch211-cxx11-cu130-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch211-cxx11-cu130-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch211-cxx11-cu130-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..b68f2b6f7accc327eea552330930caf3abaa0fa6 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9bedacf7611cdde0c229d46ec84b159c62154e4053cd21db492118d8ccddf8 +size 4289720 diff --git a/build/torch211-cxx11-cu130-aarch64-linux/_ops.py 
b/build/torch211-cxx11-cu130-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch211-cxx11-cu130-aarch64-linux/activation/__init__.py b/build/torch211-cxx11-cu130-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu130-aarch64-linux/layers.py b/build/torch211-cxx11-cu130-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch211-cxx11-cu130-aarch64-linux/metadata.json b/build/torch211-cxx11-cu130-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..66651b7d3f95ac9e5ce5fc2a641b6f0f50788f87 --- /dev/null +++ b/build/torch211-cxx11-cu130-aarch64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch211-cxx11-cu130-x86_64-linux/__init__.py b/build/torch211-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch211-cxx11-cu130-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..c54cc49b26506d24d635da5fe1d54c7abc833401 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd7d9b1a41ac1bb3a64c392f13251390570a94f2021c0fcf8168ebd32e64099 +size 4183096 diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_ops.py 
b/build/torch211-cxx11-cu130-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch211-cxx11-cu130-x86_64-linux/activation/__init__.py b/build/torch211-cxx11-cu130-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch211-cxx11-cu130-x86_64-linux/layers.py b/build/torch211-cxx11-cu130-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch211-cxx11-cu130-x86_64-linux/metadata.json b/build/torch211-cxx11-cu130-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..66651b7d3f95ac9e5ce5fc2a641b6f0f50788f87 --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/metadata.json @@ -0,0 +1,19 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbf3ad846a76e365312ad965559a177976801396 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..47765ef8e985a500bbb3e25990387a1f1f15c767 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de62862184381714910c79ecdf8db3ca14f8a753 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..c6c9665f880b574481be0f6464ac7637e732df84 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce06ec284ecd4ac5423d3822a60cd9eeb686d0054b38d66567de73e1137b0567 +size 2773632 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4d722bffa37106dd2bfdb75db14408c7eecefcb0 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29e76b5c619af9b19c5650edcfd4f63c4725d35f Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..f54053b63e8c2b7598967b6ca9739ecc85d6142a Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d4a3c1172a3a2b4c954199c9762b3251d1c468c Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..e9e9102689a8ddf42f881abedcd19e137f22d5e4 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a529bd105aca5081398d63329e829b6b159570424cd654d3a9f275ca9a720e82 +size 2852200 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4d722bffa37106dd2bfdb75db14408c7eecefcb0 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..390f279894bed7ce9346ede4953b9ffc9e1b1808 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc 
b/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86ca448fc1e6e7e119172b94f978b4a88aeda3e1 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbcd1da77da3529c73226d8ed8decfae8b9e5436 Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/_activation_320b408.abi3.so b/build/torch27-cxx11-cu128-aarch64-linux/activation/_activation_320b408.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..4df8f1606a76b66c06d538cd25db8e894d282405 --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/activation/_activation_320b408.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34bdeb9ab72686850aef0a16b225b1b956162edb2cf46cba65c5e5b92ae267ae +size 4207000 diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-aarch64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0fe83704e6d8850cb94dd0434fb763bff8e7e953 --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_320b408 +ops = torch.ops._activation_320b408 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_320b408::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/layers.py b/build/torch27-cxx11-cu128-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch27-cxx11-cu128-aarch64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..364976ff5017b183a827c0dfcda90becfbab0e7c Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..008e1b91db1ae539587989af1a212f9cd38a1ae2 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d00f03a5b9a4944132d13ac0986acc2c54e0ca3c Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..6d8adc0f26f3b10cbc1b441b74bc7f49c0ebdaae --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_beeaae6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2cffcb6b5b9a49f03a2df46fc2ad36765676edecb468c233e78e1f5e21e206 +size 4127872 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4d722bffa37106dd2bfdb75db14408c7eecefcb0 --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_beeaae6 +ops = torch.ops._activation_beeaae6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_beeaae6::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/__init__.py b/build/torch28-cxx11-cu126-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/activation/__init__.py @@ -0,0 +1,57 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "layers", +] diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7aaa364368efe0e765de132c08296d189a969ede Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc7b128cfd05527bc856b66cdaf7d33691835eae Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc 
b/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e547c241f319a637fa590b09ad35c1592aacce40 Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so b/build/torch28-cxx11-cu126-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..8121d3da5057e1d53e4dee4b60de1e13285bd3e0 --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b62f5d045f370c3fb7c0e7ef458165feb987fba186b8cb9aee55c735a82e93 +size 2699928 diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/_ops.py b/build/torch28-cxx11-cu126-aarch64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0f883290f823dd4b9ad1432d6644d25bcd3a4acf --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_0c3eb4e_dirty +ops = torch.ops._activation_0c3eb4e_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_0c3eb4e_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-aarch64-linux/activation/layers.py b/build/torch28-cxx11-cu126-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..45b31181ffb80509a85d729a7f7ee86fc2cf014a --- /dev/null +++ b/build/torch28-cxx11-cu126-aarch64-linux/activation/layers.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. 
+ + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu126-x86_64-linux/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_activation_f8d6759.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/_activation_f8d6759.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..cabcacd16040aad8134b2892ea8f1f9781a9a78b --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/_activation_f8d6759.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf76431ff46ef5bc002ce8813eeed3ae9618a15094d98ef4b164f7a10a54f0bc +size 3121056 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py 
new file mode 100644 index 0000000000000000000000000000000000000000..140c6e96b3f93ce5b359648edac4dcb2913b8324 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_f8d6759 +ops = torch.ops._activation_f8d6759 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_f8d6759::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-cu126-x86_64-linux/layers.py b/build/torch28-cxx11-cu126-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu126-x86_64-linux/metadata.json b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/__init__.py b/build/torch28-cxx11-cu128-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c4f207354093c6ef83eb5d7f3a5a3b22b95d357 --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/activation/__init__.py @@ -0,0 +1,57 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + 
"gelu_fast", + "gelu_new", + "gelu_quick", + "layers", +] diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bdb5a121a09f628a672c404f5207f691347f83c5 Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d8c166048d114380e068ca6448ab46ef96da034 Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea0551b7b1c5e408b9875b62598f6f5f0b489a30 Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so b/build/torch28-cxx11-cu128-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..f1d23623c037de97ee0207fe5f750d8ba9863d3c --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/activation/_activation_0c3eb4e_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7fe0a00eaf2e228f237ee3058ac9eb2c6fbc4927b1276d0f566bb05bb043b9 +size 3683080 diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/_ops.py b/build/torch28-cxx11-cu128-aarch64-linux/activation/_ops.py 
new file mode 100644 index 0000000000000000000000000000000000000000..0f883290f823dd4b9ad1432d6644d25bcd3a4acf --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_0c3eb4e_dirty +ops = torch.ops._activation_0c3eb4e_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_0c3eb4e_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-aarch64-linux/activation/layers.py b/build/torch28-cxx11-cu128-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..45b31181ffb80509a85d729a7f7ee86fc2cf014a --- /dev/null +++ b/build/torch28-cxx11-cu128-aarch64-linux/activation/layers.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu128-x86_64-linux/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_activation_f8d6759.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/_activation_f8d6759.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..564e3aa415dbcea5a132bfb14301b4900373fb58 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/_activation_f8d6759.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc0a42d5ebcae09615265a3635bb90d33c76d9179fcfcec17fb2fc5cb16b7f5 +size 4400792 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py 
new file mode 100644 index 0000000000000000000000000000000000000000..140c6e96b3f93ce5b359648edac4dcb2913b8324 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_f8d6759 +ops = torch.ops._activation_f8d6759 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_f8d6759::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-cu128-x86_64-linux/layers.py b/build/torch28-cxx11-cu128-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu128-x86_64-linux/metadata.json b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/__init__.py b/build/torch28-cxx11-cu129-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch28-cxx11-cu129-aarch64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: 
torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e53c600baf751d47e3c75f0ea262aaa74cbaa2a0 Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfe526dc3c92a5c7b1a46084e58d4448fc74b15b Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..878be1d140d35a1a92eb1b870cd3ccc0bbb65128 Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/_activation_320b408.abi3.so b/build/torch28-cxx11-cu129-aarch64-linux/activation/_activation_320b408.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..485825618d1d0c2e93123fe5197999883b59b748 --- /dev/null +++ 
b/build/torch28-cxx11-cu129-aarch64-linux/activation/_activation_320b408.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af83bae80c8641200010ba586e5a2cac271fa4fcd344e3532ea7d5094fd7c17 +size 4275744 diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/_ops.py b/build/torch28-cxx11-cu129-aarch64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0fe83704e6d8850cb94dd0434fb763bff8e7e953 --- /dev/null +++ b/build/torch28-cxx11-cu129-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_320b408 +ops = torch.ops._activation_320b408 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_320b408::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-aarch64-linux/activation/layers.py b/build/torch28-cxx11-cu129-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch28-cxx11-cu129-aarch64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu129-x86_64-linux/__init__.py 
b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_activation_f8d6759.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/_activation_f8d6759.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ff0d1df159bdd317b6293331073a9aab2d4bd06c --- /dev/null +++ 
b/build/torch28-cxx11-cu129-x86_64-linux/_activation_f8d6759.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d0f9b82abd2e6d7154889814140b789e2d4452aac1296d921c9a2d4ab19e91 +size 4438672 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..140c6e96b3f93ce5b359648edac4dcb2913b8324 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_f8d6759 +ops = torch.ops._activation_f8d6759 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_f8d6759::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch28-cxx11-cu129-x86_64-linux/layers.py b/build/torch28-cxx11-cu129-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch28-cxx11-cu129-x86_64-linux/metadata.json b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/__init__.py b/build/torch29-cxx11-cu126-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: 
torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60dc82724c779cfa41bd9b8dcf39c036e2a50109 Binary files /dev/null and b/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48cda67561066b31e84ee5ecebcf0ef61e1ad322 Binary files /dev/null and b/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0082ca0b0e28577622a3e430602fabe010369318 Binary files /dev/null and b/build/torch29-cxx11-cu126-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/_activation_320b408.abi3.so b/build/torch29-cxx11-cu126-aarch64-linux/activation/_activation_320b408.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..41c75640cfdc7eeff3d57f4a6d403f7e7f10b8d8 --- /dev/null +++ 
b/build/torch29-cxx11-cu126-aarch64-linux/activation/_activation_320b408.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c24e0eb75a09a9fc19e7096276d560226f198617291681c1a18e94002a629e +size 2963480 diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/_ops.py b/build/torch29-cxx11-cu126-aarch64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0fe83704e6d8850cb94dd0434fb763bff8e7e953 --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_320b408 +ops = torch.ops._activation_320b408 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_320b408::{op_name}" \ No newline at end of file diff --git a/build/torch29-cxx11-cu126-aarch64-linux/activation/layers.py b/build/torch29-cxx11-cu126-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch29-cxx11-cu126-aarch64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu126-x86_64-linux/__init__.py 
b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_activation_63b875f.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/_activation_63b875f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..3bb70b2a77f8c7dd8f0125e896cfca9359138ff9 --- /dev/null +++ 
b/build/torch29-cxx11-cu126-x86_64-linux/_activation_63b875f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c84b682f2dd4437835661f57f031d96865871f6f4ab25f5651d4f577acee1326 +size 3121128 diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..602229319b5ec8bd38c2cd107da58e1e9e968b8d --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_63b875f +ops = torch.ops._activation_63b875f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_63b875f::{op_name}" \ No newline at end of file diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu126-x86_64-linux/layers.py b/build/torch29-cxx11-cu126-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu126-x86_64-linux/metadata.json b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/__init__.py b/build/torch29-cxx11-cu128-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: 
torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d338b4d5170fa0130189f67e65562998f8f42be Binary files /dev/null and b/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be7ffd679d4afbc36ea076dbc57e3162a60bd409 Binary files /dev/null and b/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e50041e74611417f4e4037e568a9e041780a5e32 Binary files /dev/null and b/build/torch29-cxx11-cu128-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/_activation_320b408.abi3.so b/build/torch29-cxx11-cu128-aarch64-linux/activation/_activation_320b408.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..dc83e4989904884309410757826ec095ea0fdfe4 --- /dev/null +++ 
b/build/torch29-cxx11-cu128-aarch64-linux/activation/_activation_320b408.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ee3dfa4d481eaf44ac3c11a0843598c05950f779dba66abd468fecb7839b32 +size 4208760 diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/_ops.py b/build/torch29-cxx11-cu128-aarch64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0fe83704e6d8850cb94dd0434fb763bff8e7e953 --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_320b408 +ops = torch.ops._activation_320b408 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_320b408::{op_name}" \ No newline at end of file diff --git a/build/torch29-cxx11-cu128-aarch64-linux/activation/layers.py b/build/torch29-cxx11-cu128-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch29-cxx11-cu128-aarch64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu128-x86_64-linux/__init__.py 
b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_activation_63b875f.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/_activation_63b875f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..e2e49fb0c5b136351663cc36a368639afff8a47c --- /dev/null +++ 
b/build/torch29-cxx11-cu128-x86_64-linux/_activation_63b875f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3afed8f50b04121c408e2b7fc8f4920015ba696b97e54be8e165cbbdd7039d6b +size 4400864 diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..602229319b5ec8bd38c2cd107da58e1e9e968b8d --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_63b875f +ops = torch.ops._activation_63b875f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_63b875f::{op_name}" \ No newline at end of file diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu128-x86_64-linux/layers.py b/build/torch29-cxx11-cu128-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu128-x86_64-linux/metadata.json b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file diff --git a/build/torch29-cxx11-cu129-aarch64-linux/__init__.py b/build/torch29-cxx11-cu129-aarch64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) 
-> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu129-aarch64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch29-cxx11-cu129-aarch64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..be95d14353d33115dffd14ed26748e19227084ff --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c3d96bdf09677af2537ac4a9f4cf67a241c6bd4a2888771faaa9e16c0973f4 +size 4538112 diff --git a/build/torch29-cxx11-cu129-aarch64-linux/_ops.py b/build/torch29-cxx11-cu129-aarch64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch29-cxx11-cu129-aarch64-linux/activation/__init__.py b/build/torch29-cxx11-cu129-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu129-aarch64-linux/layers.py b/build/torch29-cxx11-cu129-aarch64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu129-aarch64-linux/metadata.json b/build/torch29-cxx11-cu129-aarch64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda --- /dev/null +++ b/build/torch29-cxx11-cu129-aarch64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu129-x86_64-linux/__init__.py b/build/torch29-cxx11-cu129-x86_64-linux/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu129-x86_64-linux/_activation_cuda_5e1630d.abi3.so b/build/torch29-cxx11-cu129-x86_64-linux/_activation_cuda_5e1630d.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..388c9cd5d4f2977fc150126c3672e629aba36da2 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/_activation_cuda_5e1630d.abi3.so @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:99e5f0df9d07f3bc16feeaffb9863d669f677695c856045954c266c45246dc43 +size 4438768 diff --git a/build/torch29-cxx11-cu129-x86_64-linux/_ops.py b/build/torch29-cxx11-cu129-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c8caf619763d118e067bb91d329c09e99f4a54a4 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_cuda_5e1630d +ops = torch.ops._activation_cuda_5e1630d + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_cuda_5e1630d::{op_name}" diff --git a/build/torch29-cxx11-cu129-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu129-x86_64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. 
+ path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-cxx11-cu129-x86_64-linux/layers.py b/build/torch29-cxx11-cu129-x86_64-linux/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu129-x86_64-linux/metadata.json b/build/torch29-cxx11-cu129-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda --- /dev/null +++ b/build/torch29-cxx11-cu129-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "10.1", + "12.0+PTX", + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/__init__.py b/build/torch29-cxx11-cu130-aarch64-linux/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/activation/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
import layers + + +def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu_and_mul(out, x) + return out + + +def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.mul_and_silu(out, x) + return out + + +def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_and_mul(out, x) + return out + + +def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh_and_mul(out, x) + return out + + +def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: + ops.fatrelu_and_mul(out, x, threshold) + return out + + +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + +def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_fast(out, x) + return out + + +def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21696c8710d6b717d92ebd34545a9ac97cc44942 Binary files /dev/null and b/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc 
b/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1856969205a3825653d4be5e4c267a9585ff6594 Binary files /dev/null and b/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8e0f48d49bb34730201d17d0795310d829e20cb Binary files /dev/null and b/build/torch29-cxx11-cu130-aarch64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/_activation_320b408.abi3.so b/build/torch29-cxx11-cu130-aarch64-linux/activation/_activation_320b408.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..02267d619c1ad4c0bb7f84b243e5456c6bf7c798 --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/activation/_activation_320b408.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73748b54059552f5983322f7dedc36ed349b38ad6fb9318301bb4965b1fe49aa +size 4094968 diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/_ops.py b/build/torch29-cxx11-cu130-aarch64-linux/activation/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0fe83704e6d8850cb94dd0434fb763bff8e7e953 --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/activation/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_320b408 +ops = torch.ops._activation_320b408 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. 
+ """ + return f"_activation_320b408::{op_name}" \ No newline at end of file diff --git a/build/torch29-cxx11-cu130-aarch64-linux/activation/layers.py b/build/torch29-cxx11-cu130-aarch64-linux/activation/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 --- /dev/null +++ b/build/torch29-cxx11-cu130-aarch64-linux/activation/layers.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. 
+ + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-cxx11-cu130-x86_64-linux/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 --- /dev/null +++ b/build/torch29-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,75 @@ +import torch + +from ._ops import ops + +from . 
def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """SwiGLU: out = silu(x[..., :d]) * x[..., d:], d = x.shape[-1] // 2.

    Writes into ``out`` in place and returns it for convenience.
    """
    ops.silu_and_mul(out, x)
    return out


def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """SwiGLU variant: out = x[..., :d] * silu(x[..., d:]). Returns ``out``."""
    ops.mul_and_silu(out, x)
    return out


def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """GeGLU: out = gelu(x[..., :d]) * x[..., d:]. Returns ``out``."""
    ops.gelu_and_mul(out, x)
    return out


def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """GeGLU with tanh-approximated GELU, written into ``out``. Returns ``out``."""
    ops.gelu_tanh_and_mul(out, x)
    return out


def fatrelu_and_mul(
    out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0
) -> torch.Tensor:
    """FATReLU-gated product of the two halves of ``x``, written into ``out``.

    ``threshold`` is forwarded verbatim to the kernel. Returns ``out``.
    """
    ops.fatrelu_and_mul(out, x, threshold)
    return out


def gelu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Elementwise GELU of ``x`` into ``out``. Returns ``out``."""
    ops.gelu(out, x)
    return out


def silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Elementwise SiLU of ``x`` into ``out``. Returns ``out``."""
    ops.silu(out, x)
    return out


def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Elementwise tanh-approximated GELU of ``x`` into ``out``. Returns ``out``."""
    ops.gelu_tanh(out, x)
    return out


def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Kernel-backed "fast" GELU variant of ``x`` into ``out``. Returns ``out``."""
    ops.gelu_fast(out, x)
    return out


def gelu_new(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Kernel-backed "new" GELU variant of ``x`` into ``out``. Returns ``out``."""
    ops.gelu_new(out, x)
    return out


def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Kernel-backed quick-GELU variant of ``x`` into ``out``. Returns ``out``."""
    ops.gelu_quick(out, x)
    return out


__all__ = [
    "silu_and_mul",
    "mul_and_silu",
    "gelu_and_mul",
    "gelu_tanh_and_mul",
    "fatrelu_and_mul",
    "gelu_fast",
    "gelu_new",
    "gelu_quick",
    "gelu_tanh",
    "silu",
    "gelu",
    "layers",
]
# Lazy op namespace; the ops themselves are registered when the bundled
# extension (_activation_63b875f) is imported by the package.
ops = torch.ops._activation_63b875f


def add_op_namespace_prefix(op_name: str) -> str:
    """
    Prefix op by namespace.

    Ops are registered under the build-specific namespace
    ``_activation_63b875f``, so schema lookups must carry that prefix.
    """
    return f"_activation_63b875f::{op_name}"


def _import_from_path(file_path: Path) -> ModuleType:
    """Import ``file_path`` as a module registered under a unique name.

    We cannot use the module name as-is: after adding it to ``sys.modules``
    it would also be used for other imports.  So the registered name is the
    hex-encoded hash of the absolute path, which is unique per location.
    """
    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    if module is None:
        raise ImportError(f"Cannot load module {module_name} from spec")
    sys.modules[module_name] = module
    spec.loader.exec_module(module)  # type: ignore
    return module


class SiluAndMul(nn.Module):
    """An activation function for SwiGLU.

    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.

    Shapes:
        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
        return: (num_tokens, d) or (batch_size, seq_len, d)
    """

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Kernel expects a dense layout; normalize non-contiguous inputs.
        if not x.is_contiguous():
            x = x.contiguous()
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        ops.silu_and_mul(out, x)
        return out


class Silu(nn.Module):
    """Elementwise SiLU: x -> silu(x); output shape == input shape."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        out = torch.empty_like(x)
        ops.silu(out, x)
        return out


class Gelu(nn.Module):
    """Elementwise GELU: x -> gelu(x); output shape == input shape."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        out = torch.empty_like(x)
        ops.gelu(out, x)
        return out


class GeluTanh(nn.Module):
    """Elementwise GELU with tanh approximation; output shape == input shape."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        out = torch.empty_like(x)
        ops.gelu_tanh(out, x)
        return out


class MulAndSilu(nn.Module):
    """An activation function for SwiGLU.

    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.

    Shapes:
        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
        return: (num_tokens, d) or (batch_size, seq_len, d)
    """

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        ops.mul_and_silu(out, x)
        return out


class GeluAndMul(nn.Module):
    """An activation function for GeGLU.

    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.

    Shapes:
        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
        return: (batch_size, seq_len, d) or (num_tokens, d)
    """

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        ops.gelu_and_mul(out, x)
        return out


class GeluTanhAndMul(nn.Module):
    """GeGLU with tanh-approximated GELU: x -> gelu_tanh(x[:d]) * x[d:]."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        ops.gelu_tanh_and_mul(out, x)
        return out


class FatreluAndMul(nn.Module):
    """An activation function for FATReLU.

    The function computes x -> FATReLU(x[:d]) * x[d:] where
    d = x.shape[-1] // 2.
    This is used in openbmb/MiniCPM-S-1B-sft.

    Shapes:
        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
        return: (num_tokens, d) or (batch_size, seq_len, d)
    """

    can_torch_compile: bool = True

    def __init__(self, threshold: float = 0.0):
        super().__init__()
        # Cut-off value forwarded verbatim to the kernel.
        self.threshold = threshold

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        d = x.shape[-1] // 2
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        ops.fatrelu_and_mul(out, x, self.threshold)
        return out


class FastGELU(nn.Module):
    """Elementwise kernel-backed "fast" GELU variant; output shape == input shape."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        out = torch.empty_like(x)
        ops.gelu_fast(out, x)
        return out


class NewGELU(nn.Module):
    """Elementwise kernel-backed "new" GELU variant; output shape == input shape."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        out = torch.empty_like(x)
        ops.gelu_new(out, x)
        return out


class QuickGELU(nn.Module):
    """Elementwise kernel-backed quick-GELU variant; output shape == input shape."""

    can_torch_compile: bool = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.is_contiguous():
            x = x.contiguous()
        out = torch.empty_like(x)
        ops.gelu_quick(out, x)
        return out
ops.gelu_new(out, x) + return out + + +def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_quick(out, x) + return out + + +__all__ = [ + "silu_and_mul", + "mul_and_silu", + "gelu_and_mul", + "gelu_tanh_and_mul", + "fatrelu_and_mul", + "gelu_fast", + "gelu_new", + "gelu_quick", + "gelu_tanh", + "silu", + "gelu", + "layers", +] diff --git a/build/torch29-metal-aarch64-darwin/_activation_63b875f.abi3.so b/build/torch29-metal-aarch64-darwin/_activation_63b875f.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..986b7947b413077b8d8acf3967a52ee556212268 --- /dev/null +++ b/build/torch29-metal-aarch64-darwin/_activation_63b875f.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884e887217a67931f5a59b3c39487acb754ff51282adb6b13b5db669e39cb12e +size 220504 diff --git a/build/torch29-metal-aarch64-darwin/_ops.py b/build/torch29-metal-aarch64-darwin/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..602229319b5ec8bd38c2cd107da58e1e9e968b8d --- /dev/null +++ b/build/torch29-metal-aarch64-darwin/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _activation_63b875f +ops = torch.ops._activation_63b875f + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_activation_63b875f::{op_name}" \ No newline at end of file diff --git a/build/torch29-metal-aarch64-darwin/activation/__init__.py b/build/torch29-metal-aarch64-darwin/activation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309 --- /dev/null +++ b/build/torch29-metal-aarch64-darwin/activation/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. 
So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch29-metal-aarch64-darwin/layers.py b/build/torch29-metal-aarch64-darwin/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2f66f39d58561e0ff9d43eb943fac9e92e6a8259 --- /dev/null +++ b/build/torch29-metal-aarch64-darwin/layers.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn + +from ._ops import ops + + +class SiluAndMul(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.silu_and_mul(out, x) + return out + +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + + +class MulAndSilu(nn.Module): + """An activation function for SwiGLU. + + The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.mul_and_silu(out, x) + return out + + +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + +class GeluTanhAndMul(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_tanh_and_mul(out, x) + return out + + +class FatreluAndMul(nn.Module): + """An activation function for FATReLU. + + The function computes x -> FATReLU(x[:d]) * x[d:] where + d = x.shape[-1] // 2. + This is used in openbmb/MiniCPM-S-1B-sft. + + Shapes: + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def __init__(self, threshold: float = 0.0): + super().__init__() + self.threshold = threshold + + def forward(self, x: torch.Tensor): + if not x.is_contiguous(): + x = x.contiguous() + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.fatrelu_and_mul(out, x, self.threshold) + return out + + +class FastGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_fast(out, x) + return out + + +class NewGELU(nn.Module): + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_new(out, x) + return out + + +class QuickGELU(nn.Module): + 
can_torch_compile: bool = True + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not x.is_contiguous(): + x = x.contiguous() + out = torch.empty_like(x) + ops.gelu_quick(out, x) + return out diff --git a/build/torch29-metal-aarch64-darwin/metadata.json b/build/torch29-metal-aarch64-darwin/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8 --- /dev/null +++ b/build/torch29-metal-aarch64-darwin/metadata.json @@ -0,0 +1,4 @@ +{ + "version": 1, + "python-depends": [] +} \ No newline at end of file