drbh committed
Commit 357c41f · unverified · 0 parent(s)

Migrated from kernels-community/rmsnorm

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +100 -0
  2. README.md +5 -0
  3. build/torch210-cxx11-cpu-x86_64-linux/__init__.py +27 -0
  4. build/torch210-cxx11-cpu-x86_64-linux/_ops.py +9 -0
  5. build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so +3 -0
  6. build/torch210-cxx11-cpu-x86_64-linux/layers.py +59 -0
  7. build/torch210-cxx11-cpu-x86_64-linux/metadata.json +8 -0
  8. build/torch210-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py +26 -0
  9. build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py +27 -0
  10. build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py +9 -0
  11. build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so +3 -0
  12. build/torch210-cxx11-xpu20253-x86_64-linux/layers.py +59 -0
  13. build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json +8 -0
  14. build/torch210-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py +26 -0
  15. build/torch210-xpu20253-x86_64-windows/__init__.py +27 -0
  16. build/torch210-xpu20253-x86_64-windows/_ops.py +9 -0
  17. build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd +3 -0
  18. build/torch210-xpu20253-x86_64-windows/layers.py +59 -0
  19. build/torch210-xpu20253-x86_64-windows/metadata.json +5 -0
  20. build/torch210-xpu20253-x86_64-windows/rmsnorm/__init__.py +26 -0
  21. build/torch211-cxx11-cpu-x86_64-linux/__init__.py +27 -0
  22. build/torch211-cxx11-cpu-x86_64-linux/_ops.py +9 -0
  23. build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so +3 -0
  24. build/torch211-cxx11-cpu-x86_64-linux/layers.py +59 -0
  25. build/torch211-cxx11-cpu-x86_64-linux/metadata.json +8 -0
  26. build/torch211-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py +26 -0
  27. build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py +27 -0
  28. build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py +9 -0
  29. build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so +3 -0
  30. build/torch211-cxx11-xpu20253-x86_64-linux/layers.py +59 -0
  31. build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json +8 -0
  32. build/torch211-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py +26 -0
  33. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__init__.py +14 -0
  34. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/__init__.cpython-313.pyc +0 -0
  35. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/_ops.cpython-313.pyc +0 -0
  36. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/layers.cpython-313.pyc +0 -0
  37. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_ops.py +9 -0
  38. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so +3 -0
  39. build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/layers.py +36 -0
  40. build/torch28-cxx11-cpu-x86_64-linux/__init__.py +27 -0
  41. build/torch28-cxx11-cpu-x86_64-linux/_ops.py +9 -0
  42. build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so +3 -0
  43. build/torch28-cxx11-cpu-x86_64-linux/layers.py +59 -0
  44. build/torch28-cxx11-cpu-x86_64-linux/metadata.json +4 -0
  45. build/torch28-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py +26 -0
  46. build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py +27 -0
  47. build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py +9 -0
  48. build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so +3 -0
  49. build/torch28-cxx11-xpu20251-x86_64-linux/layers.py +59 -0
  50. build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json +4 -0
.gitattributes ADDED
@@ -0,0 +1,100 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_a7a4369.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_fb26d8c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_7606158.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_4367ce1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_a8702c9.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_235cde1.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_fd30c0c.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-xpu20252-x86_64-windows/rmsnorm/_rmsnorm_96c9886.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_ce2b5cc.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-xpu20253-x86_64-windows/rmsnorm/_rmsnorm_4cd2f5b.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_7bbf693.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_0f8f3b4.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_b3d66c6.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-xpu20252-x86_64-linux/_rmsnorm_xpu_cec90b8.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,5 @@
+---
+tags:
+- kernels
+- cuda
+---
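
A note on usage: the README above only carries Hub tags. As a minimal sketch (assuming the Hugging Face `kernels` loader and that this repo is published under an id like `kernels-community/rmsnorm` - both are assumptions, not stated in this diff), the prebuilt binaries below would typically be fetched and used like this:

    # Hypothetical usage via the kernels loader; repo id and call are illustrative.
    import torch
    from kernels import get_kernel

    rmsnorm = get_kernel("kernels-community/rmsnorm")  # picks a build matching torch/backend
    x = torch.randn(4, 64)
    w = torch.ones(64)
    y = rmsnorm.apply_rms_norm(x, w, 1e-6)  # wrapper defined in the __init__.py below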
build/torch210-cxx11-cpu-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
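
For reference, a minimal sketch of how these wrappers are meant to be called once the compiled extension above is importable (the import path and shapes are illustrative, not prescribed by this repo):

    # Hypothetical direct use of the wrappers defined in this __init__.py.
    import torch
    import rmsnorm  # works when this build directory is on sys.path

    x = torch.randn(2, 8, 64)
    w = torch.ones(64)
    eps = 1e-6

    out = rmsnorm.apply_rms_norm(x, w, eps)            # forward only, rstd is dropped
    out, rstd = rmsnorm.ops.apply_rms_norm(x, w, eps)  # keep rstd for the backward call
    grads = rmsnorm.apply_rms_norm_backward(torch.ones_like(out), x, w, out, rstd, eps)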
build/torch210-cxx11-cpu-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_cpu_1a02f6f
+ops = torch.ops._rmsnorm_cpu_1a02f6f
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_cpu_1a02f6f::{op_name}"
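
`add_op_namespace_prefix` simply qualifies an op name with this build's private extension namespace; a small illustration of what it produces (the torch.library remark is an assumption about typical use, not code from this repo):

    # Illustrative only: the helper prepends the extension namespace.
    qualified = "_rmsnorm_cpu_1a02f6f::" + "apply_rms_norm"
    # Names of this "<namespace>::<op>" form are what torch.library utilities expect
    # when looking up or extending the registered custom op.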
build/torch210-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c102259696d99bbe9d4c686b4293195548faa4856123a358d44aab3d90148620
+size 2006072
build/torch210-cxx11-cpu-x86_64-linux/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
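
The RMSNorm module above defines no __init__ of its own: `weight` and `variance_epsilon` are expected to already exist on the layer it replaces (the docstring still describes the LigerRMSNormFunction it was adapted from, so not every listed argument applies here). A minimal standalone sketch, setting the attributes by hand (hypothetical usage, not part of this repo):

    # Hypothetical standalone use of the RMSNorm layer defined in layers.py.
    import torch
    import rmsnorm  # the build directory's package, assuming it is on sys.path

    m = rmsnorm.layers.RMSNorm()
    m.weight = torch.nn.Parameter(torch.ones(64))
    m.variance_epsilon = 1e-6

    x = torch.randn(2, 8, 64, requires_grad=True)
    y = m(x)             # RMSNormFunction.forward -> ops.apply_rms_norm
    y.sum().backward()   # RMSNormFunction.backward -> ops.apply_rms_norm_backward
    print(x.grad.shape, m.weight.grad.shape)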
build/torch210-cxx11-cpu-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,8 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cpu"
+  }
+}
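
Each build variant ships a small metadata.json that declares the backend it targets. A hedged sketch of how a consumer could pick a build directory by backend (the selection logic here is an assumption, not the actual loader):

    # Illustrative backend selection over the build/ tree.
    import json
    from pathlib import Path

    wanted = "cpu"  # or "xpu"
    builds = [
        d for d in Path("build").iterdir()
        if (d / "metadata.json").is_file()
        and json.loads((d / "metadata.json").read_text()).get("backend", {}).get("type") == wanted
    ]
    print(builds)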
build/torch210-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch210-cxx11-xpu20253-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_xpu_1a02f6f
+ops = torch.ops._rmsnorm_xpu_1a02f6f
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_xpu_1a02f6f::{op_name}"
build/torch210-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a87f0910ab215646183ecd9f4b2cbc5be6c72c3eee20d167f42f71c14629e65
+size 104793360
build/torch210-cxx11-xpu20253-x86_64-linux/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,8 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "xpu"
+  }
+}
build/torch210-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch210-xpu20253-x86_64-windows/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
build/torch210-xpu20253-x86_64-windows/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_xpu_2aa36b6
+ops = torch.ops._rmsnorm_xpu_2aa36b6
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_xpu_2aa36b6::{op_name}"
build/torch210-xpu20253-x86_64-windows/_rmsnorm_xpu_2aa36b6.pyd ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:690752b7e809e03b7be6d8f5521080ea84115db1078cf6a0010597612e5844d7
+size 2363904
build/torch210-xpu20253-x86_64-windows/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch210-xpu20253-x86_64-windows/metadata.json ADDED
@@ -0,0 +1,5 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": []
+}
build/torch210-xpu20253-x86_64-windows/rmsnorm/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch211-cxx11-cpu-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
build/torch211-cxx11-cpu-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_cpu_1a02f6f
+ops = torch.ops._rmsnorm_cpu_1a02f6f
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_cpu_1a02f6f::{op_name}"
build/torch211-cxx11-cpu-x86_64-linux/_rmsnorm_cpu_1a02f6f.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:439ac1a1bc4a6095844795cbccd7f2137c101bce3e3415bcebb3fd2b0dfcb97b
+size 2001976
build/torch211-cxx11-cpu-x86_64-linux/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch211-cxx11-cpu-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,8 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cpu"
+  }
+}
build/torch211-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch211-cxx11-xpu20253-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
build/torch211-cxx11-xpu20253-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_xpu_1a02f6f
+ops = torch.ops._rmsnorm_xpu_1a02f6f
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_xpu_1a02f6f::{op_name}"
build/torch211-cxx11-xpu20253-x86_64-linux/_rmsnorm_xpu_1a02f6f.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:153aa232ee4f342e92075140aa796e86ccd2f55f07d27bcad90890ed2fac57bf
+size 104793120
build/torch211-cxx11-xpu20253-x86_64-linux/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch211-cxx11-xpu20253-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,8 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "xpu"
+  }
+}
build/torch211-cxx11-xpu20253-x86_64-linux/rmsnorm/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__init__.py ADDED
@@ -0,0 +1,14 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )
+
+__all__ = ["layers", "apply_rms_norm"]
+
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (491 Bytes).
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (520 Bytes).
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/__pycache__/layers.cpython-313.pyc ADDED
Binary file (1.68 kB).
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_0d12ee5
+ops = torch.ops._rmsnorm_0d12ee5
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_0d12ee5::{op_name}"
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/_rmsnorm_0d12ee5.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79eb24cb07a24a3f829ce1d210bd0cbd79badd0cc236710a84e83c15575ddf04
+size 100963504
build/torch27-cxx11-xpu20250-x86_64-linux/rmsnorm/layers.py ADDED
@@ -0,0 +1,36 @@
+import torch
+from ._ops import ops
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return ops.apply_rms_norm(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch28-cxx11-cpu-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
build/torch28-cxx11-cpu-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_235cde1
+ops = torch.ops._rmsnorm_235cde1
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_235cde1::{op_name}"
build/torch28-cxx11-cpu-x86_64-linux/_rmsnorm_235cde1.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16c92de9cefabeeadc60ffff87189a1e66ecb9ea19b343570ac55e9d9c7d98fe
+size 156648
build/torch28-cxx11-cpu-x86_64-linux/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch28-cxx11-cpu-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}
build/torch28-cxx11-cpu-x86_64-linux/rmsnorm/__init__.py ADDED
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch28-cxx11-xpu20251-x86_64-linux/__init__.py ADDED
@@ -0,0 +1,27 @@
+from . import layers
+
+from ._ops import ops
+
+
+def apply_rms_norm(input, weight, eps):
+    # ops.apply_rms_norm returns [output, rstd]
+    return ops.apply_rms_norm(
+        input,
+        weight,
+        eps,
+    )[0]
+
+def apply_rms_norm_backward(grad_output, input, weight, output, rstd, eps, input_requires_grad=True, weight_requires_grad=True):
+    return ops.apply_rms_norm_backward(
+        grad_output,
+        input,
+        weight,
+        output,
+        rstd,
+        eps,
+        input_requires_grad,
+        weight_requires_grad
+    )
+
+__all__ = ["layers", "apply_rms_norm_forward", "apply_rms_norm_backward"]
+
build/torch28-cxx11-xpu20251-x86_64-linux/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _rmsnorm_235cde1
+ops = torch.ops._rmsnorm_235cde1
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_rmsnorm_235cde1::{op_name}"
build/torch28-cxx11-xpu20251-x86_64-linux/_rmsnorm_235cde1.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77c4b43d63dc74b210633da81630023a6d6e359a7a1115bff55da9f4436053d9
+size 103700632
build/torch28-cxx11-xpu20251-x86_64-linux/layers.py ADDED
@@ -0,0 +1,59 @@
+import torch
+from ._ops import ops
+
+class RMSNormFunction(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, hidden_states, weight, variance_epsilon):
+        ctx.variance_epsilon = variance_epsilon
+        output, rstd = ops.apply_rms_norm(hidden_states, weight, variance_epsilon)
+        ctx.save_for_backward(hidden_states, weight, output, rstd)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        hidden_states, weight, output, rstd = ctx.saved_tensors
+        grads = ops.apply_rms_norm_backward(
+            grad_output,
+            hidden_states,
+            weight,
+            output,
+            rstd,
+            ctx.variance_epsilon,
+            ctx.needs_input_grad[0],
+            ctx.needs_input_grad[1]
+        )
+        return grads[0], grads[1], None
+
+class RMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+
+    weight: torch.Tensor
+    variance_epsilon: float
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return RMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+        )
+
+__all__ = ["RMSNorm"]
build/torch28-cxx11-xpu20251-x86_64-linux/metadata.json ADDED
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}