luh1124 committed on
Commit
c024a94
·
1 Parent(s): 9aa801c

Docs + runtime warning for LFS example assets

Browse files
extensions/vox2seq/benchmark.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
import torch
import vox2seq


if __name__ == "__main__":
    stats = {
        'z_order_cuda': [],
        'z_order_pytorch': [],
        'hilbert_cuda': [],
        'hilbert_pytorch': [],
    }
    RES = [16, 32, 64, 128, 256]
    N_RUNS = 100  # iterations per measurement; reported numbers are per-call averages

    for res in RES:
        # Dense grid of every voxel coordinate at this resolution: [res^3, 3], int32 on GPU.
        coords = torch.meshgrid(
            torch.arange(res), torch.arange(res), torch.arange(res), indexing='ij'
        )
        coords = torch.stack(coords, dim=-1).reshape(-1, 3).int().cuda()

        # One timing stanza per encoder; the table is built from stats[key].
        encoders = (
            ('z_order_cuda', lambda: vox2seq.encode(coords, mode='z_order')),
            ('z_order_pytorch', lambda: vox2seq.pytorch.encode(coords, mode='z_order')),
            ('hilbert_cuda', lambda: vox2seq.encode(coords, mode='hilbert')),
            ('hilbert_pytorch', lambda: vox2seq.pytorch.encode(coords, mode='hilbert')),
        )
        for key, encode_fn in encoders:
            # Drain any pending GPU work (e.g. the coords transfer or the previous
            # encoder) so the timer measures only this encoder's kernels.
            torch.cuda.synchronize()
            start = time.time()
            for _ in range(N_RUNS):
                encode_fn()
            torch.cuda.synchronize()
            stats[key].append((time.time() - start) / N_RUNS)

    print(f"{'Resolution':<12}{'Z-Order (CUDA)':<24}{'Z-Order (PyTorch)':<24}{'Hilbert (CUDA)':<24}{'Hilbert (PyTorch)':<24}")
    for res, z_order_cuda, z_order_pytorch, hilbert_cuda, hilbert_pytorch in zip(RES, stats['z_order_cuda'], stats['z_order_pytorch'], stats['hilbert_cuda'], stats['hilbert_pytorch']):
        print(f"{res:<12}{z_order_cuda:<24.6f}{z_order_pytorch:<24.6f}{hilbert_cuda:<24.6f}{hilbert_pytorch:<24.6f}")
extensions/vox2seq/setup.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#

from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
import os

# Anchor source paths at this file's directory so the build works regardless
# of the directory pip/setuptools invokes setup.py from. (The original
# computed this value but discarded it.)
ROOT = os.path.dirname(os.path.abspath(__file__))

setup(
    name="vox2seq",
    packages=['vox2seq', 'vox2seq.pytorch'],
    ext_modules=[
        CUDAExtension(
            name="vox2seq._C",
            sources=[
                os.path.join(ROOT, "src/api.cu"),
                os.path.join(ROOT, "src/z_order.cu"),
                os.path.join(ROOT, "src/hilbert.cu"),
                os.path.join(ROOT, "src/ext.cpp"),
            ],
        )
    ],
    cmdclass={
        'build_ext': BuildExtension
    }
)
extensions/vox2seq/src/api.cu ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <torch/extension.h>
2
+ #include "api.h"
3
+ #include "z_order.h"
4
+ #include "hilbert.h"
5
+
6
+
7
// Host wrapper: Morton-encode N points given as three int32 CUDA tensors.
// Returns an int32 tensor of the same shape as x holding 30-bit codes.
torch::Tensor
z_order_encode(
    const torch::Tensor& x,
    const torch::Tensor& y,
    const torch::Tensor& z
) {
    // Allocate output tensor (same shape/dtype/device as x).
    torch::Tensor codes = torch::empty_like(x);

    // One thread per point; kernel is launched on the default stream.
    // NOTE(review): inputs are reinterpreted as uint32 — assumes int32 CUDA
    // tensors with non-negative coordinates that fit in 10 bits; verify at
    // the Python call site. No device-tensor or dtype check is done here.
    z_order_encode_cuda<<<(x.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(
        x.size(0),
        reinterpret_cast<uint32_t*>(x.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(y.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(z.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(codes.data_ptr<int>())
    );

    return codes;
}
27
+
28
+
29
// Host wrapper: decode N Morton codes into per-axis coordinate tensors.
// Returns (x, y, z), each with the codes' shape/dtype (int32).
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>
z_order_decode(
    const torch::Tensor& codes
) {
    // Allocate one output tensor per axis.
    torch::Tensor x = torch::empty_like(codes);
    torch::Tensor y = torch::empty_like(codes);
    torch::Tensor z = torch::empty_like(codes);

    // One thread per code; default-stream launch.
    // NOTE(review): assumes int32 CUDA input; only the low 30 bits of each
    // code are meaningful.
    z_order_decode_cuda<<<(codes.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(
        codes.size(0),
        reinterpret_cast<uint32_t*>(codes.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(x.data_ptr<int>()),
        reinterpret_cast<uint32_t*>(y.data_ptr<int>()),
        reinterpret_cast<uint32_t*>(z.data_ptr<int>())
    );

    return std::make_tuple(x, y, z);
}
49
+
50
+
51
// Host wrapper: Hilbert-encode N points given as three int32 CUDA tensors.
// Mirrors z_order_encode but dispatches the Hilbert kernel.
torch::Tensor
hilbert_encode(
    const torch::Tensor& x,
    const torch::Tensor& y,
    const torch::Tensor& z
) {
    // Allocate output tensor (same shape/dtype/device as x).
    torch::Tensor codes = torch::empty_like(x);

    // One thread per point; default-stream launch.
    // NOTE(review): same uint32/int32 reinterpretation assumption as
    // z_order_encode — coordinates must fit in 10 bits.
    hilbert_encode_cuda<<<(x.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(
        x.size(0),
        reinterpret_cast<uint32_t*>(x.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(y.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(z.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(codes.data_ptr<int>())
    );

    return codes;
}
71
+
72
+
73
// Host wrapper: decode N Hilbert codes into per-axis coordinate tensors.
// Returns (x, y, z), each with the codes' shape/dtype (int32).
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>
hilbert_decode(
    const torch::Tensor& codes
) {
    // Allocate one output tensor per axis.
    torch::Tensor x = torch::empty_like(codes);
    torch::Tensor y = torch::empty_like(codes);
    torch::Tensor z = torch::empty_like(codes);

    // One thread per code; default-stream launch.
    // NOTE(review): assumes int32 CUDA input; only the low 30 bits are used.
    hilbert_decode_cuda<<<(codes.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(
        codes.size(0),
        reinterpret_cast<uint32_t*>(codes.contiguous().data_ptr<int>()),
        reinterpret_cast<uint32_t*>(x.data_ptr<int>()),
        reinterpret_cast<uint32_t*>(y.data_ptr<int>()),
        reinterpret_cast<uint32_t*>(z.data_ptr<int>())
    );

    return std::make_tuple(x, y, z);
}
extensions/vox2seq/src/api.h ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Serialize a voxel grid
3
+ *
4
+ * Copyright (C) 2024, Jianfeng XIANG <belljig@outlook.com>
5
+ * All rights reserved.
6
+ *
7
+ * Licensed under The MIT License [see LICENSE for details]
8
+ *
9
+ * Written by Jianfeng XIANG
10
+ */
11
+
12
+ #pragma once
13
+ #include <torch/extension.h>
14
+
15
+
16
+ #define BLOCK_SIZE 256
17
+
18
+
19
+ /**
20
+ * Z-order encode 3D points
21
+ *
22
+ * @param x [N] tensor containing the x coordinates
23
+ * @param y [N] tensor containing the y coordinates
24
+ * @param z [N] tensor containing the z coordinates
25
+ *
26
+ * @return [N] tensor containing the z-order encoded values
27
+ */
28
+ torch::Tensor
29
+ z_order_encode(
30
+ const torch::Tensor& x,
31
+ const torch::Tensor& y,
32
+ const torch::Tensor& z
33
+ );
34
+
35
+
36
+ /**
37
+ * Z-order decode 3D points
38
+ *
39
+ * @param codes [N] tensor containing the z-order encoded values
40
+ *
41
+ * @return 3 tensors [N] containing the x, y, z coordinates
42
+ */
43
+ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>
44
+ z_order_decode(
45
+ const torch::Tensor& codes
46
+ );
47
+
48
+
49
+ /**
50
+ * Hilbert encode 3D points
51
+ *
52
+ * @param x [N] tensor containing the x coordinates
53
+ * @param y [N] tensor containing the y coordinates
54
+ * @param z [N] tensor containing the z coordinates
55
+ *
56
+ * @return [N] tensor containing the Hilbert encoded values
57
+ */
58
+ torch::Tensor
59
+ hilbert_encode(
60
+ const torch::Tensor& x,
61
+ const torch::Tensor& y,
62
+ const torch::Tensor& z
63
+ );
64
+
65
+
66
+ /**
67
+ * Hilbert decode 3D points
68
+ *
69
+ * @param codes [N] tensor containing the Hilbert encoded values
70
+ *
71
+ * @return 3 tensors [N] containing the x, y, z coordinates
72
+ */
73
+ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>
74
+ hilbert_decode(
75
+ const torch::Tensor& codes
76
+ );
extensions/vox2seq/src/ext.cpp ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
#include <torch/extension.h>
#include "api.h"


// Python bindings: expose the four encode/decode entry points declared in
// api.h as vox2seq._C.<name>.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("z_order_encode", &z_order_encode);
    m.def("z_order_decode", &z_order_decode);
    m.def("hilbert_encode", &hilbert_encode);
    m.def("hilbert_decode", &hilbert_decode);
}
extensions/vox2seq/src/hilbert.cu ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <cuda.h>
2
+ #include <cuda_runtime.h>
3
+ #include <device_launch_parameters.h>
4
+
5
+ #include <cooperative_groups.h>
6
+ #include <cooperative_groups/memcpy_async.h>
7
+ namespace cg = cooperative_groups;
8
+
9
+ #include "hilbert.h"
10
+
11
+
12
// Expands a 10-bit integer into 30 bits by inserting 2 zeros after each bit.
// Classic multiply-and-mask bit-spreading used for 3-way interleaving;
// the input must fit in 10 bits or high bits are silently lost.
static __device__ uint32_t expandBits(uint32_t v)
{
    v = (v * 0x00010001u) & 0xFF0000FFu;
    v = (v * 0x00000101u) & 0x0F00F00Fu;
    v = (v * 0x00000011u) & 0xC30C30C3u;
    v = (v * 0x00000005u) & 0x49249249u;
    return v;
}
21
+
22
+
23
// Removes 2 zeros after each bit in a 30-bit integer — the inverse of
// expandBits: keeps every third bit and compacts it into the low 10 bits.
static __device__ uint32_t extractBits(uint32_t v)
{
    v = v & 0x49249249;
    v = (v ^ (v >> 2)) & 0x030C30C3u;
    v = (v ^ (v >> 4)) & 0x0300F00Fu;
    v = (v ^ (v >> 8)) & 0x030000FFu;
    v = (v ^ (v >> 16)) & 0x000003FFu;
    return v;
}
33
+
34
+
35
/**
 * Kernel: encode one 3D point per thread into a 30-bit Hilbert index.
 *
 * Follows Skilling's transpose algorithm ("Programming the Hilbert curve",
 * AIP Conf. Proc. 707, 2004): transform the axes in place, Gray-encode,
 * then bit-interleave the transposed axes into a single code.
 * Each coordinate must fit in 10 bits (bit 9 is the top bit handled).
 */
__global__ void hilbert_encode_cuda(
    size_t N,
    const uint32_t* x,
    const uint32_t* y,
    const uint32_t* z,
    uint32_t* codes
) {
    size_t thread_id = cg::this_grid().thread_rank();
    if (thread_id >= N) return;

    uint32_t point[3] = {x[thread_id], y[thread_id], z[thread_id]};

    // m = highest bit processed (bit 9 -> 10 bits per axis).
    uint32_t m = 1 << 9, q, p, t;

    // Inverse undo excess work (AxesToTranspose step, top bit downward).
    q = m;
    while (q > 1) {
        p = q - 1;
        for (int i = 0; i < 3; i++) {
            if (point[i] & q) {
                point[0] ^= p; // invert the low bits of axis 0
            } else {
                // exchange the low bits of axis i with axis 0
                t = (point[0] ^ point[i]) & p;
                point[0] ^= t;
                point[i] ^= t;
            }
        }
        q >>= 1;
    }

    // Gray encode
    for (int i = 1; i < 3; i++) {
        point[i] ^= point[i - 1];
    }
    t = 0;
    q = m;
    while (q > 1) {
        if (point[2] & q) {
            t ^= q - 1;
        }
        q >>= 1;
    }
    for (int i = 0; i < 3; i++) {
        point[i] ^= t;
    }

    // Interleave the transposed axes into one 30-bit code
    // (axis 0 supplies the most significant bit of each 3-bit group).
    uint32_t xx = expandBits(point[0]);
    uint32_t yy = expandBits(point[1]);
    uint32_t zz = expandBits(point[2]);

    codes[thread_id] = xx * 4 + yy * 2 + zz;
}
88
+
89
+
90
/**
 * Kernel: decode one 30-bit Hilbert index per thread back into 3D coords.
 *
 * Inverse of hilbert_encode_cuda (Skilling's TransposeToAxes): de-interleave
 * the code, Gray-decode, then redo the axis transforms from the low bit up.
 */
__global__ void hilbert_decode_cuda(
    size_t N,
    const uint32_t* codes,
    uint32_t* x,
    uint32_t* y,
    uint32_t* z
) {
    size_t thread_id = cg::this_grid().thread_rank();
    if (thread_id >= N) return;

    // De-interleave the code into transposed per-axis bit patterns.
    uint32_t point[3];
    point[0] = extractBits(codes[thread_id] >> 2);
    point[1] = extractBits(codes[thread_id] >> 1);
    point[2] = extractBits(codes[thread_id]);

    // m = one past the highest handled bit (10 bits per axis).
    uint32_t m = 2 << 9, q, p, t;

    // Gray decode by H ^ (H/2)
    t = point[2] >> 1;
    for (int i = 2; i > 0; i--) {
        point[i] ^= point[i - 1];
    }
    point[0] ^= t;

    // Undo excess work, from the low bit upward (mirror of the encoder).
    q = 2;
    while (q != m) {
        p = q - 1;
        for (int i = 2; i >= 0; i--) {
            if (point[i] & q) {
                point[0] ^= p; // invert the low bits of axis 0
            } else {
                // exchange the low bits of axis i with axis 0
                t = (point[0] ^ point[i]) & p;
                point[0] ^= t;
                point[i] ^= t;
            }
        }
        q <<= 1;
    }

    x[thread_id] = point[0];
    y[thread_id] = point[1];
    z[thread_id] = point[2];
}
extensions/vox2seq/src/hilbert.h ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ /**
4
+ * Hilbert encode 3D points
5
+ *
6
+ * @param x [N] tensor containing the x coordinates
7
+ * @param y [N] tensor containing the y coordinates
8
+ * @param z [N] tensor containing the z coordinates
9
+ *
10
+ * @return [N] tensor containing the z-order encoded values
11
+ */
12
+ __global__ void hilbert_encode_cuda(
13
+ size_t N,
14
+ const uint32_t* x,
15
+ const uint32_t* y,
16
+ const uint32_t* z,
17
+ uint32_t* codes
18
+ );
19
+
20
+
21
+ /**
22
+ * Hilbert decode 3D points
23
+ *
24
+ * @param codes [N] tensor containing the z-order encoded values
25
+ * @param x [N] tensor containing the x coordinates
26
+ * @param y [N] tensor containing the y coordinates
27
+ * @param z [N] tensor containing the z coordinates
28
+ */
29
+ __global__ void hilbert_decode_cuda(
30
+ size_t N,
31
+ const uint32_t* codes,
32
+ uint32_t* x,
33
+ uint32_t* y,
34
+ uint32_t* z
35
+ );
extensions/vox2seq/src/z_order.cu ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <cuda.h>
2
+ #include <cuda_runtime.h>
3
+ #include <device_launch_parameters.h>
4
+
5
+ #include <cooperative_groups.h>
6
+ #include <cooperative_groups/memcpy_async.h>
7
+ namespace cg = cooperative_groups;
8
+
9
+ #include "z_order.h"
10
+
11
+
12
// Expands a 10-bit integer into 30 bits by inserting 2 zeros after each bit.
// Same multiply-and-mask spreading trick as in hilbert.cu; input must fit
// in 10 bits. NOTE(review): duplicated in hilbert.cu — consider sharing.
static __device__ uint32_t expandBits(uint32_t v)
{
    v = (v * 0x00010001u) & 0xFF0000FFu;
    v = (v * 0x00000101u) & 0x0F00F00Fu;
    v = (v * 0x00000011u) & 0xC30C30C3u;
    v = (v * 0x00000005u) & 0x49249249u;
    return v;
}
21
+
22
+
23
// Removes 2 zeros after each bit in a 30-bit integer (inverse of expandBits).
// NOTE(review): duplicated in hilbert.cu — consider sharing.
static __device__ uint32_t extractBits(uint32_t v)
{
    v = v & 0x49249249;
    v = (v ^ (v >> 2)) & 0x030C30C3u;
    v = (v ^ (v >> 4)) & 0x0300F00Fu;
    v = (v ^ (v >> 8)) & 0x030000FFu;
    v = (v ^ (v >> 16)) & 0x000003FFu;
    return v;
}
33
+
34
+
35
/**
 * Kernel: Morton-encode one 3D point per thread.
 * Interleaves the low 10 bits of each coordinate into a 30-bit code,
 * with x taking the most significant bit of each 3-bit group.
 */
__global__ void z_order_encode_cuda(
    size_t N,
    const uint32_t* x,
    const uint32_t* y,
    const uint32_t* z,
    uint32_t* codes
) {
    size_t thread_id = cg::this_grid().thread_rank();
    if (thread_id >= N) return;

    uint32_t xx = expandBits(x[thread_id]);
    uint32_t yy = expandBits(y[thread_id]);
    uint32_t zz = expandBits(z[thread_id]);

    // xx*4 + yy*2 + zz == (xx << 2) | (yy << 1) | zz for disjoint bit sets.
    codes[thread_id] = xx * 4 + yy * 2 + zz;
}
51
+
52
+
53
/**
 * Kernel: decode one 30-bit Morton code per thread into 3D coordinates.
 * Inverse of z_order_encode_cuda: shift to align each axis's bits, then
 * compact them with extractBits.
 */
__global__ void z_order_decode_cuda(
    size_t N,
    const uint32_t* codes,
    uint32_t* x,
    uint32_t* y,
    uint32_t* z
) {
    size_t thread_id = cg::this_grid().thread_rank();
    if (thread_id >= N) return;

    x[thread_id] = extractBits(codes[thread_id] >> 2);
    y[thread_id] = extractBits(codes[thread_id] >> 1);
    z[thread_id] = extractBits(codes[thread_id]);
}
extensions/vox2seq/src/z_order.h ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ /**
4
+ * Z-order encode 3D points
5
+ *
6
+ * @param x [N] tensor containing the x coordinates
7
+ * @param y [N] tensor containing the y coordinates
8
+ * @param z [N] tensor containing the z coordinates
9
+ *
10
+ * @return [N] tensor containing the z-order encoded values
11
+ */
12
+ __global__ void z_order_encode_cuda(
13
+ size_t N,
14
+ const uint32_t* x,
15
+ const uint32_t* y,
16
+ const uint32_t* z,
17
+ uint32_t* codes
18
+ );
19
+
20
+
21
+ /**
22
+ * Z-order decode 3D points
23
+ *
24
+ * @param codes [N] tensor containing the z-order encoded values
25
+ * @param x [N] tensor containing the x coordinates
26
+ * @param y [N] tensor containing the y coordinates
27
+ * @param z [N] tensor containing the z coordinates
28
+ */
29
+ __global__ void z_order_decode_cuda(
30
+ size_t N,
31
+ const uint32_t* codes,
32
+ uint32_t* x,
33
+ uint32_t* y,
34
+ uint32_t* z
35
+ );
extensions/vox2seq/test.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import vox2seq


if __name__ == "__main__":
    RES = 256
    # All voxel coordinates of a RES^3 grid: shape [RES^3, 3], int32 on GPU.
    # indexing='ij' keeps the pre-deprecation meshgrid behavior explicit.
    coords = torch.meshgrid(
        torch.arange(RES), torch.arange(RES), torch.arange(RES), indexing='ij'
    )
    coords = torch.stack(coords, dim=-1).reshape(-1, 3).int().cuda()

    # CUDA and pure-PyTorch encoders must agree bit-for-bit.
    code_z_cuda = vox2seq.encode(coords, mode='z_order')
    code_z_pytorch = vox2seq.pytorch.encode(coords, mode='z_order')
    code_h_cuda = vox2seq.encode(coords, mode='hilbert')
    code_h_pytorch = vox2seq.pytorch.encode(coords, mode='hilbert')
    assert torch.equal(code_z_cuda, code_z_pytorch)
    assert torch.equal(code_h_cuda, code_h_pytorch)

    # Decoders must agree as well. NOTE: torch.equal also compares dtypes,
    # so both decode implementations must return the same dtype for this
    # to pass — verify vox2seq.pytorch.decode's output dtype matches.
    code = torch.arange(RES**3).int().cuda()
    coords_z_cuda = vox2seq.decode(code, mode='z_order')
    coords_z_pytorch = vox2seq.pytorch.decode(code, mode='z_order')
    coords_h_cuda = vox2seq.decode(code, mode='hilbert')
    coords_h_pytorch = vox2seq.pytorch.decode(code, mode='hilbert')
    assert torch.equal(coords_z_cuda, coords_z_pytorch)
    assert torch.equal(coords_h_cuda, coords_h_pytorch)

    print("All tests passed.")
extensions/vox2seq/vox2seq/__init__.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import *
3
+ import torch
4
+ from . import _C
5
+ from . import pytorch
6
+
7
+
8
@torch.no_grad()
def encode(coords: torch.Tensor, permute: Sequence[int] = (0, 1, 2), mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor:
    """
    Encodes 3D coordinates into a 30-bit code using the CUDA extension.

    Args:
        coords: a tensor of shape [N, 3] containing the 3D coordinates.
            Assumed to be a CUDA int tensor with each value fitting in
            10 bits — TODO confirm against the kernel's contract.
        permute: the permutation of the coordinates (immutable default
            avoids the shared-mutable-default-argument pitfall).
        mode: the encoding mode to use.

    Returns:
        An int32 tensor of shape [N] with one space-filling-curve code
        per input point.

    Raises:
        ValueError: if `mode` is not 'z_order' or 'hilbert'.
    """
    assert coords.shape[-1] == 3 and coords.ndim == 2, "Input coordinates must be of shape [N, 3]"
    x = coords[:, permute[0]].int()
    y = coords[:, permute[1]].int()
    z = coords[:, permute[2]].int()
    if mode == 'z_order':
        return _C.z_order_encode(x, y, z)
    elif mode == 'hilbert':
        return _C.hilbert_encode(x, y, z)
    else:
        raise ValueError(f"Unknown encoding mode: {mode}")
28
+
29
+
30
@torch.no_grad()
def decode(code: torch.Tensor, permute: Sequence[int] = (0, 1, 2), mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor:
    """
    Decodes a 30-bit code into 3D coordinates using the CUDA extension.

    Args:
        code: a tensor of shape [N] containing the 30-bit code.
        permute: the permutation of the coordinates (immutable default
            avoids the shared-mutable-default-argument pitfall).
        mode: the decoding mode to use.

    Returns:
        A tensor of shape [N, 3] with the decoded coordinates, in the
        original (pre-permutation) column order.

    Raises:
        ValueError: if `mode` is not 'z_order' or 'hilbert'.
    """
    assert code.ndim == 1, "Input code must be of shape [N]"
    if mode == 'z_order':
        coords = _C.z_order_decode(code)
    elif mode == 'hilbert':
        coords = _C.hilbert_decode(code)
    else:
        raise ValueError(f"Unknown decoding mode: {mode}")
    # Invert the permutation applied by encode(): output column j is the
    # decoded axis i with permute[i] == j.
    permute = list(permute)
    x = coords[permute.index(0)]
    y = coords[permute.index(1)]
    z = coords[permute.index(2)]
    return torch.stack([x, y, z], dim=-1)
extensions/vox2seq/vox2seq/pytorch/__init__.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from typing import *
3
+
4
+ from .default import (
5
+ encode,
6
+ decode,
7
+ z_order_encode,
8
+ z_order_decode,
9
+ hilbert_encode,
10
+ hilbert_decode,
11
+ )
12
+
13
+
14
@torch.no_grad()
def encode(coords: torch.Tensor, permute: Sequence[int] = (0, 1, 2), mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor:
    """
    Encodes 3D coordinates into a 30-bit code (pure-PyTorch fallback).

    Args:
        coords: a tensor of shape [N, 3] containing the 3D coordinates.
        permute: the permutation of the coordinates (immutable default
            avoids the shared-mutable-default-argument pitfall).
        mode: the encoding mode to use.

    Returns:
        An int32 tensor of shape [N] with one code per input point
        (depth=10 bits per axis, matching the CUDA kernels).

    Raises:
        ValueError: if `mode` is not 'z_order' or 'hilbert'.
    """
    # Index with a list: tuple indexing of a tensor axis is ambiguous.
    permute = list(permute)
    if mode == 'z_order':
        return z_order_encode(coords[:, permute], depth=10).int()
    elif mode == 'hilbert':
        return hilbert_encode(coords[:, permute], depth=10).int()
    else:
        raise ValueError(f"Unknown encoding mode: {mode}")
30
+
31
+
32
@torch.no_grad()
def decode(code: torch.Tensor, permute: Sequence[int] = (0, 1, 2), mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor:
    """
    Decodes a 30-bit code into 3D coordinates (pure-PyTorch fallback).

    Args:
        code: a tensor of shape [N] containing the 30-bit code.
        permute: the permutation of the coordinates (immutable default
            avoids the shared-mutable-default-argument pitfall).
        mode: the decoding mode to use.

    Returns:
        A float tensor of shape [N, 3] with the decoded coordinates.
        NOTE(review): the CUDA path (vox2seq.decode) returns integer
        coordinates — this `.float()` makes the two paths compare unequal
        under torch.equal; confirm which dtype is intended.

    Raises:
        ValueError: if `mode` is not 'z_order' or 'hilbert'.
    """
    permute = list(permute)
    if mode == 'z_order':
        return z_order_decode(code, depth=10)[:, permute].float()
    elif mode == 'hilbert':
        return hilbert_decode(code, depth=10)[:, permute].float()
    else:
        raise ValueError(f"Unknown decoding mode: {mode}")
48
+
extensions/vox2seq/vox2seq/pytorch/default.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from .z_order import xyz2key as z_order_encode_
3
+ from .z_order import key2xyz as z_order_decode_
4
+ from .hilbert import encode as hilbert_encode_
5
+ from .hilbert import decode as hilbert_decode_
6
+
7
+
8
+ @torch.inference_mode()
9
+ def encode(grid_coord, batch=None, depth=16, order="z"):
10
+ assert order in {"z", "z-trans", "hilbert", "hilbert-trans"}
11
+ if order == "z":
12
+ code = z_order_encode(grid_coord, depth=depth)
13
+ elif order == "z-trans":
14
+ code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth)
15
+ elif order == "hilbert":
16
+ code = hilbert_encode(grid_coord, depth=depth)
17
+ elif order == "hilbert-trans":
18
+ code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth)
19
+ else:
20
+ raise NotImplementedError
21
+ if batch is not None:
22
+ batch = batch.long()
23
+ code = batch << depth * 3 | code
24
+ return code
25
+
26
+
27
+ @torch.inference_mode()
28
+ def decode(code, depth=16, order="z"):
29
+ assert order in {"z", "hilbert"}
30
+ batch = code >> depth * 3
31
+ code = code & ((1 << depth * 3) - 1)
32
+ if order == "z":
33
+ grid_coord = z_order_decode(code, depth=depth)
34
+ elif order == "hilbert":
35
+ grid_coord = hilbert_decode(code, depth=depth)
36
+ else:
37
+ raise NotImplementedError
38
+ return grid_coord, batch
39
+
40
+
41
def z_order_encode(grid_coord: torch.Tensor, depth: int = 16):
    """Morton-encode [N, 3] integer coordinates into [N] int64 codes."""
    x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long()
    # we block the support to batch, maintain batched code in Point class
    code = z_order_encode_(x, y, z, b=None, depth=depth)
    return code
46
+
47
+
48
def z_order_decode(code: torch.Tensor, depth: int = 16):
    """Decode [N] Morton codes back into [N, 3] integer coordinates.

    Args:
        code: [N] integer tensor of z-order codes.
        depth: bits per axis; default 16 for consistency with
            z_order_encode() and the hilbert wrappers (previously this
            parameter had no default, unlike every sibling function).
    """
    x, y, z, _ = z_order_decode_(code, depth=depth)
    grid_coord = torch.stack([x, y, z], dim=-1)  # (N, 3)
    return grid_coord
52
+
53
+
54
def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16):
    """Hilbert-encode [N, 3] integer coordinates into [N] int64 codes."""
    return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth)
56
+
57
+
58
def hilbert_decode(code: torch.Tensor, depth: int = 16):
    """Decode [N] Hilbert codes back into [N, 3] integer coordinates."""
    return hilbert_decode_(code, num_dims=3, num_bits=depth)
extensions/vox2seq/vox2seq/pytorch/hilbert.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hilbert Order
3
+ Modified from https://github.com/PrincetonLIPS/numpy-hilbert-curve
4
+
5
+ Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Kaixin Xu
6
+ Please cite our work if the code is helpful to you.
7
+ """
8
+
9
+ import torch
10
+
11
+
12
def right_shift(binary, k=1, axis=-1):
    """Right shift an array of binary values.

    Parameters:
    -----------
    binary: An ndarray of binary values.

    k: The number of bits to shift. Default 1.

    axis: The axis along which to shift. Default -1.

    Returns:
    --------
    Returns an ndarray with zero prepended and the ends truncated, along
    whatever axis was specified.

    Note:
    -----
    The previous implementation sliced along `axis` but always padded the
    LAST dimension (F.pad's (k, 0) pads dim -1), so any axis other than -1
    produced wrong shapes/results. The zeros are now prepended along the
    requested axis. Behavior for the default axis=-1 is unchanged.
    """
    # Callers sometimes pass k as a 1-element tensor; normalize to int.
    k = int(k)

    # If we're shifting the whole thing, just return zeros.
    if binary.shape[axis] <= k:
        return torch.zeros_like(binary)

    # Drop the last k entries along `axis`...
    slicing = [slice(None)] * binary.ndim
    slicing[axis] = slice(None, -k)
    truncated = binary[tuple(slicing)]

    # ...and prepend k zeros along the same axis.
    zeros_shape = list(binary.shape)
    zeros_shape[axis] = k
    zeros = torch.zeros(zeros_shape, dtype=binary.dtype, device=binary.device)
    return torch.cat([zeros, truncated], dim=axis)
44
+
45
+
46
def binary2gray(binary, axis=-1):
    """Convert an array of binary values into Gray codes.

    Uses the classic ``X ^ (X >> 1)`` construction.

    Parameters:
    -----------
    binary: An ndarray of binary values.

    axis: The axis along which to compute the gray code. Default=-1.

    Returns:
    --------
    Returns an ndarray of Gray codes.
    """
    # XOR each bit with the bit one position more significant than it.
    return torch.logical_xor(binary, right_shift(binary, axis=axis))
67
+
68
+
69
def gray2binary(gray, axis=-1):
    """Convert an array of Gray codes back into binary values.

    Parameters:
    -----------
    gray: An ndarray of gray codes.

    axis: The axis along which to perform Gray decoding. Default=-1.

    Returns:
    --------
    Returns an ndarray of binary values.
    """

    # Prefix-XOR with a doubling shift: after ceil(log2(bits)) rounds every
    # bit has been XORed with all bits above it, inverting X ^ (X >> 1).
    # NOTE(review): `shift` is a 1-element tensor, so the `> 0` comparison
    # and the shift amount inside right_shift rely on implicit tensor->int
    # coercion — works, but a plain int would be cleaner; confirm before
    # changing.
    shift = 2 ** (torch.Tensor([gray.shape[axis]]).log2().ceil().int() - 1)
    while shift > 0:
        gray = torch.logical_xor(gray, right_shift(gray, shift))
        shift = torch.div(shift, 2, rounding_mode="floor")
    return gray
89
+
90
+
91
def encode(locs, num_dims, num_bits):
    """Encode an array of locations in a hypercube into a Hilbert integer.

    This is a vectorized-ish version of the Hilbert curve implementation by John
    Skilling as described in:

    Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference
    Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics.

    Params:
    -------
    locs - An ndarray of locations in a hypercube of num_dims dimensions, in
           which each dimension runs from 0 to 2**num_bits-1. The shape can
           be arbitrary, as long as the last dimension of the same has size
           num_dims.

    num_dims - The dimensionality of the hypercube. Integer.

    num_bits - The number of bits for each dimension. Integer.

    Returns:
    --------
    The output is an ndarray of uint64 integers with the same shape as the
    input, excluding the last dimension, which needs to be num_dims.
    """

    # Keep around the original shape for later.
    orig_shape = locs.shape
    # Per-byte bit masks used to unpack/pack bits (LSB-first and MSB-first).
    bitpack_mask = 1 << torch.arange(0, 8).to(locs.device)
    bitpack_mask_rev = bitpack_mask.flip(-1)

    if orig_shape[-1] != num_dims:
        raise ValueError(
            """
      The shape of locs was surprising in that the last dimension was of size
      %d, but num_dims=%d. These need to be equal.
      """
            % (orig_shape[-1], num_dims)
        )

    if num_dims * num_bits > 63:
        raise ValueError(
            """
      num_dims=%d and num_bits=%d for %d bits total, which can't be encoded
      into a int64. Are you sure you need that many points on your Hilbert
      curve?
      """
            % (num_dims, num_bits, num_dims * num_bits)
        )

    # Treat the location integers as 64-bit unsigned and then split them up into
    # a sequence of uint8s. Preserve the association by dimension.
    locs_uint8 = locs.long().view(torch.uint8).reshape((-1, num_dims, 8)).flip(-1)

    # Now turn these into bits and truncate to num_bits.
    gray = (
        locs_uint8.unsqueeze(-1)
        .bitwise_and(bitpack_mask_rev)
        .ne(0)
        .byte()
        .flatten(-2, -1)[..., -num_bits:]
    )

    # Run the decoding process the other way.
    # Iterate forwards through the bits.
    for bit in range(0, num_bits):
        # Iterate forwards through the dimensions.
        for dim in range(0, num_dims):
            # Identify which ones have this bit active.
            mask = gray[:, dim, bit]

            # Where this bit is on, invert the 0 dimension for lower bits.
            gray[:, 0, bit + 1 :] = torch.logical_xor(
                gray[:, 0, bit + 1 :], mask[:, None]
            )

            # Where the bit is off, exchange the lower bits with the 0 dimension.
            to_flip = torch.logical_and(
                torch.logical_not(mask[:, None]).repeat(1, gray.shape[2] - bit - 1),
                torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]),
            )
            gray[:, dim, bit + 1 :] = torch.logical_xor(
                gray[:, dim, bit + 1 :], to_flip
            )
            gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip)

    # Now flatten out.
    gray = gray.swapaxes(1, 2).reshape((-1, num_bits * num_dims))

    # Convert Gray back to binary.
    hh_bin = gray2binary(gray)

    # Pad back out to 64 bits.
    extra_dims = 64 - num_bits * num_dims
    padded = torch.nn.functional.pad(hh_bin, (extra_dims, 0), "constant", 0)

    # Convert binary values into uint8s.
    # NOTE(review): .squeeze() collapses the batch dimension when N == 1,
    # so a single location yields a 0-d result — confirm callers expect this.
    hh_uint8 = (
        (padded.flip(-1).reshape((-1, 8, 8)) * bitpack_mask)
        .sum(2)
        .squeeze()
        .type(torch.uint8)
    )

    # Convert uint8s into uint64s.
    hh_uint64 = hh_uint8.view(torch.int64).squeeze()

    return hh_uint64
199
+
200
+
201
def decode(hilberts, num_dims, num_bits):
    """Decode an array of Hilbert integers into locations in a hypercube.

    This is a vectorized-ish version of the Hilbert curve implementation by John
    Skilling as described in:

    Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference
    Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics.

    Params:
    -------
    hilberts - An ndarray of Hilbert integers. Must be an integer dtype and
               cannot have fewer bits than num_dims * num_bits.

    num_dims - The dimensionality of the hypercube. Integer.

    num_bits - The number of bits for each dimension. Integer.

    Returns:
    --------
    The output is an ndarray of unsigned integers with the same shape as hilberts
    but with an additional dimension of size num_dims.
    """

    if num_dims * num_bits > 64:
        # Fix: the message has three %d placeholders but the tuple previously
        # supplied only two values, so raising it crashed with a TypeError.
        raise ValueError(
            """
      num_dims=%d and num_bits=%d for %d bits total, which can't be encoded
      into a uint64. Are you sure you need that many points on your Hilbert
      curve?
      """
            % (num_dims, num_bits, num_dims * num_bits)
        )

    # Handle the case where we got handed a naked integer.
    hilberts = torch.atleast_1d(hilberts)

    # Keep around the shape for later.
    orig_shape = hilberts.shape
    bitpack_mask = 2 ** torch.arange(0, 8).to(hilberts.device)
    bitpack_mask_rev = bitpack_mask.flip(-1)

    # Treat each of the hilberts as a sequence of eight uint8.
    # This treats all of the inputs as uint64 and makes things uniform.
    hh_uint8 = (
        hilberts.ravel().type(torch.int64).view(torch.uint8).reshape((-1, 8)).flip(-1)
    )

    # Turn these lists of uints into lists of bits and then truncate to the size
    # we actually need for using Skilling's procedure.
    hh_bits = (
        hh_uint8.unsqueeze(-1)
        .bitwise_and(bitpack_mask_rev)
        .ne(0)
        .byte()
        .flatten(-2, -1)[:, -num_dims * num_bits :]
    )

    # Take the sequence of bits and Gray-code it.
    gray = binary2gray(hh_bits)

    # There has got to be a better way to do this.
    # I could index them differently, but the eventual packbits likes it this way.
    gray = gray.reshape((-1, num_bits, num_dims)).swapaxes(1, 2)

    # Iterate backwards through the bits.
    for bit in range(num_bits - 1, -1, -1):
        # Iterate backwards through the dimensions.
        for dim in range(num_dims - 1, -1, -1):
            # Identify which ones have this bit active.
            mask = gray[:, dim, bit]

            # Where this bit is on, invert the 0 dimension for lower bits.
            gray[:, 0, bit + 1 :] = torch.logical_xor(
                gray[:, 0, bit + 1 :], mask[:, None]
            )

            # Where the bit is off, exchange the lower bits with the 0 dimension.
            to_flip = torch.logical_and(
                torch.logical_not(mask[:, None]),
                torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]),
            )
            gray[:, dim, bit + 1 :] = torch.logical_xor(
                gray[:, dim, bit + 1 :], to_flip
            )
            gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip)

    # Pad back out to 64 bits.
    extra_dims = 64 - num_bits
    padded = torch.nn.functional.pad(gray, (extra_dims, 0), "constant", 0)

    # Now chop these up into blocks of 8.
    locs_chopped = padded.flip(-1).reshape((-1, num_dims, 8, 8))

    # Take those blocks and turn them unto uint8s.
    # NOTE(review): .squeeze() collapses singleton dims when N == 1 — confirm
    # callers never pass a single Hilbert integer through this path.
    locs_uint8 = (locs_chopped * bitpack_mask).sum(3).squeeze().type(torch.uint8)

    # Finally, treat these as uint64s.
    flat_locs = locs_uint8.view(torch.int64)

    # Return them in the expected shape.
    return flat_locs.reshape((*orig_shape, num_dims))
extensions/vox2seq/vox2seq/pytorch/z_order.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --------------------------------------------------------
2
+ # Octree-based Sparse Convolutional Neural Networks
3
+ # Copyright (c) 2022 Peng-Shuai Wang <wangps@hotmail.com>
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Peng-Shuai Wang
6
+ # --------------------------------------------------------
7
+
8
+ import torch
9
+ from typing import Optional, Union
10
+
11
+
12
class KeyLUT:
    """Lookup tables mapping 3-D coordinates to Morton (z-order) keys and back.

    The tables are materialized once on CPU at construction time and are
    copied to other devices lazily, on first request.
    """

    def __init__(self):
        cpu = torch.device("cpu")
        byte_range = torch.arange(256, dtype=torch.int64)
        chunk_range = torch.arange(512, dtype=torch.int64)
        zeros = torch.zeros(256, dtype=torch.int64)

        # One 8-bit encode table per axis; a single 9-bit (3 bits/axis)
        # decode table shared by all axes.
        self._encode = {
            cpu: (
                self.xyz2key(byte_range, zeros, zeros, 8),
                self.xyz2key(zeros, byte_range, zeros, 8),
                self.xyz2key(zeros, zeros, byte_range, 8),
            )
        }
        self._decode = {cpu: self.key2xyz(chunk_range, 9)}

    def encode_lut(self, device=torch.device("cpu")):
        """Return the per-axis encode tables, cached on ``device``."""
        if device not in self._encode:
            tables = self._encode[torch.device("cpu")]
            self._encode[device] = tuple(t.to(device) for t in tables)
        return self._encode[device]

    def decode_lut(self, device=torch.device("cpu")):
        """Return the per-axis decode tables, cached on ``device``."""
        if device not in self._decode:
            tables = self._decode[torch.device("cpu")]
            self._decode[device] = tuple(t.to(device) for t in tables)
        return self._decode[device]

    def xyz2key(self, x, y, z, depth):
        """Interleave the low ``depth`` bits of x, y, z into a Morton key.

        Bit ``i`` of x/y/z lands at key bit ``3*i + 2`` / ``3*i + 1`` / ``3*i``.
        """
        key = torch.zeros_like(x)
        for bit in range(depth):
            sel = 1 << bit
            xb = (x & sel) << (2 * bit + 2)
            yb = (y & sel) << (2 * bit + 1)
            zb = (z & sel) << (2 * bit)
            key = key | xb | yb | zb
        return key

    def key2xyz(self, key, depth):
        """De-interleave a Morton key back into x, y, z coordinate bits."""
        x = torch.zeros_like(key)
        y = torch.zeros_like(key)
        z = torch.zeros_like(key)
        for bit in range(depth):
            x = x | ((key & (1 << (3 * bit + 2))) >> (2 * bit + 2))
            y = y | ((key & (1 << (3 * bit + 1))) >> (2 * bit + 1))
            z = z | ((key & (1 << (3 * bit))) >> (2 * bit))
        return x, y, z
61
+
62
+
63
# Shared module-level instance so the lookup tables are built only once.
_key_lut = KeyLUT()
64
+
65
+
66
def xyz2key(
    x: torch.Tensor,
    y: torch.Tensor,
    z: torch.Tensor,
    b: Optional[Union[torch.Tensor, int]] = None,
    depth: int = 16,
):
    r"""Encodes :attr:`x`, :attr:`y`, :attr:`z` coordinates to the shuffled keys
    based on pre-computed look up tables. The speed of this function is much
    faster than the method based on for-loop.

    Args:
        x (torch.Tensor): The x coordinate.
        y (torch.Tensor): The y coordinate.
        z (torch.Tensor): The z coordinate.
        b (torch.Tensor or int): The batch index of the coordinates, and should be
            smaller than 32768. If :attr:`b` is :obj:`torch.Tensor`, the size of
            :attr:`b` must be the same as :attr:`x`, :attr:`y`, and :attr:`z`.
        depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17).
    """

    EX, EY, EZ = _key_lut.encode_lut(x.device)
    x, y, z = x.long(), y.long(), z.long()

    # The encode LUTs cover 8 bits per axis; deeper keys are assembled from
    # two 8-bit halves (the low half occupies 3 * 8 = 24 key bits).
    mask = 255 if depth > 8 else (1 << depth) - 1
    key = EX[x & mask] | EY[y & mask] | EZ[z & mask]
    if depth > 8:
        mask = (1 << (depth - 8)) - 1
        key16 = EX[(x >> 8) & mask] | EY[(y >> 8) & mask] | EZ[(z >> 8) & mask]
        key = key16 << 24 | key

    if b is not None:
        # BUGFIX: `b` may be a plain int (as documented above); the previous
        # unconditional `b.long()` raised AttributeError for ints. Only cast
        # tensors; a Python int shifts and ORs with a tensor directly.
        if isinstance(b, torch.Tensor):
            b = b.long()
        # Batch index occupies the bits above position 48 (3 * 16 key bits).
        key = b << 48 | key

    return key
102
+
103
+
104
def key2xyz(key: torch.Tensor, depth: int = 16):
    r"""Decodes the shuffled key to :attr:`x`, :attr:`y`, :attr:`z` coordinates
    and the batch index based on pre-computed look up tables.

    Args:
        key (torch.Tensor): The shuffled key.
        depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17).
    """

    DX, DY, DZ = _key_lut.decode_lut(key.device)

    x = torch.zeros_like(key)
    y = torch.zeros_like(key)
    z = torch.zeros_like(key)

    # Split off the batch index stored above bit 48, keep the coordinate bits.
    b = key >> 48
    key = key & ((1 << 48) - 1)

    # Each iteration decodes 9 key bits (3 bits per axis) via the LUTs.
    num_chunks = (depth + 2) // 3
    for chunk in range(num_chunks):
        idx = (key >> (chunk * 9)) & 511
        shift = chunk * 3
        x = x | (DX[idx] << shift)
        y = y | (DY[idx] << shift)
        z = z | (DZ[idx] << shift)

    return x, y, z, b