| # Package-wide settings for this kernel build manifest. |
| # `name` is the package name; the meaning of `universal` is defined by the |
| # build tool that reads this file (presumably "no compiled code" when true — |
| # TODO confirm against the tool's documentation). |
| [general] |
| name = "moe" |
| universal = false |
|
|
| # Torch extension layer: the binding sources under torch-ext/ plus the shared |
| # scalar-type header. The kernel sections below name this section in `depends`. |
| # `include` is presumably a list of include directories relative to the project |
| # root, and `pyext` a list of file extensions to ship with the Python package — |
| # TODO confirm both against the build tool's documentation. |
| [torch] |
| include = ["."] |
| pyext = [ |
| "py", |
| "json", |
| ] |
| src = [ |
| "core/scalar_type.hpp", |
| "torch-ext/torch_binding.cpp", |
| "torch-ext/torch_binding.h", |
| ] |
|
|
| # Marlin MoE kernels (sources under marlin-moe/), built for the CUDA backend. |
| # This is the only kernel section in this file that sets `cuda-capabilities`; |
| # the others presumably fall back to the build tool's default list — TODO confirm. |
| # "core/scalar_type.hpp" is also listed in the [torch] section's `src`. |
| [kernel.moe-marlin] |
| backend = "cuda" |
| cuda-capabilities = [ |
| "8.0", |
| "8.6", |
| "8.7", |
| "8.9", |
| "9.0", |
| "10.0", |
| "10.1", |
| "12.0", |
| ] |
| depends = ["torch"] |
| include = ["."] |
| src = [ |
| "core/exception.hpp", |
| "core/scalar_type.hpp", |
| "marlin-moe/marlin_moe_ops.cu", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.cu", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.cu", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel.h", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.h", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.h", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.cu", |
| "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.h", |
| ] |
|
|
| # Activation kernels (sources under activation/), built for the CUDA backend. |
| # NOTE(review): the helper headers here live under activation/, whereas |
| # [kernel.fp8] and [kernel.moe] list top-level "cuda_compat.h" and |
| # "dispatch_utils.h" — confirm that both copies exist and that this is intended. |
| # No `include` key is set in this section. |
| [kernel.activation] |
| backend = "cuda" |
| depends = ["torch"] |
| src = [ |
| "activation/activation_kernels.cu", |
| "activation/cuda_compat.h", |
| "activation/dispatch_utils.h", |
| ] |
|
|
| # FP8 kernels (sources under fp8/) plus the top-level compatibility and |
| # dispatch headers, built for the CUDA backend. |
| # The fp8/amd/hip_float8*.h headers are listed even though the backend is |
| # "cuda"; presumably they are included conditionally by the fp8 sources — |
| # TODO confirm. |
| [kernel.fp8] |
| backend = "cuda" |
| depends = ["torch"] |
| include = ["."] |
| src = [ |
| "cuda_compat.h", |
| "dispatch_utils.h", |
| "fp8/amd/hip_float8.h", |
| "fp8/amd/hip_float8_impl.h", |
| "fp8/common.cu", |
| "fp8/common.cuh", |
| "fp8/vectorization.cuh", |
| ] |
|
|
| # Core MoE kernels (sources under moe/) plus the top-level compatibility and |
| # dispatch headers, built for the CUDA backend. |
| # NOTE(review): unlike [kernel.fp8], which lists the same two top-level |
| # headers, this section sets no `include = ["."]` — confirm the moe/ sources |
| # can resolve those headers without it. |
| [kernel.moe] |
| backend = "cuda" |
| depends = ["torch"] |
| src = [ |
| "cuda_compat.h", |
| "dispatch_utils.h", |
| "moe/moe_align_sum_kernels.cu", |
| "moe/moe_wna16.cu", |
| "moe/moe_wna16_utils.h", |
| "moe/topk_softmax_kernels.cu", |
| ] |
|
|