|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #ifndef CLBLAST_CLBLAST_H_ |
| #define CLBLAST_CLBLAST_H_ |
|
|
| #include <cstdlib> |
| #include <string> |
| #include <unordered_map> |
|
|
| |
| #if defined(__APPLE__) || defined(__MACOSX) |
| #include <OpenCL/opencl.h> |
| #else |
| #include <CL/opencl.h> |
| #endif |
|
|
// Symbol-visibility macro for the exported (non-template) functions of this header.
// On Windows with CLBLAST_DLL defined it expands to __declspec(dllexport) when
// COMPILING_DLL is defined and to __declspec(dllimport) otherwise; in every other
// configuration it expands to nothing.
#if defined(_WIN32) && defined(CLBLAST_DLL)
  #if defined(COMPILING_DLL)
    #define PUBLIC_API __declspec(dllexport)
  #else
    #define PUBLIC_API __declspec(dllimport)
  #endif
#else
  #define PUBLIC_API
#endif
|
|
// Library version, split into three integer macros (major.minor.patch = 1.6.0)
#define CLBLAST_VERSION_MAJOR 1
#define CLBLAST_VERSION_MINOR 6
#define CLBLAST_VERSION_PATCH 0
|
|
| namespace clblast { |
| |
|
|
// Status codes: the return type of every routine declared in this header. kSuccess is
// zero; every other enumerator has a distinct negative value. The explicit values are
// part of the public interface and must not be renumbered.
enum class StatusCode {

  // Codes whose values coincide with OpenCL error codes (e.g. CL_INVALID_VALUE == -30)
  kSuccess                   =   0,
  kOpenCLCompilerNotAvailable=  -3,
  kTempBufferAllocFailure    =  -4,
  kOpenCLOutOfResources      =  -5,
  kOpenCLOutOfHostMemory     =  -6,
  kOpenCLBuildProgramFailure = -11,
  kInvalidValue              = -30,
  kInvalidCommandQueue       = -36,
  kInvalidMemObject          = -38,
  kInvalidBinary             = -42,
  kInvalidBuildOptions       = -43,
  kInvalidProgram            = -44,
  kInvalidProgramExecutable  = -45,
  kInvalidKernelName         = -46,
  kInvalidKernelDefinition   = -47,
  kInvalidKernel             = -48,
  kInvalidArgIndex           = -49,
  kInvalidArgValue           = -50,
  kInvalidArgSize            = -51,
  kInvalidKernelArgs         = -52,
  kInvalidLocalNumDimensions = -53,
  kInvalidLocalThreadsTotal  = -54,
  kInvalidLocalThreadsDim    = -55,
  kInvalidGlobalOffset       = -56,
  kInvalidEventWaitList      = -57,
  kInvalidEvent              = -58,
  kInvalidOperation          = -59,
  kInvalidBufferSize         = -61,
  kInvalidGlobalWorkSize     = -63,

  // Codes in the -1024..-1007 range: argument, dimension and buffer-size problems.
  // NOTE(review): these appear to mirror the clBLAS status values -- confirm.
  kNotImplemented            = -1024,
  kInvalidMatrixA            = -1022,
  kInvalidMatrixB            = -1021,
  kInvalidMatrixC            = -1020,
  kInvalidVectorX            = -1019,
  kInvalidVectorY            = -1018,
  kInvalidDimension          = -1017,
  kInvalidLeadDimA           = -1016,
  kInvalidLeadDimB           = -1015,
  kInvalidLeadDimC           = -1014,
  kInvalidIncrementX         = -1013,
  kInvalidIncrementY         = -1012,
  kInsufficientMemoryA       = -1011,
  kInsufficientMemoryB       = -1010,
  kInsufficientMemoryC       = -1009,
  kInsufficientMemoryX       = -1008,
  kInsufficientMemoryY       = -1007,

  // Codes in the -2050..-2039 range: additional library-specific conditions
  kInsufficientMemoryTemp    = -2050,
  kInvalidBatchCount         = -2049,
  kInvalidOverrideKernel     = -2048,
  kMissingOverrideParameter  = -2047,
  kInvalidLocalMemUsage      = -2046,
  kNoHalfPrecision           = -2045,
  kNoDoublePrecision         = -2044,
  kInvalidVectorScalar       = -2043,
  kInsufficientMemoryScalar  = -2042,
  kDatabaseError             = -2041,
  kUnknownError              = -2040,
  kUnexpectedError           = -2039,
};
|
|
// Option enumerations taken by the routines below. The numeric values 101..142 equal
// the corresponding CBLAS enum values (CblasRowMajor == 101, CblasNoTrans == 111, ...);
// KernelMode (151/152) continues the same numbering scheme.
enum class Layout { kRowMajor = 101, kColMajor = 102 };
enum class Transpose { kNo = 111, kYes = 112, kConjugate = 113 };
enum class Triangle { kUpper = 121, kLower = 122 };
enum class Diagonal { kNonUnit = 131, kUnit = 132 };
enum class Side { kLeft = 141, kRight = 142 };
enum class KernelMode { kCrossCorrelation = 151, kConvolution = 152 };
|
|
// Precision selector. The real-valued enumerators carry their bit-width as value
// (16/32/64); the complex ones repeat it (3232, 6464); kAny is -1.
enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64,
                       kComplexSingle = 3232, kComplexDouble = 6464, kAny = -1 };
|
|
| |
| |
| |
|
|
| |
| template <typename T> |
| StatusCode Rotg(cl_mem sa_buffer, const size_t sa_offset, |
| cl_mem sb_buffer, const size_t sb_offset, |
| cl_mem sc_buffer, const size_t sc_offset, |
| cl_mem ss_buffer, const size_t ss_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Rotmg(cl_mem sd1_buffer, const size_t sd1_offset, |
| cl_mem sd2_buffer, const size_t sd2_offset, |
| cl_mem sx1_buffer, const size_t sx1_offset, |
| const cl_mem sy1_buffer, const size_t sy1_offset, |
| cl_mem sparam_buffer, const size_t sparam_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Rot(const size_t n, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| const T cos, |
| const T sin, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Rotm(const size_t n, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem sparam_buffer, const size_t sparam_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Swap(const size_t n, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Scal(const size_t n, |
| const T alpha, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Copy(const size_t n, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Axpy(const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Dot(const size_t n, |
| cl_mem dot_buffer, const size_t dot_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Dotu(const size_t n, |
| cl_mem dot_buffer, const size_t dot_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Dotc(const size_t n, |
| cl_mem dot_buffer, const size_t dot_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Nrm2(const size_t n, |
| cl_mem nrm2_buffer, const size_t nrm2_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Asum(const size_t n, |
| cl_mem asum_buffer, const size_t asum_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Sum(const size_t n, |
| cl_mem sum_buffer, const size_t sum_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Amax(const size_t n, |
| cl_mem imax_buffer, const size_t imax_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Amin(const size_t n, |
| cl_mem imin_buffer, const size_t imin_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Max(const size_t n, |
| cl_mem imax_buffer, const size_t imax_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Min(const size_t n, |
| cl_mem imin_buffer, const size_t imin_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| |
| |
|
|
| |
| template <typename T> |
| StatusCode Gemv(const Layout layout, const Transpose a_transpose, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Gbmv(const Layout layout, const Transpose a_transpose, |
| const size_t m, const size_t n, const size_t kl, const size_t ku, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Hemv(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Hbmv(const Layout layout, const Triangle triangle, |
| const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Hpmv(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem ap_buffer, const size_t ap_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Symv(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Sbmv(const Layout layout, const Triangle triangle, |
| const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Spmv(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem ap_buffer, const size_t ap_offset, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const T beta, |
| cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t n, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t n, const size_t k, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t n, |
| const cl_mem ap_buffer, const size_t ap_offset, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t n, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Tbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t n, const size_t k, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Tpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t n, |
| const cl_mem ap_buffer, const size_t ap_offset, |
| cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Ger(const Layout layout, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Geru(const Layout layout, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Gerc(const Layout layout, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Her(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Hpr(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem ap_buffer, const size_t ap_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Her2(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Hpr2(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem ap_buffer, const size_t ap_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Syr(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Spr(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| cl_mem ap_buffer, const size_t ap_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Syr2(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Spr2(const Layout layout, const Triangle triangle, |
| const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| cl_mem ap_buffer, const size_t ap_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| |
| |
|
|
| |
| template <typename T> |
| StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| const size_t m, const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr, |
| cl_mem temp_buffer = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, |
| const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_transpose, |
| const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, |
| const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T, typename U> |
| StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, |
| const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| const U beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| |
| |
|
|
| |
| template <typename T> |
| StatusCode Had(const size_t n, |
| const T alpha, |
| const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| const T beta, |
| cl_mem z_buffer, const size_t z_offset, const size_t z_inc, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, |
| const size_t m, const size_t n, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Im2col(const KernelMode kernel_mode, |
| const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, |
| const cl_mem im_buffer, const size_t im_offset, |
| cl_mem col_buffer, const size_t col_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Col2im(const KernelMode kernel_mode, |
| const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, |
| const cl_mem col_buffer, const size_t col_offset, |
| cl_mem im_buffer, const size_t im_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode Convgemm(const KernelMode kernel_mode, |
| const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count, |
| const cl_mem im_buffer, const size_t im_offset, |
| const cl_mem kernel_buffer, const size_t kernel_offset, |
| cl_mem result_buffer, const size_t result_offset, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode AxpyBatched(const size_t n, |
| const T *alphas, |
| const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, |
| cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, |
| const size_t batch_count, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode GemmBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| const size_t m, const size_t n, const size_t k, |
| const T *alphas, |
| const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, |
| const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, |
| const T *betas, |
| cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, |
| const size_t batch_count, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
| template <typename T> |
| StatusCode GemmStridedBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| const size_t m, const size_t n, const size_t k, |
| const T alpha, |
| const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, |
| const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, |
| const T beta, |
| cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, |
| const size_t batch_count, |
| cl_command_queue* queue, cl_event* event = nullptr); |
|
|
| |
|
|
| |
| template <typename T> |
| StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| const size_t m, const size_t n, const size_t k, |
| const size_t a_offset, const size_t a_ld, |
| const size_t b_offset, const size_t b_ld, |
| const size_t c_offset, const size_t c_ld, |
| cl_command_queue* queue, size_t& temp_buffer_size); |
|
|
| |
|
|
| |
| |
| StatusCode PUBLIC_API ClearCache(); |
|
|
| |
| |
| StatusCode PUBLIC_API FillCache(const cl_device_id device); |
|
|
| |
|
|
| |
| StatusCode PUBLIC_API RetrieveParameters(const cl_device_id device, const std::string &kernel_name, |
| const Precision precision, |
| std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| |
| StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name, |
| const Precision precision, |
| const std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
|
|
| |
| template <typename T> |
| StatusCode TuneXaxpy(cl_command_queue* queue, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneXdot(cl_command_queue* queue, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneXgemv(cl_command_queue* queue, const size_t m, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneXger(cl_command_queue* queue, const size_t m, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneXgemm(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneXgemmDirect(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneCopy(cl_command_queue* queue, const size_t m, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TunePad(cl_command_queue* queue, const size_t m, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneTranspose(cl_command_queue* queue, const size_t m, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
| template <typename T> |
| StatusCode TuneInvert(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, |
| const double fraction, std::unordered_map<std::string,size_t> ¶meters); |
|
|
| |
|
|
| } |
|
|
| |
| #endif |
|
|