From ae7bbb634ced89e9fdb6ea6c360dc2e88e6201b7 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Tue, 23 May 2023 14:41:30 -0400 Subject: [PATCH] gpus: Add the hgemm kernel for cuda and hip --- gpucublas/compute/CMakeLists.txt | 4 +++ gpucublas/compute/cuda_hgemm.c | 42 +++++++++++++++++++++++++++++++ gpucublas/include/gpucublas.h | 16 ++++++++++-- gpuhipblas/compute/CMakeLists.txt | 5 ++-- gpuhipblas/compute/hip_hgemm.c | 41 ++++++++++++++++++++++++++++++ gpuhipblas/include/gpuhipblas.h | 14 +++++++++-- 6 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 gpucublas/compute/cuda_hgemm.c create mode 100644 gpuhipblas/compute/hip_hgemm.c diff --git a/gpucublas/compute/CMakeLists.txt b/gpucublas/compute/CMakeLists.txt index d06b47928..81b998145 100644 --- a/gpucublas/compute/CMakeLists.txt +++ b/gpucublas/compute/CMakeLists.txt @@ -291,9 +291,13 @@ precisions_rules_py( set(GPUCUBLAS_SRCS ${GPUCUBLAS_SRCS_GENERATED} + cuda_hgemm.c cudaglobal.c ) +# Need to use CXX compiler to have the __half support and access to cublasHgemm() +set_source_files_properties( cuda_hgemm.c PROPERTIES LANGUAGE CXX ) + # Force generation of sources # --------------------------- add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS}) diff --git a/gpucublas/compute/cuda_hgemm.c b/gpucublas/compute/cuda_hgemm.c new file mode 100644 index 000000000..02f465a6a --- /dev/null +++ b/gpucublas/compute/cuda_hgemm.c @@ -0,0 +1,42 @@ +/** + * + * @file cuda_hgemm.c + * + * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon cuda_hgemm GPU kernel + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2023-07-04 + * + */ +#include "gpucublas.h" + +extern "C" int +CUDA_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + cublasHandle_t handle ) +{ + cublasStatus_t rc; + + rc = cublasHgemm( handle, + (cublasOperation_t)chameleon_cublas_const(transa), + (cublasOperation_t)chameleon_cublas_const(transb), + m, n, k, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc); + + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/gpucublas/include/gpucublas.h b/gpucublas/include/gpucublas.h index 8a9deb977..29a7046a8 100644 --- a/gpucublas/include/gpucublas.h +++ b/gpucublas/include/gpucublas.h @@ -11,11 +11,11 @@ * * @brief Chameleon GPU kernels main header * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Mathieu Faverge * @author Nathalie Furmento - * @date 2022-02-22 + * @date 2023-07-04 * @precisions normal z -> c d s * */ @@ -35,6 +35,9 @@ #include <cuda.h> #include <cuComplex.h> +#if CUDA_VERSION >= 7500 +#include <cuda_fp16.h> +#endif #include <cublas_v2.h> @@ -58,6 +61,15 @@ BEGIN_C_DECLS #include "gpucublas/gpucublas_c.h" #include "gpucublas/gpucublas_s.h" +int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + cublasHandle_t handle ); + END_C_DECLS /** diff --git a/gpuhipblas/compute/CMakeLists.txt b/gpuhipblas/compute/CMakeLists.txt index bd7ac4540..cd3db9e9e 100644 --- a/gpuhipblas/compute/CMakeLists.txt +++ b/gpuhipblas/compute/CMakeLists.txt @@ -17,12 +17,12 @@ # Univ. of California Berkeley, # Univ. of Colorado Denver. # -# @version 1.2.0 +# @version 1.3.0 # @author Florent Pruvost # @author Guillaume Sylvand # @author Mathieu Faverge # @author Loris Lucido -# @date 2023-01-30 +# @date 2023-07-04 # ### @@ -47,6 +47,7 @@ precisions_rules_py( set(GPUHIPBLAS_SRCS ${GPUHIPBLAS_SRCS_GENERATED} + hip_hgemm.c hipglobal.c ) diff --git a/gpuhipblas/compute/hip_hgemm.c b/gpuhipblas/compute/hip_hgemm.c new file mode 100644 index 000000000..12d7feac2 --- /dev/null +++ b/gpuhipblas/compute/hip_hgemm.c @@ -0,0 +1,41 @@ +/** + * + * @file hip_hgemm.c + * + * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_hgemm GPU kernel + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2023-07-04 + * + */ +#include "gpuhipblas.h" + +int +HIP_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasHgemm( handle, + chameleon_hipblas_const(transa), chameleon_hipblas_const(transb), + m, n, k, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/gpuhipblas/include/gpuhipblas.h b/gpuhipblas/include/gpuhipblas.h index 572abcf96..705084958 100644 --- a/gpuhipblas/include/gpuhipblas.h +++ b/gpuhipblas/include/gpuhipblas.h @@ -11,12 +11,12 @@ * * @brief Chameleon GPU kernels main header * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Mathieu Faverge * @author Nathalie Furmento * @author Loris Lucido - * @date 2023-01-30 + * @date 2023-07-04 * @precisions normal z -> c d s * */ @@ -36,6 +36,7 @@ #include <hip/hip_runtime.h> #include <hip/hip_complex.h> +#include <hip/hip_fp16.h> #include <hipblas/hipblas.h> @@ -59,6 +60,15 @@ BEGIN_C_DECLS #include "gpuhipblas/gpuhipblas_c.h" #include "gpuhipblas/gpuhipblas_s.h" +int HIP_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + hipblasHandle_t handle ); + END_C_DECLS /** -- GitLab