diff --git a/gpucublas/compute/CMakeLists.txt b/gpucublas/compute/CMakeLists.txt index d06b4792883a9541e66a37d259bec6a20032a31e..81b99814503b779db0e7b75d4c05e5aded4eec53 100644 --- a/gpucublas/compute/CMakeLists.txt +++ b/gpucublas/compute/CMakeLists.txt @@ -291,9 +291,13 @@ precisions_rules_py( set(GPUCUBLAS_SRCS ${GPUCUBLAS_SRCS_GENERATED} + cuda_hgemm.c cudaglobal.c ) +# Need to use CXX compiler to have the __half support and access to cublasHgemm() +set_source_files_properties( cuda_hgemm.c PROPERTIES LANGUAGE CXX ) + # Force generation of sources # --------------------------- add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS}) diff --git a/gpucublas/compute/cuda_hgemm.c b/gpucublas/compute/cuda_hgemm.c new file mode 100644 index 0000000000000000000000000000000000000000..02f465a6a2b8efbc5cfb5551c3266d30736591e7 --- /dev/null +++ b/gpucublas/compute/cuda_hgemm.c @@ -0,0 +1,42 @@ +/** + * + * @file cuda_hgemm.c + * + * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon cuda_hgemm GPU kernel + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2023-07-04 + * + */ +#include "gpucublas.h" + +extern "C" int +CUDA_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + cublasHandle_t handle ) +{ + cublasStatus_t rc; + + rc = cublasHgemm( handle, + (cublasOperation_t)chameleon_cublas_const(transa), + (cublasOperation_t)chameleon_cublas_const(transb), + m, n, k, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc); + + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/gpucublas/include/gpucublas.h b/gpucublas/include/gpucublas.h index 8a9deb977ccae2c998f3b1390c5085febaee9104..29a7046a8c840faecc4fee7b2edb70de35700b98 100644 --- a/gpucublas/include/gpucublas.h +++ b/gpucublas/include/gpucublas.h @@ -11,11 +11,11 @@ * * @brief Chameleon GPU kernels main header * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Mathieu Faverge * @author Nathalie Furmento - * @date 2022-02-22 + * @date 2023-07-04 * @precisions normal z -> c d s * */ @@ -35,6 +35,9 @@ #include <cuda.h> #include <cuComplex.h> +#if CUDA_VERSION >= 7500 +#include <cuda_fp16.h> +#endif #include <cublas_v2.h> @@ -58,6 +61,15 @@ BEGIN_C_DECLS #include "gpucublas/gpucublas_c.h" #include "gpucublas/gpucublas_s.h" +int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + cublasHandle_t handle ); + END_C_DECLS /** diff --git a/gpuhipblas/compute/CMakeLists.txt b/gpuhipblas/compute/CMakeLists.txt index bd7ac4540ccad44d08140014a6af1af43df248b5..cd3db9e9e0d0f951d03d331ed14bb8c0840cfcc5 100644 --- a/gpuhipblas/compute/CMakeLists.txt +++ b/gpuhipblas/compute/CMakeLists.txt @@ -17,12 +17,12 @@ # Univ. of California Berkeley, # Univ. of Colorado Denver. # -# @version 1.2.0 +# @version 1.3.0 # @author Florent Pruvost # @author Guillaume Sylvand # @author Mathieu Faverge # @author Loris Lucido -# @date 2023-01-30 +# @date 2023-07-04 # ### @@ -47,6 +47,7 @@ precisions_rules_py( set(GPUHIPBLAS_SRCS ${GPUHIPBLAS_SRCS_GENERATED} + hip_hgemm.c hipglobal.c ) diff --git a/gpuhipblas/compute/hip_hgemm.c b/gpuhipblas/compute/hip_hgemm.c new file mode 100644 index 0000000000000000000000000000000000000000..12d7feac2bf453a72614e7a5271898f51b461879 --- /dev/null +++ b/gpuhipblas/compute/hip_hgemm.c @@ -0,0 +1,41 @@ +/** + * + * @file hip_hgemm.c + * + * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_hgemm GPU kernel + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2023-07-04 + * + */ +#include "gpuhipblas.h" + +int +HIP_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasHgemm( handle, + chameleon_hipblas_const(transa), chameleon_hipblas_const(transb), + m, n, k, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/gpuhipblas/include/gpuhipblas.h b/gpuhipblas/include/gpuhipblas.h index 572abcf9677fa3caf1e16f8c6a68a64f6e202cb9..705084958bc28c6c66f8d062f675d96cee90bb9d 100644 --- a/gpuhipblas/include/gpuhipblas.h +++ b/gpuhipblas/include/gpuhipblas.h @@ -11,12 +11,12 @@ * * @brief Chameleon GPU kernels main header * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Mathieu Faverge * @author Nathalie Furmento * @author Loris Lucido - * @date 2023-01-30 + * @date 2023-07-04 * @precisions normal z -> c d s * */ @@ -36,6 +36,7 @@ #include <hip/hip_runtime.h> #include <hip/hip_complex.h> +#include <hip/hip_fp16.h> #include <hipblas/hipblas.h> @@ -59,6 +60,15 @@ BEGIN_C_DECLS #include "gpuhipblas/gpuhipblas_c.h" #include "gpuhipblas/gpuhipblas_s.h" +int HIP_hgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const CHAMELEON_Real16_t *alpha, + const CHAMELEON_Real16_t *A, int lda, + const CHAMELEON_Real16_t *B, int ldb, + const CHAMELEON_Real16_t *beta, + CHAMELEON_Real16_t *C, int ldc, + hipblasHandle_t handle ); + END_C_DECLS /**