Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ae7bbb63 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

gpus: Add the hgemm kernel for cuda and hip

parent 0036dfaa
No related branches found
No related tags found
1 merge request!395Introduce half-precision conversion and gemm kernels for GPUs
...@@ -291,9 +291,13 @@ precisions_rules_py( ...@@ -291,9 +291,13 @@ precisions_rules_py(
set(GPUCUBLAS_SRCS set(GPUCUBLAS_SRCS
${GPUCUBLAS_SRCS_GENERATED} ${GPUCUBLAS_SRCS_GENERATED}
cuda_hgemm.c
cudaglobal.c cudaglobal.c
) )
# Need to use CXX compiler to have the __half support and access to cublasHgemm()
set_source_files_properties( cuda_hgemm.c PROPERTIES LANGUAGE CXX )
# Force generation of sources # Force generation of sources
# --------------------------- # ---------------------------
add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS}) add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS})
......
/**
*
* @file cuda_hgemm.c
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon cuda_hgemm GPU kernel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-07-04
*
*/
#include "gpucublas.h"
extern "C" int
CUDA_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
cublasHandle_t handle )
{
cublasStatus_t rc;
rc = cublasHgemm( handle,
(cublasOperation_t)chameleon_cublas_const(transa),
(cublasOperation_t)chameleon_cublas_const(transb),
m, n, k,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
CUBLAS_VALUE(beta), C, ldc);
assert( rc == CUBLAS_STATUS_SUCCESS );
(void)rc;
return CHAMELEON_SUCCESS;
}
...@@ -11,11 +11,11 @@ ...@@ -11,11 +11,11 @@
* *
* @brief Chameleon GPU kernels main header * @brief Chameleon GPU kernels main header
* *
* @version 1.2.0 * @version 1.3.0
* @author Florent Pruvost * @author Florent Pruvost
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Nathalie Furmento * @author Nathalie Furmento
* @date 2022-02-22 * @date 2023-07-04
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
*/ */
...@@ -35,6 +35,9 @@ ...@@ -35,6 +35,9 @@
#include <cuda.h> #include <cuda.h>
#include <cuComplex.h> #include <cuComplex.h>
#if CUDA_VERSION >= 7500
#include <cuda_fp16.h>
#endif
#include <cublas_v2.h> #include <cublas_v2.h>
...@@ -58,6 +61,15 @@ BEGIN_C_DECLS ...@@ -58,6 +61,15 @@ BEGIN_C_DECLS
#include "gpucublas/gpucublas_c.h" #include "gpucublas/gpucublas_c.h"
#include "gpucublas/gpucublas_s.h" #include "gpucublas/gpucublas_s.h"
int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
cublasHandle_t handle );
END_C_DECLS END_C_DECLS
/** /**
......
...@@ -17,12 +17,12 @@ ...@@ -17,12 +17,12 @@
# Univ. of California Berkeley, # Univ. of California Berkeley,
# Univ. of Colorado Denver. # Univ. of Colorado Denver.
# #
# @version 1.2.0 # @version 1.3.0
# @author Florent Pruvost # @author Florent Pruvost
# @author Guillaume Sylvand # @author Guillaume Sylvand
# @author Mathieu Faverge # @author Mathieu Faverge
# @author Loris Lucido # @author Loris Lucido
# @date 2023-01-30 # @date 2023-07-04
# #
### ###
...@@ -47,6 +47,7 @@ precisions_rules_py( ...@@ -47,6 +47,7 @@ precisions_rules_py(
set(GPUHIPBLAS_SRCS set(GPUHIPBLAS_SRCS
${GPUHIPBLAS_SRCS_GENERATED} ${GPUHIPBLAS_SRCS_GENERATED}
hip_hgemm.c
hipglobal.c hipglobal.c
) )
......
/**
*
* @file hip_hgemm.c
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon hip_hgemm GPU kernel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-07-04
*
*/
#include "gpuhipblas.h"
int
HIP_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
hipblasHandle_t handle )
{
hipblasStatus_t rc;
rc = hipblasHgemm( handle,
chameleon_hipblas_const(transa), chameleon_hipblas_const(transb),
m, n, k,
HIPBLAS_VALUE(alpha), A, lda,
B, ldb,
HIPBLAS_VALUE(beta), C, ldc );
assert( rc == HIPBLAS_STATUS_SUCCESS );
(void)rc;
return CHAMELEON_SUCCESS;
}
...@@ -11,12 +11,12 @@ ...@@ -11,12 +11,12 @@
* *
* @brief Chameleon GPU kernels main header * @brief Chameleon GPU kernels main header
* *
* @version 1.2.0 * @version 1.3.0
* @author Florent Pruvost * @author Florent Pruvost
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Nathalie Furmento * @author Nathalie Furmento
* @author Loris Lucido * @author Loris Lucido
* @date 2023-01-30 * @date 2023-07-04
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
*/ */
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <hip/hip_complex.h> #include <hip/hip_complex.h>
#include <hip/hip_fp16.h>
#include <hipblas/hipblas.h> #include <hipblas/hipblas.h>
...@@ -59,6 +60,15 @@ BEGIN_C_DECLS ...@@ -59,6 +60,15 @@ BEGIN_C_DECLS
#include "gpuhipblas/gpuhipblas_c.h" #include "gpuhipblas/gpuhipblas_c.h"
#include "gpuhipblas/gpuhipblas_s.h" #include "gpuhipblas/gpuhipblas_s.h"
int HIP_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
hipblasHandle_t handle );
END_C_DECLS END_C_DECLS
/** /**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment