Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ae7bbb63 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

gpus: Add the hgemm kernel for cuda and hip

parent 0036dfaa
No related branches found
No related tags found
1 merge request!395Introduce half-precision conversion and gemm kernels for GPUs
......@@ -291,9 +291,13 @@ precisions_rules_py(
set(GPUCUBLAS_SRCS
${GPUCUBLAS_SRCS_GENERATED}
cuda_hgemm.c
cudaglobal.c
)
# Need to use CXX compiler to have the __half support and access to cublasHgemm()
set_source_files_properties( cuda_hgemm.c PROPERTIES LANGUAGE CXX )
# Force generation of sources
# ---------------------------
add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS})
......
/**
*
* @file cuda_hgemm.c
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon cuda_hgemm GPU kernel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-07-04
*
*/
#include "gpucublas.h"
extern "C" int
CUDA_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
cublasHandle_t handle )
{
cublasStatus_t rc;
rc = cublasHgemm( handle,
(cublasOperation_t)chameleon_cublas_const(transa),
(cublasOperation_t)chameleon_cublas_const(transb),
m, n, k,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
CUBLAS_VALUE(beta), C, ldc);
assert( rc == CUBLAS_STATUS_SUCCESS );
(void)rc;
return CHAMELEON_SUCCESS;
}
......@@ -11,11 +11,11 @@
*
* @brief Chameleon GPU kernels main header
*
* @version 1.2.0
* @version 1.3.0
* @author Florent Pruvost
* @author Mathieu Faverge
* @author Nathalie Furmento
* @date 2022-02-22
* @date 2023-07-04
* @precisions normal z -> c d s
*
*/
......@@ -35,6 +35,9 @@
#include <cuda.h>
#include <cuComplex.h>
#if CUDA_VERSION >= 7500
#include <cuda_fp16.h>
#endif
#include <cublas_v2.h>
......@@ -58,6 +61,15 @@ BEGIN_C_DECLS
#include "gpucublas/gpucublas_c.h"
#include "gpucublas/gpucublas_s.h"
int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
cublasHandle_t handle );
END_C_DECLS
/**
......
......@@ -17,12 +17,12 @@
# Univ. of California Berkeley,
# Univ. of Colorado Denver.
#
# @version 1.2.0
# @version 1.3.0
# @author Florent Pruvost
# @author Guillaume Sylvand
# @author Mathieu Faverge
# @author Loris Lucido
# @date 2023-01-30
# @date 2023-07-04
#
###
......@@ -47,6 +47,7 @@ precisions_rules_py(
set(GPUHIPBLAS_SRCS
${GPUHIPBLAS_SRCS_GENERATED}
hip_hgemm.c
hipglobal.c
)
......
/**
*
* @file hip_hgemm.c
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon hip_hgemm GPU kernel
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-07-04
*
*/
#include "gpuhipblas.h"
int
HIP_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
hipblasHandle_t handle )
{
hipblasStatus_t rc;
rc = hipblasHgemm( handle,
chameleon_hipblas_const(transa), chameleon_hipblas_const(transb),
m, n, k,
HIPBLAS_VALUE(alpha), A, lda,
B, ldb,
HIPBLAS_VALUE(beta), C, ldc );
assert( rc == HIPBLAS_STATUS_SUCCESS );
(void)rc;
return CHAMELEON_SUCCESS;
}
......@@ -11,12 +11,12 @@
*
* @brief Chameleon GPU kernels main header
*
* @version 1.2.0
* @version 1.3.0
* @author Florent Pruvost
* @author Mathieu Faverge
* @author Nathalie Furmento
* @author Loris Lucido
* @date 2023-01-30
* @date 2023-07-04
* @precisions normal z -> c d s
*
*/
......@@ -36,6 +36,7 @@
#include <hip/hip_runtime.h>
#include <hip/hip_complex.h>
#include <hip/hip_fp16.h>
#include <hipblas/hipblas.h>
......@@ -59,6 +60,15 @@ BEGIN_C_DECLS
#include "gpuhipblas/gpuhipblas_c.h"
#include "gpuhipblas/gpuhipblas_s.h"
int HIP_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k,
const CHAMELEON_Real16_t *alpha,
const CHAMELEON_Real16_t *A, int lda,
const CHAMELEON_Real16_t *B, int ldb,
const CHAMELEON_Real16_t *beta,
CHAMELEON_Real16_t *C, int ldc,
hipblasHandle_t handle );
END_C_DECLS
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment