Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7369bb37 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

starpu/codelets: Add zlag2c/clag2 gpu codelet

parent 06432c12
No related branches found
No related tags found
1 merge request!401Add all the architecture to manage multi-precision data descriptors.
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
# Univ. of California Berkeley, # Univ. of California Berkeley,
# Univ. of Colorado Denver. # Univ. of Colorado Denver.
# #
# @version 1.2.0 # @version 1.3.0
# @author Florent Pruvost # @author Florent Pruvost
# @author Mathieu Faverge # @author Mathieu Faverge
# @date 2022-02-22 # @date 2023-07-06
# #
### ###
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
set(GPUCUBLAS_HDRS_GENERATED "") set(GPUCUBLAS_HDRS_GENERATED "")
set(ZHDR set(ZHDR
gpucublas/gpucublas_z.h gpucublas/gpucublas_z.h
gpucublas/gpucublas_zc.h
) )
precisions_rules_py( precisions_rules_py(
GPUCUBLAS_HDRS_GENERATED "${ZHDR}" GPUCUBLAS_HDRS_GENERATED "${ZHDR}"
......
...@@ -61,6 +61,8 @@ BEGIN_C_DECLS ...@@ -61,6 +61,8 @@ BEGIN_C_DECLS
#include "gpucublas/gpucublas_d.h" #include "gpucublas/gpucublas_d.h"
#include "gpucublas/gpucublas_c.h" #include "gpucublas/gpucublas_c.h"
#include "gpucublas/gpucublas_s.h" #include "gpucublas/gpucublas_s.h"
#include "gpucublas/gpucublas_zc.h"
#include "gpucublas/gpucublas_ds.h"
int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb, int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb,
int m, int n, int k, int m, int n, int k,
......
/**
*
* @file gpucublas_zc.h
*
* @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon GPU Mixed-precision kernels header
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-07-06
* @precisions mixed zc -> zc ds
*
*/
#ifndef _gpucublas_zc_h_
#define _gpucublas_zc_h_
int CUDA_clag2z( int m, int n, const cuFloatComplex *A, int lda, cuDoubleComplex *B, int ldb, cublasHandle_t handle );
int CUDA_zlag2c( int m, int n, const cuDoubleComplex *A, int lda, cuFloatComplex *B, int ldb, cublasHandle_t handle );
#endif /* _gpucublas_zc_h_ */
...@@ -11,16 +11,14 @@ ...@@ -11,16 +11,14 @@
* *
* @brief Chameleon zlag2c StarPU codelet * @brief Chameleon zlag2c StarPU codelet
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis * @author Lucas Barros de Assis
* @author Florent Pruvost * @author Florent Pruvost
* @author Samuel Thibault * @author Samuel Thibault
* @date 2022-02-22 * @date 2023-07-06
* @precisions mixed zc -> ds * @precisions mixed zc -> ds
* *
*/ */
...@@ -28,7 +26,8 @@ ...@@ -28,7 +26,8 @@
#include "runtime_codelet_zc.h" #include "runtime_codelet_zc.h"
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) static void
cl_zlag2c_cpu_func( void *descr[], void *cl_arg )
{ {
int info = 0; int info = 0;
int m; int m;
...@@ -39,15 +38,44 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) ...@@ -39,15 +38,44 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &m, &n); starpu_codelet_unpack_args( cl_arg, &m, &n );
TCORE_zlag2c( m, n, tileA, tileB, &info ); TCORE_zlag2c( m, n, tileA, tileB, &info );
} }
#if defined(CHAMELEON_USE_CUDA)
static void
cl_zlag2c_cuda_func( void *descr[], void *cl_arg )
{
cublasHandle_t handle = starpu_cublas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
int m, n;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
starpu_codelet_unpack_args( cl_arg, &m, &n );
int rc = CUDA_zlag2c(
m, n,
tileA->mat, tileA->ld,
tileB->mat, tileB->ld,
handle );
if ( rc != 0 ) {
fprintf( stderr, "core_zlag2c failed with info(%d)\n", rc );
}
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
/* /*
* Codelet definition * Codelet definition
*/ */
CODELETS_CPU(zlag2c, cl_zlag2c_cpu_func) CODELETS( zlag2c, cl_zlag2c_cpu_func, cl_zlag2c_cuda_func, STARPU_CUDA_ASYNC )
/** /**
* *
...@@ -72,8 +100,8 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, ...@@ -72,8 +100,8 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
codelet, codelet,
STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn), STARPU_W, RTBLKADDR(B, ChamComplexFloat, Bm, Bn),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback, STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid, STARPU_EXECUTE_ON_WORKER, options->workerid,
...@@ -97,17 +125,45 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) ...@@ -97,17 +125,45 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &m, &n); starpu_codelet_unpack_args(cl_arg, &m, &n);
TCORE_clag2z( m, n, tileA, tileB); TCORE_clag2z( m, n, tileA, tileB);
} }
#if defined(CHAMELEON_USE_CUDA)
static void
cl_clag2z_cuda_func( void *descr[], void *cl_arg )
{
cublasHandle_t handle = starpu_cublas_get_local_handle();
CHAM_tile_t *tileA;
CHAM_tile_t *tileB;
int m, n;
tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]);
assert( tileA->format & CHAMELEON_TILE_FULLRANK );
assert( tileB->format & CHAMELEON_TILE_FULLRANK );
starpu_codelet_unpack_args( cl_arg, &m, &n );
int rc = CUDA_clag2z(
m, n,
tileA->mat, tileA->ld,
tileB->mat, tileB->ld,
handle );
if ( rc != 0 ) {
fprintf( stderr, "core_clag2z failed with info(%d)\n", rc );
}
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
/* /*
* Codelet definition * Codelet definition
*/ */
CODELETS_CPU(clag2z, cl_clag2z_cpu_func) CODELETS( clag2z, cl_clag2z_cpu_func, cl_clag2z_cuda_func, STARPU_CUDA_ASYNC )
void INSERT_TASK_clag2z(const RUNTIME_option_t *options, void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
int m, int n, int nb, int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *A, int Am, int An,
const CHAM_desc_t *B, int Bm, int Bn) const CHAM_desc_t *B, int Bm, int Bn )
{ {
(void)nb; (void)nb;
struct starpu_codelet *codelet = &cl_clag2z; struct starpu_codelet *codelet = &cl_clag2z;
...@@ -122,8 +178,8 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options, ...@@ -122,8 +178,8 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
codelet, codelet,
STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An), STARPU_R, RTBLKADDR(A, ChamComplexFloat, Am, An),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_W, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback, STARPU_CALLBACK, callback,
STARPU_EXECUTE_ON_WORKER, options->workerid, STARPU_EXECUTE_ON_WORKER, options->workerid,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment