diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index 34a5bc904d05debd04053b4452611b87df3d94be..0f404e1a52472500e2ada61ac251bff26bbb7c6d 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -41,6 +41,7 @@ _extra_blas = [
     ('', 'sgepdf', 'dgepdf', 'cgepdf', 'zgepdf' ),
     ('', 'scesca', 'dcesca', 'ccesca', 'zcesca' ),
     ('', 'sgesum', 'dgesum', 'cgesum', 'zgesum' ),
+    ('', 'sgersum', 'dgersum', 'cgersum', 'zgersum' ),
 ]
 
 _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index f8aa0620a623afc62b30a08a8f91febef23c70f7..ae307f760eb78d5ec4b3fec9a41db694211382be 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -458,4 +458,8 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         const CHAM_desc_t *D, int Dm, int Dn,
                         CHAM_desc_t *A, int Am, int An);
 
+void RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An );
+void RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
+                                  const CHAM_desc_t *A, int Am, int An );
+
 #endif /* _chameleon_tasks_z_h_ */
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index bda8ef2732cdf6a7cb8b9a12128bb9175e9f584e..dec860e8e112b70e2e40798ebc4feda3e16ab46c 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -107,6 +107,10 @@ set(CODELETS_ZSRC
   codelets/codelet_zgesum.c
   codelets/codelet_zcesca.c
   codelets/codelet_zgram.c
+  ##################
+  # Reduction methods
+  ##################
+  codelets/codelet_zgersum.c
   )
 
 set(CODELETS_SRC
diff --git a/runtime/openmp/codelets/codelet_zgersum.c b/runtime/openmp/codelets/codelet_zgersum.c
new file mode 100644
index 0000000000000000000000000000000000000000..b320707c993a1521ef76d1219ae60c86db96113e
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgersum.c
@@ -0,0 +1,56 @@
+/**
+ *
+ * @file openmp/codelet_zgersum.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgersum OpenMP codelet
+ *
+ * @version 1.2.0
+ * @author Romain Peressoni
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+
+/**
+ * @brief Reduction methods cannot be registered with the OpenMP runtime.
+ *
+ * Only prints a warning on stderr; all arguments are ignored.
+ *
+ * @param[in] A       Descriptor owning the tile (unused).
+ * @param[in] Am, An  Coordinates of the tile in A (unused).
+ */
+void
+RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: Reductions are not available with OpenMP\n" );
+
+    (void)A;
+    (void)Am;
+    (void)An;
+}
+
+/**
+ * @brief No-op: no reduction can be submitted with the OpenMP runtime.
+ *
+ * @param[in] options  Runtime options (unused).
+ * @param[in] A        Descriptor owning the tile (unused).
+ * @param[in] Am, An   Coordinates of the tile in A (unused).
+ */
+void
+RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
+                             const CHAM_desc_t *A, int Am, int An )
+{
+    (void)options;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/parsec/codelets/codelet_zgersum.c b/runtime/parsec/codelets/codelet_zgersum.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb460724d0ab51b55f1ea5833af0dd7dba5d9912
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgersum.c
@@ -0,0 +1,56 @@
+/**
+ *
+ * @file parsec/codelet_zgersum.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgersum Parsec codelet
+ *
+ * @version 1.2.0
+ * @author Romain Peressoni
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+
+/**
+ * @brief Reduction methods cannot be registered with the Parsec runtime.
+ *
+ * Only prints a warning on stderr; all arguments are ignored.
+ *
+ * @param[in] A       Descriptor owning the tile (unused).
+ * @param[in] Am, An  Coordinates of the tile in A (unused).
+ */
+void
+RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: Reductions are not available with Parsec\n" );
+
+    (void)A;
+    (void)Am;
+    (void)An;
+}
+
+/**
+ * @brief No-op: no reduction can be submitted with the Parsec runtime.
+ *
+ * @param[in] options  Runtime options (unused).
+ * @param[in] A        Descriptor owning the tile (unused).
+ * @param[in] Am, An   Coordinates of the tile in A (unused).
+ */
+void
+RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
+                             const CHAM_desc_t *A, int Am, int An )
+{
+    (void)options;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/quark/codelets/codelet_zgersum.c b/runtime/quark/codelets/codelet_zgersum.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6accf1cf76c9b6af4e1ec918f09d8ac62e0dca5
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgersum.c
@@ -0,0 +1,56 @@
+/**
+ *
+ * @file quark/codelet_zgersum.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgersum Quark codelet
+ *
+ * @version 1.2.0
+ * @author Romain Peressoni
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+
+/**
+ * @brief Reduction methods cannot be registered with the Quark runtime.
+ *
+ * Only prints a warning on stderr; all arguments are ignored.
+ *
+ * @param[in] A       Descriptor owning the tile (unused).
+ * @param[in] Am, An  Coordinates of the tile in A (unused).
+ */
+void
+RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: Reductions are not available with Quark\n" );
+
+    (void)A;
+    (void)Am;
+    (void)An;
+}
+
+/**
+ * @brief No-op: no reduction can be submitted with the Quark runtime.
+ *
+ * @param[in] options  Runtime options (unused).
+ * @param[in] A        Descriptor owning the tile (unused).
+ * @param[in] Am, An   Coordinates of the tile in A (unused).
+ */
+void
+RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
+                             const CHAM_desc_t *A, int Am, int An )
+{
+    (void)options;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c
new file mode 100644
index 0000000000000000000000000000000000000000..26327c982f997473de0e17885a8381a258ecaeb6
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zgersum.c
@@ -0,0 +1,178 @@
+/**
+ *
+ * @file starpu/codelet_zgersum.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgersum StarPU codelet
+ *
+ * @version 1.2.0
+ * @author Romain Peressoni
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+/**
+ * @brief CPU kernel of the reduction codelet: adds the tile in descr[1] to
+ *        the tile in descr[0] (both scaling factors are 1).
+ *
+ * @param[in,out] descr   StarPU buffers: [0] accumulator, [1] contribution.
+ * @param[in]     cl_arg  Codelet arguments (unused).
+ */
+static void cl_zgersum_redux_cpu_func(void *descr[], void *cl_arg)
+{
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    assert( tileA->m == tileB->m );
+    assert( tileA->n == tileB->n );
+
+    TCORE_zgeadd( ChamNoTrans, tileA->m, tileA->n, 1., tileB, 1., tileA );
+
+    (void)cl_arg;
+}
+
+#ifdef CHAMELEON_USE_CUBLAS
+/**
+ * @brief CUDA kernel of the reduction codelet: same update as the CPU
+ *        kernel, issued through CUDA_zgeadd on the local cuBLAS handle.
+ *
+ * @param[in,out] descr   StarPU buffers: [0] accumulator, [1] contribution.
+ * @param[in]     cl_arg  Codelet arguments (unused).
+ */
+static void cl_zgersum_redux_cuda_func(void *descr[], void *cl_arg)
+{
+    cublasHandle_t handle = starpu_cublas_get_local_handle();
+    CHAMELEON_Complex64_t zone = 1.;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    assert( tileA->m == tileB->m );
+    assert( tileA->n == tileB->n );
+
+    CUDA_zgeadd( ChamNoTrans, tileA->m, tileA->n,
+                 &zone, tileB->mat, tileB->ld,
+                 &zone, tileA->mat, tileA->ld,
+                 handle );
+
+    (void)cl_arg;
+}
+#endif /* defined(CHAMELEON_USE_CUBLAS) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+#if defined(CHAMELEON_USE_CUBLAS)
+CODELETS(zgersum_redux, cl_zgersum_redux_cpu_func, cl_zgersum_redux_cuda_func, STARPU_CUDA_ASYNC)
+#else
+CODELETS_CPU(zgersum_redux, cl_zgersum_redux_cpu_func)
+#endif
+
+#if !defined(CHAMELEON_SIMULATION)
+/**
+ * @brief CPU kernel of the initialization codelet: fills the tile in
+ *        descr[0] with zeros, the neutral element of the sum.
+ *
+ * @param[out] descr   StarPU buffers: [0] tile to initialize.
+ * @param[in]  cl_arg  Codelet arguments (unused).
+ */
+static void
+cl_zgersum_init_cpu_func( void *descr[], void *cl_arg )
+{
+    CHAM_tile_t *tileA;
+
+    tileA = cti_interface_get(descr[0]);
+
+    TCORE_zlaset( ChamUpperLower, tileA->m, tileA->n, 0., 0., tileA );
+
+    (void)cl_arg;
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU( zgersum_init, cl_zgersum_init_cpu_func )
+
+/**
+ * @brief Registers cl_zgersum_redux and cl_zgersum_init as the reduction
+ *        methods of the tile A(Am, An).
+ *
+ * Does nothing when HAVE_STARPU_MPI_REDUX is not defined.
+ *
+ * @param[in] A       Descriptor owning the tile.
+ * @param[in] Am, An  Coordinates of the tile in A.
+ */
+void
+RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An )
+{
+#if defined(HAVE_STARPU_MPI_REDUX)
+    starpu_data_set_reduction_methods( RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+                                       &cl_zgersum_redux,
+                                       &cl_zgersum_init );
+#else
+    (void)A;
+    (void)Am;
+    (void)An;
+#endif
+}
+
+/**
+ * @brief Submits the reduction of the tile A(Am, An) over MPI_COMM_WORLD.
+ *
+ * Does nothing unless both HAVE_STARPU_MPI_REDUX and CHAMELEON_USE_MPI are
+ * defined.
+ *
+ * @param[in] options  Runtime options; the reduction is submitted with
+ *                     priority options->priority + 1.
+ * @param[in] A        Descriptor owning the tile.
+ * @param[in] Am, An   Coordinates of the tile in A.
+ */
+void
+RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
+                             const CHAM_desc_t *A, int Am, int An )
+{
+#if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
+    starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD,
+                                     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+                                     options->priority + 1,
+                                     2 /* Binary tree */ );
+#else
+    (void)options;
+    (void)A;
+    (void)Am;
+    (void)An;
+#endif
+}
+
+/**
+ * @brief Completes both codelets at load time (constructor attribute):
+ *        number of buffers and access mode of each buffer.
+ */
+void RUNTIME_zgersum_init( void ) __attribute__( ( constructor ) );
+void RUNTIME_zgersum_init( void )
+{
+    cl_zgersum_init.nbuffers = 1;
+    cl_zgersum_init.modes[0] = STARPU_W;
+
+    cl_zgersum_redux.nbuffers = 2;
+    cl_zgersum_redux.modes[0] = STARPU_RW | STARPU_COMMUTE;
+    cl_zgersum_redux.modes[1] = STARPU_R;
+}