From de10a29dbe5ebfeaefd5bbd3a13290b1943525c7 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 30 Aug 2023 14:01:27 +0200
Subject: [PATCH] codelets: Add zlaswp_{get,set} codelets

---
Review notes (everything between the three-dash marker above and the first
file header below is ignored by git-am and is not part of the commit message).

This copy of the patch differs from the original commit on these points:
 * parsec: parsec_dtd_unpack_args() now receives &lda and &ldb. Every
   argument of that variadic call is a destination pointer; the original
   passed the two uninitialised integers by value.
 * parsec: CORE_zlaswp_{get,set}_parsec are declared as returning int, so
   they now return PARSEC_HOOK_RETURN_DONE instead of falling off the end.
 * openmp: the kernels receive the dimensions of the tile (tileA for get,
   tileB for set) instead of the dimensions of the whole descriptor.
 * quark: zlaswp_set forwards the dimensions of the destination tile B, as
   the StarPU and PaRSEC codelets do.
The blob ids on the "index" lines of the edited files are still those of the
original commit. Indentation was re-created when this file was reformatted;
use the --ignore-whitespace option of git-apply if a context line is rejected.

 include/chameleon/tasks_z.h               |  12 ++-
 runtime/CMakeLists.txt                    |   3 +-
 runtime/openmp/codelets/codelet_zlaswp.c  |  64 +++++++++++++
 runtime/parsec/codelets/codelet_zlaswp.c  | 100 ++++++++++++++++++++
 runtime/parsec/include/chameleon_parsec.h |  24 ++++-
 runtime/quark/codelets/codelet_zlaswp.c   |  79 ++++++++++++++++
 runtime/starpu/codelets/codelet_zlaswp.c  | 108 ++++++++++++++++++++++
 7 files changed, 385 insertions(+), 5 deletions(-)
 create mode 100644 runtime/openmp/codelets/codelet_zlaswp.c
 create mode 100644 runtime/parsec/codelets/codelet_zlaswp.c
 create mode 100644 runtime/quark/codelets/codelet_zlaswp.c
 create mode 100644 runtime/starpu/codelets/codelet_zlaswp.c

diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index c5704884e..c5fdfdf4e 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -24,7 +24,7 @@
  * @author Alycia Lisito
  * @author Romain Peressoni
  * @author Matthieu Kuhn
- * @date 2023-08-22
+ * @date 2023-08-31
  * @precisions normal z -> c d s
  *
  */
@@ -186,6 +186,16 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
 void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
                           const CHAM_desc_t *tileA, int tileAm, int tileAn );
+void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *tIPIV, int tIPIVk,
+                             const CHAM_desc_t *tileA, int tileAm, int tileAn,
+                             const CHAM_desc_t *tileB, int tileBm, int tileBn );
+void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *tIPIV, int tIPIVk,
+                             const CHAM_desc_t *tileA, int tileAm, int tileAn,
+                             const CHAM_desc_t *tileB, int tileBm, int tileBn );
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
                          const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 527034336..f011e6d96 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -76,8 +76,9 @@ set(CODELETS_ZSRC
   codelets/codelet_zlanhe.c
   codelets/codelet_zlansy.c
   codelets/codelet_zlantr.c
-  codelets/codelet_zlaset2.c
   codelets/codelet_zlaset.c
+  codelets/codelet_zlaset2.c
+  codelets/codelet_zlaswp.c
   codelets/codelet_zlatro.c
   codelets/codelet_zlauum.c
   codelets/codelet_zplghe.c
diff --git a/runtime/openmp/codelets/codelet_zlaswp.c b/runtime/openmp/codelets/codelet_zlaswp.c
new file mode 100644
index 000000000..452b73926
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlaswp.c
@@ -0,0 +1,64 @@
+/**
+ *
+ * @file openmp/codelet_zlaswp.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP codelets to apply zlaswp on a panel
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-08-31
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *U, int Um, int Un )
+{
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileU = U->get_blktile( U, Um, Un );
+    int         *perm  = NULL; // get perm from ipiv
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileU->format & CHAMELEON_TILE_FULLRANK );
+
+#pragma omp task firstprivate( m0, k, ipiv, tileA, tileU ) depend( in:perm ) depend( in:tileA[0] ) depend( inout:tileU[0] )
+    {
+        /* The kernel works on one tile: give it the tile size, not the descriptor size */
+        TCORE_zlaswp_get( m0, tileA->m, tileA->n, k, tileA, tileU, perm );
+    }
+
+    (void)options;
+}
+
+void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *B, int Bm, int Bn )
+{
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    int         *invp  = NULL; // get invp from ipiv
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+#pragma omp task firstprivate( m0, k, ipiv, tileA, tileB ) depend( in:invp ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    {
+        /* Same dimensions as the StarPU and PaRSEC codelets: those of the updated tile B */
+        TCORE_zlaswp_set( m0, tileB->m, tileB->n, k, tileA, tileB, invp );
+    }
+
+    (void)options;
+}
diff --git a/runtime/parsec/codelets/codelet_zlaswp.c b/runtime/parsec/codelets/codelet_zlaswp.c
new file mode 100644
index 000000000..284c450aa
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zlaswp.c
@@ -0,0 +1,100 @@
+/**
+ *
+ * @file parsec/codelet_zlaswp.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon PaRSEC codelets to apply zlaswp on a panel
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-08-31
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+static inline int
+CORE_zlaswp_get_parsec( parsec_execution_stream_t *context,
+                        parsec_task_t             *this_task )
+{
+    int                    m0, m, n, k, lda, ldb, *perm;
+    CHAMELEON_Complex64_t *A, *B;
+
+    /* Every argument is a destination pointer, lda and ldb included */
+    parsec_dtd_unpack_args( this_task, &m0, &m, &n, &k, &A, &lda, &B, &ldb, &perm );
+
+    CORE_zlaswp_get( m0, m, n, k, A, lda, B, ldb, perm );
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *U, int Um, int Un )
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileU = U->get_blktile( U, Um, Un );
+
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_zlaswp_get_parsec, options->priority, "laswp_get",
+        sizeof(int), &m0, VALUE,
+        sizeof(int), &(tileA->m), VALUE,
+        sizeof(int), &(tileA->n), VALUE,
+        sizeof(int), &k, VALUE,
+        PASSED_BY_REF, RTBLKADDR(A, ChamComplexDouble, Am, An), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int), &(tileA->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR(U, ChamComplexDouble, Um, Un), chameleon_parsec_get_arena_index( U ) | INOUT,
+        sizeof(int), &(tileU->ld), VALUE,
+        PASSED_BY_REF, RUNTIME_perm_getaddr( ipiv, ipivk ), chameleon_parsec_get_arena_index_perm( ipiv ) | INPUT,
+        PARSEC_DTD_ARG_END );
+}
+
+static inline int
+CORE_zlaswp_set_parsec( parsec_execution_stream_t *context,
+                        parsec_task_t             *this_task )
+{
+    int                    m0, m, n, k, lda, ldb, *invp;
+    CHAMELEON_Complex64_t *A, *B;
+
+    /* Every argument is a destination pointer, lda and ldb included */
+    parsec_dtd_unpack_args( this_task, &m0, &m, &n, &k, &A, &lda, &B, &ldb, &invp );
+
+    CORE_zlaswp_set( m0, m, n, k, A, lda, B, ldb, invp );
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *B, int Bm, int Bn )
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_zlaswp_set_parsec, options->priority, "laswp_set",
+        sizeof(int), &m0, VALUE,
+        sizeof(int), &(tileB->m), VALUE,
+        sizeof(int), &(tileB->n), VALUE,
+        sizeof(int), &k, VALUE,
+        PASSED_BY_REF, RTBLKADDR(A, ChamComplexDouble, Am, An), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int), &(tileA->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR(B, ChamComplexDouble, Bm, Bn), chameleon_parsec_get_arena_index( B ) | INOUT,
+        sizeof(int), &(tileB->ld), VALUE,
+        PASSED_BY_REF, RUNTIME_invp_getaddr( ipiv, ipivk ), chameleon_parsec_get_arena_index_invp( ipiv ) | INPUT,
+        PARSEC_DTD_ARG_END );
+}
diff --git a/runtime/parsec/include/chameleon_parsec.h b/runtime/parsec/include/chameleon_parsec.h
index 30518fb80..23d19fd3f 100644
--- a/runtime/parsec/include/chameleon_parsec.h
+++ b/runtime/parsec/include/chameleon_parsec.h
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon PaRSEC runtime header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Reazul Hoque
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-08-31
  *
  */
 #ifndef _chameleon_parsec_h_
@@ -38,10 +38,28 @@ struct chameleon_parsec_desc_s {
 typedef struct chameleon_parsec_desc_s chameleon_parsec_desc_t;
 
 static inline int
-chameleon_parsec_get_arena_index(const CHAM_desc_t *desc) {
+chameleon_parsec_get_arena_index( const CHAM_desc_t *desc ) {
     return ((chameleon_parsec_desc_t *)desc->schedopt)->arena_index;
 }
 
+static inline int
+chameleon_parsec_get_arena_index_ipiv( const CHAM_ipiv_t *ipiv ) {
+    assert(0);
+    return -1;
+}
+
+static inline int
+chameleon_parsec_get_arena_index_perm( const CHAM_ipiv_t *ipiv ) {
+    assert(0);
+    return -1;
+}
+
+static inline int
+chameleon_parsec_get_arena_index_invp( const CHAM_ipiv_t *ipiv ) {
+    assert(0);
+    return -1;
+}
+
 static inline int cham_to_parsec_access( cham_access_t accessA ) {
     if ( accessA == ChamR ) {
         return INPUT;
diff --git a/runtime/quark/codelets/codelet_zlaswp.c b/runtime/quark/codelets/codelet_zlaswp.c
new file mode 100644
index 000000000..117d67618
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zlaswp.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * @file quark/codelet_zlaswp.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon Quark codelets to apply zlaswp on a panel
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-08-31
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+static void CORE_zlaswp_get_quark( Quark *quark )
+{
+    int          m0, k, *perm;
+    CHAM_tile_t *A, *B;
+
+    quark_unpack_args_5( quark, m0, k, perm, A, B );
+
+    TCORE_zlaswp_get( m0, A->m, A->n, k, A, B, perm );
+}
+
+void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *U, int Um, int Un )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_LASWP;
+
+    QUARK_Insert_Task(
+        opt->quark, CORE_zlaswp_get_quark, (Quark_Task_Flags*)opt,
+        sizeof(int), &m0, VALUE,
+        sizeof(int), &k, VALUE,
+        sizeof(int*), RUNTIME_perm_getaddr( ipiv, ipivk ), INPUT,
+        sizeof(CHAM_tile_t*), RTBLKADDR(A, ChamComplexDouble, Am, An), INPUT,
+        sizeof(CHAM_tile_t*), RTBLKADDR(U, ChamComplexDouble, Um, Un), INOUT,
+        0 );
+}
+
+static void CORE_zlaswp_set_quark( Quark *quark )
+{
+    int          m0, k, *invp;
+    CHAM_tile_t *A, *B;
+
+    quark_unpack_args_5( quark, m0, k, invp, A, B );
+
+    /* Same dimensions as the StarPU and PaRSEC codelets: those of the updated tile B */
+    TCORE_zlaswp_set( m0, B->m, B->n, k, A, B, invp );
+}
+
+void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *B, int Bm, int Bn )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_LASWP;
+
+    QUARK_Insert_Task(
+        opt->quark, CORE_zlaswp_set_quark, (Quark_Task_Flags*)opt,
+        sizeof(int), &m0, VALUE,
+        sizeof(int), &k, VALUE,
+        sizeof(int*), RUNTIME_invp_getaddr( ipiv, ipivk ), INPUT,
+        sizeof(CHAM_tile_t*), RTBLKADDR(A, ChamComplexDouble, Am, An), INPUT,
+        sizeof(CHAM_tile_t*), RTBLKADDR(B, ChamComplexDouble, Bm, Bn), INOUT,
+        0 );
+}
diff --git a/runtime/starpu/codelets/codelet_zlaswp.c b/runtime/starpu/codelets/codelet_zlaswp.c
new file mode 100644
index 000000000..2d8fc31d4
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zlaswp.c
@@ -0,0 +1,108 @@
+/**
+ *
+ * @file starpu/codelet_zlaswp.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU codelets to apply zlaswp on a panel
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2023-08-31
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlaswp_get_cpu_func( void *descr[], void *cl_arg )
+{
+    int          m0, k, *perm;
+    CHAM_tile_t *A, *B;
+
+    starpu_codelet_unpack_args( cl_arg, &m0, &k );
+
+    perm = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    A    = (CHAM_tile_t *) cti_interface_get( descr[1] );
+    B    = (CHAM_tile_t *) cti_interface_get( descr[2] );
+
+    TCORE_zlaswp_get( m0, A->m, A->n, k, A, B, perm );
+}
+#endif
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU( zlaswp_get, cl_zlaswp_get_cpu_func )
+
+void INSERT_TASK_zlaswp_get( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *U, int Um, int Un )
+{
+    struct starpu_codelet *codelet = &cl_zlaswp_get;
+
+    //void (*callback)(void*) = options->profiling ? cl_zlaswp_get_callback : NULL;
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE, &m0, sizeof(int),
+        STARPU_VALUE, &k, sizeof(int),
+        STARPU_R, RUNTIME_perm_getaddr( ipiv, ipivk ),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW | STARPU_COMMUTE, RTBLKADDR(U, ChamComplexDouble, Um, Un),
+        STARPU_PRIORITY, options->priority,
+        //STARPU_CALLBACK, callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+        0 );
+}
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlaswp_set_cpu_func( void *descr[], void *cl_arg )
+{
+    int          m0, k, *invp;
+    CHAM_tile_t *A, *B;
+
+    starpu_codelet_unpack_args( cl_arg, &m0, &k );
+
+    invp = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    A    = (CHAM_tile_t *) cti_interface_get( descr[1] );
+    B    = (CHAM_tile_t *) cti_interface_get( descr[2] );
+
+    TCORE_zlaswp_set( m0, B->m, B->n, k, A, B, invp );
+}
+#endif
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU( zlaswp_set, cl_zlaswp_set_cpu_func )
+
+void INSERT_TASK_zlaswp_set( const RUNTIME_option_t *options,
+                             int m0, int k,
+                             const CHAM_ipiv_t *ipiv, int ipivk,
+                             const CHAM_desc_t *A, int Am, int An,
+                             const CHAM_desc_t *B, int Bm, int Bn )
+{
+    struct starpu_codelet *codelet = &cl_zlaswp_set;
+
+    //void (*callback)(void*) = options->profiling ? cl_zlaswp_set_callback : NULL;
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE, &m0, sizeof(int),
+        STARPU_VALUE, &k, sizeof(int),
+        STARPU_R, RUNTIME_invp_getaddr( ipiv, ipivk ),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        STARPU_PRIORITY, options->priority,
+        //STARPU_CALLBACK, callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+        0 );
+}
-- 
GitLab