From a59ca4f3574db6eb782a838b7827d7cc6edd4201 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 28 Sep 2022 22:20:13 +0200 Subject: [PATCH] mixed-precision: update files in runtimes --- runtime/openmp/codelets/codelet_zlag2c.c | 13 +++-- runtime/parsec/codelets/codelet_zlag2c.c | 28 ++++----- runtime/quark/codelets/codelet_zlag2c.c | 61 ++++++++++---------- runtime/starpu/codelets/codelet_zccallback.c | 26 +++++++++ runtime/starpu/codelets/codelet_zlag2c.c | 5 +- runtime/starpu/include/runtime_codelet_z.h | 5 -- runtime/starpu/include/runtime_codelet_zc.h | 44 ++++++++++++++ 7 files changed, 128 insertions(+), 54 deletions(-) create mode 100644 runtime/starpu/codelets/codelet_zccallback.c create mode 100644 runtime/starpu/include/runtime_codelet_zc.h diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c index 369b5dd05..30a7c94e8 100644 --- a/runtime/openmp/codelets/codelet_zlag2c.c +++ b/runtime/openmp/codelets/codelet_zlag2c.c @@ -17,8 +17,8 @@ * */ #include "chameleon_openmp.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_ztile.h" +#include "chameleon/tasks_zc.h" +#include "coreblas/coreblas_zctile.h" void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, int m, int n, int nb, @@ -26,9 +26,12 @@ void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn ) { CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); - CHAMELEON_Complex32_t *tileB = B->get_blktile( B, Bm, Bn ); + CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn ); #pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] ) - TCORE_zlag2c( m, n, tileA, tileB ); + { + int info = 0; + TCORE_zlag2c( m, n, tileA, tileB, &info ); + } (void)options; (void)nb; @@ -39,7 +42,7 @@ void INSERT_TASK_clag2z( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *B, int Bm, int Bn ) { - CHAMELEON_Complex32_t *tileA = A->get_blktile( A, Am, An ); + CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn ); #pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] ) TCORE_clag2z( m, n, tileA, tileB ); diff --git a/runtime/parsec/codelets/codelet_zlag2c.c b/runtime/parsec/codelets/codelet_zlag2c.c index 24255ade3..6df975c11 100644 --- a/runtime/parsec/codelets/codelet_zlag2c.c +++ b/runtime/parsec/codelets/codelet_zlag2c.c @@ -16,17 +16,18 @@ * @author Florent Pruvost * @author Mathieu Faverge * @date 2022-02-22 - * @precisions normal z -> c d s + * @precisions mixed zc -> ds * */ #include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" +#include "chameleon/tasks_zc.h" +#include "coreblas/coreblas_zc.h" static inline int CORE_zlag2c_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { + int info; int m; int n; CHAMELEON_Complex64_t *A; @@ -37,16 +38,16 @@ CORE_zlag2c_parsec( parsec_execution_stream_t *context, parsec_dtd_unpack_args( this_task, &m, &n, &A, &lda, &B, &ldb ); - CORE_zlag2c( m, n, A, lda, B, ldb ); + CORE_zlag2c( m, n, A, lda, B, ldb, &info ); (void)context; return PARSEC_HOOK_RETURN_DONE; } -void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); @@ -68,7 +69,8 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, * */ static inline int -CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task) +CORE_clag2z_parsec( parsec_execution_stream_t *context, + parsec_task_t *this_task ) { int m; int n; @@ -86,10 +88,10 @@ CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task) return PARSEC_HOOK_RETURN_DONE; } -void INSERT_TASK_clag2z(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_clag2z( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); diff --git a/runtime/quark/codelets/codelet_zlag2c.c b/runtime/quark/codelets/codelet_zlag2c.c index 336540606..2c6bccb84 100644 --- a/runtime/quark/codelets/codelet_zlag2c.c +++ b/runtime/quark/codelets/codelet_zlag2c.c @@ -21,10 +21,10 @@ * */ #include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_ztile.h" +#include "chameleon/tasks_zc.h" +#include "coreblas/coreblas_zctile.h" -void CORE_zlag2c_quark(Quark *quark) +void CORE_zlag2c_quark( Quark *quark ) { int m; int n; @@ -34,50 +34,53 @@ void CORE_zlag2c_quark(Quark *quark) RUNTIME_request_t *request; int info; - quark_unpack_args_6(quark, m, n, tileA, tileB, sequence, request); + quark_unpack_args_6( quark, m, n, tileA, tileB, sequence, request ); TCORE_zlag2c( m, n, tileA, tileB, &info ); if ( (sequence->status != CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, info ); } } -void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LAG2C; - QUARK_Insert_Task(opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, - sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn), OUTPUT, - sizeof(RUNTIME_sequence_t*), &(options->sequence), VALUE, - sizeof(RUNTIME_request_t*), &(options->request), VALUE, - 0); + QUARK_Insert_Task( opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, + sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn), OUTPUT, + sizeof(RUNTIME_sequence_t*), &(options->sequence), VALUE, + sizeof(RUNTIME_request_t*), &(options->request), VALUE, + 0 ); } -void CORE_clag2z_quark(Quark *quark) +void CORE_clag2z_quark( Quark *quark ) { int m; int n; CHAM_tile_t *tileA; CHAM_tile_t *tileB; - quark_unpack_args_6(quark, m, n, tileA, tileB); - TCORE_clag2z( m, n, tileA, tileB); + quark_unpack_args_4( quark, m, n, tileA, tileB ); + TCORE_clag2z( m, n, tileA, tileB ); } -void INSERT_TASK_clag2z(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_clag2z( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { - QUARK_Insert_Task(opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An), INPUT, - sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, - 0); + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_LAG2C; + + QUARK_Insert_Task( opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An), INPUT, + sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, + 0 ); } diff --git a/runtime/starpu/codelets/codelet_zccallback.c b/runtime/starpu/codelets/codelet_zccallback.c new file mode 100644 index 000000000..40bdd7dc9 --- /dev/null +++ b/runtime/starpu/codelets/codelet_zccallback.c @@ -0,0 +1,26 @@ +/** + * + * @file starpu/codelet_zccallback.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zc callback StarPU codelet + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Cedric Augonnet + * @author Florent Pruvost + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#include "chameleon_starpu.h" +#include "runtime_codelet_zc.h" + +CHAMELEON_CL_CB(zlag2c, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*N) +CHAMELEON_CL_CB(clag2z, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*N) diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index a259373a2..3ca3d7e70 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -25,11 +25,12 @@ * */ #include "chameleon_starpu.h" -#include "runtime_codelet_z.h" +#include "runtime_codelet_zc.h" #if !defined(CHAMELEON_SIMULATION) static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) { + int info = 0; int m; int n; CHAM_tile_t *tileA; @@ -39,7 +40,7 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) tileB = cti_interface_get(descr[1]); starpu_codelet_unpack_args(cl_arg, &m, &n); - TCORE_zlag2c( m, n, tileA, tileB); + TCORE_zlag2c( m, n, tileA, tileB, &info ); } #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index bd823f410..13c2cdb8b 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -105,11 +105,6 @@ CODELETS_HEADER(zlatro); CODELETS_HEADER(zplssq); CODELETS_HEADER(zplssq2); -/* - * MIXED PRECISION functions - */ -CODELETS_HEADER(zlag2c); - /* * DZ functions */ diff --git a/runtime/starpu/include/runtime_codelet_zc.h b/runtime/starpu/include/runtime_codelet_zc.h new file mode 100644 index 000000000..d357630c5 --- /dev/null +++ b/runtime/starpu/include/runtime_codelet_zc.h @@ -0,0 +1,44 @@ +/** + * + * @file starpu/runtime_codelet_zc.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon StarPU CHAMELEON_Complex64_t codelets header + * + * @version 1.2.0 + * @author Cedric Augonnet + * @author Mathieu Faverge + * @author Cedric Castagnede + * @author Florent Pruvost + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#ifndef _runtime_codelet_zc_h_ +#define _runtime_codelet_zc_h_ + +#include <stdio.h> +#include "runtime_codelets.h" + +#include "chameleon/tasks_zc.h" +#if !defined(CHAMELEON_SIMULATION) +#include "coreblas/coreblas_zc.h" +#include "coreblas/coreblas_zctile.h" +#if defined(CHAMELEON_USE_CUDA) +#include "cudablas.h" +#endif +#endif + +/* + * MIXED PRECISION functions + */ +CODELETS_HEADER(zlag2c); +CODELETS_HEADER(clag2z); + +#endif /* _runtime_codelet_zc_h_ */ -- GitLab