From 0cc694735ddcd0645e4c4a2c23dd2ee3a5d2934d Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Tue, 12 Mar 2024 14:40:46 +0100 Subject: [PATCH] parsec/map: Fix the map codelet to always use a tile interface and no longer force the user to check for the runtime used in the map functions. --- compute/pzlatms.c | 28 +++------- compute/zprint.c | 26 ++------- runtime/parsec/codelets/codelet_map.c | 78 ++++++++++++++++++++++----- 3 files changed, 75 insertions(+), 57 deletions(-) diff --git a/compute/pzlatms.c b/compute/pzlatms.c index 8e6ba0f5f..e8daf897e 100644 --- a/compute/pzlatms.c +++ b/compute/pzlatms.c @@ -13,7 +13,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Lionel Eyraud-Dubois - * @date 2024-03-11 + * @date 2024-03-14 * @precisions normal z -> s d c * */ @@ -26,39 +26,26 @@ #define A(m, n) A, m, n -/* - * Static variable to know how to handle the data within the kernel - * This assumes that only one runtime is enabled at a time. - */ -static RUNTIME_id_t zlatms_runtime_id = RUNTIME_SCHED_STARPU; - static inline int zlaset_diag_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int ndata, const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) { - CHAMELEON_Complex64_t *A; - const double *D = (const double *)op_args; + const double *D = (const double *)op_args; + CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; int tempnn = n == descA->nt-1 ? 
descA->n-n*descA->nb : descA->nb; - int minmn = chameleon_min( tempmm, tempnn ); - int lda, i; + int minmn = chameleon_min( tempmm, tempnn ); + int lda = tileA->ld; + int i; if ( ndata > 1 ) { fprintf( stderr, "zlaset_diag_cpu: supports only one piece of data and %d have been given\n", ndata ); } - if ( zlatms_runtime_id == RUNTIME_SCHED_PARSEC ) { - A = (CHAMELEON_Complex64_t*)tileA; - lda = descA->get_blkldd( descA, m ); - } - else { - A = tileA->mat; - lda = tileA->ld; - } - assert( m == n ); + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); /* Shift to the values corresponding to the tile */ D += m * descA->mb; @@ -104,7 +91,6 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym return; } ib = CHAMELEON_IB; - zlatms_runtime_id = chamctxt->scheduler; RUNTIME_options_init(&options, chamctxt, sequence, request); diff --git a/compute/zprint.c b/compute/zprint.c index cc2be4076..e17a406a1 100644 --- a/compute/zprint.c +++ b/compute/zprint.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn - * @date 2024-03-11 + * @date 2024-03-14 * @precisions normal z -> s d c * */ @@ -21,12 +21,6 @@ #include <coreblas/coreblas_z.h> #endif -/* - * Static variable to know how to handle the data within the kernel - * This assumes that only one runtime is enabled at a time. - */ -static RUNTIME_id_t zprint_runtime_id = RUNTIME_SCHED_STARPU; - struct zprint_args_s { FILE *file; const char *header; @@ -37,25 +31,17 @@ zprint_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int ndata, const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) { - CHAMELEON_Complex64_t *A; struct zprint_args_s *options = (struct zprint_args_s *)op_args; + CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; int tempnn = n == descA->nt-1 ? 
descA->n-n*descA->nb : descA->nb; - int lda; + int lda = tileA->ld; if ( ndata > 1 ) { fprintf( stderr, "zprint_cpu: supports only one piece of data and %d have been given\n", ndata ); } - - if ( zprint_runtime_id == RUNTIME_SCHED_PARSEC ) { - A = (CHAMELEON_Complex64_t*)tileA; - lda = descA->get_blkldd( descA, m ); - } - else { - A = CHAM_tile_get_ptr( tileA ); - lda = tileA->ld; - } + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); #if !defined(CHAMELEON_SIMULATION) CORE_zprint( options->file, options->header, uplo, @@ -162,8 +148,6 @@ int CHAMELEON_zprint( FILE *file, const char *header, A, NB, NB, LDA, N, M, N, sequence, &request ); /* Call the tile interface */ - zprint_runtime_id = chamctxt->scheduler; - data.access = ChamR; data.desc = &descAt; chameleon_pmap( uplo, 1, &data, &zprint_map, &options, sequence, &request ); @@ -230,8 +214,6 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header, } chameleon_sequence_create( chamctxt, &sequence ); - zprint_runtime_id = chamctxt->scheduler; - data.access = ChamR; data.desc = A; diff --git a/runtime/parsec/codelets/codelet_map.c b/runtime/parsec/codelets/codelet_map.c index 12a701bf3..d14ee9996 100644 --- a/runtime/parsec/codelets/codelet_map.c +++ b/runtime/parsec/codelets/codelet_map.c @@ -11,7 +11,7 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2024-03-11 + * @date 2024-03-14 * */ #include "chameleon_parsec.h" @@ -30,11 +30,21 @@ CORE_map_one_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { struct parsec_map_args_s *pargs = NULL; - CHAM_tile_t *tileA; + const CHAM_desc_t *descA; + CHAM_tile_t tileA; + + parsec_dtd_unpack_args( this_task, &pargs, &(tileA.mat) ); + + descA = pargs->desc[0]; + tileA.rank = 0; + tileA.m = (pargs->m == (descA->mt-1)) ? (descA->m - pargs->m * descA->mb) : descA->mb; + tileA.n = (pargs->n == (descA->nt-1)) ? 
(descA->n - pargs->n * descA->nb) : descA->nb; + tileA.ld = descA->get_blkldd( descA, pargs->m ); + tileA.format = CHAMELEON_TILE_FULLRANK; + tileA.flttype = descA->dtyp; - parsec_dtd_unpack_args( this_task, &pargs, &tileA ); pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 1, - pargs->desc[0], tileA ); + descA, &tileA ); free( pargs ); } @@ -44,12 +54,29 @@ CORE_map_two_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { struct parsec_map_args_s *pargs = NULL; - CHAM_tile_t *tileA; - CHAM_tile_t *tileB; + const CHAM_desc_t *descA, *descB; + CHAM_tile_t tileA, tileB; + + parsec_dtd_unpack_args( this_task, &pargs, &(tileA.mat), &(tileB.mat) ); + + descA = pargs->desc[0]; + tileA.rank = 0; + tileA.m = (pargs->m == (descA->mt-1)) ? (descA->m - pargs->m * descA->mb) : descA->mb; + tileA.n = (pargs->n == (descA->nt-1)) ? (descA->n - pargs->n * descA->nb) : descA->nb; + tileA.ld = descA->get_blkldd( descA, pargs->m ); + tileA.format = CHAMELEON_TILE_FULLRANK; + tileA.flttype = descA->dtyp; + + descB = pargs->desc[1]; + tileB.rank = 0; + tileB.m = (pargs->m == (descB->mt-1)) ? (descB->m - pargs->m * descB->mb) : descB->mb; + tileB.n = (pargs->n == (descB->nt-1)) ? 
(descB->n - pargs->n * descB->nb) : descB->nb; + tileB.ld = descB->get_blkldd( descB, pargs->m ); + tileB.format = CHAMELEON_TILE_FULLRANK; + tileB.flttype = descB->dtyp; - parsec_dtd_unpack_args( this_task, &pargs, &tileA, &tileB ); pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 2, - pargs->desc[0], tileA, pargs->desc[1], tileB ); + descA, &tileA, descB, &tileB ); free( pargs ); } @@ -59,14 +86,37 @@ CORE_map_three_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { struct parsec_map_args_s *pargs = NULL; - CHAM_tile_t *tileA; - CHAM_tile_t *tileB; - CHAM_tile_t *tileC; + const CHAM_desc_t *descA, *descB, *descC; + CHAM_tile_t tileA, tileB, tileC; + + parsec_dtd_unpack_args( this_task, &pargs, &(tileA.mat), &(tileB.mat), &(tileC.mat) ); + + descA = pargs->desc[0]; + tileA.rank = 0; + tileA.m = (pargs->m == (descA->mt-1)) ? (descA->m - pargs->m * descA->mb) : descA->mb; + tileA.n = (pargs->n == (descA->nt-1)) ? (descA->n - pargs->n * descA->nb) : descA->nb; + tileA.ld = descA->get_blkldd( descA, pargs->m ); + tileA.format = CHAMELEON_TILE_FULLRANK; + tileA.flttype = descA->dtyp; + + descB = pargs->desc[1]; + tileB.rank = 0; + tileB.m = (pargs->m == (descB->mt-1)) ? (descB->m - pargs->m * descB->mb) : descB->mb; + tileB.n = (pargs->n == (descB->nt-1)) ? (descB->n - pargs->n * descB->nb) : descB->nb; + tileB.ld = descB->get_blkldd( descB, pargs->m ); + tileB.format = CHAMELEON_TILE_FULLRANK; + tileB.flttype = descB->dtyp; + + descC = pargs->desc[2]; + tileC.rank = 0; + tileC.m = (pargs->m == (descC->mt-1)) ? (descC->m - pargs->m * descC->mb) : descC->mb; + tileC.n = (pargs->n == (descC->nt-1)) ? 
(descC->n - pargs->n * descC->nb) : descC->nb; + tileC.ld = descC->get_blkldd( descC, pargs->m ); + tileC.format = CHAMELEON_TILE_FULLRANK; + tileC.flttype = descC->dtyp; - parsec_dtd_unpack_args( this_task, &pargs, &tileA, &tileB, &tileC ); pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 3, - pargs->desc[0], tileA, pargs->desc[1], tileB, - pargs->desc[2], tileC ); + descA, &tileA, descB, &tileB, descC, &tileC ); free( pargs ); } -- GitLab