diff --git a/compute/map.c b/compute/map.c index 16f10b5ec47f87dacfaaa9a00dc931865640c1ff..78840d6922d6cbaf6f2b6a7a5454b1cea1d7fa1d 100644 --- a/compute/map.c +++ b/compute/map.c @@ -16,6 +16,33 @@ */ #include "control/common.h" +struct map_args_s { + cham_unary_operator_t function; + void *args; +}; + +static inline int +map_cpu( void *op_args, + cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) +{ + struct map_args_s *options = (struct map_args_s *)op_args; + + if ( ndata > 1 ) { + fprintf( stderr, "map_cpu: supports only one piece of data and %d have been given\n", ndata ); + } + options->function( descA, uplo, m, n, tileA, options->args ); + + return 0; +} + +static cham_map_operator_t map_op = { + .name = "map", + .cpufunc = map_cpu, + .cudafunc = NULL, + .hipfunc = NULL, +}; + /** ******************************************************************************** * @@ -78,8 +105,17 @@ int CHAMELEON_map_Tile( cham_access_t access, } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_map_Tile_Async( access, uplo, A, op_fct, op_args, sequence, &request ); - + { + cham_map_data_t data = { + .access = access, + .desc = A + }; + struct map_args_s map_args = { + .function = op_fct, + .args = op_args, + }; + chameleon_pmap( uplo, 1, &data, &map_op, &map_args, sequence, &request ); + } CHAMELEON_Desc_Flush( A, sequence ); chameleon_sequence_wait( chamctxt, sequence ); @@ -164,7 +200,20 @@ int CHAMELEON_map_Tile_Async( cham_access_t access, return CHAMELEON_SUCCESS; } - chameleon_pmap( access, uplo, A, op_fct, op_args, sequence, request ); + { + cham_map_data_t data = { + .access = access, + .desc = A + }; + struct map_args_s map_args = { + .function = op_fct, + .args = op_args, + }; + chameleon_pmap( uplo, 1, &data, &map_op, &map_args, sequence, request ); + /* Need to wait to make sure no one accesses map_args after this function has returned. 
*/ + CHAMELEON_Desc_Flush( A, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + } return CHAMELEON_SUCCESS; } diff --git a/compute/pzlatms.c b/compute/pzlatms.c index 29cc20fe587cdaaa7731b0931e913187297c8357..8e6ba0f5fc1b85a612ff835b351332d3fe831c44 100644 --- a/compute/pzlatms.c +++ b/compute/pzlatms.c @@ -13,7 +13,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2024-03-11 * @precisions normal z -> s d c * */ @@ -33,9 +33,9 @@ static RUNTIME_id_t zlatms_runtime_id = RUNTIME_SCHED_STARPU; static inline int -zlaset_diag( const CHAM_desc_t *descA, - cham_uplo_t uplo, int m, int n, - CHAM_tile_t *tileA, void *op_args ) +zlaset_diag_cpu( void *op_args, + cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) { CHAMELEON_Complex64_t *A; const double *D = (const double *)op_args; @@ -45,6 +45,10 @@ zlaset_diag( const CHAM_desc_t *descA, int minmn = chameleon_min( tempmm, tempnn ); int lda, i; + if ( ndata > 1 ) { + fprintf( stderr, "zlaset_diag_cpu: supports only one piece of data and %d have been given\n", ndata ); + } + if ( zlatms_runtime_id == RUNTIME_SCHED_PARSEC ) { A = (CHAMELEON_Complex64_t*)tileA; lda = descA->get_blkldd( descA, m ); @@ -68,6 +72,13 @@ zlaset_diag( const CHAM_desc_t *descA, return 0; } +static cham_map_operator_t zlaset_diag_map = { + .name = "zlaset_diag", + .cpufunc = zlaset_diag_cpu, + .cudafunc = NULL, + .hipfunc = NULL, +}; + /** * Parallel scale of a matrix A */ @@ -163,11 +174,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym #endif /* Copy D to the diagonal of A */ - for (n = 0; n < kt; n++) { - INSERT_TASK_map( - &options, - ChamRW, ChamUpperLower, A(n, n), - zlaset_diag, D ); + { + cham_map_data_t data = { + .access = ChamRW, + .desc = A, + }; + for (n = 0; n < kt; n++) { + INSERT_TASK_map( + &options, ChamUpperLower, n, n, + 1, &data, &zlaset_diag_map, D ); + } } /** diff --git 
a/compute/zprint.c b/compute/zprint.c index 53243ff9b41912c49bbc44be76cbc6aa2ef69e50..cc2be40769abf806dd9012e92b705fb297ab9ca7 100644 --- a/compute/zprint.c +++ b/compute/zprint.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn - * @date 2023-07-05 + * @date 2024-03-11 * @precisions normal z -> s d c * */ @@ -33,9 +33,9 @@ struct zprint_args_s { }; static inline int -zprint( const CHAM_desc_t *descA, - cham_uplo_t uplo, int m, int n, - CHAM_tile_t *tileA, void *op_args ) +zprint_cpu( void *op_args, + cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) { CHAMELEON_Complex64_t *A; struct zprint_args_s *options = (struct zprint_args_s *)op_args; @@ -44,6 +44,10 @@ zprint( const CHAM_desc_t *descA, int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb; int lda; + if ( ndata > 1 ) { + fprintf( stderr, "zprint_cpu: supports only one piece of data and %d have been given\n", ndata ); + } + if ( zprint_runtime_id == RUNTIME_SCHED_PARSEC ) { A = (CHAMELEON_Complex64_t*)tileA; lda = descA->get_blkldd( descA, m ); @@ -61,6 +65,13 @@ zprint( const CHAM_desc_t *descA, return 0; } +static cham_map_operator_t zprint_map = { + .name = "zprint", + .cpufunc = zprint_cpu, + .cudafunc = NULL, + .hipfunc = NULL, +}; + /** ******************************************************************************** * @@ -99,12 +110,12 @@ int CHAMELEON_zprint( FILE *file, const char *header, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, int LDA ) { - int NB; - int status; - CHAM_context_t *chamctxt; - RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - CHAM_desc_t descAl, descAt; + int NB, status; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + cham_map_data_t data; struct zprint_args_s options = { .file = file, .header = header, @@ -152,7 
+163,10 @@ int CHAMELEON_zprint( FILE *file, const char *header, /* Call the tile interface */ zprint_runtime_id = chamctxt->scheduler; - chameleon_pmap( ChamR, uplo, &descAt, zprint, &options, sequence, &request ); + + data.access = ChamR; + data.desc = &descAt; + chameleon_pmap( uplo, 1, &data, &zprint_map, &options, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -199,9 +213,10 @@ int CHAMELEON_zprint( FILE *file, const char *header, int CHAMELEON_zprint_Tile( FILE *file, const char *header, cham_uplo_t uplo, CHAM_desc_t *A ) { - CHAM_context_t *chamctxt; - RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + cham_map_data_t data; struct zprint_args_s options = { .file = file, .header = header, @@ -216,7 +231,11 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header, chameleon_sequence_create( chamctxt, &sequence ); zprint_runtime_id = chamctxt->scheduler; - chameleon_pmap( ChamR, uplo, A, zprint, &options, sequence, &request ); + + data.access = ChamR; + data.desc = A; + + chameleon_pmap( uplo, 1, &data, &zprint_map, &options, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); chameleon_sequence_wait( chamctxt, sequence );