diff --git a/compute/mapv.c b/compute/mapv.c index a27cd2d49842a671ec83d7ecf458d231294e4b19..c492aa621921e2a35c983bd0aa80082adbd07287 100644 --- a/compute/mapv.c +++ b/compute/mapv.c @@ -11,7 +11,7 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2024-03-11 + * @date 2024-03-14 * */ #include "control/common.h" @@ -21,35 +21,34 @@ * * @ingroup CHAMELEON_Tile * - * Apply a given operator on each tile of the given matrix. Operates on - * matrices stored by tiles. All matrices are passed through descriptors. All - * dimensions are taken from the descriptors. + * Apply a given operator f on each tile of the given matrices. + * For each (m,n) \in [1,MT]x[1,NT], apply f( data[0](m, n), data[1](m, n), ... ) * ******************************************************************************* * - * @param[in] access - * - ChamR: A is accessed in read-only mode. - * - ChamW: A is accessed in write-only mode. - * WARNING: if the descriptor is set for allocation on the fly, the - * flush call included in this synchronous API will free all allocated - * data, prefer asynchronous call if you want to initialiaze data - * before submitting another algorithm. - * - ChamRW: A is accessed in read-write mode. - * * @param[in] uplo - * - ChamUpper: Only the upper triangular part of the matrix is touched - * - ChamLower: Only the lower triangular part of the matrix is touched - * - ChamUpperLower: The entire the matrix is touched + * - ChamUpper: only the upper part of the matrices is referenced. + * - ChamLower: only the lower part of the matrices is referenced. + * - ChamUpperLower: the full matrices are referenced. + * + * @param[in] ndata + * - the number of matrices given to the map function. + * - must match the number of entries in the data array. + * - must be supported by the operator function op_fct. * - * @param[in,out] A - * On exit, the operator has been applied on each tile of the matrix A. 
+ * @param[in] data + * Array of size ndata that contains the ndata couples { access, + * descriptor } used in the operator. * * @param[in] op_fct - * The operator function to apply on each tile of the matrix. + * The operator function to apply on each tile of the matrices. Must + * support the number of data ndata given as parameter. * * @param[in,out] op_args * The arguments structure passed to the operator function when applied - * on each tile. May be updated by the operator function. + * on each tile. May be updated by the operator function. If concurrent + * accesses must be protected, it is left to the user to do it in the + * op_fct. * ******************************************************************************* * @@ -93,20 +92,38 @@ int CHAMELEON_mapv_Tile( cham_uplo_t uplo, /** ******************************************************************************** * - * @ingroup CHAMELEON_Tile_Async + * @ingroup CHAMELEON_Tile_Async * - * Apply a given operator on each tile of the given matrix. Non-blocking equivalent of - * CHAMELEON_mapv_Tile(). May return before the computation is finished. - * Allows for pipelining of operations at runtime. + * Apply a given operator f on each tile of the given matrices. + * For each (m,n) \in [1,MT]x[1,NT], apply f( data[0](m, n), data[1](m, n), ... ) + * Non-blocking equivalent of CHAMELEON_mapv_Tile(). May return before the + * computation is finished. Allows for pipelining of operations at runtime. * ******************************************************************************* * - * @param[in] access - * - ChamR: A is accessed in read-only mode. - * - ChamW: A is accessed in write-only mode. - * INFO: tile of A can be unallocated before the call if the - * descriptor is set for allocation on the fly. - * - ChamRW: A is accessed in read-write mode. + * @param[in] uplo + * - ChamUpper: only the upper part of the matrices is referenced. + * - ChamLower: only the lower part of the matrices is referenced. 
+ * - ChamUpperLower: the full matrices are referenced. + * + * @param[in] ndata + * - the number of matrices given to the map function. + * - must match the number of entries in the data array. + * - must be supported by the operator function op_fct. + * + * @param[in] data + * Array of size ndata that contains the ndata couples { access, + * descriptor } used in the operator. + * + * @param[in] op_fct + * The operator function to apply on each tile of the matrices. Must + * support the number of data ndata given as parameter. + * + * @param[in,out] op_args + * The arguments structure passed to the operator function when applied + * on each tile. May be updated by the operator function. If concurrent + * accesses must be protected, it is left to the user to do it in the + * op_fct. * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to diff --git a/compute/pmap.c b/compute/pmap.c index bcc1a868e82fb6c7defec63d90ffcbdbed311e58..ef30f09de23cf92e71d20b6ae86ac69cc83da819 100644 --- a/compute/pmap.c +++ b/compute/pmap.c @@ -11,7 +11,7 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2024-03-11 + * @date 2024-03-14 * */ #include "control/common.h" @@ -28,31 +28,34 @@ void chameleon_pmap( cham_uplo_t uplo, int ndata, cham_map_data_t *data, CHAM_context_t *chamctxt; RUNTIME_option_t options; const CHAM_desc_t *A = data[0].desc; - int m, n; + int m, n, minmn; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) return; RUNTIME_options_init( &options, chamctxt, sequence, request ); + minmn = chameleon_min( A->mt, A->nt ); + switch( uplo ) { case ChamUpper: - for (n = 0; n < A->nt; n++) { - for (m = 0; m < n; m++) { + for (m = 0; m < minmn; m++) { + INSERT_TASK_map( + &options, uplo, m, m, + ndata, data, + op_fct, op_args ); + + for (n = m+1; n < A->nt; n++) { INSERT_TASK_map( &options, ChamUpperLower, m, n, ndata, data, op_fct, op_args ); } - INSERT_TASK_map( - &options, uplo, n, n, - ndata, 
data, - op_fct, op_args ); } break; case ChamLower: - for (n = 0; n < A->nt; n++) { + for (n = 0; n < minmn; n++){ INSERT_TASK_map( &options, uplo, n, n, ndata, data, diff --git a/compute/pzlatms.c b/compute/pzlatms.c index 8e6ba0f5fc1b85a612ff835b351332d3fe831c44..e8daf897e08937862fe4a08563ab050209984794 100644 --- a/compute/pzlatms.c +++ b/compute/pzlatms.c @@ -13,7 +13,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Lionel Eyraud-Dubois - * @date 2024-03-11 + * @date 2024-03-14 * @precisions normal z -> s d c * */ @@ -26,39 +26,26 @@ #define A(m, n) A, m, n -/* - * Static variable to know how to handle the data within the kernel - * This assumes that only one runtime is enabled at a time. - */ -static RUNTIME_id_t zlatms_runtime_id = RUNTIME_SCHED_STARPU; - static inline int zlaset_diag_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int ndata, const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) { - CHAMELEON_Complex64_t *A; - const double *D = (const double *)op_args; + const double *D = (const double *)op_args; + CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; int tempnn = n == descA->nt-1 ? 
descA->n-n*descA->nb : descA->nb; - int minmn = chameleon_min( tempmm, tempnn ); - int lda, i; + int minmn = chameleon_min( tempmm, tempnn ); + int lda = tileA->ld; + int i; if ( ndata > 1 ) { fprintf( stderr, "zlaset_diag_cpu: supports only one piece of data and %d have been given\n", ndata ); } - if ( zlatms_runtime_id == RUNTIME_SCHED_PARSEC ) { - A = (CHAMELEON_Complex64_t*)tileA; - lda = descA->get_blkldd( descA, m ); - } - else { - A = tileA->mat; - lda = tileA->ld; - } - assert( m == n ); + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); /* Shift to the values corresponding to the tile */ D += m * descA->mb; @@ -104,7 +91,6 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym return; } ib = CHAMELEON_IB; - zlatms_runtime_id = chamctxt->scheduler; RUNTIME_options_init(&options, chamctxt, sequence, request); diff --git a/compute/zbuild.c b/compute/zbuild.c index 6a56eaadbe738e78a4498d7344b5cf1bc67f502e..f36980ecc536d4cd24614b168d7b04c91651a053 100644 --- a/compute/zbuild.c +++ b/compute/zbuild.c @@ -128,7 +128,7 @@ int CHAMELEON_zbuild( cham_uplo_t uplo, int M, int N, A, NB, NB, LDA, N, M, N, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zbuild_Tile_Async( uplo, &descAt, user_data, user_build_callback, sequence, &request ); + chameleon_pzbuild( uplo, &descAt, user_data, user_build_callback, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -201,7 +201,7 @@ int CHAMELEON_zbuild_Tile( cham_uplo_t uplo, CHAM_desc_t *A, } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_zbuild_Tile_Async( uplo, A, user_data, user_build_callback, sequence, &request ); + chameleon_pzbuild( uplo, A, user_data, user_build_callback, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); diff --git a/compute/zprint.c b/compute/zprint.c index cc2be40769abf806dd9012e92b705fb297ab9ca7..e17a406a1b5adcdf80ff90e2d49a87ec62c104eb 100644 --- a/compute/zprint.c +++ 
b/compute/zprint.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn - * @date 2024-03-11 + * @date 2024-03-14 * @precisions normal z -> s d c * */ @@ -21,12 +21,6 @@ #include <coreblas/coreblas_z.h> #endif -/* - * Static variable to know how to handle the data within the kernel - * This assumes that only one runtime is enabled at a time. - */ -static RUNTIME_id_t zprint_runtime_id = RUNTIME_SCHED_STARPU; - struct zprint_args_s { FILE *file; const char *header; @@ -37,25 +31,17 @@ zprint_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int ndata, const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) { - CHAMELEON_Complex64_t *A; struct zprint_args_s *options = (struct zprint_args_s *)op_args; + CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb; - int lda; + int lda = tileA->ld; if ( ndata > 1 ) { fprintf( stderr, "zprint_cpu: supports only one piece of data and %d have been given\n", ndata ); } - - if ( zprint_runtime_id == RUNTIME_SCHED_PARSEC ) { - A = (CHAMELEON_Complex64_t*)tileA; - lda = descA->get_blkldd( descA, m ); - } - else { - A = CHAM_tile_get_ptr( tileA ); - lda = tileA->ld; - } + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); #if !defined(CHAMELEON_SIMULATION) CORE_zprint( options->file, options->header, uplo, @@ -162,8 +148,6 @@ int CHAMELEON_zprint( FILE *file, const char *header, A, NB, NB, LDA, N, M, N, sequence, &request ); /* Call the tile interface */ - zprint_runtime_id = chamctxt->scheduler; - data.access = ChamR; data.desc = &descAt; chameleon_pmap( uplo, 1, &data, &zprint_map, &options, sequence, &request ); @@ -230,8 +214,6 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header, } chameleon_sequence_create( chamctxt, &sequence ); - zprint_runtime_id = chamctxt->scheduler; - data.access = ChamR; data.desc = A; diff --git a/coreblas/include/coreblas.h 
b/coreblas/include/coreblas.h index ee6cda933cc791aa70bf7308c21c0262c2cfbb38..cc889a9ee28a46915d4deb115407d568c2616cba 100644 --- a/coreblas/include/coreblas.h +++ b/coreblas/include/coreblas.h @@ -18,7 +18,7 @@ * @author Guillaume Sylvand * @author Mathieu Faverge * @author Raphael Boucherie - * @date 2023-08-31 + * @date 2024-03-14 * */ #ifndef _coreblas_h_ @@ -51,6 +51,13 @@ BEGIN_C_DECLS #include "coreblas/coreblas_zc.h" #include "coreblas/coreblas_ds.h" +#include "coreblas/coreblas_ztile.h" +#include "coreblas/coreblas_dtile.h" +#include "coreblas/coreblas_ctile.h" +#include "coreblas/coreblas_stile.h" +#include "coreblas/coreblas_zctile.h" +#include "coreblas/coreblas_dstile.h" + END_C_DECLS /** diff --git a/example/lapack_to_chameleon/step7.c b/example/lapack_to_chameleon/step7.c index 7390979f401662f70f45d0c9eb7dcfae5bd459a7..8802ee7e74dfcdd147e67e47070b0f253fa8981e 100644 --- a/example/lapack_to_chameleon/step7.c +++ b/example/lapack_to_chameleon/step7.c @@ -11,12 +11,12 @@ * * @brief Chameleon step7 example * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Guillaume Sylvand * @author Mathieu Faverge * @author Philippe Virouleau - * @date 2022-02-22 + * @date 2024-03-14 * */ #include "step7.h" @@ -135,12 +135,32 @@ int main(int argc, char *argv[]) { /* generate A matrix with random values such that it is spd. 
We use the callback function Cham_build_callback_plgsy() defined in step7.h In this example, it is just a wrapper toward CORE_dplgsy() */ - struct data_pl data_A={(double)N, 51, N}; - CHAMELEON_dbuild_Tile(ChamUpperLower, descA, (void*)&data_A, Cham_build_callback_plgsy); + struct data_pl plgsy_args = { (double)N, 51 }; + struct cham_map_operator_s plgsy_op = { + .name = "plgsy", + .cpufunc = Cham_build_plgsy_cpu, + .cudafunc = NULL, + .hipfunc = NULL, + }; + struct cham_map_data_s plgsy_data = { + .access = ChamW, + .desc = descA, + }; + CHAMELEON_mapv_Tile( ChamUpperLower, 1, &plgsy_data, &plgsy_op, &plgsy_args ); /* generate RHS with the callback Cham_build_callback_plrnt() */ - struct data_pl data_B={0., 5673, N}; - CHAMELEON_dbuild_Tile(ChamUpperLower, descB, (void*)&data_B, Cham_build_callback_plrnt); + struct data_pl plrnt_args = { 0., 5673 }; + struct cham_map_operator_s plrnt_op = { + .name = "plrnt", + .cpufunc = Cham_build_plrnt_cpu, + .cudafunc = NULL, + .hipfunc = NULL, + }; + struct cham_map_data_s plrnt_data = { + .access = ChamW, + .desc = descB, + }; + CHAMELEON_mapv_Tile( ChamUpperLower, 1, &plrnt_data, &plrnt_op, &plrnt_args ); /* copy A before facto. 
in order to check the result */ CHAMELEON_dlacpy_Tile(ChamUpperLower, descA, descAC); diff --git a/example/lapack_to_chameleon/step7.h b/example/lapack_to_chameleon/step7.h index 5026f8750456e486f12846eb83a92589987ea47f..33d74144209dd9b3cdbfc06fd14ffc1d65c456fc 100644 --- a/example/lapack_to_chameleon/step7.h +++ b/example/lapack_to_chameleon/step7.h @@ -11,11 +11,11 @@ * * @brief Chameleon step7 example header * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Guillaume Sylvand * @author Mathieu Faverge - * @date 2022-02-22 + * @date 2024-03-14 * */ #ifndef _step7_h_ @@ -74,19 +74,42 @@ static void init_iparam(int iparam[IPARAM_SIZEOF]){ * and store it at the adresse 'buffer' with leading dimension 'ld' */ struct data_pl { - double bump; + double bump; unsigned long long int seed; - int bigM; }; -static void Cham_build_callback_plgsy(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) { - struct data_pl *data=(struct data_pl *)user_data; - CORE_dplgsy(data->bump, row_max-row_min+1, col_max-col_min+1, buffer, ld, data->bigM, row_min, col_min, data->seed); +static int Cham_build_plgsy_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) +{ + struct data_pl *data = (struct data_pl *)op_args; + int tempmm, tempnn; + + /* Get the dimension of the tile */ + tempmm = (m == (descA->mt-1)) ? (descA->m - m * descA->mb) : descA->mb; + tempnn = (n == (descA->nt-1)) ? 
(descA->n - n * descA->nb) : descA->nb; + + TCORE_dplgsy( data->bump, tempmm, tempnn, tileA, + descA->m, m * descA->mb, n * descA->nb, data->seed ); + + (void)uplo; + return 0; } -static void Cham_build_callback_plrnt(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) { - struct data_pl *data=(struct data_pl *)user_data; - CORE_dplrnt(row_max-row_min+1, col_max-col_min+1, buffer, ld, data->bigM, row_min, col_min, data->seed); +static int Cham_build_plrnt_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *descA, CHAM_tile_t *tileA, ... ) +{ + struct data_pl *data = (struct data_pl *)op_args; + int tempmm, tempnn; + + /* Get the dimension of the tile */ + tempmm = (m == (descA->mt-1)) ? (descA->m - m * descA->mb) : descA->mb; + tempnn = (n == (descA->nt-1)) ? (descA->n - n * descA->nb) : descA->nb; + + TCORE_dplrnt( tempmm, tempnn, tileA, + descA->m, m * descA->mb, n * descA->nb, data->seed ); + + (void)uplo; + return 0; } /** diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 6fbf8a1584e6ec9a0ba071d0c852f213cf244335..166feeef619018307a7f0873d0dc3ccf1b4983b4 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -23,7 +23,7 @@ * @author Florent Pruvost * @author Alycia Lisito * @author Matthieu Kuhn - * @date 2023-08-22 + * @date 2024-03-14 * @precisions normal z -> c d s * */ @@ -374,9 +374,9 @@ int CHAMELEON_zDesc2Lap( cham_uplo_t uplo, CHAM_desc_t *A, CHAMELEON_Complex64_t /** * User Builder function prototypes */ -int CHAMELEON_zbuild(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, int LDA, void *user_data, void* user_build_callback); -int CHAMELEON_zbuild_Tile(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback ); -int CHAMELEON_zbuild_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int 
CHAMELEON_zbuild(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, int LDA, void *user_data, void* user_build_callback) __attribute__((deprecated("Please refer to CHAMELEON_mapv_Tile() instead"))); +int CHAMELEON_zbuild_Tile(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback ) __attribute__((deprecated("Please refer to CHAMELEON_mapv_Tile() instead"))); +int CHAMELEON_zbuild_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) __attribute__((deprecated("Please refer to CHAMELEON_mapv_Tile_Async() instead"))); /** * Centered-Scaled function prototypes diff --git a/runtime/parsec/codelets/codelet_map.c b/runtime/parsec/codelets/codelet_map.c index 12a701bf379b41e5e9e42a61b4df3153c4b3cf60..d14ee9996087df3d6ee4357e649f9e93c832a0a4 100644 --- a/runtime/parsec/codelets/codelet_map.c +++ b/runtime/parsec/codelets/codelet_map.c @@ -11,7 +11,7 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2024-03-11 + * @date 2024-03-14 * */ #include "chameleon_parsec.h" @@ -30,11 +30,21 @@ CORE_map_one_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { struct parsec_map_args_s *pargs = NULL; - CHAM_tile_t *tileA; + const CHAM_desc_t *descA; + CHAM_tile_t tileA; + + parsec_dtd_unpack_args( this_task, &pargs, &(tileA.mat) ); + + descA = pargs->desc[0]; + tileA.rank = 0; + tileA.m = (pargs->m == (descA->mt-1)) ? (descA->m - pargs->m * descA->mb) : descA->mb; + tileA.n = (pargs->n == (descA->nt-1)) ? 
(descA->n - pargs->n * descA->nb) : descA->nb; + tileA.ld = descA->get_blkldd( descA, pargs->m ); + tileA.format = CHAMELEON_TILE_FULLRANK; + tileA.flttype = descA->dtyp; - parsec_dtd_unpack_args( this_task, &pargs, &tileA ); pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 1, - pargs->desc[0], tileA ); + descA, &tileA ); free( pargs ); } @@ -44,12 +54,29 @@ CORE_map_two_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { struct parsec_map_args_s *pargs = NULL; - CHAM_tile_t *tileA; - CHAM_tile_t *tileB; + const CHAM_desc_t *descA, *descB; + CHAM_tile_t tileA, tileB; + + parsec_dtd_unpack_args( this_task, &pargs, &(tileA.mat), &(tileB.mat) ); + + descA = pargs->desc[0]; + tileA.rank = 0; + tileA.m = (pargs->m == (descA->mt-1)) ? (descA->m - pargs->m * descA->mb) : descA->mb; + tileA.n = (pargs->n == (descA->nt-1)) ? (descA->n - pargs->n * descA->nb) : descA->nb; + tileA.ld = descA->get_blkldd( descA, pargs->m ); + tileA.format = CHAMELEON_TILE_FULLRANK; + tileA.flttype = descA->dtyp; + + descB = pargs->desc[1]; + tileB.rank = 0; + tileB.m = (pargs->m == (descB->mt-1)) ? (descB->m - pargs->m * descB->mb) : descB->mb; + tileB.n = (pargs->n == (descB->nt-1)) ? 
(descB->n - pargs->n * descB->nb) : descB->nb; + tileB.ld = descB->get_blkldd( descB, pargs->m ); + tileB.format = CHAMELEON_TILE_FULLRANK; + tileB.flttype = descB->dtyp; - parsec_dtd_unpack_args( this_task, &pargs, &tileA, &tileB ); pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 2, - pargs->desc[0], tileA, pargs->desc[1], tileB ); + descA, &tileA, descB, &tileB ); free( pargs ); } @@ -59,14 +86,37 @@ CORE_map_three_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { struct parsec_map_args_s *pargs = NULL; - CHAM_tile_t *tileA; - CHAM_tile_t *tileB; - CHAM_tile_t *tileC; + const CHAM_desc_t *descA, *descB, *descC; + CHAM_tile_t tileA, tileB, tileC; + + parsec_dtd_unpack_args( this_task, &pargs, &(tileA.mat), &(tileB.mat), &(tileC.mat) ); + + descA = pargs->desc[0]; + tileA.rank = 0; + tileA.m = (pargs->m == (descA->mt-1)) ? (descA->m - pargs->m * descA->mb) : descA->mb; + tileA.n = (pargs->n == (descA->nt-1)) ? (descA->n - pargs->n * descA->nb) : descA->nb; + tileA.ld = descA->get_blkldd( descA, pargs->m ); + tileA.format = CHAMELEON_TILE_FULLRANK; + tileA.flttype = descA->dtyp; + + descB = pargs->desc[1]; + tileB.rank = 0; + tileB.m = (pargs->m == (descB->mt-1)) ? (descB->m - pargs->m * descB->mb) : descB->mb; + tileB.n = (pargs->n == (descB->nt-1)) ? (descB->n - pargs->n * descB->nb) : descB->nb; + tileB.ld = descB->get_blkldd( descB, pargs->m ); + tileB.format = CHAMELEON_TILE_FULLRANK; + tileB.flttype = descB->dtyp; + + descC = pargs->desc[2]; + tileC.rank = 0; + tileC.m = (pargs->m == (descC->mt-1)) ? (descC->m - pargs->m * descC->mb) : descC->mb; + tileC.n = (pargs->n == (descC->nt-1)) ? 
(descC->n - pargs->n * descC->nb) : descC->nb; + tileC.ld = descC->get_blkldd( descC, pargs->m ); + tileC.format = CHAMELEON_TILE_FULLRANK; + tileC.flttype = descC->dtyp; - parsec_dtd_unpack_args( this_task, &pargs, &tileA, &tileB, &tileC ); pargs->op_fcts->cpufunc( pargs->op_args, pargs->uplo, pargs->m, pargs->n, 3, - pargs->desc[0], tileA, pargs->desc[1], tileB, - pargs->desc[2], tileC ); + descA, &tileA, descB, &tileB, descC, &tileC ); free( pargs ); }