diff --git a/CMakeLists.txt b/CMakeLists.txt index 3892d9b94325f20b38d2f8a1b13099786e466d8c..8f89d65c47f19efebacccf92390318f33c9af3bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,8 @@ # @author Alycia Lisito # @author Loris Lucido # @author Nathan Précigout -# @date 2024-03-11 +# @author Abel Calluaud +# @date 2024-07-17 # ### cmake_minimum_required(VERSION 3.5) @@ -260,6 +261,8 @@ if (CHAMELEON_ENABLE_TESTING) message("-- ${BoldGreen}CHAMELEON_ENABLE_TESTING is set to ON, turn it OFF to avoid building testing${ColourReset}") endif() +option(CHAMELEON_DEBUG_GERED "Enable GERED debug" OFF) + # Option to activate or not simulation mode (use Simgrid through StarPU) # ---------------------------------------------------------------------- cmake_dependent_option(CHAMELEON_SIMULATION diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 6dfa83891f9f2ded3dfb3252e5365d375508f3c6..cc25fb7d233c7206ced2d9e5fc59766e1bef25fe 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -27,7 +27,8 @@ # @author Alycia Lisito # @author Loris Lucido # @author Matthieu Kuhn -# @date 2024-04-03 +# @author Ana Hourcau +# @date 2024-07-17 # ### @@ -193,10 +194,12 @@ set(ZSRC ################## # MIXED PRECISION ################## + pzhered.c pzlag2c.c pzgered.c pzgerst.c ### + zhered.c zgered.c zgerst.c #zcgels.c diff --git a/compute/pzgered.c b/compute/pzgered.c index c1624db1ba06c0f43a7d84ea485a089d33965ade..1051ee91f45370bba6cefbdb21d3b17a6862c99f 100644 --- a/compute/pzgered.c +++ b/compute/pzgered.c @@ -13,7 +13,8 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> z d * */ @@ -28,8 +29,8 @@ static inline void chameleon_pzgered_frb( cham_uplo_t uplo, - CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, - RUNTIME_option_t *options ) + CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, + RUNTIME_option_t *options ) { double alpha = 1.0; double beta = 
0.0; @@ -233,21 +234,17 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, for(n = nmin; n < nmax; n++) { CHAM_tile_t *tile = A->get_blktile( A, m, n ); - if ( tile->rank == A->myrank ) { - int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb; - - /* Get the frobenius norm of the tile A( m, n ) */ - lnorm = ((double*)((Wcol.get_blktile( &Wcol, m, n ))->mat))[0]; - - /* - * u_{high} = 1e-16 (later should be application accuraccy) - * u_{low} = 1e-8 - * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) - * ||A_{i,j}||_F < threshold / u_{low} - */ - INSERT_TASK_zgered( &options, threshold, lnorm, - tempmm, tempnn, A( m, n ) ); - } + + int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb; + + /* + * u_{high} = 1e-16 (later should be application accuracy) + * u_{low} = 1e-8 + * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) + * ||A_{i,j}||_F < threshold / u_{low} + */ + INSERT_TASK_zgered( &options, threshold, + tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); } } diff --git a/compute/pzhered.c b/compute/pzhered.c new file mode 100644 index 0000000000000000000000000000000000000000..97e171b3f11dd6a40ccb44e7d2f85ee41324b62f --- /dev/null +++ b/compute/pzhered.c @@ -0,0 +1,288 @@ +/** + * + * @file pzhered.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zhered parallel algorithm + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Ana Hourcau + * @date 2024-07-17 + * @precisions normal z -> z d + * + */ +// ALLOC_WS : A->mb +// ALLOC_WS : A->nb +// WS_ADD : A->mb + A->nb +#include "control/common.h" +#include <coreblas/lapacke.h> + +#define A(m, n) A, (m), (n) +#define W(desc, m, n) (desc), (m), (n) + +static inline void +chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, + CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, + RUNTIME_option_t *options ) +{ + double alpha = 1.0; + double beta = 0.0; + + int m, n; + int MT = A->mt; + int NT = A->nt; + int M = A->m; + int N = A->n; + int P = Welt->p; + int Q = Welt->q; + + /* Initialize workspaces for tile norms */ + for (m = 0; m < Wnorm->mt; m++) + { + for (n = 0; n < NT; n++) + { + INSERT_TASK_dlaset( + options, + ChamUpperLower, Wnorm->mb, Wnorm->nb, + alpha, beta, + W(Wnorm, m, n)); + } + } + + /* Initialize workspaces */ + for (m = 0; m < Welt->mt; m++) + { + for (n = 0; n < Welt->nt; n++) + { + INSERT_TASK_dlaset( + options, + ChamUpperLower, Welt->mb, Welt->nb, + alpha, beta, + W(Welt, m, n)); + } + } + + /** + * Step 1: + * For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) ) + */ + for (m = 0; m < MT; m++) + { + int nmin = (uplo == ChamUpper) ? m : 0; + int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, NT) : NT; + + int tempmm = (m == (MT - 1)) ? M - m * A->mb : A->mb; + + for (n = nmin; n < nmax; n++) + { + int tempnn = (n == (NT - 1)) ? 
N - n * A->nb : A->nb; + + if (n == m) + { + if ( trans == ChamConjTrans ) { + INSERT_TASK_zhessq( + options, ChamEltwise, uplo, tempmm, + A(m, n), W( Wnorm, m, n) ); + } + else { + INSERT_TASK_zsyssq( + options, ChamEltwise, uplo, tempmm, + A(m, n), W( Wnorm, m, n) ); + } + } + else + { + INSERT_TASK_zgessq( + options, ChamEltwise, tempmm, tempnn, + A(m, n), W( Wnorm, m, n )); + INSERT_TASK_zgessq( + options, ChamEltwise, tempmm, tempnn, + A(m, n), W( Wnorm, n, m )); + } + } + } + + for(m = 0; m < MT; m++) { + for(n = Q; n < NT; n++) { + INSERT_TASK_dplssq( + options, ChamEltwise, 1, 1, W( Wnorm, m, n), W( Welt, m, n%Q) ); + } + + /** + * Step 2: + * For each j, W(m, j) = reduce( W( Welt, m, 0..Q-1) ) + */ + for(n = 1; n < Q; n++) { + INSERT_TASK_dplssq( + options, ChamEltwise, 1, 1, W( Welt, m, n), W( Welt, m, 0) ); + } + } + + /** + * Step 3: + * For m in 0..P-1, Welt(m, n) = reduce( Welt(m..mt[P], n ) ) + */ + for(m = P; m < MT; m++) { + INSERT_TASK_dplssq( + options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, m%P, 0) ); + } + + /** + * Step 4: + * For each i, Welt(i, n) = reduce( Welt(0..P-1, n) ) + */ + for(m = 1; m < P; m++) { + INSERT_TASK_dplssq( + options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, 0, 0) ); + } + + /* Compute the norm of each tile, and the full norm */ + for (m = 0; m < MT; m++) + { + int nmin = (uplo == ChamUpper) ? m : 0; + int nmax = (uplo == ChamLower) ? 
chameleon_min(m + 1, NT) : NT; + + for (n = nmin; n < nmax; n++) + { + /* Compute the final norm of the tile */ + INSERT_TASK_dplssq2( + options, 1, W( Wnorm, m, n ) ); + } + } + INSERT_TASK_dplssq2( + options, 1, W( Welt, 0, 0) ); + + /** + * Broadcast the result + */ + for (m = 0; m < A->p; m++) + { + for (n = 0; n < A->q; n++) + { + if ((m != 0) || (n != 0)) + { + INSERT_TASK_dlacpy( + options, + ChamUpperLower, 1, 1, + W(Welt, 0, 0), W(Welt, m, n)); + } + } + } +} + +/** + * Lower the precision of the tiles of A whose Frobenius norm is small enough w.r.t. prec and ||A||_F. + */ +void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + CHAM_desc_t Wcol; + CHAM_desc_t Welt; + double gnorm, threshold, eps; + + int workmt, worknt; + int m, n; + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) + { + return; + } + RUNTIME_options_init(&options, chamctxt, sequence, request); + + workmt = chameleon_max(A->mt, A->p); + worknt = chameleon_max(A->nt, A->q); + + RUNTIME_options_ws_alloc(&options, 1, 0); + + /* Matrix to store the norm of each tile */ + chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, + NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg); + + /* Matrix to compute the global frobenius norm */ + chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, + NULL, NULL, NULL, NULL); + + chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options ); + + CHAMELEON_Desc_Flush(&Wcol, sequence); + CHAMELEON_Desc_Flush(&Welt, sequence); + CHAMELEON_Desc_Flush(A, sequence); + + RUNTIME_sequence_wait(chamctxt, sequence); + + gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q)); + chameleon_desc_destroy(&Welt); + + /** + * Reduce the precision of the tiles if possible + */ 
+ if (prec < 0.) + { +#if !defined(CHAMELEON_SIMULATION) + eps = LAPACKE_dlamch_work('e'); +#else +#if defined(PRECISION_z) || defined(PRECISION_d) + eps = 1.e-15; +#else + eps = 1.e-7; +#endif +#endif + } + else + { + eps = prec; + } + threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); + +#if defined(CHAMELEON_DEBUG_GERED) + fprintf(stderr, + "[%2d] The norm of A is: %e\n" + "[%2d] The requested precision is: %e\n" + "[%2d] The computed threshold is: %e\n", + A->myrank, gnorm, + A->myrank, eps, + A->myrank, threshold); +#endif + for (m = 0; m < A->mt; m++) + { + int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + int nmin = (uplo == ChamUpper) ? m : 0; + int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, A->nt) : A->nt; + + for (n = nmin; n < nmax; n++) + { + CHAM_tile_t *tile = A->get_blktile(A, m, n); + + int tempnn = (n == (A->nt - 1)) ? A->n - n * A->nb : A->nb; + + /* + * u_{high} = 1e-16 (later should be application accuracy) + * u_{low} = 1e-8 + * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) + * ||A_{i,j}||_F < threshold / u_{low} + */ + + INSERT_TASK_zgered( &options, threshold, + tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + } + } + + CHAMELEON_Desc_Flush(A, sequence); + RUNTIME_sequence_wait(chamctxt, sequence); + + chameleon_desc_destroy(&Wcol); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, chamctxt); +} diff --git a/compute/zgered.c b/compute/zgered.c index f3783ad3446449be36eb5e0b9f0015ef2a249c60..a58e8f145fa459861008e8600852e9c06878c40f 100644 --- a/compute/zgered.c +++ b/compute/zgered.c @@ -13,7 +13,7 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @date 2024-07-17 * @precisions normal z -> z d * */ @@ -166,6 +166,15 @@ int CHAMELEON_zgered_Tile_Async( cham_uplo_t uplo, double precision, CHAM_desc_t return CHAMELEON_SUCCESS; } + if ( precision < 0. 
) { + char *algostr = chameleon_getenv( "CHAMELEON_GERED_ACC" ); + if ( algostr == NULL ) { + precision = 1e-12; + } + else { + precision = strtod( algostr, NULL ); + } + } chameleon_pzgered( uplo, precision, A, sequence, request ); return CHAMELEON_SUCCESS; diff --git a/compute/zhered.c b/compute/zhered.c new file mode 100644 index 0000000000000000000000000000000000000000..32e5c81570689af3e95b69bf65d5129195d76b1d --- /dev/null +++ b/compute/zhered.c @@ -0,0 +1,182 @@ +/** + * + * @file zhered.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zhered wrappers + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Ana Hourcau + * @date 2024-07-17 + * @precisions normal z -> z d + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile + * + * @brief Reduces the precision of the tiles of a Hermitian matrix whose norm is + * small enough compared to the norm of the full matrix. + * + * This is the synchronous version of CHAMELEON_zhered_Tile_Async(). It + * operates on matrices stored by tiles with tiles of potentially different + * precisions. All matrices are passed through descriptors. All dimensions are + * taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] uplo + * = ChamUpper: Upper triangle of A is stored; + * = ChamLower: Lower triangle of A is stored. + * + * @param[in] precision + * The requested accuracy used to compute the conversion threshold. 
+ * If negative, the value of the CHAMELEON_GERED_ACC environment variable is + * used, or 1e-12 if this variable is not set. + * + * @param[in,out] A + * On entry, the Hermitian matrix A stored by square tiles. Only the tiles of + * the triangular part selected by uplo are visited. + * On exit, the tiles whose Frobenius norm is small enough with respect to the + * requested accuracy and to the norm of A may have been converted to a lower + * precision (the conversion is only implemented for the StarPU runtime). + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON has not been initialized + * @return Otherwise, the error status of the sequence used to submit the tasks. + * + ******************************************************************************* + * + * @sa CHAMELEON_zhered_Tile_Async + * @sa CHAMELEON_zgered_Tile + * @sa CHAMELEON_zgered_Tile_Async + * @sa CHAMELEON_zgerst_Tile + * @sa CHAMELEON_zgerst_Tile_Async + * + */ +int CHAMELEON_zhered_Tile( cham_uplo_t uplo, double precision, CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zhered_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + CHAMELEON_zhered_Tile_Async( uplo, precision, A, sequence, &request ); + + CHAMELEON_Desc_Flush( A, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile_Async + * + * @brief Reduces the 
precision of the tiles of a Hermitian matrix whose norm is + * small enough compared to the norm of the full matrix. + * + * This is the non-blocking equivalent of CHAMELEON_zhered_Tile(). It + * operates on matrices stored by tiles with tiles of potentially different + * precisions. All matrices are passed through descriptors. All dimensions are + * taken from the descriptors. Note that the underlying algorithm waits for the + * completion of the sequence to read the norm of A before converting the tiles. + * + ******************************************************************************* + * + * The parameters uplo, precision and A are described in the documentation of + * CHAMELEON_zhered_Tile(). + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa CHAMELEON_zhered_Tile + * @sa CHAMELEON_zgered_Tile_Async + * @sa CHAMELEON_zgerst_Tile_Async + * + */ +int CHAMELEON_zhered_Tile_Async( cham_uplo_t uplo, double precision, CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zhered_Tile_Async", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + chameleon_fatal_error("CHAMELEON_zhered_Tile_Async", "NULL sequence"); + return CHAMELEON_ERR_UNALLOCATED; + } + if (request == NULL) { + chameleon_fatal_error("CHAMELEON_zhered_Tile_Async", "NULL request"); + return CHAMELEON_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == CHAMELEON_SUCCESS) { + request->status = CHAMELEON_SUCCESS; + } + else { + return chameleon_request_fail(sequence, request, 
CHAMELEON_ERR_SEQUENCE_FLUSHED); + } + + /* Check descriptors for correctness */ + if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { + chameleon_error("CHAMELEON_zhered_Tile_Async", "invalid descriptor"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + chameleon_error("CHAMELEON_zhered_Tile_Async", "only square tiles supported"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + + /* + * Quick return + */ + if ( chameleon_max( A->m, A->n ) == 0 ) { + return CHAMELEON_SUCCESS; + } + + if ( precision < 0. ) { + char *algostr = chameleon_getenv( "CHAMELEON_GERED_ACC" ); + if ( algostr == NULL ) { + precision = 1e-12; + } + else { + precision = strtod( algostr, NULL ); + } + } + chameleon_pzhered( ChamConjTrans, uplo, precision, A, sequence, request ); + + return CHAMELEON_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index 645018f833f835cccf50ea9f25858d5410e8fce2..088e03140baff5b167727931a6fb9e6b7a1641f0 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -22,7 +22,8 @@ * @author Alycia Lisito * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-09-08 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> c d s * */ @@ -81,6 +82,8 @@ int chameleon_zshift(CHAM_context_t *chamctxt, int m, int n, CHAMELEON_Complex64 #if defined(PRECISION_z) || defined(PRECISION_d) void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgerst( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); #endif diff --git a/coreblas/compute/global.c b/coreblas/compute/global.c index 
0c0ad0e769c11b6dc759996169b82c9640c00e09..54deb11cfc1915cc1c515a86b950e9bd4d385e16 100644 --- a/coreblas/compute/global.c +++ b/coreblas/compute/global.c @@ -11,14 +11,14 @@ * * @brief Chameleon global coreblas variables and functions * - * @version 1.2.0 + * @version 1.3.0 * @author Jakub Kurzak * @author Piotr Luszczek * @author Florent Pruvost * @author Guillaume Sylvand * @author Mathieu Faverge * @author Alycia Lisito - * @date 2022-02-22 + * @date 2024-07-17 * */ #include "coreblas.h" @@ -58,6 +58,8 @@ void __coreblas_kernel_trace( const char *func, ... ) size += snprintf( output+size, len-size, "%s%s", first ? "" : ", ", tile->name ); + size += snprintf( output+size, len-size, " / %p", + CHAM_tile_get_ptr( tile ) ); first = 0; } va_end( va_list ); diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 5d667cca39e1fe42eb61d29257ac45e38e2f3075..3f33260f4436ec195323874d3c13b9b44d2c62e4 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -23,7 +23,8 @@ * @author Florent Pruvost * @author Alycia Lisito * @author Matthieu Kuhn - * @date 2024-04-03 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> c d s * */ @@ -168,10 +169,9 @@ int CHAMELEON_zplrnk_Tile(int K, CHAM_desc_t *C, unsigned long long int seedA, u int CHAMELEON_zpoinv_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zposv_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B); int CHAMELEON_zpotrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A); -#if defined(PRECISION_z) || defined(PRECISION_d) int CHAMELEON_zgered_Tile( cham_uplo_t uplo, double prec, CHAM_desc_t *A ); +int CHAMELEON_zhered_Tile( cham_uplo_t uplo, double prec, CHAM_desc_t *A ); int CHAMELEON_zgerst_Tile( cham_uplo_t uplo, CHAM_desc_t *A ); -#endif int CHAMELEON_zsytrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zpotri_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zpotrimm_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t 
*C); @@ -249,10 +249,9 @@ int CHAMELEON_zplrnk_Tile_Async(int K, CHAM_desc_t *C, unsigned long long int se int CHAMELEON_zpoinv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zposv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zpotrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -#if defined(PRECISION_z) || defined(PRECISION_d) int CHAMELEON_zgered_Tile_Async(cham_uplo_t uplo, double prec, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zhered_Tile_Async(cham_uplo_t uplo, double prec, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgerst_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); -#endif int CHAMELEON_zsytrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zpotri_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zpotrimm_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in index 49885a8993781dfd8e0454862f91792c96e21688..c274a28841066c66ba54536d8e29a8e909104fdc 100644 --- a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -11,13 +11,15 @@ * * @brief Chameleon configuration file * - * @version 1.2.0 + * @version 1.3.0 * @author Florent Pruvost * @author Mathieu Faverge * @author Philippe Virouleau * @author Raphael Boucherie * @author Loris Lucido - * @date 2023-01-30 + * @author Abel Calluaud + * @author Alycia Lisito + * @date 2024-07-17 * */ #ifndef CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED @@ -79,6 
+81,9 @@ /* chameleon compute */ #cmakedefine CHAMELEON_COPY_DIAG +/* Debug options */ +#cmakedefine CHAMELEON_DEBUG_GERED + /* Define the maximum batch size for kernels using it */ #define CHAMELEON_BATCH_SIZE @CHAMELEON_BATCH_SIZE@ diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index b330ec7d840bb3136f8575e240bed5b8a9bc5847..795ebd2d186f9c1e88a44ab6312d40583b1a4d5d 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -24,7 +24,8 @@ * @author Alycia Lisito * @author Romain Peressoni * @author Matthieu Kuhn - * @date 2023-09-11 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> c d s * */ @@ -79,8 +80,9 @@ void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *T, int Tm, int Tn ); void INSERT_TASK_zgered( const RUNTIME_option_t *options, - double threshold, double Anorm, int m, int n, - const CHAM_desc_t *A, int Am, int An ); + double threshold, int m, int n, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *Wnorm, int Wnm, int Wnn ); void INSERT_TASK_zgerst( const RUNTIME_option_t *options, int m, int n, const CHAM_desc_t *A, int Am, int An ); diff --git a/runtime/openmp/codelets/codelet_zgered.c b/runtime/openmp/codelets/codelet_zgered.c index 19e6f9118969c74540a6a729af02f56f47ea47c6..20b0c191205ec8fe333c556943bac9d520ddc5f1 100644 --- a/runtime/openmp/codelets/codelet_zgered.c +++ b/runtime/openmp/codelets/codelet_zgered.c @@ -11,24 +11,28 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> d * */ #include "chameleon_openmp.h" void INSERT_TASK_zgered( const RUNTIME_option_t *options, - double threshold, double Anorm, int m, int n, - const CHAM_desc_t *A, int Am, int An ) + double threshold, int m, int n, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *Wnorm, int Wnm, int Wnn ) { fprintf( stderr, "WARNING: gered kernel is not 
available with OpenMP\n" ); (void)options; (void)threshold; - (void)Anorm; (void)m; (void)n; (void)A; (void)Am; (void)An; + (void)Wnorm; + (void)Wnm; + (void)Wnn; } diff --git a/runtime/parsec/codelets/codelet_zgered.c b/runtime/parsec/codelets/codelet_zgered.c index dcc20888b04936244f2e6ddade9ad3932a3b8413..338a7b5ff34c0f2a3f7b7bab193bd1aa4c049bd5 100644 --- a/runtime/parsec/codelets/codelet_zgered.c +++ b/runtime/parsec/codelets/codelet_zgered.c @@ -11,24 +11,28 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> d * */ #include "chameleon_parsec.h" void INSERT_TASK_zgered( const RUNTIME_option_t *options, - double threshold, double Anorm, int m, int n, - const CHAM_desc_t *A, int Am, int An ) + double threshold, int m, int n, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *Wnorm, int Wnm, int Wnn ) { fprintf( stderr, "WARNING: gered kernel is not available with PaRSEC\n" ); (void)options; (void)threshold; - (void)Anorm; (void)m; (void)n; (void)A; (void)Am; (void)An; + (void)Wnorm; + (void)Wnm; + (void)Wnn; } diff --git a/runtime/quark/codelets/codelet_zgered.c b/runtime/quark/codelets/codelet_zgered.c index 773bd7cd94dd1e20f57ef0c0f577a5bc98d68d33..b07695f70e0ebb9a10669965efa2917383f693d4 100644 --- a/runtime/quark/codelets/codelet_zgered.c +++ b/runtime/quark/codelets/codelet_zgered.c @@ -11,24 +11,28 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> d * */ #include "chameleon_quark.h" void INSERT_TASK_zgered( const RUNTIME_option_t *options, - double threshold, double Anorm, int m, int n, - const CHAM_desc_t *A, int Am, int An ) + double threshold, int m, int n, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *Wnorm, int Wnm, int Wnn ) { fprintf( stderr, "WARNING: gered kernel is not available with Quark\n" ); (void)options; (void)threshold; - (void)Anorm; 
(void)m; (void)n; (void)A; (void)Am; (void)An; + (void)Wnorm; + (void)Wnm; + (void)Wnn; } diff --git a/runtime/starpu/codelets/codelet_zgered.c b/runtime/starpu/codelets/codelet_zgered.c index a6f8cab2804921a580f33344eae60fb88d57b744..fe1c4927ef525aa24dd53a6b83f22d3c5e9959f4 100644 --- a/runtime/starpu/codelets/codelet_zgered.c +++ b/runtime/starpu/codelets/codelet_zgered.c @@ -13,7 +13,8 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> d * */ @@ -22,24 +23,36 @@ #include "runtime_codelet_zc.h" #include "runtime_codelet_z.h" -//#define CHAMELEON_DEBUG_GERED - void INSERT_TASK_zgered( const RUNTIME_option_t *options, - double threshold, double Anorm, int m, int n, - const CHAM_desc_t *A, int Am, int An ) + double threshold, int m, int n, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *Wnorm, int Wnm, int Wnn ) { CHAM_tile_t *tileA; - double u_low; + double u_low, lnorm; int64_t mm, nn; -#if defined(CHAMELEON_USE_MPI) - int tag; -#endif + int tag = -1; starpu_data_handle_t *handleAin; starpu_data_handle_t handleAout; - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A, Am, An); - CHAMELEON_END_ACCESS_DECLARATION; + /* + * Collect the norm of the tile on all nodes to do the data conversion + * if owned, and only the new data registration if not owned + */ + { + starpu_data_handle_t handleNorm = RTBLKADDR( Wnorm, ChamDouble, Wnm, Wnn ); + CHAM_tile_t *tileNorm; + +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_get_data_on_all_nodes_detached( options->sequence->comm, handleNorm ); +#endif + starpu_data_acquire( handleNorm, STARPU_R ); + + tileNorm = cti_handle_get( handleNorm ); + lnorm = ((double *)(tileNorm->mat))[0]; + + starpu_data_release( handleNorm ); + } /* Get the Input handle */ mm = Am + (A->i / A->mb); @@ -47,8 +60,6 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, handleAin = A->schedopt; handleAin += ((int64_t)A->lmt) * nn + mm; - 
assert( *handleAin != NULL ); - /* * Lets convert the tile precision based on the following criteria: * @@ -56,10 +67,14 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, * ||A_{i,j}||_F < u_{high} * || A ||_F / nt * 1/ u_{low} * ||A_{i,j}||_F < threshold / u_{low} */ - tileA = A->get_blktile( A, Am, An ); + #if defined(CHAMELEON_USE_MPI) - tag = starpu_mpi_data_get_tag( *handleAin ); + /* Backup the MPI tag */ + if (A->myrank == tileA->rank) + { + tag = starpu_mpi_data_get_tag( *handleAin ); + } #endif /* defined(CHAMELEON_USE_MPI) */ #if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500) @@ -69,15 +84,18 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, * Check for half precision */ u_low = 1.e-4; - if ( Anorm < (threshold / u_low) ) { + if ( lnorm < (threshold / u_low) ) + { #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] Convert the tile ( %d, %d ) to half precision\n", - A->myrank, Am, An ); + A->myrank, Am, An); #endif + if (A->myrank == tileA->rank) + { starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf ); - rt_starpu_insert_task( + rt_shm_starpu_insert_task( &cl_dlag2h, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), @@ -90,14 +108,22 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, #endif 0); - starpu_data_unregister_submit( *handleAin ); + starpu_data_unregister_no_coherency( *handleAin ); *handleAin = handleAout; tileA->flttype = ChamComplexHalf; -#if defined(CHAMELEON_USE_MPI) starpu_mpi_data_register( handleAout, tag, tileA->rank ); -#endif - return; } + else + { + tileA->flttype = ChamComplexHalf; + if (*handleAin != NULL) + { + starpu_data_unregister_no_coherency(*handleAin); + *handleAin = NULL; + } + } + return; + } #endif #endif @@ -110,33 +136,44 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, #else u_low = 1e-8; #endif - if ( Anorm < (threshold / u_low) ) { + if ( lnorm < (threshold / u_low) ) + { #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] Convert the tile ( %d, %d ) to 
single precision\n", A->myrank, Am, An ); #endif - starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexFloat ); + if (A->myrank == tileA->rank) + { + starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexFloat ); - rt_starpu_insert_task( - &cl_zlag2c, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, - STARPU_EXECUTE_ON_WORKER, options->workerid, + rt_shm_starpu_insert_task( + &cl_zlag2c, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, + STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlag2c", + STARPU_NAME, "zlag2c", #endif - 0); + 0); - starpu_data_unregister_submit( *handleAin ); - *handleAin = handleAout; - tileA->flttype = ChamComplexFloat; -#if defined(CHAMELEON_USE_MPI) - starpu_mpi_data_register( *handleAin, tag, tileA->rank ); -#endif + starpu_data_unregister_no_coherency( *handleAin ); + *handleAin = handleAout; + tileA->flttype = ChamComplexFloat; + starpu_mpi_data_register( *handleAin, tag, tileA->rank ); + } + else + { + tileA->flttype = ChamComplexFloat; + if (*handleAin != NULL) + { + starpu_data_unregister_no_coherency(*handleAin); + *handleAin = NULL; + } + } return; } } diff --git a/runtime/starpu/codelets/codelet_zgerst.c b/runtime/starpu/codelets/codelet_zgerst.c index 7aca89b00fd3731ee07d46c8fd7ecd236abb6b26..9a5c825f149c171dd2ad14f812d6bab7ed926546 100644 --- a/runtime/starpu/codelets/codelet_zgerst.c +++ b/runtime/starpu/codelets/codelet_zgerst.c @@ -11,7 +11,8 @@ * * @version 1.3.0 * @author Mathieu Faverge - * @date 2023-07-06 + * @author Ana Hourcau + * @date 2024-07-17 * @precisions normal z -> d * */ @@ -20,28 +21,17 @@ #include "runtime_codelet_zc.h" #include "runtime_codelet_z.h" -//#define CHAMELEON_DEBUG_GERST - void INSERT_TASK_zgerst( const RUNTIME_option_t 
*options, int m, int n, const CHAM_desc_t *A, int Am, int An ) { CHAM_tile_t *tileA; int64_t mm, nn; -#if defined(CHAMELEON_USE_MPI) - int tag; -#endif + int tag = -1; starpu_data_handle_t *handleAin; starpu_data_handle_t handleAout; - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_RW(A, Am, An); - CHAMELEON_END_ACCESS_DECLARATION; - tileA = A->get_blktile( A, Am, An ); - if ( tileA->flttype == ChamComplexDouble ) { - return; - } /* Get the Input handle */ mm = Am + (A->i / A->mb); @@ -49,7 +39,36 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, handleAin = A->schedopt; handleAin += ((int64_t)A->lmt) * nn + mm; - assert( *handleAin != NULL ); + if ( tileA->flttype == ChamComplexDouble ) { + starpu_data_handle_t *copy = handleAin; + + /* Remove first copy */ + copy += ((int64_t)A->lmt * (int64_t)A->lnt); + if ( *copy ) { + starpu_data_unregister_no_coherency( *copy ); + *copy = NULL; + } + + /* Remove second copy */ + copy += ((int64_t)A->lmt * (int64_t)A->lnt); + if ( *copy ) { + starpu_data_unregister_no_coherency( *copy ); + *copy = NULL; + } + + return; + } + + if (A->myrank != tileA->rank) + { + tileA->flttype = ChamComplexDouble; + if (*handleAin != NULL) + { + starpu_data_unregister_no_coherency(*handleAin); + *handleAin = NULL; + } + return; + } #if defined(CHAMELEON_USE_MPI) tag = starpu_mpi_data_get_tag( *handleAin ); @@ -64,12 +83,13 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, * Restore from half precision */ case ChamComplexHalf: -#if defined(CHAMELEON_DEBUG_GERST) + assert( options->withcuda ); +#if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] Convert back the tile ( %d, %d ) from half precision\n", A->myrank, Am, An ); #endif - rt_starpu_insert_task( + rt_shm_starpu_insert_task( &cl_hlag2d, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), @@ -86,12 +106,12 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, #endif case ChamComplexFloat: -#if defined(CHAMELEON_DEBUG_GERST) +#if 
defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] Convert back the tile ( %d, %d ) from half precision\n", A->myrank, Am, An ); #endif - rt_starpu_insert_task( + rt_shm_starpu_insert_task( &cl_clag2z, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), @@ -109,10 +129,8 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, fprintf( stderr, "ERROR: Unknonw input datatype" ); } - starpu_data_unregister_submit( *handleAin ); + starpu_data_unregister_no_coherency( *handleAin ); *handleAin = handleAout; tileA->flttype = ChamComplexDouble; -#if defined(CHAMELEON_USE_MPI) starpu_mpi_data_register( handleAout, tag, tileA->rank ); -#endif } diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index cbe083b3e49a4c002fda07a9feeda7db6c360cae..e00b75badbc9a0108f55c4b5a54b6f7160e5b11c 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -20,7 +20,7 @@ * @author Raphael Boucherie * @author Samuel Thibault * @author Loris Lucido - * @date 2024-03-16 + * @date 2024-07-17 * */ #include "chameleon_starpu.h" @@ -432,6 +432,26 @@ void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, /* Get the correct starpu_handle */ ptrtile += shift; + /* Invalidate copies on write access */ + if ( access & ChamW ) { + starpu_data_handle_t *copy = ptrtile; + assert( fltshift == 0 ); + + /* Remove first copy */ + copy += ((int64_t)A->lmt * (int64_t)A->lnt); + if ( *copy ) { + starpu_data_unregister_no_coherency( *copy ); + *copy = NULL; + } + + /* Remove second copy */ + copy += ((int64_t)A->lmt * (int64_t)A->lnt); + if ( *copy ) { + starpu_data_unregister_no_coherency( *copy ); + *copy = NULL; + } + } + if ( *ptrtile != NULL ) { return (void*)(*ptrtile); } @@ -440,7 +460,7 @@ void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, int myrank = A->myrank; int owner = A->get_rankof( A, m, n ); - if ( myrank == owner ) { + if ( (myrank 
== owner) && (shift == 0) ) { if ( (tile->format & CHAMELEON_TILE_HMAT) || (tile->mat != NULL) ) { @@ -476,6 +496,8 @@ void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, starpu_data_handle_t *totile = ptrtile; fromtile += ((int64_t)A->lmt) * nn + mm; + assert( fromtile != totile ); + assert( tile->flttype != flttype ); if ( *fromtile != NULL ) { insert_task_convert( options, tile->m, tile->n, tile->flttype, *fromtile, flttype, *totile ); } diff --git a/runtime/starpu/include/cham_tile_interface.h b/runtime/starpu/include/cham_tile_interface.h index 8abc48abcabd665bec47975bef768bc21850d8b4..5dc7672d8c90c4127cdf956f0e1bb8d4e718634a 100644 --- a/runtime/starpu/include/cham_tile_interface.h +++ b/runtime/starpu/include/cham_tile_interface.h @@ -9,10 +9,11 @@ * * @brief Header to describe the Chameleon tile interface in StarPU * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Gwenole Lucas - * @date 2022-02-22 + * @author Ana Hourcau + * @date 2024-07-17 * */ #ifndef _cham_tile_interface_h_ @@ -53,6 +54,20 @@ cti_interface_get( starpu_cham_tile_interface_t *interface ) return &(interface->tile); } +static inline CHAM_tile_t * +cti_handle_get( starpu_data_handle_t handle ) +{ + starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) + starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); + +#ifdef STARPU_DEBUG + STARPU_ASSERT_MSG( cham_tile_interface->id == STARPU_CHAM_TILE_INTERFACE_ID, + "Error. The given data is not a cham_tile." 
); +#endif + + return &(cham_tile_interface->tile); +} + void starpu_cham_tile_interface_init(); void starpu_cham_tile_interface_fini(); diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index b795b4c79454e65ad9e22ca9a37b124bfe6c734a..41949dfbb7c345050a5260b47646276c7af57002 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -20,7 +20,7 @@ * @author Loris Lucido * @author Terry Cojean * @author Matthieu Kuhn - * @date 2024-03-16 + * @date 2024-07-17 * */ #ifndef _chameleon_starpu_h_ @@ -149,6 +149,14 @@ void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, #endif +#if defined(CHAMELEON_RUNTIME_SYNC) +#define rt_shm_starpu_insert_task( _codelet_, ... ) \ + starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) +#else +#define rt_shm_starpu_insert_task( _codelet_, ... ) \ + starpu_insert_task( (_codelet_), ##__VA_ARGS__ ) +#endif + /* * Enable codelets names */ diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c index 352d7bd288833a3ffcb0e0d04f7cf06a6f96fe2f..89904548b70ed8ecced9b07fe76c9d0c541e66fb 100644 --- a/runtime/starpu/interface/cham_tile_interface.c +++ b/runtime/starpu/interface/cham_tile_interface.c @@ -13,7 +13,9 @@ * @author Mathieu Faverge * @author Gwenole Lucas * @author Samuel Thibault - * @date 2023-08-22 + * @author Abel Calluaud + * @author Ana Hourcau + * @date 2024-07-17 * */ #include "chameleon_starpu.h" @@ -77,20 +79,6 @@ cti_get_hmat_required_size( starpu_cham_tile_interface_t *cham_tile_interface _ } #endif -static inline CHAM_tile_t * -cti_handle_get( starpu_data_handle_t handle ) -{ - starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) - starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); - -#ifdef STARPU_DEBUG - STARPU_ASSERT_MSG( cham_tile_interface->id == 
STARPU_CHAM_TILE_INTERFACE_ID, - "Error. The given data is not a cham_tile." ); -#endif - - return &(cham_tile_interface->tile); -} - int cti_handle_get_m( starpu_data_handle_t handle ) {