diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 1c7b5a0b2e623d1091819c72004998b1cd673ac1..d60af1f6018b8c13545da8e74bd5d8477dd1d69e 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -27,7 +27,7 @@ # @author Alycia Lisito # @author Loris Lucido # @author Matthieu Kuhn -# @date 2023-07-06 +# @date 2023-08-22 # ### @@ -41,6 +41,7 @@ set(CHAMELEON_CONTROL ../control/descriptor.c ../control/descriptor_rec.c ../control/descriptor_helpers.c + ../control/descriptor_ipiv.c ../control/workspace.c ../control/tile.c ../control/chameleon_f77.c diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 1d93acc5ed3ec411fea4396c85012689e6560ee3..1060432565ee5d581cd82bc74da7c53df0829383 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -11,63 +11,19 @@ * * @brief Chameleon zgetrf parallel algorithm * - * @version 1.2.0 + * @version 1.3.0 * @author Omar Zenati * @author Mathieu Faverge * @author Emmanuel Agullo * @author Matthieu Kuhn - * @date 2023-02-21 + * @date 2023-08-22 * @precisions normal z -> s d c * */ #include "control/common.h" -#define A(m,n) A, m, n -#define U(m,n) &(ws->U), m, n -#define IPIV(m) IPIV, m, 1 - -/* - * Static variable to know how to handle the data within the kernel - * This assumes that only one runtime is enabled at a time. - */ -static RUNTIME_id_t zgetrf_runtime_id = RUNTIME_SCHED_STARPU; - -static inline int -zgetrf_ipiv_init( const CHAM_desc_t *descIPIV, - cham_uplo_t uplo, int m, int n, - CHAM_tile_t *tileIPIV, void *op_args ) -{ - int *IPIV; - (void)op_args; - - if ( zgetrf_runtime_id == RUNTIME_SCHED_PARSEC ) { - IPIV = (int*)tileIPIV; - } - else { - IPIV = CHAM_tile_get_ptr( tileIPIV ); - } - -#if !defined(CHAMELEON_SIMULATION) - { - int tempmm = m == descIPIV->mt-1 ? 
descIPIV->m - m * descIPIV->mb : descIPIV->mb; - int i; - - for( i=0; i<tempmm; i++ ) { - IPIV[i] = m * descIPIV->mb + i + 1; - } - } -#endif - - return 0; -} - -static inline void -chameleon_pzgetrf_ipiv_init( CHAM_desc_t *IPIV, - RUNTIME_sequence_t *sequence, - RUNTIME_request_t *request ) -{ - chameleon_pmap( ChamW, ChamUpperLower, IPIV, zgetrf_ipiv_init, NULL, sequence, request ); -} +#define A(m,n) A, m, n +#define U(m,n) &(ws->U), m, n /* * All the functions below are panel factorization variant. @@ -79,10 +35,10 @@ chameleon_pzgetrf_ipiv_init( CHAM_desc_t *IPIV, * @param[inout] A * The descriptor of the full matrix A (not just the panel) * - * @param[in] k - * The index of the column to factorize + * @param[inout] ipiv + * The descriptor of the pivot array associated to A. * - * @param[in] ib + * @param[in] k * The index of the column to factorize * * @param[inout] options @@ -91,6 +47,7 @@ chameleon_pzgetrf_ipiv_init( CHAM_desc_t *IPIV, static inline void chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, + CHAM_ipiv_t *ipiv, int k, RUNTIME_option_t *options ) { @@ -122,6 +79,7 @@ chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws, static inline void chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, + CHAM_ipiv_t *ipiv, int k, RUNTIME_option_t *options ) { @@ -136,13 +94,13 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws, * Algorithm per column without pivoting */ for(h=0; h<minmn; h++){ - INSERT_TASK_zgetrf_panel_nopiv_percol_diag( + INSERT_TASK_zgetrf_nopiv_percol_diag( options, tempkm, tempkn, h, A( k, k ), U( k, k ), A->mb * k ); for (m = k+1; m < A->mt; m++) { tempmm = (m == (A->mt - 1)) ? 
A->m - m * A->mb : A->mb; - INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( + INSERT_TASK_zgetrf_nopiv_percol_trsm( options, tempmm, tempkn, h, A( m, k ), U( k, k ) ); } @@ -151,18 +109,79 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws, RUNTIME_data_flush( options->sequence, U(k, k) ); } +static inline void +chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, + CHAM_desc_t *A, + CHAM_ipiv_t *ipiv, + int k, + RUNTIME_option_t *options ) +{ + int m, h; + int tempkm, tempkn, minmn; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + minmn = chameleon_min( tempkm, tempkn ); + + /* Update the number of column */ + ipiv->n = minmn; + + /* + * Algorithm per column with pivoting + */ + for (h=0; h<=minmn; h++){ + + INSERT_TASK_zgetrf_percol_diag( + options, + h, k * A->mb, + A(k, k), + ipiv ); + + for (m = k+1; m < A->mt; m++) { + //tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + + INSERT_TASK_zgetrf_percol_offdiag( + options, + h, m * A->mb, + A(m, k), + ipiv ); + } + + if ( h < minmn ) { + /* Reduce globally (between MPI processes) */ + RUNTIME_ipiv_reducek( options, ipiv, k, h ); + } + } + + /* Flush temporary data used for the pivoting */ + RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); +} + static inline void chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, + CHAM_ipiv_t *ipiv, int k, RUNTIME_option_t *options ) { /* TODO: Should be replaced by a function pointer */ - if ( ws->alg == ChamGetrfNoPivPerColumn ) { - chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options ); - } - else { - chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options ); + switch( ws->alg ) { + case ChamGetrfNoPivPerColumn: + chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, ipiv, k, options ); + break; + + case ChamGetrfPPivPerColumn: + chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options ); + break; + + case ChamGetrfPPiv: + 
chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options ); + break; + + case ChamGetrfNoPiv: + chameleon_attr_fallthrough; + default: + chameleon_pzgetrf_panel_facto_nopiv( ws, A, ipiv, k, options ); } } @@ -227,7 +246,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, */ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, - CHAM_desc_t *IPIV, + CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { @@ -243,14 +262,11 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, } RUNTIME_options_init( &options, chamctxt, sequence, request ); - /* Initialize IPIV */ - chameleon_pzgetrf_ipiv_init( IPIV, sequence, request ); - for (k = 0; k < min_mnt; k++) { RUNTIME_iteration_push( chamctxt, k ); options.priority = A->nt; - chameleon_pzgetrf_panel_facto( ws, A, k, &options ); + chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options ); for (n = k+1; n < A->nt; n++) { options.priority = A->nt-n; @@ -272,5 +288,12 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, } } + /* Initialize IPIV */ + if ( (ws->alg == ChamGetrfNoPivPerColumn) || + (ws->alg == ChamGetrfNoPiv ) ) + { + RUNTIME_ipiv_init( IPIV ); + } + RUNTIME_options_finalize( &options, chamctxt ); } diff --git a/compute/zgetrf.c b/compute/zgetrf.c index 44d57502a84287db609f701b64a74f8a489aada0..73c810be2c1f3294583b6599aff49e726f8f049d 100644 --- a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -19,7 +19,7 @@ * @author Florent Pruvost * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2023-08-22 * * @precisions normal z -> s d c * @@ -68,15 +68,21 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) { char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" ); - if ( algostr ) { - if ( strcasecmp( algostr, "nopiv" ) ) { + if ( algostr != NULL ) { + if ( strcasecmp( algostr, "nopiv" ) == 0 ) { ws->alg = ChamGetrfNoPiv; } else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0 ) { ws->alg = 
ChamGetrfNoPivPerColumn; } + else if ( strcasecmp( algostr, "ppiv" ) == 0 ) { + ws->alg = ChamGetrfPPiv; + } + else if ( strcasecmp( algostr, "ppivpercolumn" ) == 0 ) { + ws->alg = ChamGetrfPPivPerColumn; + } else { - fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" ); + chameleon_error( "CHAMELEON_zgetrf_WS_Alloc", "CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn, PPiv, PPivPerColumn => Switch back to NoPiv\n" ); } } chameleon_cleanenv( algostr ); @@ -90,6 +96,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } + /* Set ib to 1 if per column algorithm */ + if ( ( ws->alg == ChamGetrfNoPivPerColumn ) || + ( ws->alg == ChamGetrfPPivPerColumn ) ) + { + ws->ib = 1; + } + return ws; } @@ -123,7 +136,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws ) free( ws ); } -#if defined(NOT_AVAILABLE_YET) /** ******************************************************************************** * @@ -149,6 +161,11 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws ) * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * + * @param[out] IPIV + * Integer array of dimension min(M,N). + * The pivot indices; for 1 <= i <= min(M,N), row i of the + * matrix was interchanged with row IPIV(i). 
+ * ******************************************************************************* * * @retval CHAMELEON_SUCCESS successful exit @@ -173,10 +190,11 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) int NB; int status; CHAM_desc_t descAl, descAt; + CHAM_ipiv_t descIPIV; CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - void *ws; + struct chameleon_pzgetrf_s *ws; chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { @@ -218,25 +236,36 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) /* Allocate workspace for partial pivoting */ ws = CHAMELEON_zgetrf_WS_Alloc( &descAt ); + + if ( ws->alg == ChamGetrfPPivPerColumn ) { + chameleon_ipiv_init( &descIPIV, &descAt, IPIV ); + } + /* Call the tile interface */ - CHAMELEON_zgetrf_Tile_Async( &descAt, ws, sequence, &request ); + CHAMELEON_zgetrf_Tile_Async( &descAt, &descIPIV, ws, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower, sequence, &request ); + if ( ws->alg == ChamGetrfPPivPerColumn ) { + RUNTIME_ipiv_gather( &descIPIV, IPIV, 0 ); + } chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ + if ( ws->alg == ChamGetrfPPivPerColumn ) { /* ws->alg must be read before ws is freed below */ + chameleon_ipiv_destroy( &descIPIV ); + } CHAMELEON_zgetrf_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); + status = sequence->status; chameleon_sequence_destroy( chamctxt, sequence ); return status; } -#endif /** ******************************************************************************** @@ -254,12 +283,19 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) * On entry, the M-by-N matrix to be factored. * On exit, the tile factors L and U from the factorization. 
* + * @param[in,out] IPIV + * On entry, ipiv descriptor associated to A and created with + * CHAMELEON_Ipiv_Create(). + * On exit, it contains the pivot indices associated to the PLU + * factorization of A. + * ******************************************************************************* * * @retval CHAMELEON_SUCCESS successful exit - * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, - * but the factor U is exactly singular, and division by zero will occur - * if it is used to solve a system of equations. + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been + * completed, but the factor U is exactly singular, and division + * by zero will occur if it is used to solve a system of + * equations. * ******************************************************************************* * @@ -272,7 +308,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) * */ int -CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV ) +CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV ) { CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; @@ -316,9 +352,10 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV ) * On exit, the tile factors L and U from the factorization. * * @param[in,out] IPIV - * On entry, the descriptor of an min(M, N)-by-1 matrix that may not - * have been initialized. - * On exit, the pivot vector generated during the factorization. + * On entry, ipiv descriptor associated to A and created with + * CHAMELEON_Ipiv_Create(). + * On exit, it contains the pivot indices associated to the PLU + * factorization of A. 
* * @param[in,out] user_ws * The opaque pointer to pre-allocated getrf workspace through @@ -345,7 +382,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV ) */ int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, - CHAM_desc_t *IPIV, + CHAM_ipiv_t *IPIV, void *user_ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) @@ -383,10 +420,6 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" ); return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS ) { - chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" ); - return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); - } /* Check input arguments */ if ( A->nb != A->mb ) { @@ -397,10 +430,6 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the number of rows as tiles of A" ); return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if ( IPIV->nb != 1 ) { - chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectore with only one column per tile" ); - return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); - } if ( user_ws == NULL ) { ws = CHAMELEON_zgetrf_WS_Alloc( A ); @@ -409,7 +438,7 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, ws = user_ws; } - chameleon_pzgetrf( user_ws, A, IPIV, sequence, request ); + chameleon_pzgetrf( ws, A, IPIV, sequence, request ); if ( user_ws == NULL ) { CHAMELEON_Desc_Flush( A, sequence ); diff --git a/control/compute_z.h b/control/compute_z.h index 06eae17b508c012918bbd011bad9cbb25a7bb7d4..9032c20f24666d4e751b1e422e9afd07b41d047f 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -22,7 +22,7 @@ * @author Alycia Lisito * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-07-06 + * @date 2023-08-22 * @precisions normal z -> c d s * */ 
@@ -94,7 +94,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/control/descriptor.h b/control/descriptor.h index 38153b6b2bc444155315d249139236a827a2b1de..d3d65c20c20fed8f9836c8a10aaf5a02572b3ca6 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -20,7 +20,7 @@ * @author Raphael Boucherie * @author Samuel Thibault * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2023-08-22 * */ #ifndef _chameleon_descriptor_h_ @@ -76,6 +76,9 @@ CHAM_desc_t* chameleon_desc_submatrix( CHAM_desc_t *descA, int i, int j, int m, void chameleon_desc_destroy ( CHAM_desc_t *desc ); int chameleon_desc_check ( const CHAM_desc_t 
*desc ); +int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data ); +void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv ); + /** * Internal function to return address of block (m,n) with m,n = block indices */ diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c new file mode 100644 index 0000000000000000000000000000000000000000..54c9fec4068dfdf0c05ae9b18262d2e179c3995b --- /dev/null +++ b/control/descriptor_ipiv.c @@ -0,0 +1,245 @@ +/** + * + * @file descriptor_ipiv.c + * + * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon descriptors routines + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + *** + * + * @defgroup Descriptor + * @brief Group descriptor routines exposed to users to manipulate IPIV data structures + * + */ +#define _GNU_SOURCE 1 +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include "control/common.h" +#include "control/descriptor.h" +#include "chameleon/runtime.h" + +/** + ****************************************************************************** + * + * @ingroup Descriptor + * + * @brief Internal function to create tiled descriptor associated to a pivot array. + * + ****************************************************************************** + * + * @param[in,out] ipiv + * The pointer to the ipiv descriptor to initialize. + * + * @param[in] desc + * The tile descriptor for which an associated ipiv descriptor must be generated. + * + * @param[in] data + * The pointer to the original vector where to store the pivot values. + * + ****************************************************************************** + * + * @return CHAMELEON_SUCCESS on success, CHAMELEON_ERR_NOT_INITIALIZED otherwise. 
+ * + */ +int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data ) +{ + CHAM_context_t *chamctxt; + int rc = CHAMELEON_SUCCESS; + + memset( ipiv, 0, sizeof(CHAM_ipiv_t) ); + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_error("chameleon_ipiv_init", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + + ipiv->desc = desc; + ipiv->data = data; + ipiv->i = 0; + ipiv->m = chameleon_min( desc->m, desc->n ); + ipiv->mb = desc->mb; + ipiv->mt = chameleon_ceil( ipiv->m, ipiv->mb ); + + /* Create runtime specific structure like registering data */ + RUNTIME_ipiv_create( ipiv ); + + return rc; +} + +/** + ****************************************************************************** + * + * @ingroup Descriptor + * + * @brief Internal function to destroy a tiled descriptor associated to a pivot array. + * + ****************************************************************************** + * + * @param[in,out] ipiv + * The pointer to the ipiv descriptor to destroy. + * + */ +void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv ) +{ + RUNTIME_ipiv_destroy( ipiv ); +} + +/** + ***************************************************************************** + * + * @ingroup Descriptor + * + * @brief Create a tiled ipiv descriptor associated to a given matrix. + * + ****************************************************************************** + * + * @param[out] ipivptr + * On exit, the pointer to the newly allocated and initialized ipiv descriptor. + * + * @param[in] desc + * The tile descriptor for which an associated ipiv descriptor must be generated. + * + * @param[in] data + * The pointer to the original vector where to store the pivot values. + * + ****************************************************************************** + * + * @retval CHAMELEON_SUCCESS on successful exit + * @retval CHAMELEON_ERR_NOT_INITIALIZED if failed to initialize the descriptor. 
+ * @retval CHAMELEON_ERR_OUT_OF_RESOURCES if failed to allocate some resources. + * + */ +int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void *data ) +{ + CHAM_context_t *chamctxt; + CHAM_ipiv_t *ipiv; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_error("CHAMELEON_Ipiv_Create", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + + /* Allocate memory and initialize the ipiv descriptor */ + ipiv = (CHAM_ipiv_t*)malloc(sizeof(CHAM_ipiv_t)); + if (ipiv == NULL) { + chameleon_error("CHAMELEON_Ipiv_Create", "malloc() failed"); + return CHAMELEON_ERR_OUT_OF_RESOURCES; + } + + int rc = chameleon_ipiv_init( ipiv, desc, data ); + if ( rc != CHAMELEON_SUCCESS ) { free( ipiv ); return rc; } /* do not leak nor publish a descriptor that failed to initialize */ + *ipivptr = ipiv; + return CHAMELEON_SUCCESS; +} + +/** + ***************************************************************************** + * + * @ingroup Descriptor + * + * @brief Destroys an ipiv tile descriptor. + * + ****************************************************************************** + * + * @param[in,out] ipivptr + * The Ipiv tile descriptor to destroy. Reset to NULL on successful exit. + * + ****************************************************************************** + * + * @retval CHAMELEON_SUCCESS successful exit + * + */ +int CHAMELEON_Ipiv_Destroy(CHAM_ipiv_t **ipivptr) +{ + CHAM_context_t *chamctxt; + CHAM_ipiv_t *ipiv; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_error("CHAMELEON_Ipiv_Destroy", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + + if ((ipivptr == NULL) || (*ipivptr == NULL)) { + chameleon_error("CHAMELEON_Ipiv_Destroy", "attempting to destroy a NULL descriptor"); + return CHAMELEON_ERR_UNALLOCATED; + } + + ipiv = *ipivptr; + chameleon_ipiv_destroy( ipiv ); + free(ipiv); + *ipivptr = NULL; + return CHAMELEON_SUCCESS; +} + + /** + ***************************************************************************** + * + * @ingroup Descriptor + * + * @brief Flushes the data in the sequence when they won't be reused. 
This call + * cleans up the distributed communication caches, and transfers the data back to + * the CPU. + * + ****************************************************************************** + * + * @param[in] ipiv + * ipiv vector descriptor. + * + * @param[in] sequence + * The sequence in which to submit the calls to flush the data. + * + ****************************************************************************** + * + * @retval CHAMELEON_SUCCESS successful exit + * + */ +int CHAMELEON_Ipiv_Flush( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ) +{ + RUNTIME_ipiv_flush( ipiv, sequence ); + return CHAMELEON_SUCCESS; +} + +/** + ***************************************************************************** + * + * @ingroup Descriptor + * + * @brief Gathers an IPIV tile descriptor in a single vector on the given root node. + * + ****************************************************************************** + * + * @param[in] ipivdesc + * The ipiv vector descriptor to gather. + * + * @param[out] ipiv + * The ipiv vector where to store the result. Allocated vector of size + * ipivdesc->m on root, not referenced on other nodes. + * + * @param[in] root + * The root node on which to gather the data. 
+ * + ****************************************************************************** + * + * @retval CHAMELEON_SUCCESS successful exit + * + */ +int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int *ipiv, int root ) +{ + RUNTIME_ipiv_gather( ipivdesc, ipiv, root ); + return CHAMELEON_SUCCESS; +} diff --git a/coreblas/compute/core_zgetrf.c b/coreblas/compute/core_zgetrf.c index 9c3a8fb7c0ec6a0b360dce5f8e9c6627bb710c5b..3c65462504d3792ca61a48d423b99b9efff0d89d 100644 --- a/coreblas/compute/core_zgetrf.c +++ b/coreblas/compute/core_zgetrf.c @@ -11,14 +11,15 @@ * * @brief Chameleon core_zgetrf CPU kernel * - * @version 1.2.0 + * @version 1.3.0 * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede * @author Florent Pruvost - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2023-07-26 * @precisions normal z -> c d s * */ @@ -26,12 +27,10 @@ #include "coreblas.h" -int CORE_zgetrf(int m, int n, +int CORE_zgetrf( int m, int n, CHAMELEON_Complex64_t *A, int lda, - int *IPIV, int *info) + int *IPIV, int *info ) { *info = LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, m, n, A, lda, IPIV ); return CHAMELEON_SUCCESS; } - - diff --git a/coreblas/compute/core_zgetrf_panel.c b/coreblas/compute/core_zgetrf_panel.c index 2ec4b23a8f3e2f7cb37ff08f0721d96f1ed6dd9e..68911699b39b62aa2e12007048bab72311a620f6 100644 --- a/coreblas/compute/core_zgetrf_panel.c +++ b/coreblas/compute/core_zgetrf_panel.c @@ -9,10 +9,10 @@ * * @brief Chameleon core_zgetrf with partial pivoting CPU kernel * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn - * @date 2022-02-22 + * @date 2023-08-22 * @precisions normal z -> c d s * */ @@ -42,6 +42,10 @@ static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0; * @param[in] h * The index of the column to factorize in the matrix A. 
* + * @param[in] m0 + * The number of rows above the diagonale tile A in the global matrix to + * be factorized. + * * @param[in,out] A * On entry, the matrix A where column h-1 needs to be factorized, and * pivot for column h needs to be selected. @@ -56,9 +60,11 @@ static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0; * The leading dimension of the array A. lda >= max(1,m). * * @param[in,out] IPIV - * On entry, the pivot array of size min(m,n) with the first h-2 columns initialized. - * On exit, IPIV[h-1] is updated with the selected pivot for the previous column. - * + * On entry, the pivot array of size min(m,n) with the first h-2 + * columns initialized. + * On exit, IPIV[h-1] is updated with the selected pivot for the + * previous column. + * * * @param[in,out] nextpiv * On entry, the allocated and initialized CHAM_piv_t structure to * store the information related to pivot at stage h. @@ -169,6 +175,7 @@ CORE_zgetrf_panel_diag( int m, int n, int h, int m0, /* Store current diagonal row (in full) into pivot structure */ cblas_zcopy( n, A + h, lda, nextpiv->diagrow, 1 ); + return 0; } diff --git a/include/chameleon.h b/include/chameleon.h index cce6539fc5cc06e00d3cd887c18bd12f7d570ae1..253767cbb86d28adf7acc00a4b21d3edb935af39 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -18,7 +18,7 @@ * @author Florent Pruvost * @author Philippe Virouleau * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2023-08-22 * */ #ifndef _chameleon_h_ @@ -195,6 +195,13 @@ int CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flt blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd, blkrankof_fct_t get_rankof, void* get_rankof_arg ); +int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void *data ); +int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr ); +int CHAMELEON_Ipiv_Flush ( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ); +int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int 
*ipiv, int root ); +void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv ); + /** * * @ingroup Control diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index fa5f069e6057cb0f79eb47a7f0e2bbc38777a14b..2362d8b1a8ff0b1e8b4ff96b26a795d71f25761d 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -23,7 +23,7 @@ * @author Florent Pruvost * @author Alycia Lisito * @author Matthieu Kuhn - * @date 2023-07-06 + * @date 2023-08-22 * @precisions normal z -> c d s * */ @@ -135,7 +135,7 @@ int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, dou //int CHAMELEON_zgetrf_Tile(CHAM_desc_t *A, int *IPIV); int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV); int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A); -int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV ); +int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV ); //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV); //int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B); int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B); @@ -216,7 +216,7 @@ int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t * //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, 
CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index fe50552f405fdef0e5e985d3ebcdcdd39e49b4d5..9dbc2678d2f9e609889a0590ecd92b96d6515e8a 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -18,7 +18,8 @@ * @author Florent Pruvost * @author Alycia Lisito * @author Terry Cojean - * @date 2023-07-04 + * @author Matthieu Kuhn + * @date 2023-08-22 * */ #ifndef _chameleon_constants_h_ @@ -268,6 +269,8 @@ typedef enum chameleon_gemm_e { typedef enum chameleon_getrf_e { ChamGetrfNoPiv, ChamGetrfNoPivPerColumn, + ChamGetrfPPiv, + ChamGetrfPPivPerColumn, } cham_getrf_t; #define ChameleonTrd 1001 diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index 82818ba75fd6884940601b5e022a40ee96024a5a..a8aaaef56a42b2dbaa25664d86022c51c2f4cd09 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -10,7 +10,7 @@ *** * * @brief The common runtimes API - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Cedric Augonnet * @author Cedric Castagnede @@ -18,7 +18,7 @@ * @author Samuel Thibault * @author Philippe Swartvagher * @author Matthieu Kuhn - * @date 2022-02-22 + * @date 2023-08-22 * */ #ifndef _chameleon_runtime_h_ @@ -705,6 +705,32 @@ void RUNTIME_ddisplay_oneprofile (cham_tasktype_t task); void RUNTIME_sdisplay_allprofile (); void RUNTIME_sdisplay_oneprofile (cham_tasktype_t task); +void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv ); +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ); +void RUNTIME_ipiv_init ( CHAM_ipiv_t *ipiv ); +void RUNTIME_ipiv_gather ( CHAM_ipiv_t *desc, int *ipiv, 
int node ); + +void *RUNTIME_ipiv_getaddr ( CHAM_ipiv_t *ipiv, int m ); +void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ); +void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ); + +static inline void * +RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) { + if ( h%2 == 0 ) { + return RUNTIME_nextpiv_getaddr( ipiv, m, -1 ); + } + else { + return RUNTIME_prevpiv_getaddr( ipiv, m, -1 ); + } +} + +void RUNTIME_ipiv_flushk ( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ); +void RUNTIME_ipiv_flush ( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ); +void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, + CHAM_ipiv_t *ws, int k, int h ); + /** * @} */ diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index efa64a1c534b8472c4514cedb330919b31d4fa0c..d7dd07f48dc6fad1cff9359bf3410defdd657357 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -19,7 +19,7 @@ * @author Samuel Thibault * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2023-08-22 * */ #ifndef _chameleon_struct_h_ @@ -137,6 +137,25 @@ struct chameleon_desc_s { void *schedopt; // scheduler (QUARK|StarPU) specific structure }; +/** + * CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting + */ +typedef struct chameleon_piv_s { + const CHAM_desc_t *desc; /**> Reference descriptor to compute data mapping based on diagonal tiles, + and get floating reference type */ + int *data; /**> Pointer to the data */ + void *ipiv; /**> Opaque array of pointers for the runtimes to handle the ipiv array */ + void *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + void *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + int64_t mpitag_ipiv; /**> Initial mpi tag values for the ipiv handles */ + int64_t mpitag_nextpiv; /**> Initial mpi 
tag values for the nextpiv handles */ + int64_t mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles */ + int i; /**> row index to the beginning of the submatrix */ + int m; /**> The number of row in the vector ipiv */ + int mb; /**> The number of row per block */ + int mt; /**> The number of tiles */ + int n; /**> The number of column considered (must be updated for each panel) */ +} CHAM_ipiv_t; /** * CHAMELEON request uniquely identifies each asynchronous function call. diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index b58895aa4346b597fbd7dece2606df7a6de35fd3..c5704884e1e11331008519bff1a2b955fd6e4321 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -24,7 +24,7 @@ * @author Alycia Lisito * @author Romain Peressoni * @author Matthieu Kuhn - * @date 2023-07-06 + * @date 2023-08-22 * @precisions normal z -> c d s * */ @@ -486,15 +486,25 @@ void RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options, /* * Tasks for LU factorization with partial pivoting */ -void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un, - int iinfo ); +void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un, + int iinfo ); -void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un ); +void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un ); + +void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ws ); + +void INSERT_TASK_zgetrf_percol_offdiag( 
const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ws ); #endif /* _chameleon_tasks_z_h_ */ diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index fc1aac3355eb6fda653647270b5103280117c0d9..e63a4dd5e7203333b4890a2aa09c27f71fda66c4 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -17,14 +17,14 @@ # Univ. of California Berkeley, # Univ. of Colorado Denver. # -# @version 1.2.0 +# @version 1.3.0 # @author Cedric Castagnede # @author Emmanuel Agullo # @author Mathieu Faverge # @author Florent Pruvost # @author Philippe Virouleau # @author Matthieu Kuhn -# @date 2023-02-21 +# @date 2023-08-22 # ### @@ -66,7 +66,8 @@ set(CODELETS_ZSRC codelets/codelet_zgetrf.c codelets/codelet_zgetrf_incpiv.c codelets/codelet_zgetrf_nopiv.c - codelets/codelet_zpanel.c + codelets/codelet_zgetrf_nopiv_percol.c + codelets/codelet_zgetrf_percol.c codelets/codelet_zhe2ge.c codelets/codelet_zherfb.c codelets/codelet_zhessq.c diff --git a/runtime/openmp/CMakeLists.txt b/runtime/openmp/CMakeLists.txt index 36a5e18baed52702ea14b82b1c2209d6bb53b9f0..b9b712e5dde02765ed05fae4c8273abd369af2a8 100644 --- a/runtime/openmp/CMakeLists.txt +++ b/runtime/openmp/CMakeLists.txt @@ -76,6 +76,7 @@ set(RUNTIME_COMMON control/runtime_context.c control/runtime_control.c control/runtime_descriptor.c + control/runtime_descriptor_ipiv.c control/runtime_options.c control/runtime_profiling.c ${RUNTIME_COMMON_GENERATED} diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..589bce56f943e8e105fcbc4ed0f0ab704948b49b --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv_percol.c @@ -0,0 +1,60 @@ +/** + * + * @file openmp/codelet_zgetrf_nopiv_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgetrf_nopiv_percol OpenMP codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un, + int iinfo ) +{ + assert( 0 ); + (void)options; + (void)m; + (void)n; + (void)k; + (void)A; + (void)Am; + (void)An; + (void)U; + (void)Um; + (void)Un; + (void)iinfo; +} + +void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un ) +{ + assert( 0 ); + (void)options; + (void)m; + (void)n; + (void)k; + (void)A; + (void)Am; + (void)An; + (void)U; + (void)Um; + (void)Un; +} diff --git a/runtime/openmp/codelets/codelet_zgetrf_percol.c b/runtime/openmp/codelets/codelet_zgetrf_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..4a503f25f849e46b851e9279e1ca623ac68153ae --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgetrf_percol.c @@ -0,0 +1,52 @@ +/** + * + * @file openmp/codelet_zgetrf_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgetrf_percol OpenMP codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)options; + (void)h; + (void)m0; + (void)A; + (void)Am; + (void)An; + (void)ipiv; +} + +void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)options; + (void)h; + (void)m0; + (void)A; + (void)Am; + (void)An; + (void)ipiv; +} diff --git a/runtime/openmp/codelets/codelet_zpanel.c b/runtime/openmp/codelets/codelet_zpanel.c deleted file mode 100644 index 6c321a849d28cd46c80c15592bda618d917bd29c..0000000000000000000000000000000000000000 --- a/runtime/openmp/codelets/codelet_zpanel.c +++ /dev/null @@ -1,60 +0,0 @@ -/** - * - * @file openmp/codelet_zpanel.c - * - * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
- * - *** - * - * @brief Chameleon zpanel OpenMP codelets - * - * @version 1.2.0 - * @comment Codelets to perform panel factorization with partial pivoting - * - * @author Mathieu Faverge - * @date 2023-02-21 - * @precisions normal z -> c d s - * - */ -#include "chameleon_openmp.h" -#include "chameleon/tasks_z.h" - -void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un, - int iinfo ) -{ - assert( 0 ); - (void)options; - (void)m; - (void)n; - (void)k; - (void)A; - (void)Am; - (void)An; - (void)U; - (void)Um; - (void)Un; - (void)iinfo; -} - -void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un ) -{ - assert( 0 ); - (void)options; - (void)m; - (void)n; - (void)k; - (void)A; - (void)Am; - (void)An; - (void)U; - (void)Um; - (void)Un; -} - diff --git a/runtime/openmp/control/runtime_descriptor_ipiv.c b/runtime/openmp/control/runtime_descriptor_ipiv.c new file mode 100644 index 0000000000000000000000000000000000000000..03886ca650340279207c8163bc30eac81f4a1054 --- /dev/null +++ b/runtime/openmp/control/runtime_descriptor_ipiv.c @@ -0,0 +1,97 @@ +/** + * + * @file openmp/runtime_descriptor_ipiv.c + * + * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon OpenMP descriptor routines + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + */ +#include "chameleon_openmp.h" + +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + return NULL; +} + +void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + (void)h; + return NULL; +} + +void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + (void)h; + return NULL; +} + +void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)sequence; + (void)ipiv; + (void)m; +} + +void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ) +{ + assert( 0 ); + (void)ipiv; + (void)sequence; +} + +void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, + CHAM_ipiv_t *ipiv, int k, int h ) +{ + assert( 0 ); + (void)options; + (void)ipiv; + (void)k; + (void)h; +} + +void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node ) +{ + assert( 0 ); + (void)desc; + (void)ipiv; + (void)node; +} diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt index 74c4d0f928a8da8041e30e918ed9aae67057b70b..573d31d7956d9e7a29724a4770e77ce78f8533d9 100644 --- a/runtime/parsec/CMakeLists.txt +++ b/runtime/parsec/CMakeLists.txt @@ -88,6 +88,7 @@ set(RUNTIME_COMMON control/runtime_context.c control/runtime_control.c control/runtime_descriptor.c + control/runtime_descriptor_ipiv.c control/runtime_options.c control/runtime_profiling.c ${RUNTIME_COMMON_GENERATED} diff --git 
a/runtime/parsec/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..e8cf820534e15f6bcceab0004405892175aa7570 --- /dev/null +++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv_percol.c @@ -0,0 +1,60 @@ +/** + * + * @file parsec/codelet_zgetrf_nopiv_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgetrf_nopiv_percol Parsec codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_parsec.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un, + int iinfo ) +{ + assert( 0 ); + (void)options; + (void)m; + (void)n; + (void)k; + (void)A; + (void)Am; + (void)An; + (void)U; + (void)Um; + (void)Un; + (void)iinfo; +} + +void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un ) +{ + assert( 0 ); + (void)options; + (void)m; + (void)n; + (void)k; + (void)A; + (void)Am; + (void)An; + (void)U; + (void)Um; + (void)Un; +} diff --git a/runtime/parsec/codelets/codelet_zgetrf_percol.c b/runtime/parsec/codelets/codelet_zgetrf_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..f94717696c1d93378ac515ee77d3b6d3b6170b92 --- /dev/null +++ b/runtime/parsec/codelets/codelet_zgetrf_percol.c @@ -0,0 +1,52 @@ +/** + * + * @file parsec/codelet_zgetrf_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgetrf_percol Parsec codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_parsec.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)options; + (void)h; + (void)m0; + (void)A; + (void)Am; + (void)An; + (void)ipiv; +} + +void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)options; + (void)h; + (void)m0; + (void)A; + (void)Am; + (void)An; + (void)ipiv; +} diff --git a/runtime/parsec/codelets/codelet_zpanel.c b/runtime/parsec/codelets/codelet_zpanel.c deleted file mode 100644 index 41e9e2b5f0c6600805dac3596e64b10c3ac3b86d..0000000000000000000000000000000000000000 --- a/runtime/parsec/codelets/codelet_zpanel.c +++ /dev/null @@ -1,60 +0,0 @@ -/** - * - * @file parsec/codelet_zpanel.c - * - * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
- * - *** - * - * @brief Chameleon zpanel Parsec codelets - * - * @version 1.2.0 - * @comment Codelets to perform panel factorization with partial pivoting - * - * @author Mathieu Faverge - * @date 2023-02-21 - * @precisions normal z -> c d s - * - */ -#include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" - -void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un, - int iinfo ) -{ - assert( 0 ); - (void)options; - (void)m; - (void)n; - (void)k; - (void)A; - (void)Am; - (void)An; - (void)U; - (void)Um; - (void)Un; - (void)iinfo; -} - -void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un ) -{ - assert( 0 ); - (void)options; - (void)m; - (void)n; - (void)k; - (void)A; - (void)Am; - (void)An; - (void)U; - (void)Um; - (void)Un; -} - diff --git a/runtime/parsec/control/runtime_descriptor_ipiv.c b/runtime/parsec/control/runtime_descriptor_ipiv.c new file mode 100644 index 0000000000000000000000000000000000000000..04a0b791139d5c6a247b25630e126d4a3eb467bf --- /dev/null +++ b/runtime/parsec/control/runtime_descriptor_ipiv.c @@ -0,0 +1,97 @@ +/** + * + * @file parsec/runtime_descriptor_ipiv.c + * + * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon PaRSEC descriptor routines + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + */ +#include "chameleon_parsec.h" + +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + return NULL; +} + +void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + (void)h; + return NULL; +} + +void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + (void)h; + return NULL; +} + +void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)sequence; + (void)ipiv; + (void)m; +} + +void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ) +{ + assert( 0 ); + (void)ipiv; + (void)sequence; +} + +void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, + CHAM_ipiv_t *ipiv, int k, int h ) +{ + assert( 0 ); + (void)options; + (void)ipiv; + (void)k; + (void)h; +} + +void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node ) +{ + assert( 0 ); + (void)desc; + (void)ipiv; + (void)node; +} diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt index 12a673a12606310bae05f6f90d13ccd816d17e9f..e4acef7127cb671ed670195ea62e74ca209ec311 100644 --- a/runtime/quark/CMakeLists.txt +++ b/runtime/quark/CMakeLists.txt @@ -90,6 +90,7 @@ set(RUNTIME_COMMON control/runtime_context.c control/runtime_control.c control/runtime_descriptor.c + control/runtime_descriptor_ipiv.c control/runtime_options.c control/runtime_profiling.c ${RUNTIME_COMMON_GENERATED} diff --git 
a/runtime/quark/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/quark/codelets/codelet_zgetrf_nopiv_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..dc1d00d0a7d98248c3e4b0b0e3ccb8091eccf383 --- /dev/null +++ b/runtime/quark/codelets/codelet_zgetrf_nopiv_percol.c @@ -0,0 +1,60 @@ +/** + * + * @file quark/codelet_zgetrf_nopiv_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgetrf_nopiv_percol Quark codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_quark.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un, + int iinfo ) +{ + assert( 0 ); + (void)options; + (void)m; + (void)n; + (void)k; + (void)A; + (void)Am; + (void)An; + (void)U; + (void)Um; + (void)Un; + (void)iinfo; +} + +void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, + int m, int n, int k, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *U, int Um, int Un ) +{ + assert( 0 ); + (void)options; + (void)m; + (void)n; + (void)k; + (void)A; + (void)Am; + (void)An; + (void)U; + (void)Um; + (void)Un; +} diff --git a/runtime/quark/codelets/codelet_zgetrf_percol.c b/runtime/quark/codelets/codelet_zgetrf_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..baea4553d52827ea63db58f487b00d8d119bee9a --- /dev/null +++ b/runtime/quark/codelets/codelet_zgetrf_percol.c @@ -0,0 +1,52 @@ +/** + * + * @file quark/codelet_zgetrf_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgetrf_percol Quark codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_quark.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)options; + (void)h; + (void)m0; + (void)A; + (void)Am; + (void)An; + (void)ipiv; +} + +void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)options; + (void)h; + (void)m0; + (void)A; + (void)Am; + (void)An; + (void)ipiv; +} diff --git a/runtime/quark/codelets/codelet_zpanel.c b/runtime/quark/codelets/codelet_zpanel.c deleted file mode 100644 index 015ea31c720b42fb3fe965d84f291f2b45e30649..0000000000000000000000000000000000000000 --- a/runtime/quark/codelets/codelet_zpanel.c +++ /dev/null @@ -1,60 +0,0 @@ -/** - * - * @file quark/codelet_zpanel.c - * - * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
- * - *** - * - * @brief Chameleon zpanel Quark codelets - * - * @version 1.2.0 - * @comment Codelets to perform panel factorization with partial pivoting - * - * @author Mathieu Faverge - * @date 2023-02-21 - * @precisions normal z -> c d s - * - */ -#include "chameleon_quark.h" -#include "chameleon/tasks_z.h" - -void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un, - int iinfo ) -{ - assert( 0 ); - (void)options; - (void)m; - (void)n; - (void)k; - (void)A; - (void)Am; - (void)An; - (void)U; - (void)Um; - (void)Un; - (void)iinfo; -} - -void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options, - int m, int n, int k, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *U, int Um, int Un ) -{ - assert( 0 ); - (void)options; - (void)m; - (void)n; - (void)k; - (void)A; - (void)Am; - (void)An; - (void)U; - (void)Um; - (void)Un; -} - diff --git a/runtime/quark/control/runtime_descriptor_ipiv.c b/runtime/quark/control/runtime_descriptor_ipiv.c new file mode 100644 index 0000000000000000000000000000000000000000..34706a55518f95f0e4b229a772534e3f062d05d2 --- /dev/null +++ b/runtime/quark/control/runtime_descriptor_ipiv.c @@ -0,0 +1,97 @@ +/** + * + * @file quark/runtime_descriptor_ipiv.c + * + * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon Quark descriptor routines + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + */ +#include "chameleon_quark.h" + +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + return NULL; +} + +void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + (void)h; + return NULL; +} + +void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + assert( 0 ); + (void)ipiv; + (void)m; + (void)h; + return NULL; +} + +void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)sequence; + (void)ipiv; + (void)m; +} + +void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ) +{ + assert( 0 ); + (void)ipiv; + (void)sequence; +} + +void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, + CHAM_ipiv_t *ipiv, int k, int h ) +{ + assert( 0 ); + (void)options; + (void)ipiv; + (void)k; + (void)h; +} + +void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv ) +{ + assert( 0 ); + (void)ipiv; +} + +void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node ) +{ + assert( 0 ); + (void)desc; + (void)ipiv; + (void)node; +} diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 30ea76045131884c610a0f4ee430393f732f6360..2107134e1125e2ef2e9dcb55cf5e84cc7bd8b1cf 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -26,7 +26,7 @@ # @author Matthieu Kuhn # @author Loris Lucido # @author Terry Cojean -# @date 2023-07-06 +# @date 2023-08-22 # ### cmake_minimum_required(VERSION 3.1) @@ -222,11 +222,13 @@ set(RUNTIME_COMMON control/runtime_context.c control/runtime_control.c control/runtime_descriptor.c + 
control/runtime_descriptor_ipiv.c control/runtime_tags.c control/runtime_options.c control/runtime_profiling.c control/runtime_workspace.c interface/cham_tile_interface.c + interface/cppi_interface.c ${RUNTIME_COMMON_GENERATED} ) diff --git a/runtime/starpu/codelets/codelet_zpanel.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c similarity index 79% rename from runtime/starpu/codelets/codelet_zpanel.c rename to runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c index 7e450986c7448fbc1bd6c2398a7e21df9c63bd89..554735a47e654cadb16c77b209f1e6ed2f28d398 100644 --- a/runtime/starpu/codelets/codelet_zpanel.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c @@ -1,20 +1,19 @@ /** * - * @file starpu/codelet_zpanel.c + * @file starpu/codelet_zgetrf_nopiv_percol.c * * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. * *** * - * @brief Chameleon zpanel StarPU codelets + * @brief Chameleon getrf codelets to factorize the panel with no pivoting * * @version 1.3.0 - * @comment Codelets to perform panel factorization with partial pivoting * * @author Mathieu Faverge * @author Matthieu Kuhn - * @date 2023-07-06 + * @date 2023-08-22 * @precisions normal z -> c d s * */ @@ -22,11 +21,14 @@ #include "runtime_codelet_z.h" #include <coreblas/cblas_wrapper.h> +CHAMELEON_CL_CB( zgetrf_nopiv_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M ); +CHAMELEON_CL_CB( zgetrf_nopiv_percol_trsm, cti_handle_get_m(task->handles[0]), 0, 0, M ); + static const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0; static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0; #if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_panel_nopiv_percol_diag_cpu_func( void *descr[], void *cl_arg ) +static void cl_zgetrf_nopiv_percol_diag_cpu_func( void *descr[], void *cl_arg ) { CHAM_tile_t *tileA, *tileU; int m, n, k, lda, iinfo; @@ -71,17 +73,16 @@ static void cl_zgetrf_panel_nopiv_percol_diag_cpu_func( void 
*descr[], void *cl_ /* * Codelet definition */ -CODELETS_CPU( zgetrf_panel_nopiv_percol_diag, cl_zgetrf_panel_nopiv_percol_diag_cpu_func ); +CODELETS_CPU( zgetrf_nopiv_percol_diag, cl_zgetrf_nopiv_percol_diag_cpu_func ); -void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options, +void INSERT_TASK_zgetrf_nopiv_percol_diag( const RUNTIME_option_t *options, int m, int n, int k, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *U, int Um, int Un, int iinfo ) { - struct starpu_codelet *codelet = &cl_zgetrf_panel_nopiv_percol_diag; - // void (*callback)(void*) = options->profiling ? cl_zgetrf_panel_nopiv_percol_diag_callback : NULL; - void (*callback)(void*) = NULL; + struct starpu_codelet *codelet = &cl_zgetrf_nopiv_percol_diag; + void (*callback)(void*) = options->profiling ? cl_zgetrf_nopiv_percol_diag_callback : NULL; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW( A, Am, An ); @@ -102,7 +103,7 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zgetrf_panel_nopiv_percol_diag", + STARPU_NAME, "zgetrf_nopiv_percol_diag", #endif 0); } @@ -111,7 +112,7 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options * Update column blocs */ #if !defined(CHAMELEON_SIMULATION) -static void cl_zgetrf_panel_nopiv_percol_trsm_cpu_func( void *descr[], void *cl_arg ) +static void cl_zgetrf_nopiv_percol_trsm_cpu_func( void *descr[], void *cl_arg ) { CHAM_tile_t *tileA, *tileU; int m, n, k, lda; @@ -144,16 +145,15 @@ static void cl_zgetrf_panel_nopiv_percol_trsm_cpu_func( void *descr[], void *cl_ /* * Codelet definition */ -CODELETS_CPU( zgetrf_panel_nopiv_percol_trsm, cl_zgetrf_panel_nopiv_percol_trsm_cpu_func ); +CODELETS_CPU( zgetrf_nopiv_percol_trsm, cl_zgetrf_nopiv_percol_trsm_cpu_func ); -void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const 
RUNTIME_option_t *options, +void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, int m, int n, int k, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *U, int Um, int Un ) { - struct starpu_codelet *codelet = &cl_zgetrf_panel_nopiv_percol_trsm; - // void (*callback)(void*) = options->profiling ? cl_zgetrf_panel_nopiv_percol_trsm_callback : NULL; - void (*callback)(void*) = NULL; + struct starpu_codelet *codelet = &cl_zgetrf_nopiv_percol_trsm; + void (*callback)(void*) = options->profiling ? cl_zgetrf_nopiv_percol_trsm_callback : NULL; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A, Am, An); @@ -171,8 +171,7 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zgetrf_panel_nopiv_percol_trsm", + STARPU_NAME, "zgetrf_nopiv_percol_trsm", #endif 0); } - diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c new file mode 100644 index 0000000000000000000000000000000000000000..e5887b02b7faee4ccd67b3e989b040f87b142f82 --- /dev/null +++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c @@ -0,0 +1,164 @@ +/** + * + * @file starpu/codelet_zgetrf_percol.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zpanel StarPU codelets + * + * @version 1.3.0 + * @comment Codelets to perform panel factorization with partial pivoting + * + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * @precisions normal z -> c d s + * + */ +#include "chameleon_starpu.h" +#include "runtime_codelet_z.h" +#include <coreblas/cblas_wrapper.h> + +CHAMELEON_CL_CB( zgetrf_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M ); +CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M ); + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) +{ + int h, m0; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + CHAM_tile_t *tileA; + int *ipiv; + cppi_interface_t *nextpiv; + cppi_interface_t *prevpiv; + + starpu_codelet_unpack_args( cl_arg, &h, &m0, + &sequence, &request ); + + tileA = cti_interface_get(descr[0]); + ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[1]); + nextpiv = (cppi_interface_t*) descr[2]; + prevpiv = (cppi_interface_t*) descr[3]; + + if ( h > 0 ) { + cppi_display_dbg( prevpiv, stderr, "Prevpiv before call: " ); + } + if ( h < tileA->n ) { + cppi_display_dbg( nextpiv, stderr, "Nextpiv before call: " ); + } + + /* + * Make sure the nextpiv interface store the right information about the + * column and diagonal row for the reduction + */ + nextpiv->h = h; + nextpiv->has_diag = 1; + + CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, + CHAM_tile_get_ptr( tileA ), tileA->ld, + ipiv, &(nextpiv->pivot), &(prevpiv->pivot) ); + + if ( h > 0 ) { + cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " ); + } + if ( h < tileA->n ) { + cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " ); + } +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func ); + +void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, + int h, int m0, 
+ CHAM_desc_t *A, int Am, int An, + CHAM_ipiv_t *ipiv ) +{ + struct starpu_codelet *codelet = &cl_zgetrf_percol_diag; + void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_diag_callback : NULL; + + int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; + int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + + rt_starpu_insert_task( + codelet, + STARPU_VALUE, &h, sizeof(int), + STARPU_VALUE, &m0, sizeof(int), + STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), + STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), + access_npiv, RUNTIME_pivot_getaddr( ipiv, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + STARPU_EXECUTE_ON_WORKER, options->workerid, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zgetrf_percol_diag", +#endif + 0); +} + +#if !defined(CHAMELEON_SIMULATION) +static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg) +{ + int h, m0; + RUNTIME_sequence_t *sequence; + RUNTIME_request_t *request; + CHAM_tile_t *tileA; + cppi_interface_t *nextpiv; + cppi_interface_t *prevpiv; + + starpu_codelet_unpack_args( cl_arg, &h, &m0, &sequence, &request ); + + tileA = cti_interface_get(descr[0]); + nextpiv = (cppi_interface_t*) descr[1]; + prevpiv = (cppi_interface_t*) descr[2]; + + nextpiv->h = h; /* Initialize in case it uses a copy */ + + CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, + CHAM_tile_get_ptr(tileA), tileA->ld, + &(nextpiv->pivot), &(prevpiv->pivot) ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + +/* + * Codelet definition + */ +CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func) + +void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, + int h, int m0, + CHAM_desc_t *A, int Am, int An, + 
CHAM_ipiv_t *ipiv ) +{ + struct starpu_codelet *codelet = &cl_zgetrf_percol_offdiag; + + void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL; + + rt_starpu_insert_task( + codelet, + STARPU_VALUE, &h, sizeof(int), + STARPU_VALUE, &m0, sizeof(int), + STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *), + STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t *), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_REDUX, RUNTIME_pivot_getaddr( ipiv, An, h ), + STARPU_R, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, + STARPU_EXECUTE_ON_WORKER, options->workerid, +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "zgetrf_percol_offdiag", +#endif + 0); +} diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c index 450fecf315ff69f85e62c2b790eb6fee6cfe30a4..62c89a00f70c160a8c0ceef38ad78a086189923e 100644 --- a/runtime/starpu/control/runtime_control.c +++ b/runtime/starpu/control/runtime_control.c @@ -11,7 +11,7 @@ * * @brief Chameleon StarPU control routines * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Cedric Augonnet * @author Cedric Castagnede @@ -21,7 +21,7 @@ * @author Matthieu Kuhn * @author Loris Lucido * @author Terry Cojean - * @date 2023-01-30 + * @date 2023-08-22 * */ #include "chameleon_starpu.h" @@ -232,6 +232,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt, #endif starpu_cham_tile_interface_init(); + cppi_interface_init(); chameleon_starpu_parallel_worker_init( sched_opt ); return hres; diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index ee4817fe4cc35abf5a73baffad8b224fcf79452a..2ed4ba05a14db73dc624e812b14ad36e6769d70d 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -20,26 +20,11 @@ * @author Raphael Boucherie * @author Samuel Thibault 
* @author Loris Lucido - * @date 2023-07-06 + * @date 2023-08-22 * */ #include "chameleon_starpu.h" -/** - * Set the tag sizes - */ -#if defined(CHAMELEON_USE_MPI) - -#ifndef HAVE_STARPU_MPI_DATA_REGISTER -#define starpu_mpi_data_register( handle_, tag_, owner_ ) \ - do { \ - starpu_data_set_rank( (handle_), (owner_) ); \ - starpu_data_set_tag( (handle_), (tag_) ); \ - } while(0) -#endif - -#endif - /** * Malloc/Free of the data */ @@ -289,42 +274,6 @@ void RUNTIME_flush() #endif } -/** - * Different implementations of the flush call based on StarPU version - */ -#if defined(HAVE_STARPU_DATA_WONT_USE) - -static inline void -chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) { - starpu_data_wont_use( handle ); -} - -#elif defined(HAVE_STARPU_IDLE_PREFETCH) - -static inline void -chameleon_starpu_data_flush( void *_handle) -{ - starpu_data_handle_t handle = (starpu_data_handle_t)_handle; - starpu_data_idle_prefetch_on_node(handle, STARPU_MAIN_RAM, 1); - starpu_data_release_on_node(handle, -1); -} - -static inline void -chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) { - starpu_data_acquire_on_node_cb( handle, -1, STARPU_R, - chameleon_starpu_data_flush, handle ); -} - -#else - -static inline void -chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) { - starpu_data_acquire_cb( handle, STARPU_R, - (void (*)(void*))&starpu_data_release, handle ); -} - -#endif - void RUNTIME_desc_flush( const CHAM_desc_t *desc, const RUNTIME_sequence_t *sequence ) { diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c new file mode 100644 index 0000000000000000000000000000000000000000..4131f7d6c79858624ed0b324f6785aebfb195d7e --- /dev/null +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -0,0 +1,306 @@ +/** + * + * @file starpu/runtime_descriptor_ipiv.c + * + * @copyright 2022-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon StarPU descriptor routines + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + */ +#include "chameleon_starpu.h" + +/** + * Create ws_pivot runtime structures + */ +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) +{ + assert( ipiv ); + + ipiv->ipiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); + ipiv->nextpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); + ipiv->prevpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); +#if defined(CHAMELEON_USE_MPI) + /* + * Book the number of tags required to describe pivot structure + * One per handle type + */ + { + chameleon_starpu_tag_init(); + ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 3 ); + if ( ipiv->mpitag_ipiv == -1 ) { + chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure"); + return; + } + ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv + ipiv->mt; + ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + ipiv->mt; + } +#endif +} + +/** + * Destroy ws_pivot runtime structures + */ +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) +{ + int i; + starpu_data_handle_t *ipiv_handle = (starpu_data_handle_t*)(ipiv->ipiv); + starpu_data_handle_t *nextpiv_handle = (starpu_data_handle_t*)(ipiv->nextpiv); + starpu_data_handle_t *prevpiv_handle = (starpu_data_handle_t*)(ipiv->prevpiv); + + for(i=0; i<ipiv->mt; i++) { + if ( *ipiv_handle != NULL ) { + starpu_data_unregister( *ipiv_handle ); + *ipiv_handle = NULL; + } + ipiv_handle++; + + if ( *nextpiv_handle != NULL ) { + starpu_data_unregister( *nextpiv_handle ); + *nextpiv_handle = NULL; + } + nextpiv_handle++; + + if ( *prevpiv_handle != NULL ) { + starpu_data_unregister( *prevpiv_handle ); + *prevpiv_handle = NULL; + } + prevpiv_handle++; + } + + free( ipiv->ipiv ); + free( ipiv->nextpiv ); + free( ipiv->prevpiv ); + chameleon_starpu_tag_release( ipiv->mpitag_ipiv ); +} + +void 
*RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +{ + starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); + int64_t mm = m + (ipiv->i / ipiv->mb); + + handle += mm; + assert( handle ); + + if ( *handle != NULL ) { + return *handle; + } + + const CHAM_desc_t *A = ipiv->desc; + int owner = A->get_rankof( A, m, m ); + int ncols = (mm == (ipiv->mt-1)) ? ipiv->m - mm * ipiv->mb : ipiv->mb; + + starpu_vector_data_register( handle, -1, (uintptr_t)NULL, ncols, sizeof(int) ); + +#if defined(CHAMELEON_USE_MPI) + { + int64_t tag = ipiv->mpitag_ipiv + mm; + starpu_mpi_data_register( *handle, tag, owner ); + } +#endif /* defined(CHAMELEON_USE_MPI) */ + + assert( *handle ); + return *handle; +} + +void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv); + int64_t mm = m + (ipiv->i / ipiv->mb); + + nextpiv += mm; + assert( nextpiv ); + + if ( *nextpiv != NULL ) { + return *nextpiv; + } + + const CHAM_desc_t *A = ipiv->desc; + int owner = A->get_rankof( A, m, m ); + int ncols = (mm == (ipiv->mt-1)) ? ipiv->m - mm * ipiv->mb : ipiv->mb; + int64_t tag = ipiv->mpitag_nextpiv + mm; + + cppi_register( nextpiv, A->dtyp, ncols, tag, owner ); + + assert( *nextpiv ); + return *nextpiv; +} + +void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +{ + starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv); + int64_t mm = m + (ipiv->i / ipiv->mb); + + prevpiv += mm; + assert( prevpiv ); + + if ( *prevpiv != NULL ) { + return *prevpiv; + } + + const CHAM_desc_t *A = ipiv->desc; + int owner = A->get_rankof( A, m, m ); + int ncols = (mm == (ipiv->mt-1)) ? 
ipiv->m - mm * ipiv->mb : ipiv->mb; + int64_t tag = ipiv->mpitag_prevpiv + mm; + + cppi_register( prevpiv, A->dtyp, ncols, tag, owner ); + + assert( *prevpiv ); + return *prevpiv; +} + +void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + starpu_data_handle_t *handle; + const CHAM_desc_t *A = ipiv->desc; + int64_t mm = m + ( ipiv->i / ipiv->mb ); + + handle = (starpu_data_handle_t*)(ipiv->nextpiv); + handle += mm; + + if ( *handle != NULL ) { +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) +#endif + { + chameleon_starpu_data_wont_use( *handle ); + } + } + + handle = (starpu_data_handle_t*)(ipiv->prevpiv); + handle += mm; + + if ( *handle != NULL ) { +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) +#endif + { + chameleon_starpu_data_wont_use( *handle ); + } + } + + (void)sequence; + (void)ipiv; + (void)m; +} + +void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ) +{ + int m; + + for (m = 0; m < ipiv->mt; m++) + { + RUNTIME_ipiv_flushk( sequence, ipiv, m ); + } +} + +void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, + CHAM_ipiv_t *ipiv, int k, int h ) +{ + starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, k, h ); + starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( ipiv, k, h-1 ); + + if ( h < ipiv->n ) { +#if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI) +#if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP) + starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD, nextpiv, + options->priority, 2 /* Binary tree */ ); +#endif +#endif + } + + /* Invalidate the previous pivot structure for correct initialization in later reuse */ + if ( h > 0 ) { + starpu_data_invalidate_submit( prevpiv ); + } + + (void)options; +} + +static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg) 
+{ + int *ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[0]); + +#if !defined(CHAMELEON_SIMULATION) + { + int i, m0, n; + starpu_codelet_unpack_args( cl_arg, &m0, &n ); + + for( i=0; i<n; i++ ) { + ipiv[i] = m0 + i + 1; + } + } +#endif +} + +struct starpu_codelet cl_ipiv_init = { + .where = STARPU_CPU, + .cpu_func = cl_ipiv_init_cpu_func, + .nbuffers = 1, +}; + +void RUNTIME_ipiv_init( CHAM_ipiv_t *ipiv ) +{ + int64_t mt = ipiv->mt; + int64_t mb = ipiv->mb; + int m; + + for (m = 0; m < mt; m++) { + starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m ); + int m0 = m * mb; + int n = (m == (mt-1)) ? ipiv->m - m0 : mb; + + rt_starpu_insert_task( + &cl_ipiv_init, + STARPU_VALUE, &m0, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_W, ipiv_src, + 0); + } +} + +void RUNTIME_ipiv_gather( CHAM_ipiv_t *desc, int *ipiv, int node ) +{ + int64_t mt = desc->mt; + int64_t mb = desc->mb; + int64_t tag = chameleon_starpu_tag_book( (int64_t)(desc->mt) ); + int rank = CHAMELEON_Comm_rank(); + int m; + + for (m = 0; m < mt; m++, ipiv += mb) { + starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( desc, m ); + +#if defined(CHAMELEON_USE_MPI) + if ( (rank == node) || + (rank == starpu_mpi_data_get_rank(ipiv_src)) ) +#endif + { + starpu_data_handle_t ipiv_dst; + int ncols = (m == (mt-1)) ? desc->m - m * mb : mb; + uintptr_t ipivptr = (rank == node) ? (uintptr_t)ipiv : 0; + int home_node = (rank == node) ? 
STARPU_MAIN_RAM : -1; + + starpu_vector_data_register( &ipiv_dst, home_node, ipivptr, ncols, sizeof(int) ); + +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_data_register( ipiv_dst, tag + m, 0 ); +#endif /* defined(CHAMELEON_USE_MPI) */ + + assert( ipiv_dst ); + + starpu_data_cpy( ipiv_dst, ipiv_src, 0, NULL, NULL ); + starpu_data_unregister( ipiv_dst ); + } + } +} diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index 8d421ddbb5234d073ba9ea71b3c97ff3046ff9fc..4d21fe0d6e03be17be9d04bf5a3b680ee00cfc19 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -19,7 +19,8 @@ * @author Samuel Thibault * @author Loris Lucido * @author Terry Cojean - * @date 2023-07-06 + * @author Matthieu Kuhn + * @date 2023-08-22 * */ #ifndef _chameleon_starpu_h_ @@ -95,6 +96,7 @@ #include "runtime_codelet_profile.h" #include "runtime_workspace.h" #include "cham_tile_interface.h" +#include "cppi_interface.h" typedef struct starpu_schedopt_s { @@ -166,6 +168,9 @@ void chameleon_starpu_tag_release( int64_t min ); void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, cham_flttype_t dtyp); +#include "runtime_mpi.h" +#include "runtime_wontuse.h" + #if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE) static inline int chameleon_starpu_data_iscached(const CHAM_desc_t *A, int m, int n) diff --git a/runtime/starpu/include/cppi_interface.h b/runtime/starpu/include/cppi_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..537bc9cd807c9e27f0cf550d6611e2bc974255d3 --- /dev/null +++ b/runtime/starpu/include/cppi_interface.h @@ -0,0 +1,90 @@ +/** + * + * @file starpu/cppi_interface.h + * + * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Header to describe the Chameleon pivot panel interface in StarPU + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + */ +#ifndef _cppi_interface_h_ +#define _cppi_interface_h_ + +#include "chameleon_starpu.h" +#include <starpu_data.h> + +extern struct starpu_data_interface_ops cppi_ops; +#define CPPI_INTERFACE_ID cppi_ops.interfaceid + +struct cppi_interface_s; +typedef struct cppi_interface_s cppi_interface_t; + +/** + * Chameleon pivot interface + */ +struct cppi_interface_s +{ + CHAM_pivot_t pivot; /**< Copy of the pivot structure */ + size_t arraysize; /**< Allocated size */ + cham_flttype_t flttype; /**< Type of the elements of the matrix */ + int has_diag; /**< Bool to determine if pivot corresponds to diagonal block of current panel */ + int h; /**< Index of the current column being factorized */ + int n; /**< Number of elements in each row */ + enum starpu_data_interface_id id; /**< Identifier of the interface */ +}; + +void cppi_interface_init( void ); +void cppi_interface_fini( void ); + +void cppi_register( starpu_data_handle_t *handleptr, + cham_flttype_t flttype, + int n, + int64_t data_tag, + int data_rank ); + +void cl_cppi_redux_cpu_func( void *descr[], void *cl_arg ); + +#if defined(CHAMELEON_DEBUG_STARPU_CPPI_INTERFACE) +static inline void +cppi_display_dbg( cppi_interface_t *cppi_interface, FILE *f, const char *title ) +{ + int i; + double *diagrow, *pivrow; + diagrow = cppi_interface->pivot.diagrow; + pivrow = cppi_interface->pivot.pivrow; + + fprintf( f, "%sn=%2d, h=%2d, has_diag=%2d, m0=%2d, idx=%2d\n", + title, + cppi_interface->n, + cppi_interface->h, + cppi_interface->has_diag, + cppi_interface->pivot.blkm0, + cppi_interface->pivot.blkidx ); + + fprintf(f, "Diagonal row: " ); + for( i=0; i<cppi_interface->n; i++) { + fprintf(f, "%e ", diagrow[i] ); + } + fprintf(f, "\n" ); + fprintf(f, "Piv row: " ); + for( i=0; i<cppi_interface->n; i++) { + fprintf(f,
"%e ", pivrow[i] ); + } + fprintf(f, "\n" ); +} +#else /* !defined(CHAMELEON_DEBUG_STARPU_CPPI_INTERFACE) */ +static inline void +cppi_display_dbg( cppi_interface_t *cppi_interface, FILE *f, const char *title ) +{ + (void)cppi_interface; (void)f; (void)title; /* No-op stub; parameters must be named before C23 */ +} +#endif +#endif /* _cppi_interface_h_ */ diff --git a/runtime/starpu/include/runtime_mpi.h b/runtime/starpu/include/runtime_mpi.h new file mode 100644 index 0000000000000000000000000000000000000000..6d307bc6ae597ec075caf05c7dcbd382a16c4043 --- /dev/null +++ b/runtime/starpu/include/runtime_mpi.h @@ -0,0 +1,41 @@ +/** + * + * @file starpu/runtime_mpi.h + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon StarPU mpi function implementation + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2023-08-22 + * + */ +#ifndef _runtime_mpi_h_ +#define _runtime_mpi_h_ + +/** + * Provide starpu_mpi_data_register() when StarPU lacks it or when MPI is disabled + */ +#if defined(CHAMELEON_USE_MPI) + +#if !defined(HAVE_STARPU_MPI_DATA_REGISTER) +static inline void starpu_mpi_data_register( starpu_data_handle_t handle, int64_t tag, int owner ) +{ + starpu_data_set_rank( handle, owner ); + starpu_data_set_tag( handle, tag ); +} +#endif + +#else /* !defined(CHAMELEON_USE_MPI) */ + +static inline void starpu_mpi_data_register( starpu_data_handle_t handle, int64_t tag, int owner ) +{ +} + +#endif + +#endif /* _runtime_mpi_h_ */ diff --git a/runtime/starpu/include/runtime_wontuse.h b/runtime/starpu/include/runtime_wontuse.h new file mode 100644 index 0000000000000000000000000000000000000000..c5b1526d84156cbb004aa39fe114a8e395f6b32d --- /dev/null +++ b/runtime/starpu/include/runtime_wontuse.h @@ -0,0 +1,57 @@ +/** + * + * @file starpu/runtime_wontuse.h + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon StarPU wont use implementations to flush pieces of data + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2023-08-22 + * + */ +#ifndef _runtime_wontuse_h_ +#define _runtime_wontuse_h_ + +#include "chameleon_starpu.h" + +/** + * Different implementations of the flush call based on StarPU version + */ +#if defined(HAVE_STARPU_DATA_WONT_USE) + +static inline void +chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) { + starpu_data_wont_use( handle ); +} + +#elif defined(HAVE_STARPU_IDLE_PREFETCH) + +static inline void +chameleon_starpu_data_flush( void *_handle) +{ + starpu_data_handle_t handle = (starpu_data_handle_t)_handle; + starpu_data_idle_prefetch_on_node( handle, STARPU_MAIN_RAM, 1 ); + starpu_data_release_on_node( handle, -1 ); +} + +static inline void +chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) { + starpu_data_acquire_on_node_cb( handle, -1, STARPU_R, + chameleon_starpu_data_flush, handle ); +} + +#else + +static inline void +chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) { + starpu_data_acquire_cb( handle, STARPU_R, + (void (*)(void*))&starpu_data_release, handle ); +} + +#endif +#endif /* _runtime_wontuse_h_ */ diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c index 1e837e1b0ca4c4a5e8418e9acf86dcfb71029851..1a048178623ea2dea461e01218ad4dfd93bfb86f 100644 --- a/runtime/starpu/interface/cham_tile_interface.c +++ b/runtime/starpu/interface/cham_tile_interface.c @@ -13,7 +13,7 @@ * @author Mathieu Faverge * @author Gwenole Lucas * @author Samuel Thibault - * @date 2023-07-06 + * @date 2023-08-22 * */ #include "chameleon_starpu.h" @@ -115,7 +115,7 @@ cti_init( void *data_interface ) static void cti_register_data_handle( starpu_data_handle_t handle, - unsigned int home_node, + int home_node, void *data_interface ) { starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) data_interface; 
diff --git a/runtime/starpu/interface/cppi_interface.c b/runtime/starpu/interface/cppi_interface.c new file mode 100644 index 0000000000000000000000000000000000000000..2249f88b135e7152b751d26d4510fdd8e95c41a7 --- /dev/null +++ b/runtime/starpu/interface/cppi_interface.c @@ -0,0 +1,529 @@ +/** + * + * @file starpu/cppi_interface.c + * + * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon pivot panel interface for StarPU + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-22 + * + */ +#include "chameleon_starpu.h" +#undef HAVE_STARPU_REUSE_DATA_ON_NODE + +static inline CHAM_pivot_t * +cppi_handle_get( starpu_data_handle_t handle ) +{ + cppi_interface_t *cppi_interface = (cppi_interface_t *) + starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); + +#if defined(STARPU_DEBUG) + STARPU_ASSERT_MSG( cppi_interface->id == CPPI_INTERFACE_ID, + "Error. The given data is not a CHAM_pivot interface." 
); +#endif + + return &(cppi_interface->pivot); +} + +static void +cppi_init( void *data_interface ) +{ + cppi_interface_t *cppi_interface = (cppi_interface_t *)data_interface; + cppi_interface->id = CPPI_INTERFACE_ID; + cppi_interface->h = -1; + cppi_interface->has_diag = 0; +} + +static void +cppi_register_data_handle( starpu_data_handle_t handle, + int home_node, + void *data_interface ) +{ + cppi_interface_t *cppi_interface = + (cppi_interface_t *) data_interface; + int node; + + for (node = 0; node < STARPU_MAXNODES; node++) + { + cppi_interface_t *local_interface = (cppi_interface_t *) + starpu_data_get_interface_on_node(handle, node); + + memcpy( local_interface, cppi_interface, + sizeof( cppi_interface_t ) ); + + if ( node != home_node ) + { + local_interface->pivot.pivrow = NULL; + local_interface->pivot.diagrow = NULL; + } + } +} + +static starpu_ssize_t +cppi_allocate_data_on_node( void *data_interface, unsigned node ) +{ + cppi_interface_t *cppi_interface = (cppi_interface_t *) data_interface; + starpu_ssize_t requested_memory = cppi_interface->arraysize * 2; + void *dataptr = NULL; + + dataptr = (void*) starpu_malloc_on_node( node, requested_memory ); + if ( !dataptr ) { + return -ENOMEM; + } + + /* WARNING: Should not be a memset if GPU */ + //memset ((void*) dataptr, 0, requested_memory ); + + /* update the data properly in consequence */ + cppi_interface->h = -1; + cppi_interface->has_diag = 0; + cppi_interface->pivot.pivrow = dataptr; + cppi_interface->pivot.diagrow = ((char*)dataptr) + cppi_interface->arraysize; + memset( cppi_interface->pivot.pivrow, 0, cppi_interface->arraysize * 2 ); + + return requested_memory; +} + +static void +cppi_free_data_on_node( void *data_interface, unsigned node ) +{ + cppi_interface_t *cppi_interface = (cppi_interface_t *) data_interface; + starpu_ssize_t requested_memory = cppi_interface->arraysize * 2; + + starpu_free_on_node( node, (uintptr_t)(cppi_interface->pivot.pivrow), requested_memory ); + + 
cppi_interface->pivot.pivrow = NULL; + cppi_interface->pivot.diagrow = NULL; +} + +#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) +static void +cppi_reuse_data_on_node( void *dst_data_interface, const void *cached_interface, unsigned node ) +{ + (void)node; + cppi_interface_t *dst_pivot = (cppi_interface_t *)dst_data_interface; + cppi_interface_t *src_pivot = (cppi_interface_t *)cached_interface; + + /* update the data properly */ + dst_pivot->has_diag = 0; //src_pivot->has_diag; + dst_pivot->h = -1; //src_pivot->h; + dst_pivot->n = src_pivot->n; + dst_pivot->pivot = src_pivot->pivot; +} +#endif + +static size_t +cppi_get_size(starpu_data_handle_t handle) +{ + cppi_interface_t *cppi_interface = + starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); + size_t size; + +#if defined(STARPU_DEBUG) + STARPU_ASSERT_MSG( cppi_interface->id == CPPI_INTERFACE_ID, + "Error. The given data is not a pivot interface." ); +#endif + + size = cppi_interface->arraysize * 2 + 4 * sizeof(int); + return size; +} + +#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) +static size_t +cppi_get_alloc_size(starpu_data_handle_t handle) +{ + cppi_interface_t *cppi_interface = + starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); + +#if defined(STARPU_DEBUG) + STARPU_ASSERT_MSG( cppi_interface->id == CPPI_INTERFACE_ID, + "Error. The given data is not a pivot interface." 
); +#endif + + return cppi_interface->arraysize * 2; +} +#endif + +static uint32_t +cppi_footprint( starpu_data_handle_t handle ) +{ + cppi_interface_t *cppi_interface = + starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); + + return starpu_hash_crc32c_be( 2., cppi_interface->n ); +} + +#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) +static uint32_t +cppi_alloc_footprint( starpu_data_handle_t handle ) +{ + return starpu_hash_crc32c_be( cppi_get_alloc_size(handle), 0 ); +} +#endif + +static int +cppi_compare( void *data_interface_a, void *data_interface_b ) +{ + cppi_interface_t *cppi_interface_a = (cppi_interface_t *) data_interface_a; + cppi_interface_t *cppi_interface_b = (cppi_interface_t *) data_interface_b; + + /* Two pivot structures are considered compatible if they have the same size and the same flttype */ + return ( ( cppi_interface_a->n == cppi_interface_b->n ) && + ( cppi_interface_a->flttype == cppi_interface_b->flttype) ); +} + +#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) +static int +cppi_alloc_compare(void *data_interface_a, void *data_interface_b) +{ + cppi_interface_t *cppi_a = (cppi_interface_t *) data_interface_a; + cppi_interface_t *cppi_b = (cppi_interface_t *) data_interface_b; + + /* Two pivot structures are considered compatible if they have the same allocated size */ + return ( cppi_a->arraysize == cppi_b->arraysize ); +} +#endif + +static void +cppi_display( starpu_data_handle_t handle, FILE *f ) +{ + cppi_interface_t *cppi_interface = (cppi_interface_t *) starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM ); + + fprintf( f, "%d\t%d\t%d\t%d\n", + cppi_interface->n, + cppi_interface->h, + cppi_interface->pivot.blkm0, + cppi_interface->pivot.blkidx ); +} + +static int +cppi_pack_data( starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count ) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + cppi_interface_t *cppi_interface = (cppi_interface_t *) + starpu_data_get_interface_on_node(handle, node); + + *count = cppi_get_size( handle
); + + if ( ptr != NULL ) + { + int *tmp; + *ptr = (void *)starpu_malloc_on_node_flags( node, *count, 0 ); + tmp = (int*)(*ptr); + + /* Copy the pivot metadata */ + tmp[0] = cppi_interface->has_diag; + tmp[1] = cppi_interface->h; + tmp[2] = cppi_interface->pivot.blkm0; + tmp[3] = cppi_interface->pivot.blkidx; + tmp += 4; + + memcpy( tmp, cppi_interface->pivot.pivrow, cppi_interface->arraysize * 2 ); + } + + return 0; +} + +static int +cppi_peek_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count ) +{ + STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); + + cppi_interface_t *cppi_interface = (cppi_interface_t *) + starpu_data_get_interface_on_node(handle, node); + int *tmp = ptr; + size_t size = cppi_interface->arraysize * 2; + +#if defined(CHAMELEON_USE_MPI_DATATYPES) && 0 + /* + * We may end up here if an early reception occurred before the handle of the + * received data has been registered. Thus, datatype was not existent and we + * need to unpack the data ourselves + */ + STARPU_ASSERT( count == (size + 4 * sizeof(int)) ); + +#else + + cppi_interface->has_diag = tmp[0]; + cppi_interface->h = tmp[1]; + cppi_interface->pivot.blkm0 = tmp[2]; + cppi_interface->pivot.blkidx = tmp[3]; + tmp += 4; + + memcpy( cppi_interface->pivot.pivrow, tmp, size ); + +#endif + return 0; +} + +static int +cppi_unpack_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count ) +{ + cppi_peek_data( handle, node, ptr, count ); + + /* Free the received information */ + starpu_free_on_node_flags( node, (uintptr_t)ptr, count, 0 ); + + return 0; +} + +static starpu_ssize_t +cppi_describe( void *data_interface, char *buf, size_t size ) +{ + cppi_interface_t *cppi_interface = (cppi_interface_t *) data_interface; + + return snprintf( buf, size, "Pivot structure, n %d, blkm0 %d, blkidx %d", + cppi_interface->n, + cppi_interface->pivot.blkm0, + cppi_interface->pivot.blkidx ); +} + +static int +cppi_copy_any_to_any( void
*src_interface, unsigned src_node, + void *dst_interface, unsigned dst_node, void *async_data ) +{ + cppi_interface_t *cppi_interface_src = (cppi_interface_t *) src_interface; + cppi_interface_t *cppi_interface_dst = (cppi_interface_t *) dst_interface; + size_t size; + int ret = 0; + + STARPU_ASSERT( cppi_interface_src->n == cppi_interface_dst->n ); + STARPU_ASSERT( cppi_interface_src->flttype == cppi_interface_dst->flttype ); + + cppi_interface_dst->h = cppi_interface_src->h; + cppi_interface_dst->pivot.blkm0 = cppi_interface_src->pivot.blkm0; + cppi_interface_dst->pivot.blkidx = cppi_interface_src->pivot.blkidx; + + void *src_mat = cppi_interface_src->pivot.pivrow; + void *dst_mat = cppi_interface_dst->pivot.pivrow; + + size = cppi_interface_src->arraysize * 2; + + if ( starpu_interface_copy( (uintptr_t) src_mat, 0, src_node, + (uintptr_t) dst_mat, 0, dst_node, + size, async_data ) ) + { + ret = -EAGAIN; + } + + starpu_interface_data_copy( src_node, dst_node, size ); + + return ret; +} + +static const struct starpu_data_copy_methods cppi_copy_methods = +{ + .any_to_any = cppi_copy_any_to_any, +}; + +struct starpu_data_interface_ops cppi_ops = +{ + .init = cppi_init, + .register_data_handle = cppi_register_data_handle, + .allocate_data_on_node = cppi_allocate_data_on_node, + .free_data_on_node = cppi_free_data_on_node, +#if defined(HAVE_STARPU_REUSE_DATA_ON_NODE) + .reuse_data_on_node = cppi_reuse_data_on_node, + .alloc_compare = cppi_alloc_compare, + .alloc_footprint = cppi_alloc_footprint, +#endif + .get_size = cppi_get_size, + .footprint = cppi_footprint, + .compare = cppi_compare, + .display = cppi_display, + .pack_data = cppi_pack_data, +#if defined(HAVE_STARPU_DATA_PEEK) + .peek_data = cppi_peek_data, +#endif + .unpack_data = cppi_unpack_data, + .describe = cppi_describe, + .copy_methods =&cppi_copy_methods, + .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, + .interface_size = sizeof(cppi_interface_t), + .name = "CPPI_INTERFACE" +}; + + +static int 
compare_pivots( cham_flttype_t type, int h, void * redux, void * input ){ + if ( type == ChamRealFloat ) + { + float *valredux = redux; + float *valinput = input; + return fabsf( valredux[h] ) < fabsf( valinput[h] ); + } + else if ( type == ChamRealDouble ) + { + double *valredux = redux; + double *valinput = input; + return fabs( valredux[h] ) < fabs( valinput[h] ); + } + else if (type == ChamComplexFloat) + { + CHAMELEON_Complex32_t *valredux = redux; + CHAMELEON_Complex32_t *valinput = input; + return cabsf( valredux[h] ) < cabsf( valinput[h] ); + } + else if (type == ChamComplexDouble) + { + CHAMELEON_Complex64_t *valredux = redux; + CHAMELEON_Complex64_t *valinput = input; + return cabs( valredux[h] ) < cabs( valinput[h] ); + } + return 0; +} + +void +cl_cppi_redux_cpu_func(void *descr[], void *cl_arg) +{ + cppi_interface_t *cppi_redux = ((cppi_interface_t *) descr[0]); + cppi_interface_t *cppi_input = ((cppi_interface_t *) descr[1]); + + STARPU_ASSERT( cppi_redux->n == cppi_input->n ); + STARPU_ASSERT( cppi_redux->flttype == cppi_input->flttype ); + STARPU_ASSERT( cppi_redux->arraysize == cppi_input->arraysize ); + + cppi_display_dbg( cppi_input, stderr, "BRed Input: "); + cppi_display_dbg( cppi_redux, stderr, "BRed Inout: "); + + /* Set redux pivot h index to current h index */ + if ( cppi_input->h == -1 ) { + cppi_input->h = cppi_redux->h; + } + if ( cppi_redux->h == -1 ) { + cppi_redux->h = cppi_input->h; + } + assert( cppi_redux->h == cppi_input->h ); + + /* Let's copy the diagonal row if needed */ + if ( cppi_input->has_diag ) { + assert( cppi_redux->has_diag == 0 ); + + memcpy( cppi_redux->pivot.diagrow, + cppi_input->pivot.diagrow, + cppi_input->arraysize ); + cppi_redux->has_diag = 1; + } + + /* + * Let's now select the pivot: + * we have to compare the column entry corresponding to the diagonal element. 
+ */ + { + int h = cppi_redux->h; + void *pivrow_redux = cppi_redux->pivot.pivrow; + void *pivrow_input = cppi_input->pivot.pivrow; + + if( compare_pivots( cppi_redux->flttype, h, pivrow_redux, pivrow_input ) ) + { /* input holds the larger entry at index h: its row and block coordinates replace the accumulator's */ + cppi_redux->pivot.blkm0 = cppi_input->pivot.blkm0; + cppi_redux->pivot.blkidx = cppi_input->pivot.blkidx; + memcpy( pivrow_redux, + pivrow_input, + cppi_input->arraysize ); + } + } + + cppi_display_dbg( cppi_input, stderr, "ARed Input: "); + cppi_display_dbg( cppi_redux, stderr, "ARed Inout: "); + + return; +} + +/* + * Codelet definition + */ +CODELETS_CPU(cppi_redux, cl_cppi_redux_cpu_func) + +static void +cl_cppi_init_redux_cpu_func( void *descr[], void *cl_arg ) +{ + (void)cl_arg; + /* (void)descr; */ + cppi_interface_t *cppi_redux = ((cppi_interface_t *) descr[0]); + + /* Redux pivot never has diagonal at initialization; h is reset to -1, the value the reduction kernel replaces with the other side's h */ + cppi_redux->has_diag = 0; + cppi_redux->h = -1; + + /* No need to zero pivrow and diagrow, as copies will be made to initialize them; they are only cleared when CHAMELEON_DEBUG_STARPU is defined */ +#if defined(CHAMELEON_DEBUG_STARPU) + size_t size = cppi_redux->arraysize; + memset( cppi_redux->pivot.pivrow, 0, size ); + memset( cppi_redux->pivot.diagrow, 0, size ); +#endif +} + +/* + * Codelet definition + */ +CODELETS_CPU( cppi_init_redux, cl_cppi_init_redux_cpu_func ); + +static void cppi_redux_init( void ) __attribute__( ( constructor ) ); /* declared as a constructor; also called explicitly by cppi_interface_init() */ +static void cppi_redux_init( void ) +{ + cl_cppi_init_redux.nbuffers = 1; + cl_cppi_init_redux.modes[0] = STARPU_W; + cl_cppi_init_redux.name = "CPPI ALLREDUX INIT"; + + cl_cppi_redux.nbuffers = 2; + cl_cppi_redux.modes[0] = STARPU_RW | STARPU_COMMUTE; + cl_cppi_redux.modes[1] = STARPU_R; + /* Should be RW | COMMUTE to be an allreduce */ + //cl_cppi_redux.modes[1] = STARPU_RW | STARPU_COMMUTE; + cl_cppi_redux.name = "CPPI ALLREDUX TASK"; +} + +static void +cppi_set_reduction_methods( starpu_data_handle_t handle) +{ + starpu_data_set_reduction_methods( handle , + &cl_cppi_redux, + &cl_cppi_init_redux ); +} + +void +cppi_register( starpu_data_handle_t *handleptr, + 
cham_flttype_t flttype, + int n, + int64_t data_tag, + int data_rank ) +{ + cppi_interface_t cppi_interface = + { + .id = CPPI_INTERFACE_ID, + .arraysize = n * CHAMELEON_Element_Size( flttype ), /* used as the byte length of the pivrow and diagrow copies */ + .flttype = flttype, + .has_diag = 0, + .h = -1, /* same initial value as the one set by cl_cppi_init_redux_cpu_func */ + .n = n, + }; + starpu_data_register( handleptr, -1, &cppi_interface, &cppi_ops ); /* home node -1: presumably no initial copy, StarPU allocates on demand; confirm in the starpu_data_register documentation */ + +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_data_register( *handleptr, data_tag, data_rank ); +#endif /* defined(CHAMELEON_USE_MPI) */ + + cppi_set_reduction_methods( *handleptr ); /* attaches the redux and init codelets to the new handle */ +} + +void +cppi_interface_init() +{ /* assigns an interface id on first call only, then (re)configures the reduction codelets */ + if ( cppi_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID ) + { + cppi_ops.interfaceid = starpu_data_interface_get_next_id(); + } + cppi_redux_init(); +} + +void +cppi_interface_fini() +{} /* no finalization work is done */ diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c index 978017c413f879736b8f1321b081618608c45622..9ee58764c33ef8c6f4c53660ed1dc5be50af599b 100644 --- a/testing/testing_zgetrf.c +++ b/testing/testing_zgetrf.c @@ -15,7 +15,7 @@ * @author Alycia Lisito * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2023-08-22 * @precisions normal z -> c d s * */ @@ -39,19 +39,19 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) int M = run_arg_get_int( args, "M", N ); int LDA = run_arg_get_int( args, "LDA", M ); int seedA = run_arg_get_int( args, "seedA", testing_ialea() ); - cham_diag_t diag = run_arg_get_diag( args, "diag", ChamUnit ); + cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit ); int minMN = chameleon_min( M, N ); /* Descriptors */ - CHAM_desc_t *descA, *descIPIV; + CHAM_desc_t *descA; + CHAM_ipiv_t *descIPIV; void *ws = NULL; CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); /* Creates the matrices */ parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N ); - CHAMELEON_Desc_Create( - &descIPIV, CHAMELEON_MAT_ALLOC_TILE, ChamInteger, nb, 1, nb, minMN, 1, 0, 0, minMN, 1, CHAMELEON_Comm_size(), 1 ); + CHAMELEON_Ipiv_Create( &descIPIV, descA, NULL ); /* Fills 
the matrix with random values */ if ( diag == ChamUnit ) { @@ -71,7 +71,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) if ( async ) { hres = CHAMELEON_zgetrf_Tile_Async( descA, descIPIV, ws, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); - CHAMELEON_Desc_Flush( descIPIV, test_data.sequence ); + CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); } else { hres = CHAMELEON_zgetrf_Tile( descA, descIPIV ); @@ -82,19 +82,14 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) /* Checks the factorization and residual */ #if !defined(CHAMELEON_SIMULATION) if ( check ) { - CHAM_desc_t *descA0c, *descIPIVc; + CHAM_desc_t *descA0c; CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE ); - int *ipiv; /* Create A0c as local to rank 0 on all nodes to gather the matrix */ CHAMELEON_Desc_Create_User( &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1, chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL ); - CHAMELEON_Desc_Create_User( - &descIPIVc, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamInteger, - nb, 1, nb, M, 1, 0, 0, M, 1, 1, 1, - chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL ); if ( diag == ChamUnit ) { CHAMELEON_zplgtr_Tile( 0, ChamUpper, descA0c, seedA ); @@ -104,18 +99,21 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) CHAMELEON_zplrnt_Tile( descA0c, seedA ); } - /* Cheat code: float (s) is the same size as int */ - CHAMELEON_slacpy_Tile( ChamUpperLower, descIPIV, descIPIVc ); - ipiv = descIPIVc->mat; - /* Compute the permutation of A0: P * A0 */ if ( CHAMELEON_Comm_rank() == 0 ) { - LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, 1, M, ipiv, 1 ); + int *ipiv; + + ipiv = malloc( minMN * sizeof(int) ); + CHAMELEON_Ipiv_Gather( descIPIV, ipiv, 0 ); + LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, 1, minMN, ipiv, 1 ); + free( ipiv ); + } + else { + CHAMELEON_Ipiv_Gather( descIPIV, NULL, 0 ); } 
CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 ); CHAMELEON_Desc_Destroy( &descA0c ); - CHAMELEON_Desc_Destroy( &descIPIVc ); hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower, descA0, descA ); @@ -129,7 +127,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) } parameters_desc_destroy( &descA ); - CHAMELEON_Desc_Destroy( &descIPIV ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); return hres; }