diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index a8aaaef56a42b2dbaa25664d86022c51c2f4cd09..2655ef34669642404a79a566db7f0020323f9c65 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -18,7 +18,7 @@ * @author Samuel Thibault * @author Philippe Swartvagher * @author Matthieu Kuhn - * @date 2023-08-22 + * @date 2023-08-31 * */ #ifndef _chameleon_runtime_h_ @@ -710,9 +710,11 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ); void RUNTIME_ipiv_init ( CHAM_ipiv_t *ipiv ); void RUNTIME_ipiv_gather ( CHAM_ipiv_t *desc, int *ipiv, int node ); -void *RUNTIME_ipiv_getaddr ( CHAM_ipiv_t *ipiv, int m ); -void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ); -void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ); +void *RUNTIME_ipiv_getaddr ( const CHAM_ipiv_t *ipiv, int m ); +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ); +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ); +void *RUNTIME_perm_getaddr ( const CHAM_ipiv_t *ipiv, int m ); +void *RUNTIME_invp_getaddr ( const CHAM_ipiv_t *ipiv, int m ); static inline void * RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) { @@ -730,6 +732,8 @@ void RUNTIME_ipiv_flush ( const CHAM_ipiv_t *ipiv, const RUNTIME_sequence_t *sequence ); void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, CHAM_ipiv_t *ws, int k, int h ); +void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ); /** * @} diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index d7dd07f48dc6fad1cff9359bf3410defdd657357..e995dcbe18321d0b9bfef2c375318cee8c336b1b 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -19,7 +19,7 @@ * @author Samuel Thibault * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois - * @date 2023-08-22 + * @date 2023-08-31 * */ #ifndef _chameleon_struct_h_ @@ -143,13 +143,17 @@ struct chameleon_desc_s { typedef struct chameleon_piv_s 
{ const CHAM_desc_t *desc; /**> Reference descriptor to compute data mapping based on diagonal tiles, and get floating reference type */ - int *data; /**> Pointer to the data */ - void *ipiv; /**> Opaque array of pointers for the runtimes to handle the ipiv array */ - void *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ - void *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + int *data; /**> Pointer to the data */ + void *ipiv; /**> Opaque array of pointers for the runtimes to handle the ipiv array */ + void *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + void *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + void *perm; /**> Opaque array of pointers for the runtimes to handle the temporary permutation array */ + void *invp; /**> Opaque array of pointers for the runtimes to handle the temporary inverse permutation array */ int64_t mpitag_ipiv; /**> Initial mpi tag values for the ipiv handles */ int64_t mpitag_nextpiv; /**> Initial mpi tag values for the nextpiv handles */ int64_t mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles */ + int64_t mpitag_perm; /**> Initial mpi tag values for the perm handles */ + int64_t mpitag_invp; /**> Initial mpi tag values for the invp handles */ int i; /**> row index to the beginning of the submatrix */ int m; /**> The number of row in the vector ipiv */ int mb; /**> The number of row per block */ diff --git a/runtime/openmp/control/runtime_descriptor_ipiv.c b/runtime/openmp/control/runtime_descriptor_ipiv.c index 03886ca650340279207c8163bc30eac81f4a1054..f10c4156d83f3e50d4b523f3942b0757475b913f 100644 --- a/runtime/openmp/control/runtime_descriptor_ipiv.c +++ b/runtime/openmp/control/runtime_descriptor_ipiv.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn 
- * @date 2023-08-22 + * @date 2023-08-31 * */ #include "chameleon_openmp.h" @@ -29,7 +29,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) (void)ipiv; } -void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) { assert( 0 ); (void)ipiv; @@ -37,7 +37,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) return NULL; } -void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { assert( 0 ); (void)ipiv; @@ -46,7 +46,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return NULL; } -void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { assert( 0 ); (void)ipiv; @@ -55,6 +55,22 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return NULL; } +void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int k ) +{ + assert( 0 ); + (void)ipiv; + (void)k; + return NULL; +} + +void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k ) +{ + assert( 0 ); + (void)ipiv; + (void)k; + return NULL; +} + void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { @@ -72,6 +88,15 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, (void)sequence; } +void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)sequence; + (void)ipiv; + (void)m; +} + void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv, int k, int h ) { diff --git a/runtime/parsec/control/runtime_descriptor_ipiv.c b/runtime/parsec/control/runtime_descriptor_ipiv.c index 04a0b791139d5c6a247b25630e126d4a3eb467bf..fefb42abf9aaa65f98e2959bf09ca24779c95a7d 100644 --- a/runtime/parsec/control/runtime_descriptor_ipiv.c +++ b/runtime/parsec/control/runtime_descriptor_ipiv.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author 
Matthieu Kuhn - * @date 2023-08-22 + * @date 2023-08-31 * */ #include "chameleon_parsec.h" @@ -29,7 +29,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) (void)ipiv; } -void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) { assert( 0 ); (void)ipiv; @@ -37,7 +37,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) return NULL; } -void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { assert( 0 ); (void)ipiv; @@ -46,7 +46,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return NULL; } -void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { assert( 0 ); (void)ipiv; @@ -55,6 +55,22 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return NULL; } +void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int k ) +{ + assert( 0 ); + (void)ipiv; + (void)k; + return NULL; +} + +void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k ) +{ + assert( 0 ); + (void)ipiv; + (void)k; + return NULL; +} + void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { @@ -72,6 +88,15 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, (void)sequence; } +void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)sequence; + (void)ipiv; + (void)m; +} + void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv, int k, int h ) { diff --git a/runtime/quark/control/runtime_descriptor_ipiv.c b/runtime/quark/control/runtime_descriptor_ipiv.c index 34706a55518f95f0e4b229a772534e3f062d05d2..88e8f886e8578f99e066868e6dfb2880fc4035d0 100644 --- a/runtime/quark/control/runtime_descriptor_ipiv.c +++ b/runtime/quark/control/runtime_descriptor_ipiv.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge 
* @author Matthieu Kuhn - * @date 2023-08-22 + * @date 2023-08-31 * */ #include "chameleon_quark.h" @@ -29,7 +29,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) (void)ipiv; } -void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) { assert( 0 ); (void)ipiv; @@ -37,7 +37,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) return NULL; } -void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { assert( 0 ); (void)ipiv; @@ -46,7 +46,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return NULL; } -void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { assert( 0 ); (void)ipiv; @@ -55,6 +55,22 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return NULL; } +void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int k ) +{ + assert( 0 ); + (void)ipiv; + (void)k; + return NULL; +} + +void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k ) +{ + assert( 0 ); + (void)ipiv; + (void)k; + return NULL; +} + void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { @@ -72,6 +88,15 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, (void)sequence; } +void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); + (void)sequence; + (void)ipiv; + (void)m; +} + void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv, int k, int h ) { diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c index ffe3d1e47a24dbfa3e9986a477df07a95c2cdf32..69f09cb0ae291365ee9aafae8e579662c9ffd387 100644 --- a/runtime/starpu/control/runtime_descriptor_ipiv.c +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -12,7 +12,7 @@ * @version 1.3.0 * @author 
Mathieu Faverge * @author Matthieu Kuhn - * @date 2023-08-22 + * @date 2023-08-31 * */ #include "chameleon_starpu.h" @@ -23,10 +23,16 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) { assert( ipiv ); - - ipiv->ipiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); - ipiv->nextpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); - ipiv->prevpiv = (void*)calloc( ipiv->mt, sizeof(starpu_data_handle_t) ); + starpu_data_handle_t *handles = calloc( 5 * ipiv->mt, sizeof(starpu_data_handle_t) ); + ipiv->ipiv = handles; + handles += ipiv->mt; + ipiv->nextpiv = handles; + handles += ipiv->mt; + ipiv->prevpiv = handles; + handles += ipiv->mt; + ipiv->perm = handles; + handles += ipiv->mt; + ipiv->invp = handles; #if defined(CHAMELEON_USE_MPI) /* * Book the number of tags required to describe pivot structure @@ -34,13 +40,15 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) */ { chameleon_starpu_tag_init(); - ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 3 ); + ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 5 ); if ( ipiv->mpitag_ipiv == -1 ) { chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure"); return; } ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv + ipiv->mt; ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + ipiv->mt; + ipiv->mpitag_perm = ipiv->mpitag_prevpiv + ipiv->mt; + ipiv->mpitag_invp = ipiv->mpitag_perm + ipiv->mt; } #endif } @@ -51,37 +59,26 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) { int i; - starpu_data_handle_t *ipiv_handle = (starpu_data_handle_t*)(ipiv->ipiv); - starpu_data_handle_t *nextpiv_handle = (starpu_data_handle_t*)(ipiv->nextpiv); - starpu_data_handle_t *prevpiv_handle = (starpu_data_handle_t*)(ipiv->prevpiv); - - for(i=0; i<ipiv->mt; i++) { - if ( *ipiv_handle != NULL ) { - starpu_data_unregister( *ipiv_handle ); - *ipiv_handle = NULL; - } - ipiv_handle++; - - if ( 
*nextpiv_handle != NULL ) { - starpu_data_unregister( *nextpiv_handle ); - *nextpiv_handle = NULL; - } - nextpiv_handle++; + starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); - if ( *prevpiv_handle != NULL ) { - starpu_data_unregister( *prevpiv_handle ); - *prevpiv_handle = NULL; + for(i=0; i<(5 * ipiv->mt); i++) { + if ( *handle != NULL ) { + starpu_data_unregister( *handle ); + *handle = NULL; } - prevpiv_handle++; + handle++; } free( ipiv->ipiv ); - free( ipiv->nextpiv ); - free( ipiv->prevpiv ); + ipiv->ipiv = NULL; + ipiv->nextpiv = NULL; + ipiv->prevpiv = NULL; + ipiv->perm = NULL; + ipiv->invp = NULL; chameleon_starpu_tag_release( ipiv->mpitag_ipiv ); } -void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) +void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) { starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); int64_t mm = m + (ipiv->i / ipiv->mb); @@ -110,7 +107,7 @@ void *RUNTIME_ipiv_getaddr( CHAM_ipiv_t *ipiv, int m ) return *handle; } -void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv); int64_t mm = m + (ipiv->i / ipiv->mb); @@ -133,7 +130,7 @@ void *RUNTIME_nextpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return *nextpiv; } -void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) { starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv); int64_t mm = m + (ipiv->i / ipiv->mb); @@ -156,6 +153,64 @@ void *RUNTIME_prevpiv_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) return *prevpiv; } +void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int m ) +{ + starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->perm); + int64_t mm = m + (ipiv->i / ipiv->mb); + + handle += mm; + assert( handle ); + + if ( *handle != NULL ) { + return *handle; + } + + const 
CHAM_desc_t *A = ipiv->desc; + int owner = A->get_rankof( A, m, m ); + int ncols = ipiv->mb; + + starpu_vector_data_register( handle, -1, (uintptr_t)NULL, ncols, sizeof(int) ); + +#if defined(CHAMELEON_USE_MPI) + { + int64_t tag = ipiv->mpitag_perm + mm; + starpu_mpi_data_register( *handle, tag, owner ); + } +#endif /* defined(CHAMELEON_USE_MPI) */ + + assert( *handle ); + return *handle; +} + +void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m ) +{ + starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->invp); + int64_t mm = m + (ipiv->i / ipiv->mb); + + handle += mm; + assert( handle ); + + if ( *handle != NULL ) { + return *handle; + } + + const CHAM_desc_t *A = ipiv->desc; + int owner = A->get_rankof( A, m, m ); + int ncols = ipiv->mb; + + starpu_vector_data_register( handle, -1, (uintptr_t)NULL, ncols, sizeof(int) ); + +#if defined(CHAMELEON_USE_MPI) + { + int64_t tag = ipiv->mpitag_invp + mm; + starpu_mpi_data_register( *handle, tag, owner ); + } +#endif /* defined(CHAMELEON_USE_MPI) */ + + assert( *handle ); + return *handle; +} + void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { @@ -205,6 +260,44 @@ void RUNTIME_ipiv_flush( const CHAM_ipiv_t *ipiv, } } +void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + starpu_data_handle_t *handle; + const CHAM_desc_t *A = ipiv->desc; + int64_t mm = m + ( ipiv->i / ipiv->mb ); + + handle = (starpu_data_handle_t*)(ipiv->perm); + handle += mm; + + if ( *handle != NULL ) { +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) +#endif + { + chameleon_starpu_data_wont_use( *handle ); + } + } + + handle = (starpu_data_handle_t*)(ipiv->invp); + handle += mm; + + if ( *handle != NULL ) { +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle ); + if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) 
+#endif + { + chameleon_starpu_data_wont_use( *handle ); + } + } + + (void)sequence; + (void)ipiv; + (void)m; +} + void RUNTIME_ipiv_reducek( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv, int k, int h ) {