diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 584a965969186b0b16ea12122ac8944d6929283e..cdc98668ccf450b9bbfeef9af564836fa85e3af4 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -149,13 +149,13 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, if ( h < minmn ) { /* Reduce globally (between MPI processes) */ - INSERT_TASK_ipiv_reducek( options, ipiv, k, h ); + INSERT_TASK_ipiv_reducek( options, ipiv, k, h, A->myrank ); } } /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); + RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); } /* @@ -198,7 +198,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, if ( h < minmn ) { /* Reduce globally (between MPI processes) */ - INSERT_TASK_ipiv_reducek( options, ipiv, k, h ); + INSERT_TASK_ipiv_reducek( options, ipiv, k, h, A->myrank ); } } @@ -206,7 +206,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); + RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); } static inline void @@ -266,7 +266,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, assert( j<= minmn ); if ( j < minmn ) { /* Reduce globally (between MPI processes) */ - INSERT_TASK_ipiv_reducek( options, ipiv, k, j ); + INSERT_TASK_ipiv_reducek( options, ipiv, k, j, A->myrank ); } } } @@ -274,7 +274,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); + RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); } /* @@ -330,7 +330,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, assert( j <= minmn ); if ( j < minmn ) { /* Reduce globally (between MPI processes) */ - INSERT_TASK_ipiv_reducek( options, ipiv, k, j ); + INSERT_TASK_ipiv_reducek( options, ipiv, k, j, A->myrank ); } } } @@ -339,7 +339,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); + RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); } static inline void diff --git a/compute/zgetrf.c b/compute/zgetrf.c index a94a05551e554645e23311a20a0f1d2ef7794dd5..508a781254dbf71c53a84853e01184dd526831d6 100644 --- a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -19,6 +19,8 @@ * @author Florent Pruvost * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois + * @author Alycia Lisito + * @author Xavier Lacoste * @date 2024-03-16 * * @precisions normal z -> s d c @@ -88,7 +90,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_cleanenv( algostr ); } - ws->batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 1 ); + ws->batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 0 ); if ( ws->batch_size > CHAMELEON_BATCH_SIZE ) { chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" ); ws->batch_size = CHAMELEON_BATCH_SIZE; @@ -300,7 +302,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) if ( ( ws->alg == ChamGetrfPPivPerColumn ) || ( ws->alg == ChamGetrfPPiv ) ) { - chameleon_ipiv_destroy( &descIPIV ); + chameleon_ipiv_destroy( &descIPIV, &descAt ); } CHAMELEON_zgetrf_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); diff --git a/control/descriptor.h b/control/descriptor.h index 306abe6c5d320076eac9ed7c06aa82d10926aa46..1e0315fae2c70cdec40052e49a58b47c32a46ec9 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -20,6 +20,7 @@ * @author Raphael Boucherie * @author Samuel Thibault * @author Lionel Eyraud-Dubois + * @author Alycia Lisito * @date 2023-08-22 * */ @@ -77,7 +78,7 @@ void chameleon_desc_destroy ( CHAM_desc_t *desc ); int chameleon_desc_check ( const CHAM_desc_t *desc ); int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data ); -void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv ); +void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc ); /** * Internal function to return address of block (m,n) with m,n = block indices diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c index e9631909b89689df5498c29da368298d8753bc40..c3369b7a4126ea0b245eb73ed2d3b547f7f11523 100644 --- a/control/descriptor_ipiv.c +++ b/control/descriptor_ipiv.c @@ -12,6 +12,8 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn + * @author Alycia Lisito + * @author Florent Pruvost * @date 2024-03-16 * *** @@ -73,7 +75,7 @@ int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data ipiv->mt = chameleon_ceil( ipiv->m, ipiv->mb ); /* Create runtime specific structure like registering data */ - RUNTIME_ipiv_create( ipiv ); + RUNTIME_ipiv_create( ipiv, desc ); return rc; } @@ -91,9 +93,10 @@ int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, void *data * The pointer to the ipiv descriptor to destroy. * */ -void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv ) +void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, + const CHAM_desc_t *desc ) { - RUNTIME_ipiv_destroy( ipiv ); + RUNTIME_ipiv_destroy( ipiv, desc ); } /** @@ -162,7 +165,8 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void * @retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Ipiv_Destroy(CHAM_ipiv_t **ipivptr) +int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr, + const CHAM_desc_t *desc ) { CHAM_context_t *chamctxt; CHAM_ipiv_t *ipiv; @@ -179,7 +183,7 @@ int CHAMELEON_Ipiv_Destroy(CHAM_ipiv_t **ipivptr) } ipiv = *ipivptr; - chameleon_ipiv_destroy( ipiv ); + chameleon_ipiv_destroy( ipiv, desc ); free(ipiv); *ipivptr = NULL; return CHAMELEON_SUCCESS; diff --git a/include/chameleon.h b/include/chameleon.h index f1d33549595e475ee4bf60514bd08689d5416b40..12c295a7732ef73f1a1fac421bb38be6f0cdd9ea 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -18,6 +18,8 @@ * @author Florent Pruvost * @author Philippe Virouleau * @author Lionel Eyraud-Dubois + * @author Alycia Lisito + * @author Loris Lucido * @date 2024-03-16 * */ @@ -214,11 +216,16 @@ int CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flt blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd, blkrankof_fct_t get_rankof, void* get_rankof_arg ); -int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void *data ); -int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr ); +int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t **ipivptr, + const CHAM_desc_t *desc, + void *data ); +int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr, + const CHAM_desc_t *desc ); int CHAMELEON_Ipiv_Flush ( const CHAM_ipiv_t *ipiv, const RUNTIME_sequence_t *sequence ); -int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int *ipiv, int root ); +int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, + int *ipiv, + int root ); void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv ); /** diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index e64390f6c2c16d3c6c730748be710075e6e70f21..52993c9a6a8130bc1727a74777511bd03a3f48f3 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -18,6 +18,7 @@ * @author Samuel Thibault * @author Philippe Swartvagher * @author Matthieu Kuhn + * @author Alycia Lisito * @date 2024-03-16 * */ @@ -717,8 +718,10 @@ void RUNTIME_ddisplay_oneprofile (cham_tasktype_t task); void RUNTIME_sdisplay_allprofile (); void RUNTIME_sdisplay_oneprofile (cham_tasktype_t task); -void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv ); -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ); +void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv, + const CHAM_desc_t *desc ); +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, + const CHAM_desc_t *desc ); void RUNTIME_ipiv_gather ( const RUNTIME_sequence_t *sequence, CHAM_ipiv_t *desc, int *ipiv, int node ); @@ -730,18 +733,18 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ); void *RUNTIME_ipiv_getaddr ( const CHAM_ipiv_t *ipiv, int m ); -void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ); -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ); +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ); +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ); void *RUNTIME_perm_getaddr ( const CHAM_ipiv_t *ipiv, int m ); void *RUNTIME_invp_getaddr ( const CHAM_ipiv_t *ipiv, int m ); static inline void * -RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int m, int h ) { +RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int rank, int k, int h ) { if ( h%2 == 0 ) { - return RUNTIME_nextpiv_getaddr( ipiv, m, -1 ); + return RUNTIME_nextpiv_getaddr( ipiv, rank, k, h ); } else { - return RUNTIME_prevpiv_getaddr( ipiv, m, -1 ); + return RUNTIME_prevpiv_getaddr( ipiv, rank, k, h ); } } diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h index aa21e99d8f85c82b9484da1aa7d599d995b66bb4..99d70dbade30332f9af8ce5397636f8023a10e24 100644 --- a/include/chameleon/tasks.h +++ b/include/chameleon/tasks.h @@ -16,6 +16,7 @@ * @author Cedric Augonnet * @author Florent Pruvost * @author Matthieu Kuhn + * @author Alycia Lisito * @date 2024-03-16 * */ @@ -165,7 +166,7 @@ void INSERT_TASK_hgemm( const RUNTIME_option_t *options, void INSERT_TASK_ipiv_init ( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv ); void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ws, int k, int h ); + CHAM_ipiv_t *ws, int k, int h, int rank ); void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options, int m0, int m, int k, const CHAM_ipiv_t *ipivdesc, int ipivk ); diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c index 64e6031391793de8dc829e2ac47eddabfdba7be5..e5dba252a6312d625a825485cc84d0657973f435 100644 --- a/runtime/starpu/codelets/codelet_ipiv.c +++ b/runtime/starpu/codelets/codelet_ipiv.c @@ -12,6 +12,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn + * @author Alycia Lisito * @date 2024-03-16 * */ @@ -62,13 +63,13 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options, } void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, int k, int h ) + CHAM_ipiv_t *ipiv, int k, int h, int rank ) { - starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( ipiv, k, h-1 ); + starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h-1 ); #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI) #if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP) - starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, k, h ); + starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h ); if ( h < ipiv->n ) { starpu_mpi_redux_data_prio_tree( options->sequence->comm, nextpiv, options->priority, 2 /* Binary tree */ ); diff --git a/runtime/starpu/codelets/codelet_zgetrf_batched.c b/runtime/starpu/codelets/codelet_zgetrf_batched.c index 1d4cb37da9bc6099305ddcf9eb4516fb17feaf52..1ead5ec173f734b8538fdd80abe5e34676d37450 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_batched.c +++ b/runtime/starpu/codelets/codelet_zgetrf_batched.c @@ -43,15 +43,16 @@ cl_zgetrf_panel_offdiag_batched_cpu_func( void *descr[], void *cl_arg ) { struct cl_getrf_batched_args_t *clargs = (struct cl_getrf_batched_args_t *) cl_arg; - cppi_interface_t *nextpiv = (cppi_interface_t*) descr[0]; - cppi_interface_t *prevpiv = (cppi_interface_t*) descr[1]; + cppi_interface_t *nextpiv = (cppi_interface_t*) descr[ clargs->tasks_nbr ]; + cppi_interface_t *prevpiv = (cppi_interface_t*) descr[ clargs->tasks_nbr + 1 ]; int i, m, n, h, m0, lda; CHAM_tile_t *tileA; nextpiv->h = clargs->h; + nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag ); for ( i = 0; i < clargs->tasks_nbr; i++ ) { - tileA = cti_interface_get( descr[ i + 2 ] ); + tileA = cti_interface_get( descr[ i ] ); lda = tileA->ld; m = clargs->m[ i ]; n = clargs->n[ i ]; @@ -77,6 +78,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, int batch_size = ((struct chameleon_pzgetrf_s *)ws)->batch_size; void (*callback)(void*) = NULL; struct cl_getrf_batched_args_t *clargs = *clargs_ptr; + int rankA = A->get_rankof( A, Am, An ); /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; @@ -85,6 +87,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, if ( clargs == NULL ) { clargs = malloc( sizeof( struct cl_getrf_batched_args_t ) ) ; + memset( clargs, 0, sizeof( struct cl_getrf_batched_args_t ) ); clargs->tasks_nbr = 0; clargs->h = h; clargs->cl_name = "zgetrf_panel_offdiag_batched"; @@ -104,13 +107,15 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, A->get_blktile( A, Am, An ) ); if ( clargs->tasks_nbr == batch_size ) { + int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; rt_starpu_insert_task( &cl_zgetrf_panel_offdiag_batched, /* Task codelet arguments */ STARPU_CL_ARGS, clargs, sizeof(struct cl_getrf_batched_args_t), - STARPU_REDUX, RUNTIME_pivot_getaddr( ipiv, An, h ), - STARPU_R, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, @@ -132,18 +137,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, { void (*callback)(void*) = NULL; struct cl_getrf_batched_args_t *clargs = *clargs_ptr; + int rankA = A->myrank; if ( clargs == NULL ) { return; } + int access_npiv = ( clargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( clargs->h == 0 ) ? STARPU_NONE : STARPU_R; rt_starpu_insert_task( &cl_zgetrf_panel_offdiag_batched, /* Task codelet arguments */ STARPU_CL_ARGS, clargs, sizeof(struct cl_getrf_batched_args_t), - STARPU_REDUX, RUNTIME_pivot_getaddr( ipiv, An, clargs->h ), - STARPU_R, RUNTIME_pivot_getaddr( ipiv, An, clargs->h-1 ), STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h-1 ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, @@ -162,20 +170,27 @@ cl_zgetrf_panel_blocked_batched_cpu_func( void *descr[], void *cl_arg ) { struct cl_getrf_batched_args_t *clargs = ( struct cl_getrf_batched_args_t * ) cl_arg; - int *ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[clargs->tasks_nbr]); - cppi_interface_t *nextpiv = (cppi_interface_t*) descr[clargs->tasks_nbr + 1]; - cppi_interface_t *prevpiv = (cppi_interface_t*) descr[clargs->tasks_nbr + 2]; + int *ipiv; + cppi_interface_t *nextpiv = (cppi_interface_t*) descr[clargs->tasks_nbr ]; + cppi_interface_t *prevpiv = (cppi_interface_t*) descr[clargs->tasks_nbr + 1]; int i, h, ib; CHAM_tile_t *tileA, *tileU; CHAMELEON_Complex64_t *U = NULL; int ldu = -1; nextpiv->h = clargs->h; + nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag); h = clargs->h; ib = clargs->ib; i = 0; if ( clargs->diag ) { + if ( h == 0 ) { + ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[clargs->tasks_nbr + 1]); + } + else { + ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[clargs->tasks_nbr + 2]); + } if ( h != 0 ) { tileU = cti_interface_get( descr[ clargs->tasks_nbr + 3 ] ); U = CHAM_tile_get_ptr( tileU ); @@ -190,7 +205,7 @@ cl_zgetrf_panel_blocked_batched_cpu_func( void *descr[], i++; } if ( ( h%ib == 0 ) && ( h > 0 ) ) { - tileU = cti_interface_get( descr[ clargs->tasks_nbr + 3 ] ); + tileU = cti_interface_get( descr[ clargs->tasks_nbr + 2 + clargs->diag ] ); U = CHAM_tile_get_ptr( tileU ); ldu = tileU->ld; } @@ -225,6 +240,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, void (*callback)(void*) = NULL; int accessU, access_npiv, access_ipiv, access_ppiv; struct cl_getrf_batched_args_t *clargs = *clargs_ptr; + int rankA = A->get_rankof(A, Am, An); /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; @@ -232,7 +248,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, CHAMELEON_END_ACCESS_DECLARATION; if ( clargs == NULL ) { - clargs = malloc( sizeof( struct cl_getrf_batched_args_t ) ) ; + clargs = malloc( sizeof( struct cl_getrf_batched_args_t ) ); + memset( clargs, 0, sizeof( struct cl_getrf_batched_args_t ) ); clargs->tasks_nbr = 0; clargs->diag = ( Am == An ); clargs->ib = ib; @@ -271,24 +288,25 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, } /* If there isn't a diag task then use offdiag access */ if ( clargs->diag == 0 ) { - accessU = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE; + accessU = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE; + access_ipiv = STARPU_NONE; } rt_starpu_insert_task( &cl_zgetrf_panel_blocked_batched, /* Task codelet arguments */ STARPU_CL_ARGS, clargs, sizeof(struct cl_getrf_batched_args_t), + STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), + accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, clargs->cl_name, #endif - STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, - access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), - access_npiv, RUNTIME_pivot_getaddr( ipiv, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), - accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ), 0); /* clargs is freed by starpu. */ @@ -306,6 +324,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, int accessU, access_npiv, access_ipiv, access_ppiv; void (*callback)(void*) = NULL; struct cl_getrf_batched_args_t *clargs = *clargs_ptr; + int rankA = A->myrank; if ( clargs == NULL ) { return; @@ -328,24 +347,25 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, } /* If there isn't a diag task then use offdiag access */ if ( clargs->diag == 0 ) { - accessU = ((clargs->h%clargs->ib == 0) && (clargs->h > 0)) ? STARPU_R : STARPU_NONE; + accessU = ((clargs->h%clargs->ib == 0) && (clargs->h > 0)) ? STARPU_R : STARPU_NONE; + access_ipiv = STARPU_NONE; } rt_starpu_insert_task( &cl_zgetrf_panel_blocked_batched, /* Task codelet arguments */ STARPU_CL_ARGS, clargs, sizeof(struct cl_getrf_batched_args_t), + STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h - 1 ), + access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), + accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, clargs->cl_name, #endif - STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, - access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), - access_npiv, RUNTIME_pivot_getaddr( ipiv, An, clargs->h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, clargs->h - 1 ), - accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ), 0); /* clargs is freed by starpu. */ diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c index 2c6daa18d9bda1f7ff433305aa98ad77f648b4b5..d11d27365fe730e1db163c51c868359d6f40a129 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c +++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c @@ -14,6 +14,7 @@ * * @author Mathieu Faverge * @author Matthieu Kuhn + * @author Alycia Lisito * @date 2024-03-11 * @precisions normal z -> c d s * @@ -67,6 +68,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg) nextpiv->h = h; nextpiv->has_diag = 1; + coreblas_kernel_trace( tileA ); CORE_zgetrf_panel_diag( m, n, h, m0, ib, CHAM_tile_get_ptr( tileA ), tileA->ld, U, ldu, @@ -95,6 +97,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, struct starpu_codelet *codelet = &cl_zgetrf_blocked_diag; void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_diag_callback : NULL; const char *cl_name = "zgetrf_blocked_diag"; + int rankA = A->get_rankof(A, Am, An); int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; @@ -130,25 +133,24 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, cl_name, #endif - /* STARPU_NONE must be the last argument for older version of StarPU where STARPU_NONE = 0 */ - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), - access_npiv, RUNTIME_pivot_getaddr( ipiv, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), - accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), 0); } #if !defined(CHAMELEON_SIMULATION) static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) { - int m, n, h, m0, ib; + int m, n, h, k, m0, ib; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; CHAM_tile_t *tileA; @@ -156,9 +158,9 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) cppi_interface_t *nextpiv; cppi_interface_t *prevpiv; CHAMELEON_Complex64_t *U = NULL; - int ldu = -1;; + int ldu = -1; - starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib, &sequence, &request ); + starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &k, &m0, &ib, &sequence, &request ); tileA = cti_interface_get(descr[0]); nextpiv = (cppi_interface_t*) descr[1]; @@ -169,12 +171,28 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) ldu = tileU->ld; } + if ( h > 0 ) { + cppi_display_dbg( prevpiv, stderr, "Prevpiv offdiag before call: " ); + } + if ( h < tileA->n ) { + cppi_display_dbg( nextpiv, stderr, "Nextpiv offdiag before call: " ); + } + nextpiv->h = h; /* Initialize in case it uses a copy */ + nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag); + coreblas_kernel_trace( tileA ); CORE_zgetrf_panel_offdiag( m, n, h, m0, ib, CHAM_tile_get_ptr(tileA), tileA->ld, U, ldu, &(nextpiv->pivot), &(prevpiv->pivot) ); + + if ( h > 0 ) { + cppi_display_dbg( prevpiv, stderr, "Prevpiv offdiag after call: " ); + } + if ( h < tileA->n ) { + cppi_display_dbg( nextpiv, stderr, "Nextpiv offdiag after call: " ); + } } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -190,9 +208,11 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv ) { struct starpu_codelet *codelet = &cl_zgetrf_blocked_offdiag; + int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; int accessU = ((h%ib == 0) && (h > 0)) ? STARPU_R : STARPU_NONE; + int rankA = A->get_rankof(A, Am, An); void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_offdiag_callback : NULL; const char *cl_name = "zgetrf_blocked_offdiag"; @@ -200,6 +220,9 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW( A, Am, An ); + if ((h%ib == 0) && (h > 0)) { + CHAMELEON_ACCESS_R( U, Um, Un ); + } CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ @@ -211,21 +234,21 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &h, sizeof(int), + STARPU_VALUE, &An, sizeof(int), STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *), STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t *), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, cl_name, #endif - /* STARPU_NONE must be the last argument for older version of StarPU where STARPU_NONE = 0 */ - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - access_npiv, RUNTIME_pivot_getaddr( ipiv, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), - accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), 0); } @@ -247,6 +270,8 @@ static void cl_zgetrf_blocked_trsm_cpu_func(void *descr[], void *cl_arg) U = CHAM_tile_get_ptr( tileU ); ldu = tileU->ld; + coreblas_kernel_trace( tileU ); + /* Copy the final max line of the block and solve */ cblas_zcopy( n, prevpiv->pivot.pivrow, 1, U + m - 1, ldu ); @@ -276,6 +301,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, void (*callback)(void*) = options->profiling ? cl_zgetrf_blocked_trsm_callback : NULL; const char *cl_name = "zgetrf_blocked_trsm"; + int rankU = U->get_rankof(U, Um, Un); /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; @@ -293,7 +319,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, STARPU_VALUE, &h, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), - STARPU_R, RUNTIME_pivot_getaddr( ipiv, Un, h-1 ), + STARPU_R, RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c index 5d3f83b6ce046a72135c8f513c8cc23822159595..df2301782ec00590159297c9a2473ed28edfd548 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_percol.c +++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c @@ -14,6 +14,7 @@ * * @author Mathieu Faverge * @author Matthieu Kuhn + * @author Alycia Lisito * @date 2024-03-11 * @precisions normal z -> c d s * @@ -95,8 +96,7 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ - cl_name = chameleon_codelet_name( cl_name, 1, - A->get_blktile( A, Am, An ) ); + cl_name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) ); rt_starpu_insert_task( codelet, @@ -106,17 +106,16 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, cl_name, #endif - /* STARPU_NONE must be the last argument for older version of StarPU where STARPU_NONE = 0 */ - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), - access_npiv, RUNTIME_pivot_getaddr( ipiv, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), 0); } @@ -137,6 +136,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg) prevpiv = (cppi_interface_t*) descr[2]; nextpiv->h = h; /* Initialize in case it uses a copy */ + nextpiv->has_diag = chameleon_max( -1, nextpiv->has_diag); CORE_zgetrf_panel_offdiag( m, n, h, m0, tileA->n, CHAM_tile_get_ptr(tileA), tileA->ld, @@ -159,6 +159,9 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL; const char *cl_name = "zgetrf_percol_offdiag"; + int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + int rankA = A->get_rankof(A, Am, An); /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; @@ -166,8 +169,7 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, CHAMELEON_END_ACCESS_DECLARATION; /* Refine name */ - cl_name = chameleon_codelet_name( cl_name, 1, - A->get_blktile( A, Am, An ) ); + cl_name = chameleon_codelet_name( cl_name, 1, A->get_blktile( A, Am, An ) ); rt_starpu_insert_task( codelet, @@ -178,8 +180,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *), STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t *), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_REDUX, RUNTIME_pivot_getaddr( ipiv, An, h ), - STARPU_R, RUNTIME_pivot_getaddr( ipiv, An, h-1 ), + access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c index 48be66e17652b487c246c2eec0dd8211d7890b36..1ad0f7a142fd9272a3ffb445dd797db774959d60 100644 --- a/runtime/starpu/control/runtime_descriptor_ipiv.c +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -12,6 +12,8 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Matthieu Kuhn + * @author Alycia Lisito + * @author Florent Pruvost * @date 2024-03-16 * */ @@ -20,16 +22,18 @@ /** * Create ws_pivot runtime structures */ -void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, + const CHAM_desc_t *desc ) { assert( ipiv ); - starpu_data_handle_t *handles = calloc( 5 * ipiv->mt, sizeof(starpu_data_handle_t) ); + size_t nbhandles = 3 * ipiv->mt + 2 * desc->p; + starpu_data_handle_t *handles = calloc( nbhandles, sizeof(starpu_data_handle_t) ); ipiv->ipiv = handles; handles += ipiv->mt; ipiv->nextpiv = handles; - handles += ipiv->mt; + handles += desc->p; ipiv->prevpiv = handles; - handles += ipiv->mt; + handles += desc->p; ipiv->perm = handles; handles += ipiv->mt; ipiv->invp = handles; @@ -40,14 +44,14 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) */ { chameleon_starpu_tag_init(); - ipiv->mpitag_ipiv = chameleon_starpu_tag_book( (int64_t)(ipiv->mt) * 5 ); + ipiv->mpitag_ipiv = chameleon_starpu_tag_book( nbhandles ); if ( ipiv->mpitag_ipiv == -1 ) { chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure"); return; } ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv + ipiv->mt; - ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + ipiv->mt; - ipiv->mpitag_perm = ipiv->mpitag_prevpiv + ipiv->mt; + ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + desc->p; + ipiv->mpitag_perm = ipiv->mpitag_prevpiv + desc->p; ipiv->mpitag_invp = ipiv->mpitag_perm + ipiv->mt; } #endif @@ -56,12 +60,14 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) /** * Destroy ws_pivot runtime structures */ -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, + const CHAM_desc_t *desc ) { int i; starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); + size_t nbhandles = 3 * ipiv->mt + 2 * desc->p; - for(i=0; i<(5 * ipiv->mt); i++) { + for(i=0; i<nbhandles; i++) { if ( *handle != NULL ) { starpu_data_unregister( *handle ); *handle = NULL; @@ -107,49 +113,51 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) return (void*)(*handle); } -void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) { starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv); - int64_t mm = m + (ipiv->i / ipiv->mb); + const CHAM_desc_t *A = ipiv->desc; - nextpiv += mm; + nextpiv += rank/A->q; assert( nextpiv ); if ( *nextpiv != NULL ) { return (void*)(*nextpiv); } - const CHAM_desc_t *A = ipiv->desc; - int owner = A->get_rankof( A, m, m ); - int ncols = (mm == (A->nt-1)) ? A->n - mm * A->nb : A->nb; - int64_t tag = ipiv->mpitag_nextpiv + mm; + int64_t kk = k + (ipiv->i / ipiv->mb); + int owner = rank; + int ncols = (kk == (A->nt-1)) ? A->n - kk * A->nb : A->nb; + int64_t tag = ipiv->mpitag_nextpiv + owner/A->q; cppi_register( nextpiv, A->dtyp, ncols, tag, owner ); assert( *nextpiv ); + (void)h; return (void*)(*nextpiv); } -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) { starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv); - int64_t mm = m + (ipiv->i / ipiv->mb); + const CHAM_desc_t *A = ipiv->desc; - prevpiv += mm; + prevpiv += rank/A->q; assert( prevpiv ); if ( *prevpiv != NULL ) { return (void*)(*prevpiv); } - const CHAM_desc_t *A = ipiv->desc; - int owner = A->get_rankof( A, m, m ); - int ncols = (mm == (A->nt-1)) ? A->n - mm * A->nb : A->nb; - int64_t tag = ipiv->mpitag_prevpiv + mm; + int64_t kk = k + (ipiv->i / ipiv->mb); + int owner = rank; + int ncols = (kk == (A->nt-1)) ? A->n - kk * A->nb : A->nb; + int64_t tag = ipiv->mpitag_prevpiv + owner/A->q; cppi_register( prevpiv, A->dtyp, ncols, tag, owner ); assert( *prevpiv ); + (void)h; return (void*)(*prevpiv); } @@ -212,19 +220,18 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m ) } void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, - const CHAM_ipiv_t *ipiv, int m ) + const CHAM_ipiv_t *ipiv, int rank ) { starpu_data_handle_t *handle; const CHAM_desc_t *A = ipiv->desc; - int64_t mm = m + ( ipiv->i / ipiv->mb ); handle = (starpu_data_handle_t*)(ipiv->nextpiv); - handle += mm; + handle += rank/A->q; if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) starpu_mpi_cache_flush( sequence->comm, *handle ); - if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) + if ( starpu_mpi_data_get_rank( *handle ) == rank ) #endif { chameleon_starpu_data_wont_use( *handle ); @@ -232,12 +239,12 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, } handle = (starpu_data_handle_t*)(ipiv->prevpiv); - handle += mm; + handle += rank/A->q; if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) starpu_mpi_cache_flush( sequence->comm, *handle ); - if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) + if ( starpu_mpi_data_get_rank( *handle ) == rank ) #endif { chameleon_starpu_data_wont_use( *handle ); @@ -246,7 +253,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, (void)sequence; (void)ipiv; - (void)m; + (void)rank; } void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence, diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c index dc978bc6f13b224ebe19fdaf3a653ad4e09cd56f..4645631a7a86a72e5c8fe2fa5f8b40e61991e66c 100644 --- a/testing/testing_zgetrf.c +++ b/testing/testing_zgetrf.c @@ -151,8 +151,8 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) CHAMELEON_zgetrf_WS_Free( ws ); } + CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); parameters_desc_destroy( &descA ); - CHAMELEON_Ipiv_Destroy( &descIPIV ); return hres; }