diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 240d0a49886efebacc91ec7c62e57644ef465e4e..ee148020a6833c8ad44a19f4e9f51c708d4af1d2 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -24,11 +24,11 @@ */ #include "control/common.h" -#define A(m,n) A, m, n -#define U(m,n) &(ws->U), m, n -#define Up(m,n) &(ws->Up), m, n -#define Wu(m,n) &(ws->Wu), m, n -#define Wl(m,n) &(ws->Wl), m, n +#define A(m,n) A, m, n +#define U(m,n) &(ws->U), m, n +#define Up(m,n) &(ws->Up), m, n +#define Wu(m,n) &(ws->laswp->W), m, n +#define Wl(m,n) &(ws->Wl), m, n /* * All the functions below are panel factorization variant. @@ -118,6 +118,7 @@ static inline void chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, RUNTIME_option_t *options ) { @@ -129,7 +130,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ - ipiv->n = minmn; + pivot->n = minmn; /* * Algorithm per column with pivoting @@ -139,7 +140,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, options, tempkm, tempkn, h, k * A->mb, A(k, k), - ipiv ); + ipiv, pivot ); for (m = k+1; m < A->mt; m++) { tempmm = A->get_blkdim( A, m, DIM_m, A->m ); @@ -147,16 +148,16 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, options, tempmm, tempkn, h, m * A->mb, A(m, k), - ipiv ); + pivot ); } /* Reduce globally (between MPI processes) */ - INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, h, tempkn, ws ); + INSERT_TASK_zipiv_allreduce( options, A, pivot, k, h, tempkn, ws->laswp ); } /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); + RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank ); } /* @@ -166,6 +167,7 @@ static inline void 
chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, RUNTIME_option_t *options ) { @@ -179,7 +181,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ - ipiv->n = minmn; + pivot->n = minmn; /* * Algorithm per column with pivoting (no recursion) @@ -188,29 +190,30 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, /* Since index h scales column h-1, we need to iterate up to minmn (included) */ for ( h = 0; h <= minmn; h++ ) { - INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv ); + INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv, pivot ); for ( m = k+1; m < A->mt; m++ ) { tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_panel_offdiag_batched( options, tempmm, tempkn, h, m * A->mb, - (void *)ws, A(m, k), clargs, ipiv ); + (void *)ws, A(m, k), clargs, pivot ); } - INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, clargs, ipiv ); + INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, clargs, pivot ); - INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, h, tempkn, ws ); + INSERT_TASK_zipiv_allreduce( options, A, pivot, k, h, tempkn, ws->laswp ); } free( clargs ); /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); + RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank ); } static inline void chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, RUNTIME_option_t *options ) { @@ -222,7 +225,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, minmn = chameleon_min( tempkm, tempkn ); /* 
Update the number of column */ - ipiv->n = minmn; + pivot->n = minmn; nbblock = chameleon_ceil( minmn, ws->ib ); /* @@ -238,7 +241,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, options, tempkm, tempkn, j, k * A->mb, ws->ib, A(k, k), Up(k, k), - ipiv ); + ipiv, pivot ); for (m = k+1; m < A->mt; m++) { tempmm = A->get_blkdim( A, m, DIM_m, A->m ); @@ -246,19 +249,19 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, options, tempmm, tempkn, j, m * A->mb, ws->ib, A(m, k), Up(k, k), - ipiv ); + pivot ); } assert( j <= minmn ); /* Reduce globally (between MPI processes) */ - INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, j, tempkn, ws ); + INSERT_TASK_zipiv_allreduce( options, A, pivot, k, j, tempkn, ws->laswp ); if ( ( b < (nbblock-1) ) && ( h == hmax-1 ) ) { INSERT_TASK_zgetrf_blocked_trsm( options, ws->ib, tempkn, j+1, ws->ib, Up(k, k), - ipiv ); + pivot ); } } } @@ -266,7 +269,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); + RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank ); } /* @@ -276,6 +279,7 @@ static inline void chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, RUNTIME_option_t *options ) { @@ -289,7 +293,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ - ipiv->n = minmn; + pivot->n = minmn; nbblock = chameleon_ceil( minmn, ws->ib ); /* @@ -306,21 +310,21 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, for ( m = k; m < A->mt; m++ ) { tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_panel_blocked_batched( options, tempmm, 
tempkn, j, m * A->mb, - (void *)ws, A(m, k), Up(k, k), clargs, ipiv ); + (void *)ws, A(m, k), Up(k, k), clargs, ipiv, pivot ); } INSERT_TASK_zgetrf_panel_blocked_batched_flush( options, A, k, - Up(k, k), clargs, ipiv ); + Up(k, k), clargs, ipiv, pivot ); assert( j <= minmn ); /* Reduce globally (between MPI processes) */ - INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, j, tempkn, ws ); + INSERT_TASK_zipiv_allreduce( options, A, pivot, k, j, tempkn, ws->laswp ); if ( (b < (nbblock-1)) && (h == hmax-1) ) { INSERT_TASK_zgetrf_blocked_trsm( options, ws->ib, tempkn, b * ws->ib + hmax, ws->ib, Up(k, k), - ipiv ); + pivot ); } } } @@ -329,19 +333,21 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, /* Flush temporary data used for the pivoting */ INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, 0, A->m, ipiv, k ); - RUNTIME_ipiv_flushk( options->sequence, ipiv, A->myrank ); + RUNTIME_pivot_flushk( options->sequence, pivot, A->myrank ); } static inline void chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, RUNTIME_option_t *options ) { #if defined(CHAMELEON_USE_MPI) - chameleon_get_proc_involved_in_panelk_2dbc( A, k, k, ws ); - if ( !ws->involved ) { + CHAM_reduce_t *reduce = &(ws->laswp->reduce); + chameleon_get_proc_involved_in_panelk_2dbc( A, k, k, reduce ); + if ( !reduce->involved ) { return; } #endif @@ -354,19 +360,19 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, case ChamGetrfPPivPerColumn: if ( ws->batch_size_blas2 > 0 ) { - chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, k, options ); + chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, pivot, k, options ); } else { - chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options ); + chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, pivot, k, options ); } break; case ChamGetrfPPiv: if ( ws->batch_size_blas2 > 0 ) { - 
chameleon_pzgetrf_panel_facto_blocked_batched( ws, A, ipiv, k, options ); + chameleon_pzgetrf_panel_facto_blocked_batched( ws, A, ipiv, pivot, k, options ); } else { - chameleon_pzgetrf_panel_facto_blocked( ws, A, ipiv, k, options ); + chameleon_pzgetrf_panel_facto_blocked( ws, A, ipiv, pivot, k, options ); } break; @@ -423,7 +429,7 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws, ipiv, k, A(k, n), A(m, n) ); } - INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws ); + INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws->laswp ); } break; default: @@ -475,7 +481,7 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws, } INSERT_TASK_zlaswp_batched_flush( options, ipiv, k, A(k, n), Wu(A->myrank, n), clargs ); - INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws ); + INSERT_TASK_zperm_allreduce_row( options, ChamDirForward, A, Wu(A->myrank, n), ipiv, k, k, n, ws->laswp ); free( clargs ); } @@ -494,16 +500,17 @@ chameleon_pzgetrf_panel_permute_forward( struct chameleon_pzgetrf_s *ws, RUNTIME_option_t *options ) { #if defined(CHAMELEON_USE_MPI) - chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws ); + CHAM_reduce_t *reduce = &(ws->laswp->reduce); + chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce ); if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) { - INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved ); + INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved ); INSERT_TASK_zperm_allreduce_send_invp_row( options, ChamDirForward, ipiv, k, A, k, n ); } if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) { - INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved ); + 
INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved ); } - if ( !ws->involved ) { + if ( !reduce->involved ) { return; } #endif @@ -525,19 +532,20 @@ chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws, RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ) { - int tempkm, tempnn; + CHAM_reduce_t *reduce = &(ws->laswp->reduce); + int tempkm, tempnn; #if defined(CHAMELEON_USE_MPI) - chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws ); + chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce ); if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) { - INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved ); + INSERT_TASK_zperm_allreduce_send_perm( options, ChamDirForward, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved ); INSERT_TASK_zperm_allreduce_send_invp_row( options, ChamDirForward, ipiv, k, A, k, n ); } if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) { - INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved ); + INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved ); } - if ( !ws->involved ) { + if ( !reduce->involved ) { return; } #endif @@ -557,6 +565,7 @@ chameleon_pzgetrf_panel_permute_backward( struct chameleon_pzgetrf_s *ws, Wu(A->myrank, n), A(k, n) ); RUNTIME_data_flush( sequence, A(k, n) ); } + (void)reduce; } static inline void @@ -666,7 +675,8 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, { const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0; const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0; - CHAM_context_t *chamctxt = chameleon_context_self(); + CHAM_context_t *chamctxt = chameleon_context_self(); + CHAM_reduce_t *reduce = &(ws->laswp->reduce); int m, tempkm, tempmm, tempnn, rankAmn; @@ -680,7 +690,7 @@ 
chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, chameleon_pzgetrf_panel_permute_forward( ws, A, ipiv, k, n, options ); #if defined(CHAMELEON_USE_MPI) - if ( ws->involved ) + if ( reduce->involved ) #endif { INSERT_TASK_ztrsm( @@ -713,6 +723,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, RUNTIME_data_flush( options->sequence, Wu(A->myrank, n) ); RUNTIME_data_flush( options->sequence, A(k, n) ); + (void)reduce; } /** @@ -724,8 +735,9 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { - CHAM_context_t *chamctxt; - RUNTIME_option_t options; + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + CHAM_desc_pivot_t pivot; int k, m, n; int min_mnt = chameleon_min( A->mt, A->nt ); @@ -736,6 +748,8 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, } RUNTIME_options_init( &options, chamctxt, sequence, request ); + chameleon_pivot_init( &pivot, A ); + for (k = 0; k < min_mnt; k++) { RUNTIME_iteration_push( chamctxt, k ); @@ -746,7 +760,7 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, */ options.forcesub = chameleon_involved_in_panelk_2dbc( A, k ); if ( chameleon_involved_in_panelk_2dbc( A, k ) ) { - chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options ); + chameleon_pzgetrf_panel_facto( ws, A, IPIV, &pivot, k, &options ); } options.forcesub = 0; @@ -770,6 +784,8 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, RUNTIME_iteration_pop( chamctxt ); } CHAMELEON_Desc_Flush( &(ws->Wl), sequence ); + CHAMELEON_Ipiv_Flush( IPIV, sequence ); + chameleon_pivot_destroy( &pivot ); /* Backward pivoting */ for (k = 1; k < min_mnt; k++) { @@ -783,7 +799,7 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, } RUNTIME_perm_flushk( sequence, IPIV, k ); } - CHAMELEON_Desc_Flush( &(ws->Wu), sequence ); + CHAMELEON_Desc_Flush( &(ws->laswp->W), sequence ); /* Initialize IPIV with default values if needed */ if ( (ws->alg == ChamGetrfNoPivPerColumn) || 
diff --git a/compute/pzlaswp.c b/compute/pzlaswp.c index 85f384fecd37b8528b400602d7549c7b906f6332..2afedfd5a0bf6f0bf1fb38567f4043fcfdc7b5ea 100644 --- a/compute/pzlaswp.c +++ b/compute/pzlaswp.c @@ -20,14 +20,14 @@ */ #include "control/common.h" -#define A(m,n) A, m, n -#define Wu(m,n) &(ws->Wu), m, n +#define A(m,n) A, m, n +#define Wu(m,n) &(ws->W), m, n /** * Permutation of the panel n at step k */ static inline void -chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws, +chameleon_pzlaswp_panel_permute( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, @@ -35,9 +35,9 @@ chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws, int n, RUNTIME_option_t *options ) { - int m; - int tempkm, tempnn; - int withlacpy; + int m; + int tempkm, tempnn; + int withlacpy; tempkm = A->get_blkdim( A, k, DIM_m, A->m ); tempnn = A->get_blkdim( A, n, DIM_n, A->n ); @@ -65,7 +65,7 @@ chameleon_pzlaswp_panel_permute( struct chameleon_pzgetrf_s *ws, } static inline void -chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws, +chameleon_pzlaswp_panel( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, @@ -74,19 +74,20 @@ chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws, RUNTIME_option_t *options, RUNTIME_sequence_t *sequence ) { - int tempkm, tempnn; + CHAM_reduce_t *reduce = &(ws->reduce); + int tempkm, tempnn; #if defined(CHAMELEON_USE_MPI) - chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, ws ); - if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) { - INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved ); + chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce ); + if ( A->myrank == ipiv->get_rankof( ipiv, k, k ) ) { + INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved ); INSERT_TASK_zperm_allreduce_send_invp_row( options, dir, ipiv, k, A, k, n ); } if ( 
A->myrank == chameleon_getrankof_2d( A, k, n ) ) { - INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, ws->np_involved, ws->proc_involved ); + INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved ); } - if ( !ws->involved ) { + if ( !reduce->involved ) { return; } #endif @@ -101,10 +102,11 @@ chameleon_pzlaswp_panel( struct chameleon_pzgetrf_s *ws, Wu(A->myrank, n), A(k, n) ); RUNTIME_data_flush( sequence, A(k, n) ); } + (void)reduce; } void -chameleon_pzlaswp( struct chameleon_pzgetrf_s *ws, +chameleon_pzlaswp( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, diff --git a/compute/pzlaswpc.c b/compute/pzlaswpc.c index 3ba7c37a4240d1f3f164f8647ff9bcf3c9336726..a3328070bdf6717ed4614f42f9493f8ffdcb36db 100644 --- a/compute/pzlaswpc.c +++ b/compute/pzlaswpc.c @@ -18,14 +18,14 @@ */ #include "control/common.h" -#define A(m,n) A, m, n -#define Wc(m,n) &(ws->Wc), m, n +#define A(m,n) A, m, n +#define Wc(m,n) &(ws->W), m, n /** * Permutation of the panel n at step k */ static inline void -chameleon_pzlaswpc_panel_permute( struct chameleon_pzgetrf_s *ws, +chameleon_pzlaswpc_panel_permute( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, @@ -33,9 +33,9 @@ chameleon_pzlaswpc_panel_permute( struct chameleon_pzgetrf_s *ws, int k, RUNTIME_option_t *options ) { - int n; - int tempkn, tempmm; - int withlacpy; + int n; + int tempkn, tempmm; + int withlacpy; tempkn = A->get_blkdim( A, k, DIM_n, A->n ); tempmm = A->get_blkdim( A, m, DIM_m, A->m ); @@ -63,7 +63,7 @@ chameleon_pzlaswpc_panel_permute( struct chameleon_pzgetrf_s *ws, } static inline void -chameleon_pzlaswpc_panel( struct chameleon_pzgetrf_s *ws, +chameleon_pzlaswpc_panel( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *ipiv, @@ -72,19 +72,20 @@ chameleon_pzlaswpc_panel( struct chameleon_pzgetrf_s *ws, RUNTIME_option_t *options, RUNTIME_sequence_t *sequence 
) { - int tempkn, tempmm; + CHAM_reduce_t *reduce = &(ws->reduce); + int tempkn, tempmm; #if defined(CHAMELEON_USE_MPI) - chameleon_get_proc_involved_in_rowpanelk_2dbc( A, m, k, ws ); - if ( A->myrank == chameleon_getrankof_2d( A, k, k ) ) { - INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, ws->np_involved, ws->proc_involved ); + chameleon_get_proc_involved_in_rowpanelk_2dbc( A, m, k, reduce ); + if ( A->myrank == ipiv->get_rankof( ipiv, k, k ) ) { + INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved ); INSERT_TASK_zperm_allreduce_send_invp_col( options, dir, ipiv, k, A, m, k ); } if ( A->myrank == chameleon_getrankof_2d( A, m, k ) ) { - INSERT_TASK_zperm_allreduce_send_A( options, A, m, k, A->myrank, ws->np_involved, ws->proc_involved ); + INSERT_TASK_zperm_allreduce_send_A( options, A, m, k, A->myrank, reduce->np_involved, reduce->proc_involved ); } - if ( !ws->involved ) { + if ( !reduce->involved ) { return; } #endif @@ -99,10 +100,11 @@ chameleon_pzlaswpc_panel( struct chameleon_pzgetrf_s *ws, Wc(m, A->myrank), A(m, k) ); RUNTIME_data_flush( sequence, A(m, k) ); } + (void)reduce; } void -chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws, +chameleon_pzlaswpc( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, @@ -121,7 +123,7 @@ chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws, RUNTIME_options_init( &options, chamctxt, sequence, request ); if ( dir == ChamDirForward ) { - for ( k = 0; k < A->nt; k++ ) { + for ( k = 0; k < IPIV->mt; k++ ) { for ( m = 0; m < A->mt; m++ ) { options.priority = A->mt-m; @@ -131,7 +133,7 @@ chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws, } } else { - for ( k = A->nt - 1; k > -1; k-- ) { + for ( k = IPIV->mt - 1; k > -1; k-- ) { for ( m = 0; m < A->mt; m++ ) { options.priority = A->mt-m; chameleon_pzlaswpc_panel( ws, dir, A, IPIV, m, k, &options, sequence ); diff --git a/compute/zgesv.c b/compute/zgesv.c 
index c412cd3211b11aeae696faee3b2226501c89e05a..72aa2e99573272f3f80cec5dbbfed0d80d6212e8 100644 --- a/compute/zgesv.c +++ b/compute/zgesv.c @@ -84,6 +84,7 @@ int CHAMELEON_zgesv( int N, int NRHS, CHAM_desc_t descAl, descAt; CHAM_desc_t descBl, descBt; struct chameleon_pzgetrf_s *wsA, *wsB; + int P, Q; chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { @@ -130,6 +131,9 @@ int CHAMELEON_zgesv( int N, int NRHS, chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower, B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request ); + P = chameleon_desc_datadist_get_iparam( &descAt, 0 ); + Q = chameleon_desc_datadist_get_iparam( &descAt, 1 ); + /* Allocate workspace for partial pivoting */ wsA = CHAMELEON_zgetrf_WS_Alloc( &descAt ); wsB = CHAMELEON_zgetrf_WS_Alloc( &descBt ); @@ -137,7 +141,7 @@ int CHAMELEON_zgesv( int N, int NRHS, if ( ( wsA->alg == ChamGetrfPPivPerColumn ) || ( wsA->alg == ChamGetrfPPiv ) ) { - chameleon_ipiv_init( &descIPIV, &descAt, N, IPIV ); + chameleon_ipiv_init( &descIPIV, ChamLeft, descAt.mb, N, P, P*Q, IPIV, chameleon_getrankof_ipiv_2d_diag ); } /* Call the tile interface */ @@ -161,7 +165,7 @@ int CHAMELEON_zgesv( int N, int NRHS, if ( ( wsA->alg == ChamGetrfPPivPerColumn ) || ( wsA->alg == ChamGetrfPPiv ) ) { - chameleon_ipiv_destroy( &descIPIV, &descAt ); + chameleon_ipiv_destroy( &descIPIV ); } /* Cleanup the temporary data */ @@ -240,7 +244,6 @@ int CHAMELEON_zgesv_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B ) CHAMELEON_zgesv_Tile_Async( A, IPIV, B, wsA, wsB, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); - CHAMELEON_Ipiv_Flush( IPIV, sequence ); CHAMELEON_Desc_Flush( B, sequence ); chameleon_sequence_wait( chamctxt, sequence ); @@ -369,6 +372,8 @@ int CHAMELEON_zgesv_Tile_Async( CHAM_desc_t *A, wsB = user_wsB; } + IPIV->get_rankof = chameleon_getrankof_ipiv_2d_diag; + chameleon_pzgetrf( wsA, A, IPIV, sequence, request ); CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, A, IPIV, B, wsB, sequence, 
request ); diff --git a/compute/zgetrf.c b/compute/zgetrf.c index 4470c33f06a4f4477757a10c4d478d6b48f91709..ea1d50bc8c7251ffeb37a0b8c9e0668cfe31792f 100644 --- a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -59,7 +59,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) { CHAM_context_t *chamctxt; struct chameleon_pzgetrf_s *ws; - int lookahead, batch_size; + int lookahead, batch_size; + int P = chameleon_desc_datadist_get_iparam( A, 0 ); + int Q = chameleon_desc_datadist_get_iparam( A, 1 ); chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { @@ -70,11 +72,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) ws->alg = ChamGetrfPPiv; ws->ib = CHAMELEON_IB; -#if defined (CHAMELEON_USE_MPI) - ws->proc_involved = malloc( sizeof( int ) * chameleon_desc_datadist_get_iparam(A, 0) ); - ws->involved = 0; - ws->np_involved = 0; -#endif + ws->laswp = CHAMELEON_zlaswp_WS_Alloc( ChamLeft, A ); { char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" ); @@ -99,21 +97,6 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_cleanenv( algostr ); } - { - char *allreduce = chameleon_getenv( "CHAMELEON_GETRF_ALL_REDUCE" ); - - if ( allreduce != NULL ) { - if ( strcasecmp( allreduce, "cham_spu_tasks" ) == 0 ) { - ws->alg_allreduce = ChamStarPUTasks; - } - else { - chameleon_error( "CHAMELEON_zgetrf_WS_Alloc", "CHAMELEON_GETRF_ALL_REDUCE is not one of chameleon_starpu_tasks, chameleon_starpu, chameleon_starpu_mpi, chameleon_mpi => Switch back to chameleon_starpu_tasks\n" ); - ws->alg_allreduce = ChamStarPUTasks; - } - } - chameleon_cleanenv( allreduce ); - } - batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 0 ); if ( batch_size > CHAMELEON_BATCH_SIZE ) { chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" ); @@ -132,9 +115,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A 
) chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, 1, A->nb, A->nb, A->mt, A->nt * A->nb, 0, 0, - A->mt, A->nt * A->nb, - chameleon_desc_datadist_get_iparam(A, 0), - chameleon_desc_datadist_get_iparam(A, 1), + A->mt, A->nt * A->nb, P, Q, NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } else if ( ( ws->alg == ChamGetrfPPiv ) || @@ -143,25 +124,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, A->m, A->n, 0, 0, - A->m, A->n, - chameleon_desc_datadist_get_iparam(A, 0), - chameleon_desc_datadist_get_iparam(A, 1), + A->m, A->n, P, Q, NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); - chameleon_desc_init( &(ws->Wu), CHAMELEON_MAT_ALLOC_TILE, - ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, - A->mb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), A->n, 0, 0, - A->mb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), A->n, chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 1, - NULL, NULL, NULL, A->get_rankof_init_arg ); - chameleon_desc_init( &(ws->Wc), CHAMELEON_MAT_ALLOC_TILE, - ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, - A->m, A->nb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 0, 0, - A->m, A->nb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 1, chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), - NULL, NULL, NULL, A->get_rankof_init_arg ); lookahead = chamctxt->lookahead; chameleon_desc_init( &(ws->Wl), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb), - A->mt * A->mb, A->nb * chameleon_desc_datadist_get_iparam(A, 1) * lookahead, 0, 0, - A->mt * A->mb, A->nb * chameleon_desc_datadist_get_iparam(A, 1) * lookahead, chameleon_desc_datadist_get_iparam(A, 0), 
chameleon_desc_datadist_get_iparam(A, 1), + A->mt * A->mb, A->nb * Q * lookahead, 0, 0, + A->mt * A->mb, A->nb * Q * lookahead, P, Q, NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } @@ -180,9 +149,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_desc_init( &(ws->Up), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ws->ib, A->nb, ws->ib * A->nb, A->mt * ws->ib, A->nt * A->nb, 0, 0, - A->mt * ws->ib, A->nt * A->nb, - chameleon_desc_datadist_get_iparam(A, 0), - chameleon_desc_datadist_get_iparam(A, 1), + A->mt * ws->ib, A->nt * A->nb, P, Q, NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } @@ -213,10 +180,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws ) { struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws; -#if defined (CHAMELEON_USE_MPI) - free( ws->proc_involved ); -#endif - if ( ( ws->alg == ChamGetrfNoPivPerColumn ) || ( ws->alg == ChamGetrfPPiv ) || ( ws->alg == ChamGetrfPPivPerColumn ) ) @@ -230,8 +193,6 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws ) if ( ( ws->alg == ChamGetrfPPiv ) || ( ws->alg == ChamGetrfPPivPerColumn ) ) { - chameleon_desc_destroy( &(ws->Wu) ); - chameleon_desc_destroy( &(ws->Wc) ); chameleon_desc_destroy( &(ws->Wl) ); } free( ws ); @@ -289,13 +250,14 @@ int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) { int NB; - int status; - CHAM_desc_t descAl, descAt; - CHAM_ipiv_t descIPIV; - CHAM_context_t *chamctxt; - RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + CHAM_desc_t descAl, descAt; + CHAM_ipiv_t descIPIV; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; struct chameleon_pzgetrf_s *ws; + int P, Q; chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { @@ -335,13 +297,16 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) chameleon_zlap2tile( chamctxt, &descAl, &descAt, 
ChamDescInout, ChamUpperLower, A, NB, NB, LDA, N, M, N, sequence, &request ); + P = chameleon_desc_datadist_get_iparam( &descAt, 0 ); + Q = chameleon_desc_datadist_get_iparam( &descAt, 1 ); + /* Allocate workspace for partial pivoting */ ws = CHAMELEON_zgetrf_WS_Alloc( &descAt ); if ( ( ws->alg == ChamGetrfPPivPerColumn ) || ( ws->alg == ChamGetrfPPiv ) ) { - chameleon_ipiv_init( &descIPIV, &descAt, chameleon_min( M, N ), IPIV ); + chameleon_ipiv_init( &descIPIV, ChamLeft, descAt.mb, chameleon_min( M, N ), P, P*Q, IPIV, chameleon_getrankof_ipiv_2d_diag); } /* Call the tile interface */ @@ -362,7 +327,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) if ( ( ws->alg == ChamGetrfPPivPerColumn ) || ( ws->alg == ChamGetrfPPiv ) ) { - chameleon_ipiv_destroy( &descIPIV, &descAt ); + chameleon_ipiv_destroy( &descIPIV ); } CHAMELEON_zgetrf_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); @@ -432,7 +397,6 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV ) ws = CHAMELEON_zgetrf_WS_Alloc( A ); CHAMELEON_zgetrf_Tile_Async( A, IPIV, ws, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); - CHAMELEON_Ipiv_Flush( IPIV, sequence ); chameleon_sequence_wait( chamctxt, sequence ); CHAMELEON_zgetrf_WS_Free( ws ); @@ -545,6 +509,8 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, ws = user_ws; } + IPIV->get_rankof = chameleon_getrankof_ipiv_2d_diag; + chameleon_pzgetrf( ws, A, IPIV, sequence, request ); if ( user_ws == NULL ) { diff --git a/compute/zgetrs.c b/compute/zgetrs.c index 7e8fd22a4c72ad092bf8899f8417dbb9b7a26b2a..46850bd6faa89ce35fea05c257b139de59403adb 100644 --- a/compute/zgetrs.c +++ b/compute/zgetrs.c @@ -89,6 +89,7 @@ int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS, CHAM_desc_t descAl, descAt; CHAM_desc_t descBl, descBt; struct chameleon_pzgetrf_s *ws; + int P, Q; chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { @@ -138,9 +139,12 @@ int CHAMELEON_zgetrs( cham_trans_t trans, 
int N, int NRHS, chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower, B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request ); + P = chameleon_desc_datadist_get_iparam( &descAt, 0 ); + Q = chameleon_desc_datadist_get_iparam( &descAt, 1 ); + ws = CHAMELEON_zgetrf_WS_Alloc( &descBt ); - CHAMELEON_Ipiv_Create( &descIPIV, &descAt, N, IPIV ); - CHAMELEON_Ipiv_Init( &descAt, descIPIV ); + CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descAt.mb, N, P, P*Q, IPIV ); + CHAMELEON_Ipiv_Init( descIPIV ); /* Call the tile interface */ CHAMELEON_zgetrs_Tile_Async( trans, &descAt, descIPIV, &descBt, ws, sequence, &request ); @@ -154,7 +158,7 @@ int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS, chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ - CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); CHAMELEON_zgetrf_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt ); @@ -377,7 +381,7 @@ int CHAMELEON_zgetrs_Tile_Async( cham_trans_t trans, } if ( trans == ChamNoTrans ) { - chameleon_pzlaswp( ws, ChamDirForward, B, IPIV, sequence, request ); + chameleon_pzlaswp( ws->laswp, ChamDirForward, B, IPIV, sequence, request ); chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request ); @@ -388,7 +392,7 @@ int CHAMELEON_zgetrs_Tile_Async( cham_trans_t trans, chameleon_pztrsm( ChamLeft, ChamLower, ChamTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request ); - chameleon_pzlaswp( ws, ChamDirBackward, B, IPIV, sequence, request ); + chameleon_pzlaswp( ws->laswp, ChamDirBackward, B, IPIV, sequence, request ); } if ( user_ws == NULL ) { diff --git a/compute/zlaswp.c b/compute/zlaswp.c index a4c6635c20a38a215e9e585663ed32593e0d63ae..f00140efedbd5cc829d73cb581820720bfa0217c 100644 --- a/compute/zlaswp.c +++ b/compute/zlaswp.c @@ -18,6 +18,124 @@ */ #include 
"control/common.h" +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * CHAMELEON_zlaswp_WS_Alloc - Allocate the required workspaces for laswp + * + ******************************************************************************* + * + * @param[in] A + * The descriptor of the matrix A. + * + * @param[in] side + * Specifies whether the permutation is done on the rows or the columns. + * = ChamLeft: op(A) = A + * = ChamRight: op(A) = A^T + * + ******************************************************************************* + * + * @retval An allocated opaque pointer to use in CHAMELEON_zlaswp_Tile_Async() + * and to free with CHAMELEON_zlaswp_WS_Free(). + * + ******************************************************************************* + * + * @sa CHAMELEON_zlaswp_Tile_Async + * @sa CHAMELEON_zlaswp_WS_Free + * + */ +void * +CHAMELEON_zlaswp_WS_Alloc( cham_side_t side, const CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + struct chameleon_pzlaswp_s *ws; + CHAM_reduce_t *reduce; + int P = chameleon_desc_datadist_get_iparam( A, 0 ); + int Q = chameleon_desc_datadist_get_iparam( A, 1 ); + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + return NULL; + } + + ws = calloc( 1, sizeof(struct chameleon_pzlaswp_s) ); + + reduce = &(ws->reduce); + +#if defined (CHAMELEON_USE_MPI) + reduce->proc_involved = malloc( sizeof( int ) * P ); + reduce->involved = 0; + reduce->np_involved = 0; +#endif + + { + char *allreduce = chameleon_getenv( "CHAMELEON_ALLREDUCE" ); + + if ( allreduce != NULL ) { + if ( strcasecmp( allreduce, "cham_spu_tasks" ) == 0 ) { + reduce->alg_allreduce = ChamStarPUTasks; + } + else { + chameleon_error( "CHAMELEON_zlaswp_WS_Alloc", "CHAMELEON_ALLREDUCE is not one of chameleon_starpu_tasks, chameleon_starpu, chameleon_starpu_mpi, chameleon_mpi => Switch back to chameleon_starpu_tasks\n" ); + reduce->alg_allreduce = ChamStarPUTasks; + } + } + chameleon_cleanenv( 
allreduce ); + } + + if ( side == ChamLeft ) { + chameleon_desc_init( &(ws->W), CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, + A->mb * P * Q, A->n, 0, 0, + A->mb * P * Q, A->n, P * Q, 1, + NULL, NULL, NULL, A->get_rankof_init_arg ); + } + else { + chameleon_desc_init( &(ws->W), CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, + A->m, A->nb * P * Q, 0, 0, + A->m, A->nb * P * Q, 1, P * Q, + NULL, NULL, NULL, A->get_rankof_init_arg ); + } + + return ws; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Free the allocated workspaces for asynchronous laswp + * + ******************************************************************************* + * + * @param[in,out] user_ws + * On entry, the opaque pointer allocated by + * CHAMELEON_zlaswp_WS_Alloc() On exit, all data are freed. + * + ******************************************************************************* + * + * @sa CHAMELEON_zlaswp_Tile_Async + * @sa CHAMELEON_zlaswp_WS_Alloc + * + */ +void +CHAMELEON_zlaswp_WS_Free( void *user_ws ) +{ + struct chameleon_pzlaswp_s *ws = (struct chameleon_pzlaswp_s *)user_ws; + +#if defined (CHAMELEON_USE_MPI) + free( ws->reduce.proc_involved ); +#endif + + chameleon_desc_destroy( &(ws->W) ); + + free( ws ); +} + /** ******************************************************************************** * @@ -88,6 +206,7 @@ int CHAMELEON_zlaswp( cham_side_t side, CHAM_desc_t descAl, descAt; CHAM_ipiv_t *descIPIV; int K = ( side == ChamLeft ) ? 
M : N; + int P, Q; chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { @@ -136,9 +255,13 @@ int CHAMELEON_zlaswp( cham_side_t side, /* Submit the matrix conversion */ chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower, A, NB, NB, LDA, N, M, N, sequence, &request ); - CHAMELEON_Ipiv_Create( &descIPIV, &descAt, K, IPIV ); - CHAMELEON_Ipiv_Init( &descAt, descIPIV ); + P = chameleon_desc_datadist_get_iparam( &descAt, 0 ); + Q = chameleon_desc_datadist_get_iparam( &descAt, 1 ); + + CHAMELEON_Ipiv_Create( &descIPIV, side, descAt.mb, K, P, P*Q, IPIV ); + + CHAMELEON_Ipiv_Init( descIPIV ); /* Call the tile interface */ CHAMELEON_zlaswp_Tile_Async( side, dir, &descAt, K1, K2, descIPIV, sequence, &request ); @@ -150,7 +273,7 @@ int CHAMELEON_zlaswp( cham_side_t side, chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ - CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); chameleon_sequence_destroy( chamctxt, sequence ); @@ -218,17 +341,18 @@ int CHAMELEON_zlaswp_Tile( cham_side_t side, RUNTIME_sequence_t *sequence = NULL; RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; int status; + int K = ( side == ChamLeft ) ? 
A->m : A->n; chamctxt = chameleon_context_self(); if ( chamctxt == NULL ) { chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "CHAMELEON not initialized"); return CHAMELEON_ERR_NOT_INITIALIZED; } - if ( ( K1 < 1 ) || ( K1 > A->m ) ) { + if ( ( K1 < 1 ) || ( K1 > K ) ) { chameleon_error("CHAMELEON_zlaswp", "illegal value of K1"); return CHAMELEON_ERR_ILLEGAL_VALUE; } - if ( ( K2 < 1 ) || ( K2 > A->m ) ) { + if ( ( K2 < 1 ) || ( K2 > K ) ) { chameleon_error("CHAMELEON_zlaswp", "illegal value of K2"); return CHAMELEON_ERR_ILLEGAL_VALUE; } @@ -237,7 +361,6 @@ int CHAMELEON_zlaswp_Tile( cham_side_t side, CHAMELEON_zlaswp_Tile_Async( side, dir, A, K1, K2, IPIV, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); - CHAMELEON_Ipiv_Flush( IPIV, sequence ); chameleon_sequence_wait( chamctxt, sequence ); status = sequence->status; @@ -307,7 +430,7 @@ int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; - struct chameleon_pzgetrf_s *ws; + struct chameleon_pzlaswp_s *ws; RUNTIME_option_t options; int k; int K = ( side == ChamLeft ) ? 
A->m : A->n; @@ -367,6 +490,7 @@ int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, m0 = k * A->mb; INSERT_TASK_ipiv_to_perm( &options, m0, tempkm, tempkm, K1 - 1, K2 - 1, IPIV, k ); + RUNTIME_ipiv_flushk( sequence, IPIV, k); } } else { @@ -376,13 +500,14 @@ int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, tempkn = A->get_blkdim( A, k, DIM_n, A->n ); n0 = k * A->nb; INSERT_TASK_ipiv_to_perm( &options, n0, tempkn, tempkn, K1 - 1, K2 - 1, - IPIV, k ); + IPIV, k ); + RUNTIME_ipiv_flushk( sequence, IPIV, k); } } chameleon_sequence_wait( chamctxt, sequence ); } - ws = CHAMELEON_zgetrf_WS_Alloc( A ); + ws = CHAMELEON_zlaswp_WS_Alloc( side, A ); if ( side == ChamLeft ) { chameleon_pzlaswp( ws, dir, A, IPIV, sequence, request ); diff --git a/control/common.h b/control/common.h index aa1b39ca5b6a696412eeff04cb69d5ace70baa56..bf43d3c58e40a38f4492ea32ade9850f8cae2287 100644 --- a/control/common.h +++ b/control/common.h @@ -92,6 +92,17 @@ #define ChamIPT_Panel 1 #define ChamIPT_All 2 +/** + * Structure for reduction operations + */ +struct chameleon_reduce_s { + cham_getrf_allreduce_t alg_allreduce; /**< Specifies the algorithm used for the allreduce */ + int *proc_involved; /**< Specifies the processes involved in the reduction operation */ + unsigned int involved; /**< Specifies if the current process is involved in the reduction operation */ + int np_involved; /**< Specifies the number of involved processes in the reduction operation */ +}; +typedef struct chameleon_reduce_s CHAM_reduce_t; + /** * Global array of LAPACK constants */ diff --git a/control/compute_z.h b/control/compute_z.h index 3d86c666ab5655cce9d95decb7a75a50f3948be9..e747c2ac2203c63266acf4c928ad5a28495e0a86 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -40,25 +40,28 @@ struct chameleon_pzgemm_s { CHAM_desc_t WB; }; +/** + * @brief Data structure to handle the LASWP workspaces + */ +struct chameleon_pzlaswp_s { + CHAM_desc_t W; /**< Workspace used for the row/column permutation. 
*/ + CHAM_reduce_t reduce; /**< Structure for reduction operations */ +}; + /** * @brief Data structure to handle the GETRF workspaces with partial pivoting */ struct chameleon_pzgetrf_s { - cham_getrf_t alg; - cham_getrf_allreduce_t alg_allreduce; - int ib; /**< Internal blocking parameter */ - int batch_size_blas2; /**< Batch size for the blas 2 operations of the panel factorization */ - int batch_size_blas3; /**< Batch size for the blas 3 operations of the panel factorization */ - int batch_size_swap; /**< Batch size for the permutation */ - int ringswitch; /**< Define when to switch to ring bcast */ - CHAM_desc_t U; - CHAM_desc_t Up; /**< Workspace used for the panel factorization */ - CHAM_desc_t Wu; /**< Workspace used for the permutation and update */ - CHAM_desc_t Wc; /**< Workspace used for the column permutation. */ - CHAM_desc_t Wl; /**< Workspace used the update */ - int *proc_involved; - unsigned int involved; - int np_involved; + struct chameleon_pzlaswp_s *laswp; /**< Structure containing the permutation workspace and the reduce data */ + cham_getrf_t alg; /**< Define the algorithm used to compute the getrf */ + int ib; /**< Internal blocking parameter */ + int batch_size_blas2; /**< Batch size for the blas 2 operations of the panel factorization */ + int batch_size_blas3; /**< Batch size for the blas 3 operations of the panel factorization */ + int batch_size_swap; /**< Batch size for the permutation */ + int ringswitch; /**< Define when to switch to ring bcast */ + CHAM_desc_t U; /**< Workspaces used for the panels permutation in getrf without pivoting */ + CHAM_desc_t Up; /**< Workspace used for the panel factorization */ + CHAM_desc_t Wl; /**< Workspace used for the update */ }; /** @@ -173,8 +176,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void 
chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzlaswp( struct chameleon_pzgetrf_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); -void chameleon_pzlaswpc( struct chameleon_pzgetrf_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzlaswp( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzlaswpc( struct chameleon_pzlaswp_s *ws, cham_dir_t dir, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzplghe(double bump, cham_uplo_t uplo, CHAM_desc_t *A, int bigM, int m0, int n0, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); diff --git a/control/descriptor.h b/control/descriptor.h index 02c77e502cf1620cded3d6137ef7b6c8072ac249..8dc99c8383feed99cfc8806c0879325f787615fd 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -54,7 +54,6 @@ int chameleon_desc_init_internal( CHAM_desc_t *desc, const char *name, void *mat int (*get_rankof) ( const CHAM_desc_t*, int, int ), void* get_rankof_arg ); - static inline int chameleon_desc_init( CHAM_desc_t *desc, void *mat, cham_flttype_t dtyp, int mb, int nb, int 
bsiz, int lm, int ln, int i, int j, @@ -78,8 +77,13 @@ CHAM_desc_t* chameleon_desc_submatrix( CHAM_desc_t *descA, int i, int j, int m, void chameleon_desc_destroy ( CHAM_desc_t *desc ); int chameleon_desc_check ( const CHAM_desc_t *desc ); -int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void *data ); -void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc ); +int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, cham_side_t side, int mb, int m, + int p, int np, void *data, + blkrankof_ipiv_fct_t get_rankof ); +void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv ); + +int chameleon_pivot_init ( CHAM_desc_pivot_t *pivot, const CHAM_desc_t *desc ); +void chameleon_pivot_destroy( CHAM_desc_pivot_t *pivot ); /** * Internal function to return address of block (m,n) with m,n = block indices diff --git a/control/descriptor_helpers.c b/control/descriptor_helpers.c index 6db15fa1e37dffa3e3dd5d8182a62c2a05de63a7..259f3eaf21f36108725b1cd8aa7d49ea445bb039 100644 --- a/control/descriptor_helpers.c +++ b/control/descriptor_helpers.c @@ -84,6 +84,73 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n ) return (mm % chameleon_desc_datadist_get_iparam(A,0)) * chameleon_desc_datadist_get_iparam(A,1) + (mm % chameleon_desc_datadist_get_iparam(A,1)); } +/** + * @brief Return the rank of the process responsible for the permutation of the tile (m, n) + * in a classic 2D Block Cyclic distribution PxQ. + * + * @param[in] IPIV + * The ipiv descriptor. + * + * @param[in] m + * The row index of the tile. + * + * @param[in] n + * The column index of the tile. 
+ * + * @return The rank of the process responsible for the row permutation of the tile (m, n) + * + */ +int chameleon_getrankof_ipiv_2d_row( const CHAM_ipiv_t *IPIV, int m, int n ) +{ + int Q = IPIV->NP / IPIV->P; + return ( m % IPIV->P ) * Q; +} + +/** + * @brief Return the rank of the process responsible for the column permutation of the tile (m, n) + * in a classic 2D Block Cyclic distribution PxQ. + * + * @param[in] IPIV + * The ipiv descriptor. + * + * @param[in] m + * The row index of the tile. + * + * @param[in] n + * The column index of the tile. + * + * @return The rank of the process responsible for the permutation of the tile (m, n) + * + */ +int chameleon_getrankof_ipiv_2d_col( const CHAM_ipiv_t *IPIV, int m, int n ) +{ + int Q = IPIV->NP / IPIV->P; + return n % Q; +} + +/** + * @brief Return the rank of the process responsible for the permutation of the tile (m, n) + * when used for getrf in a classic 2D Block Cyclic distribution PxQ. + * + * @param[in] IPIV + * The ipiv descriptor. + * + * @param[in] m + * The row and column index of the tile. + * + * @param[in] n + * Unused + * + * @return The rank of the process responsible for the permutation of the tile (m, n) + * + */ +int chameleon_getrankof_ipiv_2d_diag( const CHAM_ipiv_t *IPIV, int m, int n ) +{ + (void)n; + int Q = IPIV->NP / IPIV->P; + return (m % IPIV->P) * Q + (m % Q); +} + /** * @brief Test if the current MPI process is involved in the panel k for 2DBC distributions. * @@ -134,37 +201,37 @@ int chameleon_p_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k, int p ) { * @param[in] n * The index of the panel to test. * - * @param[inout] ws_getrf + * @param[inout] ws_reduce * The i. 
* */ void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k, int n, - void *ws_getrf ) + void *ws_reduce ) { #if defined (CHAMELEON_USE_MPI) - struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)ws_getrf; - int *proc_involved = ws->proc_involved; - int b, rank, np; + CHAM_reduce_t *reduce = (CHAM_reduce_t*) ws_reduce; + int *proc_involved = reduce->proc_involved; + int b, rank, np; np = 0; - ws->involved = 0; + reduce->involved = 0; for ( b = k; (b < A->mt) && ((b-k) < chameleon_desc_datadist_get_iparam(A, 0)); b ++ ) { rank = chameleon_getrankof_2d( A, b, n ); proc_involved[ b-k ] = rank; np ++; if ( rank == A->myrank ) { - ws->involved = 1; + reduce->involved = 1; } } - ws->proc_involved = proc_involved; - ws->np_involved = np; + reduce->proc_involved = proc_involved; + reduce->np_involved = np; #else (void)A; (void)k; (void)n; - (void)ws_getrf; + (void)ws_reduce; #endif } @@ -180,37 +247,37 @@ void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A, * @param[in] k * The index of the panel to test. * - * @param[inout] ws_getrf + * @param[inout] ws_reduce * The i. 
* */ void chameleon_get_proc_involved_in_rowpanelk_2dbc( const CHAM_desc_t *A, int m, int k, - void *ws_getrf ) + void *ws_reduce ) { #if defined (CHAMELEON_USE_MPI) - struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)ws_getrf; - int *proc_involved = ws->proc_involved; - int b, rank, np; + CHAM_reduce_t *reduce = (CHAM_reduce_t*) ws_reduce; + int *proc_involved = reduce->proc_involved; + int b, rank, np; np = 0; - ws->involved = 0; + reduce->involved = 0; for ( b = k; (b < A->nt) && ((b-k) < chameleon_desc_datadist_get_iparam(A, 1)); b ++ ) { rank = chameleon_getrankof_2d( A, m, b ); proc_involved[ b-k ] = rank; np ++; if ( rank == A->myrank ) { - ws->involved = 1; + reduce->involved = 1; } } - ws->proc_involved = proc_involved; - ws->np_involved = np; + reduce->proc_involved = proc_involved; + reduce->np_involved = np; #else (void)A; (void)k; (void)m; - (void)ws_getrf; + (void)ws_reduce; #endif } diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c index bc58b4cc4448cac8e781e10995065bbb99c69af6..eafcc3dedf53774508db78bf20b003bd36068502 100644 --- a/control/descriptor_ipiv.c +++ b/control/descriptor_ipiv.c @@ -43,21 +43,35 @@ * @param[in,out] ipiv * The pointer to the ipiv descriptor to initialize. * - * @param[in] desc - * The tile descriptor for which an associated ipiv descriptor must be generated. + * @param[in] side + * Specifies whether the permutation will be done on the rows or on the columns. + * + * @param[in] mb + * The size of a tile in the pivot array. * * @param[in] m * The size of the pivot array. * + * @param[in] p + * Number of process rows for the 2D block-cyclic distribution. + * + * @param[in] np + * The total number of processes. + * * @param[in] data * The pointer to the original vector where to store the pivot values. 
* + * @param[in] get_rankof + * The function used to determine which process is responsible for the permutation + * of a tile ****************************************************************************** * * @return CHAMELEON_SUCCESS on success, CHAMELEON_ERR_NOT_INITIALIZED otherwise. * */ -int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void *data ) +int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, cham_side_t side, int mb, int m, + int p, int np, void *data, + blkrankof_ipiv_fct_t get_rankof ) { CHAM_context_t *chamctxt; int rc = CHAMELEON_SUCCESS; @@ -70,15 +84,72 @@ int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void return CHAMELEON_ERR_NOT_INITIALIZED; } - ipiv->desc = desc; - ipiv->data = data; - ipiv->i = 0; - ipiv->m = m; - ipiv->mb = desc->mb; - ipiv->mt = chameleon_ceil( ipiv->m, ipiv->mb ); + if ( get_rankof ) { + ipiv->get_rankof = get_rankof; + } + else { + ipiv->get_rankof = ( side == ChamLeft ) ? chameleon_getrankof_ipiv_2d_row : + chameleon_getrankof_ipiv_2d_col; + } + + ipiv->get_blkdim = chameleon_getblkdim_ipiv; + + ipiv->data = data; + ipiv->myrank = RUNTIME_comm_rank( chamctxt ); + ipiv->i = 0; + ipiv->m = m; + ipiv->mb = mb; + ipiv->mt = chameleon_ceil( ipiv->m, ipiv->mb ); + ipiv->P = p; + ipiv->NP = np; + + /* Create runtime specific structure like registering data */ + RUNTIME_ipiv_create( ipiv ); + + return rc; +} + +/** + ****************************************************************************** + * + * @ingroup Descriptor + * + * @brief Internal function to create tiled descriptor associated to a pivot. + * + ****************************************************************************** + * + * @param[in,out] pivot + * The pointer to the pivot descriptor to initialize. + * + * @param[in] desc + * The tile descriptor for which an associated pivot descriptor must be generated. 
+ * + ****************************************************************************** + * + * @return CHAMELEON_SUCCESS on success, CHAMELEON_ERR_NOT_INITIALIZED otherwise. + * + */ +int chameleon_pivot_init( CHAM_desc_pivot_t *pivot, const CHAM_desc_t *desc ) +{ + CHAM_context_t *chamctxt; + int rc = CHAMELEON_SUCCESS; + + memset( pivot, 0, sizeof(CHAM_desc_pivot_t) ); + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + + pivot->P = chameleon_desc_datadist_get_iparam( desc, 0 ); + pivot->Q = chameleon_desc_datadist_get_iparam( desc, 1 ); + pivot->n = chameleon_min(desc->mb, desc->nb); + pivot->nb = desc->mb; + pivot->dtyp = desc->dtyp; /* Create runtime specific structure like registering data */ - RUNTIME_ipiv_create( ipiv, desc ); + RUNTIME_pivot_create( pivot ); return rc; } @@ -96,10 +167,30 @@ int chameleon_ipiv_init( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc, int m, void * The pointer to the ipiv descriptor to destroy. * */ -void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv ) +{ + RUNTIME_ipiv_destroy( ipiv ); +} + +/** + ****************************************************************************** + * + * @ingroup Descriptor + * + * @brief Internal function to destroy a tiled descriptor associated to a pivot array. + * + ****************************************************************************** + * + * @param[in,out] pivot + * The pointer to the pivot descriptor to destroy. + * + * @param[in] desc + * The tile descriptor for which an associated pivot descriptor must be generated. 
+ * + */ +void chameleon_pivot_destroy( CHAM_desc_pivot_t *pivot ) { - RUNTIME_ipiv_destroy( ipiv, desc ); + RUNTIME_pivot_destroy( pivot ); } /** @@ -114,6 +205,9 @@ void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, * @param[in,out] ipiv * The pointer to the ipiv descriptor to initialize. * + * @param[in] side + * Specifies whether the permutation will be done on the rows or on the columns. + * * @param[in] desc * The tile descriptor for which an associated ipiv descriptor must be generated. * @@ -130,7 +224,8 @@ void chameleon_ipiv_destroy( CHAM_ipiv_t *ipiv, * @retval CHAMELEON_ERR_OUT_OF_RESOURCES if failed to allocated some ressources. * */ -int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m, void *data ) +int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, cham_side_t side, int mb, int m, + int p, int np, void *data ) { CHAM_context_t *chamctxt; CHAM_ipiv_t *ipiv; @@ -148,7 +243,7 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m return CHAMELEON_ERR_OUT_OF_RESOURCES; } - chameleon_ipiv_init( ipiv, desc, m, data ); + chameleon_ipiv_init( ipiv, side, mb, m, p, np, data, NULL ); *ipivptr = ipiv; return CHAMELEON_SUCCESS; @@ -163,9 +258,6 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m * ******************************************************************************* * - * @param[in] descA - * Descriptor of the matrix A. - * * @param[in,out] descIPIV * Descriptor of the pivot array. Should be initialized using * CHAMELEON_Ipiv_Create() with data filled with the vector of pivot. 
@@ -174,8 +266,7 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, int m * * */ -void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, - CHAM_ipiv_t *descIPIV ) +void CHAMELEON_Ipiv_Init( CHAM_ipiv_t *descIPIV ) { RUNTIME_option_t options; @@ -210,8 +301,7 @@ void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, * @retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr, - const CHAM_desc_t *desc ) +int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr ) { CHAM_context_t *chamctxt; CHAM_ipiv_t *ipiv; @@ -228,7 +318,7 @@ int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr, } ipiv = *ipivptr; - chameleon_ipiv_destroy( ipiv, desc ); + chameleon_ipiv_destroy( ipiv ); free(ipiv); *ipivptr = NULL; return CHAMELEON_SUCCESS; @@ -246,10 +336,10 @@ int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr, ****************************************************************************** * * @param[in] ipiv - * ipiv vector descriptor. + * ipiv descriptor. * * @param[in] sequence - * The seqeunce in which to submit the calls to flush the data. + * The sequence in which to submit the calls to flush the data. 
* ****************************************************************************** * diff --git a/include/chameleon.h b/include/chameleon.h index eb159547dc2594418ab41f27e7e6efd43e6f96d8..60320c596ca81a9407e41756844cb4eb37e65fbe 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -215,14 +215,16 @@ int CHAMELEON_Recursive_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flt blkaddr_fct_t get_blkaddr, blkldd_fct_t get_blkldd, blkrankof_fct_t get_rankof, void* get_rankof_arg ); -int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t **ipivptr, - const CHAM_desc_t *desc, - int m, - void *data ); -int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr, - const CHAM_desc_t *desc ); -int CHAMELEON_Ipiv_Flush ( const CHAM_ipiv_t *ipiv, - const RUNTIME_sequence_t *sequence ); +int CHAMELEON_Ipiv_Create ( CHAM_ipiv_t **ipivptr, + cham_side_t side, + int mb, + int m, + int p, + int np, + void *data ); +int CHAMELEON_Ipiv_Destroy( CHAM_ipiv_t **ipivptr ); +int CHAMELEON_Ipiv_Flush( const CHAM_ipiv_t *ipiv, + const RUNTIME_sequence_t *sequence ); int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int *ipiv, int root ); diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index d9540d5e8472ed15af9ead33296f9ac9cd76d6ab..279cf70dc8a999b91f80e24763f8ca13e49b42d5 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -333,6 +333,8 @@ void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ); void CHAMELEON_zgetrf_WS_Free( void *ws ); void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A ); void CHAMELEON_zgetrf_nopiv_WS_Free( void *ws ); +void *CHAMELEON_zlaswp_WS_Alloc( cham_side_t side, const CHAM_desc_t *A ); +void CHAMELEON_zlaswp_WS_Free( void *ws ); int CHAMELEON_Alloc_Workspace_zgesv_incpiv( int N, CHAM_desc_t **descL, int **IPIV, int p, int q); int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q); @@ -369,7 +371,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t 
*Af77, int LDA, CHAM_desc_t int CHAMELEON_zTile_to_Lapack( CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_zDesc2Lap() instead"))); int CHAMELEON_zLap2Desc( cham_uplo_t uplo, CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t *A ); int CHAMELEON_zDesc2Lap( cham_uplo_t uplo, CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA ); -void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, CHAM_ipiv_t *descIPIV ); +void CHAMELEON_Ipiv_Init( CHAM_ipiv_t *descIPIV ); /** * User Builder function prototypes diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h index 3c607885ae07f8410b85a90b4c70181ef9ba4491..208e588b17b9d41e53b556dfe74c6664ba8f1b7a 100644 --- a/include/chameleon/descriptor_helpers.h +++ b/include/chameleon/descriptor_helpers.h @@ -45,8 +45,11 @@ extern "C" { * @name Mapping functions * @{ */ -int chameleon_getrankof_2d ( const CHAM_desc_t *A, int m, int n ); -int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n ); +int chameleon_getrankof_2d ( const CHAM_desc_t *A, int m, int n ); +int chameleon_getrankof_2d_diag ( const CHAM_desc_t *A, int m, int n ); +int chameleon_getrankof_ipiv_2d_row ( const CHAM_ipiv_t *ipiv, int m, int n ); +int chameleon_getrankof_ipiv_2d_col ( const CHAM_ipiv_t *ipiv, int m, int n ); +int chameleon_getrankof_ipiv_2d_diag( const CHAM_ipiv_t *ipiv, int m, int n ); typedef struct custom_dist_s{ int *blocks_dist; // Matrix of size dist_m times dist_n with values from 1 to number of process MPI @@ -69,12 +72,12 @@ int chameleon_p_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k, int p ); void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k, int n, - void *ws_getrf ); + void *ws_reduce ); void chameleon_get_proc_involved_in_rowpanelk_2dbc( const CHAM_desc_t *A, int m, int k, - void *ws_getrf ); + void *ws_reduce ); /** * @} @@ -178,6 +181,28 @@ chameleon_getblkdim( const CHAM_desc_t *A, int m, cham_dim_t 
dim, int lm ) return chameleon_getblkdim_n( A, m, lm ); } } + +/** + * + * @ingroup Descriptor + * + * @brief Return tile dimension along the n dimension with regular tile sizes. + * + * @param[in] IPIV + * The ipiv descriptor. + * + * @param[in] m + * The index of the tile. + * + * @retval The length of the tile. + * + */ +static inline int +chameleon_getblkdim_ipiv( const CHAM_ipiv_t *IPIV, int m ) +{ + return ( ( m + 1 ) * IPIV->mb > IPIV->m ) ? IPIV->m - m * IPIV->mb : IPIV->mb; +} + /** * @} */ diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index 2d7125621127ea5d63df0bf97d133444ece6a3ac..66b601c83454be2b1f5e081a7826101fe2456886 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -718,33 +718,37 @@ void RUNTIME_ddisplay_oneprofile (cham_tasktype_t task); void RUNTIME_sdisplay_allprofile (); void RUNTIME_sdisplay_oneprofile (cham_tasktype_t task); -void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ); -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ); +void RUNTIME_ipiv_create ( CHAM_ipiv_t *ipiv ); +void RUNTIME_pivot_create ( CHAM_desc_pivot_t *pivot ); +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ); +void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot ); void RUNTIME_ipiv_gather ( const RUNTIME_sequence_t *sequence, CHAM_ipiv_t *desc, int *ipiv, int node ); +void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot, int m ); +void RUNTIME_pivot_flush ( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot ); void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ); void RUNTIME_ipiv_flush ( const RUNTIME_sequence_t *sequence, - const CHAM_ipiv_t *ipiv ); + const CHAM_ipiv_t *ipiv ); void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ); void *RUNTIME_ipiv_getaddr ( const CHAM_ipiv_t *ipiv, int m ); -void *RUNTIME_nextpiv_getaddr( 
const CHAM_ipiv_t *ipiv, int rank, int k, int h ); -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ); void *RUNTIME_perm_getaddr ( const CHAM_ipiv_t *ipiv, int m ); void *RUNTIME_invp_getaddr ( const CHAM_ipiv_t *ipiv, int m ); +void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ); +void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ); static inline void * -RUNTIME_pivot_getaddr( CHAM_ipiv_t *ipiv, int rank, int k, int h ) { +RUNTIME_pivot_getaddr( CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { if ( h%2 == 0 ) { - return RUNTIME_nextpiv_getaddr( ipiv, rank, k, h ); + return RUNTIME_nextpiv_getaddr( pivot, rank, k, h ); } else { - return RUNTIME_prevpiv_getaddr( ipiv, rank, k, h ); + return RUNTIME_prevpiv_getaddr( pivot, rank, k, h ); } } diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index c1d3d3a105a861f0083f7a6046bf8afbd321501d..d1f7dc73841230b5626d09556369d2a8e1b00fde 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -173,29 +173,49 @@ struct chameleon_desc_s { void *schedopt; /**> scheduler (QUARK|StarPU) specific structure */ }; +typedef struct chameleon_ipiv_s CHAM_ipiv_t; + +typedef int (*blkdim_ipiv_fct_t) ( const CHAM_ipiv_t*, int ); +typedef int (*blkrankof_ipiv_fct_t) ( const CHAM_ipiv_t*, int, int ); + +/** + * CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting + */ +struct chameleon_ipiv_s { + blkdim_ipiv_fct_t get_blkdim; /**> function to get chameleon tiles dimension within algorithms */ + blkrankof_ipiv_fct_t get_rankof; /**> function to get chameleon tiles MPI rank */ + + int *data; /**> Pointer to the data */ + void *ipiv; /**> Opaque array of pointers for the runtimes to handle the ipiv array */ + void *perm; /**> Opaque array of pointers for the runtimes to handle the temporary permutation array */ + void *invp; /**> Opaque array of pointers for the 
runtimes to handle the temporary inverse permutation array */ + int64_t mpitag_ipiv; /**> Initial mpi tag values for the ipiv handles */ + int64_t mpitag_perm; /**> Initial mpi tag values for the perm handles */ + int64_t mpitag_invp; /**> Initial mpi tag values for the invp handles */ + + int myrank; /**> MPI rank of the descriptor */ + int i; /**> row index to the beginning of the submatrix */ + int m; /**> The number of rows in the vector ipiv */ + int mb; /**> The number of rows per block */ + int mt; /**> The number of tiles */ + int P; /**> The number of processes per column on a tiled matrix */ + int NP; /**> The total number of processes */ +}; + /** * CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting */ -typedef struct chameleon_piv_s { - const CHAM_desc_t *desc; /**> Reference descriptor to compute data mapping based on diagonal tiles, - and get floating reference type */ - int *data; /**> Pointer to the data */ - void *ipiv; /**> Opaque array of pointers for the runtimes to handle the ipiv array */ - void *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ - void *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ - void *perm; /**> Opaque array of pointers for the runtimes to handle the temporary permutation array */ - void *invp; /**> Opaque array of pointers for the runtimes to handle the temporary inverse permutation array */ - int64_t mpitag_ipiv; /**> Initial mpi tag values for the ipiv handles */ - int64_t mpitag_nextpiv; /**> Initial mpi tag values for the nextpiv handles */ - int64_t mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles */ - int64_t mpitag_perm; /**> Initial mpi tag values for the nextpiv handles */ - int64_t mpitag_invp; /**> Initial mpi tag values for the prevpiv handles */ - int i; /**> row index to the beginning of the submatrix */ - int m; /**> The number of row in
the vector ipiv */ - int mb; /**> The number of row per block */ - int mt; /**> The number of tiles */ - int n; /**> The number of column considered (must be updated for each panel) */ -} CHAM_ipiv_t; +typedef struct chameleon_desc_pivot_s { + void *nextpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + void *prevpiv; /**> Opaque array of pointers for the runtimes to handle the pivot computation structure */ + int64_t mpitag_nextpiv; /**> Initial mpi tag values for the nextpiv handles */ + int64_t mpitag_prevpiv; /**> Initial mpi tag values for the prevpiv handles */ + int P; /**> The number of processes per column of the tiled matrix */ + int Q; /**> The number of processes per line of the tiled matrix */ + int nb; /**> The number of row per block */ + int n; /**> The number of column considered (must be updated for each panel) */ + cham_flttype_t dtyp; /**> Arithmetic used to store the rows/columns to swap */ +} CHAM_desc_pivot_t; static inline void * CHAM_tile_get_ptr( const CHAM_tile_t *tile ) diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h index d6e8e698c2b8d6a4edd9d516a16b0317638f2851..ee36352d4e2350c2793dde2b6847a20d53c572e0 100644 --- a/include/chameleon/tasks.h +++ b/include/chameleon/tasks.h @@ -174,7 +174,7 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options, void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, CHAM_ipiv_t *ipiv ); void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ws, int k, int h, int rank ); + CHAM_desc_pivot_t *pivot, int k, int h, int rank ); void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options, int m0, int m, int k, int K1, int K2, const CHAM_ipiv_t *ipivdesc, int ipivk ); diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index 8ff0ae0485e4427a01480ad8cfcfdedcceda76c2..abe3a2c543dfe1e196163f9ae84e8812e97b0bed 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h 
@@ -535,36 +535,38 @@ void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ws ); + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ws ); + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ws ); + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ws ); + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, int m, int n, int h, int m0, void *ws, CHAM_desc_t *A, int Am, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ); + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ); + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, int m, int n, int h, int m0, @@ -572,22 +574,24 @@ void INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ); + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ); + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ); void 
INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, int m, int n, int h, int ib, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ws ); + CHAM_desc_pivot_t *pivot ); void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, int h, int n, diff --git a/runtime/openmp/codelets/codelet_ipiv.c b/runtime/openmp/codelets/codelet_ipiv.c index ccc7e8f46ea496f30d00d81dd0f418ba07fdd175..6cede3e07f3c8a47df8c766bd3056855352eb210 100644 --- a/runtime/openmp/codelets/codelet_ipiv.c +++ b/runtime/openmp/codelets/codelet_ipiv.c @@ -30,7 +30,7 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options, } void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)options; @@ -38,11 +38,11 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, } void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, int k, int h, int rank ) + CHAM_desc_pivot_t *pivot, int k, int h, int rank ) { assert( 0 ); (void)options; - (void)ipiv; + (void)pivot; (void)k; (void)h; (void)rank; diff --git a/runtime/openmp/codelets/codelet_zgetrf_batched.c b/runtime/openmp/codelets/codelet_zgetrf_batched.c index 707cf59c58e37e9e8fdb3e01aed538c8d8f0dc1b..2f2b7c99785ffa738de81a54cdf5e964fa6c69a3 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_batched.c +++ b/runtime/openmp/codelets/codelet_zgetrf_batched.c @@ -28,7 +28,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, void *ws, CHAM_desc_t *A, int Am, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -40,21 +40,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, (void)Am; (void)An; (void)clargs_ptr; - (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, 
void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; (void)A; (void)An; (void)clargs_ptr; - (void)ipiv; + (void)pivot; } void @@ -64,7 +64,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -81,6 +82,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, (void)Un; (void)clargs_ptr; (void)ipiv; + (void)pivot; } void @@ -88,7 +90,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -99,4 +102,5 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, (void)Un; (void)clargs_ptr; (void)ipiv; + (void)pivot; } diff --git a/runtime/openmp/codelets/codelet_zgetrf_blocked.c b/runtime/openmp/codelets/codelet_zgetrf_blocked.c index 399a557a4d41f806aa51ab983b6412710c732d1b..a071bec0db476e523d4743c9575e4fea81d8ccf0 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_blocked.c +++ b/runtime/openmp/codelets/codelet_zgetrf_blocked.c @@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -41,13 +42,14 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, (void)Um; (void)Un; (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + 
CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -62,13 +64,13 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, (void)U; (void)Um; (void)Un; - (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, int m, int n, int h, int ib, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -79,5 +81,5 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, (void)U; (void)Um; (void)Un; - (void)ipiv; + (void)pivot; } diff --git a/runtime/openmp/codelets/codelet_zgetrf_percol.c b/runtime/openmp/codelets/codelet_zgetrf_percol.c index 6a1d2f6634fa8d6cd818b8ee87f9038abf2f0041..77fcca1ab9fbbe7f23973e22a04198c0df320134 100644 --- a/runtime/openmp/codelets/codelet_zgetrf_percol.c +++ b/runtime/openmp/codelets/codelet_zgetrf_percol.c @@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -37,12 +38,13 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, (void)Am; (void)An; (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -53,5 +55,5 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, (void)A; (void)Am; (void)An; - (void)ipiv; + (void)pivot; } diff --git a/runtime/openmp/codelets/codelet_zipiv_allreduce.c b/runtime/openmp/codelets/codelet_zipiv_allreduce.c index f96857000c70933cc7f3a7ce3a6885f9c9ccb913..088ff53ae34f0fbd7830c1d8b77820212546d437 100644 --- a/runtime/openmp/codelets/codelet_zipiv_allreduce.c +++ b/runtime/openmp/codelets/codelet_zipiv_allreduce.c @@ -19,7 +19,7 @@ void 
INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, int h, int n, @@ -27,7 +27,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, { (void)options; (void)A; - (void)ipiv; + (void)pivot; (void)k; (void)h; (void)n; diff --git a/runtime/openmp/control/runtime_descriptor_ipiv.c b/runtime/openmp/control/runtime_descriptor_ipiv.c index 97b1fbdeccf87b670a87ce98e941afd44fe1d0b0..2cb34db52346e8f02e33a8ba564603fc09105765 100644 --- a/runtime/openmp/control/runtime_descriptor_ipiv.c +++ b/runtime/openmp/control/runtime_descriptor_ipiv.c @@ -19,20 +19,28 @@ */ #include "chameleon_openmp.h" -void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)ipiv; - (void)desc; } -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; +} + +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)ipiv; - (void)desc; +} + +void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; } void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) @@ -43,22 +51,22 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) return NULL; } -void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { assert( 0 ); - (void)ipiv; + (void)pivot; (void)rank; - (void)m; + (void)k; (void)h; return NULL; } -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { assert( 0 ); - (void)ipiv; + (void)pivot; (void)rank; - (void)m; + (void)k; (void)h; return NULL; } @@ -79,21 +87,38 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k ) return 
NULL; } -void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, - const CHAM_ipiv_t *ipiv, int m ) +void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot, int rank ) { assert( 0 ); (void)sequence; - (void)ipiv; - (void)m; + (void)pivot; + (void)rank; +} + +void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; + (void)sequence; } void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv ) { assert( 0 ); + (void)sequence; (void)ipiv; +} + +void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + assert( 0 ); (void)sequence; + (void)ipiv; + (void)m; } void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, diff --git a/runtime/parsec/codelets/codelet_ipiv.c b/runtime/parsec/codelets/codelet_ipiv.c index 2145e00b3575d7de659f28422064616815acd22a..ded4d960ca5917342d7cef6235a68f69caafe075 100644 --- a/runtime/parsec/codelets/codelet_ipiv.c +++ b/runtime/parsec/codelets/codelet_ipiv.c @@ -38,11 +38,11 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, } void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, int k, int h, int rank ) + CHAM_desc_pivot_t *pivot, int k, int h, int rank ) { assert( 0 ); (void)options; - (void)ipiv; + (void)pivot; (void)k; (void)h; (void)rank; diff --git a/runtime/parsec/codelets/codelet_zgetrf_batched.c b/runtime/parsec/codelets/codelet_zgetrf_batched.c index 366255b3d54a3e9176dca321416b08e40426929c..50cb396c299d642bc906b42d18f368c62c6b738a 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_batched.c +++ b/runtime/parsec/codelets/codelet_zgetrf_batched.c @@ -28,7 +28,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, void *ws, CHAM_desc_t *A, int Am, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -40,21 
+40,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, (void)Am; (void)An; (void)clargs_ptr; - (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; (void)A; (void)An; (void)clargs_ptr; - (void)ipiv; + (void)pivot; } void @@ -64,7 +64,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -81,6 +82,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, (void)Un; (void)clargs_ptr; (void)ipiv; + (void)pivot; } void @@ -88,7 +90,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -99,4 +102,5 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, (void)Un; (void)clargs_ptr; (void)ipiv; + (void)pivot; } diff --git a/runtime/parsec/codelets/codelet_zgetrf_blocked.c b/runtime/parsec/codelets/codelet_zgetrf_blocked.c index 812ab095e0bf27c6c0858db4c8d261ff3ac5c234..81a7df7a5acc01515e68a1960f36dc7e22a10065 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_blocked.c +++ b/runtime/parsec/codelets/codelet_zgetrf_blocked.c @@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -41,13 +42,14 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t 
*options, (void)Um; (void)Un; (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -62,13 +64,13 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, (void)U; (void)Um; (void)Un; - (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, int m, int n, int h, int ib, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -79,5 +81,5 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, (void)U; (void)Um; (void)Un; - (void)ipiv; + (void)pivot; } diff --git a/runtime/parsec/codelets/codelet_zgetrf_percol.c b/runtime/parsec/codelets/codelet_zgetrf_percol.c index f7f5d0205a13e53d7224d0ea5bc25921888751fd..c18466a6c8990247d249b1596d19ad71da1a588f 100644 --- a/runtime/parsec/codelets/codelet_zgetrf_percol.c +++ b/runtime/parsec/codelets/codelet_zgetrf_percol.c @@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -37,12 +38,13 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, (void)Am; (void)An; (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -53,5 +55,5 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, (void)A; (void)Am; (void)An; - (void)ipiv; + (void)pivot; } diff --git a/runtime/parsec/codelets/codelet_zipiv_allreduce.c 
b/runtime/parsec/codelets/codelet_zipiv_allreduce.c index 4b9cacd70fb879357d4850b0839a3ce7a112b35a..d7bb5a1823cdbfef9fcc5275d9b6592b72a742b6 100644 --- a/runtime/parsec/codelets/codelet_zipiv_allreduce.c +++ b/runtime/parsec/codelets/codelet_zipiv_allreduce.c @@ -19,7 +19,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, int h, int n, @@ -27,7 +27,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, { (void)options; (void)A; - (void)ipiv; + (void)pivot; (void)k; (void)h; (void)n; diff --git a/runtime/parsec/control/runtime_descriptor_ipiv.c b/runtime/parsec/control/runtime_descriptor_ipiv.c index 4b34eef933a12b295a1927c5264a40f0edc5f586..cda5e9ad0ee881ea31a50fe759b727c50ee95b1d 100644 --- a/runtime/parsec/control/runtime_descriptor_ipiv.c +++ b/runtime/parsec/control/runtime_descriptor_ipiv.c @@ -19,20 +19,28 @@ */ #include "chameleon_parsec.h" -void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)ipiv; - (void)desc; } -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; +} + +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)ipiv; - (void)desc; +} + +void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; } void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) @@ -43,22 +51,22 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) return NULL; } -void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { assert( 0 ); - (void)ipiv; + (void)pivot; (void)rank; - (void)m; + (void)k; (void)h; return NULL; } -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h ) 
+void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { assert( 0 ); - (void)ipiv; + (void)pivot; (void)rank; - (void)m; + (void)k; (void)h; return NULL; } @@ -79,6 +87,23 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k ) return NULL; } +void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot, int rank ) +{ + assert( 0 ); + (void)sequence; + (void)pivot; + (void)rank; +} + +void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; + (void)sequence; +} + void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { diff --git a/runtime/quark/codelets/codelet_ipiv.c b/runtime/quark/codelets/codelet_ipiv.c index bf0846d3dfe9d6043162827a4d0a3eab9414caed..77dc6f6d475df9808cd0dd8ceb4985bd6ad9ac5d 100644 --- a/runtime/quark/codelets/codelet_ipiv.c +++ b/runtime/quark/codelets/codelet_ipiv.c @@ -38,11 +38,11 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, } void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, int k, int h, int rank ) + CHAM_desc_pivot_t *pivot, int k, int h, int rank ) { assert( 0 ); (void)options; - (void)ipiv; + (void)pivot; (void)k; (void)h; (void)rank; diff --git a/runtime/quark/codelets/codelet_zgetrf_batched.c b/runtime/quark/codelets/codelet_zgetrf_batched.c index a3a21329086b657c26ca726f985f31c771818f0e..a41f230ff85d504e9325f5807bee88cb25dbe2bf 100644 --- a/runtime/quark/codelets/codelet_zgetrf_batched.c +++ b/runtime/quark/codelets/codelet_zgetrf_batched.c @@ -28,7 +28,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, void *ws, CHAM_desc_t *A, int Am, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -40,21 +40,21 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, (void)Am; (void)An; 
(void)clargs_ptr; - (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; (void)A; (void)An; (void)clargs_ptr; - (void)ipiv; + (void)pivot; } void @@ -64,7 +64,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -81,6 +82,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, (void)Un; (void)clargs_ptr; (void)ipiv; + (void)pivot; } void @@ -88,7 +90,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -99,4 +102,5 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, (void)Un; (void)clargs_ptr; (void)ipiv; + (void)pivot; } diff --git a/runtime/quark/codelets/codelet_zgetrf_blocked.c b/runtime/quark/codelets/codelet_zgetrf_blocked.c index d3e1029709c256286c0615ff97714b6295efb011..674b898fd03d0919d881ad6418428a2278f56075 100644 --- a/runtime/quark/codelets/codelet_zgetrf_blocked.c +++ b/runtime/quark/codelets/codelet_zgetrf_blocked.c @@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -41,13 +42,14 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, (void)Um; (void)Un; (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_blocked_offdiag( const 
RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -62,13 +64,13 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, (void)U; (void)Um; (void)Un; - (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, int m, int n, int h, int ib, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -79,5 +81,5 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, (void)U; (void)Um; (void)Un; - (void)ipiv; + (void)pivot; } diff --git a/runtime/quark/codelets/codelet_zgetrf_percol.c b/runtime/quark/codelets/codelet_zgetrf_percol.c index cda25ec30442422a1347899ffd83b2643637e4ac..a6041fc88a55e2a24c4e8e656b7c27b7b2a76bd8 100644 --- a/runtime/quark/codelets/codelet_zgetrf_percol.c +++ b/runtime/quark/codelets/codelet_zgetrf_percol.c @@ -25,7 +25,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -37,12 +38,13 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, (void)Am; (void)An; (void)ipiv; + (void)pivot; } void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { assert( 0 ); (void)options; @@ -53,5 +55,5 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, (void)A; (void)Am; (void)An; - (void)ipiv; + (void)pivot; } diff --git a/runtime/quark/codelets/codelet_zipiv_allreduce.c b/runtime/quark/codelets/codelet_zipiv_allreduce.c index fe169b65d847f3d74a9d8967dbb1223e15e7cca2..9d93d2cd38fd0aefeb50ed15c4aeb61d98a0de6c 
100644 --- a/runtime/quark/codelets/codelet_zipiv_allreduce.c +++ b/runtime/quark/codelets/codelet_zipiv_allreduce.c @@ -19,7 +19,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, int h, int n, @@ -27,7 +27,7 @@ void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, { (void)options; (void)A; - (void)ipiv; + (void)pivot; (void)k; (void)h; (void)n; diff --git a/runtime/quark/control/runtime_descriptor_ipiv.c b/runtime/quark/control/runtime_descriptor_ipiv.c index 16c83c381b4ebd88a5fd44ed00a5ece48ce89bb4..3f16bfe4cdd03398ce01406a0d40050846669a70 100644 --- a/runtime/quark/control/runtime_descriptor_ipiv.c +++ b/runtime/quark/control/runtime_descriptor_ipiv.c @@ -19,20 +19,28 @@ */ #include "chameleon_quark.h" -void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)ipiv; - (void)desc; } -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; +} + +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) { assert( 0 ); (void)ipiv; - (void)desc; +} + +void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; } void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) @@ -43,22 +51,22 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) return NULL; } -void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { assert( 0 ); - (void)ipiv; + (void)pivot; (void)rank; - (void)m; + (void)k; (void)h; return NULL; } -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int m, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { assert( 0 ); - (void)ipiv; + (void)pivot; (void)rank; - 
(void)m; + (void)k; (void)h; return NULL; } @@ -79,6 +87,23 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int k ) return NULL; } +void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot, int rank ) +{ + assert( 0 ); + (void)sequence; + (void)pivot; + (void)rank; +} + +void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot ) +{ + assert( 0 ); + (void)pivot; + (void)sequence; +} + void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c index 5a16c6e2dda5d2e411415bf368f214bbbc8ec71b..97e228eab636ce9c310b84a5c692a1c5fefee670 100644 --- a/runtime/starpu/codelets/codelet_ipiv.c +++ b/runtime/starpu/codelets/codelet_ipiv.c @@ -111,7 +111,7 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, cl_args = malloc( sizeof(struct cl_laswp_args_s) ); cl_args->m0 = m0; cl_args->n = n; - cl_args->m = ipiv->desc->m; + cl_args->m = ipiv->m; cl_args->data = ipiv->data + m0; @@ -124,14 +124,14 @@ void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options, } void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, int k, int h, int rank ) + CHAM_desc_pivot_t *pivot, int k, int h, int rank ) { - starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h-1 ); + starpu_data_handle_t prevpiv = RUNTIME_pivot_getaddr( pivot, rank, k, h-1 ); #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI) #if !defined(HAVE_STARPU_MPI_REDUX_WRAPUP) - starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( ipiv, rank, k, h ); - if ( h < ipiv->n ) { + starpu_data_handle_t nextpiv = RUNTIME_pivot_getaddr( pivot, rank, k, h ); + if ( h < pivot->n ) { starpu_mpi_redux_data_prio_tree( options->sequence->comm, nextpiv, options->priority, 2 /* Binary tree */ ); } diff --git a/runtime/starpu/codelets/codelet_zgetrf_batched.c 
b/runtime/starpu/codelets/codelet_zgetrf_batched.c index 0ff4ed9854228109928e30ae4b34013338a32a5c..79aae3a122819a32cbd1c2969feb1154627efa2b 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_batched.c +++ b/runtime/starpu/codelets/codelet_zgetrf_batched.c @@ -94,7 +94,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, void *ws, CHAM_desc_t *A, int Am, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { #if !defined(HAVE_STARPU_NONE_NONZERO) /* STARPU_NONE can't be equal to 0 */ @@ -136,7 +136,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched( const RUNTIME_option_t *options, A->get_blktile( A, Am, An ) ); if ( clargs->tasks_nbr == batch_size ) { - INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, An, clargs_ptr, ipiv ); + INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, An, clargs_ptr, pivot ); } } @@ -146,7 +146,7 @@ void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { #if !defined(HAVE_STARPU_NONE_NONZERO) /* STARPU_NONE can't be equal to 0 */ @@ -160,7 +160,7 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, if ( clargs == NULL ) { return; } - int access_npiv = ( clargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_npiv = ( clargs->h == pivot->n ) ? STARPU_R : STARPU_REDUX; int access_ppiv = ( clargs->h == 0 ) ? 
STARPU_NONE : STARPU_R; rt_starpu_insert_task( @@ -168,8 +168,8 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, /* Task codelet arguments */ STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_batched_args_s), STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, - access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h-1 ), + access_npiv, RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h ), + access_ppiv, RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h-1 ), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, options->workerid, @@ -186,7 +186,7 @@ void INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { struct cl_zgetrf_batched_args_s *myclargs = *clargs_ptr; int rankA = A->myrank; @@ -199,8 +199,8 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_panel_percol_offdiag_batched, zgetrf_panel_offdiag_batched, zgetrf_batched, myclargs->tasks_nbr + 2 ); - access_npiv = ( myclargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX; - access_ppiv = ( myclargs->h == 0 ) ? STARPU_NONE : STARPU_R; + access_npiv = ( myclargs->h == pivot->n ) ? STARPU_R : STARPU_REDUX; + access_ppiv = ( myclargs->h == 0 ) ? 
STARPU_NONE : STARPU_R; /* * Register the data handles, no exchange needed @@ -209,8 +209,8 @@ INSERT_TASK_zgetrf_panel_offdiag_batched_flush( const RUNTIME_option_t *options, for ( k = 0; k < myclargs->tasks_nbr; k++ ) { starpu_cham_register_descr( &nbdata, descrs, myclargs->handle_mode[ k ].handle, STARPU_RW ); } - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h ), access_npiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h-1 ), access_ppiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h ), access_npiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h-1 ), access_ppiv ); task = starpu_task_create(); task->cl = cl; @@ -311,7 +311,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { #if !defined(HAVE_STARPU_NONE_NONZERO) /* STARPU_NONE can't be equal to 0 */ @@ -370,7 +371,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched( const RUNTIME_option_t *options, A->get_blktile( A, Am, An ) ); if ( clargs->tasks_nbr == batch_size ) { - INSERT_TASK_zgetrf_panel_blocked_batched_flush( options, A, An, U, Um, Un, clargs_ptr, ipiv ); + INSERT_TASK_zgetrf_panel_blocked_batched_flush( options, A, An, U, Um, Un, clargs_ptr, ipiv, pivot ); } } @@ -381,7 +382,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { #if !defined(HAVE_STARPU_NONE_NONZERO) /* STARPU_NONE can't be equal to 0 */ @@ -397,7 +399,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, return; } - access_npiv = ( 
clargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + access_npiv = ( clargs->h == pivot->n ) ? STARPU_R : STARPU_REDUX; access_ipiv = STARPU_RW; access_ppiv = STARPU_R; accessU = STARPU_RW; @@ -423,8 +425,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, /* Task codelet arguments */ STARPU_CL_ARGS, clargs, sizeof(struct cl_zgetrf_batched_args_s), STARPU_DATA_MODE_ARRAY, clargs->handle_mode, clargs->tasks_nbr, - access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, clargs->h - 1 ), + access_npiv, RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h ), + access_ppiv, RUNTIME_pivot_getaddr( pivot, rankA, An, clargs->h - 1 ), access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un ), STARPU_PRIORITY, options->priority, @@ -444,7 +446,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, CHAM_desc_t *A, int An, CHAM_desc_t *U, int Um, int Un, void **clargs_ptr, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { struct cl_zgetrf_batched_args_s *myclargs = *clargs_ptr; int rankA = A->myrank; @@ -458,7 +461,7 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_panel_blocked_batched, zgetrf_panel_blocked_batched, zgetrf_batched, myclargs->tasks_nbr + 4 ); - access_npiv = ( myclargs->h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + access_npiv = ( myclargs->h == pivot->n ) ? 
STARPU_R : STARPU_REDUX; access_ipiv = STARPU_RW; access_ppiv = STARPU_R; accessU = STARPU_RW; @@ -486,8 +489,8 @@ INSERT_TASK_zgetrf_panel_blocked_batched_flush( const RUNTIME_option_t *options, for ( k = 0; k < myclargs->tasks_nbr; k++ ) { starpu_cham_register_descr( &nbdata, descrs, myclargs->handle_mode[ k ].handle, STARPU_RW ); } - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h ), access_npiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, myclargs->h-1 ), access_ppiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h ), access_npiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, myclargs->h-1 ), access_ppiv ); starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An), access_ipiv ); starpu_cham_exchange_handle_before_execution( options, ¶ms, &nbdata, descrs, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c index f1df48f3cc7c3b6d460f859bc064e841bd4f5dc7..b2c3b1c5801a9fd9c6c877191553265d455387e5 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c +++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c @@ -98,7 +98,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { #if !defined(HAVE_STARPU_NONE_NONZERO) /* STARPU_NONE can't be equal to 0 */ @@ -130,9 +131,9 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, clargs->sequence = options->sequence; clargs->request = options->request; - int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; - int access_npiv = ( h == ipiv->n ) ? 
STARPU_R : STARPU_REDUX; - int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; + int access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; int accessU = STARPU_RW; if ( h == 0 ) { @@ -163,8 +164,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, /* Task handles */ STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), - access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + access_npiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), /* Common task arguments */ @@ -181,7 +182,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { int ret, access_ipiv, access_npiv, access_ppiv, accessU; struct starpu_task *task; @@ -199,9 +201,9 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_blocked_diag, zgetrf_blocked_diag, zgetrf_blocked, 5 ); - access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; - access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; - access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; + access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; + access_ppiv = ( h == 0 ) ? 
STARPU_NONE : STARPU_R; accessU = STARPU_RW; if ( h == 0 ) { accessU = STARPU_NONE; @@ -220,8 +222,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, starpu_cham_exchange_init_params( options, ¶ms, rankA ); starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW ); starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An), access_ipiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), access_npiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), access_npiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv ); starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ), accessU ); task = starpu_task_create(); @@ -318,15 +320,15 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { #if !defined(HAVE_STARPU_NONE_NONZERO) /* STARPU_NONE can't be equal to 0 */ fprintf( stderr, "INSERT_TASK_zgetrf_blocked_diag: STARPU_NONE can not be equal to 0\n" ); assert( 0 ); #endif - int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; - int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + int access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; int accessU = ((h%ib == 0) && (h > 0)) ? 
STARPU_R : STARPU_NONE; int rankA = A->get_rankof(A, Am, An); @@ -376,8 +378,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, /* Task handles */ STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + access_npiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), accessU, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), /* Common task arguments */ @@ -394,12 +396,12 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, int ib, CHAM_desc_t *A, int Am, int An, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { int ret; struct starpu_task *task; int rankA = A->get_rankof(A, Am, An); - int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; int accessU = ((h%ib == 0) && (h > 0)) ? 
STARPU_R : STARPU_NONE; @@ -423,8 +425,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, */ starpu_cham_exchange_init_params( options, ¶ms, rankA ); starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), access_npiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), access_npiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv ); starpu_cham_exchange_handle_before_execution( options, ¶ms, &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ), accessU ); @@ -510,7 +512,7 @@ CODELETS_CPU(zgetrf_blocked_trsm, cl_zgetrf_blocked_trsm_cpu_func) void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, int m, int n, int h, int ib, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { void (*callback)(void*) = options->profiling ? 
cl_zgetrf_blocked_trsm_callback : NULL; const char *cl_name = "zgetrf_blocked_trsm"; @@ -544,7 +546,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, /* Task handles */ STARPU_RW, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), - STARPU_R, RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ), + STARPU_R, RUNTIME_pivot_getaddr( pivot, rankU, Un, h-1 ), /* Common task arguments */ STARPU_PRIORITY, options->priority, @@ -559,7 +561,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, int m, int n, int h, int ib, CHAM_desc_t *U, int Um, int Un, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { int ret; struct starpu_task *task; @@ -576,7 +578,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, */ starpu_cham_exchange_init_params( options, ¶ms, rankU ); starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( U, ChamComplexDouble, Um, Un ), STARPU_RW ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankU, Un, h-1 ), STARPU_R ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankU, Un, h-1 ), STARPU_R ); task = starpu_task_create(); task->cl = cl; diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c index 9a0ec048b78b68569974267edb5c62aa97ce65d2..95fb7ea88c785d65dbdf095563db664b5656d8ff 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_percol.c +++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c @@ -86,7 +86,8 @@ CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func ) void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { void (*callback)(void*) = options->profiling ? 
cl_zgetrf_percol_diag_callback : NULL; const char *cl_name = "zgetrf_percol_diag"; @@ -102,9 +103,9 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, return; } - int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; - int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; - int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + int access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; + int access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; + int access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; /* Handle cache */ CHAMELEON_BEGIN_ACCESS_DECLARATION; @@ -132,8 +133,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, /* Task handles */ STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), access_ipiv, RUNTIME_ipiv_getaddr( ipiv, An ), - access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + access_npiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), /* Common task arguments */ STARPU_PRIORITY, options->priority, @@ -148,7 +149,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot ) { int ret, access_ipiv, access_npiv, access_ppiv; struct starpu_task *task; @@ -160,9 +162,9 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_diag, zgetrf_percol_diag, zgetrf_percol, 4 ); - access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; - access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; - access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + access_ipiv = ( h == 0 ) ? STARPU_W : STARPU_RW; + access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; + access_ppiv = ( h == 0 ) ? 
STARPU_NONE : STARPU_R; /* * Register the data handles, no exchange needed @@ -170,8 +172,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, starpu_cham_exchange_init_params( options, ¶ms, rankA ); starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW ); starpu_cham_register_descr( &nbdata, descrs, RUNTIME_ipiv_getaddr( ipiv, An), access_ipiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), access_npiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), access_npiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv ); task = starpu_task_create(); task->cl = cl; @@ -242,11 +244,11 @@ CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func) void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL; const char *cl_name = "zgetrf_percol_offdiag"; - int access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; + int access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; int access_ppiv = ( h == 0 ) ? 
STARPU_NONE : STARPU_R; int rankA = A->get_rankof(A, Am, An); #if !defined(HAVE_STARPU_NONE_NONZERO) @@ -284,8 +286,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, /* Task handles */ STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - access_npiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), - access_ppiv, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), + access_npiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), + access_ppiv, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), /* Common task arguments */ STARPU_PRIORITY, options->priority, @@ -300,7 +302,7 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, int m, int n, int h, int m0, CHAM_desc_t *A, int Am, int An, - CHAM_ipiv_t *ipiv ) + CHAM_desc_pivot_t *pivot ) { int ret, access_npiv, access_ppiv; struct starpu_task *task; @@ -312,16 +314,16 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, INSERT_TASK_COMMON_PARAMETERS_EXTENDED( zgetrf_percol_offdiag, zgetrf_percol_offdiag, zgetrf_percol, 3 ); - access_npiv = ( h == ipiv->n ) ? STARPU_R : STARPU_REDUX; - access_ppiv = ( h == 0 ) ? STARPU_NONE : STARPU_R; + access_npiv = ( h == pivot->n ) ? STARPU_R : STARPU_REDUX; + access_ppiv = ( h == 0 ) ? 
STARPU_NONE : STARPU_R; /* * Register the data handles, no exchange needed */ starpu_cham_exchange_init_params( options, ¶ms, rankA ); starpu_cham_register_descr( &nbdata, descrs, RTBLKADDR( A, ChamComplexDouble, Am, An ), STARPU_RW ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h ), access_npiv ); - starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( ipiv, rankA, An, h-1 ), access_ppiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h ), access_npiv ); + starpu_cham_register_descr( &nbdata, descrs, RUNTIME_pivot_getaddr( pivot, rankA, An, h-1 ), access_ppiv ); task = starpu_task_create(); task->cl = cl; diff --git a/runtime/starpu/codelets/codelet_zipiv_allreduce.c b/runtime/starpu/codelets/codelet_zipiv_allreduce.c index e79a1841d491524d67ffd3c398bba8efe5706690..51680cd9d4a99469541856b530f227a26a8682e3 100644 --- a/runtime/starpu/codelets/codelet_zipiv_allreduce.c +++ b/runtime/starpu/codelets/codelet_zipiv_allreduce.c @@ -81,7 +81,7 @@ CODELETS_CPU( zipiv_allreduce, cl_zipiv_allreduce_cpu_func ) static void INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int me, int dst, int k, @@ -90,14 +90,14 @@ INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options, rt_starpu_insert_task( NULL, STARPU_EXECUTE_ON_NODE, dst, - STARPU_R, RUNTIME_pivot_getaddr( ipiv, me, k, h ), + STARPU_R, RUNTIME_pivot_getaddr( pivot, me, k, h ), STARPU_PRIORITY, options->priority, 0 ); } static void INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int me, int src, int k, @@ -112,20 +112,20 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options, rt_starpu_insert_task( &cl_zipiv_allreduce, STARPU_CL_ARGS, clargs, sizeof(struct cl_redux_args_s), - STARPU_RW, RUNTIME_pivot_getaddr( ipiv, me, k, h ), - STARPU_R, RUNTIME_pivot_getaddr( ipiv, 
src, k, h ), + STARPU_RW, RUNTIME_pivot_getaddr( pivot, me, k, h ), + STARPU_R, RUNTIME_pivot_getaddr( pivot, src, k, h ), STARPU_EXECUTE_ON_NODE, me, STARPU_EXECUTE_ON_WORKER, options->workerid, STARPU_PRIORITY, options->priority, 0 ); - starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( ipiv, src, k, h ) ); + starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( pivot, src, k, h ) ); } #else /* defined(CHAMELEON_STARPU_USE_INSERT) */ static void INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int me, int dst, int k, @@ -135,7 +135,7 @@ INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options, starpu_cham_exchange_init_params( options, ¶ms, dst ); starpu_cham_exchange_handle_before_execution( options, ¶ms, &nbdata, descrs, - RUNTIME_pivot_getaddr( ipiv, me, k, h ), + RUNTIME_pivot_getaddr( pivot, me, k, h ), STARPU_R ); starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs ); (void)cl; @@ -144,7 +144,7 @@ INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options, static void INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int me, int src, int k, @@ -157,10 +157,10 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options, starpu_cham_exchange_init_params( options, ¶ms, me ); starpu_cham_exchange_handle_before_execution( options, ¶ms, &nbdata, descrs, - RUNTIME_pivot_getaddr( ipiv, me, k, h ), + RUNTIME_pivot_getaddr( pivot, me, k, h ), STARPU_RW ); starpu_cham_exchange_handle_before_execution( options, ¶ms, &nbdata, descrs, - RUNTIME_pivot_getaddr( ipiv, src, k, h ), + RUNTIME_pivot_getaddr( pivot, src, k, h ), STARPU_R ); task = starpu_task_create(); @@ -193,7 +193,7 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options, } starpu_cham_task_exchange_data_after_execution( options, params, nbdata, descrs ); - 
starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( ipiv, src, k, h ) ); + starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( pivot, src, k, h ) ); } #endif /* defined(CHAMELEON_STARPU_USE_INSERT) */ @@ -201,7 +201,7 @@ INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options, static void zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int *proc_involved, int k, int h, @@ -213,9 +213,9 @@ zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options, int shift = 1; if ( h > 0 ) { - starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) ); + starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( pivot, A->myrank, k, h-1 ) ); } - if ( h >= ipiv->n ) { + if ( h >= pivot->n ) { return; } @@ -233,8 +233,8 @@ zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options, p_send = proc_involved[ ( me + shift ) % np_involved ]; p_recv = proc_involved[ ( me - shift + np_involved ) % np_involved ]; - INSERT_TASK_zipiv_allreduce_send( options, ipiv, A->myrank, p_send, k, h ); - INSERT_TASK_zipiv_allreduce_recv( options, ipiv, A->myrank, p_recv, k, h, n ); + INSERT_TASK_zipiv_allreduce_send( options, pivot, A->myrank, p_send, k, h ); + INSERT_TASK_zipiv_allreduce_recv( options, pivot, A->myrank, p_recv, k, h, n ); shift = shift << 1; np_iter = chameleon_ceil( np_iter, 2 ); @@ -245,32 +245,32 @@ zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options, void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, int h, int n, void *ws ) { - struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws; - cham_getrf_allreduce_t alg = tmp->alg_allreduce; + struct chameleon_pzlaswp_s *tmp = (struct chameleon_pzlaswp_s *)ws; + cham_getrf_allreduce_t alg = tmp->reduce.alg_allreduce; switch( alg ) { case 
ChamStarPUTasks: default: - zipiv_allreduce_chameleon_starpu_task( options, A, ipiv, tmp->proc_involved, k, h, n ); + zipiv_allreduce_chameleon_starpu_task( options, A, pivot, tmp->reduce.proc_involved, k, h, n ); } } #else void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options, CHAM_desc_t *A, - CHAM_ipiv_t *ipiv, + CHAM_desc_pivot_t *pivot, int k, int h, int n, void *ws ) { if ( h > 0 ) { - starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) ); + starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( pivot, A->myrank, k, h-1 ) ); } (void)options; diff --git a/runtime/starpu/codelets/codelet_zperm_allreduce.c b/runtime/starpu/codelets/codelet_zperm_allreduce.c index 6e6bf8a0c3408f6064fa84f1674336c861938411..eccba676db24837421da8e9cc6d760e5feaee149 100644 --- a/runtime/starpu/codelets/codelet_zperm_allreduce.c +++ b/runtime/starpu/codelets/codelet_zperm_allreduce.c @@ -250,9 +250,9 @@ zperm_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options, int ipivk, int k, int n, - struct chameleon_pzgetrf_s *ws ) + CHAM_reduce_t *reduce ) { - int *proc_involved = ws->proc_involved; + int *proc_involved = reduce->proc_involved; int np_involved = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k ); int np_iter = np_involved; int p_recv, p_send, me, p_first; @@ -299,12 +299,12 @@ INSERT_TASK_zperm_allreduce_row( const RUNTIME_option_t *options, int n, void *ws ) { - struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws; - cham_getrf_allreduce_t alg = tmp->alg_allreduce; + struct chameleon_pzlaswp_s *tmp = (struct chameleon_pzlaswp_s *)ws; + cham_getrf_allreduce_t alg = tmp->reduce.alg_allreduce; switch( alg ) { case ChamStarPUTasks: default: - zperm_allreduce_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, k, n, tmp ); + zperm_allreduce_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, k, n, &(tmp->reduce) ); } } diff --git 
a/runtime/starpu/codelets/codelet_zperm_allreduce_col.c b/runtime/starpu/codelets/codelet_zperm_allreduce_col.c index 192b977632c54780d36e0086a909e1edca30a3c2..f1ad9ff9bcc1217410373d77162eedac87a3f578 100644 --- a/runtime/starpu/codelets/codelet_zperm_allreduce_col.c +++ b/runtime/starpu/codelets/codelet_zperm_allreduce_col.c @@ -250,9 +250,9 @@ zperm_allreduce_col_chameleon_starpu_task( const RUNTIME_option_t *options, int ipivk, int m, int k, - struct chameleon_pzgetrf_s *ws ) + CHAM_reduce_t *reduce ) { - int *proc_involved = ws->proc_involved; + int *proc_involved = reduce->proc_involved; int np_involved = chameleon_min( chameleon_desc_datadist_get_iparam(A, 1), A->nt - k ); int np_iter = np_involved; int p_recv, p_send, me, p_first; @@ -299,12 +299,12 @@ INSERT_TASK_zperm_allreduce_col( const RUNTIME_option_t *options, int k, void *ws ) { - struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws; - cham_getrf_allreduce_t alg = tmp->alg_allreduce; + struct chameleon_pzlaswp_s *tmp = (struct chameleon_pzlaswp_s *)ws; + cham_getrf_allreduce_t alg = tmp->reduce.alg_allreduce; switch( alg ) { case ChamStarPUTasks: default: - zperm_allreduce_col_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, m, k, tmp ); + zperm_allreduce_col_chameleon_starpu_task( options, dir, A, U, Um, Un, ipiv, ipivk, m, k, &(tmp->reduce) ); } } diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c index b7d07b171ac8303984afa29e2556be5559eeab3e..ada48a52cb15af180cb870f876fdf5e01f0442db 100644 --- a/runtime/starpu/control/runtime_descriptor_ipiv.c +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -23,19 +23,13 @@ /** * Create ws_pivot runtime structures */ -void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv ) { assert( ipiv ); - size_t P = chameleon_desc_datadist_get_iparam(desc, 0); - size_t nbhandles = 3 * ipiv->mt + 2 * P; + size_t 
nbhandles = 3 * ipiv->mt; starpu_data_handle_t *handles = calloc( nbhandles, sizeof(starpu_data_handle_t) ); ipiv->ipiv = handles; handles += ipiv->mt; - ipiv->nextpiv = handles; - handles += P; - ipiv->prevpiv = handles; - handles += P; ipiv->perm = handles; handles += ipiv->mt; ipiv->invp = handles; @@ -51,10 +45,36 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, chameleon_fatal_error("RUNTIME_ipiv_create", "Can't pursue computation since no more tags are available for ipiv structure"); return; } - ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv + ipiv->mt; - ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + P; - ipiv->mpitag_perm = ipiv->mpitag_prevpiv + P; - ipiv->mpitag_invp = ipiv->mpitag_perm + ipiv->mt; + ipiv->mpitag_perm = ipiv->mpitag_ipiv + ipiv->mt; + ipiv->mpitag_invp = ipiv->mpitag_perm + ipiv->mt; + } +#endif +} + +/** + * Create ws_pivot runtime structures + */ +void RUNTIME_pivot_create( CHAM_desc_pivot_t *pivot ) +{ + assert( pivot ); + size_t nbhandles = 2 * pivot->P; + starpu_data_handle_t *handles = calloc( nbhandles, sizeof(starpu_data_handle_t) ); + pivot->nextpiv = handles; + handles += pivot->P; + pivot->prevpiv = handles; +#if defined(CHAMELEON_USE_MPI) + /* + * Book the number of tags required to describe pivot structure + * One per handle type + */ + { + chameleon_starpu_tag_init(); + pivot->mpitag_nextpiv = chameleon_starpu_tag_book( nbhandles ); + if ( pivot->mpitag_nextpiv == -1 ) { + chameleon_fatal_error("RUNTIME_pivot_create", "Can't pursue computation since no more tags are available for pivot structure"); + return; + } + pivot->mpitag_prevpiv = pivot->mpitag_nextpiv + pivot->P; } #endif } @@ -62,12 +82,11 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, /** * Destroy ws_pivot runtime structures */ -void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, - const CHAM_desc_t *desc ) +void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv ) { int i; starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); - size_t nbhandles = 3 * ipiv->mt + 2 * 
chameleon_desc_datadist_get_iparam(desc, 0); + size_t nbhandles = 3 * ipiv->mt; for(i=0; i<nbhandles; i++) { if ( *handle != NULL ) { @@ -79,13 +98,34 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, free( ipiv->ipiv ); ipiv->ipiv = NULL; - ipiv->nextpiv = NULL; - ipiv->prevpiv = NULL; ipiv->perm = NULL; ipiv->invp = NULL; chameleon_starpu_tag_release( ipiv->mpitag_ipiv ); } +/** + * Destroy ws_pivot runtime structures + */ +void RUNTIME_pivot_destroy( CHAM_desc_pivot_t *pivot ) +{ + int i; + starpu_data_handle_t *handle = (starpu_data_handle_t*)(pivot->nextpiv); + size_t nbhandles = 2 * pivot->P; + + for(i=0; i<nbhandles; i++) { + if ( *handle != NULL ) { + starpu_data_unregister_submit( *handle ); + *handle = NULL; + } + handle++; + } + + free( pivot->nextpiv ); + pivot->nextpiv = NULL; + pivot->prevpiv = NULL; + chameleon_starpu_tag_release( pivot->mpitag_nextpiv ); +} + void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) { starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); @@ -104,8 +144,7 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) #if defined(CHAMELEON_USE_MPI) { - const CHAM_desc_t *A = ipiv->desc; - int owner = A->get_rankof( A, m, m ); + int owner = ipiv->get_rankof( ipiv, m, m ); int64_t tag = ipiv->mpitag_ipiv + mm; starpu_mpi_data_register( *handle, tag, owner ); } @@ -115,48 +154,45 @@ void *RUNTIME_ipiv_getaddr( const CHAM_ipiv_t *ipiv, int m ) return (void*)(*handle); } -void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) +void *RUNTIME_nextpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { - starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv); - const CHAM_desc_t *A = ipiv->desc; + starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(pivot->nextpiv); + int Q = pivot->Q; - nextpiv += rank/chameleon_desc_datadist_get_iparam(A, 1); + nextpiv += rank/Q; assert( nextpiv ); if ( *nextpiv != NULL ) { return (void*)(*nextpiv); } - - int64_t 
kk = k + (ipiv->i / ipiv->mb); int owner = rank; - int ncols = (kk == (A->nt-1)) ? A->n - kk * A->nb : A->nb; - int64_t tag = ipiv->mpitag_nextpiv + owner/chameleon_desc_datadist_get_iparam(A, 1); + int ncols = pivot->nb; + int64_t tag = pivot->mpitag_nextpiv + owner/Q; - cppi_register( nextpiv, A->dtyp, ncols, tag, owner ); + cppi_register( nextpiv, pivot->dtyp, ncols, tag, owner ); assert( *nextpiv ); (void)h; return (void*)(*nextpiv); } -void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) +void *RUNTIME_prevpiv_getaddr( const CHAM_desc_pivot_t *pivot, int rank, int k, int h ) { - starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv); - const CHAM_desc_t *A = ipiv->desc; + starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(pivot->prevpiv); + int Q = pivot->Q; - prevpiv += rank/chameleon_desc_datadist_get_iparam(A, 1); + prevpiv += rank/Q; assert( prevpiv ); if ( *prevpiv != NULL ) { return (void*)(*prevpiv); } - int64_t kk = k + (ipiv->i / ipiv->mb); int owner = rank; - int ncols = (kk == (A->nt-1)) ? 
A->n - kk * A->nb : A->nb; - int64_t tag = ipiv->mpitag_prevpiv + owner/chameleon_desc_datadist_get_iparam(A, 1); + int ncols = pivot->nb; + int64_t tag = pivot->mpitag_prevpiv + owner/Q; - cppi_register( prevpiv, A->dtyp, ncols, tag, owner ); + cppi_register( prevpiv, pivot->dtyp, ncols, tag, owner ); assert( *prevpiv ); (void)h; @@ -181,8 +217,7 @@ void *RUNTIME_perm_getaddr( const CHAM_ipiv_t *ipiv, int m ) #if defined(CHAMELEON_USE_MPI) { - const CHAM_desc_t *A = ipiv->desc; - int owner = A->get_rankof( A, m, m ); + int owner = ipiv->get_rankof( ipiv, m, m ); int64_t tag = ipiv->mpitag_perm + mm; starpu_mpi_data_register( *handle, tag, owner ); } @@ -210,8 +245,7 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m ) #if defined(CHAMELEON_USE_MPI) { - const CHAM_desc_t *A = ipiv->desc; - int owner = A->get_rankof( A, m, m ); + int owner = ipiv->get_rankof( ipiv, m, m ); int64_t tag = ipiv->mpitag_invp + mm; starpu_mpi_data_register( *handle, tag, owner ); } @@ -221,14 +255,14 @@ void *RUNTIME_invp_getaddr( const CHAM_ipiv_t *ipiv, int m ) return (void*)(*handle); } -void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, - const CHAM_ipiv_t *ipiv, int rank ) +void RUNTIME_pivot_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot, int rank ) { starpu_data_handle_t *handle; - const CHAM_desc_t *A = ipiv->desc; + int Q = pivot->Q; - handle = (starpu_data_handle_t*)(ipiv->nextpiv); - handle += rank/chameleon_desc_datadist_get_iparam(A, 1); + handle = (starpu_data_handle_t*)(pivot->nextpiv); + handle += rank/Q; if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) @@ -240,8 +274,8 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, } } - handle = (starpu_data_handle_t*)(ipiv->prevpiv); - handle += rank/chameleon_desc_datadist_get_iparam(A, 1); + handle = (starpu_data_handle_t*)(pivot->prevpiv); + handle += rank/Q; if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) @@ -254,10 +288,21 @@ void 
RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, } (void)sequence; - (void)ipiv; + (void)pivot; (void)rank; } +void RUNTIME_pivot_flush( const RUNTIME_sequence_t *sequence, + const CHAM_desc_pivot_t *pivot ) +{ + int m; + + for (m = 0; m < pivot->Q; m++) + { + RUNTIME_pivot_flushk( sequence, pivot, m ); + } +} + void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv ) { @@ -269,11 +314,34 @@ void RUNTIME_ipiv_flush( const RUNTIME_sequence_t *sequence, } } +void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, + const CHAM_ipiv_t *ipiv, int m ) +{ + starpu_data_handle_t *handle; + int64_t mm = m + ( ipiv->i / ipiv->mb ); + + handle = (starpu_data_handle_t*)(ipiv->ipiv); + handle += mm; + + if ( *handle != NULL ) { +#if defined(CHAMELEON_USE_MPI) + starpu_mpi_cache_flush( sequence->comm, *handle ); + if ( starpu_mpi_data_get_rank( *handle ) == ipiv->myrank ) +#endif + { + chameleon_starpu_data_wont_use( *handle ); + } + } + + (void)sequence; + (void)ipiv; + (void)m; +} + void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, const CHAM_ipiv_t *ipiv, int m ) { starpu_data_handle_t *handle; - const CHAM_desc_t *A = ipiv->desc; int64_t mm = m + ( ipiv->i / ipiv->mb ); handle = (starpu_data_handle_t*)(ipiv->perm); @@ -282,7 +350,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) starpu_mpi_cache_flush( sequence->comm, *handle ); - if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) + if ( starpu_mpi_data_get_rank( *handle ) == ipiv->myrank ) #endif { chameleon_starpu_data_wont_use( *handle ); @@ -295,7 +363,7 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) starpu_mpi_cache_flush( sequence->comm, *handle ); - if ( starpu_mpi_data_get_rank( *handle ) == A->myrank ) + if ( starpu_mpi_data_get_rank( *handle ) == ipiv->myrank ) #endif { chameleon_starpu_data_wont_use( *handle 
); @@ -305,7 +373,6 @@ void RUNTIME_perm_flushk( const RUNTIME_sequence_t *sequence, (void)sequence; (void)ipiv; (void)m; - (void)A; } void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence, diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 297d2628a1d114bc9ec2472b1caf2ad381fa442c..e04ed9503b376caa599e5d92fa905b53b74a7f2d 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -119,7 +119,7 @@ if (NOT CHAMELEON_SIMULATION) add_test( test_${cat}_${prec}gesv_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/gesv.in ) add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in ) set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task - PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" ) + PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_ALLREDUCE=cham_spu_tasks" ) endif() endif() endif() diff --git a/testing/testing_zgesv.c b/testing/testing_zgesv.c index 0b5161d55d5134255f674f921a3654965690a0d6..1f3872b9fc844a447f4c014a3bef9b9e7d975f30 100644 --- a/testing/testing_zgesv.c +++ b/testing/testing_zgesv.c @@ -38,6 +38,7 @@ testing_zgesv_desc( run_arg_list_t *args, int check ) { testdata_t test_data = { .args = args }; int hres = 0; + int P, Q; /* Read arguments */ int async = parameters_getvalue_int( "async" ); @@ -62,7 +63,11 @@ testing_zgesv_desc( run_arg_list_t *args, int check ) /* Creates the matrices */ parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N ); parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS ); - CHAMELEON_Ipiv_Create( &descIPIV, descA, N, NULL ); + + P = chameleon_desc_datadist_get_iparam( descA, 0 ); + Q = chameleon_desc_datadist_get_iparam( descA, 1 ); + + CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descA->mb, N, P, P*Q, NULL ); 
/* Fills the matrix with random values */ CHAMELEON_zplrnt_Tile( descA, seedA ); @@ -80,7 +85,6 @@ testing_zgesv_desc( run_arg_list_t *args, int check ) test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); CHAMELEON_Desc_Flush( descX, test_data.sequence ); - CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); } else { hres = CHAMELEON_zgesv_Tile( descA, descIPIV, descX ); @@ -107,7 +111,7 @@ testing_zgesv_desc( run_arg_list_t *args, int check ) if ( hres ) { CHAMELEON_Desc_Destroy( &descA0 ); - CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); parameters_desc_destroy( &descA ); parameters_desc_destroy( &descX ); return hres; @@ -124,7 +128,7 @@ testing_zgesv_desc( run_arg_list_t *args, int check ) CHAMELEON_Desc_Destroy( &descB ); } - CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); parameters_desc_destroy( &descA ); parameters_desc_destroy( &descX ); diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c index bf7d52ab70fecddc68c046f71563330ba2971187..65d9defcc2b5624cebf2eb6fd8cd020af13694ab 100644 --- a/testing/testing_zgetrf.c +++ b/testing/testing_zgetrf.c @@ -39,6 +39,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) { testdata_t test_data = { .args = args }; int hres = 0; + int P, Q; /* Read arguments */ int async = parameters_getvalue_int( "async" ); @@ -78,7 +79,11 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) /* Creates the matrices */ parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N ); - CHAMELEON_Ipiv_Create( &descIPIV, descA, minMN, NULL ); + + P = chameleon_desc_datadist_get_iparam( descA, 0 ); + Q = chameleon_desc_datadist_get_iparam( descA, 1 ); + + CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descA->mb, N, P, P*Q, NULL ); /* Fills the matrix with random values */ if ( diag == ChamUnit ) { @@ -98,7 +103,6 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) if ( async ) { hres = 
CHAMELEON_zgetrf_Tile_Async( descA, descIPIV, ws, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); - CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); } else { hres = CHAMELEON_zgetrf_Tile( descA, descIPIV ); @@ -130,7 +134,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) CHAMELEON_zgetrf_WS_Free( ws ); } - CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); parameters_desc_destroy( &descA ); return hres; diff --git a/testing/testing_zgetrs.c b/testing/testing_zgetrs.c index 6a7fcfc50aa252ace498086f8abf19352599888a..4f9cb6505b0259bc8aedfa0f5a82169782457a94 100644 --- a/testing/testing_zgetrs.c +++ b/testing/testing_zgetrs.c @@ -33,6 +33,7 @@ testing_zgetrs_desc( run_arg_list_t *args, int check ) { testdata_t test_data = { .args = args }; int hres = 0; + int P, Q; /* Read arguments */ int async = parameters_getvalue_int( "async" ); @@ -57,7 +58,11 @@ testing_zgetrs_desc( run_arg_list_t *args, int check ) /* Creates the matrices */ parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N ); parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS ); - CHAMELEON_Ipiv_Create( &descIPIV, descA, N, NULL ); + + P = chameleon_desc_datadist_get_iparam( descA, 0 ); + Q = chameleon_desc_datadist_get_iparam( descA, 1 ); + + CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descA->mb, N, P, P*Q, NULL ); CHAMELEON_zplrnt_Tile( descA, seedA ); CHAMELEON_zplrnt_Tile( descX, seedB ); @@ -73,7 +78,6 @@ testing_zgetrs_desc( run_arg_list_t *args, int check ) if ( async ) { hres = CHAMELEON_zgetrs_Tile_Async( trans, descA, descIPIV, descX, ws, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); - CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); } else { hres = CHAMELEON_zgetrs_Tile( trans, descA, descIPIV, descX ); @@ -103,7 +107,7 @@ testing_zgetrs_desc( run_arg_list_t *args, int check ) 
CHAMELEON_zgetrf_WS_Free( ws ); } - CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); parameters_desc_destroy( &descA ); parameters_desc_destroy( &descX ); diff --git a/testing/testing_zlaswp.c b/testing/testing_zlaswp.c index 0c6e04d39469a995bdb078a8f29b01c44e6c2743..8fca88da027cd5f1fe6f902ae0b81fa3274a0ab4 100644 --- a/testing/testing_zlaswp.c +++ b/testing/testing_zlaswp.c @@ -40,6 +40,7 @@ testing_zlaswp_desc( run_arg_list_t *args, int check ) { testdata_t test_data = { .args = args }; int hres = 0; + int P, Q; /* Read arguments */ int async = parameters_getvalue_int( "async" ); @@ -55,28 +56,31 @@ testing_zlaswp_desc( run_arg_list_t *args, int check ) int K = ( side == ChamLeft ) ? M : N; int *IPIV = malloc( sizeof(int) * K ); + int kb; /* Descriptors */ - CHAM_desc_t *descA, *descInit; + CHAM_desc_t *descA; CHAM_ipiv_t *descIPIV; CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); /* Creates the matrices */ - parameters_desc_create( "Init", &descInit, ChamComplexDouble, nb, nb, K, K, K, K ); parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N ); CHAMELEON_zplrnt_Tile( descA, seedA ); + P = chameleon_desc_datadist_get_iparam( descA, 0 ); + Q = chameleon_desc_datadist_get_iparam( descA, 1 ); + kb = ( side == ChamLeft ) ? 
descA->nb : descA->mb; + testing_zlaswp_ipiv_gen( IPIV, K ); - CHAMELEON_Ipiv_Create( &descIPIV, descInit, K, IPIV ); - CHAMELEON_Ipiv_Init( descInit, descIPIV ); + CHAMELEON_Ipiv_Create( &descIPIV, side, kb, K, P, P*Q, IPIV ); + CHAMELEON_Ipiv_Init( descIPIV ); /* Calculates the solution */ testing_start( &test_data ); if ( async ) { hres = CHAMELEON_zlaswp_Tile_Async( side, dir, descA, K1, K2, descIPIV, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); - CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); } else { hres = CHAMELEON_zlaswp_Tile( side, dir, descA, K1, K2, descIPIV ); @@ -116,7 +120,7 @@ testing_zlaswp_desc( run_arg_list_t *args, int check ) } #endif /* !defined(CHAMELEON_SIMULATION) */ - CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + CHAMELEON_Ipiv_Destroy( &descIPIV ); parameters_desc_destroy( &descA ); free( IPIV );