diff --git a/compute/pzcesca.c b/compute/pzcesca.c index b61c54aa664e008a053e1aaf5a9495db28d47e6f..d09505c2ff68796825105f20093e9f2d4d783a15 100644 --- a/compute/pzcesca.c +++ b/compute/pzcesca.c @@ -37,8 +37,8 @@ chameleon_pzcesca_internal( int center, int NT = A->nt; int M = A->m; int N = A->n; - int P = A->p; - int Q = A->q; + int P = chameleon_desc_datadist_get_iparam(A, 0); + int Q = chameleon_desc_datadist_get_iparam(A, 1); /** * 1) compute sums and sum-square (scl,ssq) in each tile diff --git a/compute/pzgemm.c b/compute/pzgemm.c index 5c6563d2d6d6280f0f08a35b9a25f266eb5e9764..a4a12eca743aa1e9ef69a15ddfec7bc1564edc92 100644 --- a/compute/pzgemm.c +++ b/compute/pzgemm.c @@ -195,15 +195,15 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran KT = transA == ChamNoTrans ? A->nt : A->mt; K = transA == ChamNoTrans ? A->n : A->m; - myp = C->myrank / C->q; - myq = C->myrank % C->q; + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); /* * A: ChamNoTrans / B: ChamNoTrans */ for (k = 0; k < KT; k++ ) { - lp = (k % lookahead) * C->p; - lq = (k % lookahead) * C->q; + lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); + lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); tempkk = k == KT - 1 ? K - k * A->nb : A->nb; zbeta = k == 0 ? 
beta : zone; @@ -216,16 +216,16 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran options, ChamUpperLower, tempmm, tempkk, A( m, k ), - WA( m, (k % C->q) + lq ) ); + WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); RUNTIME_data_flush( sequence, A( m, k ) ); - for ( q=1; q < C->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkk, - WA( m, ((k+q-1) % C->q) + lq ), - WA( m, ((k+q) % C->q) + lq ) ); + WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ), + WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); } } else { @@ -233,16 +233,16 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran options, ChamUpperLower, tempkk, tempmm, A( k, m ), - WA( m, (m % C->q) + lq ) ); + WA( m, (m % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); RUNTIME_data_flush( sequence, A( k, m ) ); - for ( q=1; q < C->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempkk, tempmm, - WA( m, ((m+q-1) % C->q) + lq ), - WA( m, ((m+q) % C->q) + lq ) ); + WA( m, ((m+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ), + WA( m, ((m+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); } } } @@ -256,16 +256,16 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran options, ChamUpperLower, tempkk, tempnn, B( k, n ), - WB( (k % C->p) + lp, n ) ); + WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); RUNTIME_data_flush( sequence, B( k, n ) ); - for ( p=1; p < C->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempkk, tempnn, - WB( ((k+p-1) % C->p) + lp, n ), - WB( ((k+p) % C->p) + lp, n ) ); + WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ), + WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) 
+ lp, n ) ); } } else { @@ -273,24 +273,24 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran options, ChamUpperLower, tempnn, tempkk, B( n, k ), - WB( (n % C->p) + lp, n ) ); + WB( (n % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); RUNTIME_data_flush( sequence, B( n, k ) ); - for ( p=1; p < C->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempnn, tempkk, - WB( ((n+p-1) % C->p) + lp, n ), - WB( ((n+p) % C->p) + lp, n ) ); + WB( ((n+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ), + WB( ((n+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); } } } - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; INSERT_TASK_zgemm( diff --git a/compute/pzgenm2.c b/compute/pzgenm2.c index 97f8ae8937b6c5c5821bb2e5ae8226fcbb0ca65b..99455940aae73822a1e7dfa937873b30715dce9d 100644 --- a/compute/pzgenm2.c +++ b/compute/pzgenm2.c @@ -38,8 +38,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, RUNTIME_option_t options; CHAM_desc_t X, SX, NRMX, NRMSX, DROW; int m, n, k; - int myp = A->myrank / A->q; - int myq = A->myrank % A->q; + int myp = A->myrank / chameleon_desc_datadist_get_iparam(A, 1); + int myq = A->myrank % chameleon_desc_datadist_get_iparam(A, 1); int tempmm, tempnn; int cnt, maxiter; double e0, normx, normsx, beta, scl; @@ -67,7 +67,9 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * This is the same issue for X and SX to be reused from one iteration to another. 
*/ chameleon_desc_init( &DROW, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb, - A->p, A->n, 0, 0, A->p, A->n, A->p, A->q, + chameleon_desc_datadist_get_iparam(A, 0), A->n, 0, 0, chameleon_desc_datadist_get_iparam(A, 0), A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); /** * NRMX must be allocated with GLOBAL to be able to access the norm value @@ -75,7 +77,10 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * This is the same issue for NRMSX. */ chameleon_desc_init( &NRMX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q, + chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1), 0, 0 + , chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1), + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); /** @@ -86,7 +91,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * So drow[j] = sum( S_{p,j}, p=0..P-1 ) with S_{p,j} = sum( |A_{i,j}|, i=0..m-1 \ i%P = p ) * */ - for(n = myq; n < A->nt; n += A->q) { + for(n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) { tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; /* Zeroes the local intermediate vector */ @@ -97,7 +102,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, DROW( myp, n ) ); /* Computes the sums of the local tiles into the local vector */ - for(m = myp; m < A->mt; m += A->p) { + for(m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0)) { tempmm = m == A->mt-1 ? 
A->m - m * A->mb : A->mb; INSERT_TASK_dzasum( &options, @@ -106,7 +111,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Reduce on first row of nodes */ - for(m = 1; m < A->p; m++) { + for(m = 1; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { INSERT_TASK_daxpy( &options, tempnn, 1., DROW( m, n ), 1, @@ -125,7 +130,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, 1., 0., NRMX( myp, myq ) ); - for( n = myq; n < A->nt; n += A->q ) { + for( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; INSERT_TASK_dgessq( &options, ChamEltwise, 1, tempnn, @@ -134,7 +139,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Reduce on first row of nodes */ - for(n = 1; n < A->q; n++) { + for(n = 1; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { INSERT_TASK_dplssq( &options, ChamEltwise, 1, 1, NRMX( myp, n ), @@ -146,8 +151,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Bcast norm over processes from node (0,0) */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { + for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { + for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { if ( (m != 0) || (n != 0) ) { INSERT_TASK_dlacpy( &options, @@ -171,13 +176,22 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } chameleon_desc_init( &NRMSX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q, + chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1), 0, 0, + chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1), + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &X, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, 1, A->nb, A->nb, - A->p, A->n, 0, 0, A->p, A->n, 
A->p, A->q, + chameleon_desc_datadist_get_iparam(A, 0), A->n, 0, 0, + chameleon_desc_datadist_get_iparam(A, 0), A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &SX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, 1, A->mb, - A->m, A->q, 0, 0, A->m, A->q, A->p, A->q, + A->m, chameleon_desc_datadist_get_iparam(A, 1), 0, 0, + A->m, chameleon_desc_datadist_get_iparam(A, 1), + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); cnt = 0; @@ -192,7 +206,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, /* Initialization of X in the first loop */ if ( cnt == 0 ) { - for (n = myq; n < A->nt; n += A->q) { + for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) { tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; if ( myp == 0 ) { @@ -212,7 +226,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Broadcast X */ - for (m = 1; m < A->p; m++) { + for (m = 1; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { INSERT_TASK_zlacpy( &options, ChamUpperLower, 1, tempnn, @@ -230,7 +244,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * copy of the scaled X. */ scl = 1. / e0; - for (n = myq; n < A->nt; n += A->q) { + for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) { tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; INSERT_TASK_zlascal( @@ -242,10 +256,10 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, /** * Compute Sx = S * x */ - for(m = myp; m < A->mt; m+=A->p) { + for(m = myp; m < A->mt; m+=chameleon_desc_datadist_get_iparam(A, 0)) { tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; - for (n = myq; n < A->nt; n += A->q ) { + for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; beta = n == myq ? 0. 
: 1.; @@ -258,14 +272,14 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Reduce columns */ - for (k = 1; k < chameleon_min( A->q, A->nt ); k++) { + for (k = 1; k < chameleon_min( chameleon_desc_datadist_get_iparam(A, 1), A->nt ); k++) { INSERT_TASK_zaxpy( &options, tempmm, 1., SX( m, k ), 1, SX( m, 0 ), 1 ); } /* Broadcast SX to ease the following gemv */ - for (k = 1; k < A->q; k++) { + for (k = 1; k < chameleon_desc_datadist_get_iparam(A, 1); k++) { INSERT_TASK_zlacpy( &options, ChamUpperLower, tempmm, 1, @@ -277,10 +291,10 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, /** * Compute x = S' * S * x = S' * Sx */ - for ( n = myq; n < A->nt; n += A->q ) { + for ( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; - for( m = myp; m < A->mt; m += A->p ) { + for( m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0) ) { tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; beta = m == myp ? 0. : 1.; @@ -293,14 +307,14 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Reduce rows */ - for (k = 1; k < chameleon_min( A->p, A->mt ); k++) { + for (k = 1; k < chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt ); k++) { INSERT_TASK_zaxpy( &options, tempnn, 1., X( k, n ), 1, X( 0, n ), 1 ); } /* Broadcast */ - for (k = 1; k < A->p; k++) { + for (k = 1; k < chameleon_desc_datadist_get_iparam(A, 0); k++) { INSERT_TASK_zlacpy( &options, ChamUpperLower, 1, tempnn, @@ -321,7 +335,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, 1., 0., NRMX( myp, myq ) ); - for( n = myq; n < A->nt; n += A->q ) { + for( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; INSERT_TASK_zgessq( @@ -331,7 +345,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Reduce columns */ - for(n = 1; n < chameleon_min( A->q, A->nt ); n++) { + for(n = 1; n < chameleon_min( chameleon_desc_datadist_get_iparam(A, 1), A->nt ); n++) { INSERT_TASK_dplssq( &options, ChamEltwise, 1, 1, NRMX( myp, n ), @@ -342,7 +356,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, &options, 1, NRMX( myp, 0 ) ); /* Broadcast the results to processes in the same row */ - for(n = 1; n < A->q; n++) { + for(n = 1; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { INSERT_TASK_dlacpy( &options, ChamUpperLower, 1, 1, @@ -363,7 +377,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, 1., 0., NRMSX( myp, myq ) ); - for( m = myp; m < A->mt; m += A->p ) { + for( m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0) ) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; INSERT_TASK_zgessq( &options, ChamEltwise, tempmm, 1, @@ -372,7 +386,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, } /* Reduce rows */ - for( m = 1; m < chameleon_min( A->p, A->mt ); m++ ) { + for( m = 1; m < chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt ); m++ ) { INSERT_TASK_dplssq( &options, ChamEltwise, 1, 1, NRMSX( m, myq ), @@ -383,7 +397,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, &options, 1, NRMSX( 0, myq ) ); /* Broadcast the results to processes in the same column */ - for(m = 1; m < A->p; m++) { + for(m = 1; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { INSERT_TASK_dlacpy( &options, ChamUpperLower, 1, 1, diff --git a/compute/pzgepdf_qdwh.c b/compute/pzgepdf_qdwh.c index 0d1b66cb267f374b5ce493c01a2ecb619603d645..a96afd5b56ff21bd8c04a200bca9d353aafd697a 100644 --- a/compute/pzgepdf_qdwh.c +++ b/compute/pzgepdf_qdwh.c @@ -109,23 +109,31 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H, 
chameleon_desc_init( TS1, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, nb, ib * nb, ib * U->mt, nb * U->nt, 0, 0, - ib * U->mt, nb * U->nt, U->p, U->q, + ib * U->mt, nb * U->nt, + chameleon_desc_datadist_get_iparam(U, 0), + chameleon_desc_datadist_get_iparam(U, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( TT1, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, nb, ib * nb, ib * U->mt, nb * U->nt, 0, 0, - ib * U->mt, nb * U->nt, H->p, H->q, + ib * U->mt, nb * U->nt, + chameleon_desc_datadist_get_iparam(H, 0), + chameleon_desc_datadist_get_iparam(H, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( TS2, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, nb, ib * nb, ib * H->mt, nb * H->nt, 0, 0, - ib * H->mt, nb * H->nt, U->p, U->q, + ib * H->mt, nb * H->nt, + chameleon_desc_datadist_get_iparam(U, 0), + chameleon_desc_datadist_get_iparam(U, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( TT2, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, nb, ib * nb, ib * H->mt, nb * H->nt, 0, 0, - ib * H->mt, nb * H->nt, H->p, H->q, + ib * H->mt, nb * H->nt, + chameleon_desc_datadist_get_iparam(H, 0), + chameleon_desc_datadist_get_iparam(H, 1), NULL, NULL, NULL, NULL ); /* @@ -135,8 +143,8 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H, libhqr_matrix_t mat = { .mt = B1->mt, .nt = B1->nt, - .nodes = B1->p * B1-> q, - .p = B1->p, + .nodes = chameleon_desc_datadist_get_iparam(B1, 0) * chameleon_desc_datadist_get_iparam(B1,1), + .p = chameleon_desc_datadist_get_iparam(B1, 0), }; /* Tree for the top matrix */ @@ -144,7 +152,7 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H, -1, /*low level tree */ -1, /* high level tree */ -1, /* TS tree size */ - B1->p, /* High level size */ + chameleon_desc_datadist_get_iparam(B1, 0), /* High level size */ -1, /* Domino */ 0 /* TSRR (unstable) */ ); @@ -156,7 +164,7 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H, /* high level tree (Could be greedy, but flat 
should reduce the volume of comm) */ LIBHQR_FLAT_TREE, -1, /* TS tree size */ - B2->p /* High level size */ ); + chameleon_desc_datadist_get_iparam(B2, 0) /* High level size */ ); } /* @@ -170,7 +178,9 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H, chameleon_desc_init( Ut, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, U->mb, U->nb, U->mb * U->nb, U->n, U->m, 0, 0, - U->n, U->m, U->p, U->q, + U->n, U->m, + chameleon_desc_datadist_get_iparam(U, 0), + chameleon_desc_datadist_get_iparam(U, 1), NULL, NULL, NULL, NULL ); /* diff --git a/compute/pzgered.c b/compute/pzgered.c index e7feeed9d7ba8f0a3e433295e1d5af449035d531..dd2c4a597c063ae33f468c529a6302808e3f7356 100644 --- a/compute/pzgered.c +++ b/compute/pzgered.c @@ -44,8 +44,8 @@ chameleon_pzgered_frb( cham_uplo_t uplo, int NT = (uplo == ChamLower) ? minMNT : A->nt; int M = (uplo == ChamUpper) ? minMN : A->m; int N = (uplo == ChamLower) ? minMN : A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /* Initialize workspaces for tile norms */ for(m = 0; m < Wnorm->mt; m++) { @@ -142,8 +142,8 @@ chameleon_pzgered_frb( cham_uplo_t uplo, /** * Broadcast the result */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { + for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { + for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { if ( (m != 0) || (n != 0) ) { INSERT_TASK_dlacpy( options, @@ -178,19 +178,23 @@ void chameleon_pzgered( cham_uplo_t uplo, } RUNTIME_options_init(&options, chamctxt, sequence, request); - workmt = chameleon_max( A->mt, A->p ); - worknt = chameleon_max( A->nt, A->q ); + workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) ); + worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) ); RUNTIME_options_ws_alloc( &options, 1, 0 ); /* Matrix to store the norm of each element */ chameleon_desc_init( &Wcol, 
CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, + A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); /* Matrix to compute the global frobenius norm */ chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q, + workmt*2, worknt, 0, 0, workmt*2, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_pzgered_frb( uplo, A, &Wcol, &Welt, &options ); @@ -201,7 +205,7 @@ void chameleon_pzgered( cham_uplo_t uplo, RUNTIME_sequence_wait( chamctxt, sequence ); - gnorm = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q )); + gnorm = *((double *)Welt.get_blkaddr( &Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1) )); chameleon_desc_destroy( &Welt ); /** diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c index 18fb33ed81656b462cdcff0f52fe82e6080b90de..424842bc06c9639442124d13509e1f2308b11820 100644 --- a/compute/pzgetrf_nopiv.c +++ b/compute/pzgetrf_nopiv.c @@ -57,7 +57,7 @@ void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A, if ( chamctxt->autominmax_enabled && (chamctxt->scheduler == RUNTIME_SCHED_STARPU) ) { int lookahead = chamctxt->lookahead; - int nbtasks_per_step = (A->mt * A->nt) / (A->p * A->q); + int nbtasks_per_step = (A->mt * A->nt) / (chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1)); int mintasks = nbtasks_per_step * lookahead; int maxtasks = nbtasks_per_step * (lookahead+1); @@ -149,13 +149,13 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, ib = CHAMELEON_IB; lookahead = chamctxt->lookahead; - myp = A->myrank / A->q; - myq = A->myrank % A->q; + myp = A->myrank / 
chameleon_desc_datadist_get_iparam(A, 1); + myq = A->myrank % chameleon_desc_datadist_get_iparam(A, 1); for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { RUNTIME_iteration_push(chamctxt, k); - lp = (k % lookahead) * A->p; - lq = (k % lookahead) * A->q; + lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 0); + lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 1); tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; @@ -174,27 +174,27 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, &options, ChamUpperLower, tempkm, tempkn, A( k, k ), - WL( k, (k % A->q) + lq ) ); + WL( k, (k % chameleon_desc_datadist_get_iparam(A, 1)) + lq ) ); INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempkn, A( k, k ), - WU( (k % A->p) + lp, k ) ); + WU( (k % chameleon_desc_datadist_get_iparam(A, 0)) + lp, k ) ); - for ( q=1; q < A->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(A, 1); q++ ) { INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempkn, - WL( k, ((k+q-1) % A->q) + lq ), - WL( k, ((k+q) % A->q) + lq ) ); + WL( k, ((k+q-1) % chameleon_desc_datadist_get_iparam(A, 1)) + lq ), + WL( k, ((k+q) % chameleon_desc_datadist_get_iparam(A, 1)) + lq ) ); } - for ( p=1; p < A->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(A, 0); p++ ) { INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempkn, - WU( ((k+p-1) % A->p) + lp, k ), - WU( ((k+p) % A->p) + lp, k ) ); + WU( ((k+p-1) % chameleon_desc_datadist_get_iparam(A, 0)) + lp, k ), + WU( ((k+p) % chameleon_desc_datadist_get_iparam(A, 0)) + lp, k ) ); } } RUNTIME_data_flush( sequence, A( k, k ) ); @@ -202,7 +202,7 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, for (m = k+1; m < A->mt; m++) { /* Skip the row if you are not involved with */ - if ( m%A->p != myp ) { + if ( m%chameleon_desc_datadist_get_iparam(A, 0) != myp ) { continue; } @@ -219,19 +219,19 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, /* Broadcast A(m,k) into 
temp buffers through a ring */ { - assert( A->get_rankof( A, m, k ) == WL->get_rankof( WL, m, (k % A->q) + lq) ); + assert( A->get_rankof( A, m, k ) == WL->get_rankof( WL, m, (k % chameleon_desc_datadist_get_iparam(A, 1)) + lq) ); INSERT_TASK_zlacpy( &options, ChamUpperLower, tempmm, tempkn, A( m, k ), - WL( m, (k % A->q) + lq) ); + WL( m, (k % chameleon_desc_datadist_get_iparam(A, 1)) + lq) ); - for ( q=1; q < A->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(A, 1); q++ ) { INSERT_TASK_zlacpy( &options, ChamUpperLower, tempmm, tempkn, - WL( m, ((k+q-1) % A->q) + lq ), - WL( m, ((k+q) % A->q) + lq ) ); + WL( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(A, 1)) + lq ), + WL( m, ((k+q) % chameleon_desc_datadist_get_iparam(A, 1)) + lq ) ); } } RUNTIME_data_flush( sequence, A( m, k ) ); @@ -240,7 +240,7 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, for (n = k+1; n < A->nt; n++) { /* Skip the column if you are not involved with */ - if ( n%A->q != myq ) { + if ( n%chameleon_desc_datadist_get_iparam(A, 1) != myq ) { continue; } @@ -257,19 +257,19 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, /* Broadcast A(k,n) into temp buffers through a ring */ { - assert( A->get_rankof( A, k, n ) == WU->get_rankof( WU, (k%A->p) + lp, n) ); + assert( A->get_rankof( A, k, n ) == WU->get_rankof( WU, (k%chameleon_desc_datadist_get_iparam(A, 0)) + lp, n) ); INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempnn, A( k, n ), - WU( (k % A->p) + lp, n ) ); + WU( (k % chameleon_desc_datadist_get_iparam(A, 0)) + lp, n ) ); - for ( p=1; p < A->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(A, 0); p++ ) { INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempnn, - WU( ((k+p-1) % A->p) + lp, n ), - WU( ((k+p) % A->p) + lp, n ) ); + WU( ((k+p-1) % chameleon_desc_datadist_get_iparam(A, 0)) + lp, n ), + WU( ((k+p) % chameleon_desc_datadist_get_iparam(A, 0)) + lp, n ) ); } } RUNTIME_data_flush( sequence, A( k, n ) ); @@ -277,7 +277,7 @@ void 
chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, for (m = k+1; m < A->mt; m++) { /* Skip the row if you are not involved with */ - if ( m%A->p != myp ) { + if ( m%chameleon_desc_datadist_get_iparam(A, 0) != myp ) { continue; } diff --git a/compute/pzgram.c b/compute/pzgram.c index 0ceda1b41e389fa4063f10b01357b909ded9edcf..04f849c6ba3a0092ec846fd31a56fdc259e7bc59 100644 --- a/compute/pzgram.c +++ b/compute/pzgram.c @@ -33,8 +33,8 @@ chameleon_pzgram_internal( cham_uplo_t uplo, int NT = A->nt; int M = A->m; int N = A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * 1) compute (scl,ssq) over columns in each tile diff --git a/compute/pzhemm.c b/compute/pzhemm.c index 12269d34a3abf4de45163eb75657441cceac5e74..1d9205311c3e108a0fe4162a5d2f32ef9ed528e0 100644 --- a/compute/pzhemm.c +++ b/compute/pzhemm.c @@ -302,12 +302,12 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->nt; K = A->n; - myp = C->myrank / C->q; - myq = C->myrank % C->q; + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { - lp = (k % lookahead) * C->p; - lq = (k % lookahead) * C->q; + lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); + lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); tempkk = k == KT - 1 ? K - k * A->nb : A->nb; zbeta = k == 0 ? 
beta : zone; @@ -339,16 +339,16 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempam, tempak, A( Am, Ak ), - WA( m, (Ak % C->q) + lq ) ); + WA( m, (Ak % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) ); - for ( q=1; q < C->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempam, tempak, - WA( m, ((Ak+q-1) % C->q) + lq ), - WA( m, ((Ak+q) % C->q) + lq ) ); + WA( m, ((Ak+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ), + WA( m, ((Ak+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); } } @@ -361,25 +361,25 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempkk, tempnn, B( k, n ), - WB( (k % C->p) + lp, n ) ); + WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); RUNTIME_data_flush( sequence, B( k, n ) ); - for ( p=1; p < C->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempkk, tempnn, - WB( ((k+p-1) % C->p) + lp, n ), - WB( ((k+p) % C->p) + lp, n ) ); + WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ), + WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); } } /* Perform the update of this iteration */ - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; if ( k == m ) { - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; INSERT_TASK_zhemm( @@ -400,7 +400,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, transA = ChamNoTrans; } - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? 
C->n-n*C->nb : C->nb; INSERT_TASK_zgemm( @@ -438,12 +438,12 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->mt; K = A->m; - myp = C->myrank / C->q; - myq = C->myrank % C->q; + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { - lp = (k % lookahead) * C->p; - lq = (k % lookahead) * C->q; + lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); + lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); tempkk = k == KT - 1 ? K - k * A->nb : A->nb; zbeta = k == 0 ? beta : zone; @@ -456,16 +456,16 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempmm, tempkk, B( m, k ), - WA( m, (k % C->q) + lq ) ); + WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); RUNTIME_data_flush( sequence, B( m, k ) ); - for ( q=1; q < C->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkk, - WA( m, ((k+q-1) % C->q) + lq ), - WA( m, ((k+q) % C->q) + lq ) ); + WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ), + WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); } } @@ -496,25 +496,25 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempak, tempan, A( Ak, An ), - WB( (Ak % C->p) + lp, n ) ); + WB( (Ak % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); RUNTIME_data_flush( sequence, A( Ak, An ) ); - for ( p=1; p < C->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempak, tempan, - WB( ((Ak+p-1) % C->p) + lp, n ), - WB( ((Ak+p) % C->p) + lp, n ) ); + WB( ((Ak+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ), + WB( ((Ak+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); } } /* 
Perform the update of this iteration */ - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; if ( k == n ) { - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; /* A has been stored in WA or WB for the summa ring */ @@ -536,7 +536,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, transA = ChamNoTrans; } - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; INSERT_TASK_zgemm( diff --git a/compute/pzhered.c b/compute/pzhered.c index 869d748cf49c4d131ccb90292b492bb3658167da..621cb87ae8a3e3136cf95b9acc12502ee6b7543f 100644 --- a/compute/pzhered.c +++ b/compute/pzhered.c @@ -43,8 +43,8 @@ chameleon_pzhered_frb( cham_trans_t trans, int NT = A->nt; int M = A->m; int N = A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /* Initialize workspaces for tile norms */ for (m = 0; m < Wnorm->mt; m++) @@ -163,9 +163,9 @@ chameleon_pzhered_frb( cham_trans_t trans, /** * Broadcast the result */ - for (m = 0; m < A->p; m++) + for (m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { - for (n = 0; n < A->q; n++) + for (n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { if ( ( m != 0 ) || ( n != 0 ) ) { @@ -204,20 +204,20 @@ void chameleon_pzhered( cham_trans_t trans, } RUNTIME_options_init(&options, chamctxt, sequence, request); - workmt = chameleon_max(A->mt, A->p); - worknt = chameleon_max(A->nt, A->q); + workmt = chameleon_max(A->mt, chameleon_desc_datadist_get_iparam(A, 0)); + worknt = chameleon_max(A->nt, chameleon_desc_datadist_get_iparam(A, 1)); RUNTIME_options_ws_alloc(&options, 1, 0); /* Matrix to store the norm of each 
element */ - chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, - NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); + chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1), + NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg); /* Matrix to compute the global frobenius norm */ - chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, - NULL, NULL, NULL, NULL ); + chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + workmt * 2, worknt, 0, 0, workmt * 2, worknt, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1), + NULL, NULL, NULL, NULL); chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options ); @@ -227,7 +227,7 @@ void chameleon_pzhered( cham_trans_t trans, RUNTIME_sequence_wait( chamctxt, sequence ); - gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q)); + gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1))); chameleon_desc_destroy(&Welt); /** diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index ca26a7f0af1a87ff0c5b43af8529ff02d13d58dc..706740891ade6bc8d3cd10ac1ff1be2338ce0c55 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -90,7 +90,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ - chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); + chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, + chameleon_desc_datadist_get_iparam(A, 0), + 
chameleon_desc_datadist_get_iparam(A, 1) ); chameleon_desc_init( &AT, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, A->nb, (A->mb*A->nb), chameleon_min(A->mt, A->nt) * A->mb, A->nb, 0, 0, diff --git a/compute/pzlange.c b/compute/pzlange.c index 0917c5f40e017a146bbe8aa901586512033dd7c0..abfb27a366655537e0fa0e6e4a48d7e3626aa2bf 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -43,8 +43,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, int NT = (uplo == ChamLower) ? minMNT : A->nt; int M = (uplo == ChamUpper) ? minMN : A->m; int N = (uplo == ChamLower) ? minMN : A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 1: @@ -133,8 +133,8 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, int NT = (uplo == ChamLower) ? minMNT : A->nt; int M = (uplo == ChamUpper) ? minMN : A->m; int N = (uplo == ChamLower) ? minMN : A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 1: @@ -219,8 +219,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ int NT = (uplo == ChamLower) ? minMNT : A->nt; int M = (uplo == ChamUpper) ? minMN : A->m; int N = (uplo == ChamLower) ? minMN : A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 1: @@ -302,8 +302,8 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ int NT = (uplo == ChamLower) ? minMNT : A->nt; int M = (uplo == ChamUpper) ? minMN : A->m; int N = (uplo == ChamLower) ? 
minMN : A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 1: @@ -395,15 +395,17 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia *result = 0.0; - workmt = chameleon_max( A->mt, A->p ); - worknt = chameleon_max( A->nt, A->q ); + workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) ); + worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) ); switch ( norm ) { case ChamOneNorm: RUNTIME_options_ws_alloc( &options, 1, 0 ); chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 1, A->nb, A->nb, - workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q, + workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); wcol_init = 1; @@ -411,7 +413,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia * Use the global allocator for Welt, otherwise flush may free the data before the result is read. 
*/ chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, - A->p, worknt, 0, 0, A->p, worknt, A->p, A->q, + chameleon_desc_datadist_get_iparam(A, 0), worknt, 0, 0, chameleon_desc_datadist_get_iparam(A, 0), worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); break; @@ -423,12 +427,16 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia RUNTIME_options_ws_alloc( &options, A->mb, 0 ); chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb, - workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, + workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); wcol_init = 1; chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, - workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, + workmt, chameleon_desc_datadist_get_iparam(A, 1), 0, 0, workmt, chameleon_desc_datadist_get_iparam(A, 1), + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); break; @@ -440,7 +448,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia alpha = 1.; chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q, + workmt*2, worknt, 0, 0, workmt*2, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); break; @@ -452,7 +462,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia RUNTIME_options_ws_alloc( &options, 1, 0 ); chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, - workmt, worknt, 0, 0, workmt, worknt, A->p, A->q, + workmt, worknt, 0, 0, workmt, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + 
chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); } @@ -504,8 +516,8 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia /** * Broadcast the result */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { + for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { + for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { if ( (m != 0) || (n != 0) ) { INSERT_TASK_dlacpy( &options, @@ -522,7 +534,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia CHAMELEON_Desc_Flush( A, sequence ); RUNTIME_sequence_wait( chamctxt, sequence ); - *result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q )); + *result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1) )); if ( wcol_init ) { chameleon_desc_destroy( &Wcol ); diff --git a/compute/pzlansy.c b/compute/pzlansy.c index fb4b0f4e97a8db5d73a35ef4716a2470eb3d1f54..ab709ac9e396a578ad3e5db31c7577661505848b 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -40,8 +40,8 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, int NT = A->nt; int M = A->m; int N = A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 1: @@ -133,8 +133,8 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, int NT = A->nt; int M = A->m; int N = A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 1: @@ -219,8 +219,8 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, int NT = A->nt; int M = A->m; int N = A->n; - int P = Welt->p; - int Q = Welt->q; + int P = chameleon_desc_datadist_get_iparam(Welt, 0); + int Q = chameleon_desc_datadist_get_iparam(Welt, 1); /** * Step 
1: @@ -321,8 +321,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra *result = 0.0; - workmt = chameleon_max( A->mt, A->p ); - worknt = chameleon_max( A->nt, A->q ); + workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) ); + worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) ); switch ( norm ) { case ChamOneNorm: @@ -330,7 +330,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra RUNTIME_options_ws_alloc( &options, 1, 0 ); chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb, - workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, + workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); wcol_init = 1; @@ -338,7 +340,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra * Use the global allocator for Welt, otherwise flush may free the data before the result is read. 
*/ chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, - workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, + workmt, chameleon_desc_datadist_get_iparam(A, 1), 0, 0, workmt, chameleon_desc_datadist_get_iparam(A, 1), + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); break; @@ -350,7 +354,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra alpha = 1.; chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q, + workmt*2, worknt, 0, 0, workmt*2, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); break; @@ -362,7 +368,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra RUNTIME_options_ws_alloc( &options, 1, 0 ); chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, - workmt, worknt, 0, 0, workmt, worknt, A->p, A->q, + workmt, worknt, 0, 0, workmt, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); } @@ -410,8 +418,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra /** * Broadcast the result */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { + for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) { + for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) { if ( (m != 0) || (n != 0) ) { INSERT_TASK_dlacpy( &options, @@ -428,7 +436,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra CHAMELEON_Desc_Flush( A, sequence ); RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ); + *result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % 
chameleon_desc_datadist_get_iparam(A, 1) ); if ( wcol_init ) { chameleon_desc_destroy( &Wcol ); diff --git a/compute/pzlatms.c b/compute/pzlatms.c index f974d04175a619bf27cb89898643a302a4817be7..483ce2a1f7afa2b79623bba80bb22b86afc51fec 100644 --- a/compute/pzlatms.c +++ b/compute/pzlatms.c @@ -198,8 +198,8 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym /* Apply a QR factorization */ mat.mt = descU.mt; mat.nt = descU.nt; - mat.nodes = descU.p * descU.q; - mat.p = descU.p; + mat.nodes = chameleon_desc_datadist_get_iparam(&descU, 0) * chameleon_desc_datadist_get_iparam(&descU, 1); + mat.p = chameleon_desc_datadist_get_iparam(&descU, 0); libhqr_init_hqr( &qrtree, LIBHQR_QR, &mat, -1, /*low level tree */ @@ -217,12 +217,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, descU.nb, ib * descU.nb, ib * descU.mt, descU.nb * descU.nt, 0, 0, - ib * descU.mt, descU.nb * descU.nt, descU.p, descU.q, + ib * descU.mt, descU.nb * descU.nt, + chameleon_desc_datadist_get_iparam(&descU, 0), + chameleon_desc_datadist_get_iparam(&descU, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, descU.nb, ib * descU.nb, ib * descU.mt, descU.nb * descU.nt, 0, 0, - ib * descU.mt, descU.nb * descU.nt, descU.p, descU.q, + ib * descU.mt, descU.nb * descU.nt, + chameleon_desc_datadist_get_iparam(&descU, 0), + chameleon_desc_datadist_get_iparam(&descU, 1), NULL, NULL, NULL, NULL ); /* U <= qr(U) */ @@ -269,8 +273,8 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym /* Apply a QR factorization */ mat.mt = descV.mt; mat.nt = descV.nt; - mat.nodes = descV.p * descV.q; - mat.p = descV.q; + mat.nodes = chameleon_desc_datadist_get_iparam(&descV, 0) * chameleon_desc_datadist_get_iparam(&descV, 1); + mat.p = chameleon_desc_datadist_get_iparam(&descV, 1); libhqr_init_hqr( 
&qrtree, LIBHQR_LQ, &mat, -1, /*low level tree */ @@ -288,12 +292,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, descV.nb, ib * descV.nb, ib * descV.mt, descV.nb * descV.nt, 0, 0, - ib * descV.mt, descV.nb * descV.nt, descV.p, descV.q, + ib * descV.mt, descV.nb * descV.nt, + chameleon_desc_datadist_get_iparam(&descV, 0), + chameleon_desc_datadist_get_iparam(&descV, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ib, descV.nb, ib * descV.nb, ib * descV.mt, descV.nb * descV.nt, 0, 0, - ib * descV.mt, descV.nb * descV.nt, descV.p, descV.q, + ib * descV.mt, descV.nb * descV.nt, + chameleon_desc_datadist_get_iparam(&descV, 0), + chameleon_desc_datadist_get_iparam(&descV, 1), NULL, NULL, NULL, NULL ); /* V <= qr(V) */ diff --git a/compute/pzplrnk.c b/compute/pzplrnk.c index 2705dd029afcc05805ecda7ddbfd2f548059a69d..86f6c3364e07e81d2a12d2acd741cbd68d1be5c0 100644 --- a/compute/pzplrnk.c +++ b/compute/pzplrnk.c @@ -47,24 +47,28 @@ void chameleon_pzplrnk( int K, CHAM_desc_t *C, chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mt * C->mb, C->nb * C->q, 0, 0, - C->mt * C->mb, C->nb * C->q, C->p, C->q, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1), 0, 0, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1), + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mb * C->p, C->nt * C->nb, 0, 0, - C->mb * C->p, C->nt * C->nb, C->p, C->q, + C->mb * chameleon_desc_datadist_get_iparam(C, 0), C->nt * C->nb, 0, 0, + C->mb * chameleon_desc_datadist_get_iparam(C, 0), C->nt * C->nb, + chameleon_desc_datadist_get_iparam(C, 0), + 
chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); KT = (K + C->mb - 1) / C->mb; - myp = C->myrank / C->q; - myq = C->myrank % C->q; + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++) { tempkk = k == KT-1 ? K - k * WA.nb : WA.nb; zbeta = k == 0 ? 0. : 1.; - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; INSERT_TASK_zplrnt( @@ -73,7 +77,7 @@ void chameleon_pzplrnk( int K, CHAM_desc_t *C, WB.m, k * WB.mb, n * WB.nb, seedB ); } - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; INSERT_TASK_zplrnt( @@ -81,7 +85,7 @@ void chameleon_pzplrnk( int K, CHAM_desc_t *C, tempmm, tempkk, WA(m, myq), WA.m, m * WA.mb, k * WA.nb, seedA ); - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? 
C->n-n*C->nb : C->nb; INSERT_TASK_zgemm( @@ -94,7 +98,7 @@ void chameleon_pzplrnk( int K, CHAM_desc_t *C, } RUNTIME_data_flush( sequence, WA(m, 0) ); } - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { RUNTIME_data_flush( sequence, WB(0, n) ); } } diff --git a/compute/pzsymm.c b/compute/pzsymm.c index 944ac75ea2daef41f206207d420e9d0543fd0ae1..8f16daf6fa1a1ed601ea31e6425f14c949891e3c 100644 --- a/compute/pzsymm.c +++ b/compute/pzsymm.c @@ -303,12 +303,12 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->nt; K = A->n; - myp = C->myrank / C->q; - myq = C->myrank % C->q; + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { - lp = (k % lookahead) * C->p; - lq = (k % lookahead) * C->q; + lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); + lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); tempkk = k == KT - 1 ? K - k * A->nb : A->nb; zbeta = k == 0 ? 
beta : zone; @@ -340,16 +340,16 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempam, tempak, A( Am, Ak ), - WA( m, (Ak % C->q) + lq ) ); + WA( m, (Ak % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) ); - for ( q=1; q < C->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempam, tempak, - WA( m, ((Ak+q-1) % C->q) + lq ), - WA( m, ((Ak+q) % C->q) + lq ) ); + WA( m, ((Ak+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ), + WA( m, ((Ak+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); } } @@ -362,25 +362,25 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempkk, tempnn, B( k, n ), - WB( (k % C->p) + lp, n ) ); + WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); RUNTIME_data_flush( sequence, B( k, n ) ); - for ( p=1; p < C->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempkk, tempnn, - WB( ((k+p-1) % C->p) + lp, n ), - WB( ((k+p) % C->p) + lp, n ) ); + WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ), + WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); } } /* Perform the update of this iteration */ - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; if ( k == m ) { - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; INSERT_TASK_zsymm( @@ -401,7 +401,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, transA = ChamNoTrans; } - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? 
C->n-n*C->nb : C->nb; INSERT_TASK_zgemm( @@ -439,12 +439,12 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->mt; K = A->m; - myp = C->myrank / C->q; - myq = C->myrank % C->q; + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { - lp = (k % lookahead) * C->p; - lq = (k % lookahead) * C->q; + lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); + lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); tempkk = k == KT - 1 ? K - k * A->nb : A->nb; zbeta = k == 0 ? beta : zone; @@ -457,16 +457,16 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempmm, tempkk, B( m, k ), - WA( m, (k % C->q) + lq ) ); + WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); RUNTIME_data_flush( sequence, B( m, k ) ); - for ( q=1; q < C->q; q++ ) { + for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkk, - WA( m, ((k+q-1) % C->q) + lq ), - WA( m, ((k+q) % C->q) + lq ) ); + WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ), + WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) ); } } @@ -497,25 +497,25 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, options, ChamUpperLower, tempak, tempan, A( Ak, An ), - WB( (Ak % C->p) + lp, n ) ); + WB( (Ak % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); RUNTIME_data_flush( sequence, A( Ak, An ) ); - for ( p=1; p < C->p; p++ ) { + for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) { INSERT_TASK_zlacpy( options, ChamUpperLower, tempak, tempan, - WB( ((Ak+p-1) % C->p) + lp, n ), - WB( ((Ak+p) % C->p) + lp, n ) ); + WB( ((Ak+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ), + WB( ((Ak+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) ); } } /* 
Perform the update of this iteration */ - for (n = myq; n < C->nt; n+=C->q) { + for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; if ( k == n ) { - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; /* A has been stored in WA or WB for the summa ring */ @@ -537,7 +537,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, transA = ChamNoTrans; } - for (m = myp; m < C->mt; m+=C->p) { + for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; INSERT_TASK_zgemm( diff --git a/compute/zcesca.c b/compute/zcesca.c index 92de039f37269152768055de00a5a123e94bf0fe..a0c8694dabdca07c8e8b9d21b516d75c5afbdd48 100644 --- a/compute/zcesca.c +++ b/compute/zcesca.c @@ -55,37 +55,47 @@ void *CHAMELEON_zcesca_WS_Alloc( const CHAM_desc_t *A ) options = calloc( 1, sizeof(struct chameleon_pzcesca_s) ); - workmt = chameleon_max( A->mt, A->p ); - worknt = chameleon_max( A->nt, A->q ); + workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) ); + worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) ); chameleon_desc_init( &(options->Wgcol), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, 1, A->nb, A->nb, workmt, A->n, 0, 0, - workmt, A->n, A->p, A->q, + workmt, A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->Wgrow), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, 1, A->mb, A->m, worknt, 0, 0, - A->m, worknt, A->p, A->q, + A->m, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->Wgelt), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, 1, 1, 1, 1, worknt, 0, 0, - 1, worknt, A->p, A->q, + 1, worknt, + 
chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->Wdcol), CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, A->nb, 2*A->nb, 2*workmt, A->n, 0, 0, - 2*workmt, A->n, A->p, A->q, + 2*workmt, A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->Wdrow), CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 2, 2*A->mb, A->m, 2*worknt, 0, 0, - A->m, 2*worknt, A->p, A->q, + A->m, 2*worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); return (void*)options; diff --git a/compute/zgemm.c b/compute/zgemm.c index fb5ebff37b416841e891f59bb3c191b537a36bd4..98519384e2ced2c0efc92d965b23247f3bee6f9c 100644 --- a/compute/zgemm.c +++ b/compute/zgemm.c @@ -110,7 +110,7 @@ void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA __attribute__((unused) * If only one process, or if generic has been globally enforced, we switch * to generic immediately. */ - if ( ((C->p == 1) && (C->q == 1)) || + if ( ((chameleon_desc_datadist_get_iparam(C, 0) == 1) && (chameleon_desc_datadist_get_iparam(C, 1) == 1)) || (chamctxt->generic_enabled == CHAMELEON_TRUE) ) { options->alg = ChamGemmAlgGeneric; @@ -151,9 +151,9 @@ void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA __attribute__((unused) double ratio = 1.5; /* Arbitrary ratio to give more weight to writes wrt reads. 
*/ /* Compute the average array per node for each matrix */ - sizeA = ((double)A->m * (double)A->n) / (double)(A->p * A->q); - sizeB = ((double)B->m * (double)B->n) / (double)(B->p * B->q); - sizeC = ((double)C->m * (double)C->n) / (double)(C->p * C->q) * ratio; + sizeA = ((double)A->m * (double)A->n) / (double)(chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1)); + sizeB = ((double)B->m * (double)B->n) / (double)(chameleon_desc_datadist_get_iparam(B, 0) * chameleon_desc_datadist_get_iparam(B, 1)); + sizeC = ((double)C->m * (double)C->n) / (double)(chameleon_desc_datadist_get_iparam(C, 0) * chameleon_desc_datadist_get_iparam(C, 1)) * ratio; options->alg = ChamGemmAlgGeneric; if ( (sizeC > sizeA) && (sizeC > sizeB) ) @@ -192,13 +192,17 @@ void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA __attribute__((unused) chameleon_desc_init( &(options->WA), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mt * C->mb, C->nb * C->q * lookahead, 0, 0, - C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, 0, 0, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->WB), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mb * C->p * lookahead, C->nt * C->nb, 0, 0, - C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q, + C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, 0, 0, + C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); } diff --git a/compute/zgetrf.c b/compute/zgetrf.c index b7e8f87b622c35d68f557f9c59393eabc017c679..9dabf735d3280d77f92ea30388a291a2db8cb861 100644 --- 
a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -68,7 +68,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) ws->ib = CHAMELEON_IB; #if defined (CHAMELEON_USE_MPI) - ws->proc_involved = malloc( sizeof( int ) * A->p ); + ws->proc_involved = malloc( sizeof( int ) * chameleon_desc_datadist_get_iparam(A, 0) ); ws->involved = 0; ws->np_involved = 0; #endif @@ -107,7 +107,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, 1, A->nb, A->nb, A->mt, A->nt * A->nb, 0, 0, - A->mt, A->nt * A->nb, A->p, A->q, + A->mt, A->nt * A->nb, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } else if ( ( ws->alg == ChamGetrfPPiv ) || @@ -116,12 +118,14 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, A->m, A->n, 0, 0, - A->m, A->n, A->p, A->q, + A->m, A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); chameleon_desc_init( &(ws->Wu), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, A->nb, A->mb*A->nb, - A->mb * A->p * A->q, A->n, 0, 0, - A->mb * A->p * A->q, A->n, A->p * A->q, 1, + A->mb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), A->n, 0, 0, + A->mb * chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), A->n, chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1), 1, NULL, NULL, NULL, A->get_rankof_init_arg ); } @@ -140,7 +144,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) chameleon_desc_init( &(ws->Up), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, ws->ib, A->nb, ws->ib * A->nb, A->mt * ws->ib, A->nt * A->nb, 0, 0, - A->mt * ws->ib, A->nt * A->nb, A->p, A->q, + A->mt * ws->ib, A->nt * A->nb, + 
chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index 3409e97f7a80cfb0844b4eefa66d6646be6239f1..63e40d5367bb630095c52836dc0b4bc685eaae13 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -61,7 +61,7 @@ void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A ) options = calloc( 1, sizeof(struct chameleon_pzgetrf_nopiv_s) ); options->use_workspace = 0; - if ( ( ( A->p > 1 ) || ( A->q > 1 ) ) && + if ( ( ( chameleon_desc_datadist_get_iparam(A, 0) > 1 ) || ( chameleon_desc_datadist_get_iparam(A, 1) > 1 ) ) && ( A->get_rankof_init == chameleon_getrankof_2d ) && ( chamctxt->generic_enabled != CHAMELEON_TRUE ) ) { @@ -70,15 +70,19 @@ void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A ) chameleon_desc_init( &(options->WL), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb), - A->mt * A->mb, A->nb * A->q * lookahead, 0, 0, - A->mt * A->mb, A->nb * A->q * lookahead, A->p, A->q, + A->mt * A->mb, A->nb * chameleon_desc_datadist_get_iparam(A, 1) * lookahead, 0, 0, + A->mt * A->mb, A->nb * chameleon_desc_datadist_get_iparam(A, 1) * lookahead, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); chameleon_desc_init( &(options->WU), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb), - A->mb * A->p * lookahead, A->nt * A->nb, 0, 0, - A->mb * A->p * lookahead, A->nt * A->nb, A->p, A->q, + A->mb * chameleon_desc_datadist_get_iparam(A, 0) * lookahead, A->nt * A->nb, 0, 0, + A->mb * chameleon_desc_datadist_get_iparam(A, 0) * lookahead, A->nt * A->nb, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); } diff --git a/compute/zgram.c b/compute/zgram.c index 
c40c696a0c04abb5b7bb63bdf3749253da497837..07c9d7d066827578b2bcbc01beeafed80a98b10e 100644 --- a/compute/zgram.c +++ b/compute/zgram.c @@ -58,19 +58,23 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A ) options = calloc( 1, sizeof(struct chameleon_pzgram_s) ); - workmt = chameleon_max( A->mt, A->p ); - worknt = chameleon_max( A->nt, A->q ); + workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) ); + worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) ); chameleon_desc_init( &(options->Wcol), CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, A->nb, 2*A->nb, 2*workmt, A->n, 0, 0, - 2*workmt, A->n, A->p, A->q, + 2*workmt, A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->Welt), CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 2, 1, 2, 2, worknt, 0, 0, - 2, worknt, A->p, A->q, + 2, worknt, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1), NULL, NULL, NULL, NULL ); return (void*)options; diff --git a/compute/zhemm.c b/compute/zhemm.c index a174e2cfb14e9846d2496649deb4a22c8c5862f3..c8fc553b958132224aa983fbce18605f0ce3ed72 100644 --- a/compute/zhemm.c +++ b/compute/zhemm.c @@ -89,7 +89,7 @@ void *CHAMELEON_zhemm_WS_Alloc( cham_side_t side __attribute__((unused)), * If only one process, or if generic has been globally enforced, we switch * to generic immediately. */ - if ( ((C->p == 1) && (C->q == 1)) || + if ( ((chameleon_desc_datadist_get_iparam(C, 0) == 1) && (chameleon_desc_datadist_get_iparam(C, 1) == 1)) || (chamctxt->generic_enabled == CHAMELEON_TRUE) ) { options->alg = ChamGemmAlgGeneric; @@ -130,9 +130,9 @@ void *CHAMELEON_zhemm_WS_Alloc( cham_side_t side __attribute__((unused)), double ratio = 1.5; /* Arbitrary ratio to give more weight to writes wrt reads. 
*/ /* Compute the average array per node for each matrix */ - sizeA = ((double)A->m * (double)A->n) / (double)(A->p * A->q); - sizeB = ((double)B->m * (double)B->n) / (double)(B->p * B->q); - sizeC = ((double)C->m * (double)C->n) / (double)(C->p * C->q) * ratio; + sizeA = ((double)A->m * (double)A->n) / (double)(chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1)); + sizeB = ((double)B->m * (double)B->n) / (double)(chameleon_desc_datadist_get_iparam(B, 0) * chameleon_desc_datadist_get_iparam(B, 1)); + sizeC = ((double)C->m * (double)C->n) / (double)(chameleon_desc_datadist_get_iparam(C, 0) * chameleon_desc_datadist_get_iparam(C, 1)) * ratio; if ( (sizeC > sizeA) && (sizeC > sizeB) ) { options->alg = ChamGemmAlgSummaC; @@ -168,13 +168,17 @@ void *CHAMELEON_zhemm_WS_Alloc( cham_side_t side __attribute__((unused)), chameleon_desc_init( &(options->WA), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mt * C->mb, C->nb * C->q * lookahead, 0, 0, - C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, 0, 0, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->WB), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mb * C->p * lookahead, C->nt * C->nb, 0, 0, - C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q, + C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, 0, 0, + C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); } diff --git a/compute/zhetrd.c b/compute/zhetrd.c index cc9826b2f383670f405f7ae4b35e66c4cebc0ce4..b846cc90fd3f56a09f7dab0adde5e413720f227e 100644 --- 
a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -391,7 +391,9 @@ int CHAMELEON_zhetrd_Tile_Async( cham_job_t jobz, NB = descA.mb; #if defined(CHAMELEON_COPY_DIAG) { - chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); + chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, + chameleon_desc_datadist_get_iparam(A, 0), + chameleon_desc_datadist_get_iparam(A, 1) ); Dptr = &D; } #endif diff --git a/compute/zsymm.c b/compute/zsymm.c index dbb1ae72212f5fe52de2a7f8d9f4f2f567e79b83..ab1f68eb4cea8632b5734b081be73f932f0b0cb8 100644 --- a/compute/zsymm.c +++ b/compute/zsymm.c @@ -89,7 +89,7 @@ void *CHAMELEON_zsymm_WS_Alloc( cham_side_t side __attribute__((unused)), * If only one process, or if generic has been globally enforced, we switch * to generic immediately. */ - if ( ((C->p == 1) && (C->q == 1)) || + if ( ((chameleon_desc_datadist_get_iparam(C, 0) == 1) && (chameleon_desc_datadist_get_iparam(C, 1) == 1)) || (chamctxt->generic_enabled == CHAMELEON_TRUE) ) { options->alg = ChamGemmAlgGeneric; @@ -130,9 +130,9 @@ void *CHAMELEON_zsymm_WS_Alloc( cham_side_t side __attribute__((unused)), double ratio = 1.5; /* Arbitrary ratio to give more weight to writes wrt reads. 
*/ /* Compute the average array per node for each matrix */ - sizeA = ((double)A->m * (double)A->n) / (double)(A->p * A->q); - sizeB = ((double)B->m * (double)B->n) / (double)(B->p * B->q); - sizeC = ((double)C->m * (double)C->n) / (double)(C->p * C->q) * ratio; + sizeA = ((double)A->m * (double)A->n) / (double)(chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1)); + sizeB = ((double)B->m * (double)B->n) / (double)(chameleon_desc_datadist_get_iparam(B, 0) * chameleon_desc_datadist_get_iparam(B, 1)); + sizeC = ((double)C->m * (double)C->n) / (double)(chameleon_desc_datadist_get_iparam(C, 0) * chameleon_desc_datadist_get_iparam(C, 1)) * ratio; if ( (sizeC > sizeA) && (sizeC > sizeB) ) { options->alg = ChamGemmAlgSummaC; @@ -168,13 +168,17 @@ void *CHAMELEON_zsymm_WS_Alloc( cham_side_t side __attribute__((unused)), chameleon_desc_init( &(options->WA), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mt * C->mb, C->nb * C->q * lookahead, 0, 0, - C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, 0, 0, + C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); chameleon_desc_init( &(options->WB), CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mb * C->p * lookahead, C->nt * C->nb, 0, 0, - C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q, + C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, 0, 0, + C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, + chameleon_desc_datadist_get_iparam(C, 0), + chameleon_desc_datadist_get_iparam(C, 1), NULL, NULL, NULL, NULL ); } diff --git a/control/compute_z.h b/control/compute_z.h index acb9599f295c77774acccef98c88d7db22c59362..df4b18d79010af05a0d5f482b7e9d9390442c29a 100644 
--- a/control/compute_z.h +++ b/control/compute_z.h @@ -310,7 +310,7 @@ chameleon_zdesc_copy_and_restrict( const CHAM_desc_t *descIn, int rc; rc = chameleon_desc_init( descOut, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, descIn->mb, descIn->nb, descIn->mb * descIn->nb, - m, n, 0, 0, m, n, descIn->p, descIn->q, + m, n, 0, 0, m, n, chameleon_desc_datadist_get_iparam(descIn, 0), chameleon_desc_datadist_get_iparam(descIn, 1), descIn->get_blkaddr, descIn->get_blkldd, descIn->get_rankof_init, descIn->get_rankof_init_arg ); diff --git a/control/descriptor.c b/control/descriptor.c index 556dc8ba7671e1fd32b57f8690bc80bd14d87804..22ddfd71e15548d49d32b4af912203abade845fe 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -86,6 +86,7 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc ) #endif free( desc->tiles ); } + free( desc->data_dist ); return CHAMELEON_SUCCESS; } @@ -116,6 +117,26 @@ void chameleon_desc_init_tiles( CHAM_desc_t *desc, blkrankof_fct_t rankof ) } } +/* Get access to data dist */ +int chameleon_desc_datadist_get_iparam( const CHAM_desc_t *desc, int i ) +{ + return desc->data_dist->distrib[desc->data_dist->get_distrib(desc, i)]; +} + +int chameleon_get_2d_block_cyclic( const CHAM_desc_t *desc, int i ) { return i; } + +void chameleon_desc_set_datadist( CHAM_desc_t *to, cham_data_dist_t *from ) +{ + int i; + to->data_dist = malloc(sizeof(cham_data_dist_t)); + to->data_dist->get_distrib = from->get_distrib; + to->data_dist->distrib_array_size = from->distrib_array_size; + + for (i = 0; i < to->data_dist->distrib_array_size; i++) { + to->data_dist->distrib[i] = from->distrib[i]; + } +} + /** ****************************************************************************** * @@ -238,8 +259,11 @@ int chameleon_desc_init_internal( CHAM_desc_t *desc, const char *name, void *mat desc->myrank = RUNTIME_comm_rank( chamctxt ); /* Grid size */ - desc->p = p; - desc->q = q; + cham_data_dist_t dist = { + .get_distrib = 
(datadist_access_fct_t)chameleon_get_2d_block_cyclic, + .distrib_array_size = 2, + .distrib = {p, q} }; + chameleon_desc_set_datadist( desc, &dist ); /* Local dimensions in tiles */ if ( desc->myrank < (p*q) ) { @@ -808,7 +832,8 @@ CHAM_desc_t *CHAMELEON_Desc_Copy( const CHAM_desc_t *descin, void *mat ) CHAM_desc_t *descout = NULL; CHAMELEON_Desc_Create_User( &descout, mat, descin->dtyp, descin->mb, descin->nb, descin->bsiz, - descin->lm, descin->ln, descin->i, descin->j, descin->m, descin->n, descin->p, descin->q, + descin->lm, descin->ln, descin->i, descin->j, descin->m, descin->n, + chameleon_desc_datadist_get_iparam(descin, 0), chameleon_desc_datadist_get_iparam(descin, 1), NULL, NULL, descin->get_rankof_init, descin->get_rankof_init_arg ); return descout; } diff --git a/control/descriptor.h b/control/descriptor.h index 1e0315fae2c70cdec40052e49a58b47c32a46ec9..739d752830cc0917a3d366ca5cdd9bfec3ab50e4 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -106,8 +106,8 @@ inline static void* chameleon_geteltaddr(const CHAM_desc_t *A, int m, int n, int #if defined(CHAMELEON_USE_MPI) assert( A->myrank == A->get_rankof( A, mm, nn) ); - mm = mm / A->p; - nn = nn / A->q; + mm = mm / chameleon_desc_datadist_get_iparam(A, 0); + nn = nn / chameleon_desc_datadist_get_iparam(A, 1); #endif if (mm < (size_t)(A->llm1)) { diff --git a/control/descriptor_helpers.c b/control/descriptor_helpers.c index b49cb69e9b751e4494ae01de14571010c64e980c..d5e1430638e8b507e4ac6163869d87b3021d97f4 100644 --- a/control/descriptor_helpers.c +++ b/control/descriptor_helpers.c @@ -56,7 +56,7 @@ int chameleon_getrankof_2d( const CHAM_desc_t *A, int m, int n ) { int mm = m + A->i / A->mb; int nn = n + A->j / A->nb; - return (mm % A->p) * A->q + (nn % A->q); + return (mm % chameleon_desc_datadist_get_iparam(A,0)) * chameleon_desc_datadist_get_iparam(A,1) + (nn % chameleon_desc_datadist_get_iparam(A,1)); } /** @@ -79,7 +79,7 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, 
int n ) { int mm = m + A->i / A->mb; (void)n; - return (mm % A->p) * A->q + (mm % A->q); + return (mm % chameleon_desc_datadist_get_iparam(A,0)) * chameleon_desc_datadist_get_iparam(A,1) + (mm % chameleon_desc_datadist_get_iparam(A,1)); } /** @@ -97,7 +97,7 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n ) */ int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k ) { int myrank = A->myrank; - return ( myrank % A->q == k % A->q ); + return ( myrank % chameleon_desc_datadist_get_iparam(A,1) == k % chameleon_desc_datadist_get_iparam(A,1) ); } /** @@ -128,7 +128,7 @@ void chameleon_get_proc_involved_in_panelk_2dbc( const CHAM_desc_t *A, np = 0; ws->involved = 0; - for ( b = k; (b < A->mt) && ((b-k) < A->p); b ++ ) { + for ( b = k; (b < A->mt) && ((b-k) < chameleon_desc_datadist_get_iparam(A, 0)); b ++ ) { rank = chameleon_getrankof_2d( A, b, n ); proc_involved[ b-k ] = rank; np ++; @@ -325,8 +325,8 @@ void* chameleon_getaddr_ccrb( const CHAM_desc_t *A, int m, int n ) #if defined(CHAMELEON_USE_MPI) assert( A->myrank == A->get_rankof( A, mm, nn ) ); - mm = mm / A->p; - nn = nn / A->q; + mm = mm / chameleon_desc_datadist_get_iparam(A, 0); + nn = nn / chameleon_desc_datadist_get_iparam(A, 1); #endif if (mm < (size_t)(A->llm1)) { @@ -372,8 +372,8 @@ void *chameleon_getaddr_cm( const CHAM_desc_t *A, int m, int n ) #if defined(CHAMELEON_USE_MPI) assert( A->myrank == A->get_rankof( A, mm, nn ) ); - mm = mm / A->p; - nn = nn / A->q; + mm = mm / chameleon_desc_datadist_get_iparam(A, 0); + nn = nn / chameleon_desc_datadist_get_iparam(A, 1); #endif offset = (size_t)(A->llm * A->nb) * nn + (size_t)(A->mb) * mm; diff --git a/example/lapack_to_chameleon/step3.h b/example/lapack_to_chameleon/step3.h index cbeeccaad4bff639ef8c266789fc28dec17dcfb4..1fe156e99a8a8b88f556001659ef9240bd0780b2 100644 --- a/example/lapack_to_chameleon/step3.h +++ b/example/lapack_to_chameleon/step3.h @@ -168,8 +168,8 @@ inline static void* user_getaddr_arrayofpointers(const 
CHAM_desc_t *A, int m, in #if defined(CHAMELEON_USE_MPI) assert( A->myrank == A->get_rankof( A, mm, nn) ); - mm = mm / A->p; - nn = nn / A->q; + mm = mm / chameleon_desc_datadist_get_iparam(A, 0); + nn = nn / chameleon_desc_datadist_get_iparam(A, 1); #endif offset = A->mt*nn + mm; diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index 19b6137f0f48951bed799c9563af05fb84e32e89..e61f95b18ba8a9e2f5c204bc1aff1428e24e6620 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -35,6 +35,8 @@ BEGIN_C_DECLS #define CHAMELEON_TILE_DESC (1 << 1) #define CHAMELEON_TILE_HMAT (1 << 2) +#define CHAMELEON_MAX_DIMENSION 10 + /** * @brief CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting */ @@ -76,10 +78,41 @@ typedef struct chameleon_tile_s { struct chameleon_desc_s; typedef struct chameleon_desc_s CHAM_desc_t; -typedef void* (*blkaddr_fct_t) ( const CHAM_desc_t*, int, int ); -typedef int (*blkldd_fct_t) ( const CHAM_desc_t*, int ); -typedef int (*blkrankof_fct_t)( const CHAM_desc_t*, int, int ); -typedef CHAM_tile_t* (*blktile_fct_t) ( const CHAM_desc_t*, int, int ); +typedef void* (*blkaddr_fct_t) ( const CHAM_desc_t*, int, int ); +typedef int (*blkldd_fct_t) ( const CHAM_desc_t*, int ); +typedef int (*blkrankof_fct_t) ( const CHAM_desc_t*, int, int ); +typedef int (*datadist_access_fct_t)( const CHAM_desc_t*, int, ... ); +typedef CHAM_tile_t* (*blktile_fct_t) ( const CHAM_desc_t*, int, int ); + +/** + * Data distribution type and access functions + */ +/** + * @brief Function describing the indexed access to a 2D block cyclic data + * distribution holding the grid size in the data distribution + * array [p,q] : + * 0 returns p, 1 returns q.
+ */ +int chameleon_get_2d_block_cyclic(const CHAM_desc_t *desc, int i ); + +/** + * @brief Function which forwards the index i to the access function + */ +int chameleon_desc_datadist_get_iparam( const CHAM_desc_t *desc, int i ); + +/** + * @brief Data distribution type + */ +typedef struct cham_data_dist_s { + datadist_access_fct_t get_distrib; /**> function describing how to index the distribution array */ + int distrib_array_size; /**> number of parameters stored in the distribution array */ + int distrib[CHAMELEON_MAX_DIMENSION]; /**> array holding the parameters */ +} cham_data_dist_t; + +/** + * @brief Function for initialising the data distribution + */ +void chameleon_desc_set_datadist( CHAM_desc_t *to, cham_data_dist_t *from ); struct chameleon_desc_s { const char *name; @@ -116,14 +149,13 @@ struct chameleon_desc_s { int lnt; /**> number of tile columns of the entire matrix - derived parameter */ /* Distributed case */ - int p; /**> number of rows of the 2D distribution grid */ - int q; /**> number of columns of the 2D distribution grid */ - int llm; /**> local number of rows of the full matrix - derived parameter */ - int lln; /**> local number of columns of the full matrix - derived parameter */ - int llm1; /**> local number of tile rows of the A11 matrix - derived parameter */ - int lln1; /**> local number of tile columns of the A11 matrix - derived parameter */ - int llmt; /**> local number of tile rows of the full matrix - derived parameter */ - int llnt; /**> local number of tile columns of the full matrix - derived parameter */ + cham_data_dist_t *data_dist; /**> data distribution type used to retrieve the distributed layout */ + int llm; /**> local number of rows of the full matrix - derived parameter */ + int lln; /**> local number of columns of the full matrix - derived parameter */ + int llm1; /**> local number of tile rows of the A11 matrix - derived parameter */ + int lln1; /**> local number of tile columns of the A11 matrix - derived
parameter */ + int llmt; /**> local number of tile rows of the full matrix - derived parameter */ + int llnt; /**> local number of tile columns of the full matrix - derived parameter */ int id; /**> identification number of the descriptor */ int occurences; /**> identify main matrix desc (occurances=1) or */ diff --git a/runtime/starpu/codelets/codelet_zipiv_allreduce.c b/runtime/starpu/codelets/codelet_zipiv_allreduce.c index f296bb3df7433bf6eb8d8c47e468b262ee741f16..5e7254a28548bfac4778c49850daa543197f69fe 100644 --- a/runtime/starpu/codelets/codelet_zipiv_allreduce.c +++ b/runtime/starpu/codelets/codelet_zipiv_allreduce.c @@ -114,7 +114,7 @@ void INSERT_TASK_zipiv_allreduce( CHAM_desc_t *A, int h, int n ) { - int np_involved = chameleon_min( A->p, A->mt - k); + int np_involved = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k); int np_iter = np_involved; int p_recv, p_send, me; int shift = 1; diff --git a/runtime/starpu/codelets/codelet_zperm_allreduce.c b/runtime/starpu/codelets/codelet_zperm_allreduce.c index ab9cf702294f7a54348b8d7995f45aca5afc32e3..87c4cc61af30b117d86b54f4a5ae517c6dbd5f0e 100644 --- a/runtime/starpu/codelets/codelet_zperm_allreduce.c +++ b/runtime/starpu/codelets/codelet_zperm_allreduce.c @@ -138,7 +138,7 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options, { struct chameleon_pzgetrf_s *tmp = (struct chameleon_pzgetrf_s *)ws; int *proc_involved = tmp->proc_involved; - int np_involved = chameleon_min( A->p, A->mt - k); + int np_involved = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k); int np_iter = np_involved; int p_recv, p_send, me, p_first; int shift = 1; @@ -161,7 +161,7 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options, INSERT_TASK_zperm_allreduce_send( options, U, A->myrank, p_send, n ); INSERT_TASK_zperm_allreduce_recv( options, U, ipiv, ipivk, A->myrank, p_recv, n, k == (A->mt-1) ? 
A->m - k * A->mb : A->mb, - A->p, A->q, shift, np_involved, p_first ); + chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1), shift, np_involved, p_first ); shift = shift << 1; np_iter = chameleon_ceil( np_iter, 2 ); @@ -220,7 +220,7 @@ INSERT_TASK_zperm_allreduce_send_invp( const RUNTIME_option_t *options, { int b, rank; - for ( b = k+1; (b < A->mt) && ((b-(k+1)) < A->p); b ++ ) { + for ( b = k+1; (b < A->mt) && ((b-(k+1)) < chameleon_desc_datadist_get_iparam(A, 0)); b ++ ) { rank = A->get_rankof( A, b, n ); if ( rank == A->myrank ) { continue; diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c index e8cc1a1b3500ab1a742563bc9d3ad86a00929e12..977c8676e3acad779f2c1f9e27ec59c92026e3d1 100644 --- a/runtime/starpu/control/runtime_descriptor_ipiv.c +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -26,14 +26,14 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, const CHAM_desc_t *desc ) { assert( ipiv ); - size_t nbhandles = 3 * ipiv->mt + 2 * desc->p; + size_t nbhandles = 3 * ipiv->mt + 2 * chameleon_desc_datadist_get_iparam(desc, 0); starpu_data_handle_t *handles = calloc( nbhandles, sizeof(starpu_data_handle_t) ); ipiv->ipiv = handles; handles += ipiv->mt; ipiv->nextpiv = handles; - handles += desc->p; + handles += chameleon_desc_datadist_get_iparam(desc, 0); ipiv->prevpiv = handles; - handles += desc->p; + handles += chameleon_desc_datadist_get_iparam(desc, 0); ipiv->perm = handles; handles += ipiv->mt; ipiv->invp = handles; @@ -50,8 +50,8 @@ void RUNTIME_ipiv_create( CHAM_ipiv_t *ipiv, return; } ipiv->mpitag_nextpiv = ipiv->mpitag_ipiv + ipiv->mt; - ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + desc->p; - ipiv->mpitag_perm = ipiv->mpitag_prevpiv + desc->p; + ipiv->mpitag_prevpiv = ipiv->mpitag_nextpiv + chameleon_desc_datadist_get_iparam(desc, 0); + ipiv->mpitag_perm = ipiv->mpitag_prevpiv + chameleon_desc_datadist_get_iparam(desc, 0); ipiv->mpitag_invp = 
ipiv->mpitag_perm + ipiv->mt; } #endif @@ -65,7 +65,7 @@ void RUNTIME_ipiv_destroy( CHAM_ipiv_t *ipiv, { int i; starpu_data_handle_t *handle = (starpu_data_handle_t*)(ipiv->ipiv); - size_t nbhandles = 3 * ipiv->mt + 2 * desc->p; + size_t nbhandles = 3 * ipiv->mt + 2 * chameleon_desc_datadist_get_iparam(desc, 0); for(i=0; i<nbhandles; i++) { if ( *handle != NULL ) { @@ -118,7 +118,7 @@ void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) starpu_data_handle_t *nextpiv = (starpu_data_handle_t*)(ipiv->nextpiv); const CHAM_desc_t *A = ipiv->desc; - nextpiv += rank/A->q; + nextpiv += rank/chameleon_desc_datadist_get_iparam(A, 1); assert( nextpiv ); if ( *nextpiv != NULL ) { @@ -128,7 +128,7 @@ void *RUNTIME_nextpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) int64_t kk = k + (ipiv->i / ipiv->mb); int owner = rank; int ncols = (kk == (A->nt-1)) ? A->n - kk * A->nb : A->nb; - int64_t tag = ipiv->mpitag_nextpiv + owner/A->q; + int64_t tag = ipiv->mpitag_nextpiv + owner/chameleon_desc_datadist_get_iparam(A, 1); cppi_register( nextpiv, A->dtyp, ncols, tag, owner ); @@ -142,7 +142,7 @@ void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) starpu_data_handle_t *prevpiv = (starpu_data_handle_t*)(ipiv->prevpiv); const CHAM_desc_t *A = ipiv->desc; - prevpiv += rank/A->q; + prevpiv += rank/chameleon_desc_datadist_get_iparam(A, 1); assert( prevpiv ); if ( *prevpiv != NULL ) { @@ -152,7 +152,7 @@ void *RUNTIME_prevpiv_getaddr( const CHAM_ipiv_t *ipiv, int rank, int k, int h ) int64_t kk = k + (ipiv->i / ipiv->mb); int owner = rank; int ncols = (kk == (A->nt-1)) ? 
A->n - kk * A->nb : A->nb; - int64_t tag = ipiv->mpitag_prevpiv + owner/A->q; + int64_t tag = ipiv->mpitag_prevpiv + owner/chameleon_desc_datadist_get_iparam(A, 1); cppi_register( prevpiv, A->dtyp, ncols, tag, owner ); @@ -226,7 +226,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, const CHAM_desc_t *A = ipiv->desc; handle = (starpu_data_handle_t*)(ipiv->nextpiv); - handle += rank/A->q; + handle += rank/chameleon_desc_datadist_get_iparam(A, 1); if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) @@ -239,7 +239,7 @@ void RUNTIME_ipiv_flushk( const RUNTIME_sequence_t *sequence, } handle = (starpu_data_handle_t*)(ipiv->prevpiv); - handle += rank/A->q; + handle += rank/chameleon_desc_datadist_get_iparam(A, 1); if ( *handle != NULL ) { #if defined(CHAMELEON_USE_MPI) diff --git a/testing/testing_zcheck_qr_lq.c b/testing/testing_zcheck_qr_lq.c index 48162e3d79b298b98f5f7d1bab21551b6a92c23e..502b19fa035a5bea8e3a6619baacfc191fea2f0d 100644 --- a/testing/testing_zcheck_qr_lq.c +++ b/testing/testing_zcheck_qr_lq.c @@ -683,7 +683,7 @@ int check_zgeqrs( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, * */ CHAMELEON_Desc_Create( &descRR, NULL, ChamComplexDouble, nb, nb, nb*nb, - NRHS, N, 0, 0, NRHS, N, descA->p, descA->q ); + NRHS, N, 0, 0, NRHS, N, chameleon_desc_datadist_get_iparam(descA, 0), chameleon_desc_datadist_get_iparam(descA, 1) ); CHAMELEON_zgemm_Tile( ChamConjTrans, trans, 1., descR, descA, 0., descRR ); @@ -776,7 +776,8 @@ int check_zgelqs( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, * where R = op(A)*X - B, op(A) is A or A', and alpha = ||B|| * */ - CHAMELEON_Desc_Create( &descRR, NULL, ChamComplexDouble, nb, nb, nb*nb, NRHS, M, 0, 0, NRHS, M, descA->p, descA->q ); + CHAMELEON_Desc_Create( &descRR, NULL, ChamComplexDouble, nb, nb, nb*nb, NRHS, M, 0, 0, NRHS, M, + chameleon_desc_datadist_get_iparam(descA, 0), chameleon_desc_datadist_get_iparam(descA, 1) ); CHAMELEON_zgemm_Tile( ChamConjTrans, trans, 1., descR, 
descA, 0., descRR ); diff --git a/testing/testing_zgepdf_qr.c b/testing/testing_zgepdf_qr.c index 62553f45bb0c15067d16dd1187722ea67b0fa19b..24b4fc823b9db2acd34ae7df388f508202eba200 100644 --- a/testing/testing_zgepdf_qr.c +++ b/testing/testing_zgepdf_qr.c @@ -108,8 +108,8 @@ testing_zgepdf_qr_desc( run_arg_list_t *args, int check ) libhqr_matrix_t mat = { .mt = descA1->mt, .nt = descA1->nt, - .nodes = descA1->p * descA1->q, - .p = descA1->p, + .nodes = chameleon_desc_datadist_get_iparam(descA1, 0) * chameleon_desc_datadist_get_iparam(descA1, 1), + .p = chameleon_desc_datadist_get_iparam(descA1, 0), }; /* Tree for the top matrix */ @@ -117,7 +117,7 @@ testing_zgepdf_qr_desc( run_arg_list_t *args, int check ) -1, /*low level tree */ -1, /* high level tree */ -1, /* TS tree size */ - descA1->p, /* High level size */ + chameleon_desc_datadist_get_iparam(descA1, 0), /* High level size */ -1, /* Domino */ 0 /* TSRR (unstable) */ ); @@ -129,7 +129,7 @@ testing_zgepdf_qr_desc( run_arg_list_t *args, int check ) /* high level tree (Could be greedy, but flat should reduce the volume of comm) */ LIBHQR_FLAT_TREE, -1, /* TS tree size */ - descA2->p /* High level size */ ); + chameleon_desc_datadist_get_iparam(descA2, 0) /* High level size */ ); } /* Calculates the solution */