diff --git a/compute/pzcesca.c b/compute/pzcesca.c index d09505c2ff68796825105f20093e9f2d4d783a15..ff464ad222563abee098b108bf4a68bb6df83161 100644 --- a/compute/pzcesca.c +++ b/compute/pzcesca.c @@ -44,9 +44,9 @@ chameleon_pzcesca_internal( int center, * 1) compute sums and sum-square (scl,ssq) in each tile */ for(n = 0; n < NT; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); for(m = 0; m < MT; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) { INSERT_TASK_zgesum( options, ChamColumnwise, tempmm, tempnn, @@ -71,7 +71,7 @@ chameleon_pzcesca_internal( int center, } for(n = 0; n < NT; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) { /** @@ -126,7 +126,7 @@ chameleon_pzcesca_internal( int center, } for(m = 0; m < MT; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); if ( (center == 1) && ( (axis == ChamRowwise) || (axis == ChamEltwise) ) ) { /** @@ -193,10 +193,10 @@ chameleon_pzcesca_internal( int center, /* Finally compute Centered-Scaled matrix coefficients inplace */ for(n = 0; n < NT; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); for(m = 0; m < MT; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); INSERT_TASK_zcesca( options, @@ -234,9 +234,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c /* Initialize Wgcol */ for(m = 0; m < Wgcol->mt; m++) { - tempmm = m == Wgcol->mt-1 ? Wgcol->m-m*Wgcol->mb : Wgcol->mb; + tempmm = Wgcol->get_blkdim( Wgcol, m, DIM_m, Wgcol->m ); for(n = 0; n < Wgcol->nt; n++) { - tempnn = n == Wgcol->nt-1 ? Wgcol->n-n*Wgcol->nb : Wgcol->nb; + tempnn = Wgcol->get_blkdim( Wgcol, n, DIM_n, Wgcol->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, @@ -246,9 +246,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c } /* Initialize Wgrow */ for(m = 0; m < Wgrow->mt; m++) { - tempmm = m == Wgrow->mt-1 ? Wgrow->m-m*Wgrow->mb : Wgrow->mb; + tempmm = Wgrow->get_blkdim( Wgrow, m, DIM_m, Wgrow->m ); for(n = 0; n < Wgrow->nt; n++) { - tempnn = n == Wgrow->nt-1 ? Wgrow->n-n*Wgrow->nb : Wgrow->nb; + tempnn = Wgrow->get_blkdim( Wgrow, n, DIM_n, Wgrow->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, @@ -258,9 +258,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c } /* Initialize Wgelt */ for(m = 0; m < Wgelt->mt; m++) { - tempmm = m == Wgelt->mt-1 ? Wgelt->m-m*Wgelt->mb : Wgelt->mb; + tempmm = Wgelt->get_blkdim( Wgelt, m, DIM_m, Wgelt->m ); for(n = 0; n < Wgelt->nt; n++) { - tempnn = n == Wgelt->nt-1 ? Wgelt->n-n*Wgelt->nb : Wgelt->nb; + tempnn = Wgelt->get_blkdim( Wgelt, n, DIM_n, Wgelt->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, @@ -270,9 +270,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c } /* Initialize Wdcol */ for(m = 0; m < Wdcol->mt; m++) { - tempmm = m == Wdcol->mt-1 ? Wdcol->m-m*Wdcol->mb : Wdcol->mb; + tempmm = Wdcol->get_blkdim( Wdcol, m, DIM_m, Wdcol->m ); for(n = 0; n < Wdcol->nt; n++) { - tempnn = n == Wdcol->nt-1 ? Wdcol->n-n*Wdcol->nb : Wdcol->nb; + tempnn = Wdcol->get_blkdim( Wdcol, n, DIM_n, Wdcol->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, @@ -282,9 +282,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c } /* Initialize Wdrow */ for(m = 0; m < Wdrow->mt; m++) { - tempmm = m == Wdrow->mt-1 ? Wdrow->m-m*Wdrow->mb : Wdrow->mb; + tempmm = Wdrow->get_blkdim( Wdrow, m, DIM_m, Wdrow->m ); for(n = 0; n < Wdrow->nt; n++) { - tempnn = n == Wdrow->nt-1 ? Wdrow->n-n*Wdrow->nb : Wdrow->nb; + tempnn = Wdrow->get_blkdim( Wdrow, n, DIM_n, Wdrow->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 19fec4446eda909e39cdfca22dc4d9993750ddac..0ef55f468f4205c6c25768aa5fc3460cb39ff76f 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -46,8 +46,8 @@ int chameleon_pzgelqf_step( int genD, int k, int ib, int m, n; int tempkm, tempkn, tempmm, tempnn; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgelqt( options, tempkm, tempkn, ib, T->nb, @@ -55,8 +55,8 @@ int chameleon_pzgelqf_step( int genD, int k, int ib, T(k, k)); if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( options, ChamUpper, tempDkm, tempDkn, @@ -72,7 +72,7 @@ int chameleon_pzgelqf_step( int genD, int k, int ib, } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zunmlq( options, ChamRight, ChamConjTrans, @@ -85,7 +85,7 @@ int chameleon_pzgelqf_step( int genD, int k, int ib, RUNTIME_data_flush( sequence, T(k, k) ); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); RUNTIME_data_migrate( sequence, A(k, k), A->get_rankof( A, k, n ) ); @@ -98,7 +98,7 @@ int chameleon_pzgelqf_step( int genD, int k, int ib, A(k, n), T(k, n)); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); RUNTIME_data_migrate( sequence, A(m, k), A->get_rankof( A, m, n ) ); diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index 60921a28b040d02b77b8431e097d8c51544bba35..e97d99f1ca3af1e70488b0bb9d3bddb1cdaee007 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -49,7 +49,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, int tempkmin, tempkm, tempnn, tempmm, temppn; int node, nbtiles; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); /* The number of geqrt to apply */ nbgelqt = qrtree->getnbgeqrf( qrtree, k ); @@ -63,7 +63,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, continue; } - temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + temppn = A->get_blkdim( A, p, DIM_n, A->n ); tempkmin = chameleon_min(tempkm, temppn); INSERT_TASK_zgelqt( @@ -72,8 +72,8 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, A(k, p), T(k, p)); if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( options, @@ -89,7 +89,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zunmlq( options, ChamRight, ChamConjTrans, @@ -112,7 +112,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, n = tiles[i]; p = qrtree->currpiv( qrtree, k, n ); - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); if ( qrtree->gettype( qrtree, k, n ) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -142,7 +142,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib, T(k, n)); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); node = A->get_rankof( A, m, n ); RUNTIME_data_migrate( sequence, A(m, p), node ); diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index 09d9bab3e0c459232e2b2dbf5f369a30ee2f8cda..ae9afdf58647e23f32b166468b88a93a19ce914c 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -92,10 +92,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM for (k = 0; k < K; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for (N = k; N < A->nt; N += BS) { - tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; + tempNn = A->get_blkdim( A, N, DIM_n, A->n ); tempkmin = chameleon_min(tempkm, tempNn); INSERT_TASK_zgelqt( &options, @@ -103,8 +103,8 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM A(k, N), T(k, N)); if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); + int tempDNn = D->get_blkdim( D, N, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -120,7 +120,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM #endif } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zunmlq( &options, ChamRight, ChamConjTrans, @@ -133,7 +133,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM RUNTIME_data_flush( sequence, T(k, N) ); for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); RUNTIME_data_migrate( sequence, A(k, N), A->get_rankof( A, k, n ) ); @@ -147,7 +147,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM T(k, n)); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); RUNTIME_data_migrate( sequence, A(m, N), A->get_rankof( A, m, n ) ); @@ -167,7 +167,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM } for (RD = BS; RD < A->nt-k; RD *= 2) { for (N = k; N+RD < A->nt; N += 2*RD) { - tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb; + tempNRDn = A->get_blkdim( A, N+RD, DIM_n, A->n ); node = A->get_rankof( A, k, N+RD ); RUNTIME_data_migrate( sequence, A(k, N), node ); @@ -182,7 +182,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM T2(k, N+RD)); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); node = A->get_rankof( A, m, N+RD ); RUNTIME_data_migrate( sequence, A(m, N), node ); diff --git a/compute/pzgemm.c b/compute/pzgemm.c index a4a12eca743aa1e9ef69a15ddfec7bc1564edc92..1c8b05aaf43de10bb0b734a48e9e352887ab7434 100644 --- a/compute/pzgemm.c +++ b/compute/pzgemm.c @@ -84,9 +84,9 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); /* Scale C */ options->forcesub = 0; @@ -100,7 +100,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran if (transA == ChamNoTrans) { if (transB == ChamNoTrans) { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgemm_Astat( options, @@ -116,7 +116,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran */ else { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgemm_Astat( options, @@ -134,7 +134,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran else { if (transB == ChamNoTrans) { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); INSERT_TASK_zgemm_Astat( options, @@ -150,7 +150,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran */ else { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); INSERT_TASK_zgemm_Astat( options, @@ -186,17 +186,25 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran RUNTIME_sequence_t *sequence = options->sequence; int m, n, k, p, q, KT, K, lp, lq; int tempmm, tempnn, tempkk; - int lookahead, myp, myq; + int lookahead, myp, myq, DIM_k; CHAMELEON_Complex64_t zbeta; CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; lookahead = chamctxt->lookahead; - KT = transA == ChamNoTrans ? A->nt : A->mt; - K = transA == ChamNoTrans ? A->n : A->m; - myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); - myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); + if ( transA == ChamNoTrans ) { + KT = A->nt; + K = A->n; + DIM_k = DIM_n; + } + else { + KT = A->mt; + K = A->m; + DIM_k = DIM_m; + } + myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); + myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); /* * A: ChamNoTrans / B: ChamNoTrans @@ -204,12 +212,13 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran for (k = 0; k < KT; k++ ) { lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); - tempkk = k == KT - 1 ? K - k * A->nb : A->nb; + + tempkk = A->get_blkdim( A, k, DIM_k, K ); zbeta = k == 0 ? beta : zone; /* Transfert ownership of the k column of A */ for (m = 0; m < C->mt; m ++ ) { - tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if ( transA == ChamNoTrans ) { INSERT_TASK_zlacpy( @@ -249,7 +258,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran /* Transfert ownership of the k row of B */ for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if ( transB == ChamNoTrans ) { INSERT_TASK_zlacpy( @@ -288,10 +297,10 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran } for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zgemm( options, @@ -327,16 +336,16 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); /* * A: ChamNoTrans / B: ChamNoTrans */ if (transA == ChamNoTrans) { if (transB == ChamNoTrans) { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( options, @@ -352,7 +361,7 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr */ else { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( options, @@ -370,7 +379,7 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr else { if (transB == ChamNoTrans) { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( options, @@ -386,7 +395,7 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr */ else { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( options, diff --git a/compute/pzgenm2.c b/compute/pzgenm2.c index 99455940aae73822a1e7dfa937873b30715dce9d..922559a5824a81525e4d2b86dc5e05ff3b531ab2 100644 --- a/compute/pzgenm2.c +++ b/compute/pzgenm2.c @@ -92,7 +92,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * */ for(n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); /* Zeroes the local intermediate vector */ INSERT_TASK_dlaset( @@ -103,7 +103,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, /* Computes the sums of the local tiles into the local vector */ for(m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0)) { - tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_dzasum( &options, ChamColumnwise, ChamUpperLower, tempmm, tempnn, @@ -131,7 +131,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, NRMX( myp, myq ) ); for( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_dgessq( &options, ChamEltwise, 1, tempnn, DROW( myp, n ), @@ -207,7 +207,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, if ( cnt == 0 ) { for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); if ( myp == 0 ) { #if defined(PRECISION_z) || defined(PRECISION_c) @@ -245,7 +245,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, */ scl = 1. / e0; for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlascal( &options, @@ -257,10 +257,10 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * Compute Sx = S * x */ for(m = myp; m < A->mt; m+=chameleon_desc_datadist_get_iparam(A, 0)) { - tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); beta = n == myq ? 0. : 1.; INSERT_TASK_zgemv( @@ -292,10 +292,10 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, * Compute x = S' * S * x = S' * Sx */ for ( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); for( m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0) ) { - tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); beta = m == myp ? 0. : 1.; INSERT_TASK_zgemv( @@ -336,7 +336,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, NRMX( myp, myq ) ); for( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zgessq( &options, ChamEltwise, 1, tempnn, @@ -378,7 +378,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, NRMSX( myp, myq ) ); for( m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0) ) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgessq( &options, ChamEltwise, tempmm, 1, SX( m, myq ), diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index 450f3fb968d8a759e84474ffde3b8c6774c96147..a827177fa9074d42e0487eaad856e545877ba0d6 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -45,8 +45,8 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib, int m, n; int tempkm, tempkn, tempnn, tempmm; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgeqrt( options, @@ -55,8 +55,8 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib, T(k, k)); if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( options, ChamLower, tempDkm, tempDkn, @@ -71,7 +71,7 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib, #endif } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zunmqr( options, ChamLeft, ChamConjTrans, @@ -84,7 +84,7 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib, RUNTIME_data_flush( sequence, T(k, k) ); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); RUNTIME_data_migrate( sequence, A(k, k), A->get_rankof( A, m, k ) ); @@ -98,7 +98,7 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib, T(m, k)); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); RUNTIME_data_migrate( sequence, A(k, n), A->get_rankof( A, m, n ) ); diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index ad75e652eb79cd26b01d1f4ae7ed1253140f8e23..f8c9020ffc9b461d94f199bbf77079a87635df1b 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -50,7 +50,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, int tempkmin, tempkn, tempnn, tempmm, temppm; int node, nbtiles; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); /* The number of geqrt to apply */ nbgeqrt = qrtree->getnbgeqrf( qrtree, k ); @@ -64,7 +64,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, continue; } - temppm = p == A->mt-1 ? A->m-p*A->mb : A->mb; + temppm = A->get_blkdim( A, p, DIM_m, A->m ); tempkmin = chameleon_min(temppm, tempkn); INSERT_TASK_zgeqrt( @@ -73,8 +73,8 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, A(p, k), T(p, k) ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( options, @@ -90,7 +90,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zunmqr( options, ChamLeft, ChamConjTrans, @@ -113,7 +113,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, m = tiles[i]; p = qrtree->currpiv( qrtree, k, m ); - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); if ( qrtree->gettype( qrtree, k, m ) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -143,7 +143,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib, T(m, k)); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); node = A->get_rankof( A, m, n ); RUNTIME_data_migrate( sequence, A(p, n), node ); diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index 546f802d9c8161bce7734a1ce50bb612ed4a5970..83f93fc3d8e3ef46ec65a8b8bc6eebf495a1d929 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -89,9 +89,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM for (k = 0; k < K; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (M = k; M < A->mt; M += BS) { - tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; + tempMm = A->get_blkdim( A, M, DIM_m, A->m ); tempkmin = chameleon_min(tempMm, tempkn); INSERT_TASK_zgeqrt( @@ -100,8 +100,8 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM A(M, k), T(M, k)); if ( genD ) { - int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb; - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDMm = D->get_blkdim( D, M, DIM_m, D->m ); + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -117,7 +117,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM #endif } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zunmqr( &options, ChamLeft, ChamConjTrans, @@ -130,7 +130,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM RUNTIME_data_flush( sequence, T(M, k) ); for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); RUNTIME_data_migrate( sequence, A(M, k), A->get_rankof( A, m, k ) ); @@ -144,7 +144,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM T(m, k)); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); RUNTIME_data_migrate( sequence, A(M, n), A->get_rankof( A, m, n ) ); @@ -164,7 +164,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM } for (RD = BS; RD < A->mt-k; RD *= 2) { for (M = k; M+RD < A->mt; M += 2*RD) { - tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb; + tempMRDm = A->get_blkdim( A, M+RD, DIM_m, A->m ); node = A->get_rankof( A, M+RD, k ); RUNTIME_data_migrate( sequence, A(M, k), node ); @@ -179,7 +179,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM T2(M+RD, k)); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); node = A->get_rankof( A, M+RD, n ); RUNTIME_data_migrate( sequence, A(M, n), node ); diff --git a/compute/pzgered.c b/compute/pzgered.c index dd2c4a597c063ae33f468c529a6302808e3f7356..a0c38cb8ea320a90863842b8c92639b7e3cfe0be 100644 --- a/compute/pzgered.c +++ b/compute/pzgered.c @@ -80,10 +80,10 @@ chameleon_pzgered_frb( cham_uplo_t uplo, int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( (n == m) && (uplo != ChamUpperLower) ) { INSERT_TASK_ztrssq( @@ -235,13 +235,13 @@ void chameleon_pzgered( cham_uplo_t uplo, for(m = 0; m < A->mt; m++) { - int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, A->m ); int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt; for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, A->n ); /* * u_{high} = 1e-16 (later should be application accuracy) diff --git a/compute/pzgerst.c b/compute/pzgerst.c index 52c801cbe0945bb3e3249b602c97e92fe6c310d5..df031662ffcfd7397f5d9f4a49dfdbc4aef6f1fc 100644 --- a/compute/pzgerst.c +++ b/compute/pzgerst.c @@ -36,7 +36,7 @@ void chameleon_pzgerst( cham_uplo_t uplo, RUNTIME_options_init(&options, chamctxt, sequence, request); for(m = 0; m < A->mt; m++) { - int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, A->m ); int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt; @@ -46,7 +46,7 @@ void chameleon_pzgerst( cham_uplo_t uplo, if (( tile->rank == A->myrank ) && ( tile->flttype != ChamComplexDouble ) ) { - int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zgerst( &options, tempmm, tempnn, A( m, n ) ); diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 8c41f81d550fa77e1c5bb0fd6d6dc59af3b0dea4..6db9a8a40148a67fcaeda74f9a718c949b12e59d 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -58,8 +58,8 @@ chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws, const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0; int m, tempkm, tempkn, tempmm; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); /* * Algorithm per block without pivoting @@ -70,7 +70,7 @@ chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws, A(k, k), 0); for (m = k+1; m < A->mt; m++) { - tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztrsm( options, ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit, @@ -90,8 +90,8 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws, int m, h; int tempkm, tempkn, tempmm, minmn; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* @@ -103,7 +103,7 @@ chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws, A( k, k ), U( k, k ), A->mb * k ); for (m = k+1; m < A->mt; m++) { - tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_nopiv_percol_trsm( options, tempmm, tempkn, h, A( m, k ), U( k, k ) ); @@ -123,8 +123,8 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, int m, h; int tempkm, tempkn, tempmm, minmn; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ @@ -141,7 +141,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, ipiv ); for (m = k+1; m < A->mt; m++) { - tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_percol_offdiag( options, tempmm, tempkn, h, m * A->mb, @@ -173,8 +173,8 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, void **clargs = malloc( sizeof(char *) ); memset( clargs, 0, sizeof(char *) ); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ @@ -190,7 +190,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv ); for ( m = k+1; m < A->mt; m++ ) { - tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_panel_offdiag_batched( options, tempmm, tempkn, h, m * A->mb, (void *)ws, A(m, k), clargs, ipiv ); } @@ -216,8 +216,8 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, int m, h, b, nbblock; int tempkm, tempkn, tempmm, minmn; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ @@ -240,7 +240,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, ipiv ); for (m = k+1; m < A->mt; m++) { - tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_blocked_offdiag( options, tempmm, tempkn, j, m * A->mb, ws->ib, @@ -283,8 +283,8 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, void **clargs = malloc( sizeof(char *) ); memset( clargs, 0, sizeof(char *) ); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* Update the number of column */ @@ -303,7 +303,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws, j = h + b * ws->ib; for ( m = k; m < A->mt; m++ ) { - tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgetrf_panel_blocked_batched( options, tempmm, tempkn, j, m * A->mb, (void *)ws, A(m, k), Up(k, k), clargs, ipiv ); } @@ -405,9 +405,9 @@ chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws, return; } - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* Extract selected rows into U */ @@ -474,9 +474,9 @@ chameleon_pzgetrf_panel_permute_batched( struct chameleon_pzgetrf_s *ws, void **clargs = malloc( sizeof(char *) ); *clargs = NULL; - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); minmn = chameleon_min( tempkm, tempkn ); /* Extract selected rows into U */ @@ -523,13 +523,13 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws, int lq = (k % lookahead) * Q; int myp = A->myrank / Q; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); if ( k >= ws->ringswitch ) { for ( m = k+1; m < A->mt; m++ ) { if ( ( m % P ) != myp ) continue; - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkn, @@ -550,7 +550,7 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws, for ( m = k+1; m < A->mt; m++ ) { if ( ( m % P ) != myp ) continue; - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for ( q = 0; q < Q; q++ ) { INSERT_TASK_zlacpy( options, @@ -581,8 +581,8 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, int myq = A->myrank % chameleon_desc_datadist_get_iparam(A, 1); int lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 1); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); if ( ws->batch_size > 0 ) { chameleon_pzgetrf_panel_permute_batched( ws, A, ipiv, k, n, options ); @@ -612,7 +612,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); rankAmn = A->get_rankof( A, m, n ); if ( A->myrank == rankAmn ) { @@ -700,8 +700,8 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, { chameleon_pzgetrf_panel_permute_batched( ws, A, IPIV, k, n, &options ); if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempnn, Wu(A->myrank, n), A(k, n) ); RUNTIME_data_flush( sequence, A(k, n) ); @@ -720,8 +720,8 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, { chameleon_pzgetrf_panel_permute( ws, A, IPIV, k, n, &options ); if ( A->myrank == chameleon_getrankof_2d( A, k, n ) ) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlacpy( &options, ChamUpperLower, tempkm, tempnn, Wu(A->myrank, n), A(k, n) ); RUNTIME_data_flush( sequence, A(k, n) ); diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c index 090bb03fd611de7c15689511190b9a6fba8f654f..932a0f6678b43bade54ca8e920dce93207b4371b 100644 --- a/compute/pzgetrf_incpiv.c +++ b/compute/pzgetrf_incpiv.c @@ -84,8 +84,8 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i for (k = 0; k < minMNT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgetrf_incpiv( &options, tempkm, tempkn, ib, L->nb, @@ -105,7 +105,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zgessm( &options, tempkm, tempnn, tempkm, ib, L->nb, @@ -115,7 +115,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i A(k, n)); } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztstrf( &options, tempmm, tempkn, ib, L->nb, @@ -126,7 +126,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i m == A->mt-1, A->nb*k); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zssssm( &options, A->nb, tempnn, tempmm, tempnn, A->nb, ib, L->nb, diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c index 424842bc06c9639442124d13509e1f2308b11820..5e7f8423e0c4b1d5f4de257768db4599cfe114cc 100644 --- a/compute/pzgetrf_nopiv.c +++ b/compute/pzgetrf_nopiv.c @@ -71,8 +71,8 @@ void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A, for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); options.priority = 2*A->nt - 2*k; INSERT_TASK_zgetrf_nopiv( @@ -82,7 +82,7 @@ void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A, for (m = k+1; m < A->mt; m++) { options.priority = 2*A->nt - 2*k - m; - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztrsm( &options, ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit, @@ -91,7 +91,7 @@ void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A, A(m, k)); } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = 2*A->nt - 2*k - n; INSERT_TASK_ztrsm( &options, @@ -101,7 +101,7 @@ void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A, A(k, n)); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = 2*A->nt - 2*k - n - m; INSERT_TASK_zgemm( &options, @@ -157,8 +157,8 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 0); lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 1); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); options.priority = 2*A->nt - 2*k; INSERT_TASK_zgetrf_nopiv( @@ -207,7 +207,7 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, } options.priority = 2*A->nt - 2*k - m; - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); assert( A->get_rankof( A, m, k ) == WU->get_rankof( WU, myp + lp, k) ); INSERT_TASK_ztrsm( @@ -244,7 +244,7 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, continue; } - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = 2*A->nt - 2*k - n; assert( A->get_rankof( A, k, n ) == WL->get_rankof( WL, k, myq+lq) ); @@ -281,7 +281,7 @@ void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, continue; } - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = 2*A->nt - 2*k - n - m; assert( A->get_rankof( A, m, n ) == WL->get_rankof( WL, m, myq + lq) ); diff --git a/compute/pzgram.c b/compute/pzgram.c index 04f849c6ba3a0092ec846fd31a56fdc259e7bc59..788b83998c013345bbbf623a286947e93f06d476 100644 --- a/compute/pzgram.c +++ b/compute/pzgram.c @@ -42,10 +42,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo, for(n = 0; n < NT; n++) { int mmin = ( uplo == ChamLower ) ? n : 0; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); for(m = mmin; m < mmax; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); if ( n == m ) { INSERT_TASK_dsyssq( @@ -66,7 +66,7 @@ chameleon_pzgram_internal( cham_uplo_t uplo, } for(n = 0; n < NT; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); /** * 2) reduce columns (scl,ssq) tiles per processus (between lines) @@ -116,10 +116,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo, for(n = 0; n < NT; n++) { int mmin = ( uplo == ChamLower ) ? n : 0; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); for(m = mmin; m < mmax; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); INSERT_TASK_zgram( options, @@ -152,9 +152,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des /* Initialize Wcol */ for(m = 0; m < Wcol->mt; m++) { - tempmm = m == Wcol->mt-1 ? Wcol->m-m*Wcol->mb : Wcol->mb; + tempmm = Wcol->get_blkdim( Wcol, m, DIM_m, Wcol->m ); for(n = 0; n < Wcol->nt; n++) { - tempnn = n == Wcol->nt-1 ? Wcol->n-n*Wcol->nb : Wcol->nb; + tempnn = Wcol->get_blkdim( Wcol, n, DIM_n, Wcol->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, @@ -164,9 +164,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des } /* Initialize Welt */ for(m = 0; m < Welt->mt; m++) { - tempmm = m == Welt->mt-1 ? Welt->m-m*Welt->mb : Welt->mb; + tempmm = Welt->get_blkdim( Welt, m, DIM_m, Welt->m ); for(n = 0; n < Welt->nt; n++) { - tempnn = n == Welt->nt-1 ? Welt->n-n*Welt->nb : Welt->nb; + tempnn = Welt->get_blkdim( Welt, n, DIM_n, Welt->n ); INSERT_TASK_dlaset( &options, ChamUpperLower, tempmm, tempnn, diff --git a/compute/pzhemm.c b/compute/pzhemm.c index 1d9205311c3e108a0fe4162a5d2f32ef9ed528e0..2a6b69f429cfb19b8b2a2da3d98cb9690e52ff45 100644 --- a/compute/pzhemm.c +++ b/compute/pzhemm.c @@ -109,9 +109,9 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t } for(n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for(m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); /* Scale C */ options->forcesub = 0; @@ -125,7 +125,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t if (side == ChamLeft) { if (uplo == ChamLower) { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); if (k < m) { INSERT_TASK_zgemm_Astat( @@ -161,7 +161,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t */ else { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); if (k < m) { INSERT_TASK_zgemm_Astat( @@ -199,7 +199,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t else { if (uplo == ChamLower) { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); if (k < n) { INSERT_TASK_zgemm_Astat( @@ -235,7 +235,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t */ else { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); if (k < n) { INSERT_TASK_zgemm_Astat( @@ -292,7 +292,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, { RUNTIME_sequence_t *sequence = options->sequence; cham_trans_t transA; - int m, n, k, p, q, KT, K, lp, lq; + int m, n, k, p, q, KT, lp, lq; int tempmm, tempnn, tempkk; int lookahead, myp, myq; @@ -301,14 +301,13 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->nt; - K = A->n; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); - tempkk = k == KT - 1 ? K - k * A->nb : A->nb; + tempkk = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; /* Transfert ownership of the k column of A or B */ @@ -316,7 +315,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, int Am, Ak; int tempam, tempak; - tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if ( (( uplo == ChamUpper ) && ( m > k )) || (( uplo == ChamLower ) && ( m < k )) ) @@ -355,7 +354,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, /* Transfert ownership of the k row of B, or A */ for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zlacpy( options, @@ -376,11 +375,11 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, /* Perform the update of this iteration */ for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if ( k == m ) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zhemm( options, ChamLeft, uplo, @@ -401,7 +400,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, } for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zgemm( options, transA, ChamNoTrans, @@ -428,7 +427,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, { RUNTIME_sequence_t *sequence = options->sequence; cham_trans_t transA; - int m, n, k, p, q, KT, K, lp, lq; + int m, n, k, p, q, KT, lp, lq; int tempmm, tempnn, tempkk; int lookahead, myp, myq; @@ -437,20 +436,19 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->mt; - K = A->m; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); - tempkk = k == KT - 1 ? K - k * A->nb : A->nb; + tempkk = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; /* Transfert ownership of the k column of A or B */ for (m = 0; m < C->mt; m++ ) { - tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zlacpy( options, @@ -474,7 +472,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, int Ak, An; int tempak, tempan; - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if ( (( uplo == ChamUpper ) && ( n < k )) || (( uplo == ChamLower ) && ( n > k )) ) @@ -511,11 +509,11 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, /* Perform the update of this iteration */ for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if ( k == n ) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); /* A has been stored in WA or WB for the summa ring */ INSERT_TASK_zhemm( @@ -537,7 +535,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, } for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zgemm( options, ChamNoTrans, transA, @@ -594,16 +592,16 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; for(m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for(n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); /* * ChamLeft / ChamLower */ if (side == ChamLeft) { if (uplo == ChamLower) { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); zbeta = k == 0 ? beta : zone; if (k < m) { INSERT_TASK_zgemm( @@ -641,7 +639,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ */ else { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); zbeta = k == 0 ? beta : zone; if (k < m) { INSERT_TASK_zgemm( @@ -681,7 +679,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ else { if (uplo == ChamLower) { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); zbeta = k == 0 ? beta : zone; if (k < n) { INSERT_TASK_zgemm( @@ -719,7 +717,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ */ else { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); zbeta = k == 0 ? beta : zone; if (k < n) { INSERT_TASK_zgemm( diff --git a/compute/pzher2k.c b/compute/pzher2k.c index 5a1be1c13a13b6f172897f39198e54ea3f6c032e..060926d88ba863248d14838c134167f5ea1398ba 100644 --- a/compute/pzher2k.c +++ b/compute/pzher2k.c @@ -52,7 +52,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, RUNTIME_options_init(&options, chamctxt, sequence, request); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if (uplo == ChamLower) { mmin = n+1; @@ -68,7 +68,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, */ if (trans == ChamNoTrans) { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); dbeta = k == 0 ? beta : 1.0; INSERT_TASK_zher2k( &options, @@ -79,9 +79,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, dbeta, C(n, n)); /* ldc * N */ } for (m = mmin; m < mmax; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; INSERT_TASK_zgemm( &options, @@ -106,7 +106,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, */ else { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); dbeta = k == 0 ? beta : 1.0; INSERT_TASK_zher2k( &options, @@ -117,9 +117,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, dbeta, C(n, n)); /* ldc * N */ } for (m = mmin; m < mmax; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; INSERT_TASK_zgemm( &options, diff --git a/compute/pzhered.c b/compute/pzhered.c index 621cb87ae8a3e3136cf95b9acc12502ee6b7543f..c6821d3638501008710155a2d6ab1ac7137d0dda 100644 --- a/compute/pzhered.c +++ b/compute/pzhered.c @@ -81,11 +81,11 @@ chameleon_pzhered_frb( cham_trans_t trans, int nmin = (uplo == ChamUpper) ? m : 0; int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, NT) : NT; - int tempmm = (m == (MT - 1)) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for (n = nmin; n < nmax; n++) { - int tempnn = (n == (NT - 1)) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( n == m ) { if ( trans == ChamConjTrans ) { @@ -257,13 +257,13 @@ void chameleon_pzhered( cham_trans_t trans, for (m = 0; m < A->mt; m++) { - int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, A->m ); int nmin = (uplo == ChamUpper) ? m : 0; int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, A->nt) : A->nt; for (n = nmin; n < nmax; n++) { - int tempnn = (n == (A->nt - 1)) ? A->n - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, A->n ); /* * u_{high} = 1e-16 (later should be application accuracy) diff --git a/compute/pzherk.c b/compute/pzherk.c index 4fec1fd779e9acda3c9fe2de5cdb4e5a996cff4c..d1e88310c00e1392cdb668b201ca89445b4723c9 100644 --- a/compute/pzherk.c +++ b/compute/pzherk.c @@ -52,13 +52,13 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, RUNTIME_options_init(&options, chamctxt, sequence, request); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); /* * ChamNoTrans */ if (trans == ChamNoTrans) { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); dbeta = k == 0 ? beta : 1.0; INSERT_TASK_zherk( &options, @@ -72,9 +72,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, */ if (uplo == ChamLower) { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; INSERT_TASK_zgemm( &options, @@ -91,9 +91,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, */ else { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; INSERT_TASK_zgemm( &options, @@ -111,7 +111,7 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, */ else { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); dbeta = k == 0 ? beta : 1.0; INSERT_TASK_zherk( &options, @@ -125,9 +125,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, */ if (uplo == ChamLower) { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; INSERT_TASK_zgemm( &options, @@ -144,9 +144,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, */ else { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; INSERT_TASK_zgemm( &options, diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index 706740891ade6bc8d3cd10ac1ff1be2338ce0c55..ae4906dd0c360d5b2c265c42ecdf98539122a80d 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -101,7 +101,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, /* Let's extract the diagonal in a temporary copy that contains A and A' */ for (k = 1; k < A->nt; k++){ - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zhe2ge( &options, uplo, tempkn, tempkn, A->mb, @@ -112,8 +112,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, for (k = 0; k < A->nt-1; k++){ RUNTIME_iteration_push(chamctxt, k); - tempkm = k+1 == A->mt-1 ? A->m-(k+1)*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n- k *A->nb : A->nb; + tempkm = A->get_blkdim( A, k+1, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgeqrt( &options, @@ -147,7 +147,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, /* RIGHT on the remaining tiles until the bottom */ for (m = k+2; m < A->mt ; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zunmqr( &options, ChamRight, ChamNoTrans, @@ -158,7 +158,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, } for (m = k+2; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = 1; INSERT_TASK_ztsqrt( @@ -183,7 +183,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, /* RIGHT */ for (j = m+1; j < A->mt ; j++) { - tempjj = j == A->mt-1 ? A->m-j*A->mb : A->mb; + tempjj = A->get_blkdim( A, j, DIM_m, A->m ); INSERT_TASK_ztsmqr( &options, ChamRight, ChamNoTrans, @@ -264,8 +264,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, for (k = 0; k < A->nt-1; k++){ RUNTIME_iteration_push(chamctxt, k); - tempkn = k+1 == A->nt-1 ? A->n-(k+1)*A->nb : A->nb; - tempkm = k == A->mt-1 ? A->m- k *A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k+1, DIM_n, A->n ); + INSERT_TASK_zgelqt( &options, tempkm, tempkn, ib, A->nb, @@ -298,7 +299,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, /* LEFT on the remaining tiles until the left side */ for (n = k+2; n < A->nt ; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zunmlq( &options, ChamLeft, ChamNoTrans, @@ -309,7 +310,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, } for (n = k+2; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = 1; INSERT_TASK_ztslqt( &options, @@ -333,7 +334,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, /* LEFT */ for (j = n+1; j < A->nt ; j++) { - tempjj = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjj = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_ztsmlq( &options, ChamLeft, ChamNoTrans, @@ -413,7 +414,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, /* Copy-back into A */ for (k = 1; k < A->nt; k++){ - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zlacpy( &options, uplo, tempkn, tempkn, D(k), A(k, k)); diff --git a/compute/pzlag2c.c b/compute/pzlag2c.c index fb45797c676de7a50a3012e1df1997728abfc416..4113e49138e186c67fdefe58ea1d739d5e971f74 100644 --- a/compute/pzlag2c.c +++ b/compute/pzlag2c.c @@ -46,10 +46,10 @@ void chameleon_pclag2z( CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_options_init(&options, chamctxt, sequence, request); for(m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for(n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_clag2z( &options, @@ -81,10 +81,10 @@ void chameleon_pzlag2c( CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_options_init(&options, chamctxt, sequence, request); for(m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for(n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlag2c( &options, diff --git a/compute/pzlange.c b/compute/pzlange.c index abfb27a366655537e0fa0e6e4a48d7e3626aa2bf..100b44059da604131a3f32751741e6cdc5c27f8a 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -54,10 +54,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, int mmin = ( uplo == ChamLower ) ? n : 0; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); for(m = mmin; m < mmax; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); if ( (n == m) && (uplo != ChamUpperLower) ) { INSERT_TASK_ztrasm( @@ -144,10 +144,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( (n == m) && (uplo != ChamUpperLower) ) { INSERT_TASK_ztrasm( @@ -230,10 +230,10 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( (n == m) && (uplo != ChamUpperLower) ) { INSERT_TASK_zlantr( @@ -313,10 +313,10 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( (n == m) && (uplo != ChamUpperLower) ) { INSERT_TASK_ztrssq( diff --git a/compute/pzlansy.c b/compute/pzlansy.c index ab709ac9e396a578ad3e5db31c7577661505848b..928e26964c6674f67b46c07c6be1b4f5db2a717c 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -51,10 +51,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( n == m ) { INSERT_TASK_dzasum( @@ -77,7 +77,7 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, } for(m = 0; m < MT; m++) { - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = Q; n < NT; n++) { INSERT_TASK_daxpy( @@ -144,10 +144,10 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, int nmin = (uplo == ChamUpper ) ? m : 0; int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( n == m ) { if ( trans == ChamConjTrans) { @@ -230,10 +230,10 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, int nmin = (uplo == ChamUpper ) ? m : 0; int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int tempmm = A->get_blkdim( A, m, DIM_m, M ); for(n = nmin; n < nmax; n++) { - int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + int tempnn = A->get_blkdim( A, n, DIM_n, N ); if ( n == m ) { if ( trans == ChamConjTrans) { diff --git a/compute/pzlascal.c b/compute/pzlascal.c index 5334730b3cae41d140c6f452fefe2e1986bb3d01..09ac36ca9b44aae41bb64b3f49ba04532e2e6f28 100644 --- a/compute/pzlascal.c +++ b/compute/pzlascal.c @@ -44,8 +44,8 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc switch(uplo) { case ChamLower: for (n = 0; n < minmnt; n++) { - tempnm = n == A->mt-1 ? A->m-n*A->mb : A->mb; - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnm = A->get_blkdim( A, n, DIM_m, A->m ); + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlascal( &options, @@ -53,7 +53,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc alpha, A(n, n)); for (m = n+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zlascal( &options, @@ -65,8 +65,8 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc case ChamUpper: for (m = 0; m < minmnt; m++) { - tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb; - tempmn = m == A->nt-1 ? A->n-m*A->nb : A->nb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); + tempmn = A->get_blkdim( A, m, DIM_n, A->n ); INSERT_TASK_zlascal( &options, @@ -74,7 +74,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc alpha, A(m, m)); for (n = m+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlascal( &options, @@ -87,10 +87,10 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc case ChamUpperLower: default: for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zlascal( &options, diff --git a/compute/pzlaset.c b/compute/pzlaset.c index 3eec0b7c67c9fd9fc17142747c50921c1f4006de..94ce79ce7f133fd744ff735bb51b7c47d51d579d 100644 --- a/compute/pzlaset.c +++ b/compute/pzlaset.c @@ -52,15 +52,15 @@ void chameleon_pzlaset( cham_uplo_t uplo, if (uplo == ChamLower) { for (j = 0; j < minmn; j++){ - tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb; - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjm = A->get_blkdim( A, j, DIM_m, A->m ); + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset( &options, ChamLower, tempjm, tempjn, alpha, beta, A(j, j)); for (i = j+1; i < A->mt; i++){ - tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; + tempim = A->get_blkdim( A, i, DIM_m, A->m ); INSERT_TASK_zlaset( &options, ChamUpperLower, tempim, tempjn, alpha, alpha, @@ -70,11 +70,11 @@ void chameleon_pzlaset( cham_uplo_t uplo, } else if (uplo == ChamUpper) { for (i = 0; i < A->mt; i++) { - tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; + tempim = A->get_blkdim( A, i, DIM_m, A->m ); if ( i < A->nt ) { j = i; - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset( &options, @@ -82,7 +82,7 @@ void chameleon_pzlaset( cham_uplo_t uplo, alpha, beta, A(i, j)); } for (j = i+1; j < A->nt; j++) { - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset( &options, @@ -93,9 +93,9 @@ void chameleon_pzlaset( cham_uplo_t uplo, } else { for (i = 0; i < A->mt; i++){ - tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; + tempim = A->get_blkdim( A, i, DIM_m, A->m ); for (j = 0; j < A->nt; j++){ - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset( &options, ChamUpperLower, tempim, tempjn, diff --git a/compute/pzlaset2.c b/compute/pzlaset2.c index 1819b70b79db0f3406619682ecd10b4e4d1bbf7a..7efc84208b76f992eb96fb41b5ea8b060d424975 100644 --- a/compute/pzlaset2.c +++ b/compute/pzlaset2.c @@ -51,15 +51,15 @@ void chameleon_pzlaset2( cham_uplo_t uplo, if (uplo == ChamLower) { for (j = 0; j < minmn; j++){ - tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb; - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjm = A->get_blkdim( A, j, DIM_m, A->m ); + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset2( &options, ChamLower, tempjm, tempjn, alpha, A(j, j)); for (i = j+1; i < A->mt; i++){ - tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; + tempim = A->get_blkdim( A, i, DIM_m, A->m ); INSERT_TASK_zlaset2( &options, ChamUpperLower, tempim, tempjn, alpha, @@ -69,9 +69,9 @@ void chameleon_pzlaset2( cham_uplo_t uplo, } else if (uplo == ChamUpper) { for (j = 1; j < A->nt; j++){ - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); for (i = 0; i < chameleon_min(j, A->mt); i++){ - tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; + tempim = A->get_blkdim( A, i, DIM_m, A->m ); INSERT_TASK_zlaset2( &options, ChamUpperLower, tempim, tempjn, alpha, @@ -79,8 +79,8 @@ void chameleon_pzlaset2( cham_uplo_t uplo, } } for (j = 0; j < minmn; j++){ - tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb; - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjm = A->get_blkdim( A, j, DIM_m, A->m ); + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset2( &options, ChamUpper, tempjm, tempjn, alpha, @@ -89,9 +89,9 @@ void chameleon_pzlaset2( cham_uplo_t uplo, } else { for (i = 0; i < A->mt; i++){ - tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; + tempim = A->get_blkdim( A, i, DIM_m, A->m ); for (j = 0; j < A->nt; j++){ - tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; + tempjn = A->get_blkdim( A, j, DIM_n, A->n ); INSERT_TASK_zlaset2( &options, ChamUpperLower, tempim, tempjn, alpha, diff --git a/compute/pzlatms.c b/compute/pzlatms.c index 483ce2a1f7afa2b79623bba80bb22b86afc51fec..7e6fd24e3eb7f053ef7a6efe0de702316773b394 100644 --- a/compute/pzlatms.c +++ b/compute/pzlatms.c @@ -34,8 +34,8 @@ zlaset_diag_cpu( void *op_args, const double *D = (const double *)op_args; CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); - int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; - int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb; + int tempmm = descA->get_blkdim( descA, m, DIM_m, descA->m ); + int tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n ); int minmn = chameleon_min( tempmm, tempnn ); int lda = tileA->ld; int i; diff --git a/compute/pzlauum.c b/compute/pzlauum.c index 9a948e50a969743cc650e4d10bb6602c2ebba8b4..b462e65d92e64bde05ae08e4963279464d1b1cc6 100644 --- a/compute/pzlauum.c +++ b/compute/pzlauum.c @@ -49,7 +49,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, */ if (uplo == ChamLower) { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for(n = 0; n < k; n++) { INSERT_TASK_zherk( &options, @@ -89,7 +89,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, */ else { for (k = 0; k < A->mt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (m = 0; m < k; m++) { INSERT_TASK_zherk( diff --git a/compute/pzplghe.c b/compute/pzplghe.c index b224835a07e5afe4b0cff8f5a6f2248c4afe8a09..5add769a300d06979e3512b7b7a3f4628024ab4a 100644 --- a/compute/pzplghe.c +++ b/compute/pzplghe.c @@ -47,10 +47,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, switch ( uplo ) { case ChamLower: for (n = 0; n < minmn; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); for (m = n; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = m + n; INSERT_TASK_zplghe( @@ -63,10 +63,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, case ChamUpper: for (m = 0; m < minmn; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = m; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = m + n; INSERT_TASK_zplghe( @@ -80,10 +80,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, case ChamUpperLower: default: for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = m + n; INSERT_TASK_zplghe( diff --git a/compute/pzplgsy.c b/compute/pzplgsy.c index bc64fadaf68bad0f778f4fd1c5097b085043ae5f..a1d0eedcde2a00e9a108eda50b6d1e0154bebfd9 100644 --- a/compute/pzplgsy.c +++ b/compute/pzplgsy.c @@ -47,10 +47,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ switch ( uplo ) { case ChamLower: for (n = 0; n < minmn; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); for (m = n; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = m + n; INSERT_TASK_zplgsy( @@ -63,10 +63,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ case ChamUpper: for (m = 0; m < minmn; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = m; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = m + n; INSERT_TASK_zplgsy( @@ -80,10 +80,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ case ChamUpperLower: default: for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = m + n; INSERT_TASK_zplgsy( diff --git a/compute/pzplrnk.c b/compute/pzplrnk.c index bab04f16ad8af1bc45021f8ab0dbf1ac504ae7eb..97e7c8bdccb2f45b323717391d7ccf7c892e5a06 100644 --- a/compute/pzplrnk.c +++ b/compute/pzplrnk.c @@ -53,12 +53,12 @@ chameleon_pzplrnk_generic( CHAM_context_t *chamctxt, memset( initB, 0, C->nt * sizeof(int) ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); initA = 0; for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if ( C->get_rankof( C(m, n) ) == myrank ) { if ( !initA ) { @@ -129,7 +129,7 @@ chameleon_pzplrnk_2dbc( CHAM_context_t *chamctxt, zbeta = k == 0 ? 0. : 1.; for (n = myq; n < C->nt; n += q) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zplrnt( options, @@ -138,7 +138,7 @@ chameleon_pzplrnk_2dbc( CHAM_context_t *chamctxt, } for (m = myp; m < C->mt; m += p) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zplrnt( options, @@ -146,7 +146,7 @@ chameleon_pzplrnk_2dbc( CHAM_context_t *chamctxt, WA->m, m * WA->mb, k * WA->nb, seedA ); for (n = myq; n < C->nt; n+=q) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zgemm( options, @@ -195,13 +195,13 @@ chameleon_pzplrnk( int K, { chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mt * C->mb, C->nb * q, 0, 0, - C->mt * C->mb, C->nb * q, p, q, + C->m, C->nb * q, 0, 0, + C->m, C->nb * q, p, q, NULL, NULL, NULL, NULL ); chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mb * p, C->nt * C->nb, 0, 0, - C->mb * p, C->nt * C->nb, p, q, + C->mb * p, C->n, 0, 0, + C->mb * p, C->n, p, q, NULL, NULL, NULL, NULL ); chameleon_pzplrnk_2dbc( chamctxt, K, &WA, &WB, C, seedA, seedB, &options ); @@ -210,13 +210,13 @@ chameleon_pzplrnk( int K, int np = p * q; chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mt * C->mb, C->nb * np, 0, 0, - C->mt * C->mb, C->nb * np, 1, np, + C->m, C->nb * np, 0, 0, + C->m, C->nb * np, 1, np, NULL, NULL, NULL, NULL ); chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), - C->mb * np, C->nt * C->nb, 0, 0, - C->mb * np, C->nt * C->nb, np, 1, + C->mb * np, C->n, 0, 0, + C->mb * np, C->n, np, 1, NULL, NULL, NULL, NULL ); chameleon_pzplrnk_generic( chamctxt, K, &WA, &WB, C, seedA, seedB, &options ); diff --git a/compute/pzplrnt.c b/compute/pzplrnt.c index 571a2fa05c1f33596ae9a29773e91555a2fe9b65..93d4d7e830b1f11d92207ded0ab70f58099919c5 100644 --- a/compute/pzplrnt.c +++ b/compute/pzplrnt.c @@ -46,10 +46,10 @@ void chameleon_pzplrnt( CHAM_desc_t *A, RUNTIME_options_init(&options, chamctxt, sequence, request); for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zplrnt( &options, diff --git a/compute/pzpotrf.c b/compute/pzpotrf.c index 43656e9d38e4cbdc312221ce81be2edd3f2977b7..0b613229ed33ed4bae99cfec0d91bb4c903fabda 100644 --- a/compute/pzpotrf.c +++ b/compute/pzpotrf.c @@ -60,7 +60,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); options.priority = 2*A->mt - 2*k; INSERT_TASK_zpotrf( @@ -69,7 +69,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, A(k, k), A->nb*k); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = 2*A->mt - 2*k - m; INSERT_TASK_ztrsm( @@ -82,7 +82,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_data_flush( sequence, A(k, k) ); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = 2*A->mt - 2*k - n; INSERT_TASK_zherk( @@ -93,7 +93,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, 1.0, A(n, n)); for (m = n+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = 2*A->mt - 2*k - n - m; INSERT_TASK_zgemm( @@ -116,7 +116,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, for (k = 0; k < A->nt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_n, A->n ); options.priority = 2*A->nt - 2*k; INSERT_TASK_zpotrf( @@ -126,7 +126,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, A(k, k), A->nb*k); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = 2*A->nt - 2*k - n; INSERT_TASK_ztrsm( @@ -139,7 +139,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_data_flush( sequence, A(k, k) ); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); options.priority = 2*A->nt - 2*k - m; INSERT_TASK_zherk( @@ -150,7 +150,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, 1.0, A(m, m)); for (n = m+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); options.priority = 2*A->nt - 2*k - n - m; INSERT_TASK_zgemm( diff --git a/compute/pzpotrimm.c b/compute/pzpotrimm.c index c6d301836b8517b60dba1f62babd69cb9377869f..e3638491cb73a07b9eb291043c9cfd6d24cf6ff3 100644 --- a/compute/pzpotrimm.c +++ b/compute/pzpotrimm.c @@ -63,7 +63,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); INSERT_TASK_zpotrf( &options, @@ -71,7 +71,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ A(k, k), A->nb*k); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztrsm( &options, ChamRight, ChamLower, ChamConjTrans, ChamNonUnit, @@ -82,7 +82,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ RUNTIME_data_flush( sequence, A(k, k) ); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zherk( &options, ChamLower, ChamNoTrans, @@ -91,7 +91,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ 1.0, A(n, n)); for (m = n+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgemm( &options, ChamNoTrans, ChamConjTrans, @@ -111,9 +111,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < A->nt; k++) { RUNTIME_iteration_push(chamctxt, A->nt + k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztrsm( &options, ChamRight, uplo, ChamNoTrans, ChamNonUnit, @@ -122,7 +122,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ A(m, k)); } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < k; n++) { INSERT_TASK_zgemm( &options, @@ -158,7 +158,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, 2*A->nt + k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for(n = 0; n < k; n++) { INSERT_TASK_zherk( &options, @@ -200,14 +200,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < C->nt; k++) { RUNTIME_iteration_push(chamctxt, 3*A->nt + k); - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); zbeta = k == 0 ? beta : zone; for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if (k < n) { INSERT_TASK_zgemm( @@ -258,7 +258,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < A->nt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zpotrf( &options, ChamUpper, @@ -266,7 +266,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ A(k, k), A->nb*k); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_ztrsm( &options, ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit, @@ -277,7 +277,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ RUNTIME_data_flush( sequence, A(k, k) ); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zherk( &options, @@ -287,7 +287,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ 1.0, A(m, m)); for (n = m+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zgemm( &options, @@ -308,9 +308,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, A->nt + k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_ztrsm( &options, ChamLeft, uplo, ChamNoTrans, ChamNonUnit, @@ -319,7 +319,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ A(k, n)); } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); for (m = 0; m < k; m++) { INSERT_TASK_zgemm( &options, @@ -355,7 +355,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, 2*A->nt + k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (m = 0; m < k; m++) { INSERT_TASK_zherk( @@ -398,14 +398,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ for (k = 0; k < C->nt; k++) { RUNTIME_iteration_push(chamctxt, 3*A->nt + k); - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); zbeta = k == 0 ? beta : zone; for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if (k < n) { INSERT_TASK_zgemm( diff --git a/compute/pzsymm.c b/compute/pzsymm.c index 8f16daf6fa1a1ed601ea31e6425f14c949891e3c..bcde1927be003743fe8c041bdb258aeca91c6d29 100644 --- a/compute/pzsymm.c +++ b/compute/pzsymm.c @@ -110,9 +110,9 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t } for(n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for(m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); /* Scale C */ options->forcesub = 0; @@ -126,7 +126,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t if (side == ChamLeft) { if (uplo == ChamLower) { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); if (k < m) { INSERT_TASK_zgemm_Astat( @@ -162,7 +162,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t */ else { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); if (k < m) { INSERT_TASK_zgemm_Astat( @@ -200,7 +200,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t else { if (uplo == ChamLower) { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); if (k < n) { INSERT_TASK_zgemm_Astat( @@ -236,7 +236,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t */ else { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); if (k < n) { INSERT_TASK_zgemm_Astat( @@ -293,7 +293,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, { RUNTIME_sequence_t *sequence = options->sequence; cham_trans_t transA; - int m, n, k, p, q, KT, K, lp, lq; + int m, n, k, p, q, KT, lp, lq; int tempmm, tempnn, tempkk; int lookahead, myp, myq; @@ -302,14 +302,13 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->nt; - K = A->n; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); - tempkk = k == KT - 1 ? K - k * A->nb : A->nb; + tempkk = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; /* Transfert ownership of the k column of A or B */ @@ -317,7 +316,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, int Am, Ak; int tempam, tempak; - tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if ( (( uplo == ChamUpper ) && ( m > k )) || (( uplo == ChamLower ) && ( m < k )) ) @@ -356,7 +355,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, /* Transfert ownership of the k row of B, or A */ for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zlacpy( options, @@ -377,11 +376,11 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, /* Perform the update of this iteration */ for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if ( k == m ) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zsymm( options, ChamLeft, uplo, @@ -402,7 +401,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, } for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zgemm( options, transA, ChamNoTrans, @@ -429,7 +428,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, { RUNTIME_sequence_t *sequence = options->sequence; cham_trans_t transA; - int m, n, k, p, q, KT, K, lp, lq; + int m, n, k, p, q, KT, lp, lq; int tempmm, tempnn, tempkk; int lookahead, myp, myq; @@ -438,20 +437,19 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, lookahead = chamctxt->lookahead; KT = A->mt; - K = A->m; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); for (k = 0; k < KT; k++ ) { lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0); lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1); - tempkk = k == KT - 1 ? K - k * A->nb : A->nb; + tempkk = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; /* Transfert ownership of the k column of A or B */ for (m = 0; m < C->mt; m++ ) { - tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zlacpy( options, @@ -475,7 +473,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, int Ak, An; int tempak, tempan; - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if ( (( uplo == ChamUpper ) && ( n < k )) || (( uplo == ChamLower ) && ( n > k )) ) @@ -512,11 +510,11 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, /* Perform the update of this iteration */ for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if ( k == n ) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); /* A has been stored in WA or WB for the summa ring */ INSERT_TASK_zsymm( @@ -538,7 +536,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, } for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zgemm( options, ChamNoTrans, transA, @@ -595,16 +593,16 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; for(m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for(n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); /* * ChamLeft / ChamLower */ if (side == ChamLeft) { if (uplo == ChamLower) { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); zbeta = k == 0 ? beta : zone; if (k < m) { INSERT_TASK_zgemm( @@ -642,7 +640,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ */ else { for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); zbeta = k == 0 ? beta : zone; if (k < m) { INSERT_TASK_zgemm( @@ -682,7 +680,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ else { if (uplo == ChamLower) { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); zbeta = k == 0 ? beta : zone; if (k < n) { INSERT_TASK_zgemm( @@ -720,7 +718,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ */ else { for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); zbeta = k == 0 ? beta : zone; if (k < n) { INSERT_TASK_zgemm( diff --git a/compute/pzsyr2k.c b/compute/pzsyr2k.c index 74b386cd1d1e97a12054ee362017ecdc38855aad..945b97f30c91932b7e6be8f8afeca08e79073a5d 100644 --- a/compute/pzsyr2k.c +++ b/compute/pzsyr2k.c @@ -51,7 +51,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, RUNTIME_options_init(&options, chamctxt, sequence, request); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if (uplo == ChamLower) { mmin = n+1; @@ -67,7 +67,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, */ if (trans == ChamNoTrans) { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zsyr2k( &options, @@ -78,9 +78,9 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, zbeta, C(n, n)); /* ldc * N */ } for (m = mmin; m < mmax; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( &options, @@ -105,7 +105,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, */ else { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zsyr2k( &options, @@ -116,9 +116,9 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, zbeta, C(n, n)); /* ldc * N */ } for (m = mmin; m < mmax; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( &options, diff --git a/compute/pzsyrk.c b/compute/pzsyrk.c index aa4a5b29741941fa9706a273ea7afe388679c868..9b4ab5e117af9ceb79ab57dbbc34605325e0e4e1 100644 --- a/compute/pzsyrk.c +++ b/compute/pzsyrk.c @@ -51,13 +51,13 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, RUNTIME_options_init(&options, chamctxt, sequence, request); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); /* * ChamNoTrans */ if (trans == ChamNoTrans) { for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zsyrk( &options, @@ -71,9 +71,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, */ if (uplo == ChamLower) { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( &options, @@ -90,9 +90,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, */ else { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( &options, @@ -110,7 +110,7 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, */ else { for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zsyrk( &options, @@ -124,9 +124,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, */ if (uplo == ChamLower) { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( &options, @@ -143,9 +143,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, */ else { for (m = n+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); zbeta = k == 0 ? beta : zone; INSERT_TASK_zgemm( &options, diff --git a/compute/pzsytrf.c b/compute/pzsytrf.c index 6a9c41bedca23e8e8ecf493c61745d6b39c8e241..1cf7ce198cd3bd2fad7758d7604c03f7bac9238a 100644 --- a/compute/pzsytrf.c +++ b/compute/pzsytrf.c @@ -58,7 +58,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); INSERT_TASK_zsytrf_nopiv( &options, @@ -66,7 +66,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, A(k, k), A->nb*k); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztrsm( &options, ChamRight, ChamLower, ChamTrans, ChamNonUnit, @@ -77,7 +77,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_data_flush( sequence, A(k, k) ); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zsyrk( &options, ChamLower, ChamNoTrans, @@ -86,7 +86,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, 1.0, A(n, n)); for (m = n+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zgemm( &options, ChamNoTrans, ChamTrans, @@ -108,7 +108,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, for (k = 0; k < A->nt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zsytrf_nopiv( &options, ChamUpper, @@ -116,7 +116,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, A(k, k), A->nb*k); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_ztrsm( &options, ChamLeft, ChamUpper, ChamTrans, ChamNonUnit, @@ -127,7 +127,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_data_flush( sequence, A(k, k) ); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_zsyrk( &options, @@ -137,7 +137,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, 1.0, A(m, m)); for (n = m+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_zgemm( &options, diff --git a/compute/pztile2band.c b/compute/pztile2band.c index 3ec2c09edaffbfd26fa2513b4500d291490b68fd..cabfe55d383530d3e4c1738f1319279d77964ad8 100644 --- a/compute/pztile2band.c +++ b/compute/pztile2band.c @@ -38,9 +38,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, int k; int tempkm, tempkn; int minmnt = chameleon_min(A->mt, A->nt); - int Bnb = B->nb; int Bmb = B->mb; - int Amb = A->mb; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -59,8 +57,8 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, assert( A->i == B->j ); assert( A->j >= B->j ); - tempkm = ( k == A->mt-1 ) ? A->m - k * Amb : Amb; - tempkn = ( k == B->nt-1 ) ? B->n - k * Bnb : Bnb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = B->get_blkdim( B, k, DIM_n, B->n ); INSERT_TASK_zlaset( &options, ChamUpperLower, Bmb, tempkn, 0., 0., B, 0, k ); @@ -72,7 +70,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, if ( k < minmnt-1 ) { tileA = A->get_blktile( A, k+1, k ); - tempkm = ( (k+1) == A->mt-1 ) ? A->m - (k+1) * Amb : Amb; + tempkm = A->get_blkdim( A, k+1, DIM_m, A->m ); INSERT_TASK_zlacpyx( &options, ChamUpper, tempkm, tempkn, 0, A, k+1, k, tileA->ld, @@ -88,8 +86,8 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, assert( A->i == B->i ); assert( A->i >= B->j ); - tempkm = ( k == A->mt-1 ) ? A->m - k * Amb : Amb; - tempkn = ( k == B->nt-1 ) ? B->n - k * Bnb : Bnb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = B->get_blkdim( B, k, DIM_n, B->n ); INSERT_TASK_zlaset( &options, ChamUpperLower, Bmb, tempkn, 0., 0., B, 0, k ); @@ -101,7 +99,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, if ( k > 0 ) { tileA = A->get_blktile( A, k-1, k ); - tempkm = ( (k-1) == A->mt-1 ) ? A->m - (k-1) * Amb : Amb; + tempkm = A->get_blkdim( A, k-1, DIM_m, A->m ); INSERT_TASK_zlacpyx( &options, ChamLower, tempkm, tempkn, 0, A, k-1, k, tileA->ld, diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c index 7dccc6b2db6f97576a69c6eda3e76525471c1def..7d2ee5712fc391c39c41e019b59d703053a290e8 100644 --- a/compute/pztpgqrt.c +++ b/compute/pztpgqrt.c @@ -80,16 +80,16 @@ void chameleon_pztpgqrt( int KT, int L, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb; + tempkn = Q1->get_blkdim( Q1, k, DIM_n, Q1->n ); /* Equivalent to the tsmqr step on Q1,Q2 */ maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1; for (m = maxmtk; m > -1; m--) { - tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb; + tempmm = Q2->get_blkdim( Q2, m, DIM_m, Q2->m ); templm = ((L > 0) && (m == maxmtk)) ? tempmm : 0; for (n = k; n < Q2->nt; n++) { - tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb; + tempnn = Q2->get_blkdim( Q2, n, DIM_n, Q2->n ); /* TT kernel */ INSERT_TASK_ztpmqrt( &options, diff --git a/compute/pztpgqrt_param.c b/compute/pztpgqrt_param.c index 6da22b663ffc5d7cea537bda9c9129fb0e70759e..b667cc968c5d9c8f74e8e01ad07082c84d057b77 100644 --- a/compute/pztpgqrt_param.c +++ b/compute/pztpgqrt_param.c @@ -98,13 +98,13 @@ void chameleon_pztpgqrt_param( int genD, cham_uplo_t uplo, int K, /* Combine Bottom and Top matrices by merging last pivot with ATop(k,*) */ { CHAM_desc_t *T = TT; - int temppm = p == Q->mt-1 ? Q->m - p * Q->mb : Q->mb; - int tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb; + int temppm = Q->get_blkdim( Q, p, DIM_m, Q->m ); + int tempkn = A->get_blkdim( A, k, DIM_n, A->n ); int tempnn; int L = temppm; for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); node = Q->get_rankof( Q, p, n ); RUNTIME_data_migrate( sequence, QTop(k, n), node ); diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c index 658c75c687e1661ae2c310b2ac1dde00c5477a15..5d595035d3defeaf9145daf381031ac0757293aa 100644 --- a/compute/pztpqrt.c +++ b/compute/pztpqrt.c @@ -74,11 +74,11 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, for (k = 0; k < A->nt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (m = 0; m < maxmt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); templm = ((L > 0) && (m == maxmt-1)) ? tempmm : 0; /* TT kernel */ INSERT_TASK_ztpqrt( @@ -89,7 +89,7 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, T(m, k) ); for (n = k+1; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztpmqrt( &options, ChamLeft, ChamConjTrans, diff --git a/compute/pztpqrt_param.c b/compute/pztpqrt_param.c index 4ef4d9770a152037e0510fee009198786a6607d7..f01ed10832492f2440ebb3a6b75d4e578c1a5b44 100644 --- a/compute/pztpqrt_param.c +++ b/compute/pztpqrt_param.c @@ -99,8 +99,8 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K, /* Combine with ATop and A by merging last pivot with A(k,k) */ { CHAM_desc_t *T; - int temppm = p == ATop->mt-1 ? ATop->m - p * ATop->mb : ATop->mb; - int tempkn = k == ATop->nt-1 ? ATop->n - k * ATop->nb : ATop->nb; + int temppm = ATop->get_blkdim( ATop, p, DIM_m, ATop->m ); + int tempkn = ATop->get_blkdim( ATop, k, DIM_n, ATop->n ); int L, node, tempnn; T = TT; @@ -118,7 +118,7 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K, T(p, k)); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); node = A->get_rankof( A, p, n ); RUNTIME_data_migrate( sequence, ATop(k, n), node ); diff --git a/compute/pztradd.c b/compute/pztradd.c index 78d7a3d282e8a4d08bd57b91ffe2935fe337210e..370ccc0923e56f1aacb18973990057e51ad17f84 100644 --- a/compute/pztradd.c +++ b/compute/pztradd.c @@ -53,8 +53,8 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, case ChamLower: if (trans == ChamNoTrans) { for (n = 0; n < minmn; n++) { - tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnm = B->get_blkdim( B, n, DIM_m, B->m ); + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztradd( &options, @@ -63,7 +63,7 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, beta, B(n, n)); for (m = n+1; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_zgeadd( &options, @@ -75,8 +75,8 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, } else { for (n = 0; n < chameleon_min(B->mt,B->nt); n++) { - tempnm = n == B->mt-1 ? B->m - n * B->mb : B->mb; - tempnn = n == B->nt-1 ? B->n - n * B->nb : B->nb; + tempnm = B->get_blkdim( B, n, DIM_m, B->m ); + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztradd( &options, @@ -85,7 +85,7 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, beta, B(n, n)); for (m = n+1; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_zgeadd( &options, @@ -99,8 +99,8 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, case ChamUpper: if (trans == ChamNoTrans) { for (m = 0; m < minmn; m++) { - tempmm = m == B->mt-1 ? B->m - m * B->mb : B->nb; - tempmn = m == B->nt-1 ? B->n - m * B->nb : B->nb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); + tempmn = B->get_blkdim( B, m, DIM_n, B->n ); INSERT_TASK_ztradd( &options, @@ -109,7 +109,7 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, beta, B(m, m)); for (n = m+1; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n - n * B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgeadd( &options, @@ -121,8 +121,8 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, } else { for (m = 0; m < chameleon_min(B->mt,B->nt); m++) { - tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; - tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); + tempmn = B->get_blkdim( B, m, DIM_n, B->n ); INSERT_TASK_ztradd( &options, @@ -131,7 +131,7 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, beta, B(m, m)); for (n = m+1; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgeadd( &options, @@ -146,10 +146,10 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, default: if (trans == ChamNoTrans) { for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgeadd( &options, @@ -161,10 +161,10 @@ void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans, } else { for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgeadd( &options, diff --git a/compute/pztrmm.c b/compute/pztrmm.c index 40b0a6f0f856c2c858eb0ee620e157dd484bb6d7..9eeb07dfa0de17c119d54339abd28bc05498d67e 100644 --- a/compute/pztrmm.c +++ b/compute/pztrmm.c @@ -57,9 +57,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, if (uplo == ChamUpper) { if (trans == ChamNoTrans) { for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -68,7 +68,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, B(m, n)); /* ldb * tempnn */ for (k = m+1; k < A->mt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgemm( &options, trans, ChamNoTrans, @@ -85,9 +85,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, */ else { for (m = B->mt-1; m > -1; m--) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -114,9 +114,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, else { if (trans == ChamNoTrans) { for (m = B->mt-1; m > -1; m--) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -141,9 +141,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, */ else { for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -152,7 +152,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, B(m, n)); /* ldb * tempnn */ for (k = m+1; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); INSERT_TASK_zgemm( &options, trans, ChamNoTrans, @@ -173,9 +173,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, if (uplo == ChamUpper) { if (trans == ChamNoTrans) { for (n = B->nt-1; n > -1; n--) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -200,9 +200,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, */ else { for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -211,7 +211,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, B(m, n)); /* ldb * tempnn */ for (k = n+1; k < A->mt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgemm( &options, ChamNoTrans, trans, @@ -230,9 +230,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, else { if (trans == ChamNoTrans) { for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, @@ -241,7 +241,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, B(m, n)); /* ldb * tempnn */ for (k = n+1; k < A->mt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); INSERT_TASK_zgemm( &options, ChamNoTrans, trans, @@ -258,9 +258,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, */ else { for (n = B->nt-1; n > -1; n--) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrmm( &options, side, uplo, trans, diag, diff --git a/compute/pztrsm.c b/compute/pztrsm.c index 6084fa5a162728bd566f148146c9bbf10b44f6c9..01976543372569da9415ebb9f3bdee2214889b14 100644 --- a/compute/pztrsm.c +++ b/compute/pztrsm.c @@ -58,10 +58,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c if (uplo == ChamUpper) { if (trans == ChamNoTrans) { for (k = 0; k < B->mt; k++) { - tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb; + tempkm = B->get_blkdim( B, B->mt-1-k, DIM_m, B->m ); lalpha = k == 0 ? alpha : zone; for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -72,7 +72,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) ); for (m = k+1; m < B->mt; m++) { for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgemm( &options, ChamNoTrans, ChamNoTrans, @@ -93,10 +93,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c */ else { for (k = 0; k < B->mt; k++) { - tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; + tempkm = B->get_blkdim( B, k, DIM_m, B->m ); lalpha = k == 0 ? alpha : zone; for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -106,9 +106,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c } RUNTIME_data_flush( sequence, A(k, k) ); for (m = k+1; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgemm( &options, trans, ChamNoTrans, @@ -132,10 +132,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c else { if (trans == ChamNoTrans) { for (k = 0; k < B->mt; k++) { - tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; + tempkm = B->get_blkdim( B, k, DIM_m, B->m ); lalpha = k == 0 ? alpha : zone; for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -145,9 +145,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c } RUNTIME_data_flush( sequence, A(k, k) ); for (m = k+1; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgemm( &options, ChamNoTrans, ChamNoTrans, @@ -168,10 +168,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c */ else { for (k = 0; k < B->mt; k++) { - tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb; + tempkm = B->get_blkdim( B, B->mt-1-k, DIM_m, B->m ); lalpha = k == 0 ? alpha : zone; for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -182,7 +182,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) ); for (m = k+1; m < B->mt; m++) { for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgemm( &options, trans, ChamNoTrans, @@ -207,10 +207,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c if (uplo == ChamUpper) { if (trans == ChamNoTrans) { for (k = 0; k < B->nt; k++) { - tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; + tempkn = B->get_blkdim( B, k, DIM_n, B->n ); lalpha = k == 0 ? alpha : zone; for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -220,9 +220,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c } RUNTIME_data_flush( sequence, A(k, k) ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); for (n = k+1; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgemm( &options, ChamNoTrans, ChamNoTrans, @@ -243,9 +243,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c */ else { for (k = 0; k < B->nt; k++) { - tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb; + tempkn = B->get_blkdim( B, B->nt-1-k, DIM_n, B->n ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -277,10 +277,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c else { if (trans == ChamNoTrans) { for (k = 0; k < B->nt; k++) { - tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb; + tempkn = B->get_blkdim( B, B->nt-1-k, DIM_n, B->n ); lalpha = k == 0 ? alpha : zone; for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -310,9 +310,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c */ else { for (k = 0; k < B->nt; k++) { - tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; + tempkn = B->get_blkdim( B, k, DIM_n, B->n ); for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + tempmm = B->get_blkdim( B, m, DIM_m, B->m ); INSERT_TASK_ztrsm( &options, side, uplo, trans, diag, @@ -322,7 +322,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c RUNTIME_data_flush( sequence, A(k, k) ); for (n = k+1; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgemm( &options, ChamNoTrans, trans, diff --git a/compute/pztrsmpl.c b/compute/pztrsmpl.c index 0b794cd9e74e5391bb81bc36ecaa937922f5352a..27dcb64fe1adcffe13e8ef6c62700129500c628c 100644 --- a/compute/pztrsmpl.c +++ b/compute/pztrsmpl.c @@ -41,7 +41,7 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP int k, m, n; int tempkm, tempnn, tempkmin, tempmm, tempkn; - int ib; + int ib, K, DIM_k; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -49,13 +49,23 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP } RUNTIME_options_init(&options, chamctxt, sequence, request); + if ( A->m <= A->n ) { + K = A->m; + DIM_k = DIM_m; + } + else { + K = A->n; + DIM_k = DIM_n; + } + ib = CHAMELEON_IB; for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempkmin = k == chameleon_min(A->mt, A->nt)-1 ? chameleon_min(A->m, A->n)-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); + for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zgessm( &options, tempkm, tempnn, tempkmin, ib, L->nb, @@ -65,9 +75,9 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP B(k, n)); } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempnn = B->get_blkdim( B, n, DIM_n, B->n ); INSERT_TASK_zssssm( &options, A->nb, tempnn, tempmm, tempnn, tempkn, ib, L->nb, diff --git a/compute/pztrtri.c b/compute/pztrtri.c index 1d6a223ebb1c88449eb72ceb57a7c7fa1e38b7b0..87dd22c336216506a3f3d232af5b4ec4a20a8289 100644 --- a/compute/pztrtri.c +++ b/compute/pztrtri.c @@ -55,9 +55,9 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for (k = 0; k < A->nt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); INSERT_TASK_ztrsm( &options, ChamRight, uplo, ChamNoTrans, diag, @@ -66,7 +66,7 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, A(m, k)); } for (m = k+1; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = 0; n < k; n++) { INSERT_TASK_zgemm( &options, @@ -104,9 +104,9 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, for (k = 0; k < A->mt; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); INSERT_TASK_ztrsm( &options, ChamLeft, uplo, ChamNoTrans, diag, @@ -115,7 +115,7 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, A(k, n)); } for (n = k+1; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + tempnn = A->get_blkdim( A, n, DIM_n, A->n ); for (m = 0; m < k; m++) { INSERT_TASK_zgemm( &options, diff --git a/compute/pzunglq.c b/compute/pzunglq.c index d0da04302ab14f63c86f12179e4cf836720f268c..f10ae39f2839336bb37946678e20ed9a4e902e53 100644 --- a/compute/pzunglq.c +++ b/compute/pzunglq.c @@ -93,15 +93,15 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempAkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempAkn = A->get_blkdim( A, k, DIM_n, A->n ); tempkmin = chameleon_min( tempAkn, tempAkm ); - tempkn = k == Q->nt-1 ? Q->n-k*Q->nb : Q->nb; + tempkn = Q->get_blkdim( Q, k, DIM_n, Q->n ); for (n = Q->nt-1; n > k; n--) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); RUNTIME_data_migrate( sequence, Q(m, k), Q->get_rankof( Q, m, n ) ); @@ -121,7 +121,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T } if ( genD ) { - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, ChamUpper, tempkmin, tempDkn, @@ -136,7 +136,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T #endif } for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); /* Restore the original location of the tiles */ RUNTIME_data_migrate( sequence, Q(m, k), diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c index 8bd49dbeff65f872de70fb988b7032383610abd8..36f5b58ecdbc7c3c04d77ed7363edd743f0c245d 100644 --- a/compute/pzunglq_param.c +++ b/compute/pzunglq_param.c @@ -86,7 +86,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); /* Setting the order of the tiles*/ nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); @@ -95,7 +95,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t n = tiles[i]; p = qrtree->currpiv(qrtree, k, n); - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -108,7 +108,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t T = TT; } for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); node = Q->get_rankof( Q, m, n ); RUNTIME_data_migrate( sequence, Q(m, p), node ); @@ -131,11 +131,11 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + temppn = A->get_blkdim( A, p, DIM_n, A->n ); tempkmin = chameleon_min(tempkm, temppn); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, ChamUpper, tempkmin, tempDpn, @@ -150,7 +150,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t #endif } for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); RUNTIME_data_migrate( sequence, Q(m, p), Q->get_rankof( Q, m, p ) ); diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c index ec04ee0e79c0b3b70ffbb1b651154a6c392a16fa..85d184ae8fb4efd829481682183fc1bea40cf7e4 100644 --- a/compute/pzunglqrh.c +++ b/compute/pzunglqrh.c @@ -89,15 +89,15 @@ void chameleon_pzunglqrh( int genD, int BS, for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); lastRD = 0; for (RD = BS; RD < A->nt-k; RD *= 2) lastRD = RD; for (RD = lastRD; RD >= BS; RD /= 2) { for (N = k; N+RD < A->nt; N += 2*RD) { - tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb; + tempNRDn = A->get_blkdim( A, N+RD, DIM_n, A->n ); for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); node = Q->get_rankof( Q, m, N+RD ); RUNTIME_data_migrate( sequence, Q(m, N), node ); @@ -119,13 +119,13 @@ void chameleon_pzunglqrh( int genD, int BS, } } for (N = k; N < A->nt; N += BS) { - tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; + tempNn = A->get_blkdim( A, N, DIM_n, A->n ); tempkmin = chameleon_min(tempkm, tempNn); for (n = chameleon_min(N+BS, A->nt)-1; n > N; n--) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); node = Q->get_rankof( Q, m, n ); RUNTIME_data_migrate( sequence, Q(m, N), node ); @@ -147,7 +147,7 @@ void chameleon_pzunglqrh( int genD, int BS, } if ( genD ) { - int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb; + int tempDNn = D->get_blkdim( D, N, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -163,7 +163,7 @@ void chameleon_pzunglqrh( int genD, int BS, #endif } for (m = k; m < Q->mt; m++) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); RUNTIME_data_migrate( sequence, Q(m, N), Q->get_rankof( Q, m, N ) ); diff --git a/compute/pzungqr.c b/compute/pzungqr.c index ac5261230302338bc31233ebf677ee2c24c8bcba..f8f41adc10c6b709ad57ba135771f0f352a9bedc 100644 --- a/compute/pzungqr.c +++ b/compute/pzungqr.c @@ -93,14 +93,14 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempAkm = A->get_blkdim( A, k, DIM_m, A->m ); + tempAkn = A->get_blkdim( A, k, DIM_n, A->n ); tempkmin = chameleon_min( tempAkn, tempAkm ); - tempkm = k == Q->mt-1 ? Q->m-k*Q->mb : Q->mb; + tempkm = Q->get_blkdim( Q, k, DIM_m, Q->m ); for (m = Q->mt - 1; m > k; m--) { - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); RUNTIME_data_migrate( sequence, Q(k, n), Q->get_rankof( Q, m, n ) ); @@ -120,7 +120,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, } if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -136,7 +136,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, #endif } for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); /* Restore the original location of the tiles */ RUNTIME_data_migrate( sequence, Q(k, n), diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index 5bf1a8f566509fc91b13d0b84b0d938659400392..9149ea3b8b41bd6b81747bcf460691b2a749887e 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -52,13 +52,13 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib, int tempmm, tempnn, tempkmin, tempkn; int nbgeqrt, node; - tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (i = nbtiles-1; i >= 0; i--) { m = tiles[i]; p = qrtree->currpiv( qrtree, k, m ); - tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m ); if( qrtree->gettype( qrtree, k, m ) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -77,7 +77,7 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib, } for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); node = Q->get_rankof( Q, m, n ); RUNTIME_data_migrate( sequence, Q(p, n), node ); @@ -108,11 +108,11 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib, continue; } - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); tempkmin = chameleon_min( tempmm, tempkn ); if ( genD ) { - int tempDmm = m == D->mt-1 ? D->m - m * D->mb : D->mb; + int tempDmm = D->get_blkdim( D, m, DIM_m, D->m ); INSERT_TASK_zlacpy( options, ChamLower, tempDmm, tempkmin, @@ -128,7 +128,7 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib, } for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); /* Restore the original location of the tiles */ RUNTIME_data_migrate( sequence, Q(m, n), diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index 3eaac991f5beba28ef97bb4f4a65293082905cf6..5d13e544d5e05b6b617dbf9f21c820812f693b8d 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -91,15 +91,15 @@ void chameleon_pzungqrrh( int genD, int BS, for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); lastRD = 0; for (RD = BS; RD < A->mt-k; RD *= 2) lastRD = RD; for (RD = lastRD; RD >= BS; RD /= 2) { for (M = k; M+RD < A->mt; M += 2*RD) { - tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb; + tempMRDm = A->get_blkdim( A, M+RD, DIM_m, A->m ); for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); node = Q->get_rankof( Q, M+RD, n ); RUNTIME_data_migrate( sequence, Q(M, n), node ); @@ -121,13 +121,13 @@ void chameleon_pzungqrrh( int genD, int BS, } } for (M = k; M < A->mt; M += BS) { - tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; + tempMm = A->get_blkdim( A, M, DIM_m, A->m ); tempkmin = chameleon_min(tempMm, tempkn); for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempmm = A->get_blkdim( A, m, DIM_m, A->m ); for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); node = Q->get_rankof( Q, m, n ); RUNTIME_data_migrate( sequence, Q(M, n), node ); @@ -148,7 +148,7 @@ void chameleon_pzungqrrh( int genD, int BS, } if ( genD ) { - int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb; + int tempDMm = D->get_blkdim( D, M, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, ChamLower, tempDMm, tempkmin, @@ -163,7 +163,7 @@ void chameleon_pzungqrrh( int genD, int BS, #endif } for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n ); /* Restore the original location of the tiles */ RUNTIME_data_migrate( sequence, Q(M, n), diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c index c1593bdab0f9a7c35bf89e0e30fa1e7209cf6989..d7043b08b03b5aeafe02362aebd89e35067b22ff 100644 --- a/compute/pzunmlq.c +++ b/compute/pzunmlq.c @@ -49,7 +49,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, int k, m, n; int tempkm, tempkn, tempkmin, tempmm, tempnn; - int ib, KT, K; + int ib, KT, K, DIM_k; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -60,11 +60,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, ib = CHAMELEON_IB; if (A->m > A->n) { - KT = A->nt; - K = A->n; + KT = A->nt; + K = A->n; + DIM_k = DIM_n; } else { - KT = A->mt; - K = A->m; + KT = A->mt; + K = A->m; + DIM_k = DIM_m; } if ( D == NULL ) { @@ -100,12 +102,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; - + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); if ( genD ) { - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, ChamUpper, tempkmin, tempDkn, @@ -120,7 +121,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zunmlq( &options, side, trans, @@ -134,9 +135,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, RUNTIME_data_flush( sequence, T(k, k) ); for (m = k+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(k, n), C->get_rankof( C, m, n ) ); @@ -172,14 +173,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; - + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); for (m = C->mt-1; m > k; m--) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(k, n), C->get_rankof( C, m, n ) ); @@ -200,7 +200,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, } if ( genD ) { - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, ChamUpper, tempkmin, tempDkn, @@ -215,7 +215,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(k, n), C->get_rankof( C, k, n ) ); @@ -242,13 +242,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); for (n = C->nt-1; n > k; n--) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, k), C->get_rankof( C, m, n ) ); @@ -269,7 +269,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, } if ( genD ) { - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, ChamUpper, tempkmin, tempDkn, @@ -284,7 +284,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, #endif } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, k), C->get_rankof( C, m, k ) ); @@ -311,11 +311,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); if ( genD ) { - int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; + int tempDkn = D->get_blkdim( D, k, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -331,7 +331,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, #endif } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zunmlq( &options, side, trans, @@ -345,9 +345,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, RUNTIME_data_flush( sequence, T(k, k) ); for (n = k+1; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, k), C->get_rankof( C, m, n ) ); diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index 94f73255fae1919344f054212a1c6c2b79c2d99b..80d9303fe6442e25a38f2909fb1e5660db833751 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -92,18 +92,18 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -119,7 +119,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zunmlq( &options, side, trans, temppm, tempnn, tempkmin, ib, T->nb, @@ -138,7 +138,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -151,7 +151,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, T = TT; } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -185,7 +185,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); /* Setting the order of the tiles*/ nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); @@ -194,7 +194,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -207,7 +207,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, T = TT; } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -229,12 +229,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -251,7 +251,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(p, n), C->get_rankof( C, p, n ) ); @@ -279,7 +279,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); /* Setting the order of the tiles*/ nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); @@ -288,7 +288,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, n = tiles[i]; p = qrtree->currpiv(qrtree, k, n); - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -302,7 +302,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -324,11 +324,11 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -345,7 +345,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, p), C->get_rankof( C, m, p ) ); @@ -370,17 +370,17 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -397,7 +397,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zunmlq( &options, side, trans, tempmm, temppn, tempkmin, ib, T->nb, @@ -416,7 +416,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, n = tiles[i]; p = qrtree->currpiv(qrtree, k, n); - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -430,7 +430,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c index cdf7dd9b8ee0dfb2c0489b607bda2de8c901c6f8..0d8608963e1767180d160356dbdf1cafdb35b20b 100644 --- a/compute/pzunmlqrh.c +++ b/compute/pzunmlqrh.c @@ -95,17 +95,17 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for (p = k; p < C->mt; p += BS) { - temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -121,7 +121,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zunmlq( &options, side, trans, @@ -134,10 +134,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(k, p) ); for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -160,10 +160,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p+RD < C->mt; p += 2*RD) { m = p+RD; - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -200,7 +200,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); lastRD = 0; for (RD = BS; RD < C->mt-k; RD *= 2) @@ -209,10 +209,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p+RD < C->mt; p += 2*RD) { m = p+RD; - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -234,10 +234,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p < C->mt; p += BS) { for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -256,11 +256,11 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(k, m) ); } - temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -277,7 +277,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(p, n), C->get_rankof( C, p, n ) ); @@ -304,7 +304,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); lastRD = 0; for (RD = BS; RD < C->nt-k; RD *= 2) lastRD = RD; @@ -312,10 +312,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p+RD < C->nt; p += 2*RD) { n = p+RD; - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -338,10 +338,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -360,11 +360,11 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(k, n) ); } - temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -381,7 +381,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, p), C->get_rankof( C, m, p ) ); @@ -406,14 +406,14 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = A->get_blkdim( A, k, DIM_m, A->m ); for (p = k; p < C->nt; p += BS) { - temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkm ); if ( genD ) { - int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; + int tempDpn = D->get_blkdim( D, p, DIM_n, D->n ); INSERT_TASK_zlacpy( &options, @@ -430,7 +430,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zunmlq( &options, side, trans, tempmm, temppn, tempkmin, ib, T->nb, @@ -442,9 +442,9 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(k, p) ); for (n = p+1; n < chameleon_min(p+BS, C->nt); n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -466,10 +466,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (RD = BS; RD < C->nt-k; RD *= 2) { for (p = k; p+RD < C->nt; p += 2*RD) { n = p + RD; - tempnn = n == C->mt-1 ? C->m-n*C->mb : C->mb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c index 3275a87dbbdbeb37a0b82db7abf7cf0a11d86674..821b11d264fcd61966a49fca78c32e195108c219 100644 --- a/compute/pzunmqr.c +++ b/compute/pzunmqr.c @@ -49,7 +49,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, int k, m, n; int tempkm, tempkn, tempkmin, tempmm, tempnn; - int ib, KT, K; + int ib, KT, K, DIM_k; chamctxt = chameleon_context_self(); if (sequence->status != CHAMELEON_SUCCESS) { @@ -60,11 +60,13 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, ib = CHAMELEON_IB; if (A->m > A->n) { - KT = A->nt; - K = A->n; + KT = A->nt; + K = A->n; + DIM_k = DIM_n; } else { - KT = A->mt; - K = A->m; + KT = A->mt; + K = A->m; + DIM_k = DIM_m; } if ( D == NULL ) { @@ -100,12 +102,11 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; - + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -121,7 +122,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zunmqr( &options, side, trans, @@ -135,9 +136,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, RUNTIME_data_flush( sequence, T(k, k) ); for (m = k+1; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(k, n), C->get_rankof( C, m, n ) ); @@ -173,14 +174,13 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; - + tempkm = C->get_blkdim( C, k, DIM_m, C->m ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); for (m = C->mt-1; m > k; m--) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(k, n), C->get_rankof( C, m, n ) ); @@ -200,7 +200,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, } if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -216,7 +216,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(k, n), C->get_rankof( C, k, n ) ); @@ -243,13 +243,13 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); for (n = C->nt-1; n > k; n--) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, k), C->get_rankof( C, m, n ) ); @@ -270,7 +270,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, } if ( genD ) { - int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -286,7 +286,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, #endif } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, k), C->get_rankof( C, m, k ) ); @@ -313,11 +313,11 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb; - tempkmin = k == KT - 1 ? K - k * A->nb : A->nb; + tempkn = C->get_blkdim( C, k, DIM_n, C->n ); + tempkmin = A->get_blkdim( A, k, DIM_k, K ); if ( genD ) { - int tempDkm = k == D->mt - 1 ? D->m - k * D->mb : D->mb; + int tempDkm = D->get_blkdim( D, k, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -333,7 +333,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, #endif } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zunmqr( &options, side, trans, @@ -347,9 +347,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans, RUNTIME_data_flush( sequence, T(k, k) ); for (n = k+1; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, k), C->get_rankof( C, m, n ) ); diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index 47904aaaf165c8ed104b86b30b27b710531e19f4..72dd950a1e5e9b8848f9f88ceabfce4c08a6673a 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -92,18 +92,18 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt - 1 ? A->n - k * A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -119,7 +119,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zunmqr( &options, side, trans, temppm, tempnn, tempkmin, ib, T->nb, @@ -138,7 +138,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -151,7 +151,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, T = TT; } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -185,7 +185,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); /* Setting the order of the tiles*/ nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); @@ -194,7 +194,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -207,7 +207,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, T = TT; } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -229,12 +229,12 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -251,7 +251,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(p, n), C->get_rankof( C, p, n ) ); @@ -278,7 +278,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); /* Setting the order of the tiles*/ nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); @@ -287,7 +287,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, n = tiles[i]; p = qrtree->currpiv(qrtree, k, n); - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -301,7 +301,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -323,11 +323,11 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min(temppn, tempkn); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -344,7 +344,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, p), C->get_rankof( C, m, p ) ); @@ -369,17 +369,17 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); T = TS; for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { p = qrtree->getm(qrtree, k, i); - temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -396,7 +396,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zunmqr( &options, side, trans, tempmm, temppn, tempkmin, ib, T->nb, @@ -415,7 +415,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, n = tiles[i]; p = qrtree->currpiv(qrtree, k, n); - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) { /* TS kernel */ @@ -429,7 +429,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree, } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 8eda28189e83810f173e2634160905c8f5101661..be0f0e5bffd285cb5d03c33970b565475102ddfd 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -95,16 +95,16 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt - 1 ? A->n - k * A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (p = k; p < C->mt; p += BS) { - temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -120,7 +120,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans #endif } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); INSERT_TASK_zunmqr( &options, side, trans, @@ -133,10 +133,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(p, k) ); for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -159,10 +159,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p+RD < C->mt; p += 2*RD) { m = p+RD; - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -198,7 +198,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); lastRD = 0; for (RD = BS; RD < C->mt-k; RD *= 2) lastRD = RD; @@ -206,10 +206,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p+RD < C->mt; p += 2*RD) { m = p+RD; - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -231,10 +231,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p < C->mt; p += BS) { for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(p, n), node ); @@ -253,11 +253,11 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(m, k) ); } - temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb; + temppm = C->get_blkdim( C, p, DIM_m, C->m ); tempkmin = chameleon_min( temppm, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -274,7 +274,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans } for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); RUNTIME_data_migrate( sequence, C(p, n), C->get_rankof( C, p, n ) ); @@ -301,7 +301,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = KT-1; k >= 0; k--) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); lastRD = 0; for (RD = BS; RD < C->nt-k; RD *= 2) @@ -310,10 +310,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (p = k; p+RD < C->nt; p += 2*RD) { n = p+RD; - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -336,10 +336,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -358,11 +358,11 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(n, k) ); } - temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -379,7 +379,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); RUNTIME_data_migrate( sequence, C(m, p), C->get_rankof( C, m, p ) ); @@ -404,15 +404,15 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (k = 0; k < KT; k++) { RUNTIME_iteration_push(chamctxt, k); - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkn = A->get_blkdim( A, k, DIM_n, A->n ); for (p = k; p < C->nt; p += BS) { - temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb; + temppn = C->get_blkdim( C, p, DIM_n, C->n ); tempkmin = chameleon_min( temppn, tempkn ); if ( genD ) { - int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb; + int tempDpm = D->get_blkdim( D, p, DIM_m, D->m ); INSERT_TASK_zlacpy( &options, @@ -429,7 +429,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans } for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); INSERT_TASK_zunmqr( &options, side, trans, tempmm, temppn, tempkmin, ib, T->nb, @@ -441,9 +441,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans RUNTIME_data_flush( sequence, T(p, k) ); for (n = p+1; n < chameleon_min(p+BS, C->nt); n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); @@ -465,10 +465,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans for (RD = BS; RD < C->nt-k; RD *= 2) { for (p = k; p+RD < C->nt; p += 2*RD) { n = p + RD; - tempnn = n == C->mt-1 ? C->m-n*C->mb : C->mb; + tempnn = C->get_blkdim( C, n, DIM_n, C->n ); for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + tempmm = C->get_blkdim( C, m, DIM_m, C->m ); node = C->get_rankof( C, m, n ); RUNTIME_data_migrate( sequence, C(m, p), node ); diff --git a/compute/zgemm_batch.c b/compute/zgemm_batch.c index ab1ed4062062172c2d63b517485246763ac1e821..36d77960a50ee789516f82db85c672a4652a4ddb 100644 --- a/compute/zgemm_batch.c +++ b/compute/zgemm_batch.c @@ -61,13 +61,13 @@ zgemm_batch_cpu( void *op_args, tileC = va_arg(ap, CHAM_tile_t *); va_end(ap); - tempmm = m == descC->mt-1 ? descC->m - m * descC->mb : descC->mb; - tempnn = n == descC->nt-1 ? descC->n - n * descC->nb : descC->nb; + tempmm = descC->get_blkdim( descC, m, DIM_m, descC->m ); + tempnn = descC->get_blkdim( descC, n, DIM_n, descC->n ); if ( args->transA == ChamNoTrans ) { - tempkk = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb; + tempkk = descA->get_blkdim( descA, n, DIM_n, descA->n ); } else { - tempkk = m == descA->mt-1 ? descA->m - m * descA->mb : descA->mb; + tempkk = descA->get_blkdim( descA, m, DIM_m, descA->m ); } TCORE_zgemm( @@ -112,13 +112,13 @@ zgemm_batch_cuda( cublasHandle_t handle, void *op_args, tileC = va_arg(ap, CHAM_tile_t *); va_end(ap); - tempmm = m == descC->mt-1 ? descC->m - m * descC->mb : descC->mb; - tempnn = n == descC->nt-1 ? descC->n - n * descC->nb : descC->nb; + tempmm = descC->get_blkdim( descC, m, DIM_m, descC->m ); + tempnn = descC->get_blkdim( descC, n, DIM_n, descC->n ); if ( args->transA == ChamNoTrans ) { - tempkk = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb; + tempkk = descA->get_blkdim( descA, n, DIM_n, descA->n ); } else { - tempkk = m == descA->mt-1 ? descA->m - m * descA->mb : descA->mb; + tempkk = descA->get_blkdim( descA, m, DIM_m, descA->m ); } CUDA_zgemm( args->transA, args->transB, tempmm, tempnn, tempkk, diff --git a/compute/zherk_batch.c b/compute/zherk_batch.c index bff967e9f0afe0b4d2242ef8184a0574f3f96501..12eeda0823406811792995d49e0cd3fd842ce1cd 100644 --- a/compute/zherk_batch.c +++ b/compute/zherk_batch.c @@ -57,12 +57,12 @@ zherk_batch_cpu( void *op_args, tileC = va_arg(ap, CHAM_tile_t *); va_end(ap); - tempnn = n == descC->nt-1 ? descC->n - n * descC->nb : descC->nb; + tempnn = descC->get_blkdim( descC, n, DIM_n, descC->n ); if ( args->trans == ChamNoTrans ) { - tempkk = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb; + tempkk = descA->get_blkdim( descA, n, DIM_n, descA->n ); } else { - tempkk = m == descA->mt-1 ? descA->m - m * descA->mb : descA->mb; + tempkk = descA->get_blkdim( descA, m, DIM_m, descA->m ); } TCORE_zherk( @@ -102,12 +102,12 @@ zherk_batch_cuda( cublasHandle_t handle, void *op_args, tileC = va_arg(ap, CHAM_tile_t *); va_end(ap); - tempnn = n == descC->nt-1 ? descC->n - n * descC->nb : descC->nb; + tempnn = descC->get_blkdim( descC, n, DIM_n, descC->n ); if ( args->trans == ChamNoTrans ) { - tempkk = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb; + tempkk = descA->get_blkdim( descA, n, DIM_n, descA->n ); } else { - tempkk = m == descA->mt-1 ? descA->m - m * descA->mb : descA->mb; + tempkk = descA->get_blkdim( descA, m, DIM_m, descA->m ); } CUDA_zherk( args->uplo, args->trans, tempnn, tempkk, diff --git a/compute/zplghe_batch.c b/compute/zplghe_batch.c index 92a0a071ee77dbe136e91241a0226fe0e4d21f2f..e876df684909931eacd188099b862a3120b790cd 100644 --- a/compute/zplghe_batch.c +++ b/compute/zplghe_batch.c @@ -43,7 +43,7 @@ zplghe_batch_cpu( void *op_args, fprintf( stderr, "zplghe_batch_cpu: requires two pieces of data and %d have been given\n", ndata ); } - tempnn = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb; + tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n ); /** * Let's fo the math to give the right bigM: diff --git a/compute/zpotrf_batch.c b/compute/zpotrf_batch.c index 983a9d92fa3ba72645b47d0dbe78dd1346ad59f2..dd6a3f440801d2efe6ceb867323aa1bdac2692dc 100644 --- a/compute/zpotrf_batch.c +++ b/compute/zpotrf_batch.c @@ -38,7 +38,7 @@ zpotrf_batch_cpu( void *op_args, fprintf( stderr, "zpotrf_batch_cpu: requires two pieces of data and %d have been given\n", ndata ); } - tempnn = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb; + tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n ); TCORE_zpotrf( luplo, tempnn, tileA, &info ); diff --git a/compute/zprint.c b/compute/zprint.c index e17a406a1b5adcdf80ff90e2d49a87ec62c104eb..065beac235802f252590db1289b210e293da3bf4 100644 --- a/compute/zprint.c +++ b/compute/zprint.c @@ -34,8 +34,8 @@ zprint_cpu( void *op_args, struct zprint_args_s *options = (struct zprint_args_s *)op_args; CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); - int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; - int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb; + int tempmm = descA->get_blkdim( descA, m, DIM_m, descA->m ); + int tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n ); int lda = tileA->ld; if ( ndata > 1 ) { diff --git a/compute/ztrsm_batch.c b/compute/ztrsm_batch.c index 9d426737b3bc3678892e452173ae05cfe5a26298..ad8e855c1137ef69095f4638ada5dc5b66abd7b0 100644 --- a/compute/ztrsm_batch.c +++ b/compute/ztrsm_batch.c @@ -58,8 +58,8 @@ ztrsm_batch_cpu( void *op_args, tileB = va_arg(ap, CHAM_tile_t *); va_end(ap); - tempmm = m == descB->mt-1 ? descB->m - m * descB->mb : descB->mb; - tempnn = n == descB->nt-1 ? descB->n - n * descB->nb : descB->nb; + tempmm = descB->get_blkdim( descB, m, DIM_m, descB->m ); + tempnn = descB->get_blkdim( descB, n, DIM_n, descB->n ); TCORE_ztrsm( args->side, args->uplo, args->transA, args->diag, @@ -103,8 +103,8 @@ ztrsm_batch_cuda( cublasHandle_t handle, void *op_args, assert( tileA->format & CHAMELEON_TILE_FULLRANK ); assert( tileB->format & CHAMELEON_TILE_FULLRANK ); - tempmm = m == descB->mt-1 ? descB->m - m * descB->mb : descB->mb; - tempnn = n == descB->nt-1 ? descB->n - n * descB->nb : descB->nb; + tempmm = descB->get_blkdim( descB, m, DIM_m, descB->m ); + tempnn = descB->get_blkdim( descB, n, DIM_n, descB->n ); CUDA_ztrsm( args->side, args->uplo, args->transA, args->diag, diff --git a/control/descriptor.c b/control/descriptor.c index ff2732b2ccd670b250039ec9bdc899043964c589..4a1f2ef0073d1d8b8ece092ac856809b9031a4ed 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -222,6 +222,7 @@ int chameleon_desc_init_internal( CHAM_desc_t *desc, const char *name, void *mat /* If one of the function get_* is NULL, we switch back to the default */ desc->get_blktile = chameleon_desc_gettile; + desc->get_blkdim = chameleon_getblkdim; /* Data addresses */ if ( get_blkaddr ) { diff --git a/control/descriptor_rec.c b/control/descriptor_rec.c index 65d4f6148ccd279daa50178c4ea5a7b70ded3a5e..fed12a95ba7cd59aebd80063525dcb7e8be5c344 100644 --- a/control/descriptor_rec.c +++ b/control/descriptor_rec.c @@ -73,8 +73,8 @@ chameleon_recdesc_create( const char *name, CHAM_desc_t **descptr, void *mat, ch char *subname; tile = desc->get_blktile( desc, m, n ); - tempmm = m == desc->mt-1 ? desc->m - m * desc->mb : desc->mb; - tempnn = n == desc->nt-1 ? desc->n - n * desc->nb : desc->nb; + tempmm = desc->get_blkdim( desc, m, DIM_m, desc->m ); + tempnn = desc->get_blkdim( desc, n, DIM_n, desc->n ); chameleon_asprintf( &subname, "%s[%d,%d]", name, m, n ); diff --git a/example/lapack_to_chameleon/step7.h b/example/lapack_to_chameleon/step7.h index 7622f5ddbbe669e4cdf2810d761c0aa74181f2fb..53331515b35d2ca9caa65ba202d1c063439ea322 100644 --- a/example/lapack_to_chameleon/step7.h +++ b/example/lapack_to_chameleon/step7.h @@ -83,8 +83,8 @@ static int Cham_build_plgsy_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int tempmm, tempnn; /* Get the dimension of the tile */ - tempmm = (m == (descA->mt-1)) ? (descA->m - m * descA->mb) : descA->mb; - tempnn = (n == (descA->nt-1)) ? (descA->n - n * descA->nb) : descA->nb; + tempmm = descA->get_blkdim( descA, m, DIM_m, descA->m ); + tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n ); /* fill the tile with the coreblas function plgsy = random SPD matrix generator */ TCORE_dplgsy( data->bump, tempmm, tempnn, tileA, @@ -101,8 +101,8 @@ static int Cham_build_plrnt_cpu( void *op_args, cham_uplo_t uplo, int m, int n, int tempmm, tempnn; /* Get the dimension of the tile */ - tempmm = (m == (descA->mt-1)) ? (descA->m - m * descA->mb) : descA->mb; - tempnn = (n == (descA->nt-1)) ? (descA->n - n * descA->nb) : descA->nb; + tempmm = descA->get_blkdim( descA, m, DIM_m, descA->m ); + tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n ); /* fill the tile with the coreblas function plrnt = random general matrix generator */ TCORE_dplrnt( tempmm, tempnn, tileA, diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index d3cfb96208b9478c878d20b987dca3305e4bd7c6..09bfc942f5fc1d2e829616b61b79b878324bf61c 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -113,6 +113,14 @@ typedef enum chameleon_flttype_e { #define ChamConvertRealHalfToSingle ChamConvert( ChamRealHalf, ChamRealSingle ) #define ChamConvertRealHalfToHalf ChamConvert( ChamRealHalf, ChamRealHalf ) +/** + * @brief Matrix dimensions naming + */ +typedef enum chameleon_dim_e { + DIM_m = 0, + DIM_n = 1, +} cham_dim_t; + /** * @brief Matrix tile storage */ diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h index 7bfdeb77ba565c2da0f9668f5d7d347e6e44112c..9e60ef27dda1d76e3af3c8f5fe3a2b427ae719c5 100644 --- a/include/chameleon/descriptor_helpers.h +++ b/include/chameleon/descriptor_helpers.h @@ -88,8 +88,92 @@ int chameleon_getblkldd_cm ( const CHAM_desc_t *A, int m ); int chameleon_getblkldd_ccrb( const CHAM_desc_t *A, int m ); /** * @} + * @name Tile dimensions computation in algorithms + * @{ */ +/** + * + * @ingroup Descriptor + * + * @brief Return tile dimension along the m dimension with regular tile sizes + * + * @param[in] A + * The chameleon descriptor for which to compute the size + * + * @param[in] m + * The row index of the tile + * + * @param[in] lm + * The matrix row dimension against which to compute the size + * + * @retval The dimension of the tile along the row/first dimension with a limit on lm + * + */ +static inline int +chameleon_getblkdim_m( const CHAM_desc_t *A, int m, int lm ) +{ + return (((m + 1) * A->mb) > lm ) ? lm - m * A->mb : A->mb; +} + +/** + * + * @ingroup Descriptor + * + * @brief Return tile dimension along the n dimension with regular tile sizes + * + * @param[in] A + * The chameleon descriptor for which to compute the size + * + * @param[in] n + * The column index of the tile + * + * @param[in] ln + * The matrix column dimension against which to compute the size + * + * @retval The dimension of the tile along the column/second dimension with a limit on ln + * + */ +static inline int +chameleon_getblkdim_n( const CHAM_desc_t *A, int n, int ln ) +{ + return (((n + 1) * A->nb) > ln ) ? ln - n * A->nb : A->nb; +} + +/** + * + * @ingroup Descriptor + * + * @brief Return tile dimension along the dim dimension with regular tile sizes + * + * @param[in] A + * The chameleon descriptor for which to compute the size + * + * @param[in] m + * The index of the tile in the given dimension + * + * @param[in] dim + * The dimension on which to compute the size + * + * @param[in] lm + * The matrix dimension along the chosen dim. + * + * @retval The dimension of the tile along the dim dimension with a limit on lm + * + */ +static inline int +chameleon_getblkdim( const CHAM_desc_t *A, int m, cham_dim_t dim, int lm ) +{ + if ( dim == 0 ) { + return chameleon_getblkdim_m( A, m, lm ); + } + else { + return chameleon_getblkdim_n( A, m, lm ); + } +} +/** + * @} + */ #ifdef __cplusplus } #endif diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index e61f95b18ba8a9e2f5c204bc1aff1428e24e6620..d369325007052f3d65ea4ed33033f49f8aac0e45 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -80,6 +80,7 @@ typedef struct chameleon_desc_s CHAM_desc_t; typedef void* (*blkaddr_fct_t) ( const CHAM_desc_t*, int, int ); typedef int (*blkldd_fct_t) ( const CHAM_desc_t*, int ); +typedef int (*blkdim_fct_t) ( const CHAM_desc_t*, int, cham_dim_t, int ); typedef int (*blkrankof_fct_t) ( const CHAM_desc_t*, int, int ); typedef int (*datadist_access_fct_t)( const CHAM_desc_t*, int, ... ); typedef CHAM_tile_t* (*blktile_fct_t) ( const CHAM_desc_t*, int, int ); @@ -116,11 +117,12 @@ void chameleon_desc_set_datadist( CHAM_desc_t *to, cham_data_dist_t *from ); struct chameleon_desc_s { const char *name; - blktile_fct_t get_blktile; /**> function to get chameleon tiles address */ - blkaddr_fct_t get_blkaddr; /**> function to get chameleon tiles address */ - blkldd_fct_t get_blkldd; /**> function to get chameleon tiles leading dimension */ - blkrankof_fct_t get_rankof; /**> function to get chameleon tiles MPI rank */ - blkrankof_fct_t get_rankof_init; /**> function to get chameleon tiles MPI rank */ + blktile_fct_t get_blktile; /**> function to get chameleon tiles address */ + blkaddr_fct_t get_blkaddr; /**> function to get chameleon tiles address */ + blkldd_fct_t get_blkldd; /**> function to get chameleon tiles leading dimension */ + blkdim_fct_t get_blkdim; /**> function to get chameleon tiles dimension within algorithms */ + blkrankof_fct_t get_rankof; /**> function to get chameleon tiles MPI rank */ + blkrankof_fct_t get_rankof_init; /**> function to get chameleon tiles MPI rank */ void* get_rankof_init_arg; CHAM_tile_t *tiles; /**> pointer to the array of tiles descriptors */