Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision were being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Showing with 294 additions and 277 deletions
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zungqr parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief
......@@ -22,7 +22,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -93,14 +93,14 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
for (k = minMT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempAkm = A->get_blkdim( A, k, DIM_m, A->m );
tempAkn = A->get_blkdim( A, k, DIM_n, A->n );
tempkmin = chameleon_min( tempAkn, tempAkm );
tempkm = k == Q->mt-1 ? Q->m-k*Q->mb : Q->mb;
tempkm = Q->get_blkdim( Q, k, DIM_m, Q->m );
for (m = Q->mt - 1; m > k; m--) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
RUNTIME_data_migrate( sequence, Q(k, n),
Q->get_rankof( Q, m, n ) );
......@@ -120,7 +120,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
}
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -136,7 +136,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
#endif
}
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
/* Restore the original location of the tiles */
RUNTIME_data_migrate( sequence, Q(k, n),
......
......@@ -4,18 +4,18 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zungqr_param parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -52,13 +52,13 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
int tempmm, tempnn, tempkmin, tempkn;
int nbgeqrt, node;
tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (i = nbtiles-1; i >= 0; i--) {
m = tiles[i];
p = qrtree->currpiv( qrtree, k, m );
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
if( qrtree->gettype( qrtree, k, m ) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -77,7 +77,7 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
}
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
node = Q->get_rankof( Q, m, n );
RUNTIME_data_migrate( sequence, Q(p, n), node );
......@@ -108,11 +108,11 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
continue;
}
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
tempkmin = chameleon_min( tempmm, tempkn );
if ( genD ) {
int tempDmm = m == D->mt-1 ? D->m - m * D->mb : D->mb;
int tempDmm = D->get_blkdim( D, m, DIM_m, D->m );
INSERT_TASK_zlacpy(
options,
ChamLower, tempDmm, tempkmin,
......@@ -128,7 +128,7 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
}
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
/* Restore the original location of the tiles */
RUNTIME_data_migrate( sequence, Q(m, n),
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zungqrrh parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief
......@@ -23,7 +23,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -91,15 +91,15 @@ void chameleon_pzungqrrh( int genD, int BS,
for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
lastRD = 0;
for (RD = BS; RD < A->mt-k; RD *= 2)
lastRD = RD;
for (RD = lastRD; RD >= BS; RD /= 2) {
for (M = k; M+RD < A->mt; M += 2*RD) {
tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
tempMRDm = A->get_blkdim( A, M+RD, DIM_m, A->m );
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
node = Q->get_rankof( Q, M+RD, n );
RUNTIME_data_migrate( sequence, Q(M, n), node );
......@@ -121,13 +121,13 @@ void chameleon_pzungqrrh( int genD, int BS,
}
}
for (M = k; M < A->mt; M += BS) {
tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
tempMm = A->get_blkdim( A, M, DIM_m, A->m );
tempkmin = chameleon_min(tempMm, tempkn);
for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
node = Q->get_rankof( Q, m, n );
RUNTIME_data_migrate( sequence, Q(M, n), node );
......@@ -148,7 +148,7 @@ void chameleon_pzungqrrh( int genD, int BS,
}
if ( genD ) {
int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
int tempDMm = D->get_blkdim( D, M, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
ChamLower, tempDMm, tempkmin,
......@@ -163,7 +163,7 @@ void chameleon_pzungqrrh( int genD, int BS,
#endif
}
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
/* Restore the original location of the tiles */
RUNTIME_data_migrate( sequence, Q(M, n),
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zunmlq parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief
......@@ -24,7 +24,7 @@
* @author Raphael Boucherie
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -49,7 +49,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
int k, m, n;
int tempkm, tempkn, tempkmin, tempmm, tempnn;
int ib, KT, K;
int ib, KT, K, DIM_k;
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) {
......@@ -60,11 +60,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
ib = CHAMELEON_IB;
if (A->m > A->n) {
KT = A->nt;
K = A->n;
KT = A->nt;
K = A->n;
DIM_k = DIM_n;
} else {
KT = A->mt;
K = A->m;
KT = A->mt;
K = A->m;
DIM_k = DIM_m;
}
if ( D == NULL ) {
......@@ -100,12 +102,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkm = C->get_blkdim( C, k, DIM_m, C->m );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
if ( genD ) {
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
ChamUpper, tempkmin, tempDkn,
......@@ -120,7 +121,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zunmlq(
&options,
side, trans,
......@@ -134,9 +135,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
RUNTIME_data_flush( sequence, T(k, k) );
for (m = k+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(k, n),
C->get_rankof( C, m, n ) );
......@@ -172,14 +173,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkm = C->get_blkdim( C, k, DIM_m, C->m );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
for (m = C->mt-1; m > k; m--) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(k, n),
C->get_rankof( C, m, n ) );
......@@ -200,7 +200,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
}
if ( genD ) {
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
ChamUpper, tempkmin, tempDkn,
......@@ -215,7 +215,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(k, n),
C->get_rankof( C, k, n ) );
......@@ -242,13 +242,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkn = C->get_blkdim( C, k, DIM_n, C->n );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
for (n = C->nt-1; n > k; n--) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, k),
C->get_rankof( C, m, n ) );
......@@ -269,7 +269,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
}
if ( genD ) {
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
ChamUpper, tempkmin, tempDkn,
......@@ -284,7 +284,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, k),
C->get_rankof( C, m, k ) );
......@@ -311,11 +311,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkn = C->get_blkdim( C, k, DIM_n, C->n );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
if ( genD ) {
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -331,7 +331,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zunmlq(
&options,
side, trans,
......@@ -345,9 +345,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
RUNTIME_data_flush( sequence, T(k, k) );
for (n = k+1; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, k),
C->get_rankof( C, m, n ) );
......
......@@ -4,18 +4,18 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zunmlq_param parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -92,18 +92,18 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -119,7 +119,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zunmlq(
&options, side, trans,
temppm, tempnn, tempkmin, ib, T->nb,
......@@ -138,7 +138,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
m = tiles[i];
p = qrtree->currpiv(qrtree, k, m);
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -151,7 +151,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
T = TT;
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -185,7 +185,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
/* Setting the order of the tiles*/
nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
......@@ -194,7 +194,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
m = tiles[i];
p = qrtree->currpiv(qrtree, k, m);
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -207,7 +207,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
T = TT;
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -229,12 +229,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -251,7 +251,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(p, n),
C->get_rankof( C, p, n ) );
......@@ -279,7 +279,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
/* Setting the order of the tiles*/
nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
......@@ -288,7 +288,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
n = tiles[i];
p = qrtree->currpiv(qrtree, k, n);
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -302,7 +302,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -324,11 +324,11 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -345,7 +345,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, p),
C->get_rankof( C, m, p ) );
......@@ -370,17 +370,17 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -397,7 +397,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zunmlq(
&options, side, trans,
tempmm, temppn, tempkmin, ib, T->nb,
......@@ -416,7 +416,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
n = tiles[i];
p = qrtree->currpiv(qrtree, k, n);
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -430,7 +430,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zunmlqrh parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief
......@@ -23,7 +23,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -95,17 +95,17 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for (p = k; p < C->mt; p += BS) {
temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -121,7 +121,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zunmlq(
&options,
side, trans,
......@@ -134,10 +134,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(k, p) );
for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -160,10 +160,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p+RD < C->mt; p += 2*RD) {
m = p+RD;
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -200,7 +200,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
lastRD = 0;
for (RD = BS; RD < C->mt-k; RD *= 2)
......@@ -209,10 +209,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p+RD < C->mt; p += 2*RD) {
m = p+RD;
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -234,10 +234,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p < C->mt; p += BS) {
for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -256,11 +256,11 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(k, m) );
}
temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -277,7 +277,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(p, n),
C->get_rankof( C, p, n ) );
......@@ -304,7 +304,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
lastRD = 0;
for (RD = BS; RD < C->nt-k; RD *= 2)
lastRD = RD;
......@@ -312,10 +312,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p+RD < C->nt; p += 2*RD) {
n = p+RD;
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -338,10 +338,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -360,11 +360,11 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(k, n) );
}
temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -381,7 +381,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, p),
C->get_rankof( C, m, p ) );
......@@ -406,14 +406,14 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for (p = k; p < C->nt; p += BS) {
temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkm );
if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -430,7 +430,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zunmlq(
&options, side, trans,
tempmm, temppn, tempkmin, ib, T->nb,
......@@ -442,9 +442,9 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(k, p) );
for (n = p+1; n < chameleon_min(p+BS, C->nt); n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -466,10 +466,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (RD = BS; RD < C->nt-k; RD *= 2) {
for (p = k; p+RD < C->nt; p += 2*RD) {
n = p + RD;
tempnn = n == C->mt-1 ? C->m-n*C->mb : C->mb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zunmqr parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief
......@@ -24,7 +24,7 @@
* @author Raphael Boucherie
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -49,7 +49,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
int k, m, n;
int tempkm, tempkn, tempkmin, tempmm, tempnn;
int ib, KT, K;
int ib, KT, K, DIM_k;
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) {
......@@ -60,11 +60,13 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
ib = CHAMELEON_IB;
if (A->m > A->n) {
KT = A->nt;
K = A->n;
KT = A->nt;
K = A->n;
DIM_k = DIM_n;
} else {
KT = A->mt;
K = A->m;
KT = A->mt;
K = A->m;
DIM_k = DIM_m;
}
if ( D == NULL ) {
......@@ -100,12 +102,11 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkm = C->get_blkdim( C, k, DIM_m, C->m );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -121,7 +122,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zunmqr(
&options,
side, trans,
......@@ -135,9 +136,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
RUNTIME_data_flush( sequence, T(k, k) );
for (m = k+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(k, n),
C->get_rankof( C, m, n ) );
......@@ -173,14 +174,13 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkm = C->get_blkdim( C, k, DIM_m, C->m );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
for (m = C->mt-1; m > k; m--) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(k, n),
C->get_rankof( C, m, n ) );
......@@ -200,7 +200,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
}
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -216,7 +216,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(k, n),
C->get_rankof( C, k, n ) );
......@@ -243,13 +243,13 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkn = C->get_blkdim( C, k, DIM_n, C->n );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
for (n = C->nt-1; n > k; n--) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, k),
C->get_rankof( C, m, n ) );
......@@ -270,7 +270,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
}
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -286,7 +286,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, k),
C->get_rankof( C, m, k ) );
......@@ -313,11 +313,11 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
tempkmin = k == KT - 1 ? K - k * A->nb : A->nb;
tempkn = C->get_blkdim( C, k, DIM_n, C->n );
tempkmin = A->get_blkdim( A, k, DIM_k, K );
if ( genD ) {
int tempDkm = k == D->mt - 1 ? D->m - k * D->mb : D->mb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -333,7 +333,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
#endif
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zunmqr(
&options,
side, trans,
......@@ -347,9 +347,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
RUNTIME_data_flush( sequence, T(k, k) );
for (n = k+1; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, k),
C->get_rankof( C, m, n ) );
......
......@@ -4,18 +4,18 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zunmqr_param parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -92,18 +92,18 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt - 1 ? A->n - k * A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -119,7 +119,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zunmqr(
&options, side, trans,
temppm, tempnn, tempkmin, ib, T->nb,
......@@ -138,7 +138,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
m = tiles[i];
p = qrtree->currpiv(qrtree, k, m);
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -151,7 +151,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
T = TT;
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -185,7 +185,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
/* Setting the order of the tiles*/
nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
......@@ -194,7 +194,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
m = tiles[i];
p = qrtree->currpiv(qrtree, k, m);
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -207,7 +207,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
T = TT;
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -229,12 +229,12 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -251,7 +251,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(p, n),
C->get_rankof( C, p, n ) );
......@@ -278,7 +278,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
/* Setting the order of the tiles*/
nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
......@@ -287,7 +287,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
n = tiles[i];
p = qrtree->currpiv(qrtree, k, n);
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -301,7 +301,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -323,11 +323,11 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min(temppn, tempkn);
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -344,7 +344,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, p),
C->get_rankof( C, m, p ) );
......@@ -369,17 +369,17 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
T = TS;
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i);
temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -396,7 +396,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zunmqr(
&options, side, trans,
tempmm, temppn, tempkmin, ib, T->nb,
......@@ -415,7 +415,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
n = tiles[i];
p = qrtree->currpiv(qrtree, k, n);
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -429,7 +429,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zunmqrrh parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief
......@@ -23,7 +23,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -95,16 +95,16 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt - 1 ? A->n - k * A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (p = k; p < C->mt; p += BS) {
temppm = p == C->mt-1 ? C->m - p * C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -120,7 +120,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
#endif
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zunmqr(
&options,
side, trans,
......@@ -133,10 +133,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(p, k) );
for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -159,10 +159,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p+RD < C->mt; p += 2*RD) {
m = p+RD;
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -198,7 +198,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
lastRD = 0;
for (RD = BS; RD < C->mt-k; RD *= 2)
lastRD = RD;
......@@ -206,10 +206,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p+RD < C->mt; p += 2*RD) {
m = p+RD;
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -231,10 +231,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p < C->mt; p += BS) {
for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(p, n), node );
......@@ -253,11 +253,11 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(m, k) );
}
temppm = p == C->mt-1 ? C->m-p*C->mb : C->mb;
temppm = C->get_blkdim( C, p, DIM_m, C->m );
tempkmin = chameleon_min( temppm, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -274,7 +274,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
RUNTIME_data_migrate( sequence, C(p, n),
C->get_rankof( C, p, n ) );
......@@ -301,7 +301,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
lastRD = 0;
for (RD = BS; RD < C->nt-k; RD *= 2)
......@@ -310,10 +310,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (p = k; p+RD < C->nt; p += 2*RD) {
n = p+RD;
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -336,10 +336,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -358,11 +358,11 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(n, k) );
}
temppn = p == C->nt-1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -379,7 +379,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
RUNTIME_data_migrate( sequence, C(m, p),
C->get_rankof( C, m, p ) );
......@@ -404,15 +404,15 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (k = 0; k < KT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (p = k; p < C->nt; p += BS) {
temppn = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
temppn = C->get_blkdim( C, p, DIM_n, C->n );
tempkmin = chameleon_min( temppn, tempkn );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
INSERT_TASK_zlacpy(
&options,
......@@ -429,7 +429,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zunmqr(
&options, side, trans,
tempmm, temppn, tempkmin, ib, T->nb,
......@@ -441,9 +441,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
RUNTIME_data_flush( sequence, T(p, k) );
for (n = p+1; n < chameleon_min(p+BS, C->nt); n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......@@ -465,10 +465,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
for (RD = BS; RD < C->nt-k; RD *= 2) {
for (p = k; p+RD < C->nt; p += 2*RD) {
n = p + RD;
tempnn = n == C->mt-1 ? C->m-n*C->mb : C->mb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
node = C->get_rankof( C, m, n );
RUNTIME_data_migrate( sequence, C(m, p), node );
......
......@@ -4,12 +4,12 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @brief Chameleon zbuild wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
......@@ -17,7 +17,7 @@
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @author Florent Pruvost
* @date 2022-02-22
* @date 2024-03-14
* @precisions normal z -> s d c
*
*/
......
......@@ -2,7 +2,7 @@
*
* @file zcesca.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -12,7 +12,8 @@
* @version 1.3.0
* @author Florent Pruvost
* @author Lionel Eyraud-Dubois
* @date 2023-07-05
* @author Pierre Esterie
* @date 2024-11-13
* @precisions normal z -> s d c z
*
*/
......@@ -55,37 +56,47 @@ void *CHAMELEON_zcesca_WS_Alloc( const CHAM_desc_t *A )
options = calloc( 1, sizeof(struct chameleon_pzcesca_s) );
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) );
worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) );
chameleon_desc_init( &(options->Wgcol), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, 1, A->nb, A->nb,
workmt, A->n, 0, 0,
workmt, A->n, A->p, A->q,
workmt, A->n,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &(options->Wgrow), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, A->mb, 1, A->mb,
A->m, worknt, 0, 0,
A->m, worknt, A->p, A->q,
A->m, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &(options->Wgelt), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, 1, 1, 1,
1, worknt, 0, 0,
1, worknt, A->p, A->q,
1, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &(options->Wdcol), CHAMELEON_MAT_ALLOC_TILE,
ChamRealDouble, 2, A->nb, 2*A->nb,
2*workmt, A->n, 0, 0,
2*workmt, A->n, A->p, A->q,
2*workmt, A->n,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &(options->Wdrow), CHAMELEON_MAT_ALLOC_TILE,
ChamRealDouble, A->mb, 2, 2*A->mb,
A->m, 2*worknt, 0, 0,
A->m, 2*worknt, A->p, A->q,
A->m, 2*worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
return (void*)options;
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqf wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -21,7 +21,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,17 +4,17 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqf_param wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqs wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -20,7 +20,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,17 +4,17 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqs_param wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgels wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
......@@ -19,7 +19,7 @@
* @author Florent Pruvost
* @author Raphael Boucherie
* @author Alycia Lisito
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,18 +4,18 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgels_param wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @author Alycia Lisito
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -19,7 +19,8 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Lionel Eyraud-Dubois
* @date 2023-07-05
* @author Pierre Esterie
* @date 2024-11-13
* @precisions normal z -> s d c
*
*/
......@@ -110,7 +111,7 @@ void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA __attribute__((unused)
* If only one process, or if generic has been globally enforced, we switch
* to generic immediately.
*/
if ( ((C->p == 1) && (C->q == 1)) ||
if ( ((chameleon_desc_datadist_get_iparam(C, 0) == 1) && (chameleon_desc_datadist_get_iparam(C, 1) == 1)) ||
(chamctxt->generic_enabled == CHAMELEON_TRUE) )
{
options->alg = ChamGemmAlgGeneric;
......@@ -151,9 +152,9 @@ void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA __attribute__((unused)
double ratio = 1.5; /* Arbitrary ratio to give more weight to writes wrt reads. */
/* Compute the average array per node for each matrix */
sizeA = ((double)A->m * (double)A->n) / (double)(A->p * A->q);
sizeB = ((double)B->m * (double)B->n) / (double)(B->p * B->q);
sizeC = ((double)C->m * (double)C->n) / (double)(C->p * C->q) * ratio;
sizeA = ((double)A->m * (double)A->n) / (double)(chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1));
sizeB = ((double)B->m * (double)B->n) / (double)(chameleon_desc_datadist_get_iparam(B, 0) * chameleon_desc_datadist_get_iparam(B, 1));
sizeC = ((double)C->m * (double)C->n) / (double)(chameleon_desc_datadist_get_iparam(C, 0) * chameleon_desc_datadist_get_iparam(C, 1)) * ratio;
options->alg = ChamGemmAlgGeneric;
if ( (sizeC > sizeA) && (sizeC > sizeB) )
......@@ -192,13 +193,17 @@ void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA __attribute__((unused)
chameleon_desc_init( &(options->WA), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mt * C->mb, C->nb * C->q * lookahead, 0, 0,
C->mt * C->mb, C->nb * C->q * lookahead, C->p, C->q,
C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead, 0, 0,
C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1) * lookahead,
chameleon_desc_datadist_get_iparam(C, 0),
chameleon_desc_datadist_get_iparam(C, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &(options->WB), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mb * C->p * lookahead, C->nt * C->nb, 0, 0,
C->mb * C->p * lookahead, C->nt * C->nb, C->p, C->q,
C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb, 0, 0,
C->mb * chameleon_desc_datadist_get_iparam(C, 0) * lookahead, C->nt * C->nb,
chameleon_desc_datadist_get_iparam(C, 0),
chameleon_desc_datadist_get_iparam(C, 1),
NULL, NULL, NULL, NULL );
}
......
......@@ -2,7 +2,7 @@
*
* @file zgemm_batch.c
*
* @copyright 2019-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2019-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -11,7 +11,7 @@
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2024-04-03
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -61,13 +61,13 @@ zgemm_batch_cpu( void *op_args,
tileC = va_arg(ap, CHAM_tile_t *);
va_end(ap);
tempmm = m == descC->mt-1 ? descC->m - m * descC->mb : descC->mb;
tempnn = n == descC->nt-1 ? descC->n - n * descC->nb : descC->nb;
tempmm = descC->get_blkdim( descC, m, DIM_m, descC->m );
tempnn = descC->get_blkdim( descC, n, DIM_n, descC->n );
if ( args->transA == ChamNoTrans ) {
tempkk = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb;
tempkk = descA->get_blkdim( descA, n, DIM_n, descA->n );
}
else {
tempkk = m == descA->mt-1 ? descA->m - m * descA->mb : descA->mb;
tempkk = descA->get_blkdim( descA, m, DIM_m, descA->m );
}
TCORE_zgemm(
......@@ -112,13 +112,13 @@ zgemm_batch_cuda( cublasHandle_t handle, void *op_args,
tileC = va_arg(ap, CHAM_tile_t *);
va_end(ap);
tempmm = m == descC->mt-1 ? descC->m - m * descC->mb : descC->mb;
tempnn = n == descC->nt-1 ? descC->n - n * descC->nb : descC->nb;
tempmm = descC->get_blkdim( descC, m, DIM_m, descC->m );
tempnn = descC->get_blkdim( descC, n, DIM_n, descC->n );
if ( args->transA == ChamNoTrans ) {
tempkk = n == descA->nt-1 ? descA->n - n * descA->nb : descA->nb;
tempkk = descA->get_blkdim( descA, n, DIM_n, descA->n );
}
else {
tempkk = m == descA->mt-1 ? descA->m - m * descA->mb : descA->mb;
tempkk = descA->get_blkdim( descA, m, DIM_m, descA->m );
}
CUDA_zgemm( args->transA, args->transB, tempmm, tempnn, tempkk,
......@@ -139,10 +139,11 @@ zgemm_batch_cuda( cublasHandle_t handle, void *op_args,
#endif
/**
 * @brief File-local map operator descriptor for the batched zgemm.
 *
 * Registers the operator under the name "zgemm" with zgemm_batch_cpu as the
 * CPU callback and zgemm_batch_cuda as the CUDA callback; no HIP callback is
 * provided (NULL). The synchronous field is set to 0.
 *
 * NOTE(review): the .name/.cpufunc/.cudafunc/.hipfunc designators appear twice
 * below with identical values. This looks like the before/after sides of a
 * diff rendered together - confirm that only one set (plus .synchronous)
 * belongs in the real file. As written, C lets a later designated initializer
 * override an earlier one for the same member, so the resulting values are
 * unchanged, but compilers may warn about the duplicates.
 *
 * NOTE(review): presumably synchronous = 0 selects asynchronous execution of
 * the map operator - verify against the cham_map_operator_t declaration.
 */
static cham_map_operator_t zgemm_batch_map = {
.name = "zgemm",
.cpufunc = zgemm_batch_cpu,
.cudafunc = zgemm_batch_cuda,
.hipfunc = NULL,
.name = "zgemm",
.cpufunc = zgemm_batch_cpu,
.cudafunc = zgemm_batch_cuda,
.hipfunc = NULL,
.synchronous = 0,
};
/**
......