Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Showing with 931 additions and 354 deletions
...@@ -2,17 +2,18 @@ ...@@ -2,17 +2,18 @@
* *
* @file pzgram.c * @file pzgram.c
* *
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zgram parallel algorithm * @brief Chameleon zgram parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c z * @precisions normal z -> s d c z
* *
*/ */
...@@ -33,8 +34,8 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -33,8 +34,8 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* 1) compute (scl,ssq) over columns in each tile * 1) compute (scl,ssq) over columns in each tile
...@@ -42,10 +43,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -42,10 +43,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
for(n = 0; n < NT; n++) { for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0; int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = mmin; m < mmax; m++) { for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
if ( n == m ) { if ( n == m ) {
INSERT_TASK_dsyssq( INSERT_TASK_dsyssq(
...@@ -66,7 +67,7 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -66,7 +67,7 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
} }
for(n = 0; n < NT; n++) { for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
/** /**
* 2) reduce columns (scl,ssq) tiles per processus (between lines) * 2) reduce columns (scl,ssq) tiles per processus (between lines)
...@@ -116,10 +117,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -116,10 +117,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
for(n = 0; n < NT; n++) { for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0; int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = mmin; m < mmax; m++) { for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
INSERT_TASK_zgram( INSERT_TASK_zgram(
options, options,
...@@ -152,9 +153,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des ...@@ -152,9 +153,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des
/* Initialize Wcol */ /* Initialize Wcol */
for(m = 0; m < Wcol->mt; m++) { for(m = 0; m < Wcol->mt; m++) {
tempmm = m == Wcol->mt-1 ? Wcol->m-m*Wcol->mb : Wcol->mb; tempmm = Wcol->get_blkdim( Wcol, m, DIM_m, Wcol->m );
for(n = 0; n < Wcol->nt; n++) { for(n = 0; n < Wcol->nt; n++) {
tempnn = n == Wcol->nt-1 ? Wcol->n-n*Wcol->nb : Wcol->nb; tempnn = Wcol->get_blkdim( Wcol, n, DIM_n, Wcol->n );
INSERT_TASK_dlaset( INSERT_TASK_dlaset(
&options, &options,
ChamUpperLower, tempmm, tempnn, ChamUpperLower, tempmm, tempnn,
...@@ -164,9 +165,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des ...@@ -164,9 +165,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des
} }
/* Initialize Welt */ /* Initialize Welt */
for(m = 0; m < Welt->mt; m++) { for(m = 0; m < Welt->mt; m++) {
tempmm = m == Welt->mt-1 ? Welt->m-m*Welt->mb : Welt->mb; tempmm = Welt->get_blkdim( Welt, m, DIM_m, Welt->m );
for(n = 0; n < Welt->nt; n++) { for(n = 0; n < Welt->nt; n++) {
tempnn = n == Welt->nt-1 ? Welt->n-n*Welt->nb : Welt->nb; tempnn = Welt->get_blkdim( Welt, n, DIM_n, Welt->n );
INSERT_TASK_dlaset( INSERT_TASK_dlaset(
&options, &options,
ChamUpperLower, tempmm, tempnn, ChamUpperLower, tempmm, tempnn,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zhemm parallel algorithm * @brief Chameleon zhemm parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -109,9 +110,9 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -109,9 +110,9 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
} }
for(n = 0; n < C->nt; n++) { for(n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for(m = 0; m < C->mt; m++) { for(m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* Scale C */ /* Scale C */
options->forcesub = 0; options->forcesub = 0;
...@@ -125,7 +126,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -125,7 +126,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
if (side == ChamLeft) { if (side == ChamLeft) {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
if (k < m) { if (k < m) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -161,7 +162,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -161,7 +162,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
*/ */
else { else {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
if (k < m) { if (k < m) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -199,7 +200,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -199,7 +200,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
else { else {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -235,7 +236,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -235,7 +236,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
*/ */
else { else {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -292,7 +293,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -292,7 +293,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
{ {
RUNTIME_sequence_t *sequence = options->sequence; RUNTIME_sequence_t *sequence = options->sequence;
cham_trans_t transA; cham_trans_t transA;
int m, n, k, p, q, KT, K, lp, lq; int m, n, k, p, q, KT, lp, lq;
int tempmm, tempnn, tempkk; int tempmm, tempnn, tempkk;
int lookahead, myp, myq; int lookahead, myp, myq;
...@@ -301,14 +302,13 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -301,14 +302,13 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
lookahead = chamctxt->lookahead; lookahead = chamctxt->lookahead;
KT = A->nt; KT = A->nt;
K = A->n; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myp = C->myrank / C->q; myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % C->q;
for (k = 0; k < KT; k++ ) { for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p; lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * C->q; lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = k == KT - 1 ? K - k * A->nb : A->nb; tempkk = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
/* Transfert ownership of the k column of A or B */ /* Transfert ownership of the k column of A or B */
...@@ -316,7 +316,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -316,7 +316,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
int Am, Ak; int Am, Ak;
int tempam, tempak; int tempam, tempak;
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( (( uplo == ChamUpper ) && ( m > k )) || if ( (( uplo == ChamUpper ) && ( m > k )) ||
(( uplo == ChamLower ) && ( m < k )) ) (( uplo == ChamLower ) && ( m < k )) )
...@@ -339,48 +339,48 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -339,48 +339,48 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
options, options,
ChamUpperLower, tempam, tempak, ChamUpperLower, tempam, tempak,
A( Am, Ak ), A( Am, Ak ),
WA( m, (k % C->q) + lq ) ); WA( m, (Ak % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, A( Am, Ak ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) );
for ( q=1; q < C->q; q++ ) { for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempam, tempak, ChamUpperLower, tempam, tempak,
WA( m, ((k+q-1) % C->q) + lq ), WA( m, ((Ak+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % C->q) + lq ) ); WA( m, ((Ak+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
} }
} }
/* Transfert ownership of the k row of B, or A */ /* Transfert ownership of the k row of B, or A */
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempkk, tempnn, ChamUpperLower, tempkk, tempnn,
B( k, n ), B( k, n ),
WB( (k % C->p) + lp, n ) ); WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, B( k, n ) ); RUNTIME_data_flush( sequence, B( k, n ) );
for ( p=1; p < C->p; p++ ) { for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempkk, tempnn, ChamUpperLower, tempkk, tempnn,
WB( ((k+p-1) % C->p) + lp, n ), WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) ); WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
} }
} }
/* Perform the update of this iteration */ /* Perform the update of this iteration */
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( k == m ) { if ( k == m ) {
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zhemm( INSERT_TASK_zhemm(
options, ChamLeft, uplo, options, ChamLeft, uplo,
...@@ -400,8 +400,8 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -400,8 +400,8 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
transA = ChamNoTrans; transA = ChamNoTrans;
} }
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
options, transA, ChamNoTrans, options, transA, ChamNoTrans,
...@@ -428,7 +428,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -428,7 +428,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
{ {
RUNTIME_sequence_t *sequence = options->sequence; RUNTIME_sequence_t *sequence = options->sequence;
cham_trans_t transA; cham_trans_t transA;
int m, n, k, p, q, KT, K, lp, lq; int m, n, k, p, q, KT, lp, lq;
int tempmm, tempnn, tempkk; int tempmm, tempnn, tempkk;
int lookahead, myp, myq; int lookahead, myp, myq;
...@@ -437,35 +437,34 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -437,35 +437,34 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
lookahead = chamctxt->lookahead; lookahead = chamctxt->lookahead;
KT = A->mt; KT = A->mt;
K = A->m; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myp = C->myrank / C->q; myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % C->q;
for (k = 0; k < KT; k++ ) { for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p; lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * C->q; lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = k == KT - 1 ? K - k * A->nb : A->nb; tempkk = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
/* Transfert ownership of the k column of A or B */ /* Transfert ownership of the k column of A or B */
for (m = 0; m < C->mt; m++ ) { for (m = 0; m < C->mt; m++ ) {
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkk, ChamUpperLower, tempmm, tempkk,
B( m, k ), B( m, k ),
WA( m, (k % C->q) + lq ) ); WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, B( m, k ) ); RUNTIME_data_flush( sequence, B( m, k ) );
for ( q=1; q < C->q; q++ ) { for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkk, ChamUpperLower, tempmm, tempkk,
WA( m, ((k+q-1) % C->q) + lq ), WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % C->q) + lq ) ); WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
} }
} }
...@@ -474,7 +473,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -474,7 +473,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
int Ak, An; int Ak, An;
int tempak, tempan; int tempak, tempan;
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( (( uplo == ChamUpper ) && ( n < k )) || if ( (( uplo == ChamUpper ) && ( n < k )) ||
(( uplo == ChamLower ) && ( n > k )) ) (( uplo == ChamLower ) && ( n > k )) )
...@@ -496,26 +495,26 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -496,26 +495,26 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
options, options,
ChamUpperLower, tempak, tempan, ChamUpperLower, tempak, tempan,
A( Ak, An ), A( Ak, An ),
WB( (k % C->p) + lp, n ) ); WB( (Ak % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, A( Ak, An ) ); RUNTIME_data_flush( sequence, A( Ak, An ) );
for ( p=1; p < C->p; p++ ) { for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempak, tempan, ChamUpperLower, tempak, tempan,
WB( ((k+p-1) % C->p) + lp, n ), WB( ((Ak+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) ); WB( ((Ak+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
} }
} }
/* Perform the update of this iteration */ /* Perform the update of this iteration */
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( k == n ) { if ( k == n ) {
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* A has been stored in WA or WB for the summa ring */ /* A has been stored in WA or WB for the summa ring */
INSERT_TASK_zhemm( INSERT_TASK_zhemm(
...@@ -536,8 +535,8 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -536,8 +535,8 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
transA = ChamNoTrans; transA = ChamNoTrans;
} }
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
options, ChamNoTrans, transA, options, ChamNoTrans, transA,
...@@ -594,16 +593,16 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -594,16 +593,16 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
for(m = 0; m < C->mt; m++) { for(m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for(n = 0; n < C->nt; n++) { for(n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
/* /*
* ChamLeft / ChamLower * ChamLeft / ChamLower
*/ */
if (side == ChamLeft) { if (side == ChamLeft) {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < m) { if (k < m) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -641,7 +640,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -641,7 +640,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
*/ */
else { else {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < m) { if (k < m) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -681,7 +680,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -681,7 +680,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
else { else {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -719,7 +718,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -719,7 +718,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
*/ */
else { else {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
......
...@@ -4,21 +4,21 @@ ...@@ -4,21 +4,21 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zher2k parallel algorithm * @brief Chameleon zher2k parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -52,7 +52,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -52,7 +52,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if (uplo == ChamLower) { if (uplo == ChamLower) {
mmin = n+1; mmin = n+1;
...@@ -68,7 +68,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -68,7 +68,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
dbeta = k == 0 ? beta : 1.0; dbeta = k == 0 ? beta : 1.0;
INSERT_TASK_zher2k( INSERT_TASK_zher2k(
&options, &options,
...@@ -79,9 +79,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -79,9 +79,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
dbeta, C(n, n)); /* ldc * N */ dbeta, C(n, n)); /* ldc * N */
} }
for (m = mmin; m < mmax; m++) { for (m = mmin; m < mmax; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -106,7 +106,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -106,7 +106,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
dbeta = k == 0 ? beta : 1.0; dbeta = k == 0 ? beta : 1.0;
INSERT_TASK_zher2k( INSERT_TASK_zher2k(
&options, &options,
...@@ -117,9 +117,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -117,9 +117,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
dbeta, C(n, n)); /* ldc * N */ dbeta, C(n, n)); /* ldc * N */
} }
for (m = mmin; m < mmax; m++) { for (m = mmin; m < mmax; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
...@@ -14,7 +14,8 @@ ...@@ -14,7 +14,8 @@
* @version 1.3.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Ana Hourcau * @author Ana Hourcau
* @date 2024-07-17 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> z d * @precisions normal z -> z d
* *
*/ */
...@@ -28,8 +29,11 @@ ...@@ -28,8 +29,11 @@
#define W(desc, m, n) (desc), (m), (n) #define W(desc, m, n) (desc), (m), (n)
static inline void static inline void
chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, chameleon_pzhered_frb( cham_trans_t trans,
CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, cham_uplo_t uplo,
CHAM_desc_t *A,
CHAM_desc_t *Wnorm,
CHAM_desc_t *Welt,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
double alpha = 1.0; double alpha = 1.0;
...@@ -40,8 +44,8 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -40,8 +44,8 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/* Initialize workspaces for tile norms */ /* Initialize workspaces for tile norms */
for (m = 0; m < Wnorm->mt; m++) for (m = 0; m < Wnorm->mt; m++)
...@@ -78,14 +82,13 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -78,14 +82,13 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
int nmin = (uplo == ChamUpper) ? m : 0; int nmin = (uplo == ChamUpper) ? m : 0;
int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, NT) : NT; int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, NT) : NT;
int tempmm = (m == (MT - 1)) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for (n = nmin; n < nmax; n++) for (n = nmin; n < nmax; n++)
{ {
int tempnn = (n == (NT - 1)) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if (n == m) if ( n == m ) {
{
if ( trans == ChamConjTrans ) { if ( trans == ChamConjTrans ) {
INSERT_TASK_zhessq( INSERT_TASK_zhessq(
options, ChamEltwise, uplo, tempmm, options, ChamEltwise, uplo, tempmm,
...@@ -97,8 +100,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -97,8 +100,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
A(m, n), W( Wnorm, m, n) ); A(m, n), W( Wnorm, m, n) );
} }
} }
else else {
{
INSERT_TASK_zgessq( INSERT_TASK_zgessq(
options, ChamEltwise, tempmm, tempnn, options, ChamEltwise, tempmm, tempnn,
A(m, n), W( Wnorm, m, n )); A(m, n), W( Wnorm, m, n ));
...@@ -162,11 +164,11 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -162,11 +164,11 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
/** /**
* Broadcast the result * Broadcast the result
*/ */
for (m = 0; m < A->p; m++) for (m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++)
{ {
for (n = 0; n < A->q; n++) for (n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++)
{ {
if ((m != 0) || (n != 0)) if ( ( m != 0 ) || ( n != 0 ) )
{ {
INSERT_TASK_dlacpy( INSERT_TASK_dlacpy(
options, options,
...@@ -180,14 +182,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -180,14 +182,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
/** /**
* *
*/ */
void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_desc_t *A, void chameleon_pzhered( cham_trans_t trans,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) cham_uplo_t uplo,
double prec,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
CHAM_desc_t Wcol; CHAM_desc_t Wcol;
CHAM_desc_t Welt; CHAM_desc_t Welt;
double gnorm, threshold, eps; double gnorm, threshold, eps, eps_diag, threshold_diag;
int workmt, worknt; int workmt, worknt;
int m, n; int m, n;
...@@ -199,71 +205,66 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -199,71 +205,66 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
} }
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
workmt = chameleon_max(A->mt, A->p); workmt = chameleon_max(A->mt, chameleon_desc_datadist_get_iparam(A, 0));
worknt = chameleon_max(A->nt, A->q); worknt = chameleon_max(A->nt, chameleon_desc_datadist_get_iparam(A, 1));
RUNTIME_options_ws_alloc(&options, 1, 0); RUNTIME_options_ws_alloc(&options, 1, 0);
/* Matrix to store the norm of each element */ /* Matrix to store the norm of each element */
chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg); NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg);
/* Matrix to compute the global frobenius norm */ /* Matrix to compute the global frobenius norm */
chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, workmt * 2, worknt, 0, 0, workmt * 2, worknt, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL);
chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options ); chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options );
CHAMELEON_Desc_Flush(&Wcol, sequence); CHAMELEON_Desc_Flush( &Wcol, sequence );
CHAMELEON_Desc_Flush(&Welt, sequence); CHAMELEON_Desc_Flush( &Welt, sequence );
CHAMELEON_Desc_Flush(A, sequence); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait( chamctxt, sequence );
gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q)); gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1)));
chameleon_desc_destroy(&Welt); chameleon_desc_destroy(&Welt);
/** /**
* Reduce the precision of the tiles if possible * Reduce the precision of the tiles if possible
*/ */
if (prec < 0.) eps_diag = CHAMELEON_slamch();
{ if (prec < 0.) {
#if !defined(CHAMELEON_SIMULATION) eps = CHAMELEON_dlamch();
eps = LAPACKE_dlamch_work('e');
#else
#if defined(PRECISION_z) || defined(PRECISION_d)
eps = 1.e-15;
#else
eps = 1.e-7;
#endif
#endif
} }
else else {
{
eps = prec; eps = prec;
} }
threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt));
threshold_diag = (eps < eps_diag) ? threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt));
#if defined(CHAMELEON_DEBUG_GERED) #if defined(CHAMELEON_DEBUG_GERED)
fprintf(stderr, fprintf( stderr,
"[%2d] The norm of A is: %e\n" "[%2d] The norm of A is: %e\n"
"[%2d] The requested precision is: %e\n" "[%2d] The requested precision is: %e\n"
"[%2d] The computed threshold is: %e\n", "[%2d] The computed threshold is: %e\n"
A->myrank, gnorm, "[%2d] The threshold diag is: %e\n",
A->myrank, eps, A->myrank, gnorm,
A->myrank, threshold); A->myrank, eps,
A->myrank, threshold,
A->myrank, threshold_diag );
#endif #endif
for (m = 0; m < A->mt; m++) for (m = 0; m < A->mt; m++)
{ {
int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, A->m );
int nmin = (uplo == ChamUpper) ? m : 0; int nmin = (uplo == ChamUpper) ? m : 0;
int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, A->nt) : A->nt; int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, A->nt) : A->nt;
for (n = nmin; n < nmax; n++) for (n = nmin; n < nmax; n++)
{ {
int tempnn = (n == (A->nt - 1)) ? A->n - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, A->n );
/* /*
* u_{high} = 1e-16 (later should be application accuracy) * u_{high} = 1e-16 (later should be application accuracy)
...@@ -271,15 +272,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -271,15 +272,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
* ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low})
* ||A_{i,j}||_F < threshold / u_{low} * ||A_{i,j}||_F < threshold / u_{low}
*/ */
INSERT_TASK_zgered( &options, threshold, if ( m == n ) {
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); INSERT_TASK_zgered( &options, threshold_diag,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
else {
INSERT_TASK_zgered( &options, threshold,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
} }
} }
CHAMELEON_Desc_Flush(A, sequence); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy(&Wcol); chameleon_desc_destroy( &Wcol );
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free( &options );
RUNTIME_options_finalize(&options, chamctxt); RUNTIME_options_finalize( &options, chamctxt );
} }
...@@ -4,21 +4,21 @@ ...@@ -4,21 +4,21 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zherk parallel algorithm * @brief Chameleon zherk parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -52,13 +52,13 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -52,13 +52,13 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
/* /*
* ChamNoTrans * ChamNoTrans
*/ */
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
dbeta = k == 0 ? beta : 1.0; dbeta = k == 0 ? beta : 1.0;
INSERT_TASK_zherk( INSERT_TASK_zherk(
&options, &options,
...@@ -72,9 +72,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -72,9 +72,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -91,9 +91,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -91,9 +91,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -111,7 +111,7 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -111,7 +111,7 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
dbeta = k == 0 ? beta : 1.0; dbeta = k == 0 ? beta : 1.0;
INSERT_TASK_zherk( INSERT_TASK_zherk(
&options, &options,
...@@ -125,9 +125,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -125,9 +125,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -144,9 +144,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -144,9 +144,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
* @author Samuel Thibault * @author Samuel Thibault
* @author Alycia Lisito * @author Alycia Lisito
* @author Lionel Eyraud-Dubois * @author Lionel Eyraud-Dubois
* @date 2023-07-05 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -90,7 +91,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -90,7 +91,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* Copy of the diagonal tiles to keep the general version of the tile all along the computation */ /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1) );
chameleon_desc_init( &AT, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, A->nb, (A->mb*A->nb), chameleon_desc_init( &AT, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, A->nb, (A->mb*A->nb),
chameleon_min(A->mt, A->nt) * A->mb, A->nb, 0, 0, chameleon_min(A->mt, A->nt) * A->mb, A->nb, 0, 0,
...@@ -99,7 +102,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -99,7 +102,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
/* Let's extract the diagonal in a temporary copy that contains A and A' */ /* Let's extract the diagonal in a temporary copy that contains A and A' */
for (k = 1; k < A->nt; k++){ for (k = 1; k < A->nt; k++){
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zhe2ge( &options, INSERT_TASK_zhe2ge( &options,
uplo, tempkn, tempkn, A->mb, uplo, tempkn, tempkn, A->mb,
...@@ -110,8 +113,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -110,8 +113,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
for (k = 0; k < A->nt-1; k++){ for (k = 0; k < A->nt-1; k++){
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k+1 == A->mt-1 ? A->m-(k+1)*A->mb : A->mb; tempkm = A->get_blkdim( A, k+1, DIM_m, A->m );
tempkn = k == A->nt-1 ? A->n- k *A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgeqrt( INSERT_TASK_zgeqrt(
&options, &options,
...@@ -145,7 +148,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -145,7 +148,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
/* RIGHT on the remaining tiles until the bottom */ /* RIGHT on the remaining tiles until the bottom */
for (m = k+2; m < A->mt ; m++) { for (m = k+2; m < A->mt ; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zunmqr( INSERT_TASK_zunmqr(
&options, &options,
ChamRight, ChamNoTrans, ChamRight, ChamNoTrans,
...@@ -156,7 +159,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -156,7 +159,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
} }
for (m = k+2; m < A->mt; m++) { for (m = k+2; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = 1; options.priority = 1;
INSERT_TASK_ztsqrt( INSERT_TASK_ztsqrt(
...@@ -181,7 +184,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -181,7 +184,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
/* RIGHT */ /* RIGHT */
for (j = m+1; j < A->mt ; j++) { for (j = m+1; j < A->mt ; j++) {
tempjj = j == A->mt-1 ? A->m-j*A->mb : A->mb; tempjj = A->get_blkdim( A, j, DIM_m, A->m );
INSERT_TASK_ztsmqr( INSERT_TASK_ztsmqr(
&options, &options,
ChamRight, ChamNoTrans, ChamRight, ChamNoTrans,
...@@ -262,8 +265,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -262,8 +265,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
for (k = 0; k < A->nt-1; k++){ for (k = 0; k < A->nt-1; k++){
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkn = k+1 == A->nt-1 ? A->n-(k+1)*A->nb : A->nb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkm = k == A->mt-1 ? A->m- k *A->mb : A->mb; tempkn = A->get_blkdim( A, k+1, DIM_n, A->n );
INSERT_TASK_zgelqt( INSERT_TASK_zgelqt(
&options, &options,
tempkm, tempkn, ib, A->nb, tempkm, tempkn, ib, A->nb,
...@@ -296,7 +300,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -296,7 +300,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
/* LEFT on the remaining tiles until the left side */ /* LEFT on the remaining tiles until the left side */
for (n = k+2; n < A->nt ; n++) { for (n = k+2; n < A->nt ; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zunmlq( INSERT_TASK_zunmlq(
&options, &options,
ChamLeft, ChamNoTrans, ChamLeft, ChamNoTrans,
...@@ -307,7 +311,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -307,7 +311,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
} }
for (n = k+2; n < A->nt; n++) { for (n = k+2; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = 1; options.priority = 1;
INSERT_TASK_ztslqt( INSERT_TASK_ztslqt(
&options, &options,
...@@ -331,7 +335,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -331,7 +335,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
/* LEFT */ /* LEFT */
for (j = n+1; j < A->nt ; j++) { for (j = n+1; j < A->nt ; j++) {
tempjj = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjj = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_ztsmlq( INSERT_TASK_ztsmlq(
&options, &options,
ChamLeft, ChamNoTrans, ChamLeft, ChamNoTrans,
...@@ -411,7 +415,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -411,7 +415,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
/* Copy-back into A */ /* Copy-back into A */
for (k = 1; k < A->nt; k++){ for (k = 1; k < A->nt; k++){
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zlacpy( &options, INSERT_TASK_zlacpy( &options,
uplo, tempkn, tempkn, uplo, tempkn, tempkn,
D(k), A(k, k)); D(k), A(k, k));
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlacpy parallel algorithm * @brief Chameleon zlacpy parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @date 2024-02-18
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
......
...@@ -4,21 +4,21 @@ ...@@ -4,21 +4,21 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlag2c parallel algorithm * @brief Chameleon zlag2c parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions mixed zc -> ds * @precisions mixed zc -> ds
* *
*/ */
...@@ -46,10 +46,10 @@ void chameleon_pclag2z( CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -46,10 +46,10 @@ void chameleon_pclag2z( CHAM_desc_t *A, CHAM_desc_t *B,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for(m = 0; m < A->mt; m++) { for(m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for(n = 0; n < A->nt; n++) { for(n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_clag2z( INSERT_TASK_clag2z(
&options, &options,
...@@ -81,10 +81,10 @@ void chameleon_pzlag2c( CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -81,10 +81,10 @@ void chameleon_pzlag2c( CHAM_desc_t *A, CHAM_desc_t *B,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for(m = 0; m < A->mt; m++) { for(m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for(n = 0; n < A->nt; n++) { for(n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlag2c( INSERT_TASK_zlag2c(
&options, &options,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @author Lionel Eyraud-Dubois * @author Lionel Eyraud-Dubois
* @date 2023-07-05 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -43,8 +44,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -43,8 +44,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
int NT = (uplo == ChamLower) ? minMNT : A->nt; int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m; int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n; int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -54,10 +55,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -54,10 +55,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
int mmin = ( uplo == ChamLower ) ? n : 0; int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = mmin; m < mmax; m++) { for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
if ( (n == m) && (uplo != ChamUpperLower) ) { if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrasm( INSERT_TASK_ztrasm(
...@@ -133,8 +134,8 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -133,8 +134,8 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
int NT = (uplo == ChamLower) ? minMNT : A->nt; int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m; int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n; int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -144,10 +145,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -144,10 +145,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
int nmin = ( uplo == ChamUpper ) ? m : 0; int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( (n == m) && (uplo != ChamUpperLower) ) { if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrasm( INSERT_TASK_ztrasm(
...@@ -219,8 +220,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ ...@@ -219,8 +220,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
int NT = (uplo == ChamLower) ? minMNT : A->nt; int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m; int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n; int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -230,10 +231,10 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ ...@@ -230,10 +231,10 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
int nmin = ( uplo == ChamUpper ) ? m : 0; int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( (n == m) && (uplo != ChamUpperLower) ) { if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_zlantr( INSERT_TASK_zlantr(
...@@ -302,8 +303,8 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ ...@@ -302,8 +303,8 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
int NT = (uplo == ChamLower) ? minMNT : A->nt; int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m; int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n; int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -313,10 +314,10 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_ ...@@ -313,10 +314,10 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
int nmin = ( uplo == ChamUpper ) ? m : 0; int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( (n == m) && (uplo != ChamUpperLower) ) { if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrssq( INSERT_TASK_ztrssq(
...@@ -395,15 +396,17 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -395,15 +396,17 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
*result = 0.0; *result = 0.0;
workmt = chameleon_max( A->mt, A->p ); workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) );
worknt = chameleon_max( A->nt, A->q ); worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) );
switch ( norm ) { switch ( norm ) {
case ChamOneNorm: case ChamOneNorm:
RUNTIME_options_ws_alloc( &options, 1, 0 ); RUNTIME_options_ws_alloc( &options, 1, 0 );
chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 1, A->nb, A->nb, chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 1, A->nb, A->nb,
workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q, workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
wcol_init = 1; wcol_init = 1;
...@@ -411,7 +414,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -411,7 +414,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
* Use the global allocator for Welt, otherwise flush may free the data before the result is read. * Use the global allocator for Welt, otherwise flush may free the data before the result is read.
*/ */
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
A->p, worknt, 0, 0, A->p, worknt, A->p, A->q, chameleon_desc_datadist_get_iparam(A, 0), worknt, 0, 0, chameleon_desc_datadist_get_iparam(A, 0), worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
break; break;
...@@ -423,12 +428,16 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -423,12 +428,16 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
RUNTIME_options_ws_alloc( &options, A->mb, 0 ); RUNTIME_options_ws_alloc( &options, A->mb, 0 );
chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb, chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb,
workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
wcol_init = 1; wcol_init = 1;
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, workmt, chameleon_desc_datadist_get_iparam(A, 1), 0, 0, workmt, chameleon_desc_datadist_get_iparam(A, 1),
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
break; break;
...@@ -440,7 +449,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -440,7 +449,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
alpha = 1.; alpha = 1.;
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q, workmt*2, worknt, 0, 0, workmt*2, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
break; break;
...@@ -452,7 +463,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -452,7 +463,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
RUNTIME_options_ws_alloc( &options, 1, 0 ); RUNTIME_options_ws_alloc( &options, 1, 0 );
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
workmt, worknt, 0, 0, workmt, worknt, A->p, A->q, workmt, worknt, 0, 0, workmt, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
} }
...@@ -504,8 +517,8 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -504,8 +517,8 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
/** /**
* Broadcast the result * Broadcast the result
*/ */
for(m = 0; m < A->p; m++) { for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
for(n = 0; n < A->q; n++) { for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) {
if ( (m != 0) || (n != 0) ) { if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy( INSERT_TASK_dlacpy(
&options, &options,
...@@ -522,7 +535,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia ...@@ -522,7 +535,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait( chamctxt, sequence ); RUNTIME_sequence_wait( chamctxt, sequence );
*result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q )); *result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1) ));
if ( wcol_init ) { if ( wcol_init ) {
chameleon_desc_destroy( &Wcol ); chameleon_desc_destroy( &Wcol );
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @author Lionel Eyraud-Dubois * @author Lionel Eyraud-Dubois
* @date 2023-07-05 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
*/ */
...@@ -40,8 +41,8 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -40,8 +41,8 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -51,10 +52,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -51,10 +52,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
int nmin = ( uplo == ChamUpper ) ? m : 0; int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( n == m ) { if ( n == m ) {
INSERT_TASK_dzasum( INSERT_TASK_dzasum(
...@@ -77,7 +78,7 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -77,7 +78,7 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
} }
for(m = 0; m < MT; m++) { for(m = 0; m < MT; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = Q; n < NT; n++) { for(n = Q; n < NT; n++) {
INSERT_TASK_daxpy( INSERT_TASK_daxpy(
...@@ -133,8 +134,8 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -133,8 +134,8 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -144,10 +145,10 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -144,10 +145,10 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
int nmin = (uplo == ChamUpper ) ? m : 0; int nmin = (uplo == ChamUpper ) ? m : 0;
int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( n == m ) { if ( n == m ) {
if ( trans == ChamConjTrans) { if ( trans == ChamConjTrans) {
...@@ -219,8 +220,8 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -219,8 +220,8 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* Step 1: * Step 1:
...@@ -230,10 +231,10 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -230,10 +231,10 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
int nmin = (uplo == ChamUpper ) ? m : 0; int nmin = (uplo == ChamUpper ) ? m : 0;
int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( n == m ) { if ( n == m ) {
if ( trans == ChamConjTrans) { if ( trans == ChamConjTrans) {
...@@ -321,8 +322,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -321,8 +322,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
*result = 0.0; *result = 0.0;
workmt = chameleon_max( A->mt, A->p ); workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) );
worknt = chameleon_max( A->nt, A->q ); worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) );
switch ( norm ) { switch ( norm ) {
case ChamOneNorm: case ChamOneNorm:
...@@ -330,7 +331,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -330,7 +331,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
RUNTIME_options_ws_alloc( &options, 1, 0 ); RUNTIME_options_ws_alloc( &options, 1, 0 );
chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb, chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb,
workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q, workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
wcol_init = 1; wcol_init = 1;
...@@ -338,7 +341,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -338,7 +341,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
* Use the global allocator for Welt, otherwise flush may free the data before the result is read. * Use the global allocator for Welt, otherwise flush may free the data before the result is read.
*/ */
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
workmt, A->q, 0, 0, workmt, A->q, A->p, A->q, workmt, chameleon_desc_datadist_get_iparam(A, 1), 0, 0, workmt, chameleon_desc_datadist_get_iparam(A, 1),
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
break; break;
...@@ -350,7 +355,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -350,7 +355,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
alpha = 1.; alpha = 1.;
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q, workmt*2, worknt, 0, 0, workmt*2, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
break; break;
...@@ -362,7 +369,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -362,7 +369,9 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
RUNTIME_options_ws_alloc( &options, 1, 0 ); RUNTIME_options_ws_alloc( &options, 1, 0 );
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
workmt, worknt, 0, 0, workmt, worknt, A->p, A->q, workmt, worknt, 0, 0, workmt, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
} }
...@@ -410,8 +419,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -410,8 +419,8 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
/** /**
* Broadcast the result * Broadcast the result
*/ */
for(m = 0; m < A->p; m++) { for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
for(n = 0; n < A->q; n++) { for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) {
if ( (m != 0) || (n != 0) ) { if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy( INSERT_TASK_dlacpy(
&options, &options,
...@@ -428,7 +437,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra ...@@ -428,7 +437,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ); *result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1) );
if ( wcol_init ) { if ( wcol_init ) {
chameleon_desc_destroy( &Wcol ); chameleon_desc_destroy( &Wcol );
......
...@@ -4,17 +4,17 @@ ...@@ -4,17 +4,17 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlascal parallel algorithm * @brief Chameleon zlascal parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Dalal Sukkari * @author Dalal Sukkari
* @author Mathieu Faverge * @author Mathieu Faverge
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -44,8 +44,8 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc ...@@ -44,8 +44,8 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
switch(uplo) { switch(uplo) {
case ChamLower: case ChamLower:
for (n = 0; n < minmnt; n++) { for (n = 0; n < minmnt; n++) {
tempnm = n == A->mt-1 ? A->m-n*A->mb : A->mb; tempnm = A->get_blkdim( A, n, DIM_m, A->m );
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlascal( INSERT_TASK_zlascal(
&options, &options,
...@@ -53,7 +53,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc ...@@ -53,7 +53,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
alpha, A(n, n)); alpha, A(n, n));
for (m = n+1; m < A->mt; m++) { for (m = n+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zlascal( INSERT_TASK_zlascal(
&options, &options,
...@@ -65,8 +65,8 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc ...@@ -65,8 +65,8 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
case ChamUpper: case ChamUpper:
for (m = 0; m < minmnt; m++) { for (m = 0; m < minmnt; m++) {
tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
tempmn = m == A->nt-1 ? A->n-m*A->nb : A->nb; tempmn = A->get_blkdim( A, m, DIM_n, A->n );
INSERT_TASK_zlascal( INSERT_TASK_zlascal(
&options, &options,
...@@ -74,7 +74,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc ...@@ -74,7 +74,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
alpha, A(m, m)); alpha, A(m, m));
for (n = m+1; n < A->nt; n++) { for (n = m+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlascal( INSERT_TASK_zlascal(
&options, &options,
...@@ -87,10 +87,10 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc ...@@ -87,10 +87,10 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
case ChamUpperLower: case ChamUpperLower:
default: default:
for (m = 0; m < A->mt; m++) { for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < A->nt; n++) { for (n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlascal( INSERT_TASK_zlascal(
&options, &options,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlaset parallel algorithm * @brief Chameleon zlaset parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief * @author Hatem Ltaief
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -52,15 +52,15 @@ void chameleon_pzlaset( cham_uplo_t uplo, ...@@ -52,15 +52,15 @@ void chameleon_pzlaset( cham_uplo_t uplo,
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (j = 0; j < minmn; j++){ for (j = 0; j < minmn; j++){
tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb; tempjm = A->get_blkdim( A, j, DIM_m, A->m );
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset( INSERT_TASK_zlaset(
&options, &options,
ChamLower, tempjm, tempjn, alpha, beta, ChamLower, tempjm, tempjn, alpha, beta,
A(j, j)); A(j, j));
for (i = j+1; i < A->mt; i++){ for (i = j+1; i < A->mt; i++){
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; tempim = A->get_blkdim( A, i, DIM_m, A->m );
INSERT_TASK_zlaset( INSERT_TASK_zlaset(
&options, &options,
ChamUpperLower, tempim, tempjn, alpha, alpha, ChamUpperLower, tempim, tempjn, alpha, alpha,
...@@ -70,11 +70,11 @@ void chameleon_pzlaset( cham_uplo_t uplo, ...@@ -70,11 +70,11 @@ void chameleon_pzlaset( cham_uplo_t uplo,
} }
else if (uplo == ChamUpper) { else if (uplo == ChamUpper) {
for (i = 0; i < A->mt; i++) { for (i = 0; i < A->mt; i++) {
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; tempim = A->get_blkdim( A, i, DIM_m, A->m );
if ( i < A->nt ) { if ( i < A->nt ) {
j = i; j = i;
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset( INSERT_TASK_zlaset(
&options, &options,
...@@ -82,7 +82,7 @@ void chameleon_pzlaset( cham_uplo_t uplo, ...@@ -82,7 +82,7 @@ void chameleon_pzlaset( cham_uplo_t uplo,
alpha, beta, A(i, j)); alpha, beta, A(i, j));
} }
for (j = i+1; j < A->nt; j++) { for (j = i+1; j < A->nt; j++) {
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset( INSERT_TASK_zlaset(
&options, &options,
...@@ -93,9 +93,9 @@ void chameleon_pzlaset( cham_uplo_t uplo, ...@@ -93,9 +93,9 @@ void chameleon_pzlaset( cham_uplo_t uplo,
} }
else { else {
for (i = 0; i < A->mt; i++){ for (i = 0; i < A->mt; i++){
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; tempim = A->get_blkdim( A, i, DIM_m, A->m );
for (j = 0; j < A->nt; j++){ for (j = 0; j < A->nt; j++){
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset( INSERT_TASK_zlaset(
&options, &options,
ChamUpperLower, tempim, tempjn, ChamUpperLower, tempim, tempjn,
......
...@@ -4,20 +4,20 @@ ...@@ -4,20 +4,20 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlaset2 parallel algorithm * @brief Chameleon zlaset2 parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Hatem Ltaief * @author Hatem Ltaief
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -51,15 +51,15 @@ void chameleon_pzlaset2( cham_uplo_t uplo, ...@@ -51,15 +51,15 @@ void chameleon_pzlaset2( cham_uplo_t uplo,
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (j = 0; j < minmn; j++){ for (j = 0; j < minmn; j++){
tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb; tempjm = A->get_blkdim( A, j, DIM_m, A->m );
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset2( INSERT_TASK_zlaset2(
&options, &options,
ChamLower, tempjm, tempjn, alpha, ChamLower, tempjm, tempjn, alpha,
A(j, j)); A(j, j));
for (i = j+1; i < A->mt; i++){ for (i = j+1; i < A->mt; i++){
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; tempim = A->get_blkdim( A, i, DIM_m, A->m );
INSERT_TASK_zlaset2( INSERT_TASK_zlaset2(
&options, &options,
ChamUpperLower, tempim, tempjn, alpha, ChamUpperLower, tempim, tempjn, alpha,
...@@ -69,9 +69,9 @@ void chameleon_pzlaset2( cham_uplo_t uplo, ...@@ -69,9 +69,9 @@ void chameleon_pzlaset2( cham_uplo_t uplo,
} }
else if (uplo == ChamUpper) { else if (uplo == ChamUpper) {
for (j = 1; j < A->nt; j++){ for (j = 1; j < A->nt; j++){
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
for (i = 0; i < chameleon_min(j, A->mt); i++){ for (i = 0; i < chameleon_min(j, A->mt); i++){
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; tempim = A->get_blkdim( A, i, DIM_m, A->m );
INSERT_TASK_zlaset2( INSERT_TASK_zlaset2(
&options, &options,
ChamUpperLower, tempim, tempjn, alpha, ChamUpperLower, tempim, tempjn, alpha,
...@@ -79,8 +79,8 @@ void chameleon_pzlaset2( cham_uplo_t uplo, ...@@ -79,8 +79,8 @@ void chameleon_pzlaset2( cham_uplo_t uplo,
} }
} }
for (j = 0; j < minmn; j++){ for (j = 0; j < minmn; j++){
tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb; tempjm = A->get_blkdim( A, j, DIM_m, A->m );
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset2( INSERT_TASK_zlaset2(
&options, &options,
ChamUpper, tempjm, tempjn, alpha, ChamUpper, tempjm, tempjn, alpha,
...@@ -89,9 +89,9 @@ void chameleon_pzlaset2( cham_uplo_t uplo, ...@@ -89,9 +89,9 @@ void chameleon_pzlaset2( cham_uplo_t uplo,
} }
else { else {
for (i = 0; i < A->mt; i++){ for (i = 0; i < A->mt; i++){
tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb; tempim = A->get_blkdim( A, i, DIM_m, A->m );
for (j = 0; j < A->nt; j++){ for (j = 0; j < A->nt; j++){
tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb; tempjn = A->get_blkdim( A, j, DIM_n, A->n );
INSERT_TASK_zlaset2( INSERT_TASK_zlaset2(
&options, &options,
ChamUpperLower, tempim, tempjn, alpha, ChamUpperLower, tempim, tempjn, alpha,
......
/**
*
* @file pzlaswp.c
*
* @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zlaswp parallel algorithm for row permutation.
*
* @version 1.3.0
* @author Alycia Lisito
* @author Matteo Marcos
* @date 2025-03-24
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
#define A(m,n) A, m, n
#define W(m,n) &(ws->W), m, n
/**
 * Insert the tasks that permute tile column n with the pivots of step k,
 * using one get/set task pair per tile (non-batched variant).
 *
 * The tile A(k, n) is first passed to a zlacpy task targeting the workspace
 * tile W(A->myrank, n). A zlaswp_get task is then inserted for A(k, n), and a
 * zlaswp_get / zlaswp_set pair for every tile A(m, n) with k < m < A->mt.
 * The workspace tile is finally handed to the row allreduce or reduce
 * codelet, depending on ws->allreduce.
 *
 * @param[in,out] ws       Workspace; provides the W descriptor and the
 *                         allreduce flag.
 * @param[in]     dir      Direction, forwarded unchanged to every task.
 * @param[in,out] A        Descriptor of the matrix being permuted.
 * @param[in]     ipiv     Pivot descriptor, forwarded to every task.
 * @param[in]     k        Pivot step; also the tile row used as source of the
 *                         initial copy.
 * @param[in]     n        Tile column to process.
 * @param[in,out] options  Task insertion options; withlacpy is forced to 1
 *                         for the initial copy only and restored afterwards.
 */
static inline void
chameleon_pzlaswp_panel_permute( struct chameleon_pzlaswp_s *ws,
                                 cham_dir_t                  dir,
                                 CHAM_desc_t                *A,
                                 CHAM_ipiv_t                *ipiv,
                                 int                         k,
                                 int                         n,
                                 RUNTIME_option_t           *options )
{
    const int kb_rows     = A->get_blkdim( A, k, DIM_m, A->m );
    const int nb_cols     = A->get_blkdim( A, n, DIM_n, A->n );
    const int lacpy_saved = options->withlacpy;
    const int rank        = A->myrank;
    int       row;

    /* Initial copy of A(k, n) into the workspace, with withlacpy forced on */
    options->withlacpy = 1;
    INSERT_TASK_zlacpy( options, ChamUpperLower, kb_rows, nb_cols,
                        A(k, n), W(rank, n) );
    options->withlacpy = lacpy_saved;

    /* Rows selected inside the tile A(k, n) itself */
    INSERT_TASK_zlaswp_get( options, dir, k * A->mb, kb_rows, nb_cols, kb_rows,
                            ipiv, k, A(k, n), W(rank, n) );

    /* Tiles below A(k, n): a get task followed by the matching set task */
    for ( row = k + 1; row < A->mt; row++ ) {
        const int mb_rows = A->get_blkdim( A, row, DIM_m, A->m );

        INSERT_TASK_zlaswp_get( options, dir, row * A->mb, mb_rows, nb_cols, kb_rows,
                                ipiv, k, A(row, n), W(rank, n) );
        INSERT_TASK_zlaswp_set( options, dir, row * A->mb, mb_rows, nb_cols, kb_rows,
                                ipiv, k, A(k, n), A(row, n) );
    }

    /* Combine the workspace tile with the selected reduction scheme */
    if ( !ws->allreduce ) {
        INSERT_TASK_zperm_reduce_row( options, dir, A, W(rank, n), ipiv, k, k, n, ws );
    }
    else {
        INSERT_TASK_zperm_allreduce_row( options, dir, A, W(rank, n), ipiv, k, k, n, ws );
    }
}
/**
 * Insert the tasks that permute tile column n with the pivots of step k,
 * using the batched swap codelets.
 *
 * The tile A(k, n) is first passed to a zlacpy task targeting the workspace
 * tile W(A->myrank, n), followed by a zlaswp_get task on A(k, n). Every tile
 * A(m, n) with k < m < A->mt is then submitted through
 * INSERT_TASK_zlaswp_batched, and INSERT_TASK_zlaswp_batched_flush is called
 * once after the loop. The workspace tile is finally handed to the row
 * allreduce or reduce codelet, depending on ws->allreduce.
 *
 * @param[in,out] ws       Workspace; provides the W descriptor and the
 *                         allreduce flag, and is forwarded to the batched
 *                         codelet.
 * @param[in]     dir      Direction, forwarded unchanged to every task.
 * @param[in,out] A        Descriptor of the matrix being permuted.
 * @param[in]     ipiv     Pivot descriptor, forwarded to every task.
 * @param[in]     k        Pivot step; also the tile row used as source of the
 *                         initial copy.
 * @param[in]     n        Tile column to process.
 * @param[in,out] options  Task insertion options; withlacpy is forced to 1
 *                         for the initial copy only and restored afterwards.
 */
static inline void
chameleon_pzlaswp_panel_permute_batched( struct chameleon_pzlaswp_s *ws,
                                         cham_dir_t                  dir,
                                         CHAM_desc_t                *A,
                                         CHAM_ipiv_t                *ipiv,
                                         int                         k,
                                         int                         n,
                                         RUNTIME_option_t           *options )
{
    int   m;
    int   tempkm, tempmm, tempnn;
    int   withlacpy;
    /*
     * Slot shared by the batched insertions and the flush. It only has to
     * live until this function returns, so an automatic variable replaces the
     * former one-pointer heap allocation (whose result was dereferenced
     * without being checked).
     */
    void *clargs = NULL;

    tempkm = A->get_blkdim( A, k, DIM_m, A->m );
    tempnn = A->get_blkdim( A, n, DIM_n, A->n );

    /* Initial copy of A(k, n) into the workspace, with withlacpy forced on */
    withlacpy = options->withlacpy;
    options->withlacpy = 1;
    INSERT_TASK_zlacpy( options, ChamUpperLower, tempkm, tempnn,
                        A(k, n), W(A->myrank, n) );
    options->withlacpy = withlacpy;

    INSERT_TASK_zlaswp_get( options, dir, k*A->mb, tempkm, tempnn, tempkm,
                            ipiv, k, A(k, n), W(A->myrank, n) );

    for ( m = k + 1; m < A->mt; m++ ) {
        tempmm = A->get_blkdim( A, m, DIM_m, A->m );
        INSERT_TASK_zlaswp_batched( options, dir, m*A->mb, tempmm, tempnn, tempkm, (void *)ws, ipiv, k,
                                    A(m, n), A(k, n), W(A->myrank, n), &clargs );
    }
    /* Single flush call issued once all tiles of the column were inserted */
    INSERT_TASK_zlaswp_batched_flush( options, dir, ipiv, k, A(k, n), W(A->myrank, n), &clargs );

    if ( ws->allreduce ) {
        INSERT_TASK_zperm_allreduce_row( options, dir, A, W(A->myrank, n), ipiv, k, k, n, ws );
    }
    else {
        INSERT_TASK_zperm_reduce_row( options, dir, A, W(A->myrank, n), ipiv, k, k, n, ws );
    }
}
/**
 * Apply the pivots of step k to the tile column n.
 *
 * When CHAMELEON_USE_MPI is defined, the set of involved processes is
 * computed first, the owner of the pivots and the owner of A(k, n) insert
 * their send tasks, and processes that are not involved return immediately.
 * The permutation itself is delegated to the batched or non-batched helper
 * according to ws->batch_size_swap. The process owning A(k, n) finally
 * inserts a zlacpy task from W(A->myrank, n) to A(k, n) and flushes that
 * tile.
 *
 * @param[in,out] ws        Workspace (W descriptor, reduce structure,
 *                          batch_size_swap).
 * @param[in]     dir       Direction, forwarded to the inserted tasks.
 * @param[in,out] A         Descriptor of the matrix being permuted.
 * @param[in]     ipiv      Pivot descriptor.
 * @param[in]     k         Pivot step / tile row index.
 * @param[in]     n         Tile column index.
 * @param[in,out] options   Task insertion options.
 * @param[in,out] sequence  Sequence used to flush A(k, n).
 */
static inline void
chameleon_pzlaswp_panel( struct chameleon_pzlaswp_s *ws,
                         cham_dir_t                  dir,
                         CHAM_desc_t                *A,
                         CHAM_ipiv_t                *ipiv,
                         int                         k,
                         int                         n,
                         RUNTIME_option_t           *options,
                         RUNTIME_sequence_t         *sequence )
{
    CHAM_reduce_t *reduce = &(ws->reduce);

#if defined(CHAMELEON_USE_MPI)
    chameleon_get_proc_involved_in_panelk_2dbc( A, k, n, reduce );

    /* Owner of the pivots of step k */
    if ( ipiv->get_rankof( ipiv, k, k ) == A->myrank ) {
        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved );
        INSERT_TASK_zperm_allreduce_send_invp_row( options, dir, ipiv, k, A, k, n );
    }

    /* Owner of the tile A(k, n) */
    if ( chameleon_getrankof_2d( A, k, n ) == A->myrank ) {
        INSERT_TASK_zperm_allreduce_send_A( options, A, k, n, A->myrank, reduce->np_involved, reduce->proc_involved );
    }

    /* Nothing more to insert on processes outside of the panel */
    if ( !reduce->involved ) {
        return;
    }
#endif

    if ( ws->batch_size_swap != 0 ) {
        chameleon_pzlaswp_panel_permute_batched( ws, dir, A, ipiv, k, n, options );
    }
    else {
        chameleon_pzlaswp_panel_permute( ws, dir, A, ipiv, k, n, options );
    }

    /* Copy the workspace tile back into A(k, n) on its owner */
    if ( chameleon_getrankof_2d( A, k, n ) == A->myrank ) {
        const int kb_rows = A->get_blkdim( A, k, DIM_m, A->m );
        const int nb_cols = A->get_blkdim( A, n, DIM_n, A->n );

        INSERT_TASK_zlacpy( options, ChamUpperLower, kb_rows, nb_cols,
                            W(A->myrank, n), A(k, n) );
        RUNTIME_data_flush( sequence, A(k, n) );
    }

    /* reduce is only read in the MPI section above */
    (void)reduce;
}
/**
 * Parallel driver of the row permutation: inserts, for every pivot step and
 * every tile column of A, the tasks generated by chameleon_pzlaswp_panel.
 *
 * Steps are visited from 0 to IPIV->mt-1 when dir is ChamDirForward, and
 * from IPIV->mt-1 down to 0 otherwise. Within a step, tile columns are
 * visited in increasing order with priority A->nt - n, and
 * RUNTIME_perm_flushk is called once the whole step has been inserted.
 * Nothing is inserted if the sequence status is not CHAMELEON_SUCCESS.
 *
 * @param[in,out] ws        Workspace forwarded to the panel routine.
 * @param[in]     dir       Direction of the traversal of the pivot steps.
 * @param[in,out] A         Descriptor of the matrix being permuted.
 * @param[in]     IPIV      Pivot descriptor.
 * @param[in,out] sequence  Sequence the tasks are inserted in.
 * @param[in,out] request   Request used to initialize the runtime options.
 */
void
chameleon_pzlaswp( struct chameleon_pzlaswp_s *ws,
                   cham_dir_t                  dir,
                   CHAM_desc_t                *A,
                   CHAM_ipiv_t                *IPIV,
                   RUNTIME_sequence_t         *sequence,
                   RUNTIME_request_t          *request )
{
    CHAM_context_t  *chamctxt = chameleon_context_self();
    RUNTIME_option_t options;
    int              nsteps, step;

    if ( sequence->status != CHAMELEON_SUCCESS ) {
        return;
    }
    RUNTIME_options_init( &options, chamctxt, sequence, request );

    nsteps = IPIV->mt;
    for ( step = 0; step < nsteps; step++ ) {
        /* Ascending step index going forward, descending otherwise */
        const int k = ( dir == ChamDirForward ) ? step : ( nsteps - 1 - step );
        int       n;

        for ( n = 0; n < A->nt; n++ ) {
            options.priority = A->nt - n;
            chameleon_pzlaswp_panel( ws, dir, A, IPIV, k, n, &options, sequence );
        }
        RUNTIME_perm_flushk( sequence, IPIV, k );
    }

    RUNTIME_options_finalize( &options, chamctxt );
}
/**
*
* @file pzlaswpc.c
*
* @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zlaswp parallel algorithm for column permutation.
*
* @version 1.3.0
* @author Alycia Lisito
* @author Matteo Marcos
* @date 2025-03-24
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
#define A(m,n) A, m, n
#define W(m,n) &(ws->W), m, n
/**
 * Insert the tasks that permute tile row m with the pivots of step k, using
 * one get/set task pair per tile (non-batched, column-permutation variant).
 *
 * The tile A(m, k) is first passed to a zlacpy task targeting the workspace
 * tile W(m, A->myrank). A zlaswpc_get task is then inserted for A(m, k), and
 * a zlaswpc_get / zlaswpc_set pair for every tile A(m, n) with
 * k < n < A->nt. The workspace tile is finally handed to the column
 * allreduce or reduce codelet, depending on ws->allreduce.
 *
 * @param[in,out] ws       Workspace; provides the W descriptor and the
 *                         allreduce flag.
 * @param[in]     dir      Direction, forwarded unchanged to every task.
 * @param[in,out] A        Descriptor of the matrix being permuted.
 * @param[in]     ipiv     Pivot descriptor, forwarded to every task.
 * @param[in]     m        Tile row to process.
 * @param[in]     k        Pivot step; also the tile column used as source of
 *                         the initial copy.
 * @param[in,out] options  Task insertion options; withlacpy is forced to 1
 *                         for the initial copy only and restored afterwards.
 */
static inline void
chameleon_pzlaswpc_panel_permute( struct chameleon_pzlaswp_s *ws,
                                  cham_dir_t                  dir,
                                  CHAM_desc_t                *A,
                                  CHAM_ipiv_t                *ipiv,
                                  int                         m,
                                  int                         k,
                                  RUNTIME_option_t           *options )
{
    const int mb_rows     = A->get_blkdim( A, m, DIM_m, A->m );
    const int kb_cols     = A->get_blkdim( A, k, DIM_n, A->n );
    const int lacpy_saved = options->withlacpy;
    const int rank        = A->myrank;
    int       col;

    /* Initial copy of A(m, k) into the workspace, with withlacpy forced on */
    options->withlacpy = 1;
    INSERT_TASK_zlacpy( options, ChamUpperLower, mb_rows, kb_cols,
                        A(m, k), W(m, rank) );
    options->withlacpy = lacpy_saved;

    /* Columns selected inside the tile A(m, k) itself */
    INSERT_TASK_zlaswpc_get( options, dir, k * A->nb, mb_rows, kb_cols, kb_cols,
                             ipiv, k, A(m, k), W(m, rank) );

    /* Tiles right of A(m, k): a get task followed by the matching set task */
    for ( col = k + 1; col < A->nt; col++ ) {
        const int nb_cols = A->get_blkdim( A, col, DIM_n, A->n );

        INSERT_TASK_zlaswpc_get( options, dir, col * A->nb, mb_rows, nb_cols, kb_cols,
                                 ipiv, k, A(m, col), W(m, rank) );
        INSERT_TASK_zlaswpc_set( options, dir, col * A->nb, mb_rows, nb_cols, kb_cols,
                                 ipiv, k, A(m, k), A(m, col) );
    }

    /* Combine the workspace tile with the selected reduction scheme */
    if ( !ws->allreduce ) {
        INSERT_TASK_zperm_reduce_col( options, dir, A, W(m, rank), ipiv, k, m, k, ws );
    }
    else {
        INSERT_TASK_zperm_allreduce_col( options, dir, A, W(m, rank), ipiv, k, m, k, ws );
    }
}
/**
 * Insert the tasks that permute tile row m with the pivots of step k, using
 * the batched swap codelets (column-permutation variant).
 *
 * The tile A(m, k) is first passed to a zlacpy task targeting the workspace
 * tile W(m, A->myrank), followed by a zlaswpc_get task on A(m, k). Every tile
 * A(m, n) with k < n < A->nt is then submitted through
 * INSERT_TASK_zlaswpc_batched, and INSERT_TASK_zlaswpc_batched_flush is
 * called once after the loop. The workspace tile is finally handed to the
 * column allreduce or reduce codelet, depending on ws->allreduce.
 *
 * @param[in,out] ws       Workspace; provides the W descriptor and the
 *                         allreduce flag, and is forwarded to the batched
 *                         codelet.
 * @param[in]     dir      Direction, forwarded unchanged to every task.
 * @param[in,out] A        Descriptor of the matrix being permuted.
 * @param[in]     ipiv     Pivot descriptor, forwarded to every task.
 * @param[in]     m        Tile row to process.
 * @param[in]     k        Pivot step; also the tile column used as source of
 *                         the initial copy.
 * @param[in,out] options  Task insertion options; withlacpy is forced to 1
 *                         for the initial copy only and restored afterwards.
 */
static inline void
chameleon_pzlaswpc_panel_permute_batched( struct chameleon_pzlaswp_s *ws,
                                          cham_dir_t                  dir,
                                          CHAM_desc_t                *A,
                                          CHAM_ipiv_t                *ipiv,
                                          int                         m,
                                          int                         k,
                                          RUNTIME_option_t           *options )
{
    int   n;
    int   tempmm, tempnn, tempkn;
    int   withlacpy;
    /*
     * Slot shared by the batched insertions and the flush. It only has to
     * live until this function returns, so an automatic variable replaces the
     * former one-pointer heap allocation (whose result was dereferenced
     * without being checked).
     */
    void *clargs = NULL;

    tempmm = A->get_blkdim( A, m, DIM_m, A->m );
    tempkn = A->get_blkdim( A, k, DIM_n, A->n );

    /* Initial copy of A(m, k) into the workspace, with withlacpy forced on */
    withlacpy = options->withlacpy;
    options->withlacpy = 1;
    INSERT_TASK_zlacpy( options, ChamUpperLower, tempmm, tempkn,
                        A(m, k), W(m, A->myrank) );
    options->withlacpy = withlacpy;

    INSERT_TASK_zlaswpc_get( options, dir, k*A->nb, tempmm, tempkn, tempkn,
                             ipiv, k, A(m, k), W(m, A->myrank) );

    for ( n = k + 1; n < A->nt; n++ ) {
        tempnn = A->get_blkdim( A, n, DIM_n, A->n );
        INSERT_TASK_zlaswpc_batched( options, dir, n*A->nb, tempmm, tempnn, tempkn, (void *)ws, ipiv, k,
                                     A(m, n), A(m, k), W(m, A->myrank), &clargs );
    }
    /* Single flush call issued once all tiles of the row were inserted */
    INSERT_TASK_zlaswpc_batched_flush( options, dir, ipiv, k, A(m, k), W(m, A->myrank), &clargs );

    if ( ws->allreduce ) {
        INSERT_TASK_zperm_allreduce_col( options, dir, A, W(m, A->myrank), ipiv, k, m, k, ws );
    }
    else {
        INSERT_TASK_zperm_reduce_col( options, dir, A, W(m, A->myrank), ipiv, k, m, k, ws );
    }
}
/**
 * Apply the pivots of step k to the tile row m (column permutation).
 *
 * When CHAMELEON_USE_MPI is defined, the set of involved processes is
 * computed first, the owner of the pivots and the owner of A(m, k) insert
 * their send tasks, and processes that are not involved return immediately.
 * The permutation itself is delegated to the batched or non-batched helper
 * according to ws->batch_size_swap. The process owning A(m, k) finally
 * inserts a zlacpy task from W(m, A->myrank) to A(m, k) and flushes that
 * tile.
 *
 * @param[in,out] ws        Workspace (W descriptor, reduce structure,
 *                          batch_size_swap).
 * @param[in]     dir       Direction, forwarded to the inserted tasks.
 * @param[in,out] A         Descriptor of the matrix being permuted.
 * @param[in]     ipiv      Pivot descriptor.
 * @param[in]     m         Tile row index.
 * @param[in]     k         Pivot step / tile column index.
 * @param[in,out] options   Task insertion options.
 * @param[in,out] sequence  Sequence used to flush A(m, k).
 */
static inline void
chameleon_pzlaswpc_panel( struct chameleon_pzlaswp_s *ws,
                          cham_dir_t                  dir,
                          CHAM_desc_t                *A,
                          CHAM_ipiv_t                *ipiv,
                          int                         m,
                          int                         k,
                          RUNTIME_option_t           *options,
                          RUNTIME_sequence_t         *sequence )
{
    CHAM_reduce_t *reduce = &(ws->reduce);

#if defined(CHAMELEON_USE_MPI)
    chameleon_get_proc_involved_in_rowpanelk_2dbc( A, m, k, reduce );

    /* Owner of the pivots of step k */
    if ( ipiv->get_rankof( ipiv, k, k ) == A->myrank ) {
        INSERT_TASK_zperm_allreduce_send_perm( options, dir, ipiv, k, A->myrank, reduce->np_involved, reduce->proc_involved );
        INSERT_TASK_zperm_allreduce_send_invp_col( options, dir, ipiv, k, A, m, k );
    }

    /* Owner of the tile A(m, k) */
    if ( chameleon_getrankof_2d( A, m, k ) == A->myrank ) {
        INSERT_TASK_zperm_allreduce_send_A( options, A, m, k, A->myrank, reduce->np_involved, reduce->proc_involved );
    }

    /* Nothing more to insert on processes outside of the panel */
    if ( !reduce->involved ) {
        return;
    }
#endif

    if ( ws->batch_size_swap != 0 ) {
        chameleon_pzlaswpc_panel_permute_batched( ws, dir, A, ipiv, m, k, options );
    }
    else {
        chameleon_pzlaswpc_panel_permute( ws, dir, A, ipiv, m, k, options );
    }

    /* Copy the workspace tile back into A(m, k) on its owner */
    if ( chameleon_getrankof_2d( A, m, k ) == A->myrank ) {
        const int mb_rows = A->get_blkdim( A, m, DIM_m, A->m );
        const int kb_cols = A->get_blkdim( A, k, DIM_n, A->n );

        INSERT_TASK_zlacpy( options, ChamUpperLower, mb_rows, kb_cols,
                            W(m, A->myrank), A(m, k) );
        RUNTIME_data_flush( sequence, A(m, k) );
    }

    /* reduce is only read in the MPI section above */
    (void)reduce;
}
/**
 * Parallel driver of the column permutation: inserts, for every pivot step
 * and every tile row of A, the tasks generated by chameleon_pzlaswpc_panel.
 *
 * Steps are visited from 0 to IPIV->mt-1 when dir is ChamDirForward, and
 * from IPIV->mt-1 down to 0 otherwise. Within a step, tile rows are visited
 * in increasing order with priority A->mt - m, and RUNTIME_perm_flushk is
 * called once the whole step has been inserted. Nothing is inserted if the
 * sequence status is not CHAMELEON_SUCCESS.
 *
 * @param[in,out] ws        Workspace forwarded to the panel routine.
 * @param[in]     dir       Direction of the traversal of the pivot steps.
 * @param[in,out] A         Descriptor of the matrix being permuted.
 * @param[in]     IPIV      Pivot descriptor.
 * @param[in,out] sequence  Sequence the tasks are inserted in.
 * @param[in,out] request   Request used to initialize the runtime options.
 */
void
chameleon_pzlaswpc( struct chameleon_pzlaswp_s *ws,
                    cham_dir_t                  dir,
                    CHAM_desc_t                *A,
                    CHAM_ipiv_t                *IPIV,
                    RUNTIME_sequence_t         *sequence,
                    RUNTIME_request_t          *request )
{
    CHAM_context_t  *chamctxt = chameleon_context_self();
    RUNTIME_option_t options;
    int              nsteps, step;

    if ( sequence->status != CHAMELEON_SUCCESS ) {
        return;
    }
    RUNTIME_options_init( &options, chamctxt, sequence, request );

    nsteps = IPIV->mt;
    for ( step = 0; step < nsteps; step++ ) {
        /* Ascending step index going forward, descending otherwise */
        const int k = ( dir == ChamDirForward ) ? step : ( nsteps - 1 - step );
        int       m;

        for ( m = 0; m < A->mt; m++ ) {
            options.priority = A->mt - m;
            chameleon_pzlaswpc_panel( ws, dir, A, IPIV, m, k, &options, sequence );
        }
        RUNTIME_perm_flushk( sequence, IPIV, k );
    }

    RUNTIME_options_finalize( &options, chamctxt );
}
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* *
* @file pzlatms.c * @file pzlatms.c
* *
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved. * @copyright 2016-2020 KAUST. All rights reserved.
* *
...@@ -13,7 +13,8 @@ ...@@ -13,7 +13,8 @@
* @version 1.3.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Lionel Eyraud-Dubois * @author Lionel Eyraud-Dubois
* @date 2024-03-14 * @author Pierre Esterie
* @date 2025-01-29
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -34,8 +35,8 @@ zlaset_diag_cpu( void *op_args, ...@@ -34,8 +35,8 @@ zlaset_diag_cpu( void *op_args,
const double *D = (const double *)op_args; const double *D = (const double *)op_args;
CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA ); CHAMELEON_Complex64_t *A = CHAM_tile_get_ptr( tileA );
int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; int tempmm = descA->get_blkdim( descA, m, DIM_m, descA->m );
int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb; int tempnn = descA->get_blkdim( descA, n, DIM_n, descA->n );
int minmn = chameleon_min( tempmm, tempnn ); int minmn = chameleon_min( tempmm, tempnn );
int lda = tileA->ld; int lda = tileA->ld;
int i; int i;
...@@ -60,10 +61,11 @@ zlaset_diag_cpu( void *op_args, ...@@ -60,10 +61,11 @@ zlaset_diag_cpu( void *op_args,
} }
static cham_map_operator_t zlaset_diag_map = { static cham_map_operator_t zlaset_diag_map = {
.name = "zlaset_diag", .name = "zlaset_diag",
.cpufunc = zlaset_diag_cpu, .cpufunc = zlaset_diag_cpu,
.cudafunc = NULL, .cudafunc = NULL,
.hipfunc = NULL, .hipfunc = NULL,
.synchronous = 0,
}; };
/** /**
...@@ -121,7 +123,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -121,7 +123,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
#endif #endif
if ( D == NULL ) { if ( D == NULL ) {
D = malloc( minmn * sizeof(double) ); D = malloc( sizeof(double) * minmn );
alloc_d = 1; alloc_d = 1;
} }
rc = CORE_dlatm1( mode, cond, irsign, idist, seed, D, minmn ); rc = CORE_dlatm1( mode, cond, irsign, idist, seed, D, minmn );
...@@ -198,8 +200,8 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -198,8 +200,8 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
/* Apply a QR factorization */ /* Apply a QR factorization */
mat.mt = descU.mt; mat.mt = descU.mt;
mat.nt = descU.nt; mat.nt = descU.nt;
mat.nodes = descU.p * descU.q; mat.nodes = chameleon_desc_datadist_get_iparam(&descU, 0) * chameleon_desc_datadist_get_iparam(&descU, 1);
mat.p = descU.p; mat.p = chameleon_desc_datadist_get_iparam(&descU, 0);
libhqr_init_hqr( &qrtree, LIBHQR_QR, &mat, libhqr_init_hqr( &qrtree, LIBHQR_QR, &mat,
-1, /*low level tree */ -1, /*low level tree */
...@@ -217,12 +219,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -217,12 +219,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE, chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, descU.nb, ib * descU.nb, ChamComplexDouble, ib, descU.nb, ib * descU.nb,
ib * descU.mt, descU.nb * descU.nt, 0, 0, ib * descU.mt, descU.nb * descU.nt, 0, 0,
ib * descU.mt, descU.nb * descU.nt, descU.p, descU.q, ib * descU.mt, descU.nb * descU.nt,
chameleon_desc_datadist_get_iparam(&descU, 0),
chameleon_desc_datadist_get_iparam(&descU, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE, chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, descU.nb, ib * descU.nb, ChamComplexDouble, ib, descU.nb, ib * descU.nb,
ib * descU.mt, descU.nb * descU.nt, 0, 0, ib * descU.mt, descU.nb * descU.nt, 0, 0,
ib * descU.mt, descU.nb * descU.nt, descU.p, descU.q, ib * descU.mt, descU.nb * descU.nt,
chameleon_desc_datadist_get_iparam(&descU, 0),
chameleon_desc_datadist_get_iparam(&descU, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
/* U <= qr(U) */ /* U <= qr(U) */
...@@ -269,8 +275,8 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -269,8 +275,8 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
/* Apply a QR factorization */ /* Apply a QR factorization */
mat.mt = descV.mt; mat.mt = descV.mt;
mat.nt = descV.nt; mat.nt = descV.nt;
mat.nodes = descV.p * descV.q; mat.nodes = chameleon_desc_datadist_get_iparam(&descV, 0) * chameleon_desc_datadist_get_iparam(&descV, 1);
mat.p = descV.q; mat.p = chameleon_desc_datadist_get_iparam(&descV, 1);
libhqr_init_hqr( &qrtree, LIBHQR_LQ, &mat, libhqr_init_hqr( &qrtree, LIBHQR_LQ, &mat,
-1, /*low level tree */ -1, /*low level tree */
...@@ -288,12 +294,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -288,12 +294,16 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE, chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, descV.nb, ib * descV.nb, ChamComplexDouble, ib, descV.nb, ib * descV.nb,
ib * descV.mt, descV.nb * descV.nt, 0, 0, ib * descV.mt, descV.nb * descV.nt, 0, 0,
ib * descV.mt, descV.nb * descV.nt, descV.p, descV.q, ib * descV.mt, descV.nb * descV.nt,
chameleon_desc_datadist_get_iparam(&descV, 0),
chameleon_desc_datadist_get_iparam(&descV, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE, chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, descV.nb, ib * descV.nb, ChamComplexDouble, ib, descV.nb, ib * descV.nb,
ib * descV.mt, descV.nb * descV.nt, 0, 0, ib * descV.mt, descV.nb * descV.nt, 0, 0,
ib * descV.mt, descV.nb * descV.nt, descV.p, descV.q, ib * descV.mt, descV.nb * descV.nt,
chameleon_desc_datadist_get_iparam(&descV, 0),
chameleon_desc_datadist_get_iparam(&descV, 1),
NULL, NULL, NULL, NULL ); NULL, NULL, NULL, NULL );
/* V <= qr(V) */ /* V <= qr(V) */
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlauum parallel algorithm * @brief Chameleon zlauum parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Julien Langou * @author Julien Langou
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -49,7 +49,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -49,7 +49,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A,
*/ */
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for(n = 0; n < k; n++) { for(n = 0; n < k; n++) {
INSERT_TASK_zherk( INSERT_TASK_zherk(
&options, &options,
...@@ -89,7 +89,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -89,7 +89,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A,
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (m = 0; m < k; m++) { for (m = 0; m < k; m++) {
INSERT_TASK_zherk( INSERT_TASK_zherk(
......
...@@ -4,20 +4,20 @@ ...@@ -4,20 +4,20 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zplghe parallel algorithm * @brief Chameleon zplghe parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Mathis Rade * @author Mathis Rade
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -47,10 +47,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -47,10 +47,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
switch ( uplo ) { switch ( uplo ) {
case ChamLower: case ChamLower:
for (n = 0; n < minmn; n++) { for (n = 0; n < minmn; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
for (m = n; m < A->mt; m++) { for (m = n; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = m + n; options.priority = m + n;
INSERT_TASK_zplghe( INSERT_TASK_zplghe(
...@@ -63,10 +63,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -63,10 +63,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
case ChamUpper: case ChamUpper:
for (m = 0; m < minmn; m++) { for (m = 0; m < minmn; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = m; n < A->nt; n++) { for (n = m; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = m + n; options.priority = m + n;
INSERT_TASK_zplghe( INSERT_TASK_zplghe(
...@@ -80,10 +80,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -80,10 +80,10 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
case ChamUpperLower: case ChamUpperLower:
default: default:
for (m = 0; m < A->mt; m++) { for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < A->nt; n++) { for (n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = m + n; options.priority = m + n;
INSERT_TASK_zplghe( INSERT_TASK_zplghe(
......
...@@ -4,20 +4,20 @@ ...@@ -4,20 +4,20 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zplgsy parallel algorithm * @brief Chameleon zplgsy parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Mathis Rade * @author Mathis Rade
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
*/ */
...@@ -47,10 +47,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ ...@@ -47,10 +47,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
switch ( uplo ) { switch ( uplo ) {
case ChamLower: case ChamLower:
for (n = 0; n < minmn; n++) { for (n = 0; n < minmn; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
for (m = n; m < A->mt; m++) { for (m = n; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = m + n; options.priority = m + n;
INSERT_TASK_zplgsy( INSERT_TASK_zplgsy(
...@@ -63,10 +63,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ ...@@ -63,10 +63,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
case ChamUpper: case ChamUpper:
for (m = 0; m < minmn; m++) { for (m = 0; m < minmn; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = m; n < A->nt; n++) { for (n = m; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = m + n; options.priority = m + n;
INSERT_TASK_zplgsy( INSERT_TASK_zplgsy(
...@@ -80,10 +80,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ ...@@ -80,10 +80,10 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
case ChamUpperLower: case ChamUpperLower:
default: default:
for (m = 0; m < A->mt; m++) { for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < A->nt; n++) { for (n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = m + n; options.priority = m + n;
INSERT_TASK_zplgsy( INSERT_TASK_zplgsy(
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* *
* @file pzplrnk.c * @file pzplrnk.c
* *
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
...@@ -13,95 +13,219 @@ ...@@ -13,95 +13,219 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Florent Pruvost * @author Florent Pruvost
* @author Lionel Eyraud-Dubois * @author Lionel Eyraud-Dubois
* @date 2023-07-05 * @date 2025-01-29
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
#include "control/common.h" #include "control/common.h"
#define C(m, n) C, m, n #define WA(m, n) WA, m, n
#define WA(m, n) &WA, m, n #define WB(m, n) WB, m, n
#define WB(m, n) &WB, m, n #define C(m, n) C, m, n
/** /**
* chameleon_pzplrnk - Generate a random rank-k matrix by tiles. * chameleon_pzplrnk - Generate a random rank-k matrix by tiles.
*/ */
void chameleon_pzplrnk( int K, CHAM_desc_t *C, static inline void
unsigned long long int seedA, chameleon_pzplrnk_generic( CHAM_context_t *chamctxt,
unsigned long long int seedB, int K,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) CHAM_desc_t *WA,
CHAM_desc_t *WB,
CHAM_desc_t *C,
unsigned long long int seedA,
unsigned long long int seedB,
RUNTIME_option_t *options )
{ {
CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = options->sequence;
RUNTIME_option_t options;
int m, n, k, KT;
int tempmm, tempnn, tempkk;
int myp, myq;
CHAMELEON_Complex64_t zbeta; CHAMELEON_Complex64_t zbeta;
CHAM_desc_t WA, WB; int m, n, k, KT;
int tempmm, tempnn, tempkk;
int myrank = RUNTIME_comm_rank( chamctxt );
int initA;
int *initB = malloc( sizeof(int) * C->nt );
chamctxt = chameleon_context_self(); KT = (K + C->mb - 1) / C->mb;
if (sequence->status != CHAMELEON_SUCCESS) {
return; for (k = 0; k < KT; k++) {
tempkk = k == KT-1 ? K - k * WA->nb : WA->nb;
zbeta = k == 0 ? 0. : 1.;
memset( initB, 0, sizeof(int) * C->nt );
for (m = 0; m < C->mt; m++) {
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
initA = 0;
for (n = 0; n < C->nt; n++) {
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( C->get_rankof( C(m, n) ) == myrank ) {
if ( !initA ) {
INSERT_TASK_zplrnt(
options,
tempmm, tempkk, WA(m, myrank),
WA->m, m * WA->mb, k * WA->nb, seedA );
initA = 1;
}
if ( !initB[n] ) {
INSERT_TASK_zplrnt(
options,
tempkk, tempnn, WB(myrank, n),
WB->m, k * WB->mb, n * WB->nb, seedB );
initB[n] = 1;
}
INSERT_TASK_zgemm(
options,
ChamNoTrans, ChamNoTrans,
tempmm, tempnn, tempkk, C->mb,
1., WA(m, myrank),
WB(myrank, n),
zbeta, C(m, n));
}
}
if ( initA ) {
RUNTIME_data_flush( sequence, WA(m, myrank) );
}
}
for (n = 0; n < C->nt; n++) {
if ( initB[n] ) {
RUNTIME_data_flush( sequence, WB(myrank, n) );
}
}
} }
RUNTIME_options_init( &options, chamctxt, sequence, request );
chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE, free( initB );
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), }
C->mt * C->mb, C->nb * C->q, 0, 0,
C->mt * C->mb, C->nb * C->q, C->p, C->q,
NULL, NULL, NULL, NULL );
chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mb * C->p, C->nt * C->nb, 0, 0,
C->mb * C->p, C->nt * C->nb, C->p, C->q,
NULL, NULL, NULL, NULL );
KT = (K + C->mb - 1) / C->mb; /**
myp = C->myrank / C->q; * chameleon_pzplrnk - Generate a random rank-k matrix by tiles on a 2dbc grid.
myq = C->myrank % C->q; */
static inline void
chameleon_pzplrnk_2dbc( CHAM_context_t *chamctxt,
int K,
CHAM_desc_t *WA,
CHAM_desc_t *WB,
CHAM_desc_t *C,
unsigned long long int seedA,
unsigned long long int seedB,
RUNTIME_option_t *options )
{
RUNTIME_sequence_t *sequence = options->sequence;
CHAMELEON_Complex64_t zbeta;
int m, n, k, KT;
int tempmm, tempnn, tempkk;
int p, q, myp, myq;
KT = (K + C->mb - 1) / C->mb;
p = chameleon_desc_datadist_get_iparam( C, 0 );
q = chameleon_desc_datadist_get_iparam( C, 1 );
myp = C->myrank / q;
myq = C->myrank % q;
for (k = 0; k < KT; k++) { for (k = 0; k < KT; k++) {
tempkk = k == KT-1 ? K - k * WA.nb : WA.nb; tempkk = k == KT-1 ? K - k * WA->nb : WA->nb;
zbeta = k == 0 ? 0. : 1.; zbeta = k == 0 ? 0. : 1.;
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n += q) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zplrnt( INSERT_TASK_zplrnt(
&options, options,
tempkk, tempnn, WB(myp, n), tempkk, tempnn, WB(myp, n),
WB.m, k * WB.mb, n * WB.nb, seedB ); WB->m, k * WB->mb, n * WB->nb, seedB );
} }
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m += p) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zplrnt( INSERT_TASK_zplrnt(
&options, options,
tempmm, tempkk, WA(m, myq), tempmm, tempkk, WA(m, myq),
WA.m, m * WA.mb, k * WA.nb, seedA ); WA->m, m * WA->mb, k * WA->nb, seedA );
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=q) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, options,
ChamNoTrans, ChamNoTrans, ChamNoTrans, ChamNoTrans,
tempmm, tempnn, tempkk, C->mb, tempmm, tempnn, tempkk, C->mb,
1., WA(m, myq), 1., WA(m, myq),
WB(myp, n), WB(myp, n),
zbeta, C(m, n)); zbeta, C(m, n));
} }
RUNTIME_data_flush( sequence, WA(m, 0) ); RUNTIME_data_flush( sequence, WA(m, myq) );
} }
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=q) {
RUNTIME_data_flush( sequence, WB(0, n) ); RUNTIME_data_flush( sequence, WB(myp, n) );
} }
} }
}
/**
* Rank-k matrix generator.
*/
void
chameleon_pzplrnk( int K,
CHAM_desc_t *C,
unsigned long long int seedA,
unsigned long long int seedB,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t WA, WB;
int p, q;
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) {
return;
}
RUNTIME_options_init( &options, chamctxt, sequence, request );
p = chameleon_desc_datadist_get_iparam( C, 0 );
q = chameleon_desc_datadist_get_iparam( C, 1 );
if ( ( chamctxt->generic_enabled != CHAMELEON_TRUE ) &&
( C->get_rankof_init == chameleon_getrankof_2d ) &&
( (chameleon_desc_datadist_get_iparam(C, 0) != 1) ||
(chameleon_desc_datadist_get_iparam(C, 1) != 1) ) )
{
chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->m, C->nb * q, 0, 0,
C->m, C->nb * q, p, q,
NULL, NULL, NULL, NULL );
chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mb * p, C->n, 0, 0,
C->mb * p, C->n, p, q,
NULL, NULL, NULL, NULL );
chameleon_pzplrnk_2dbc( chamctxt, K, &WA, &WB, C, seedA, seedB, &options );
}
else {
int np = p * q;
chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->m, C->nb * np, 0, 0,
C->m, C->nb * np, 1, np,
NULL, NULL, NULL, NULL );
chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mb * np, C->n, 0, 0,
C->mb * np, C->n, np, 1,
NULL, NULL, NULL, NULL );
chameleon_pzplrnk_generic( chamctxt, K, &WA, &WB, C, seedA, seedB, &options );
}
RUNTIME_desc_flush( &WA, sequence ); RUNTIME_desc_flush( &WA, sequence );
RUNTIME_desc_flush( &WB, sequence ); RUNTIME_desc_flush( &WB, sequence );
RUNTIME_desc_flush( C, sequence ); RUNTIME_desc_flush( C, sequence );
chameleon_sequence_wait( chamctxt, sequence ); chameleon_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy( &WA ); chameleon_desc_destroy( &WA );
chameleon_desc_destroy( &WB ); chameleon_desc_destroy( &WB );
......