Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Showing with 931 additions and 354 deletions
...@@ -2,17 +2,18 @@ ...@@ -2,17 +2,18 @@
* *
* @file pzgram.c * @file pzgram.c
* *
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zgram parallel algorithm * @brief Chameleon zgram parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c z * @precisions normal z -> s d c z
* *
*/ */
...@@ -33,8 +34,8 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -33,8 +34,8 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/** /**
* 1) compute (scl,ssq) over columns in each tile * 1) compute (scl,ssq) over columns in each tile
...@@ -42,10 +43,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -42,10 +43,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
for(n = 0; n < NT; n++) { for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0; int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = mmin; m < mmax; m++) { for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
if ( n == m ) { if ( n == m ) {
INSERT_TASK_dsyssq( INSERT_TASK_dsyssq(
...@@ -66,7 +67,7 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -66,7 +67,7 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
} }
for(n = 0; n < NT; n++) { for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
/** /**
* 2) reduce columns (scl,ssq) tiles per process (between lines) * 2) reduce columns (scl,ssq) tiles per process (between lines)
...@@ -116,10 +117,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo, ...@@ -116,10 +117,10 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
for(n = 0; n < NT; n++) { for(n = 0; n < NT; n++) {
int mmin = ( uplo == ChamLower ) ? n : 0; int mmin = ( uplo == ChamLower ) ? n : 0;
int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT; int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = mmin; m < mmax; m++) { for(m = mmin; m < mmax; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
INSERT_TASK_zgram( INSERT_TASK_zgram(
options, options,
...@@ -152,9 +153,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des ...@@ -152,9 +153,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des
/* Initialize Wcol */ /* Initialize Wcol */
for(m = 0; m < Wcol->mt; m++) { for(m = 0; m < Wcol->mt; m++) {
tempmm = m == Wcol->mt-1 ? Wcol->m-m*Wcol->mb : Wcol->mb; tempmm = Wcol->get_blkdim( Wcol, m, DIM_m, Wcol->m );
for(n = 0; n < Wcol->nt; n++) { for(n = 0; n < Wcol->nt; n++) {
tempnn = n == Wcol->nt-1 ? Wcol->n-n*Wcol->nb : Wcol->nb; tempnn = Wcol->get_blkdim( Wcol, n, DIM_n, Wcol->n );
INSERT_TASK_dlaset( INSERT_TASK_dlaset(
&options, &options,
ChamUpperLower, tempmm, tempnn, ChamUpperLower, tempmm, tempnn,
...@@ -164,9 +165,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des ...@@ -164,9 +165,9 @@ void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_des
} }
/* Initialize Welt */ /* Initialize Welt */
for(m = 0; m < Welt->mt; m++) { for(m = 0; m < Welt->mt; m++) {
tempmm = m == Welt->mt-1 ? Welt->m-m*Welt->mb : Welt->mb; tempmm = Welt->get_blkdim( Welt, m, DIM_m, Welt->m );
for(n = 0; n < Welt->nt; n++) { for(n = 0; n < Welt->nt; n++) {
tempnn = n == Welt->nt-1 ? Welt->n-n*Welt->nb : Welt->nb; tempnn = Welt->get_blkdim( Welt, n, DIM_n, Welt->n );
INSERT_TASK_dlaset( INSERT_TASK_dlaset(
&options, &options,
ChamUpperLower, tempmm, tempnn, ChamUpperLower, tempmm, tempnn,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zhemm parallel algorithm * @brief Chameleon zhemm parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -109,9 +110,9 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -109,9 +110,9 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
} }
for(n = 0; n < C->nt; n++) { for(n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for(m = 0; m < C->mt; m++) { for(m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* Scale C */ /* Scale C */
options->forcesub = 0; options->forcesub = 0;
...@@ -125,7 +126,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -125,7 +126,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
if (side == ChamLeft) { if (side == ChamLeft) {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
if (k < m) { if (k < m) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -161,7 +162,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -161,7 +162,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
*/ */
else { else {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
if (k < m) { if (k < m) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -199,7 +200,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -199,7 +200,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
else { else {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -235,7 +236,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -235,7 +236,7 @@ chameleon_pzhemm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
*/ */
else { else {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -292,7 +293,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -292,7 +293,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
{ {
RUNTIME_sequence_t *sequence = options->sequence; RUNTIME_sequence_t *sequence = options->sequence;
cham_trans_t transA; cham_trans_t transA;
int m, n, k, p, q, KT, K, lp, lq; int m, n, k, p, q, KT, lp, lq;
int tempmm, tempnn, tempkk; int tempmm, tempnn, tempkk;
int lookahead, myp, myq; int lookahead, myp, myq;
...@@ -301,14 +302,13 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -301,14 +302,13 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
lookahead = chamctxt->lookahead; lookahead = chamctxt->lookahead;
KT = A->nt; KT = A->nt;
K = A->n; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myp = C->myrank / C->q; myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % C->q;
for (k = 0; k < KT; k++ ) { for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p; lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * C->q; lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = k == KT - 1 ? K - k * A->nb : A->nb; tempkk = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
/* Transfer ownership of the k column of A or B */ /* Transfer ownership of the k column of A or B */
...@@ -316,7 +316,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -316,7 +316,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
int Am, Ak; int Am, Ak;
int tempam, tempak; int tempam, tempak;
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( (( uplo == ChamUpper ) && ( m > k )) || if ( (( uplo == ChamUpper ) && ( m > k )) ||
(( uplo == ChamLower ) && ( m < k )) ) (( uplo == ChamLower ) && ( m < k )) )
...@@ -339,48 +339,48 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -339,48 +339,48 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
options, options,
ChamUpperLower, tempam, tempak, ChamUpperLower, tempam, tempak,
A( Am, Ak ), A( Am, Ak ),
WA( m, (k % C->q) + lq ) ); WA( m, (Ak % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, A( Am, Ak ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) );
for ( q=1; q < C->q; q++ ) { for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempam, tempak, ChamUpperLower, tempam, tempak,
WA( m, ((k+q-1) % C->q) + lq ), WA( m, ((Ak+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % C->q) + lq ) ); WA( m, ((Ak+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
} }
} }
/* Transfer ownership of the k row of B, or A */ /* Transfer ownership of the k row of B, or A */
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempkk, tempnn, ChamUpperLower, tempkk, tempnn,
B( k, n ), B( k, n ),
WB( (k % C->p) + lp, n ) ); WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, B( k, n ) ); RUNTIME_data_flush( sequence, B( k, n ) );
for ( p=1; p < C->p; p++ ) { for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempkk, tempnn, ChamUpperLower, tempkk, tempnn,
WB( ((k+p-1) % C->p) + lp, n ), WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) ); WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
} }
} }
/* Perform the update of this iteration */ /* Perform the update of this iteration */
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( k == m ) { if ( k == m ) {
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zhemm( INSERT_TASK_zhemm(
options, ChamLeft, uplo, options, ChamLeft, uplo,
...@@ -400,8 +400,8 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -400,8 +400,8 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
transA = ChamNoTrans; transA = ChamNoTrans;
} }
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
options, transA, ChamNoTrans, options, transA, ChamNoTrans,
...@@ -428,7 +428,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -428,7 +428,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
{ {
RUNTIME_sequence_t *sequence = options->sequence; RUNTIME_sequence_t *sequence = options->sequence;
cham_trans_t transA; cham_trans_t transA;
int m, n, k, p, q, KT, K, lp, lq; int m, n, k, p, q, KT, lp, lq;
int tempmm, tempnn, tempkk; int tempmm, tempnn, tempkk;
int lookahead, myp, myq; int lookahead, myp, myq;
...@@ -437,35 +437,34 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -437,35 +437,34 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
lookahead = chamctxt->lookahead; lookahead = chamctxt->lookahead;
KT = A->mt; KT = A->mt;
K = A->m; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myp = C->myrank / C->q; myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % C->q;
for (k = 0; k < KT; k++ ) { for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p; lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * C->q; lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = k == KT - 1 ? K - k * A->nb : A->nb; tempkk = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
/* Transfer ownership of the k column of A or B */ /* Transfer ownership of the k column of A or B */
for (m = 0; m < C->mt; m++ ) { for (m = 0; m < C->mt; m++ ) {
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkk, ChamUpperLower, tempmm, tempkk,
B( m, k ), B( m, k ),
WA( m, (k % C->q) + lq ) ); WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, B( m, k ) ); RUNTIME_data_flush( sequence, B( m, k ) );
for ( q=1; q < C->q; q++ ) { for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkk, ChamUpperLower, tempmm, tempkk,
WA( m, ((k+q-1) % C->q) + lq ), WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % C->q) + lq ) ); WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
} }
} }
...@@ -474,7 +473,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -474,7 +473,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
int Ak, An; int Ak, An;
int tempak, tempan; int tempak, tempan;
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( (( uplo == ChamUpper ) && ( n < k )) || if ( (( uplo == ChamUpper ) && ( n < k )) ||
(( uplo == ChamLower ) && ( n > k )) ) (( uplo == ChamLower ) && ( n > k )) )
...@@ -496,26 +495,26 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -496,26 +495,26 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
options, options,
ChamUpperLower, tempak, tempan, ChamUpperLower, tempak, tempan,
A( Ak, An ), A( Ak, An ),
WB( (k % C->p) + lp, n ) ); WB( (Ak % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, A( Ak, An ) ); RUNTIME_data_flush( sequence, A( Ak, An ) );
for ( p=1; p < C->p; p++ ) { for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempak, tempan, ChamUpperLower, tempak, tempan,
WB( ((k+p-1) % C->p) + lp, n ), WB( ((Ak+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) ); WB( ((Ak+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
} }
} }
/* Perform the update of this iteration */ /* Perform the update of this iteration */
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( k == n ) { if ( k == n ) {
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* A has been stored in WA or WB for the summa ring */ /* A has been stored in WA or WB for the summa ring */
INSERT_TASK_zhemm( INSERT_TASK_zhemm(
...@@ -536,8 +535,8 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -536,8 +535,8 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
transA = ChamNoTrans; transA = ChamNoTrans;
} }
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
options, ChamNoTrans, transA, options, ChamNoTrans, transA,
...@@ -594,16 +593,16 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -594,16 +593,16 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
for(m = 0; m < C->mt; m++) { for(m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for(n = 0; n < C->nt; n++) { for(n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
/* /*
* ChamLeft / ChamLower * ChamLeft / ChamLower
*/ */
if (side == ChamLeft) { if (side == ChamLeft) {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < m) { if (k < m) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -641,7 +640,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -641,7 +640,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
*/ */
else { else {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < m) { if (k < m) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -681,7 +680,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -681,7 +680,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
else { else {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -719,7 +718,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -719,7 +718,7 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
*/ */
else { else {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
......
...@@ -4,21 +4,21 @@ ...@@ -4,21 +4,21 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zher2k parallel algorithm * @brief Chameleon zher2k parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -52,7 +52,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -52,7 +52,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if (uplo == ChamLower) { if (uplo == ChamLower) {
mmin = n+1; mmin = n+1;
...@@ -68,7 +68,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -68,7 +68,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
dbeta = k == 0 ? beta : 1.0; dbeta = k == 0 ? beta : 1.0;
INSERT_TASK_zher2k( INSERT_TASK_zher2k(
&options, &options,
...@@ -79,9 +79,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -79,9 +79,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
dbeta, C(n, n)); /* ldc * N */ dbeta, C(n, n)); /* ldc * N */
} }
for (m = mmin; m < mmax; m++) { for (m = mmin; m < mmax; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -106,7 +106,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -106,7 +106,7 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
dbeta = k == 0 ? beta : 1.0; dbeta = k == 0 ? beta : 1.0;
INSERT_TASK_zher2k( INSERT_TASK_zher2k(
&options, &options,
...@@ -117,9 +117,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -117,9 +117,9 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
dbeta, C(n, n)); /* ldc * N */ dbeta, C(n, n)); /* ldc * N */
} }
for (m = mmin; m < mmax; m++) { for (m = mmin; m < mmax; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
...@@ -14,7 +14,8 @@ ...@@ -14,7 +14,8 @@
* @version 1.3.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Ana Hourcau * @author Ana Hourcau
* @date 2024-07-17 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> z d * @precisions normal z -> z d
* *
*/ */
...@@ -28,8 +29,11 @@ ...@@ -28,8 +29,11 @@
#define W(desc, m, n) (desc), (m), (n) #define W(desc, m, n) (desc), (m), (n)
static inline void static inline void
chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, chameleon_pzhered_frb( cham_trans_t trans,
CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, cham_uplo_t uplo,
CHAM_desc_t *A,
CHAM_desc_t *Wnorm,
CHAM_desc_t *Welt,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
double alpha = 1.0; double alpha = 1.0;
...@@ -40,8 +44,8 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -40,8 +44,8 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
int NT = A->nt; int NT = A->nt;
int M = A->m; int M = A->m;
int N = A->n; int N = A->n;
int P = Welt->p; int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = Welt->q; int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/* Initialize workspaces for tile norms */ /* Initialize workspaces for tile norms */
for (m = 0; m < Wnorm->mt; m++) for (m = 0; m < Wnorm->mt; m++)
...@@ -78,14 +82,13 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -78,14 +82,13 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
int nmin = (uplo == ChamUpper) ? m : 0; int nmin = (uplo == ChamUpper) ? m : 0;
int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, NT) : NT; int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, NT) : NT;
int tempmm = (m == (MT - 1)) ? M - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, M );
for (n = nmin; n < nmax; n++) for (n = nmin; n < nmax; n++)
{ {
int tempnn = (n == (NT - 1)) ? N - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, N );
if (n == m) if ( n == m ) {
{
if ( trans == ChamConjTrans ) { if ( trans == ChamConjTrans ) {
INSERT_TASK_zhessq( INSERT_TASK_zhessq(
options, ChamEltwise, uplo, tempmm, options, ChamEltwise, uplo, tempmm,
...@@ -97,8 +100,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -97,8 +100,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
A(m, n), W( Wnorm, m, n) ); A(m, n), W( Wnorm, m, n) );
} }
} }
else else {
{
INSERT_TASK_zgessq( INSERT_TASK_zgessq(
options, ChamEltwise, tempmm, tempnn, options, ChamEltwise, tempmm, tempnn,
A(m, n), W( Wnorm, m, n )); A(m, n), W( Wnorm, m, n ));
...@@ -162,11 +164,11 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -162,11 +164,11 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
/** /**
* Broadcast the result * Broadcast the result
*/ */
for (m = 0; m < A->p; m++) for (m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++)
{ {
for (n = 0; n < A->q; n++) for (n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++)
{ {
if ((m != 0) || (n != 0)) if ( ( m != 0 ) || ( n != 0 ) )
{ {
INSERT_TASK_dlacpy( INSERT_TASK_dlacpy(
options, options,
...@@ -180,14 +182,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -180,14 +182,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
/** /**
* *
*/ */
void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_desc_t *A, void chameleon_pzhered( cham_trans_t trans,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) cham_uplo_t uplo,
double prec,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
CHAM_desc_t Wcol; CHAM_desc_t Wcol;
CHAM_desc_t Welt; CHAM_desc_t Welt;
double gnorm, threshold, eps; double gnorm, threshold, eps, eps_diag, threshold_diag;
int workmt, worknt; int workmt, worknt;
int m, n; int m, n;
...@@ -199,71 +205,66 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -199,71 +205,66 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
} }
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
workmt = chameleon_max(A->mt, A->p); workmt = chameleon_max(A->mt, chameleon_desc_datadist_get_iparam(A, 0));
worknt = chameleon_max(A->nt, A->q); worknt = chameleon_max(A->nt, chameleon_desc_datadist_get_iparam(A, 1));
RUNTIME_options_ws_alloc(&options, 1, 0); RUNTIME_options_ws_alloc(&options, 1, 0);
/* Matrix to store the norm of each element */ /* Matrix to store the norm of each element */
chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg); NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg);
/* Matrix to compute the global frobenius norm */ /* Matrix to compute the global frobenius norm */
chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, workmt * 2, worknt, 0, 0, workmt * 2, worknt, chameleon_desc_datadist_get_iparam(A, 0), chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL);
chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options ); chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options );
CHAMELEON_Desc_Flush(&Wcol, sequence); CHAMELEON_Desc_Flush( &Wcol, sequence );
CHAMELEON_Desc_Flush(&Welt, sequence); CHAMELEON_Desc_Flush( &Welt, sequence );
CHAMELEON_Desc_Flush(A, sequence); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait( chamctxt, sequence );
gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q)); gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1)));
chameleon_desc_destroy(&Welt); chameleon_desc_destroy(&Welt);
/** /**
* Reduce the precision of the tiles if possible * Reduce the precision of the tiles if possible
*/ */
if (prec < 0.) eps_diag = CHAMELEON_slamch();
{ if (prec < 0.) {
#if !defined(CHAMELEON_SIMULATION) eps = CHAMELEON_dlamch();
eps = LAPACKE_dlamch_work('e');
#else
#if defined(PRECISION_z) || defined(PRECISION_d)
eps = 1.e-15;
#else
eps = 1.e-7;
#endif
#endif
} }
else else {
{
eps = prec; eps = prec;
} }
threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt));
threshold_diag = (eps < eps_diag) ? threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt));
#if defined(CHAMELEON_DEBUG_GERED) #if defined(CHAMELEON_DEBUG_GERED)
fprintf(stderr, fprintf( stderr,
"[%2d] The norm of A is: %e\n" "[%2d] The norm of A is: %e\n"
"[%2d] The requested precision is: %e\n" "[%2d] The requested precision is: %e\n"
"[%2d] The computed threshold is: %e\n", "[%2d] The computed threshold is: %e\n"
A->myrank, gnorm, "[%2d] The threshold diag is: %e\n",
A->myrank, eps, A->myrank, gnorm,
A->myrank, threshold); A->myrank, eps,
A->myrank, threshold,
A->myrank, threshold_diag );
#endif #endif
for (m = 0; m < A->mt; m++) for (m = 0; m < A->mt; m++)
{ {
int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; int tempmm = A->get_blkdim( A, m, DIM_m, A->m );
int nmin = (uplo == ChamUpper) ? m : 0; int nmin = (uplo == ChamUpper) ? m : 0;
int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, A->nt) : A->nt; int nmax = (uplo == ChamLower) ? chameleon_min(m + 1, A->nt) : A->nt;
for (n = nmin; n < nmax; n++) for (n = nmin; n < nmax; n++)
{ {
int tempnn = (n == (A->nt - 1)) ? A->n - n * A->nb : A->nb; int tempnn = A->get_blkdim( A, n, DIM_n, A->n );
/* /*
* u_{high} = 1e-16 (later should be application accuracy) * u_{high} = 1e-16 (later should be application accuracy)
...@@ -271,15 +272,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -271,15 +272,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
* ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low})
* ||A_{i,j}||_F < threshold / u_{low} * ||A_{i,j}||_F < threshold / u_{low}
*/ */
INSERT_TASK_zgered( &options, threshold, if ( m == n ) {
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); INSERT_TASK_zgered( &options, threshold_diag,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
else {
INSERT_TASK_zgered( &options, threshold,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
} }
} }
CHAMELEON_Desc_Flush(A, sequence); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy(&Wcol); chameleon_desc_destroy( &Wcol );
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free( &options );
RUNTIME_options_finalize(&options, chamctxt); RUNTIME_options_finalize( &options, chamctxt );
} }
This diff is collapsed.
This diff is collapsed.
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zlacpy parallel algorithm * @brief Chameleon zlacpy parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @date 2024-02-18
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.