Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Showing with 920 additions and 392 deletions
......@@ -2,7 +2,7 @@
*
* @file mapv.c
*
* @copyright 2018-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2018-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......
......@@ -2,7 +2,7 @@
*
* @file pmap.c
*
* @copyright 2018-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2018-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zbuild parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
......@@ -19,7 +19,7 @@
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @author Florent Pruvost
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -2,16 +2,17 @@
*
* @file pzcesca.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zcesca parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Florent Pruvost
* @date 2022-02-22
* @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c z
*
*/
......@@ -37,16 +38,16 @@ chameleon_pzcesca_internal( int center,
int NT = A->nt;
int M = A->m;
int N = A->n;
int P = A->p;
int Q = A->q;
int P = chameleon_desc_datadist_get_iparam(A, 0);
int Q = chameleon_desc_datadist_get_iparam(A, 1);
/**
* 1) compute sums and sum-square (scl,ssq) in each tile
*/
for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = 0; m < MT; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int tempmm = A->get_blkdim( A, m, DIM_m, M );
if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) {
INSERT_TASK_zgesum(
options, ChamColumnwise, tempmm, tempnn,
......@@ -71,7 +72,7 @@ chameleon_pzcesca_internal( int center,
}
for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) {
/**
......@@ -126,7 +127,7 @@ chameleon_pzcesca_internal( int center,
}
for(m = 0; m < MT; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int tempmm = A->get_blkdim( A, m, DIM_m, M );
if ( (center == 1) && ( (axis == ChamRowwise) || (axis == ChamEltwise) ) ) {
/**
......@@ -193,10 +194,10 @@ chameleon_pzcesca_internal( int center,
/* Finally compute Centered-Scaled matrix coefficients inplace */
for(n = 0; n < NT; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
int tempnn = A->get_blkdim( A, n, DIM_n, N );
for(m = 0; m < MT; m++) {
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int tempmm = A->get_blkdim( A, m, DIM_m, M );
INSERT_TASK_zcesca(
options,
......@@ -234,9 +235,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c
/* Initialize Wgcol */
for(m = 0; m < Wgcol->mt; m++) {
tempmm = m == Wgcol->mt-1 ? Wgcol->m-m*Wgcol->mb : Wgcol->mb;
tempmm = Wgcol->get_blkdim( Wgcol, m, DIM_m, Wgcol->m );
for(n = 0; n < Wgcol->nt; n++) {
tempnn = n == Wgcol->nt-1 ? Wgcol->n-n*Wgcol->nb : Wgcol->nb;
tempnn = Wgcol->get_blkdim( Wgcol, n, DIM_n, Wgcol->n );
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
......@@ -246,9 +247,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c
}
/* Initialize Wgrow */
for(m = 0; m < Wgrow->mt; m++) {
tempmm = m == Wgrow->mt-1 ? Wgrow->m-m*Wgrow->mb : Wgrow->mb;
tempmm = Wgrow->get_blkdim( Wgrow, m, DIM_m, Wgrow->m );
for(n = 0; n < Wgrow->nt; n++) {
tempnn = n == Wgrow->nt-1 ? Wgrow->n-n*Wgrow->nb : Wgrow->nb;
tempnn = Wgrow->get_blkdim( Wgrow, n, DIM_n, Wgrow->n );
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
......@@ -258,9 +259,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c
}
/* Initialize Wgelt */
for(m = 0; m < Wgelt->mt; m++) {
tempmm = m == Wgelt->mt-1 ? Wgelt->m-m*Wgelt->mb : Wgelt->mb;
tempmm = Wgelt->get_blkdim( Wgelt, m, DIM_m, Wgelt->m );
for(n = 0; n < Wgelt->nt; n++) {
tempnn = n == Wgelt->nt-1 ? Wgelt->n-n*Wgelt->nb : Wgelt->nb;
tempnn = Wgelt->get_blkdim( Wgelt, n, DIM_n, Wgelt->n );
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
......@@ -270,9 +271,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c
}
/* Initialize Wdcol */
for(m = 0; m < Wdcol->mt; m++) {
tempmm = m == Wdcol->mt-1 ? Wdcol->m-m*Wdcol->mb : Wdcol->mb;
tempmm = Wdcol->get_blkdim( Wdcol, m, DIM_m, Wdcol->m );
for(n = 0; n < Wdcol->nt; n++) {
tempnn = n == Wdcol->nt-1 ? Wdcol->n-n*Wdcol->nb : Wdcol->nb;
tempnn = Wdcol->get_blkdim( Wdcol, n, DIM_n, Wdcol->n );
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
......@@ -282,9 +283,9 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c
}
/* Initialize Wdrow */
for(m = 0; m < Wdrow->mt; m++) {
tempmm = m == Wdrow->mt-1 ? Wdrow->m-m*Wdrow->mb : Wdrow->mb;
tempmm = Wdrow->get_blkdim( Wdrow, m, DIM_m, Wdrow->m );
for(n = 0; n < Wdrow->nt; n++) {
tempnn = n == Wdrow->nt-1 ? Wdrow->n-n*Wdrow->nb : Wdrow->nb;
tempnn = Wdrow->get_blkdim( Wdrow, n, DIM_n, Wdrow->n );
INSERT_TASK_dlaset(
&options,
ChamUpperLower, tempmm, tempnn,
......
......@@ -4,19 +4,19 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgebrd parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Hatem Ltaief
* @author Azzam Haidar
* @author Mathieu Faverge
* @author Alycia Lisito
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......@@ -178,8 +178,11 @@ chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A,
CHAM_desc_t descAB;
cham_uplo_t uplo;
int M, N, MINMN, NB, LDAB, ABn;
#if !defined(CHAMELEON_SIMULATION)
int info;
int KL, KU;
char gbbrd_vect;
#endif
chamctxt = chameleon_context_self();
if ( sequence->status != CHAMELEON_SUCCESS ) {
......@@ -205,13 +208,13 @@ chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A,
/* Convert matrix to band form */
chameleon_pztile2band( uplo, A, &descAB, sequence, request );
#if !defined(CHAMELEON_SIMULATION)
/* NCC = 0, C = NULL, we do not update any matrix with new singular vectors */
/* On exit, AB = U (S +~ E) VT */
KL = uplo == ChamUpper ? 0 : NB;
KU = uplo == ChamUpper ? NB : 0;
/* Manage the case where only singular values are required */
char gbbrd_vect;
if ( jobu == ChamNoVec ) {
if ( jobvt == ChamNoVec ) {
gbbrd_vect = 'N';
......@@ -228,6 +231,7 @@ chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A,
gbbrd_vect = 'B';
}
}
#endif
CHAMELEON_Desc_Flush( A, sequence );
CHAMELEON_Desc_Flush( &descAB, sequence );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqf parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -22,7 +22,7 @@
* @author Florent Pruvost
* @author Raphael Boucherie
* @author Samuel Thibault
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -46,8 +46,8 @@ int chameleon_pzgelqf_step( int genD, int k, int ib,
int m, n;
int tempkm, tempkn, tempmm, tempnn;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgelqt(
options,
tempkm, tempkn, ib, T->nb,
......@@ -55,8 +55,8 @@ int chameleon_pzgelqf_step( int genD, int k, int ib,
T(k, k));
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
options,
ChamUpper, tempDkm, tempDkn,
......@@ -72,7 +72,7 @@ int chameleon_pzgelqf_step( int genD, int k, int ib,
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zunmlq(
options,
ChamRight, ChamConjTrans,
......@@ -85,7 +85,7 @@ int chameleon_pzgelqf_step( int genD, int k, int ib,
RUNTIME_data_flush( sequence, T(k, k) );
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
RUNTIME_data_migrate( sequence, A(k, k),
A->get_rankof( A, k, n ) );
......@@ -98,7 +98,7 @@ int chameleon_pzgelqf_step( int genD, int k, int ib,
A(k, n),
T(k, n));
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
RUNTIME_data_migrate( sequence, A(m, k),
A->get_rankof( A, m, n ) );
......
......@@ -4,17 +4,17 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqf_param parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -49,7 +49,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
int tempkmin, tempkm, tempnn, tempmm, temppn;
int node, nbtiles;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
/* The number of gelqt to apply */
nbgelqt = qrtree->getnbgeqrf( qrtree, k );
......@@ -63,7 +63,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
continue;
}
temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
temppn = A->get_blkdim( A, p, DIM_n, A->n );
tempkmin = chameleon_min(tempkm, temppn);
INSERT_TASK_zgelqt(
......@@ -72,8 +72,8 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
A(k, p), T(k, p));
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy(
options,
......@@ -89,7 +89,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zunmlq(
options,
ChamRight, ChamConjTrans,
......@@ -112,7 +112,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
n = tiles[i];
p = qrtree->currpiv( qrtree, k, n );
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
if ( qrtree->gettype( qrtree, k, n ) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -142,7 +142,7 @@ int chameleon_pzgelqf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
T(k, n));
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
node = A->get_rankof( A, m, n );
RUNTIME_data_migrate( sequence, A(m, p), node );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqfrh parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -23,7 +23,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -92,10 +92,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
for (k = 0; k < K; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for (N = k; N < A->nt; N += BS) {
tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
tempNn = A->get_blkdim( A, N, DIM_n, A->n );
tempkmin = chameleon_min(tempkm, tempNn);
INSERT_TASK_zgelqt(
&options,
......@@ -103,8 +103,8 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
A(k, N),
T(k, N));
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
int tempDNn = D->get_blkdim( D, N, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -120,7 +120,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#endif
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zunmlq(
&options,
ChamRight, ChamConjTrans,
......@@ -133,7 +133,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
RUNTIME_data_flush( sequence, T(k, N) );
for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
RUNTIME_data_migrate( sequence, A(k, N),
A->get_rankof( A, k, n ) );
......@@ -147,7 +147,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
T(k, n));
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
RUNTIME_data_migrate( sequence, A(m, N),
A->get_rankof( A, m, n ) );
......@@ -167,7 +167,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
}
for (RD = BS; RD < A->nt-k; RD *= 2) {
for (N = k; N+RD < A->nt; N += 2*RD) {
tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
tempNRDn = A->get_blkdim( A, N+RD, DIM_n, A->n );
node = A->get_rankof( A, k, N+RD );
RUNTIME_data_migrate( sequence, A(k, N), node );
......@@ -182,7 +182,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
T2(k, N+RD));
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
node = A->get_rankof( A, m, N+RD );
RUNTIME_data_migrate( sequence, A(m, N), node );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgemm parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
......@@ -19,7 +19,8 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Alycia Lisito
* @date 2022-02-22
* @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -84,9 +85,9 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* Scale C */
options->forcesub = 0;
......@@ -100,7 +101,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
if (transA == ChamNoTrans) {
if (transB == ChamNoTrans) {
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgemm_Astat(
options,
......@@ -116,7 +117,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
*/
else {
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgemm_Astat(
options,
......@@ -134,7 +135,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
else {
if (transB == ChamNoTrans) {
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
INSERT_TASK_zgemm_Astat(
options,
......@@ -150,7 +151,7 @@ chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
*/
else {
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
INSERT_TASK_zgemm_Astat(
options,
......@@ -186,46 +187,55 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
RUNTIME_sequence_t *sequence = options->sequence;
int m, n, k, p, q, KT, K, lp, lq;
int tempmm, tempnn, tempkk;
int lookahead, myp, myq;
int lookahead, myp, myq, DIM_k;
CHAMELEON_Complex64_t zbeta;
CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
lookahead = chamctxt->lookahead;
KT = transA == ChamNoTrans ? A->nt : A->mt;
K = transA == ChamNoTrans ? A->n : A->m;
myp = C->myrank / C->q;
myq = C->myrank % C->q;
if ( transA == ChamNoTrans ) {
KT = A->nt;
K = A->n;
DIM_k = DIM_n;
}
else {
KT = A->mt;
K = A->m;
DIM_k = DIM_m;
}
myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
/*
* Loop over the k dimension; the branches below handle both the
* ChamNoTrans and the transposed cases of A and B
*/
for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p;
lq = (k % lookahead) * C->q;
tempkk = k == KT - 1 ? K - k * A->nb : A->nb;
lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = A->get_blkdim( A, k, DIM_k, K );
zbeta = k == 0 ? beta : zone;
/* Transfer ownership of the k-th column of A */
for (m = 0; m < C->mt; m ++ ) {
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( transA == ChamNoTrans ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempmm, tempkk,
A( m, k ),
WA( m, (k % C->q) + lq ) );
WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, A( m, k ) );
for ( q=1; q < C->q; q++ ) {
for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempmm, tempkk,
WA( m, ((k+q-1) % C->q) + lq ),
WA( m, ((k+q) % C->q) + lq ) );
WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
}
}
else {
......@@ -233,39 +243,39 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
options,
ChamUpperLower, tempkk, tempmm,
A( k, m ),
WA( m, (k % C->q) + lq ) );
WA( m, (m % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, A( k, m ) );
for ( q=1; q < C->q; q++ ) {
for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempkk, tempmm,
WA( m, ((k+q-1) % C->q) + lq ),
WA( m, ((k+q) % C->q) + lq ) );
WA( m, ((m+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((m+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
}
}
}
/* Transfer ownership of the k-th row of B */
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( transB == ChamNoTrans ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempkk, tempnn,
B( k, n ),
WB( (k % C->p) + lp, n ) );
WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, B( k, n ) );
for ( p=1; p < C->p; p++ ) {
for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempkk, tempnn,
WB( ((k+p-1) % C->p) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) );
WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
}
}
else {
......@@ -273,25 +283,25 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
options,
ChamUpperLower, tempnn, tempkk,
B( n, k ),
WB( (k % C->p) + lp, n ) );
WB( (n % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, B( n, k ) );
for ( p=1; p < C->p; p++ ) {
for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy(
options,
ChamUpperLower, tempnn, tempkk,
WB( ((k+p-1) % C->p) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) );
WB( ((n+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((n+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
}
}
}
for (m = myp; m < C->mt; m+=C->p) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = myq; n < C->nt; n+=C->q) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zgemm(
options,
......@@ -327,16 +337,16 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
tempnn = C->get_blkdim( C, n, DIM_n, C->n );
/*
* A: ChamNoTrans / B: ChamNoTrans
*/
if (transA == ChamNoTrans) {
if (transB == ChamNoTrans) {
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm(
options,
......@@ -352,7 +362,7 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
*/
else {
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm(
options,
......@@ -370,7 +380,7 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
else {
if (transB == ChamNoTrans) {
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm(
options,
......@@ -386,7 +396,7 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
*/
else {
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm(
options,
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved.
*
......@@ -17,7 +17,8 @@
* @author Dalal Sukkari
* @author Alycia Lisito
* @author Lionel Eyraud-Dubois
* @date 2023-07-05
* @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -38,8 +39,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
RUNTIME_option_t options;
CHAM_desc_t X, SX, NRMX, NRMSX, DROW;
int m, n, k;
int myp = A->myrank / A->q;
int myq = A->myrank % A->q;
int myp = A->myrank / chameleon_desc_datadist_get_iparam(A, 1);
int myq = A->myrank % chameleon_desc_datadist_get_iparam(A, 1);
int tempmm, tempnn;
int cnt, maxiter;
double e0, normx, normsx, beta, scl;
......@@ -67,7 +68,9 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
* This is the same issue for X and SX to be reused from one iteration to another.
*/
chameleon_desc_init( &DROW, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb,
A->p, A->n, 0, 0, A->p, A->n, A->p, A->q,
chameleon_desc_datadist_get_iparam(A, 0), A->n, 0, 0, chameleon_desc_datadist_get_iparam(A, 0), A->n,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
/**
* NRMX must be allocated with GLOBAL to be able to access the norm value
......@@ -75,7 +78,10 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
* This is the same issue for NRMSX.
*/
chameleon_desc_init( &NRMX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q,
chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1), 0, 0
, chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1),
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
/**
......@@ -86,8 +92,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
* So drow[j] = sum( S_{p,j}, p=0..P-1 ) with S_{p,j} = sum( |A_{i,j}|, i=0..m-1 \ i%P = p )
*
*/
for(n = myq; n < A->nt; n += A->q) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
for(n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
/* Zeroes the local intermediate vector */
INSERT_TASK_dlaset(
......@@ -97,8 +103,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
DROW( myp, n ) );
/* Computes the sums of the local tiles into the local vector */
for(m = myp; m < A->mt; m += A->p) {
tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
for(m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0)) {
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_dzasum(
&options,
ChamColumnwise, ChamUpperLower, tempmm, tempnn,
......@@ -106,7 +112,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Reduce on first row of nodes */
for(m = 1; m < A->p; m++) {
for(m = 1; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
INSERT_TASK_daxpy(
&options, tempnn, 1.,
DROW( m, n ), 1,
......@@ -125,8 +131,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
1., 0.,
NRMX( myp, myq ) );
for( n = myq; n < A->nt; n += A->q ) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
for( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_dgessq(
&options, ChamEltwise, 1, tempnn,
DROW( myp, n ),
......@@ -134,7 +140,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Reduce on first row of nodes */
for(n = 1; n < A->q; n++) {
for(n = 1; n < chameleon_desc_datadist_get_iparam(A, 1); n++) {
INSERT_TASK_dplssq(
&options, ChamEltwise, 1, 1,
NRMX( myp, n ),
......@@ -146,8 +152,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Bcast norm over processes from node (0,0) */
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) {
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
......@@ -171,13 +177,22 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
chameleon_desc_init( &NRMSX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q,
chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1), 0, 0,
chameleon_desc_datadist_get_iparam(A, 0) * 2, chameleon_desc_datadist_get_iparam(A, 1),
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &X, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, 1, A->nb, A->nb,
A->p, A->n, 0, 0, A->p, A->n, A->p, A->q,
chameleon_desc_datadist_get_iparam(A, 0), A->n, 0, 0,
chameleon_desc_datadist_get_iparam(A, 0), A->n,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &SX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, 1, A->mb,
A->m, A->q, 0, 0, A->m, A->q, A->p, A->q,
A->m, chameleon_desc_datadist_get_iparam(A, 1), 0, 0,
A->m, chameleon_desc_datadist_get_iparam(A, 1),
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
cnt = 0;
......@@ -192,8 +207,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
/* Initialization of X in the first loop */
if ( cnt == 0 )
{
for (n = myq; n < A->nt; n += A->q) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
if ( myp == 0 ) {
#if defined(PRECISION_z) || defined(PRECISION_c)
......@@ -212,7 +227,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Broadcast X */
for (m = 1; m < A->p; m++) {
for (m = 1; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
INSERT_TASK_zlacpy(
&options,
ChamUpperLower, 1, tempnn,
......@@ -230,8 +245,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
* copy of the scaled X.
*/
scl = 1. / e0;
for (n = myq; n < A->nt; n += A->q) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1)) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zlascal(
&options,
......@@ -242,11 +257,11 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
/**
* Compute Sx = S * x
*/
for(m = myp; m < A->mt; m+=A->p) {
tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
for(m = myp; m < A->mt; m+=chameleon_desc_datadist_get_iparam(A, 0)) {
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = myq; n < A->nt; n += A->q ) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
for (n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
beta = n == myq ? 0. : 1.;
INSERT_TASK_zgemv(
......@@ -258,14 +273,14 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Reduce columns */
for (k = 1; k < chameleon_min( A->q, A->nt ); k++) {
for (k = 1; k < chameleon_min( chameleon_desc_datadist_get_iparam(A, 1), A->nt ); k++) {
INSERT_TASK_zaxpy(
&options, tempmm, 1.,
SX( m, k ), 1,
SX( m, 0 ), 1 );
}
/* Broadcast SX to ease the following gemv */
for (k = 1; k < A->q; k++) {
for (k = 1; k < chameleon_desc_datadist_get_iparam(A, 1); k++) {
INSERT_TASK_zlacpy(
&options,
ChamUpperLower, tempmm, 1,
......@@ -277,11 +292,11 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
/**
* Compute x = S' * S * x = S' * Sx
*/
for ( n = myq; n < A->nt; n += A->q ) {
tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
for ( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
for( m = myp; m < A->mt; m += A->p ) {
tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
for( m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0) ) {
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
beta = m == myp ? 0. : 1.;
INSERT_TASK_zgemv(
......@@ -293,14 +308,14 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Reduce rows */
for (k = 1; k < chameleon_min( A->p, A->mt ); k++) {
for (k = 1; k < chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt ); k++) {
INSERT_TASK_zaxpy(
&options, tempnn, 1.,
X( k, n ), 1,
X( 0, n ), 1 );
}
/* Broadcast */
for (k = 1; k < A->p; k++) {
for (k = 1; k < chameleon_desc_datadist_get_iparam(A, 0); k++) {
INSERT_TASK_zlacpy(
&options,
ChamUpperLower, 1, tempnn,
......@@ -321,8 +336,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
1., 0.,
NRMX( myp, myq ) );
for( n = myq; n < A->nt; n += A->q ) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
for( n = myq; n < A->nt; n += chameleon_desc_datadist_get_iparam(A, 1) ) {
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zgessq(
&options, ChamEltwise, 1, tempnn,
......@@ -331,7 +346,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Reduce columns */
for(n = 1; n < chameleon_min( A->q, A->nt ); n++) {
for(n = 1; n < chameleon_min( chameleon_desc_datadist_get_iparam(A, 1), A->nt ); n++) {
INSERT_TASK_dplssq(
&options, ChamEltwise, 1, 1,
NRMX( myp, n ),
......@@ -342,7 +357,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
&options, 1, NRMX( myp, 0 ) );
/* Broadcast the results to processes in the same row */
for(n = 1; n < A->q; n++) {
for(n = 1; n < chameleon_desc_datadist_get_iparam(A, 1); n++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1,
......@@ -363,8 +378,8 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
1., 0.,
NRMSX( myp, myq ) );
for( m = myp; m < A->mt; m += A->p ) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
for( m = myp; m < A->mt; m += chameleon_desc_datadist_get_iparam(A, 0) ) {
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zgessq(
&options, ChamEltwise, tempmm, 1,
SX( m, myq ),
......@@ -372,7 +387,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
}
/* Reduce rows */
for( m = 1; m < chameleon_min( A->p, A->mt ); m++ ) {
for( m = 1; m < chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt ); m++ ) {
INSERT_TASK_dplssq(
&options, ChamEltwise, 1, 1,
NRMSX( m, myq ),
......@@ -383,7 +398,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
&options, 1, NRMSX( 0, myq ) );
/* Broadcast the results to processes in the same column */
for(m = 1; m < A->p; m++) {
for(m = 1; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1,
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -15,7 +15,8 @@
* @author Mathieu Faverge
* @author Hatem Ltaief
* @author Lionel Eyraud-Dubois
* @date 2023-07-05
* @author Pierre Esterie
* @date 2024-11-13
* @precisions normal z -> s d c
*
*/
......@@ -35,8 +36,8 @@ static int _zgepdf_qdwh_opt_genD = 1;
static int _zgepdf_qdwh_opt_genD = 0;
#endif
static int _zgepdf_qdwh_opt_qr = 1;
static int _zgepdf_qdwh_opt_id = 1;
static int _zgepdf_qdwh_opt_qr = 1;
static int _zgepdf_qdwh_opt_id = 1;
static int _zgepdf_qdwh_verbose = 0;
/**
......@@ -109,23 +110,31 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H,
chameleon_desc_init( TS1, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, nb, ib * nb,
ib * U->mt, nb * U->nt, 0, 0,
ib * U->mt, nb * U->nt, U->p, U->q,
ib * U->mt, nb * U->nt,
chameleon_desc_datadist_get_iparam(U, 0),
chameleon_desc_datadist_get_iparam(U, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( TT1, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, nb, ib * nb,
ib * U->mt, nb * U->nt, 0, 0,
ib * U->mt, nb * U->nt, H->p, H->q,
ib * U->mt, nb * U->nt,
chameleon_desc_datadist_get_iparam(H, 0),
chameleon_desc_datadist_get_iparam(H, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( TS2, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, nb, ib * nb,
ib * H->mt, nb * H->nt, 0, 0,
ib * H->mt, nb * H->nt, U->p, U->q,
ib * H->mt, nb * H->nt,
chameleon_desc_datadist_get_iparam(U, 0),
chameleon_desc_datadist_get_iparam(U, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( TT2, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ib, nb, ib * nb,
ib * H->mt, nb * H->nt, 0, 0,
ib * H->mt, nb * H->nt, H->p, H->q,
ib * H->mt, nb * H->nt,
chameleon_desc_datadist_get_iparam(H, 0),
chameleon_desc_datadist_get_iparam(H, 1),
NULL, NULL, NULL, NULL );
/*
......@@ -135,8 +144,8 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H,
libhqr_matrix_t mat = {
.mt = B1->mt,
.nt = B1->nt,
.nodes = B1->p * B1-> q,
.p = B1->p,
.nodes = chameleon_desc_datadist_get_iparam(B1, 0) * chameleon_desc_datadist_get_iparam(B1,1),
.p = chameleon_desc_datadist_get_iparam(B1, 0),
};
/* Tree for the top matrix */
......@@ -144,7 +153,7 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H,
-1, /*low level tree */
-1, /* high level tree */
-1, /* TS tree size */
B1->p, /* High level size */
chameleon_desc_datadist_get_iparam(B1, 0), /* High level size */
-1, /* Domino */
0 /* TSRR (unstable) */ );
......@@ -156,7 +165,7 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H,
/* high level tree (Could be greedy, but flat should reduce the volume of comm) */
LIBHQR_FLAT_TREE,
-1, /* TS tree size */
B2->p /* High level size */ );
chameleon_desc_datadist_get_iparam(B2, 0) /* High level size */ );
}
/*
......@@ -170,7 +179,9 @@ chameleon_pzgepdf_qdwh_init( const CHAM_desc_t *U, const CHAM_desc_t *H,
chameleon_desc_init( Ut, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, U->mb, U->nb, U->mb * U->nb,
U->n, U->m, 0, 0,
U->n, U->m, U->p, U->q,
U->n, U->m,
chameleon_desc_datadist_get_iparam(U, 0),
chameleon_desc_datadist_get_iparam(U, 1),
NULL, NULL, NULL, NULL );
/*
......@@ -603,16 +614,9 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
double conv = 100.;
double normest, Unorm;
int it, itconv, facto = -1;
cham_bool_t optlacpy_backup;
#if !defined(CHAMELEON_SIMULATION)
double eps = LAPACKE_dlamch_work('e');
#else
#if defined(PRECISION_z) || defined(PRECISION_d)
double eps = 1.e-15;
#else
double eps = 1.e-7;
#endif
#endif
double eps = CHAMELEON_dlamch();
double tol1 = 5. * eps;
double tol3 = pow( tol1, 1./3. );
double id_flops_ratio = ( _zgepdf_qdwh_opt_id == 1 ) ? .5 : 1.5;
......@@ -623,6 +627,10 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
}
assert( chamctxt->scheduler != RUNTIME_SCHED_PARSEC );
/* Force unoptimized lacpy */
optlacpy_backup = chamctxt->optlacpy_enabled;
chamctxt->optlacpy_enabled = CHAMELEON_FALSE;
if ( info ) {
info->itQR = 0;
info->itPO = 0;
......@@ -727,13 +735,13 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
it++;
last = ( it >= itconv );
chameleon_sequence_wait( chamctxt, sequence_it );
if ( params[2] > 100 ) {
int do_qr = (!_zgepdf_qdwh_opt_qr) || (it > 1);
if ( (chamctxt->scheduler == RUNTIME_SCHED_PARSEC) &&
( sequence_it != sequence_qr ) )
{
chameleon_sequence_wait( chamctxt, sequence_it );
sequence_it = sequence_qr;
request_it = &request_qr;
}
......@@ -761,7 +769,6 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
if ( (chamctxt->scheduler == RUNTIME_SCHED_PARSEC) &&
( sequence_it != sequence_po ) )
{
chameleon_sequence_wait( chamctxt, sequence_it );
sequence_it = sequence_po;
request_it = &request_po;
}
......@@ -804,10 +811,10 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
}
}
chameleon_sequence_wait( chamctxt, sequence_it );
if ( (chamctxt->scheduler == RUNTIME_SCHED_PARSEC) &&
( sequence_it != sequence ) )
{
chameleon_sequence_wait( chamctxt, sequence_it );
chameleon_sequence_destroy( chamctxt, sequence_qr );
chameleon_sequence_destroy( chamctxt, sequence_po );
}
......@@ -857,5 +864,8 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t
&descB2, &descTS2, &descTT2, &descQ2, &descD2 );
CHAMELEON_zgemm_WS_Free( gemm_ws );
/* Restore optimized lacpy value */
chamctxt->optlacpy_enabled = optlacpy_backup;
return;
}
......@@ -2,7 +2,7 @@
*
* @file pzgepdf_qr.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved.
*
......@@ -10,9 +10,9 @@
*
* @brief Chameleon zgepdf_qr parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrf parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -21,7 +21,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Samuel Thibault
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -45,8 +45,8 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib,
int m, n;
int tempkm, tempkn, tempnn, tempmm;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgeqrt(
options,
......@@ -55,8 +55,8 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib,
T(k, k));
if ( genD ) {
int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDkm = D->get_blkdim( D, k, DIM_m, D->m );
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
options,
ChamLower, tempDkm, tempDkn,
......@@ -71,7 +71,7 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib,
#endif
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zunmqr(
options,
ChamLeft, ChamConjTrans,
......@@ -84,7 +84,7 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib,
RUNTIME_data_flush( sequence, T(k, k) );
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
RUNTIME_data_migrate( sequence, A(k, k),
A->get_rankof( A, m, k ) );
......@@ -98,7 +98,7 @@ int chameleon_pzgeqrf_step( int genD, int k, int ib,
T(m, k));
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
RUNTIME_data_migrate( sequence, A(k, n),
A->get_rankof( A, m, n ) );
......
......@@ -4,18 +4,18 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrf_param parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -50,7 +50,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
int tempkmin, tempkn, tempnn, tempmm, temppm;
int node, nbtiles;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
/* The number of geqrt to apply */
nbgeqrt = qrtree->getnbgeqrf( qrtree, k );
......@@ -64,7 +64,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
continue;
}
temppm = p == A->mt-1 ? A->m-p*A->mb : A->mb;
temppm = A->get_blkdim( A, p, DIM_m, A->m );
tempkmin = chameleon_min(temppm, tempkn);
INSERT_TASK_zgeqrt(
......@@ -73,8 +73,8 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
A(p, k), T(p, k) );
if ( genD ) {
int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDpm = D->get_blkdim( D, p, DIM_m, D->m );
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
options,
......@@ -90,7 +90,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zunmqr(
options,
ChamLeft, ChamConjTrans,
......@@ -113,7 +113,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
m = tiles[i];
p = qrtree->currpiv( qrtree, k, m );
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
if ( qrtree->gettype( qrtree, k, m ) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */
......@@ -143,7 +143,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
T(m, k));
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
node = A->get_rankof( A, m, n );
RUNTIME_data_migrate( sequence, A(p, n), node );
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrfrh parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -23,7 +23,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -89,9 +89,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
for (k = 0; k < K; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (M = k; M < A->mt; M += BS) {
tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
tempMm = A->get_blkdim( A, M, DIM_m, A->m );
tempkmin = chameleon_min(tempMm, tempkn);
INSERT_TASK_zgeqrt(
......@@ -100,8 +100,8 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
A(M, k),
T(M, k));
if ( genD ) {
int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
int tempDMm = D->get_blkdim( D, M, DIM_m, D->m );
int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy(
&options,
......@@ -117,7 +117,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#endif
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zunmqr(
&options,
ChamLeft, ChamConjTrans,
......@@ -130,7 +130,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
RUNTIME_data_flush( sequence, T(M, k) );
for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
RUNTIME_data_migrate( sequence, A(M, k),
A->get_rankof( A, m, k ) );
......@@ -144,7 +144,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
T(m, k));
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
RUNTIME_data_migrate( sequence, A(M, n),
A->get_rankof( A, m, n ) );
......@@ -164,7 +164,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
}
for (RD = BS; RD < A->mt-k; RD *= 2) {
for (M = k; M+RD < A->mt; M += 2*RD) {
tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
tempMRDm = A->get_blkdim( A, M+RD, DIM_m, A->m );
node = A->get_rankof( A, M+RD, k );
RUNTIME_data_migrate( sequence, A(M, k), node );
......@@ -179,7 +179,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
T2(M+RD, k));
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
node = A->get_rankof( A, M+RD, n );
RUNTIME_data_migrate( sequence, A(M, n), node );
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -14,7 +14,8 @@
* @version 1.3.0
* @author Mathieu Faverge
* @author Ana Hourcau
* @date 2024-07-17
* @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> z d
*
*/
......@@ -28,8 +29,10 @@
#define W( desc, m, n ) (desc), (m), (n)
static inline void
chameleon_pzgered_frb( cham_uplo_t uplo,
CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt,
chameleon_pzgered_frb( cham_uplo_t uplo,
CHAM_desc_t *A,
CHAM_desc_t *Wnorm,
CHAM_desc_t *Welt,
RUNTIME_option_t *options )
{
double alpha = 1.0;
......@@ -42,8 +45,8 @@ chameleon_pzgered_frb( cham_uplo_t uplo,
int NT = (uplo == ChamLower) ? minMNT : A->nt;
int M = (uplo == ChamUpper) ? minMN : A->m;
int N = (uplo == ChamLower) ? minMN : A->n;
int P = Welt->p;
int Q = Welt->q;
int P = chameleon_desc_datadist_get_iparam(Welt, 0);
int Q = chameleon_desc_datadist_get_iparam(Welt, 1);
/* Initialize workspaces for tile norms */
for(m = 0; m < Wnorm->mt; m++) {
......@@ -78,10 +81,10 @@ chameleon_pzgered_frb( cham_uplo_t uplo,
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int tempmm = A->get_blkdim( A, m, DIM_m, M );
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
int tempnn = A->get_blkdim( A, n, DIM_n, N );
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrssq(
......@@ -140,8 +143,8 @@ chameleon_pzgered_frb( cham_uplo_t uplo,
/**
* Broadcast the result
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
for(m = 0; m < chameleon_desc_datadist_get_iparam(A, 0); m++) {
for(n = 0; n < chameleon_desc_datadist_get_iparam(A, 1); n++) {
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
options,
......@@ -155,14 +158,17 @@ chameleon_pzgered_frb( cham_uplo_t uplo,
/**
*
*/
void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
void chameleon_pzgered( cham_uplo_t uplo,
double prec,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t Wcol;
CHAM_desc_t Welt;
double gnorm, threshold, eps;
double gnorm, threshold, eps, eps_diag, threshold_diag;
int workmt, worknt;
int m, n;
......@@ -173,19 +179,23 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
}
RUNTIME_options_init(&options, chamctxt, sequence, request);
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) );
worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) );
RUNTIME_options_ws_alloc( &options, 1, 0 );
/* Matrix to store the norm of each element */
chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q,
A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
/* Matrix to compute the global Frobenius norm */
chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q,
workmt*2, worknt, 0, 0, workmt*2, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_pzgered_frb( uplo, A, &Wcol, &Welt, &options );
......@@ -196,44 +206,43 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
RUNTIME_sequence_wait( chamctxt, sequence );
gnorm = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ));
gnorm = *((double *)Welt.get_blkaddr( &Welt, A->myrank / chameleon_desc_datadist_get_iparam(A, 1), A->myrank % chameleon_desc_datadist_get_iparam(A, 1) ));
chameleon_desc_destroy( &Welt );
/**
* Reduce the precision of the tiles if possible
*/
eps_diag = CHAMELEON_slamch();
if ( prec < 0. ) {
#if !defined(CHAMELEON_SIMULATION)
eps = LAPACKE_dlamch_work('e');
#else
#if defined(PRECISION_z) || defined(PRECISION_d)
eps = 1.e-15;
#else
eps = 1.e-7;
#endif
#endif
eps = CHAMELEON_dlamch();
}
else {
eps = prec;
}
threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt));
threshold_diag = ( eps < eps_diag ) ? threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt));
#if defined(CHAMELEON_DEBUG_GERED)
fprintf( stderr,
"[%2d] The norm of A is: %e\n"
"[%2d] The requested precision is: %e\n"
"[%2d] The computed threshold is: %e\n",
"[%2d] The computed threshold is: %e\n"
"[%2d] The threshold diag is : %e\n",
A->myrank, gnorm,
A->myrank, eps,
A->myrank, threshold );
A->myrank, threshold,
A->myrank, threshold_diag );
#endif
for(m = 0; m < A->mt; m++) {
int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb;
for(m = 0; m < A->mt; m++)
{
int tempmm = A->get_blkdim( A, m, DIM_m, A->m );
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt;
for(n = nmin; n < nmax; n++) {
int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb;
for(n = nmin; n < nmax; n++)
{
int tempnn = A->get_blkdim( A, n, DIM_n, A->n );
/*
* u_{high} = 1e-16 (later should be application accuracy)
......@@ -241,8 +250,14 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
* ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low})
* ||A_{i,j}||_F < threshold / u_{low}
*/
INSERT_TASK_zgered( &options, threshold,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
if ( m == n ) {
INSERT_TASK_zgered( &options, threshold_diag,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
else {
INSERT_TASK_zgered( &options, threshold,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
}
}
......@@ -250,6 +265,6 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy( &Wcol );
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, chamctxt);
RUNTIME_options_ws_free( &options );
RUNTIME_options_finalize( &options, chamctxt );
}
......@@ -2,7 +2,7 @@
*
* @file pzgerst.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -11,7 +11,7 @@
*
* @version 1.3.0
* @author Mathieu Faverge
* @date 2023-07-06
* @date 2025-01-24
* @precisions normal z -> d
*
*/
......@@ -36,7 +36,7 @@ void chameleon_pzgerst( cham_uplo_t uplo,
RUNTIME_options_init(&options, chamctxt, sequence, request);
for(m = 0; m < A->mt; m++) {
int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb;
int tempmm = A->get_blkdim( A, m, DIM_m, A->m );
int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt;
......@@ -46,7 +46,7 @@ void chameleon_pzgerst( cham_uplo_t uplo,
if (( tile->rank == A->myrank ) &&
( tile->flttype != ChamComplexDouble ) )
{
int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb;
int tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zgerst( &options,
tempmm, tempnn, A( m, n ) );
......
This diff is collapsed.
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrf_incpiv parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -22,7 +22,7 @@
* @author Florent Pruvost
* @author Samuel Thibault
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
......@@ -84,8 +84,8 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
for (k = 0; k < minMNT; k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgetrf_incpiv(
&options,
tempkm, tempkn, ib, L->nb,
......@@ -105,7 +105,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zgessm(
&options,
tempkm, tempnn, tempkm, ib, L->nb,
......@@ -115,7 +115,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
A(k, n));
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_ztstrf(
&options,
tempmm, tempkn, ib, L->nb,
......@@ -126,7 +126,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
m == A->mt-1, A->nb*k);
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zssssm(
&options,
A->nb, tempnn, tempmm, tempnn, A->nb, ib, L->nb,
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrf_nopiv parallel algorithm
*
* @version 1.2.0
* @version 1.3.0
* @author Omar Zenati
* @author Mathieu Faverge
* @author Emmanuel Agullo
......@@ -20,20 +20,24 @@
* @author Samuel Thibault
* @author Terry Cojean
* @author Matthieu Kuhn
* @date 2022-02-22
* @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
#define A(m,n) A, m, n
#define A(m, n) A, m, n
#define WD(m) WL, m, m
#define WL(m, n) WL, m, n
#define WU(m, n) WU, m, n
/**
* Parallel tile LU factorization with no pivoting - dynamic scheduling
*/
void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
......@@ -54,7 +58,7 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
if ( chamctxt->autominmax_enabled && (chamctxt->scheduler == RUNTIME_SCHED_STARPU) ) {
int lookahead = chamctxt->lookahead;
int nbtasks_per_step = (A->mt * A->nt) / (A->p * A->q);
int nbtasks_per_step = (A->mt * A->nt) / (chameleon_desc_datadist_get_iparam(A, 0) * chameleon_desc_datadist_get_iparam(A, 1));
int mintasks = nbtasks_per_step * lookahead;
int maxtasks = nbtasks_per_step * (lookahead+1);
......@@ -68,8 +72,8 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
for (k = 0; k < chameleon_min(A->mt, A->nt); k++) {
RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = A->get_blkdim( A, k, DIM_n, A->n );
options.priority = 2*A->nt - 2*k;
INSERT_TASK_zgetrf_nopiv(
......@@ -79,7 +83,7 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
for (m = k+1; m < A->mt; m++) {
options.priority = 2*A->nt - 2*k - m;
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_ztrsm(
&options,
ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit,
......@@ -88,7 +92,7 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
A(m, k));
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = 2*A->nt - 2*k - n;
INSERT_TASK_ztrsm(
&options,
......@@ -98,7 +102,7 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
A(k, n));
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = 2*A->nt - 2*k - n - m;
INSERT_TASK_zgemm(
&options,
......@@ -121,3 +125,196 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A,
RUNTIME_options_finalize(&options, chamctxt);
}
/**
 * Workspace-based variant of the tiled zgetrf_nopiv task submission.
 *
 * For every step k, the routine inserts the diagonal zgetrf_nopiv task, copies
 * the resulting tile into the WL and WU workspaces through chains of zlacpy
 * tasks (a "ring"), then inserts the ztrsm tasks on the panel/row and the
 * zgemm tasks on the trailing tiles, reading the operands from WL/WU rather
 * than from A. Only rows m with (m % grid_p == myp) and columns n with
 * (n % grid_q == myq) are submitted by the calling process.
 *
 * NOTE(review): grid_p / grid_q are the values returned by
 * chameleon_desc_datadist_get_iparam(A, 0) and (A, 1); they are presumably
 * the dimensions of the process grid -- confirm against the descriptor code.
 * They are read once per call here, which assumes the getter has no side
 * effects.
 *
 * @param[in,out] A        Descriptor of the matrix whose tiles are updated.
 * @param[in,out] WL       Workspace descriptor receiving copies of the tiles
 *                         of column k (indexed by a column slot).
 * @param[in,out] WU       Workspace descriptor receiving copies of the tiles
 *                         of row k (indexed by a row slot).
 * @param[in]     sequence Sequence the tasks are attached to; nothing is
 *                         submitted if its status is not CHAMELEON_SUCCESS.
 * @param[in]     request  Request forwarded to RUNTIME_options_init().
 */
void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t        *A,
                                 CHAM_desc_t        *WL,
                                 CHAM_desc_t        *WU,
                                 RUNTIME_sequence_t *sequence,
                                 RUNTIME_request_t  *request )
{
    CHAM_context_t       *chamctxt;
    RUNTIME_option_t      options;
    CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
    CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;

    int k, m, n, p, q;
    int ib, lookahead;
    int grid_p, grid_q;       /* iparam 0 and iparam 1 of the data distribution  */
    int myp, myq;             /* coordinates derived from A->myrank              */
    int lp, lq;               /* per-step offsets into WU rows / WL columns      */
    int wl_first, wu_first;   /* slots receiving the first copy of a tile        */
    int wl_mine, wu_mine;     /* slots read by the calling process               */
    int src, dst;             /* running slots while walking a ring              */
    int tempkm, tempkn, tempmm, tempnn;

    chamctxt = chameleon_context_self();
    if ( sequence->status != CHAMELEON_SUCCESS ) {
        return;
    }
    RUNTIME_options_init( &options, chamctxt, sequence, request );

    ib        = CHAMELEON_IB;
    lookahead = chamctxt->lookahead;

    grid_p = chameleon_desc_datadist_get_iparam( A, 0 );
    grid_q = chameleon_desc_datadist_get_iparam( A, 1 );
    myp    = A->myrank / grid_q;
    myq    = A->myrank % grid_q;

    for ( k = 0; k < chameleon_min( A->mt, A->nt ); k++ ) {
        RUNTIME_iteration_push( chamctxt, k );

        /* The workspace slots are cycled with a period of `lookahead` steps */
        lp = (k % lookahead) * grid_p;
        lq = (k % lookahead) * grid_q;

        wl_first = (k % grid_q) + lq;
        wu_first = (k % grid_p) + lp;
        wl_mine  = myq + lq;
        wu_mine  = myp + lp;

        tempkm = A->get_blkdim( A, k, DIM_m, A->m );
        tempkn = A->get_blkdim( A, k, DIM_n, A->n );

        /* Factorization of the diagonal tile */
        options.priority = 2*A->nt - 2*k;
        INSERT_TASK_zgetrf_nopiv(
            &options,
            tempkm, tempkn, ib, A->mb,
            A( k, k ), A->mb*k );

        /*
         * Broadcast of A(k,k) along rings in both directions: first copy it
         * into WL and WU, then forward each copy from slot to slot.
         */
        INSERT_TASK_zlacpy(
            &options,
            ChamUpperLower, tempkm, tempkn,
            A( k, k ),
            WL( k, wl_first ) );
        INSERT_TASK_zlacpy(
            &options,
            ChamUpperLower, tempkm, tempkn,
            A( k, k ),
            WU( wu_first, k ) );

        src = wl_first;
        for ( q = 1; q < grid_q; q++ ) {
            dst = ((k + q) % grid_q) + lq;
            INSERT_TASK_zlacpy(
                &options,
                ChamUpperLower, tempkm, tempkn,
                WL( k, src ),
                WL( k, dst ) );
            src = dst;
        }

        src = wu_first;
        for ( p = 1; p < grid_p; p++ ) {
            dst = ((k + p) % grid_p) + lp;
            INSERT_TASK_zlacpy(
                &options,
                ChamUpperLower, tempkm, tempkn,
                WU( src, k ),
                WU( dst, k ) );
            src = dst;
        }

        RUNTIME_data_flush( sequence, A( k, k ) );

        /* Tiles below the diagonal in column k */
        for ( m = k+1; m < A->mt; m++ ) {
            /* Skip the row if this process is not involved with it */
            if ( (m % grid_p) != myp ) {
                continue;
            }

            tempmm           = A->get_blkdim( A, m, DIM_m, A->m );
            options.priority = 2*A->nt - 2*k - m;

            assert( A->get_rankof( A, m, k ) == WU->get_rankof( WU, wu_mine, k ) );
            INSERT_TASK_ztrsm(
                &options,
                ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit,
                tempmm, tempkn, A->mb,
                zone, WU( wu_mine, k ),
                      A( m, k ) );

            /* Broadcast A(m,k) into the WL buffers through a ring */
            assert( A->get_rankof( A, m, k ) == WL->get_rankof( WL, m, wl_first ) );
            INSERT_TASK_zlacpy(
                &options,
                ChamUpperLower, tempmm, tempkn,
                A( m, k ),
                WL( m, wl_first ) );

            src = wl_first;
            for ( q = 1; q < grid_q; q++ ) {
                dst = ((k + q) % grid_q) + lq;
                INSERT_TASK_zlacpy(
                    &options,
                    ChamUpperLower, tempmm, tempkn,
                    WL( m, src ),
                    WL( m, dst ) );
                src = dst;
            }

            RUNTIME_data_flush( sequence, A( m, k ) );
        }

        /* Tiles right of the diagonal in row k, then the trailing update */
        for ( n = k+1; n < A->nt; n++ ) {
            /* Skip the column if this process is not involved with it */
            if ( (n % grid_q) != myq ) {
                continue;
            }

            tempnn           = A->get_blkdim( A, n, DIM_n, A->n );
            options.priority = 2*A->nt - 2*k - n;

            assert( A->get_rankof( A, k, n ) == WL->get_rankof( WL, k, wl_mine ) );
            INSERT_TASK_ztrsm(
                &options,
                ChamLeft, ChamLower, ChamNoTrans, ChamUnit,
                tempkm, tempnn, A->mb,
                zone, WL( k, wl_mine ),
                      A( k, n ) );

            /* Broadcast A(k,n) into the WU buffers through a ring */
            assert( A->get_rankof( A, k, n ) == WU->get_rankof( WU, wu_first, n ) );
            INSERT_TASK_zlacpy(
                &options,
                ChamUpperLower, tempkm, tempnn,
                A( k, n ),
                WU( wu_first, n ) );

            src = wu_first;
            for ( p = 1; p < grid_p; p++ ) {
                dst = ((k + p) % grid_p) + lp;
                INSERT_TASK_zlacpy(
                    &options,
                    ChamUpperLower, tempkm, tempnn,
                    WU( src, n ),
                    WU( dst, n ) );
                src = dst;
            }

            RUNTIME_data_flush( sequence, A( k, n ) );

            for ( m = k+1; m < A->mt; m++ ) {
                /* Skip the row if this process is not involved with it */
                if ( (m % grid_p) != myp ) {
                    continue;
                }

                tempmm           = A->get_blkdim( A, m, DIM_m, A->m );
                options.priority = 2*A->nt - 2*k - n - m;

                assert( A->get_rankof( A, m, n ) == WL->get_rankof( WL, m, wl_mine ) );
                assert( A->get_rankof( A, m, n ) == WU->get_rankof( WU, wu_mine, n ) );

                INSERT_TASK_zgemm(
                    &options,
                    ChamNoTrans, ChamNoTrans,
                    tempmm, tempnn, A->mb, A->mb,
                    mzone, WL( m, wl_mine ),
                           WU( wu_mine, n ),
                    zone,  A( m, n ) );
            }
        }

        RUNTIME_iteration_pop( chamctxt );
    }

    CHAMELEON_Desc_Flush( WL, sequence );
    CHAMELEON_Desc_Flush( WU, sequence );
    RUNTIME_options_finalize( &options, chamctxt );
}
/**
 * Entry point selecting between the two zgetrf_nopiv task-submission variants.
 *
 * The workspace variant is used only when a workspace structure is provided
 * and its use_workspace flag is set; in every other case the generic variant
 * is called.
 *
 * @param[in]     ws       Optional workspace structure (may be NULL). Its WL
 *                         and WU descriptors are passed to the workspace
 *                         variant.
 * @param[in,out] A        Descriptor of the matrix, forwarded unchanged.
 * @param[in]     sequence Sequence forwarded to the selected variant.
 * @param[in]     request  Request forwarded to the selected variant.
 */
void chameleon_pzgetrf_nopiv( struct chameleon_pzgetrf_nopiv_s *ws,
                              CHAM_desc_t                      *A,
                              RUNTIME_sequence_t               *sequence,
                              RUNTIME_request_t                *request )
{
    /* No workspace available or workspace disabled: fall back to generic */
    if ( (ws == NULL) || !ws->use_workspace ) {
        chameleon_pzgetrf_nopiv_generic( A, sequence, request );
        return;
    }

    chameleon_pzgetrf_nopiv_ws( A, &(ws->WL), &(ws->WU), sequence, request );
}