Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Showing with 309 additions and 299 deletions
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zplrnt parallel algorithm * @brief Chameleon zplrnt parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @author Matthieu Kuhn * @author Matthieu Kuhn
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -46,10 +46,10 @@ void chameleon_pzplrnt( CHAM_desc_t *A, ...@@ -46,10 +46,10 @@ void chameleon_pzplrnt( CHAM_desc_t *A,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for (m = 0; m < A->mt; m++) { for (m = 0; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < A->nt; n++) { for (n = 0; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zplrnt( INSERT_TASK_zplrnt(
&options, &options,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zpotrf parallel algorithm * @brief Chameleon zpotrf parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak * @author Jakub Kurzak
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* @author Florent Pruvost * @author Florent Pruvost
* @author Samuel Thibault * @author Samuel Thibault
* @author Terry Cojean * @author Terry Cojean
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -60,7 +60,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -60,7 +60,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
options.priority = 2*A->mt - 2*k; options.priority = 2*A->mt - 2*k;
INSERT_TASK_zpotrf( INSERT_TASK_zpotrf(
...@@ -69,7 +69,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -69,7 +69,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
A(k, k), A->nb*k); A(k, k), A->nb*k);
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = 2*A->mt - 2*k - m; options.priority = 2*A->mt - 2*k - m;
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
...@@ -82,7 +82,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -82,7 +82,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = 2*A->mt - 2*k - n; options.priority = 2*A->mt - 2*k - n;
INSERT_TASK_zherk( INSERT_TASK_zherk(
...@@ -93,7 +93,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -93,7 +93,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
1.0, A(n, n)); 1.0, A(n, n));
for (m = n+1; m < A->mt; m++) { for (m = n+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = 2*A->mt - 2*k - n - m; options.priority = 2*A->mt - 2*k - n - m;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -116,7 +116,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -116,7 +116,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkm = A->get_blkdim( A, k, DIM_n, A->n );
options.priority = 2*A->nt - 2*k; options.priority = 2*A->nt - 2*k;
INSERT_TASK_zpotrf( INSERT_TASK_zpotrf(
...@@ -126,7 +126,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -126,7 +126,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
A(k, k), A->nb*k); A(k, k), A->nb*k);
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = 2*A->nt - 2*k - n; options.priority = 2*A->nt - 2*k - n;
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
...@@ -139,7 +139,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -139,7 +139,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
options.priority = 2*A->nt - 2*k - m; options.priority = 2*A->nt - 2*k - m;
INSERT_TASK_zherk( INSERT_TASK_zherk(
...@@ -150,7 +150,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -150,7 +150,7 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
1.0, A(m, m)); 1.0, A(m, m));
for (n = m+1; n < A->nt; n++) { for (n = m+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
options.priority = 2*A->nt - 2*k - n - m; options.priority = 2*A->nt - 2*k - n - m;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zpotrimm parallel algorithm * @brief Chameleon zpotrimm parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief * @author Hatem Ltaief
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Ali M Charara * @author Ali M Charara
* @author Florent Pruvost * @author Florent Pruvost
* @author Samuel Thibault * @author Samuel Thibault
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -63,7 +63,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -63,7 +63,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
INSERT_TASK_zpotrf( INSERT_TASK_zpotrf(
&options, &options,
...@@ -71,7 +71,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -71,7 +71,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
A(k, k), A->nb*k); A(k, k), A->nb*k);
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamRight, ChamLower, ChamConjTrans, ChamNonUnit, ChamRight, ChamLower, ChamConjTrans, ChamNonUnit,
...@@ -82,7 +82,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -82,7 +82,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zherk( INSERT_TASK_zherk(
&options, &options,
ChamLower, ChamNoTrans, ChamLower, ChamNoTrans,
...@@ -91,7 +91,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -91,7 +91,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
1.0, A(n, n)); 1.0, A(n, n));
for (m = n+1; m < A->mt; m++) { for (m = n+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, ChamConjTrans, ChamNoTrans, ChamConjTrans,
...@@ -111,9 +111,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -111,9 +111,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
RUNTIME_iteration_push(chamctxt, A->nt + k); RUNTIME_iteration_push(chamctxt, A->nt + k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamRight, uplo, ChamNoTrans, ChamNonUnit, ChamRight, uplo, ChamNoTrans, ChamNonUnit,
...@@ -122,7 +122,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -122,7 +122,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
A(m, k)); A(m, k));
} }
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < k; n++) { for (n = 0; n < k; n++) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -158,7 +158,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -158,7 +158,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, 2*A->nt + k); RUNTIME_iteration_push(chamctxt, 2*A->nt + k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for(n = 0; n < k; n++) { for(n = 0; n < k; n++) {
INSERT_TASK_zherk( INSERT_TASK_zherk(
&options, &options,
...@@ -200,14 +200,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -200,14 +200,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
RUNTIME_iteration_push(chamctxt, 3*A->nt + k); RUNTIME_iteration_push(chamctxt, 3*A->nt + k);
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
for (m = 0; m < C->mt; m++) { for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -258,7 +258,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -258,7 +258,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkm = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zpotrf( INSERT_TASK_zpotrf(
&options, &options,
ChamUpper, ChamUpper,
...@@ -266,7 +266,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -266,7 +266,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
A(k, k), A->nb*k); A(k, k), A->nb*k);
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit, ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit,
...@@ -277,7 +277,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -277,7 +277,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zherk( INSERT_TASK_zherk(
&options, &options,
...@@ -287,7 +287,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -287,7 +287,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
1.0, A(m, m)); 1.0, A(m, m));
for (n = m+1; n < A->nt; n++) { for (n = m+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -308,9 +308,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -308,9 +308,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, A->nt + k); RUNTIME_iteration_push(chamctxt, A->nt + k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamLeft, uplo, ChamNoTrans, ChamNonUnit, ChamLeft, uplo, ChamNoTrans, ChamNonUnit,
...@@ -319,7 +319,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -319,7 +319,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
A(k, n)); A(k, n));
} }
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
for (m = 0; m < k; m++) { for (m = 0; m < k; m++) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -355,7 +355,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -355,7 +355,7 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, 2*A->nt + k); RUNTIME_iteration_push(chamctxt, 2*A->nt + k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (m = 0; m < k; m++) { for (m = 0; m < k; m++) {
INSERT_TASK_zherk( INSERT_TASK_zherk(
...@@ -398,14 +398,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_ ...@@ -398,14 +398,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
RUNTIME_iteration_push(chamctxt, 3*A->nt + k); RUNTIME_iteration_push(chamctxt, 3*A->nt + k);
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
for (m = 0; m < C->mt; m++) { for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zsymm parallel algorithm * @brief Chameleon zsymm parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -20,7 +20,8 @@ ...@@ -20,7 +20,8 @@
* @author Florent Pruvost * @author Florent Pruvost
* @author Alycia Lisito * @author Alycia Lisito
* @author Romain Peressoni * @author Romain Peressoni
* @date 2022-02-22 * @author Pierre Esterie
* @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -110,9 +111,9 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -110,9 +111,9 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
} }
for(n = 0; n < C->nt; n++) { for(n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
for(m = 0; m < C->mt; m++) { for(m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* Scale C */ /* Scale C */
options->forcesub = 0; options->forcesub = 0;
...@@ -126,7 +127,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -126,7 +127,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
if (side == ChamLeft) { if (side == ChamLeft) {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
if (k < m) { if (k < m) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -162,7 +163,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -162,7 +163,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
*/ */
else { else {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
if (k < m) { if (k < m) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -200,7 +201,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -200,7 +201,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
else { else {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -236,7 +237,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t ...@@ -236,7 +237,7 @@ chameleon_pzsymm_Astat( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_t
*/ */
else { else {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
if (k < n) { if (k < n) {
INSERT_TASK_zgemm_Astat( INSERT_TASK_zgemm_Astat(
...@@ -293,7 +294,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -293,7 +294,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
{ {
RUNTIME_sequence_t *sequence = options->sequence; RUNTIME_sequence_t *sequence = options->sequence;
cham_trans_t transA; cham_trans_t transA;
int m, n, k, p, q, KT, K, lp, lq; int m, n, k, p, q, KT, lp, lq;
int tempmm, tempnn, tempkk; int tempmm, tempnn, tempkk;
int lookahead, myp, myq; int lookahead, myp, myq;
...@@ -302,14 +303,13 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -302,14 +303,13 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
lookahead = chamctxt->lookahead; lookahead = chamctxt->lookahead;
KT = A->nt; KT = A->nt;
K = A->n; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myp = C->myrank / C->q; myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % C->q;
for (k = 0; k < KT; k++ ) { for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p; lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * C->q; lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = k == KT - 1 ? K - k * A->nb : A->nb; tempkk = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
/* Transfer ownership of the k column of A or B */ /* Transfer ownership of the k column of A or B */
...@@ -317,7 +317,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -317,7 +317,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
int Am, Ak; int Am, Ak;
int tempam, tempak; int tempam, tempak;
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( (( uplo == ChamUpper ) && ( m > k )) || if ( (( uplo == ChamUpper ) && ( m > k )) ||
(( uplo == ChamLower ) && ( m < k )) ) (( uplo == ChamLower ) && ( m < k )) )
...@@ -340,48 +340,48 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -340,48 +340,48 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
options, options,
ChamUpperLower, tempam, tempak, ChamUpperLower, tempam, tempak,
A( Am, Ak ), A( Am, Ak ),
WA( m, (k % C->q) + lq ) ); WA( m, (Ak % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, A( Am, Ak ) ); RUNTIME_data_flush( sequence, A( Am, Ak ) );
for ( q=1; q < C->q; q++ ) { for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempam, tempak, ChamUpperLower, tempam, tempak,
WA( m, ((k+q-1) % C->q) + lq ), WA( m, ((Ak+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % C->q) + lq ) ); WA( m, ((Ak+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
} }
} }
/* Transfer ownership of the k row of B, or A */ /* Transfer ownership of the k row of B, or A */
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempkk, tempnn, ChamUpperLower, tempkk, tempnn,
B( k, n ), B( k, n ),
WB( (k % C->p) + lp, n ) ); WB( (k % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, B( k, n ) ); RUNTIME_data_flush( sequence, B( k, n ) );
for ( p=1; p < C->p; p++ ) { for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempkk, tempnn, ChamUpperLower, tempkk, tempnn,
WB( ((k+p-1) % C->p) + lp, n ), WB( ((k+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) ); WB( ((k+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
} }
} }
/* Perform the update of this iteration */ /* Perform the update of this iteration */
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
if ( k == m ) { if ( k == m ) {
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zsymm( INSERT_TASK_zsymm(
options, ChamLeft, uplo, options, ChamLeft, uplo,
...@@ -401,8 +401,8 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -401,8 +401,8 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
transA = ChamNoTrans; transA = ChamNoTrans;
} }
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
options, transA, ChamNoTrans, options, transA, ChamNoTrans,
...@@ -429,7 +429,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -429,7 +429,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
{ {
RUNTIME_sequence_t *sequence = options->sequence; RUNTIME_sequence_t *sequence = options->sequence;
cham_trans_t transA; cham_trans_t transA;
int m, n, k, p, q, KT, K, lp, lq; int m, n, k, p, q, KT, lp, lq;
int tempmm, tempnn, tempkk; int tempmm, tempnn, tempkk;
int lookahead, myp, myq; int lookahead, myp, myq;
...@@ -438,35 +438,34 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -438,35 +438,34 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
lookahead = chamctxt->lookahead; lookahead = chamctxt->lookahead;
KT = A->mt; KT = A->mt;
K = A->m; myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1);
myp = C->myrank / C->q; myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1);
myq = C->myrank % C->q;
for (k = 0; k < KT; k++ ) { for (k = 0; k < KT; k++ ) {
lp = (k % lookahead) * C->p; lp = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 0);
lq = (k % lookahead) * C->q; lq = (k % lookahead) * chameleon_desc_datadist_get_iparam(C, 1);
tempkk = k == KT - 1 ? K - k * A->nb : A->nb; tempkk = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
/* Transfer ownership of the k column of A or B */ /* Transfer ownership of the k column of A or B */
for (m = 0; m < C->mt; m++ ) { for (m = 0; m < C->mt; m++ ) {
tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkk, ChamUpperLower, tempmm, tempkk,
B( m, k ), B( m, k ),
WA( m, (k % C->q) + lq ) ); WA( m, (k % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
RUNTIME_data_flush( sequence, B( m, k ) ); RUNTIME_data_flush( sequence, B( m, k ) );
for ( q=1; q < C->q; q++ ) { for ( q=1; q < chameleon_desc_datadist_get_iparam(C, 1); q++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempmm, tempkk, ChamUpperLower, tempmm, tempkk,
WA( m, ((k+q-1) % C->q) + lq ), WA( m, ((k+q-1) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ),
WA( m, ((k+q) % C->q) + lq ) ); WA( m, ((k+q) % chameleon_desc_datadist_get_iparam(C, 1)) + lq ) );
} }
} }
...@@ -475,7 +474,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -475,7 +474,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
int Ak, An; int Ak, An;
int tempak, tempan; int tempak, tempan;
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( (( uplo == ChamUpper ) && ( n < k )) || if ( (( uplo == ChamUpper ) && ( n < k )) ||
(( uplo == ChamLower ) && ( n > k )) ) (( uplo == ChamLower ) && ( n > k )) )
...@@ -497,26 +496,26 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -497,26 +496,26 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
options, options,
ChamUpperLower, tempak, tempan, ChamUpperLower, tempak, tempan,
A( Ak, An ), A( Ak, An ),
WB( (k % C->p) + lp, n ) ); WB( (Ak % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
RUNTIME_data_flush( sequence, A( Ak, An ) ); RUNTIME_data_flush( sequence, A( Ak, An ) );
for ( p=1; p < C->p; p++ ) { for ( p=1; p < chameleon_desc_datadist_get_iparam(C, 0); p++ ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
options, options,
ChamUpperLower, tempak, tempan, ChamUpperLower, tempak, tempan,
WB( ((k+p-1) % C->p) + lp, n ), WB( ((Ak+p-1) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ),
WB( ((k+p) % C->p) + lp, n ) ); WB( ((Ak+p) % chameleon_desc_datadist_get_iparam(C, 0)) + lp, n ) );
} }
} }
/* Perform the update of this iteration */ /* Perform the update of this iteration */
for (n = myq; n < C->nt; n+=C->q) { for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if ( k == n ) { if ( k == n ) {
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
/* A has been stored in WA or WB for the summa ring */ /* A has been stored in WA or WB for the summa ring */
INSERT_TASK_zsymm( INSERT_TASK_zsymm(
...@@ -537,8 +536,8 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo, ...@@ -537,8 +536,8 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
transA = ChamNoTrans; transA = ChamNoTrans;
} }
for (m = myp; m < C->mt; m+=C->p) { for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
options, ChamNoTrans, transA, options, ChamNoTrans, transA,
...@@ -595,16 +594,16 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -595,16 +594,16 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0; CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
for(m = 0; m < C->mt; m++) { for(m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for(n = 0; n < C->nt; n++) { for(n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
/* /*
* ChamLeft / ChamLower * ChamLeft / ChamLower
*/ */
if (side == ChamLeft) { if (side == ChamLeft) {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < m) { if (k < m) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -642,7 +641,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -642,7 +641,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
*/ */
else { else {
for (k = 0; k < C->mt; k++) { for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; tempkm = C->get_blkdim( C, k, DIM_m, C->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < m) { if (k < m) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -682,7 +681,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -682,7 +681,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
else { else {
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
...@@ -720,7 +719,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_ ...@@ -720,7 +719,7 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
*/ */
else { else {
for (k = 0; k < C->nt; k++) { for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; tempkn = C->get_blkdim( C, k, DIM_n, C->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
if (k < n) { if (k < n) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
......
...@@ -4,21 +4,21 @@ ...@@ -4,21 +4,21 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zsyr2k parallel algorithm * @brief Chameleon zsyr2k parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
*/ */
...@@ -51,7 +51,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -51,7 +51,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
if (uplo == ChamLower) { if (uplo == ChamLower) {
mmin = n+1; mmin = n+1;
...@@ -67,7 +67,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -67,7 +67,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zsyr2k( INSERT_TASK_zsyr2k(
&options, &options,
...@@ -78,9 +78,9 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -78,9 +78,9 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
zbeta, C(n, n)); /* ldc * N */ zbeta, C(n, n)); /* ldc * N */
} }
for (m = mmin; m < mmax; m++) { for (m = mmin; m < mmax; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -105,7 +105,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -105,7 +105,7 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zsyr2k( INSERT_TASK_zsyr2k(
&options, &options,
...@@ -116,9 +116,9 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans, ...@@ -116,9 +116,9 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
zbeta, C(n, n)); /* ldc * N */ zbeta, C(n, n)); /* ldc * N */
} }
for (m = mmin; m < mmax; m++) { for (m = mmin; m < mmax; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zsyrk parallel algorithm * @brief Chameleon zsyrk parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak * @author Jakub Kurzak
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -51,13 +51,13 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -51,13 +51,13 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
for (n = 0; n < C->nt; n++) { for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = C->get_blkdim( C, n, DIM_n, C->n );
/* /*
* ChamNoTrans * ChamNoTrans
*/ */
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zsyrk( INSERT_TASK_zsyrk(
&options, &options,
...@@ -71,9 +71,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -71,9 +71,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -90,9 +90,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -90,9 +90,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -110,7 +110,7 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -110,7 +110,7 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zsyrk( INSERT_TASK_zsyrk(
&options, &options,
...@@ -124,9 +124,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -124,9 +124,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
if (uplo == ChamLower) { if (uplo == ChamLower) {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -143,9 +143,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans, ...@@ -143,9 +143,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
*/ */
else { else {
for (m = n+1; m < C->mt; m++) { for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = C->get_blkdim( C, m, DIM_m, C->m );
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
zbeta = k == 0 ? beta : zone; zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zsytrf parallel algorithm * @brief Chameleon zsytrf parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Jakub Kurzak * @author Jakub Kurzak
* @author Hatem Ltaief * @author Hatem Ltaief
* @author Mathieu Faverge * @author Mathieu Faverge
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Florent Pruvost * @author Florent Pruvost
* @author Marc Sergent * @author Marc Sergent
* @author Samuel Thibault * @author Samuel Thibault
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> c * @precisions normal z -> c
* *
*/ */
...@@ -58,7 +58,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -58,7 +58,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
INSERT_TASK_zsytrf_nopiv( INSERT_TASK_zsytrf_nopiv(
&options, &options,
...@@ -66,7 +66,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -66,7 +66,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
A(k, k), A->nb*k); A(k, k), A->nb*k);
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamRight, ChamLower, ChamTrans, ChamNonUnit, ChamRight, ChamLower, ChamTrans, ChamNonUnit,
...@@ -77,7 +77,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -77,7 +77,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zsyrk( INSERT_TASK_zsyrk(
&options, &options,
ChamLower, ChamNoTrans, ChamLower, ChamNoTrans,
...@@ -86,7 +86,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -86,7 +86,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
1.0, A(n, n)); 1.0, A(n, n));
for (m = n+1; m < A->mt; m++) { for (m = n+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, ChamTrans, ChamNoTrans, ChamTrans,
...@@ -108,7 +108,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -108,7 +108,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkm = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zsytrf_nopiv( INSERT_TASK_zsytrf_nopiv(
&options, &options,
ChamUpper, ChamUpper,
...@@ -116,7 +116,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -116,7 +116,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
A(k, k), A->nb*k); A(k, k), A->nb*k);
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamLeft, ChamUpper, ChamTrans, ChamNonUnit, ChamLeft, ChamUpper, ChamTrans, ChamNonUnit,
...@@ -127,7 +127,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -127,7 +127,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_zsyrk( INSERT_TASK_zsyrk(
&options, &options,
...@@ -137,7 +137,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -137,7 +137,7 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
1.0, A(m, m)); 1.0, A(m, m));
for (n = m+1; n < A->nt; n++) { for (n = m+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,19 +4,19 @@ ...@@ -4,19 +4,19 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon ztile2band parallel algorithm * @brief Chameleon ztile2band parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Azzam Haidar * @author Azzam Haidar
* @author Gregoire Pichon * @author Gregoire Pichon
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -38,9 +38,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -38,9 +38,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
int k; int k;
int tempkm, tempkn; int tempkm, tempkn;
int minmnt = chameleon_min(A->mt, A->nt); int minmnt = chameleon_min(A->mt, A->nt);
int Bnb = B->nb;
int Bmb = B->mb; int Bmb = B->mb;
int Amb = A->mb;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) { if (sequence->status != CHAMELEON_SUCCESS) {
...@@ -59,8 +57,8 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -59,8 +57,8 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
assert( A->i == B->j ); assert( A->i == B->j );
assert( A->j >= B->j ); assert( A->j >= B->j );
tempkm = ( k == A->mt-1 ) ? A->m - k * Amb : Amb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = ( k == B->nt-1 ) ? B->n - k * Bnb : Bnb; tempkn = B->get_blkdim( B, k, DIM_n, B->n );
INSERT_TASK_zlaset( &options, ChamUpperLower, Bmb, tempkn, INSERT_TASK_zlaset( &options, ChamUpperLower, Bmb, tempkn,
0., 0., B, 0, k ); 0., 0., B, 0, k );
...@@ -72,7 +70,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -72,7 +70,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
if ( k < minmnt-1 ) { if ( k < minmnt-1 ) {
tileA = A->get_blktile( A, k+1, k ); tileA = A->get_blktile( A, k+1, k );
tempkm = ( (k+1) == A->mt-1 ) ? A->m - (k+1) * Amb : Amb; tempkm = A->get_blkdim( A, k+1, DIM_m, A->m );
INSERT_TASK_zlacpyx( &options, ChamUpper, tempkm, tempkn, INSERT_TASK_zlacpyx( &options, ChamUpper, tempkm, tempkn,
0, A, k+1, k, tileA->ld, 0, A, k+1, k, tileA->ld,
...@@ -88,8 +86,8 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -88,8 +86,8 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
assert( A->i == B->i ); assert( A->i == B->i );
assert( A->i >= B->j ); assert( A->i >= B->j );
tempkm = ( k == A->mt-1 ) ? A->m - k * Amb : Amb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = ( k == B->nt-1 ) ? B->n - k * Bnb : Bnb; tempkn = B->get_blkdim( B, k, DIM_n, B->n );
INSERT_TASK_zlaset( &options, ChamUpperLower, Bmb, tempkn, INSERT_TASK_zlaset( &options, ChamUpperLower, Bmb, tempkn,
0., 0., B, 0, k ); 0., 0., B, 0, k );
...@@ -101,7 +99,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, ...@@ -101,7 +99,7 @@ void chameleon_pztile2band( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
if ( k > 0 ) { if ( k > 0 ) {
tileA = A->get_blktile( A, k-1, k ); tileA = A->get_blktile( A, k-1, k );
tempkm = ( (k-1) == A->mt-1 ) ? A->m - (k-1) * Amb : Amb; tempkm = A->get_blkdim( A, k-1, DIM_m, A->m );
INSERT_TASK_zlacpyx( &options, ChamLower, tempkm, tempkn, INSERT_TASK_zlacpyx( &options, ChamLower, tempkm, tempkn,
0, A, k-1, k, tileA->ld, 0, A, k-1, k, tileA->ld,
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2016 The University of Tennessee and The University of * @copyright 2009-2016 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* @copyright 2016-2018 KAUST. All rights reserved. * @copyright 2016-2018 KAUST. All rights reserved.
* *
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
* *
* @brief Chameleon computational routines * @brief Chameleon computational routines
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @author Samuel Thibault * @author Samuel Thibault
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -80,16 +80,16 @@ void chameleon_pztpgqrt( int KT, int L, ...@@ -80,16 +80,16 @@ void chameleon_pztpgqrt( int KT, int L,
for (k = KT-1; k >= 0; k--) { for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb; tempkn = Q1->get_blkdim( Q1, k, DIM_n, Q1->n );
/* Equivalent to the tsmqr step on Q1,Q2 */ /* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1; maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
for (m = maxmtk; m > -1; m--) { for (m = maxmtk; m > -1; m--) {
tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb; tempmm = Q2->get_blkdim( Q2, m, DIM_m, Q2->m );
templm = ((L > 0) && (m == maxmtk)) ? tempmm : 0; templm = ((L > 0) && (m == maxmtk)) ? tempmm : 0;
for (n = k; n < Q2->nt; n++) { for (n = k; n < Q2->nt; n++) {
tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb; tempnn = Q2->get_blkdim( Q2, n, DIM_n, Q2->n );
/* TT kernel */ /* TT kernel */
INSERT_TASK_ztpmqrt( INSERT_TASK_ztpmqrt(
&options, &options,
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* *
* @file pztpgqrt_param.c * @file pztpgqrt_param.c
* *
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved. * @copyright 2016-2020 KAUST. All rights reserved.
* *
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
* *
* @brief Chameleon computational routines * @brief Chameleon computational routines
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -98,13 +98,13 @@ void chameleon_pztpgqrt_param( int genD, cham_uplo_t uplo, int K, ...@@ -98,13 +98,13 @@ void chameleon_pztpgqrt_param( int genD, cham_uplo_t uplo, int K,
/* Combine Bottom and Top matrices by merging last pivot with ATop(k,*) */ /* Combine Bottom and Top matrices by merging last pivot with ATop(k,*) */
{ {
CHAM_desc_t *T = TT; CHAM_desc_t *T = TT;
int temppm = p == Q->mt-1 ? Q->m - p * Q->mb : Q->mb; int temppm = Q->get_blkdim( Q, p, DIM_m, Q->m );
int tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb; int tempkn = A->get_blkdim( A, k, DIM_n, A->n );
int tempnn; int tempnn;
int L = temppm; int L = temppm;
for (n = k; n < Q->nt; n++) { for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
node = Q->get_rankof( Q, p, n ); node = Q->get_rankof( Q, p, n );
RUNTIME_data_migrate( sequence, QTop(k, n), node ); RUNTIME_data_migrate( sequence, QTop(k, n), node );
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* @copyright 2009-2016 The University of Tennessee and The University of * @copyright 2009-2016 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* @copyright 2016-2018 KAUST. All rights reserved. * @copyright 2016-2018 KAUST. All rights reserved.
* *
...@@ -12,10 +12,10 @@ ...@@ -12,10 +12,10 @@
* *
* @brief Chameleon computational routines * @brief Chameleon computational routines
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Samuel Thibault * @author Samuel Thibault
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -74,11 +74,11 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, ...@@ -74,11 +74,11 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (m = 0; m < maxmt; m++) { for (m = 0; m < maxmt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
templm = ((L > 0) && (m == maxmt-1)) ? tempmm : 0; templm = ((L > 0) && (m == maxmt-1)) ? tempmm : 0;
/* TT kernel */ /* TT kernel */
INSERT_TASK_ztpqrt( INSERT_TASK_ztpqrt(
...@@ -89,7 +89,7 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, ...@@ -89,7 +89,7 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
T(m, k) ); T(m, k) );
for (n = k+1; n < B->nt; n++) { for (n = k+1; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztpmqrt( INSERT_TASK_ztpmqrt(
&options, &options,
ChamLeft, ChamConjTrans, ChamLeft, ChamConjTrans,
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* *
* @file pztpqrt_param.c * @file pztpqrt_param.c
* *
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved. * @copyright 2016-2020 KAUST. All rights reserved.
* *
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
* *
* @brief Chameleon computational routines * @brief Chameleon computational routines
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -99,8 +99,8 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K, ...@@ -99,8 +99,8 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K,
/* Combine with ATop and A by merging last pivot with A(k,k) */ /* Combine with ATop and A by merging last pivot with A(k,k) */
{ {
CHAM_desc_t *T; CHAM_desc_t *T;
int temppm = p == ATop->mt-1 ? ATop->m - p * ATop->mb : ATop->mb; int temppm = ATop->get_blkdim( ATop, p, DIM_m, ATop->m );
int tempkn = k == ATop->nt-1 ? ATop->n - k * ATop->nb : ATop->nb; int tempkn = ATop->get_blkdim( ATop, k, DIM_n, ATop->n );
int L, node, tempnn; int L, node, tempnn;
T = TT; T = TT;
...@@ -118,7 +118,7 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K, ...@@ -118,7 +118,7 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K,
T(p, k)); T(p, k));
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
node = A->get_rankof( A, p, n ); node = A->get_rankof( A, p, n );
RUNTIME_data_migrate( sequence, ATop(k, n), node ); RUNTIME_data_migrate( sequence, ATop(k, n), node );
......
...@@ -4,20 +4,20 @@ ...@@ -4,20 +4,20 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon ztradd parallel algorithm * @brief Chameleon ztradd parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -29,16 +29,17 @@ ...@@ -29,16 +29,17 @@
/** /**
* Parallel tile matrix addition (ztradd) - dynamic scheduling * Parallel tile matrix addition (ztradd) - dynamic scheduling
*/ */
void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, void chameleon_pztradd( cham_uplo_t uplo, cham_trans_t trans,
CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A,
CHAMELEON_Complex64_t beta, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *B,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
int tempmm, tempnn, tempmn, tempnm; int tempmm, tempnn, tempmn, tempnm;
int m, n; int m, n, minmn;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) { if (sequence->status != CHAMELEON_SUCCESS) {
...@@ -46,12 +47,14 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -46,12 +47,14 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
} }
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
minmn = chameleon_min( B->mt, B->nt );
switch(uplo){ switch(uplo){
case ChamLower: case ChamLower:
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (n = 0; n < chameleon_min(B->mt,B->nt); n++) { for (n = 0; n < minmn; n++) {
tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; tempnm = B->get_blkdim( B, n, DIM_m, B->m );
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztradd( INSERT_TASK_ztradd(
&options, &options,
...@@ -60,7 +63,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -60,7 +63,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
beta, B(n, n)); beta, B(n, n));
for (m = n+1; m < B->mt; m++) { for (m = n+1; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_zgeadd( INSERT_TASK_zgeadd(
&options, &options,
...@@ -72,8 +75,8 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -72,8 +75,8 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
} }
else { else {
for (n = 0; n < chameleon_min(B->mt,B->nt); n++) { for (n = 0; n < chameleon_min(B->mt,B->nt); n++) {
tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb; tempnm = B->get_blkdim( B, n, DIM_m, B->m );
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztradd( INSERT_TASK_ztradd(
&options, &options,
...@@ -82,7 +85,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -82,7 +85,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
beta, B(n, n)); beta, B(n, n));
for (m = n+1; m < B->mt; m++) { for (m = n+1; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_zgeadd( INSERT_TASK_zgeadd(
&options, &options,
...@@ -95,9 +98,9 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -95,9 +98,9 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
break; break;
case ChamUpper: case ChamUpper:
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (m = 0; m < chameleon_min(B->mt,B->nt); m++) { for (m = 0; m < minmn; m++) {
tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; tempmn = B->get_blkdim( B, m, DIM_n, B->n );
INSERT_TASK_ztradd( INSERT_TASK_ztradd(
&options, &options,
...@@ -106,7 +109,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -106,7 +109,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
beta, B(m, m)); beta, B(m, m));
for (n = m+1; n < B->nt; n++) { for (n = m+1; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgeadd( INSERT_TASK_zgeadd(
&options, &options,
...@@ -118,8 +121,8 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -118,8 +121,8 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
} }
else { else {
for (m = 0; m < chameleon_min(B->mt,B->nt); m++) { for (m = 0; m < chameleon_min(B->mt,B->nt); m++) {
tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb; tempmn = B->get_blkdim( B, m, DIM_n, B->n );
INSERT_TASK_ztradd( INSERT_TASK_ztradd(
&options, &options,
...@@ -128,7 +131,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -128,7 +131,7 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
beta, B(m, m)); beta, B(m, m));
for (n = m+1; n < B->nt; n++) { for (n = m+1; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgeadd( INSERT_TASK_zgeadd(
&options, &options,
...@@ -143,10 +146,10 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -143,10 +146,10 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
default: default:
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgeadd( INSERT_TASK_zgeadd(
&options, &options,
...@@ -158,10 +161,10 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans, ...@@ -158,10 +161,10 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
} }
else { else {
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgeadd( INSERT_TASK_zgeadd(
&options, &options,
......
...@@ -4,21 +4,21 @@ ...@@ -4,21 +4,21 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon ztrmm parallel algorithm * @brief Chameleon ztrmm parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -57,9 +57,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -57,9 +57,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
if (uplo == ChamUpper) { if (uplo == ChamUpper) {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -68,7 +68,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -68,7 +68,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
B(m, n)); /* ldb * tempnn */ B(m, n)); /* ldb * tempnn */
for (k = m+1; k < A->mt; k++) { for (k = m+1; k < A->mt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamNoTrans, trans, ChamNoTrans,
...@@ -85,9 +85,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -85,9 +85,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
*/ */
else { else {
for (m = B->mt-1; m > -1; m--) { for (m = B->mt-1; m > -1; m--) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -114,9 +114,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -114,9 +114,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
else { else {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (m = B->mt-1; m > -1; m--) { for (m = B->mt-1; m > -1; m--) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -141,9 +141,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -141,9 +141,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
*/ */
else { else {
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -152,7 +152,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -152,7 +152,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
B(m, n)); /* ldb * tempnn */ B(m, n)); /* ldb * tempnn */
for (k = m+1; k < A->mt; k++) { for (k = m+1; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamNoTrans, trans, ChamNoTrans,
...@@ -173,9 +173,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -173,9 +173,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
if (uplo == ChamUpper) { if (uplo == ChamUpper) {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (n = B->nt-1; n > -1; n--) { for (n = B->nt-1; n > -1; n--) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -200,9 +200,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -200,9 +200,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
*/ */
else { else {
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -211,7 +211,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -211,7 +211,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
B(m, n)); /* ldb * tempnn */ B(m, n)); /* ldb * tempnn */
for (k = n+1; k < A->mt; k++) { for (k = n+1; k < A->mt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, trans, ChamNoTrans, trans,
...@@ -230,9 +230,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -230,9 +230,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
else { else {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -241,7 +241,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -241,7 +241,7 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
B(m, n)); /* ldb * tempnn */ B(m, n)); /* ldb * tempnn */
for (k = n+1; k < A->mt; k++) { for (k = n+1; k < A->mt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, trans, ChamNoTrans, trans,
...@@ -258,9 +258,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo, ...@@ -258,9 +258,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
*/ */
else { else {
for (n = B->nt-1; n > -1; n--) { for (n = B->nt-1; n > -1; n--) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrmm( INSERT_TASK_ztrmm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2015 The University of Tennessee and The University of * @copyright 2009-2015 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon ztrsm parallel algorithm * @brief Chameleon ztrsm parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak * @author Jakub Kurzak
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -58,10 +58,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -58,10 +58,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
if (uplo == ChamUpper) { if (uplo == ChamUpper) {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < B->mt; k++) { for (k = 0; k < B->mt; k++) {
tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb; tempkm = B->get_blkdim( B, B->mt-1-k, DIM_m, B->m );
lalpha = k == 0 ? alpha : zone; lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -72,7 +72,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -72,7 +72,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) ); RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
for (m = k+1; m < B->mt; m++) { for (m = k+1; m < B->mt; m++) {
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, ChamNoTrans, ChamNoTrans, ChamNoTrans,
...@@ -93,10 +93,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -93,10 +93,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
*/ */
else { else {
for (k = 0; k < B->mt; k++) { for (k = 0; k < B->mt; k++) {
tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; tempkm = B->get_blkdim( B, k, DIM_m, B->m );
lalpha = k == 0 ? alpha : zone; lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -106,9 +106,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -106,9 +106,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
} }
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (m = k+1; m < B->mt; m++) { for (m = k+1; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamNoTrans, trans, ChamNoTrans,
...@@ -132,10 +132,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -132,10 +132,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
else { else {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < B->mt; k++) { for (k = 0; k < B->mt; k++) {
tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; tempkm = B->get_blkdim( B, k, DIM_m, B->m );
lalpha = k == 0 ? alpha : zone; lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -145,9 +145,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -145,9 +145,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
} }
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (m = k+1; m < B->mt; m++) { for (m = k+1; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, ChamNoTrans, ChamNoTrans, ChamNoTrans,
...@@ -168,10 +168,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -168,10 +168,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
*/ */
else { else {
for (k = 0; k < B->mt; k++) { for (k = 0; k < B->mt; k++) {
tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb; tempkm = B->get_blkdim( B, B->mt-1-k, DIM_m, B->m );
lalpha = k == 0 ? alpha : zone; lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -182,7 +182,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -182,7 +182,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) ); RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
for (m = k+1; m < B->mt; m++) { for (m = k+1; m < B->mt; m++) {
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamNoTrans, trans, ChamNoTrans,
...@@ -207,10 +207,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -207,10 +207,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
if (uplo == ChamUpper) { if (uplo == ChamUpper) {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < B->nt; k++) { for (k = 0; k < B->nt; k++) {
tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; tempkn = B->get_blkdim( B, k, DIM_n, B->n );
lalpha = k == 0 ? alpha : zone; lalpha = k == 0 ? alpha : zone;
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -220,9 +220,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -220,9 +220,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
} }
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
for (n = k+1; n < B->nt; n++) { for (n = k+1; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, ChamNoTrans, ChamNoTrans, ChamNoTrans,
...@@ -243,9 +243,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -243,9 +243,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
*/ */
else { else {
for (k = 0; k < B->nt; k++) { for (k = 0; k < B->nt; k++) {
tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb; tempkn = B->get_blkdim( B, B->nt-1-k, DIM_n, B->n );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -277,10 +277,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -277,10 +277,10 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
else { else {
if (trans == ChamNoTrans) { if (trans == ChamNoTrans) {
for (k = 0; k < B->nt; k++) { for (k = 0; k < B->nt; k++) {
tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb; tempkn = B->get_blkdim( B, B->nt-1-k, DIM_n, B->n );
lalpha = k == 0 ? alpha : zone; lalpha = k == 0 ? alpha : zone;
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -310,9 +310,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -310,9 +310,9 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
*/ */
else { else {
for (k = 0; k < B->nt; k++) { for (k = 0; k < B->nt; k++) {
tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; tempkn = B->get_blkdim( B, k, DIM_n, B->n );
for (m = 0; m < B->mt; m++) { for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; tempmm = B->get_blkdim( B, m, DIM_m, B->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
side, uplo, trans, diag, side, uplo, trans, diag,
...@@ -322,7 +322,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c ...@@ -322,7 +322,7 @@ void chameleon_pztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, c
RUNTIME_data_flush( sequence, A(k, k) ); RUNTIME_data_flush( sequence, A(k, k) );
for (n = k+1; n < B->nt; n++) { for (n = k+1; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
ChamNoTrans, trans, ChamNoTrans, trans,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon ztrsmpl parallel algorithm * @brief Chameleon ztrsmpl parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak * @author Jakub Kurzak
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -41,7 +41,7 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP ...@@ -41,7 +41,7 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP
int k, m, n; int k, m, n;
int tempkm, tempnn, tempkmin, tempmm, tempkn; int tempkm, tempnn, tempkmin, tempmm, tempkn;
int ib; int ib, K, DIM_k;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) { if (sequence->status != CHAMELEON_SUCCESS) {
...@@ -49,13 +49,23 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP ...@@ -49,13 +49,23 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP
} }
RUNTIME_options_init(&options, chamctxt, sequence, request); RUNTIME_options_init(&options, chamctxt, sequence, request);
if ( A->m <= A->n ) {
K = A->m;
DIM_k = DIM_m;
}
else {
K = A->n;
DIM_k = DIM_n;
}
ib = CHAMELEON_IB; ib = CHAMELEON_IB;
for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { for (k = 0; k < chameleon_min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
tempkmin = k == chameleon_min(A->mt, A->nt)-1 ? chameleon_min(A->m, A->n)-k*A->mb : A->mb; tempkmin = A->get_blkdim( A, k, DIM_k, K );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zgessm( INSERT_TASK_zgessm(
&options, &options,
tempkm, tempnn, tempkmin, ib, L->nb, tempkm, tempnn, tempkmin, ib, L->nb,
...@@ -65,9 +75,9 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP ...@@ -65,9 +75,9 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP
B(k, n)); B(k, n));
} }
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < B->nt; n++) { for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; tempnn = B->get_blkdim( B, n, DIM_n, B->n );
INSERT_TASK_zssssm( INSERT_TASK_zssssm(
&options, &options,
A->nb, tempnn, tempmm, tempnn, tempkn, ib, L->nb, A->nb, tempnn, tempmm, tempnn, tempkn, ib, L->nb,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon ztrtri parallel algorithm * @brief Chameleon ztrtri parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Julien Langou * @author Julien Langou
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Florent Pruvost * @author Florent Pruvost
* @author Samuel Thibault * @author Samuel Thibault
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -55,9 +55,9 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -55,9 +55,9 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for (k = 0; k < A->nt; k++) { for (k = 0; k < A->nt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = A->get_blkdim( A, k, DIM_n, A->n );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamRight, uplo, ChamNoTrans, diag, ChamRight, uplo, ChamNoTrans, diag,
...@@ -66,7 +66,7 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -66,7 +66,7 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
A(m, k)); A(m, k));
} }
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = A->get_blkdim( A, m, DIM_m, A->m );
for (n = 0; n < k; n++) { for (n = 0; n < k; n++) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
...@@ -104,9 +104,9 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -104,9 +104,9 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
INSERT_TASK_ztrsm( INSERT_TASK_ztrsm(
&options, &options,
ChamLeft, uplo, ChamNoTrans, diag, ChamLeft, uplo, ChamNoTrans, diag,
...@@ -115,7 +115,7 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, ...@@ -115,7 +115,7 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
A(k, n)); A(k, n));
} }
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = A->get_blkdim( A, n, DIM_n, A->n );
for (m = 0; m < k; m++) { for (m = 0; m < k; m++) {
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zunglq parallel algorithm * @brief Chameleon zunglq parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Hatem Ltaief * @author Hatem Ltaief
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
* @author Raphael Boucherie * @author Raphael Boucherie
* @author Samuel Thibault * @author Samuel Thibault
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -93,15 +93,15 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T ...@@ -93,15 +93,15 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
for (k = minMT-1; k >= 0; k--) { for (k = minMT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempAkm = A->get_blkdim( A, k, DIM_m, A->m );
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempAkn = A->get_blkdim( A, k, DIM_n, A->n );
tempkmin = chameleon_min( tempAkn, tempAkm ); tempkmin = chameleon_min( tempAkn, tempAkm );
tempkn = k == Q->nt-1 ? Q->n-k*Q->nb : Q->nb; tempkn = Q->get_blkdim( Q, k, DIM_n, Q->n );
for (n = Q->nt-1; n > k; n--) { for (n = Q->nt-1; n > k; n--) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
RUNTIME_data_migrate( sequence, Q(m, k), RUNTIME_data_migrate( sequence, Q(m, k),
Q->get_rankof( Q, m, n ) ); Q->get_rankof( Q, m, n ) );
...@@ -121,7 +121,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T ...@@ -121,7 +121,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
} }
if ( genD ) { if ( genD ) {
int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb; int tempDkn = D->get_blkdim( D, k, DIM_n, D->n );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
&options, &options,
ChamUpper, tempkmin, tempDkn, ChamUpper, tempkmin, tempDkn,
...@@ -136,7 +136,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T ...@@ -136,7 +136,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
#endif #endif
} }
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
/* Restore the original location of the tiles */ /* Restore the original location of the tiles */
RUNTIME_data_migrate( sequence, Q(m, k), RUNTIME_data_migrate( sequence, Q(m, k),
......
...@@ -4,18 +4,18 @@ ...@@ -4,18 +4,18 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zunglq_param parallel algorithm * @brief Chameleon zunglq_param parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -86,7 +86,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -86,7 +86,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
for (k = K-1; k >= 0; k--) { for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
/* Setting the order of the tiles*/ /* Setting the order of the tiles*/
nbtiles = libhqr_walk_stepk( qrtree, k, tiles ); nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
...@@ -95,7 +95,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -95,7 +95,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
n = tiles[i]; n = tiles[i];
p = qrtree->currpiv(qrtree, k, n); p = qrtree->currpiv(qrtree, k, n);
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) { if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */ /* TS kernel */
...@@ -108,7 +108,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -108,7 +108,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
T = TT; T = TT;
} }
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
node = Q->get_rankof( Q, m, n ); node = Q->get_rankof( Q, m, n );
RUNTIME_data_migrate( sequence, Q(m, p), node ); RUNTIME_data_migrate( sequence, Q(m, p), node );
...@@ -131,11 +131,11 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -131,11 +131,11 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
p = qrtree->getm(qrtree, k, i); p = qrtree->getm(qrtree, k, i);
temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; temppn = A->get_blkdim( A, p, DIM_n, A->n );
tempkmin = chameleon_min(tempkm, temppn); tempkmin = chameleon_min(tempkm, temppn);
if ( genD ) { if ( genD ) {
int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb; int tempDpn = D->get_blkdim( D, p, DIM_n, D->n );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
&options, &options,
ChamUpper, tempkmin, tempDpn, ChamUpper, tempkmin, tempDpn,
...@@ -150,7 +150,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -150,7 +150,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
#endif #endif
} }
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
RUNTIME_data_migrate( sequence, Q(m, p), RUNTIME_data_migrate( sequence, Q(m, p),
Q->get_rankof( Q, m, p ) ); Q->get_rankof( Q, m, p ) );
......
...@@ -4,14 +4,14 @@ ...@@ -4,14 +4,14 @@
* *
* @copyright 2009-2014 The University of Tennessee and The University of * @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved. * Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved. * Univ. Bordeaux. All rights reserved.
* *
*** ***
* *
* @brief Chameleon zunglqrh parallel algorithm * @brief Chameleon zunglqrh parallel algorithm
* *
* @version 1.2.0 * @version 1.3.0
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2 * from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Dulceneia Becker * @author Dulceneia Becker
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* @author Florent Pruvost * @author Florent Pruvost
* @author Samuel Thibault * @author Samuel Thibault
* @author Alycia Lisito * @author Alycia Lisito
* @date 2022-02-22 * @date 2025-01-24
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -89,15 +89,15 @@ void chameleon_pzunglqrh( int genD, int BS, ...@@ -89,15 +89,15 @@ void chameleon_pzunglqrh( int genD, int BS,
for (k = K-1; k >= 0; k--) { for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = A->get_blkdim( A, k, DIM_m, A->m );
lastRD = 0; lastRD = 0;
for (RD = BS; RD < A->nt-k; RD *= 2) for (RD = BS; RD < A->nt-k; RD *= 2)
lastRD = RD; lastRD = RD;
for (RD = lastRD; RD >= BS; RD /= 2) { for (RD = lastRD; RD >= BS; RD /= 2) {
for (N = k; N+RD < A->nt; N += 2*RD) { for (N = k; N+RD < A->nt; N += 2*RD) {
tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb; tempNRDn = A->get_blkdim( A, N+RD, DIM_n, A->n );
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
node = Q->get_rankof( Q, m, N+RD ); node = Q->get_rankof( Q, m, N+RD );
RUNTIME_data_migrate( sequence, Q(m, N), node ); RUNTIME_data_migrate( sequence, Q(m, N), node );
...@@ -119,13 +119,13 @@ void chameleon_pzunglqrh( int genD, int BS, ...@@ -119,13 +119,13 @@ void chameleon_pzunglqrh( int genD, int BS,
} }
} }
for (N = k; N < A->nt; N += BS) { for (N = k; N < A->nt; N += BS) {
tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb; tempNn = A->get_blkdim( A, N, DIM_n, A->n );
tempkmin = chameleon_min(tempkm, tempNn); tempkmin = chameleon_min(tempkm, tempNn);
for (n = chameleon_min(N+BS, A->nt)-1; n > N; n--) { for (n = chameleon_min(N+BS, A->nt)-1; n > N; n--) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = Q->get_blkdim( Q, n, DIM_n, Q->n );
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
node = Q->get_rankof( Q, m, n ); node = Q->get_rankof( Q, m, n );
RUNTIME_data_migrate( sequence, Q(m, N), node ); RUNTIME_data_migrate( sequence, Q(m, N), node );
...@@ -147,7 +147,7 @@ void chameleon_pzunglqrh( int genD, int BS, ...@@ -147,7 +147,7 @@ void chameleon_pzunglqrh( int genD, int BS,
} }
if ( genD ) { if ( genD ) {
int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb; int tempDNn = D->get_blkdim( D, N, DIM_n, D->n );
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
&options, &options,
...@@ -163,7 +163,7 @@ void chameleon_pzunglqrh( int genD, int BS, ...@@ -163,7 +163,7 @@ void chameleon_pzunglqrh( int genD, int BS,
#endif #endif
} }
for (m = k; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = Q->get_blkdim( Q, m, DIM_m, Q->m );
RUNTIME_data_migrate( sequence, Q(m, N), RUNTIME_data_migrate( sequence, Q(m, N),
Q->get_rankof( Q, m, N ) ); Q->get_rankof( Q, m, N ) );
......