From e33e5138ae2405b88c66cafbbf83efbfcae9ea35 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Sun, 25 Oct 2015 06:18:54 +0000 Subject: [PATCH] Update Hatem's potrimm with the dataflush --- compute/pzpotrimm.c | 181 +++++++++++++++++++++++--------------------- 1 file changed, 94 insertions(+), 87 deletions(-) diff --git a/compute/pzpotrimm.c b/compute/pzpotrimm.c index 729f45a56..7869d314e 100644 --- a/compute/pzpotrimm.c +++ b/compute/pzpotrimm.c @@ -76,6 +76,7 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de for (k = 0; k < A->mt; k++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); + MORSE_TASK_zpotrf( &options, MorseLower, tempkm, A->mb, @@ -120,88 +121,89 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de /* * ZTRTRI */ - for (n = 0; n < A->nt; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldan = BLKLDD(A, n); - for (m = n+1; m < A->mt; m++) { + for (k = 0; k < A->nt; k++) { + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + ldak = BLKLDD(A, k); + for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); MORSE_TASK_ztrsm( &options, MorseRight, uplo, MorseNoTrans, MorseNonUnit, - tempmm, tempnn, A->mb, - mzone, A(n, n), ldan, - A(m, n), ldam); + tempmm, tempkn, A->mb, + mzone, A(k, k), ldak, + A(m, k), ldam); } - for (m = n+1; m < A->mt; m++) { + for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); - for (k = 0; k < n; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + for (n = 0; n < k; n++) { MORSE_TASK_zgemm( &options, MorseNoTrans, MorseNoTrans, - tempmm, tempkn, tempnn, A->mb, - zone, A(m, n), ldam, - A(n, k), ldan, - zone, A(m, k), ldam); + tempmm, A->nb, tempkn, A->mb, + zone, A(m, k), ldam, + A(k, n), ldak, + zone, A(m, n), ldam); } + MORSE_TASK_dataflush( &options, A(m, k) ); } - for (m = 0; m < n; m++) { - tempmm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; + for (n = 0; n < k; n++) { + MORSE_TASK_dataflush( &options, A(k, n) ); MORSE_TASK_ztrsm( &options, MorseLeft, uplo, MorseNoTrans, MorseNonUnit, - tempnn, tempmm, A->mb, - zone, A(n, n), ldan, - A(n, m), ldan); + tempkn, A->nb, A->mb, + zone, A(k, k), ldak, + A(k, n), ldak); } + MORSE_TASK_dataflush( &options, A(k, k) ); MORSE_TASK_ztrtri( &options, uplo, MorseNonUnit, - tempnn, A->mb, - A(n, n), ldan, A->nb*n); + tempkn, A->mb, + A(k, k), ldak, A->nb*k); } /* * ZLAUUM */ - for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - for(n = 0; n < m; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + for (k = 0; k < A->mt; k++) { + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + for(n = 0; n < k; n++) { + ldan = BLKLDD(A, n); MORSE_TASK_zherk( &options, uplo, MorseConjTrans, - tempnn, tempmm, A->mb, - 1.0, A(m, n), ldam, - 1.0, A(n, n), A->mb); + A->mb, tempkm, A->mb, + 1.0, A(k, n), ldak, + 1.0, A(n, n), ldan); - for(k = n+1; k < m; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + for(m = n+1; m < k; m++) { + ldam = BLKLDD(A, m); MORSE_TASK_zgemm( &options, MorseConjTrans, MorseNoTrans, - tempkm, tempnn, tempmm, A->mb, - zone, A(m, k), ldam, - A(m, n), ldam, - zone, A(k, n), A->mb); + A->mb, A->nb, tempkm, A->mb, + 1.0, A(k, m), ldak, + A(k, n), ldak, + 1.0, A(m, n), ldam); } } - for (n = 0; n < m; n++) { - tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; + for (n = 0; n < k; n++) { + MORSE_TASK_dataflush( &options, A(k, n) ); MORSE_TASK_ztrmm( &options, MorseLeft, uplo, MorseConjTrans, MorseNonUnit, - tempmm, tempnn, A->mb, - zone, A(m, m), ldam, - A(m, n), ldam); + tempkm, A->nb, A->mb, + 1.0, A(k, k), ldak, + A(k, n), ldak); } + MORSE_TASK_dataflush( &options, A(k, k) ); MORSE_TASK_zlauum( &options, - uplo, - tempmm, - A->mb, A(m, m), ldam); + uplo, tempkm, A->mb, + A(k, k), ldak); } /* * ZSYMM @@ -277,7 +279,6 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de } MORSE_TASK_dataflush( &options, A(k, k) ); - for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb; ldam = BLKLDD(A, m); @@ -289,7 +290,6 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de -1.0, A(k, m), ldak, 1.0, A(m, m), ldam); - for (n = m+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; @@ -307,84 +307,91 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de /* * ZTRTRI */ - for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - for (n = m+1; n < A->nt; n++) { + for (k = 0; k < A->mt; k++) { + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; MORSE_TASK_ztrsm( &options, MorseLeft, uplo, MorseNoTrans, MorseNonUnit, - tempmm, tempnn, A->mb, - mzone, A(m, m), ldam, - A(m, n), ldam); + tempkm, tempnn, A->mb, + mzone, A(k, k), ldak, + A(k, n), ldak); } - for (n = 0; n < m; n++) { + for (n = k+1; n < A->nt; n++) { tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldan = BLKLDD(A, n); - for (k = m+1; k < A->nt; k++) { - tempkn = k == A->nt-1 ? 
A->n-k*A->nb : A->nb; + for (m = 0; m < k; m++) { + ldam = BLKLDD(A, m); MORSE_TASK_zgemm( &options, MorseNoTrans, MorseNoTrans, - tempnn, tempkn, tempmm, A->mb, - zone, A(n, m), ldan, - A(m, k), ldam, - zone, A(n, k), ldan); + A->mb, tempnn, tempkm, A->mb, + zone, A(m, k), ldam, + A(k, n), ldak, + zone, A(m, n), ldam); } + MORSE_TASK_dataflush( &options, A(k, n) ); + } + for (m = 0; m < k; m++) { + ldam = BLKLDD(A, m); + MORSE_TASK_dataflush( &options, A(m, k) ); MORSE_TASK_ztrsm( &options, MorseRight, uplo, MorseNoTrans, MorseNonUnit, - tempnn, tempmm, A->mb, - zone, A(m, m), ldam, - A(n, m), ldan); + A->mb, tempkm, A->mb, + zone, A(k, k), ldak, + A(m, k), ldam); } + MORSE_TASK_dataflush( &options, A(k, k) ); MORSE_TASK_ztrtri( &options, uplo, MorseNonUnit, - tempmm, A->mb, - A(m, m), ldam, A->mb*m); + tempkm, A->mb, + A(k, k), ldak, A->mb*k); } /* * ZLAUUM */ - for (m = 0; m < A->mt; m++) { - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - for (n = 0; n < m; n++) { - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + for (k = 0; k < A->mt; k++) { + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + ldak = BLKLDD(A, k); + + for (m = 0; m < k; m++) { + ldam = BLKLDD(A, m); MORSE_TASK_zherk( &options, uplo, MorseNoTrans, - tempnn, tempmm, A->mb, - 1.0, A(n, m), A->mb, - 1.0, A(n, n), A->mb); + A->mb, tempkn, A->mb, + 1.0, A(m, k), ldam, + 1.0, A(m, m), ldam); - for (k = n+1; k < m; k++){ - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + for (n = m+1; n < k; n++){ + ldan = BLKLDD(A, n); MORSE_TASK_zgemm( &options, MorseNoTrans, MorseConjTrans, - tempnn, tempkm, tempmm, A->mb, - zone, A(n, m), A->mb, - A(k, m), A->mb, - zone, A(n, k), A->mb); + A->mb, A->nb, tempkn, A->mb, + 1.0, A(m, k), ldam, + A(n, k), ldan, + 1.0, A(m, n), ldam); } } - for (n = 0; n < m; n++) { - tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; + for (m = 0; m < k; m++) { + ldam = BLKLDD(A, m); + MORSE_TASK_dataflush( &options, A(m, k) ); MORSE_TASK_ztrmm( &options, MorseRight, uplo, MorseConjTrans, MorseNonUnit, - tempnn, tempmm, A->mb, - zone, A(m, m), ldam, - A(n, m), A->mb); + A->mb, tempkn, A->mb, + 1.0, A(k, k), ldak, + A(m, k), ldam); } + MORSE_TASK_dataflush( &options, A(k, k) ); MORSE_TASK_zlauum( &options, - uplo, - tempmm, - A->mb, A(m, m), ldam); + uplo, tempkn, A->mb, + A(k, k), ldak); } /* * ZSYMM -- GitLab