diff --git a/compute/pzpotrimm.c b/compute/pzpotrimm.c index 5f6ab13e200e781900ea8cedf14ccaa514b0c4dc..8f7345db6578753db08af413052b5935c6a151e4 100644 --- a/compute/pzpotrimm.c +++ b/compute/pzpotrimm.c @@ -19,11 +19,9 @@ * @version 2.5.0 * @comment This file has been automatically generated * from Plasma 2.5.0 for MORSE 1.0.0 - * @author Jakub Kurzak * @author Hatem Ltaief * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede + * @author Ali M Charara * @date 2010-11-15 * @precisions normal z -> s d c * @@ -47,7 +45,7 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de int ldak, ldam, ldan; int tempkm, tempmm, tempnn, tempkn; - MORSE_Complex64_t alpha = (MORSE_Complex64_t) 1.0; + MORSE_Complex64_t alpha = (MORSE_Complex64_t) 1.0; MORSE_Complex64_t beta = (MORSE_Complex64_t) 0.0; MORSE_Complex64_t zbeta; MORSE_Complex64_t zone = (MORSE_Complex64_t) 1.0; diff --git a/compute/pzsymm.c b/compute/pzsymm.c index d3d1c7f8f4f006af21d897cb5a3d25e906e9291c..eda68c35c9538f4c8935e2c8730b661cdc4a1a75 100644 --- a/compute/pzsymm.c +++ b/compute/pzsymm.c @@ -43,7 +43,7 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo, MORSE_option_t options; int k, m, n; - int lda, ldak, ldb, ldc; + int ldak, ldam, ldan, ldbk, ldbm, ldcm; int tempmm, tempnn, tempkn, tempkm; MORSE_Complex64_t zbeta; @@ -54,30 +54,36 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo, return; RUNTIME_options_init(&options, morse, sequence, request); - for (m = 0; m < C->mt; m++) { - tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; - ldc = BLKLDD(C, m); - for (n = 0; n < C->nt; n++) { - tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; - /* - * MorseLeft / MorseLower - */ - if (side == MorseLeft) { - lda = BLKLDD(A, m); - if (uplo == MorseLower) { - for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; - ldak = BLKLDD(A, k); - ldb = BLKLDD(B, k); - zbeta = k == 0 ? beta : zone; + /* + * MorseLeft + */ + if (side == MorseLeft) { + for (k = 0; k < C->mt; k++) { + tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; + ldak = BLKLDD(A, k); + ldbk = BLKLDD(B, k); + zbeta = k == 0 ? beta : zone; + + for (n = 0; n < C->nt; n++) { + tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + + for (m = 0; m < C->mt; m++) { + tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + ldam = BLKLDD(A, m); + ldcm = BLKLDD(C, m); + + /* + * MorseLeft / MorseLower + */ + if (uplo == MorseLower) { if (k < m) { MORSE_TASK_zgemm( &options, MorseNoTrans, MorseNoTrans, tempmm, tempnn, tempkm, A->mb, - alpha, A(m, k), lda, /* lda * K */ - B(k, n), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(m, k), ldam, + B(k, n), ldbk, + zbeta, C(m, n), ldcm); } else { if (k == m) { @@ -85,39 +91,33 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo, &options, side, uplo, tempmm, tempnn, A->mb, - alpha, A(k, k), ldak, /* ldak * X */ - B(k, n), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(k, k), ldak, + B(k, n), ldbk, + zbeta, C(m, n), ldcm); } else { MORSE_TASK_zgemm( &options, MorseTrans, MorseNoTrans, tempmm, tempnn, tempkm, A->mb, - alpha, A(k, m), ldak, /* ldak * X */ - B(k, n), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(k, m), ldak, + B(k, n), ldbk, + zbeta, C(m, n), ldcm); } } } - } - /* - * MorseLeft / MorseUpper - */ - else { - for (k = 0; k < C->mt; k++) { - tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb; - ldak = BLKLDD(A, k); - ldb = BLKLDD(B, k); - zbeta = k == 0 ? beta : zone; + /* + * MorseLeft / MorseUpper + */ + else { if (k < m) { MORSE_TASK_zgemm( &options, MorseTrans, MorseNoTrans, tempmm, tempnn, tempkm, A->mb, - alpha, A(k, m), ldak, /* ldak * X */ - B(k, n), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(k, m), ldak, + B(k, n), ldbk, + zbeta, C(m, n), ldcm); } else { if (k == m) { @@ -125,104 +125,134 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo, &options, side, uplo, tempmm, tempnn, A->mb, - alpha, A(k, k), ldak, /* ldak * K */ - B(k, n), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(k, k), ldak, + B(k, n), ldbk, + zbeta, C(m, n), ldcm); } else { MORSE_TASK_zgemm( &options, MorseNoTrans, MorseNoTrans, tempmm, tempnn, tempkm, A->mb, - alpha, A(m, k), lda, /* lda * K */ - B(k, n), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(m, k), ldam, + B(k, n), ldbk, + zbeta, C(m, n), ldcm); } } } } + MORSE_TASK_dataflush( &options, B(k, n) ); + } + if (uplo == MorseLower) { + for (n = 0; n <= k; n++) { + MORSE_TASK_dataflush( &options, A(k, n) ); + } } - /* - * MorseRight / MorseLower - */ else { - lda = BLKLDD(A, n); - ldb = BLKLDD(B, m); - if (uplo == MorseLower) { - for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; - ldak = BLKLDD(A, k); - zbeta = k == 0 ? beta : zone; + for (m = 0; m <= k; m++) { + MORSE_TASK_dataflush( &options, A(m, k) ); + } + } + } + } + /* + * MorseRight + */ + else { + for (k = 0; k < C->nt; k++) { + tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; + ldak = BLKLDD(A, k); + zbeta = k == 0 ? beta : zone; + + for (m = 0; m < C->mt; m++) { + tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + ldbm = BLKLDD(B, m); + ldcm = BLKLDD(C, m); + + for (n = 0; n < C->nt; n++) { + tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + ldan = BLKLDD(A, n); + + /* + * MorseRight / MorseLower + */ + if (uplo == MorseLower) { if (k < n) { - MORSE_TASK_zgemm( - &options, - MorseNoTrans, MorseTrans, - tempmm, tempnn, tempkn, A->mb, - alpha, B(m, k), ldb, /* ldb * K */ - A(n, k), lda, /* lda * K */ - zbeta, C(m, n), ldc); /* ldc * Y */ + MORSE_TASK_zgemm( + &options, + MorseNoTrans, MorseTrans, + tempmm, tempnn, tempkn, A->mb, + alpha, B(m, k), ldbm, + A(n, k), ldan, + zbeta, C(m, n), ldcm); } else { if (k == n) { - MORSE_TASK_zsymm( - &options, - side, uplo, - tempmm, tempnn, A->mb, - alpha, A(k, k), ldak, /* ldak * Y */ - B(m, k), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + MORSE_TASK_zsymm( + &options, + MorseRight, uplo, + tempmm, tempnn, A->mb, + alpha, A(k, k), ldak, + B(m, k), ldbm, + zbeta, C(m, n), ldcm); } else { MORSE_TASK_zgemm( &options, MorseNoTrans, MorseNoTrans, tempmm, tempnn, tempkn, A->mb, - alpha, B(m, k), ldb, /* ldb * K */ - A(k, n), ldak, /* ldak * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, B(m, k), ldbm, + A(k, n), ldak, + zbeta, C(m, n), ldcm); } } } - } - /* - * MorseRight / MorseUpper - */ - else { - for (k = 0; k < C->nt; k++) { - tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb; - ldak = BLKLDD(A, k); - zbeta = k == 0 ? beta : zone; + /* + * MorseRight / MorseUpper + */ + else { if (k < n) { MORSE_TASK_zgemm( &options, MorseNoTrans, MorseNoTrans, tempmm, tempnn, tempkn, A->mb, - alpha, B(m, k), ldb, /* ldb * K */ - A(k, n), ldak, /* ldak * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, B(m, k), ldbm, + A(k, n), ldak, + zbeta, C(m, n), ldcm); } else { if (k == n) { MORSE_TASK_zsymm( &options, - side, uplo, + MorseRight, uplo, tempmm, tempnn, A->mb, - alpha, A(k, k), ldak, /* ldak * Y */ - B(m, k), ldb, /* ldb * Y */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, A(k, k), ldak, + B(m, k), ldbm, + zbeta, C(m, n), ldcm); } else { MORSE_TASK_zgemm( &options, MorseNoTrans, MorseTrans, tempmm, tempnn, tempkn, A->mb, - alpha, B(m, k), ldb, /* ldb * K */ - A(n, k), lda, /* lda * K */ - zbeta, C(m, n), ldc); /* ldc * Y */ + alpha, B(m, k), ldbm, + A(n, k), ldan, + zbeta, C(m, n), ldcm); } } } } + MORSE_TASK_dataflush( &options, B(m, k) ); + } + if (uplo == MorseLower) { + for (n = 0; n <= k; n++) { + MORSE_TASK_dataflush( &options, A(k, n) ); + } + } + else { + for (m = 0; m <= k; m++) { + MORSE_TASK_dataflush( &options, A(m, k) ); + } } } } diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c index 179743c050f7ac082781041f3a9a96ca5d034658..39347010e6402523b1fc6eb0718f3e713e97cd3d 100644 --- a/compute/zpotrimm.c +++ b/compute/zpotrimm.c @@ -74,9 +74,9 @@ * ******************************************************************************/ int MORSE_zpotrimm(MORSE_enum uplo, int N, - MORSE_Complex64_t *A, int LDA, - MORSE_Complex64_t *B, int LDB, - MORSE_Complex64_t *C, int LDC) + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *C, int LDC) { int NB; int status; @@ -310,17 +310,14 @@ int MORSE_zpotrimm_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, return morse_request_fail(sequence, request, -1); } /* Quick return */ -/* - if (max(N, 0) == 0) - return MORSE_SUCCESS; -*/ + /* + if (max(N, 0) == 0) + return MORSE_SUCCESS; + */ morse_pzpotrimm(uplo, A, B, C, sequence, request); - - /* - morse_pztrtri(uplo, MorseNonUnit, A, sequence, request); - - morse_pzlauum(uplo, A, sequence, request); + morse_pztrtri(uplo, MorseNonUnit, A, sequence, request); + morse_pzlauum(uplo, A, sequence, request); */ return MORSE_SUCCESS;