Commit 998c7a2a authored by Mathieu Faverge

Reorder loops in SYMM to add the dataflush calls for distributed

parent f3f08db3
......@@ -19,11 +19,9 @@
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Jakub Kurzak
* @author Hatem Ltaief
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Ali M Charara
* @date 2010-11-15
* @precisions normal z -> s d c
*
......@@ -47,7 +45,7 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
int ldak, ldam, ldan;
int tempkm, tempmm, tempnn, tempkn;
MORSE_Complex64_t alpha = (MORSE_Complex64_t) 1.0;
MORSE_Complex64_t alpha = (MORSE_Complex64_t) 1.0;
MORSE_Complex64_t beta = (MORSE_Complex64_t) 0.0;
MORSE_Complex64_t zbeta;
MORSE_Complex64_t zone = (MORSE_Complex64_t) 1.0;
......
......@@ -43,7 +43,7 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo,
MORSE_option_t options;
int k, m, n;
int lda, ldak, ldb, ldc;
int ldak, ldam, ldan, ldbk, ldbm, ldcm;
int tempmm, tempnn, tempkn, tempkm;
MORSE_Complex64_t zbeta;
......@@ -54,30 +54,36 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo,
return;
RUNTIME_options_init(&options, morse, sequence, request);
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldc = BLKLDD(C, m);
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
/*
* MorseLeft / MorseLower
*/
if (side == MorseLeft) {
lda = BLKLDD(A, m);
if (uplo == MorseLower) {
for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
ldak = BLKLDD(A, k);
ldb = BLKLDD(B, k);
zbeta = k == 0 ? beta : zone;
/*
* MorseLeft
*/
if (side == MorseLeft) {
for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
zbeta = k == 0 ? beta : zone;
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldam = BLKLDD(A, m);
ldcm = BLKLDD(C, m);
/*
* MorseLeft / MorseLower
*/
if (uplo == MorseLower) {
if (k < m) {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseNoTrans,
tempmm, tempnn, tempkm, A->mb,
alpha, A(m, k), lda, /* lda * K */
B(k, n), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(m, k), ldam,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
else {
if (k == m) {
......@@ -85,39 +91,33 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo,
&options,
side, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak, /* ldak * X */
B(k, n), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, k), ldak,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
else {
MORSE_TASK_zgemm(
&options,
MorseTrans, MorseNoTrans,
tempmm, tempnn, tempkm, A->mb,
alpha, A(k, m), ldak, /* ldak * X */
B(k, n), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, m), ldak,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
}
}
}
/*
* MorseLeft / MorseUpper
*/
else {
for (k = 0; k < C->mt; k++) {
tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
ldak = BLKLDD(A, k);
ldb = BLKLDD(B, k);
zbeta = k == 0 ? beta : zone;
/*
* MorseLeft / MorseUpper
*/
else {
if (k < m) {
MORSE_TASK_zgemm(
&options,
MorseTrans, MorseNoTrans,
tempmm, tempnn, tempkm, A->mb,
alpha, A(k, m), ldak, /* ldak * X */
B(k, n), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, m), ldak,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
else {
if (k == m) {
......@@ -125,104 +125,134 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo,
&options,
side, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak, /* ldak * K */
B(k, n), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, k), ldak,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
else {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseNoTrans,
tempmm, tempnn, tempkm, A->mb,
alpha, A(m, k), lda, /* lda * K */
B(k, n), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(m, k), ldam,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
}
}
}
MORSE_TASK_dataflush( &options, B(k, n) );
}
if (uplo == MorseLower) {
for (n = 0; n <= k; n++) {
MORSE_TASK_dataflush( &options, A(k, n) );
}
}
/*
* MorseRight / MorseLower
*/
else {
lda = BLKLDD(A, n);
ldb = BLKLDD(B, m);
if (uplo == MorseLower) {
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
for (m = 0; m <= k; m++) {
MORSE_TASK_dataflush( &options, A(m, k) );
}
}
}
}
/*
* MorseRight
*/
else {
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldbm = BLKLDD(B, m);
ldcm = BLKLDD(C, m);
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
ldan = BLKLDD(A, n);
/*
* MorseRight / MorseLower
*/
if (uplo == MorseLower) {
if (k < n) {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(n, k), lda, /* lda * K */
zbeta, C(m, n), ldc); /* ldc * Y */
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldbm,
A(n, k), ldan,
zbeta, C(m, n), ldcm);
}
else {
if (k == n) {
MORSE_TASK_zsymm(
&options,
side, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak, /* ldak * Y */
B(m, k), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
MORSE_TASK_zsymm(
&options,
MorseRight, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak,
B(m, k), ldbm,
zbeta, C(m, n), ldcm);
}
else {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseNoTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(k, n), ldak, /* ldak * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm,
A(k, n), ldak,
zbeta, C(m, n), ldcm);
}
}
}
}
/*
* MorseRight / MorseUpper
*/
else {
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
/*
* MorseRight / MorseUpper
*/
else {
if (k < n) {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseNoTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(k, n), ldak, /* ldak * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm,
A(k, n), ldak,
zbeta, C(m, n), ldcm);
}
else {
if (k == n) {
MORSE_TASK_zsymm(
&options,
side, uplo,
MorseRight, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak, /* ldak * Y */
B(m, k), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, k), ldak,
B(m, k), ldbm,
zbeta, C(m, n), ldcm);
}
else {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(n, k), lda, /* lda * K */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm,
A(n, k), ldan,
zbeta, C(m, n), ldcm);
}
}
}
}
MORSE_TASK_dataflush( &options, B(m, k) );
}
if (uplo == MorseLower) {
for (n = 0; n <= k; n++) {
MORSE_TASK_dataflush( &options, A(k, n) );
}
}
else {
for (m = 0; m <= k; m++) {
MORSE_TASK_dataflush( &options, A(m, k) );
}
}
}
}
......
......@@ -74,9 +74,9 @@
*
******************************************************************************/
int MORSE_zpotrimm(MORSE_enum uplo, int N,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t *C, int LDC)
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t *C, int LDC)
{
int NB;
int status;
......@@ -310,17 +310,14 @@ int MORSE_zpotrimm_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B,
return morse_request_fail(sequence, request, -1);
}
/* Quick return */
/*
if (max(N, 0) == 0)
return MORSE_SUCCESS;
*/
/*
if (max(N, 0) == 0)
return MORSE_SUCCESS;
*/
morse_pzpotrimm(uplo, A, B, C, sequence, request);
/*
morse_pztrtri(uplo, MorseNonUnit, A, sequence, request);
morse_pzlauum(uplo, A, sequence, request);
morse_pztrtri(uplo, MorseNonUnit, A, sequence, request);
morse_pzlauum(uplo, A, sequence, request);
*/
return MORSE_SUCCESS;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment