Commit f3f08db3 authored by Mathieu Faverge
Browse files

Add dataflush to the symm kernel in Hatem's potrimm

parent e33e5138
......@@ -43,7 +43,7 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
MORSE_option_t options;
int k, m, n;
int lda, ldb, ldc;
int ldbm, ldcm;
int ldak, ldam, ldan;
int tempkm, tempmm, tempnn, tempkn;
......@@ -206,27 +206,30 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
A(k, k), ldak);
}
/*
* ZSYMM
* ZSYMM Right / Lower
*/
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldc = BLKLDD(C, m);
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
lda = BLKLDD(A, n);
ldb = BLKLDD(B, m);
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldbm = BLKLDD(B, m);
ldcm = BLKLDD(C, m);
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
ldan = BLKLDD(A, n);
if (k < n) {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(n, k), lda, /* lda * K */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm, /* ldbm * K */
A(n, k), ldan, /* ldan * K */
zbeta, C(m, n), ldcm); /* ldcm * Y */
}
else {
if (k == n) {
......@@ -234,21 +237,25 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
&options,
MorseRight, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak, /* ldak * Y */
B(m, k), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, k), ldak, /* ldak * Y */
B(m, k), ldbm, /* ldbm * Y */
zbeta, C(m, n), ldcm); /* ldcm * Y */
}
else {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseNoTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(k, n), ldak, /* ldak * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm, /* ldbm * K */
A(k, n), ldak, /* ldak * Y */
zbeta, C(m, n), ldcm); /* ldcm * Y */
}
}
}
MORSE_TASK_dataflush( &options, B(m, k) );
}
for (n = 0; n <= k; n++) {
MORSE_TASK_dataflush( &options, A(k, n) );
}
}
}
......@@ -394,27 +401,30 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
A(k, k), ldak);
}
/*
* ZSYMM
* ZSYMM Right / Upper
*/
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldc = BLKLDD(C, m);
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
lda = BLKLDD(A, n);
ldb = BLKLDD(B, m);
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
for (k = 0; k < C->nt; k++) {
tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldbm = BLKLDD(B, m);
ldcm = BLKLDD(C, m);
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
ldan = BLKLDD(A, n);
if (k < n) {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseNoTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(k, n), ldak, /* ldak * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm, /* ldbm * K */
A(k, n), ldak, /* ldak * Y */
zbeta, C(m, n), ldcm); /* ldcm * Y */
}
else {
if (k == n) {
......@@ -422,21 +432,25 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de
&options,
MorseRight, uplo,
tempmm, tempnn, A->mb,
alpha, A(k, k), ldak, /* ldak * Y */
B(m, k), ldb, /* ldb * Y */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, A(k, k), ldak, /* ldak * Y */
B(m, k), ldbm, /* ldbm * Y */
zbeta, C(m, n), ldcm); /* ldcm * Y */
}
else {
MORSE_TASK_zgemm(
&options,
MorseNoTrans, MorseTrans,
tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldb, /* ldb * K */
A(n, k), lda, /* lda * K */
zbeta, C(m, n), ldc); /* ldc * Y */
alpha, B(m, k), ldbm, /* ldbm * K */
A(n, k), ldan, /* ldan * K */
zbeta, C(m, n), ldcm); /* ldcm * Y */
}
}
}
MORSE_TASK_dataflush( &options, B(m, k) );
}
for (m = 0; m <= k; m++) {
MORSE_TASK_dataflush( &options, A(m, k) );
}
}
}
......
......@@ -286,9 +286,9 @@ int MORSE_zsymm_Tile(MORSE_enum side, MORSE_enum uplo,
MORSE_zsymm_Tile_Async(side, uplo, alpha, A, B, beta, C, sequence, &request);
morse_sequence_wait(morse, sequence);
RUNTIME_desc_getoncpu(A);
RUNTIME_desc_getoncpu(B);
RUNTIME_desc_getoncpu(C);
RUNTIME_desc_getoncpu(B);
RUNTIME_desc_getoncpu(C);
status = sequence->status;
morse_sequence_destroy(morse, sequence);
return status;
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment