Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 033b6011 authored by Mathieu Faverge
Browse files

Fix her2k and simplify syr2k

parent b3235235
No related branches found
No related tags found
1 merge request: !174 Fix hemm/symm summa versions
...@@ -29,15 +29,15 @@ ...@@ -29,15 +29,15 @@
/** /**
* Parallel tile Hermitian rank-2k update - dynamic scheduling * Parallel tile Hermitian rank-2k update - dynamic scheduling
*/ */
void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
double beta, CHAM_desc_t *C, double beta, CHAM_desc_t *C,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
int m, n, k; int m, n, k, mmin, mmax;
int ldak, ldam, ldan, ldcm, ldcn; int ldak, ldam, ldan, ldcm, ldcn;
int ldbk, ldbm, ldbn; int ldbk, ldbm, ldbn;
int tempnn, tempmm, tempkn, tempkm; int tempnn, tempmm, tempkn, tempkm;
...@@ -57,6 +57,16 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, ...@@ -57,6 +57,16 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
ldan = BLKLDD(A, n); ldan = BLKLDD(A, n);
ldbn = BLKLDD(B, n); ldbn = BLKLDD(B, n);
ldcn = BLKLDD(C, n); ldcn = BLKLDD(C, n);
if (uplo == ChamLower) {
mmin = n+1;
mmax = C->mt;
}
else {
mmin = 0;
mmax = n;
}
/* /*
* ChamNoTrans * ChamNoTrans
*/ */
...@@ -72,68 +82,34 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, ...@@ -72,68 +82,34 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
B(n, k), ldbn, B(n, k), ldbn,
dbeta, C(n, n), ldcn); /* ldc * N */ dbeta, C(n, n), ldcn); /* ldc * N */
} }
/* for (m = mmin; m < mmax; m++) {
* ChamNoTrans / ChamLower tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
*/ ldam = BLKLDD(A, m);
if (uplo == ChamLower) { ldbm = BLKLDD(B, m);
for (m = n+1; m < C->mt; m++) { ldcm = BLKLDD(C, m);
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; for (k = 0; k < A->nt; k++) {
ldam = BLKLDD(A, m); tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
ldbm = BLKLDD(B, m); zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
ldcm = BLKLDD(C, m); INSERT_TASK_zgemm(
for (k = 0; k < A->nt; k++) { &options,
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; ChamNoTrans, ChamConjTrans,
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; tempmm, tempnn, tempkn, A->mb,
INSERT_TASK_zgemm( alpha, A(m, k), ldam,
&options, B(n, k), ldbn,
trans, ChamConjTrans, zbeta, C(m, n), ldcm);
tempmm, tempnn, tempkn, A->mb,
conj(alpha), A(m, k), ldam, /* ldam * K */
B(n, k), ldbn, /* ldan * K */
zbeta, C(m, n), ldcm); /* ldc * N */
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamConjTrans, ChamNoTrans, ChamConjTrans,
tempmm, tempnn, tempkn, A->mb, tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldbm, /* ldam * K */ conj(alpha), B(m, k), ldbm,
A(n, k), ldan, /* ldan * K */ A(n, k), ldan,
zone, C(m, n), ldcm); /* ldc * N */ zone, C(m, n), ldcm);
}
}
}
/*
* ChamNoTrans / ChamUpper
*/
else {
for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm(
&options,
trans, ChamConjTrans,
tempnn, tempmm, tempkn, A->mb,
alpha, A(n, k), ldan, /* ldan * K */
B(m, k), ldbm, /* ldam * M */
zbeta, C(n, m), ldcn); /* ldc * M */
INSERT_TASK_zgemm(
&options,
trans, ChamConjTrans,
tempnn, tempmm, tempkn, A->mb,
conj(alpha), B(n, k), ldan, /* ldan * K */
A(m, k), ldam, /* ldam * M */
zone, C(n, m), ldcn); /* ldc * M */
}
} }
} }
} }
/* /*
* Cham[Conj]Trans * ChamConjTrans
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
...@@ -149,63 +125,29 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, ...@@ -149,63 +125,29 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans,
B(k, n), ldbk, B(k, n), ldbk,
dbeta, C(n, n), ldcn); /* ldc * N */ dbeta, C(n, n), ldcn); /* ldc * N */
} }
/* for (m = mmin; m < mmax; m++) {
* Cham[Conj]Trans / ChamLower tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
*/ ldcm = BLKLDD(C, m);
if (uplo == ChamLower) { for (k = 0; k < A->mt; k++) {
for (m = n+1; m < C->mt; m++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; ldak = BLKLDD(A, k);
ldcm = BLKLDD(C, m); ldbk = BLKLDD(B, k);
for (k = 0; k < A->mt; k++) { zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; INSERT_TASK_zgemm(
ldak = BLKLDD(A, k); &options,
ldbk = BLKLDD(B, k); ChamConjTrans, ChamNoTrans,
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone; tempmm, tempnn, tempkm, A->mb,
INSERT_TASK_zgemm( alpha, A(k, m), ldak,
&options, B(k, n), ldbk,
trans, ChamNoTrans, zbeta, C(m, n), ldcm);
tempmm, tempnn, tempkm, A->mb,
alpha, A(k, m), ldak, /* lda * M */
B(k, n), ldbk, /* lda * N */
zbeta, C(m, n), ldcm); /* ldc * N */
INSERT_TASK_zgemm(
&options,
trans, ChamNoTrans,
tempmm, tempnn, tempkm, A->mb,
alpha, B(k, m), ldbk, /* lda * M */
A(k, n), ldak, /* lda * N */
zone, C(m, n), ldcm); /* ldc * N */
}
}
}
/*
* Cham[Conj]Trans / ChamUpper
*/
else {
for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
INSERT_TASK_zgemm(
&options,
trans, ChamNoTrans,
tempnn, tempmm, tempkm, A->mb,
alpha, A(k, n), ldak, /* lda * K */
B(k, m), ldbk, /* lda * M */
zbeta, C(n, m), ldcn); /* ldc * M */
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamNoTrans, ChamConjTrans, ChamNoTrans,
tempnn, tempmm, tempkm, A->mb, tempmm, tempnn, tempkm, A->mb,
conj(alpha), B(k, n), ldbk, /* lda * K */ conj(alpha), B(k, m), ldbk,
A(k, m), ldak, /* lda * M */ A(k, n), ldak,
zone, C(n, m), ldcn); /* ldc * M */ zone, C(m, n), ldcm );
}
} }
} }
} }
......
...@@ -29,15 +29,15 @@ ...@@ -29,15 +29,15 @@
/** /**
* Parallel tile symmetric rank-2k update - dynamic scheduling * Parallel tile symmetric rank-2k update - dynamic scheduling
*/ */
void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans, void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
CHAMELEON_Complex64_t beta, CHAM_desc_t *C, CHAMELEON_Complex64_t beta, CHAM_desc_t *C,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
int m, n, k; int m, n, k, mmin, mmax;
int ldak, ldam, ldan, ldcm, ldcn; int ldak, ldam, ldan, ldcm, ldcn;
int ldbk, ldbm, ldbn; int ldbk, ldbm, ldbn;
int tempnn, tempmm, tempkn, tempkm; int tempnn, tempmm, tempkn, tempkm;
...@@ -56,6 +56,16 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans, ...@@ -56,6 +56,16 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
ldan = BLKLDD(A, n); ldan = BLKLDD(A, n);
ldbn = BLKLDD(B, n); ldbn = BLKLDD(B, n);
ldcn = BLKLDD(C, n); ldcn = BLKLDD(C, n);
if (uplo == ChamLower) {
mmin = n+1;
mmax = C->mt;
}
else {
mmin = 0;
mmax = n;
}
/* /*
* ChamNoTrans * ChamNoTrans
*/ */
...@@ -71,68 +81,34 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans, ...@@ -71,68 +81,34 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
B(n, k), ldbn, B(n, k), ldbn,
zbeta, C(n, n), ldcn); /* ldc * N */ zbeta, C(n, n), ldcn); /* ldc * N */
} }
/* for (m = mmin; m < mmax; m++) {
* ChamNoTrans / ChamLower tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
*/ ldam = BLKLDD(A, m);
if (uplo == ChamLower) { ldbm = BLKLDD(B, m);
for (m = n+1; m < C->mt; m++) { ldcm = BLKLDD(C, m);
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; for (k = 0; k < A->nt; k++) {
ldam = BLKLDD(A, m); tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
ldbm = BLKLDD(B, m); zbeta = k == 0 ? beta : zone;
ldcm = BLKLDD(C, m); INSERT_TASK_zgemm(
for (k = 0; k < A->nt; k++) { &options,
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; ChamNoTrans, ChamTrans,
zbeta = k == 0 ? beta : zone; tempmm, tempnn, tempkn, A->mb,
INSERT_TASK_zgemm( alpha, A(m, k), ldam,
&options, B(n, k), ldbn,
trans, ChamTrans, zbeta, C(m, n), ldcm);
tempmm, tempnn, tempkn, A->mb,
alpha, A(m, k), ldam, /* ldam * K */
B(n, k), ldbn, /* ldan * K */
zbeta, C(m, n), ldcm); /* ldc * N */
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamTrans, ChamNoTrans, ChamTrans,
tempmm, tempnn, tempkn, A->mb, tempmm, tempnn, tempkn, A->mb,
alpha, B(m, k), ldbm, /* ldam * K */ alpha, B(m, k), ldbm,
A(n, k), ldan, /* ldan * K */ A(n, k), ldan,
zone, C(m, n), ldcm); /* ldc * N */ zone, C(m, n), ldcm);
}
}
}
/*
* ChamNoTrans / ChamUpper
*/
else {
for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm(
&options,
trans, ChamTrans,
tempnn, tempmm, tempkn, A->mb,
alpha, A(n, k), ldan, /* ldan * K */
B(m, k), ldbm, /* ldam * M */
zbeta, C(n, m), ldcn); /* ldc * M */
INSERT_TASK_zgemm(
&options,
trans, ChamTrans,
tempnn, tempmm, tempkn, A->mb,
alpha, B(n, k), ldan, /* ldan * K */
A(m, k), ldam, /* ldam * M */
zone, C(n, m), ldcn); /* ldc * M */
}
} }
} }
} }
/* /*
* Cham[Conj]Trans * ChamTrans
*/ */
else { else {
for (k = 0; k < A->mt; k++) { for (k = 0; k < A->mt; k++) {
...@@ -148,63 +124,29 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans, ...@@ -148,63 +124,29 @@ void chameleon_pzsyr2k(cham_uplo_t uplo, cham_trans_t trans,
B(k, n), ldbk, B(k, n), ldbk,
zbeta, C(n, n), ldcn); /* ldc * N */ zbeta, C(n, n), ldcn); /* ldc * N */
} }
/* for (m = mmin; m < mmax; m++) {
* Cham[Conj]Trans / ChamLower tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
*/ ldcm = BLKLDD(C, m);
if (uplo == ChamLower) { for (k = 0; k < A->mt; k++) {
for (m = n+1; m < C->mt; m++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; ldak = BLKLDD(A, k);
ldcm = BLKLDD(C, m); ldbk = BLKLDD(B, k);
for (k = 0; k < A->mt; k++) { zbeta = k == 0 ? beta : zone;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; INSERT_TASK_zgemm(
ldak = BLKLDD(A, k); &options,
ldbk = BLKLDD(B, k); ChamTrans, ChamNoTrans,
zbeta = k == 0 ? beta : zone; tempmm, tempnn, tempkm, A->mb,
INSERT_TASK_zgemm( alpha, A(k, m), ldak,
&options, B(k, n), ldbk,
trans, ChamNoTrans, zbeta, C(m, n), ldcm);
tempmm, tempnn, tempkm, A->mb,
alpha, A(k, m), ldak, /* lda * M */
B(k, n), ldbk, /* lda * N */
zbeta, C(m, n), ldcm); /* ldc * N */
INSERT_TASK_zgemm(
&options,
trans, ChamNoTrans,
tempmm, tempnn, tempkm, A->mb,
alpha, B(k, m), ldbk, /* lda * M */
A(k, n), ldak, /* lda * N */
zone, C(m, n), ldcm); /* ldc * N */
}
}
}
/*
* Cham[Conj]Trans / ChamUpper
*/
else {
for (m = n+1; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
zbeta = k == 0 ? beta : zone;
INSERT_TASK_zgemm(
&options,
trans, ChamNoTrans,
tempnn, tempmm, tempkm, A->mb,
alpha, A(k, n), ldak, /* lda * K */
B(k, m), ldbk, /* lda * M */
zbeta, C(n, m), ldcn); /* ldc * M */
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, &options,
trans, ChamNoTrans, ChamTrans, ChamNoTrans,
tempnn, tempmm, tempkm, A->mb, tempmm, tempnn, tempkm, A->mb,
alpha, B(k, n), ldbk, /* lda * K */ alpha, B(k, m), ldbk,
A(k, m), ldak, /* lda * M */ A(k, n), ldak,
zone, C(n, m), ldcn); /* ldc * M */ zone, C(m, n), ldcm );
}
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment