Commit 0a540293 authored by Mathieu Faverge

Fix a lot of QR/LQ functions (I thought it was already in the trunk ...)

parent b38b6d74
......@@ -96,9 +96,11 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
#endif
for (k = 0; k < min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
......@@ -116,13 +118,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
MorseUpper, A->mb, A->nb, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, A->mb, A->nb,
0., 1.,
DIAG(k), A->mb );
DIAG(k), ldak );
#endif
#endif
}
for (m = k+1; m < A->mt; m++) {
......@@ -132,7 +134,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
&options,
MorseRight, MorseConjTrans,
tempmm, tempkn, tempkn, ib, T->nb,
DIAG(k), A->mb,
DIAG(k), ldak,
T(k, k), T->mb,
A(m, k), ldam);
}
......@@ -162,6 +164,9 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -104,10 +104,12 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
for (k = 0; k < min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
......@@ -127,13 +129,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
MorseUpper, tempkm, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkm, tempNn,
0., 1.,
DIAG(k, N), ldak );
#endif
#endif
}
for (m = k+1; m < A->mt; m++) {
......@@ -199,6 +201,9 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -97,9 +97,11 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
#endif
for (k = 0; k < minMNT; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
......@@ -117,13 +119,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
MorseLower, A->mb, A->nb, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseUpper, A->mb, A->nb,
0., 1.,
DIAG(k), ldak );
#endif
#endif
}
for (n = k+1; n < A->nt; n++) {
......@@ -162,6 +164,9 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -87,7 +87,9 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
*/
ws_worker = max( ws_worker, ib * (ib + A->nb) );
ws_worker = max( ws_worker, ib * A->nb * 2 );
#endif
#if defined(CHAMELEON_USE_MAGMA)
/* Host space
*
* zgeqrt = ib * (A->nb+3*ib) + A->nb )
......@@ -102,10 +104,12 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt);
for (k = 0; k < K; k++) {
......@@ -126,13 +130,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
MorseLower, tempMm, A->nb, A->nb,
A(M, k), ldaM,
DIAG(M, k), ldaM );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempMm, A->nb,
0., 1.,
DIAG(M, k), ldaM );
#endif
#endif
}
for (n = k+1; n < A->nt; n++) {
......@@ -198,6 +202,9 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -91,11 +91,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
for (k = min(A->mt, A->nt)-1; k >= 0; k--) {
for (k = minMT-1; k >= 0; k--) {
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkmin = min( tempAkn, tempAkm );
......@@ -121,23 +123,23 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
&options,
MorseUpper, tempkmin, tempkn, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#endif
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkn,
0., 1.,
DIAG(k), A->mb );
DIAG(k), ldak );
#endif
for (m = 0; m < Q->mt; m++) {
#endif
for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m);
MORSE_TASK_zunmlq(
&options,
MorseRight, MorseNoTrans,
tempmm, tempkn, tempkmin, ib, T->nb,
DIAG(k), A->mb,
DIAG(k), ldak,
T(k, k), T->mb,
Q(m, k), ldqm);
}
......@@ -146,6 +148,9 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -89,10 +89,12 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) {
......@@ -104,7 +106,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
for (RD = lastRD; RD >= BS; RD /= 2) {
for (N = k; N+RD < A->nt; N += 2*RD) {
tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
for (m = 0; m < Q->mt; m++) {
for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m );
MORSE_TASK_zttmlq(
......@@ -125,7 +127,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
for (n = min(N+BS, A->nt)-1; n > N; n--) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
for (m = 0; m < Q->mt; m++) {
for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m);
MORSE_TASK_ztsmlq(
......@@ -145,7 +147,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
......@@ -153,7 +154,8 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
0., 1.,
DIAG(k, N), ldak );
#endif
for (m = 0; m < Q->mt; m++) {
#endif
for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m);
MORSE_TASK_zunmlq(
......@@ -171,6 +173,9 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -55,7 +55,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
int ldak, ldqk, ldam, ldqm;
int tempmm, tempnn, tempkmin, tempkm;
int tempAkm, tempAkn;
int ib;
int ib, minMT;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -64,6 +64,12 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
ib = MORSE_IB;
if (A->m > A->n) {
minMT = A->nt;
} else {
minMT = A->mt;
}
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
......@@ -85,11 +91,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
for (k = min(A->mt, A->nt)-1; k >= 0; k--) {
for (k = minMT-1; k >= 0; k--) {
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkmin = min( tempAkn, tempAkm );
......@@ -100,7 +108,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldam = BLKLDD(A, m);
ldqm = BLKLDD(Q, m);
for (n = 0; n < Q->nt; n++) {
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_ztsmqr(
&options,
......@@ -118,7 +126,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
MorseLower, tempkm, tempkmin, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
......@@ -126,7 +133,8 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
0., 1.,
DIAG(k), ldak );
#endif
for (n = 0; n < Q->nt; n++) {
#endif
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_zunmqr(
&options,
......@@ -141,6 +149,9 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -58,7 +58,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
int k, m, n;
int K, M, RD, lastRD;
int ldaM, ldam, ldaMRD;
int ldbM, ldbm, ldbMRD;
int ldqM, ldqm, ldqMRD;
int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
int ib;
int nblk;
......@@ -91,10 +91,12 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) {
......@@ -105,18 +107,18 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
for (RD = lastRD; RD >= BS; RD /= 2) {
for (M = k; M+RD < A->mt; M += 2*RD) {
tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
ldbM = BLKLDD(Q, M );
ldbMRD = BLKLDD(Q, M+RD);
ldqM = BLKLDD(Q, M );
ldqMRD = BLKLDD(Q, M+RD);
ldaMRD = BLKLDD(A, M+RD);
for (n = 0; n < Q->nt; n++) {
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_zttmqr(
&options,
MorseLeft, MorseNoTrans,
A->nb, tempnn, tempMRDm, tempnn,
tempkn, ib, T->nb,
Q (M, n), ldbM,
Q (M+RD, n), ldbMRD,
Q (M, n), ldqM,
Q (M+RD, n), ldqMRD,
A (M+RD, k), ldaMRD,
T2(M+RD, k), T->mb);
}
......@@ -126,21 +128,21 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
tempkmin = min(tempMm, tempkn);
ldaM = BLKLDD(A, M);
ldbM = BLKLDD(Q, M);
ldqM = BLKLDD(Q, M);
for (m = min(M+BS, A->mt)-1; m > M; m--) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldbm = BLKLDD(Q, m);
ldqm = BLKLDD(Q, m);
ldam = BLKLDD(A, m);
for (n = 0; n < Q->nt; n++) {
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_ztsmqr(
&options,
MorseLeft, MorseNoTrans,
A->nb, tempnn, tempmm, tempnn,
tempkn, ib, T->nb,
Q(M, n), ldbM,
Q(m, n), ldbm,
Q(M, n), ldqM,
Q(m, n), ldqm,
A(m, k), ldam,
T(m, k), T->mb);
}
......@@ -151,7 +153,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MorseLower, tempMm, tempkmin, A->nb,
A(M, k), ldaM,
DIAG(M, k), ldaM );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
......@@ -159,7 +160,8 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
0., 1.,
DIAG(M, k), ldaM );
#endif
for (n = 0; n < Q->nt; n++) {
#endif
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_zunmqr(
&options,
......@@ -168,7 +170,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
tempkmin, ib, T->nb,
DIAG(M, k), ldaM,
T(M, k), T->mb,
Q(M, n), ldbM);
Q(M, n), ldqM);
}
}
}
......@@ -176,6 +178,9 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -74,18 +74,18 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
}
/*
* zunmlq = A->nb * ib
* ztsmlq = A->nb * ib
* zunmlq = A->mb * ib
* ztsmlq = A->mb * ib
*/
ws_worker = A->nb * ib;
ws_worker = A->mb * ib;
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
/* Worker space
*
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
* zunmlq = A->mb * ib
* ztsmlq = 2 * A->mb * ib
*/
ws_worker = max( ws_worker, ib * A->nb * 2 );
ws_worker = max( ws_worker, ib * A->mb * 2 );
#endif
ws_worker *= sizeof(MORSE_Complex64_t);
......@@ -94,8 +94,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
#if defined(CHAMELEON_COPY_DIAG)
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
if (side == MorseLeft ) {
if (trans == MorseNoTrans) {
......@@ -103,7 +105,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
* MorseLeft / MorseNoTrans
*/
for (k = 0; k < minMT; k++) {
tempkm = k == B->mt -1 ? B->m -k*B->mb : B->mb;
tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
......@@ -112,14 +114,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
MorseUpper, tempkmin, tempkm, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#endif
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkm,
0., 1.,
DIAG(k), A->mb );
DIAG(k), ldak );
#endif
#endif
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
......@@ -127,7 +129,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
side, trans,
tempkm, tempnn, tempkmin, ib, T->nb,
DIAG(k), A->mb,
DIAG(k), ldak,
T(k, k), T->mb,
B(k, n), ldbk);
}
......@@ -153,7 +155,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
* MorseLeft / MorseConjTrans
*/
for (k = minMT-1; k >= 0; k--) {
tempkm = k == B->mt -1 ? B->m -k*B->mb : B->mb;
tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
......@@ -177,14 +179,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
MorseUpper, tempkmin, tempkm, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#endif
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkm,
0., 1.,
DIAG(k), A->mb );
DIAG(k), ldak );
#endif
#endif
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
......@@ -192,7 +194,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
side, trans,
tempkm, tempnn, tempkmin, ib, T->nb,
DIAG(k), A->mb,
DIAG(k), ldak,
T(k, k), T->mb,
B(k, n), ldbk);
}
......@@ -228,14 +230,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
MorseUpper, tempkmin, tempkn, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#endif
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkn,
0., 1.,
DIAG(k), A->mb );
DIAG(k), ldak );
#endif
#endif
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
......@@ -244,7 +246,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
side, trans,
tempmm, tempkn, tempkmin, ib, T->nb,
DIAG(k), A->mb,
DIAG(k), ldak,
T(k, k), T->mb,
B(m, k), ldbm);
}
......@@ -263,14 +265,14 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
MorseUpper, tempkmin, tempkn, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#endif
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkn,
0., 1.,
DIAG(k), A->mb );
DIAG(k), ldak );
#endif
#endif
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
......@@ -279,7 +281,7 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
side, trans,
tempmm, tempkn, tempkmin, ib, T->nb,
DIAG(k), A->mb,
DIAG(k), ldak,
T(k, k), T->mb,
B(m, k), ldbm);
}
......@@ -305,6 +307,9 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -92,9 +92,11 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
#if defined(CHAMELEON_COPY_DIAG)
nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt);
if (side == MorseLeft ) {
......@@ -116,13 +118,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
0., 1.,
DIAG(k, N), ldak );
#endif
#endif
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
......@@ -228,13 +230,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
0., 1.,
DIAG(k, N), ldak );
#endif
#endif
for (n = 0; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
......@@ -305,13 +307,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
0., 1.,
DIAG(k, N), ldak );
#endif
#endif
for (m = 0; m < B->mt; m++) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
......@@ -342,15 +344,15 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MORSE_TASK_zlacpy(
&options,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldaN,
DIAG(k, N), ldaN );
#endif
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
0., 1.,
DIAG(k, N), ldaN );
DIAG(k, N), ldak );
#endif
#endif
for (m = 0; m < B->mt; m++) {
ldbm = BLKLDD(B, m);
......@@ -360,7 +362,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
side, trans,
tempmm, tempNn,
tempkmin, ib, T->nb,
DIAG(k, N), ldaN,
DIAG(k, N), ldak,
T(k, N), T->mb,
B(m, N), ldbm);
}
......@@ -406,6 +408,9 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,