Commit 0a540293 authored by Mathieu Faverge

Fix a lot of QR/LQ functions (I thought it was already in the trunk ...)

parent b38b6d74
...@@ -96,9 +96,11 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -96,9 +96,11 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */ /* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
#endif
for (k = 0; k < min(A->mt, A->nt); k++) { for (k = 0; k < min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
...@@ -116,13 +118,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -116,13 +118,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
MorseUpper, A->mb, A->nb, A->nb, MorseUpper, A->mb, A->nb, A->nb,
A(k, k), ldak, A(k, k), ldak,
DIAG(k), ldak ); DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, A->mb, A->nb, MorseLower, A->mb, A->nb,
0., 1., 0., 1.,
DIAG(k), A->mb ); DIAG(k), ldak );
#endif
#endif #endif
} }
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
...@@ -132,7 +134,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -132,7 +134,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
&options, &options,
MorseRight, MorseConjTrans, MorseRight, MorseConjTrans,
tempmm, tempkn, tempkn, ib, T->nb, tempmm, tempkn, tempkn, ib, T->nb,
DIAG(k), A->mb, DIAG(k), ldak,
T(k, k), T->mb, T(k, k), T->mb,
A(m, k), ldam); A(m, k), ldam);
} }
...@@ -162,6 +164,9 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -162,6 +164,9 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -104,10 +104,12 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -104,10 +104,12 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */ /* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->nt + BS -1 ) / BS; nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
for (k = 0; k < min(A->mt, A->nt); k++) { for (k = 0; k < min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
...@@ -127,13 +129,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -127,13 +129,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
MorseUpper, tempkm, tempNn, A->nb, MorseUpper, tempkm, tempNn, A->nb,
A(k, N), ldak, A(k, N), ldak,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, tempkm, tempNn, MorseLower, tempkm, tempNn,
0., 1., 0., 1.,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#endif
#endif #endif
} }
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
...@@ -199,6 +201,9 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -199,6 +201,9 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -97,9 +97,11 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -97,9 +97,11 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
#endif
for (k = 0; k < minMNT; k++) { for (k = 0; k < minMNT; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
...@@ -117,13 +119,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -117,13 +119,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
MorseLower, A->mb, A->nb, A->nb, MorseLower, A->mb, A->nb, A->nb,
A(k, k), ldak, A(k, k), ldak,
DIAG(k), ldak ); DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseUpper, A->mb, A->nb, MorseUpper, A->mb, A->nb,
0., 1., 0., 1.,
DIAG(k), ldak ); DIAG(k), ldak );
#endif
#endif #endif
} }
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
...@@ -162,6 +164,9 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, ...@@ -162,6 +164,9 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -87,7 +87,9 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -87,7 +87,9 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
*/ */
ws_worker = max( ws_worker, ib * (ib + A->nb) ); ws_worker = max( ws_worker, ib * (ib + A->nb) );
ws_worker = max( ws_worker, ib * A->nb * 2 ); ws_worker = max( ws_worker, ib * A->nb * 2 );
#endif
#if defined(CHAMELEON_USE_MAGMA)
/* Host space /* Host space
* *
* zgeqrt = ib * (A->nb+3*ib) + A->nb ) * zgeqrt = ib * (A->nb+3*ib) + A->nb )
...@@ -102,10 +104,12 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -102,10 +104,12 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */ /* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->mt + BS -1 ) / BS; nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt); K = min(A->mt, A->nt);
for (k = 0; k < K; k++) { for (k = 0; k < K; k++) {
...@@ -126,13 +130,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -126,13 +130,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
MorseLower, tempMm, A->nb, A->nb, MorseLower, tempMm, A->nb, A->nb,
A(M, k), ldaM, A(M, k), ldaM,
DIAG(M, k), ldaM ); DIAG(M, k), ldaM );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseUpper, tempMm, A->nb, MorseUpper, tempMm, A->nb,
0., 1., 0., 1.,
DIAG(M, k), ldaM ); DIAG(M, k), ldaM );
#endif
#endif #endif
} }
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
...@@ -198,6 +202,9 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ...@@ -198,6 +202,9 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -91,11 +91,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -91,11 +91,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */ /* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
for (k = min(A->mt, A->nt)-1; k >= 0; k--) { for (k = minMT-1; k >= 0; k--) {
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkmin = min( tempAkn, tempAkm ); tempkmin = min( tempAkn, tempAkm );
...@@ -121,23 +123,23 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -121,23 +123,23 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
&options, &options,
MorseUpper, tempkmin, tempkn, A->nb, MorseUpper, tempkmin, tempkn, A->nb,
A(k, k), ldak, A(k, k), ldak,
DIAG(k), A->mb ); DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, tempkmin, tempkn, MorseLower, tempkmin, tempkn,
0., 1., 0., 1.,
DIAG(k), A->mb ); DIAG(k), ldak );
#endif #endif
for (m = 0; m < Q->mt; m++) { #endif
for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m); ldqm = BLKLDD(Q, m);
MORSE_TASK_zunmlq( MORSE_TASK_zunmlq(
&options, &options,
MorseRight, MorseNoTrans, MorseRight, MorseNoTrans,
tempmm, tempkn, tempkmin, ib, T->nb, tempmm, tempkn, tempkmin, ib, T->nb,
DIAG(k), A->mb, DIAG(k), ldak,
T(k, k), T->mb, T(k, k), T->mb,
Q(m, k), ldqm); Q(m, k), ldqm);
} }
...@@ -146,6 +148,9 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -146,6 +148,9 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -89,10 +89,12 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -89,10 +89,12 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */ /* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->nt + BS -1 ) / BS; nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt); K = min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) { for (k = K-1; k >= 0; k--) {
...@@ -104,7 +106,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -104,7 +106,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
for (RD = lastRD; RD >= BS; RD /= 2) { for (RD = lastRD; RD >= BS; RD /= 2) {
for (N = k; N+RD < A->nt; N += 2*RD) { for (N = k; N+RD < A->nt; N += 2*RD) {
tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb; tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
for (m = 0; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m ); ldqm = BLKLDD(Q, m );
MORSE_TASK_zttmlq( MORSE_TASK_zttmlq(
...@@ -125,7 +127,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -125,7 +127,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
for (n = min(N+BS, A->nt)-1; n > N; n--) { for (n = min(N+BS, A->nt)-1; n > N; n--) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
for (m = 0; m < Q->mt; m++) { for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m); ldqm = BLKLDD(Q, m);
MORSE_TASK_ztsmlq( MORSE_TASK_ztsmlq(
...@@ -145,7 +147,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -145,7 +147,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MorseUpper, tempkmin, tempNn, A->nb, MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak, A(k, N), ldak,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
...@@ -153,7 +154,8 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -153,7 +154,8 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
0., 1., 0., 1.,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#endif #endif
for (m = 0; m < Q->mt; m++) { #endif
for (m = k; m < Q->mt; m++) {
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldqm = BLKLDD(Q, m); ldqm = BLKLDD(Q, m);
MORSE_TASK_zunmlq( MORSE_TASK_zunmlq(
...@@ -171,6 +173,9 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -171,6 +173,9 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -55,7 +55,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -55,7 +55,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
int ldak, ldqk, ldam, ldqm; int ldak, ldqk, ldam, ldqm;
int tempmm, tempnn, tempkmin, tempkm; int tempmm, tempnn, tempkmin, tempkm;
int tempAkm, tempAkn; int tempAkm, tempAkn;
int ib; int ib, minMT;
morse = morse_context_self(); morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS) if (sequence->status != MORSE_SUCCESS)
...@@ -64,6 +64,12 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -64,6 +64,12 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
ib = MORSE_IB; ib = MORSE_IB;
if (A->m > A->n) {
minMT = A->nt;
} else {
minMT = A->mt;
}
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = A->nb * ib * ztsmqr = A->nb * ib
...@@ -85,11 +91,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -85,11 +91,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */ /* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
for (k = min(A->mt, A->nt)-1; k >= 0; k--) { for (k = minMT-1; k >= 0; k--) {
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
tempkmin = min( tempAkn, tempAkm ); tempkmin = min( tempAkn, tempAkm );
...@@ -100,7 +108,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -100,7 +108,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
ldam = BLKLDD(A, m); ldam = BLKLDD(A, m);
ldqm = BLKLDD(Q, m); ldqm = BLKLDD(Q, m);
for (n = 0; n < Q->nt; n++) { for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_ztsmqr( MORSE_TASK_ztsmqr(
&options, &options,
...@@ -118,7 +126,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -118,7 +126,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
MorseLower, tempkm, tempkmin, A->nb, MorseLower, tempkm, tempkmin, A->nb,
A(k, k), ldak, A(k, k), ldak,
DIAG(k), ldak ); DIAG(k), ldak );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
...@@ -126,7 +133,8 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -126,7 +133,8 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
0., 1., 0., 1.,
DIAG(k), ldak ); DIAG(k), ldak );
#endif #endif
for (n = 0; n < Q->nt; n++) { #endif
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_zunmqr( MORSE_TASK_zunmqr(
&options, &options,
...@@ -141,6 +149,9 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, ...@@ -141,6 +149,9 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all(); MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG); morse_desc_mat_free(DIAG);
free(DIAG); free(DIAG);
#endif
} }
...@@ -58,7 +58,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -58,7 +58,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
int k, m, n; int k, m, n;
int K, M, RD, lastRD; int K, M, RD, lastRD;
int ldaM, ldam, ldaMRD; int ldaM, ldam, ldaMRD;
int ldbM, ldbm, ldbMRD; int ldqM, ldqm, ldqMRD;
int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin; int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
int ib; int ib;
int nblk; int nblk;
...@@ -91,10 +91,12 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -91,10 +91,12 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */ /* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->mt + BS -1 ) / BS; nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
#endif
K = min(A->mt, A->nt); K = min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) { for (k = K-1; k >= 0; k--) {
...@@ -105,18 +107,18 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -105,18 +107,18 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
for (RD = lastRD; RD >= BS; RD /= 2) { for (RD = lastRD; RD >= BS; RD /= 2) {
for (M = k; M+RD < A->mt; M += 2*RD) { for (M = k; M+RD < A->mt; M += 2*RD) {
tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb; tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
ldbM = BLKLDD(Q, M ); ldqM = BLKLDD(Q, M );
ldbMRD = BLKLDD(Q, M+RD); ldqMRD = BLKLDD(Q, M+RD);
ldaMRD = BLKLDD(A, M+RD); ldaMRD = BLKLDD(A, M+RD);
for (n = 0; n < Q->nt; n++) { for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_zttmqr( MORSE_TASK_zttmqr(
&options, &options,
MorseLeft, MorseNoTrans, MorseLeft, MorseNoTrans,
A->nb, tempnn, tempMRDm, tempnn, A->nb, tempnn, tempMRDm, tempnn,
tempkn, ib, T->nb, tempkn, ib, T->nb,
Q (M, n), ldbM, Q (M, n), ldqM,
Q (M+RD, n), ldbMRD, Q (M+RD, n), ldqMRD,
A (M+RD, k), ldaMRD, A (M+RD, k), ldaMRD,
T2(M+RD, k), T->mb); T2(M+RD, k), T->mb);
} }
...@@ -126,21 +128,21 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -126,21 +128,21 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb; tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
tempkmin = min(tempMm, tempkn); tempkmin = min(tempMm, tempkn);
ldaM = BLKLDD(A, M); ldaM = BLKLDD(A, M);
ldbM = BLKLDD(Q, M); ldqM = BLKLDD(Q, M);
for (m = min(M+BS, A->mt)-1; m > M; m--) { for (m = min(M+BS, A->mt)-1; m > M; m--) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldbm = BLKLDD(Q, m); ldqm = BLKLDD(Q, m);
ldam = BLKLDD(A, m); ldam = BLKLDD(A, m);
for (n = 0; n < Q->nt; n++) { for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_ztsmqr( MORSE_TASK_ztsmqr(
&options, &options,
MorseLeft, MorseNoTrans, MorseLeft, MorseNoTrans,
A->nb, tempnn, tempmm, tempnn, A->nb, tempnn, tempmm, tempnn,
tempkn, ib, T->nb, tempkn, ib, T->nb,
Q(M, n), ldbM, Q(M, n), ldqM,
Q(m, n), ldbm, Q(m, n), ldqm,
A(m, k), ldam, A(m, k), ldam,
T(m, k), T->mb); T(m, k), T->mb);
} }
...@@ -151,7 +153,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -151,7 +153,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MorseLower, tempMm, tempkmin, A->nb, MorseLower, tempMm, tempkmin, A->nb,
A(M, k), ldaM, A(M, k), ldaM,
DIAG(M, k), ldaM ); DIAG(M, k), ldaM );
#endif
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA) #if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
...@@ -159,7 +160,8 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -159,7 +160,8 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
0., 1., 0., 1.,
DIAG(M, k), ldaM ); DIAG(M, k), ldaM );
#endif #endif
for (n = 0; n < Q->nt; n++) { #endif
for (n = k; n < Q->nt; n++) {
tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
MORSE_TASK_zunmqr( MORSE_TASK_zunmqr(
&options, &options,
...@@ -168,7 +170,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -168,7 +170,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
tempkmin, ib, T->nb, tempkmin, ib, T->nb,
DIAG(M, k), ldaM, DIAG(M, k), ldaM,
T(M, k), T->mb, T(M, k), T->mb,
Q(M, n), ldbM); Q(M, n), ldqM);
} }
} }
} }
...@@ -176,6 +178,9 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, ...@@ -176,6 +178,9 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_finalize(&options, morse); RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();