From 43144c81573c2800ba3584e682f55bcda47871e6 Mon Sep 17 00:00:00 2001 From: Raphael Boucherie <raphael.boucherie@inria.fr> Date: Thu, 11 May 2017 12:28:21 +0200 Subject: [PATCH] testing for ungqr_param works --- compute/pzungqr_param.c | 128 +++++++++++++++++++++------------- compute/pzunmqr_param.c | 13 ++++ compute/zungqr_param.c | 30 +++++--- control/compute_z.h | 2 +- include/morse_z.h | 6 +- testing/testing_zgels_param.c | 13 ++-- 6 files changed, 122 insertions(+), 70 deletions(-) diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index d16098dc9..b1c41fead 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -32,30 +32,30 @@ #define A(m,n) A, m, n #define Q(m,n) Q, m, n -#define T(m,n) T, m, n +#define TS(m,n) TS, m, n +#define TT(m,n) TT, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(m,n) D, m, n #else -#define DIAG(k) A, k, k +#define D(m,n) A, m, n #endif /** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling */ void morse_pzungqr_param(const libhqr_tree_t *qrtree, - MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, - MORSE_sequence_t *sequence, MORSE_request_t *request) + MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; + MORSE_desc_t *D = NULL; int k, m, n, i, p; - int ldak, ldqk, ldam, ldqm; - int tempmm, tempnn, tempkmin, tempkm; - int tempAkm, tempAkn; + int ldak, ldqk, ldam, ldqm, ldqp; + int tempmm, tempnn, tempkmin, tempkm, tempkn; int ib, minMT; int *tiles; @@ -99,68 +99,96 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); #if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q); + { + /* necessary to avoid dependencies between tasks regarding the diag tile */ + D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); + morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, ); + } #endif for (k = minMT-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); - tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempkmin = chameleon_min( tempAkn, tempAkm ); - tempkm = k == Q->mt-1 ? Q->m-k*Q->mb : Q->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); ldqk = BLKLDD(Q, k); /* Setting the order of tiles */ libhqr_treewalk(qrtree, k, tiles); - for (i = Q->mt - 1; i > k; i--) { + for (i = Q->mt - 2; i >= k; i--) { m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldam = BLKLDD(A, m); ldqm = BLKLDD(Q, m); - for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_ztsmqr( - &options, - MorseLeft, MorseNoTrans, - Q->mb, tempnn, tempmm, tempnn, tempAkn, ib, T->nb, - Q(k, n), ldqk, - Q(m, n), ldqm, - A(m, k), ldam, - T(m, k), T->mb); + ldqp = BLKLDD(Q, p); + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k , m) == 0){ + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + MORSE_TASK_ztsmqr( + &options, + MorseLeft, MorseNoTrans, + Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, + Q(p, n), ldqp, + Q(m, n), ldqm, + A(m, k), ldam, + TS(m, k), TS->mb); + } + } + else { + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + MORSE_TASK_zttmqr( + &options, + MorseLeft, MorseNoTrans, + Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, + Q(p, n), ldqp, + Q(m, n), ldqm, + A(m, k), ldam, + TT(m, k), TT->mb); + } } } + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + ldqm = BLKLDD(Q, m); + #if defined(CHAMELEON_COPY_DIAG) - MORSE_TASK_zlacpy( - &options, - MorseLower, tempkm, tempkmin, A->nb, - A(k, k), ldak, - DIAG(k), ldak ); + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, tempkmin, A->nb, + A(m, k), ldam, + D(m, k), ldam ); #if defined(CHAMELEON_USE_CUDA) - MORSE_TASK_zlaset( - &options, - MorseUpper, tempkm, tempkmin, - 0., 1., - DIAG(k), ldak ); + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, tempkmin, + 0., 1., + D(m, k), ldam ); #endif #endif - for (n = k; n < Q->nt; n++) { - tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; - MORSE_TASK_zunmqr( - &options, - MorseLeft, MorseNoTrans, - tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, - T(k, k), T->mb, - Q(k, n), ldqk); + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + MORSE_TASK_zunmqr( + &options, + MorseLeft, MorseNoTrans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D(m, k), ldam, + TS(m, k), TS->mb, + Q(m, n), ldqm); + } } - RUNTIME_iteration_pop(morse); } RUNTIME_options_ws_free(&options); @@ -169,8 +197,8 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, #if defined(CHAMELEON_COPY_DIAG) MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); + morse_desc_mat_free(D); + free(D); #endif - (void)DIAG; + (void)D; } diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index f18a979a3..17e9b2db4 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -181,6 +181,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + for (i = B->mt-1; i > k; i--) { m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); @@ -242,6 +246,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); + + /* Setting the order of tiles */ + libhqr_treewalk(qrtree, k, tiles); + for (n = B->nt-1; n > k; n--) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); @@ -297,6 +305,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; ldak = BLKLDD(A, k); + #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, @@ -322,6 +331,10 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, T(k, k), T->mb, B(m, k), ldbm); } + + /* Setting the order of tiles */ + libhqr_treewalk(qrtree, k, tiles); + for (n = k+1; n < B->nt; n++) { tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 6dddf1cd9..24bec0c62 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -40,6 +40,9 @@ * ******************************************************************************* * + * @param[in] qrtree + * The tree used for the factorization + * * @param[in] M * The number of rows of the matrix Q. M >= 0. * @@ -84,7 +87,8 @@ int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, - MORSE_desc_t *descT, + MORSE_desc_t *descTS, + MORSE_desc_t *descTT, MORSE_Complex64_t *Q, int LDQ) { int NB; @@ -149,7 +153,7 @@ int MORSE_zungqr_param(const libhqr_tree_t *qrtree, /* }*/ /* Call the tile interface */ - MORSE_zungqr_param_Tile_Async(qrtree, &descA, descT, &descQ, sequence, &request); + MORSE_zungqr_param_Tile_Async(qrtree, &descA, descTS, descTT, &descQ, sequence, &request); /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ morse_zooptile2lap(descQ, Q, NB, NB, LDQ, N, sequence, &request); @@ -167,7 +171,8 @@ int MORSE_zungqr_param(const libhqr_tree_t *qrtree, return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile * @@ -201,7 +206,7 @@ int MORSE_zungqr_param(const libhqr_tree_t *qrtree, * @sa MORSE_zgeqrf_Tile * ******************************************************************************/ -int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q) +int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q) { MORSE_context_t *morse; MORSE_sequence_t *sequence = NULL; @@ -214,7 +219,7 @@ int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_ return MORSE_ERR_NOT_INITIALIZED; } morse_sequence_create(morse, &sequence); - MORSE_zungqr_param_Tile_Async(qrtree, A, T, Q, sequence, &request); + MORSE_zungqr_param_Tile_Async(qrtree, A, TS, TT, Q, sequence, &request); morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(Q); @@ -224,7 +229,8 @@ int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_ return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile_Async * @@ -251,7 +257,7 @@ int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_ * @sa MORSE_zgeqrf_Tile_Async * ******************************************************************************/ -int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *Q, +int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; @@ -280,14 +286,18 @@ int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, morse_error("MORSE_zungqr_param_Tile", "invalid first descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(T) != MORSE_SUCCESS) { + if (morse_desc_check(TS) != MORSE_SUCCESS) { morse_error("MORSE_zungqr_param_Tile", "invalid second descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(Q) != MORSE_SUCCESS) { + if (morse_desc_check(TT) != MORSE_SUCCESS) { morse_error("MORSE_zungqr_param_Tile", "invalid third descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } + if (morse_desc_check(Q) != MORSE_SUCCESS) { + morse_error("MORSE_zungqr_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } /* Check input arguments */ if (A->nb != A->mb || Q->nb != Q->mb) { morse_error("MORSE_zungqr_param_Tile", "only square tiles supported"); @@ -299,7 +309,7 @@ int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, return MORSE_SUCCESS; */ morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); - morse_pzungqr_param(qrtree, A, Q, T, sequence, request); + morse_pzungqr_param(qrtree, A, Q, TS, TT, sequence, request); return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index 934003711..0601c8ffe 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -168,6 +168,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enu MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, +void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); #endif /* defined(CHAMELEON_USE_LIBHQR) */ diff --git a/include/morse_z.h b/include/morse_z.h index ac045809a..1c7e6896c 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -280,9 +280,9 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, int MORSE_zgeqrf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT); int MORSE_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT); int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); -int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB); -int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B); -int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); + int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); + int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); + int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB); int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B); int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/testing/testing_zgels_param.c b/testing/testing_zgels_param.c index d87f776e3..85e5b429b 100644 --- a/testing/testing_zgels_param.c +++ b/testing/testing_zgels_param.c @@ -33,6 +33,7 @@ #include <stdio.h> #include <string.h> #include <math.h> +#include <assert.h> #include <morse.h> #include <coreblas/include/cblas.h> @@ -115,9 +116,9 @@ int testing_zgels_param(int argc, char **argv) matrix.p = 1; /* Initialize qrtree */ - domino = 0; /* -1 */ - llvl = 0; /* -1 */ - hlvl = 0; /* -1 */ + domino = -1; /* -1 */ + llvl = -1; /* -1 */ + hlvl = -1; /* -1 */ qr_a = TS->mt; /* -1 */ qr_p = 1; /* matrix.p */ tsrr = 0; /* 0 */ @@ -142,7 +143,7 @@ int testing_zgels_param(int argc, char **argv) /* MORSE ZGELS */ if (M >= N) /* Building the economy-size Q */ - MORSE_zungqr(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zungqr_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); else /* Building the economy-size Q */ MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); @@ -197,7 +198,7 @@ int testing_zgels_param(int argc, char **argv) /* Morse routines */ MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); - MORSE_zungqr(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); MORSE_zgeqrs(M, N, NRHS, A2, LDA, TS, B2, LDB); /* Check the orthogonality, factorization and the solution */ @@ -276,7 +277,7 @@ int testing_zgels_param(int argc, char **argv) printf(" Computational tests pass if scaled residuals are less than 60.\n"); MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); - MORSE_zungqr(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); MORSE_zunmqr(MorseLeft, MorseConjTrans, M, NRHS, N, A2, LDA, TS, B2, LDB); MORSE_ztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB); } -- GitLab