diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d79ea7c27b10f4daeb3c921ea92c43ba3702555..19f96ba3cc8c2641e4fad84fd79982794b874991 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,7 +260,7 @@ include_directories(${LIBHQR_INCLUDE_DIRS}) set(CMAKE_THREAD_PREFER_PTHREAD TRUE) find_package(Threads REQUIRED) if( THREADS_FOUND ) - list(APPEND EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + list(APPEND EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) endif () # Add math library to the list of extra @@ -268,18 +268,18 @@ endif () set(M_LIBRARIES "") if(UNIX OR WIN32) find_library( - M_m_LIBRARY - NAMES m - ) + M_m_LIBRARY + NAMES m + ) mark_as_advanced(M_m_LIBRARY) if (M_m_LIBRARY) - list(APPEND M_LIBRARIES "${M_m_LIBRARY}") - list(APPEND EXTRA_LIBRARIES "${M_m_LIBRARY}") + list(APPEND M_LIBRARIES "${M_m_LIBRARY}") + list(APPEND EXTRA_LIBRARIES "${M_m_LIBRARY}") else() - message(FATAL_ERROR "Could NOT find libm on your system." - " Are you sure to a have a C compiler installed?") + message(FATAL_ERROR "Could NOT find libm on your system." 
+ " Are you sure to a have a C compiler installed?") endif() - endif() +endif() # Try to find librt (libposix4 - POSIX.1b Realtime Extensions library) # on Unix systems except Apple ones because it does not exist on it diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 91b17ab5d57bc2d40fd26e56587d311e78802a62..0070062b9fc9b4524506fa95b88ce6c8b24548ce 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -131,11 +131,13 @@ set(ZSRC pzunmlq.c pzunmlqrh.c pzunmqr.c + pzunmqr_param.c pzunmqrrh.c pztpgqrt.c pztpqrt.c ### zgels.c + zgels_param.c zgelqf.c zgelqs.c zgeqrf.c diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c index 4f880c95cb5bf1e86ffa9bfba49429d016d49e81..360a2d408f08fab411be9497289a9cf1428b1129 100644 --- a/compute/pzgeqrf_param.c +++ b/compute/pzgeqrf_param.c @@ -52,9 +52,9 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de MORSE_desc_t *D = NULL; int k, m, n, i, p; - int K, M, RD; - int ldap, ldam, ldaMRD; - int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm; + int K; + int ldap, ldam; + int tempkmin, tempkn, tempnn, tempmm; int ib; int *tiles; diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c index b1c41fead93a91692ed00224d787a9fabcc7f0fc..3434c8f6998f4284ed3f52f53b86fedc074850b5 100644 --- a/compute/pzungqr_param.c +++ b/compute/pzungqr_param.c @@ -54,8 +54,8 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *D = NULL; int k, m, n, i, p; - int ldak, ldqk, ldam, ldqm, ldqp; - int tempmm, tempnn, tempkmin, tempkm, tempkn; + int ldam, ldqm, ldqp; + int tempmm, tempnn, tempkmin, tempkn; int ib, minMT; int *tiles; @@ -110,9 +110,6 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree, RUNTIME_iteration_push(morse, k); tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - tempkm = k == A->mt-1 ? 
A->m-k*A->mb : A->mb; - ldak = BLKLDD(A, k); - ldqk = BLKLDD(Q, k); /* Setting the order of tiles */ libhqr_treewalk(qrtree, k, tiles); diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c index 17e9b2db4e3ac841ee033e32a2a81844b36fb2a8..f45091455f09ecfe979db9fb67addd25c5bd2540 100644 --- a/compute/pzunmqr_param.c +++ b/compute/pzunmqr_param.c @@ -10,7 +10,7 @@ /** * - * @file pzunmqr.c + * @file pzunmqr_param.c * * MORSE auxiliary routines * MORSE is a software package provided by Univ. of Tennessee, @@ -25,6 +25,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Raphael Boucherie * @date 2010-11-15 * @precisions normal z -> s d c * @@ -33,31 +34,32 @@ #define A(m,n) A, m, n #define B(m,n) B, m, n -#define T(m,n) T, m, n +#define TS(m,n) TS, m, n +#define TT(m,n) TT, m, n #if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 +#define D(m,n) D, m, n #else -#define DIAG(k) A, k, k +#define D(m,n) A, m, n #endif /** * Parallel application of Q using tile V - QR factorization - dynamic scheduling */ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, - MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, - MORSE_sequence_t *sequence, MORSE_request_t *request) + MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; + MORSE_desc_t *D = NULL; - int k, m, n, i; - int ldak, ldbk, ldam, ldan, ldbm; - int tempkm, tempnn, tempkmin, tempmm, tempkn; - int ib, minMT, minM; + int k, m, n, i, p; + int ldam, ldan, ldbm, ldbp; + int tempnn, tempkmin, tempmm, tempkn; + int ib, K; int *tiles; morse = morse_context_self(); @@ -67,17 +69,12 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ib = MORSE_IB; - if (A->m > A->n) { - minM = A->n; - minMT = A->nt; 
- } else { - minM = A->m; - minMT = A->mt; - } + K = chameleon_min(A->mt, A->nt); /* * zunmqr = A->nb * ib * ztsmqr = A->nb * ib + * zttmqr = A->nb * ib */ ws_worker = A->nb * ib; @@ -102,8 +99,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, /* necessary to avoid dependencies between tasks regarding the diag tile */ #if defined(CHAMELEON_COPY_DIAG) - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->nb, A->nb, 0, 0, minMT*A->nb, A->nb, A->p, A->q); + D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); + morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->nb, A->nb, 0, 0, K*A->nb, A->nb, A->p, A->q); #endif if (side == MorseLeft ) { @@ -111,42 +108,48 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, /* * MorseLeft / MorseConjTrans */ - for (k = 0; k < minMT; k++) { + for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); - tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; - tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; - ldak = BLKLDD(A, k); - ldbk = BLKLDD(B, k); + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + #if defined(CHAMELEON_COPY_DIAG) - MORSE_TASK_zlacpy( - &options, - MorseLower, tempkm, tempkmin, A->nb, - A(k, k), ldak, - DIAG(k), ldak ); + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, tempkmin, A->nb, + A(m, k), ldam, + D(m, k), ldam ); #if defined(CHAMELEON_USE_CUDA) - MORSE_TASK_zlaset( - &options, - MorseUpper, tempkm, tempkmin, - 0., 1., - DIAG(k), ldak ); + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, tempkmin, + 0., 1., + D(m, k), ldam ); #endif #endif - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; - MORSE_TASK_zunmqr( - &options, - side, trans, - tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, - T(k, k), T->mb, - B(k, n), ldbk); + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( m, k), ldam, + TS(m, k), TS->mb, + B( m, n), ldbm); + } } - /* Setting the order of the tiles*/ libhqr_treewalk(qrtree, k, tiles); - for (i = k+1; i < B->mt; i++) { + for (i = k; i < B->mt-1; i++) { m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); @@ -154,38 +157,48 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); ldbp = BLKLDD(B, p); - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, - B(p, n), ldbp, - B(m, n), ldbm, - A(m, k), ldam, - T(m, k), T->mb); + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztsmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TS(m, k), TS->mb); + } + } + else { + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zttmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TT(m, k), TT->mb); + } } } - RUNTIME_iteration_pop(morse); } - } - /* - * MorseLeft / MorseNoTrans - */ - else { - for (k = minMT-1; k >= 0; k--) { + } else { + /* + * MorseLeft / MorseNoTrans + */ + for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); - tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; - tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; - ldak = BLKLDD(A, k); - ldbk = BLKLDD(B, k); + tempkn = k == A->nt-1 ? 
A->n-k*A->nb : A->nb; /* Setting the order of the tiles*/ libhqr_treewalk(qrtree, k, tiles); - for (i = B->mt-1; i > k; i--) { + for (i = B->mt-2; i >= k; i--) { m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); @@ -193,162 +206,238 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); ldbp = BLKLDD(B, p); - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - MORSE_TASK_ztsmqr( - &options, - side, trans, - B->mb, tempnn, tempmm, tempnn, tempkmin, ib, T->nb, - B(p, n), ldbp, - B(m, n), ldbm, - A(m, k), ldam, - T(m, k), T->mb); + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { /* B is a general matrix: update every tile column, not only n >= k */ + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztsmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TS(m, k), TS->mb); + } + } + else { + for (n = 0; n < B->nt; n++) { /* same column range as the TS branch and the ZUNMQR loop below */ + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zttmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TT(m, k), TT->mb); + } } } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + #if defined(CHAMELEON_COPY_DIAG) - MORSE_TASK_zlacpy( - &options, - MorseLower, tempkm, tempkmin, A->nb, - A(k, k), ldak, - DIAG(k), ldak ); + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, tempkmin, A->nb, + A(m, k), ldam, + D(m, k), ldam ); #if defined(CHAMELEON_USE_CUDA) - MORSE_TASK_zlaset( - &options, - MorseUpper, tempkm, tempkmin, - 0., 1., - DIAG(k), ldak ); + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, tempkmin, + 0., 1., + D(m, k), ldam ); #endif #endif - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ?
B->n-n*B->nb : B->nb; - MORSE_TASK_zunmqr( - &options, - side, trans, - tempkm, tempnn, tempkmin, ib, T->nb, - DIAG(k), ldak, - T(k, k), T->mb, - B(k, n), ldbk); + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( m, k), ldam, + TS(m, k), TS->mb, + B( m, n), ldbm); + } } - RUNTIME_iteration_pop(morse); } } - } - /* - * MorseRight / MorseConjTrans - */ - else { + } else { if (trans == MorseConjTrans) { - for (k = minMT-1; k >= 0; k--) { + /* + * MorseRight / MorseConjTrans + */ + for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); - tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; - tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; - ldak = BLKLDD(A, k); - ldbk = BLKLDD(B, k); + tempkn = k == A->nt-1 ? A->n - k*A->nb : A->nb; /* Setting the order of tiles */ libhqr_treewalk(qrtree, k, tiles); - for (n = B->nt-1; n > k; n--) { + for (i = B->nt-2; i >= k; i--) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, - B(m, k), ldbm, - B(m, n), ldbm, - A(n, k), ldan, - T(n, k), T->mb); + ldbp = BLKLDD(B, p); + + /* TS or TT */ + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztsmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb, + B( m, p), ldbm, /* tile (m,p) is in tile row m, so its leading dimension is ldbm (ldbp is that of tile row p) */ + B( m, n), ldbm, + A( n, k), ldan, + TS(n, k), TS->mb); + } + } + else{ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ?
B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_zttmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb, + B( m, p), ldbm, /* tile (m,p) is in tile row m, so its leading dimension is ldbm (ldbp is that of tile row p) */ + B( m, n), ldbm, + A( n, k), ldan, + TT(n, k), TT->mb); + } } } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + n = qrtree->getm(qrtree, k, i); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempkmin = chameleon_min(tempnn, tempkn); + ldan = BLKLDD(A, n); + #if defined(CHAMELEON_COPY_DIAG) - MORSE_TASK_zlacpy( - &options, - MorseLower, tempkn, tempkmin, A->nb, - A(k, k), ldak, - DIAG(k), ldak ); + MORSE_TASK_zlacpy( + &options, + MorseLower, tempnn, tempkmin, A->nb, + A(n, k), ldan, + D(n, k), ldan ); #if defined(CHAMELEON_USE_CUDA) - MORSE_TASK_zlaset( - &options, - MorseUpper, tempkn, tempkmin, - 0., 1., - DIAG(k), ldak ); + MORSE_TASK_zlaset( + &options, + MorseUpper, tempnn, tempkmin, + 0., 1., + D(n, k), ldan ); #endif #endif - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_zunmqr( - &options, - side, trans, - tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, - T(k, k), T->mb, - B(m, k), ldbm); + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( n, k), ldan, + TS(n, k), TS->mb, + B( m, n), ldbm); + } } RUNTIME_iteration_pop(morse); } - } - /* - * MorseRight / MorseNoTrans - */ - else { - for (k = 0; k < minMT; k++) { + } else { + /* + * MorseRight / MorseNoTrans + */ + for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); - tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; - tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb; - ldak = BLKLDD(A, k); + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; /* width of panel k of A (reflector count), as in the other three branches */ + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + n = qrtree->getm(qrtree, k, i); + + tempnn = n == B->nt-1 ?
B->n-n*B->nb : B->nb; + tempkmin = chameleon_min(tempnn, tempkn); + ldan = BLKLDD(A, n); #if defined(CHAMELEON_COPY_DIAG) - MORSE_TASK_zlacpy( - &options, - MorseLower, tempkn, tempkmin, A->nb, - A(k, k), ldak, - DIAG(k), ldak ); + MORSE_TASK_zlacpy( + &options, + MorseLower, tempnn, tempkmin, A->nb, + A(n, k), ldan, + D(n, k), ldan ); #if defined(CHAMELEON_USE_CUDA) - MORSE_TASK_zlaset( - &options, - MorseUpper, tempkn, tempkmin, - 0., 1., - DIAG(k), ldak ); + MORSE_TASK_zlaset( + &options, + MorseUpper, tempnn, tempkmin, + 0., 1., + D(n, k), ldan ); #endif #endif - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_zunmqr( - &options, - side, trans, - tempmm, tempkn, tempkmin, ib, T->nb, - DIAG(k), ldak, - T(k, k), T->mb, - B(m, k), ldbm); + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( n, k), ldan, + TS(n, k), TS->mb, + B( m, n), ldbm); + } } - /* Setting the order of tiles */ libhqr_treewalk(qrtree, k, tiles); - for (n = k+1; n < B->nt; n++) { + for (i = k; i < B->nt-1; i++) { /* advance the walk index i; n is reloaded from tiles[i] each iteration */ + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldan = BLKLDD(A, n); - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); - MORSE_TASK_ztsmqr( - &options, - side, trans, - tempmm, B->nb, tempmm, tempnn, tempkmin, ib, T->nb, - B(m, k), ldbm, - B(m, n), ldbm, - A(n, k), ldan, - T(n, k), T->mb); + ldbp = BLKLDD(B, p); + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ?
B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztsmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( n, k), ldan, + TS(n, k), TS->mb); + } + } + else { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_zttmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( n, k), ldan, + TT(n, k), TT->mb); + } } } @@ -362,8 +451,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree, #if defined(CHAMELEON_COPY_DIAG) MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); + morse_desc_mat_free(D); + free(D); #endif - (void)DIAG; + (void)D; } diff --git a/compute/zgels_param.c b/compute/zgels_param.c new file mode 100644 index 0000000000000000000000000000000000000000..898d74e51251ff8e8ea1aa70e739f66f9a6dc6aa --- /dev/null +++ b/compute/zgels_param.c @@ -0,0 +1,421 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgels_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. 
of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgels_param - solves overdetermined or underdetermined linear systems involving an M-by-N + * matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. + * The following options are provided: + * + * # trans = MorseNoTrans and M >= N: find the least squares solution of an overdetermined + * system, i.e., solve the least squares problem: minimize || B - A*X ||. + * + * # trans = MorseNoTrans and M < N: find the minimum norm solution of an underdetermined + * system A * X = B. + * + * Several right hand side vectors B and solution vectors X can be handled in a single call; + * they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS + * solution matrix X. + * + ******************************************************************************* + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: the linear system involves A; + * = MorseConjTrans: the linear system involves A**H. + * Currently only MorseNoTrans is supported. + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] NRHS + * The number of right hand sides, i.e., the number of columns of the matrices B and X. + * NRHS >= 0. + * + * @param[in,out] A + * On entry, the M-by-N matrix A. 
+ * On exit, + * if M >= N, A is overwritten by details of its QR factorization as returned by + * MORSE_zgeqrf; + * if M < N, A is overwritten by details of its LQ factorization as returned by + * MORSE_zgelqf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] descT + * On exit, auxiliary factorization data. + * + * @param[in,out] B + * On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; + * On exit, if return value = 0, B is overwritten by the solution vectors, stored + * columnwise: + * if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual + * sum of squares for the solution in each column is given by the sum of squares of the + * modulus of elements N+1 to M in that column; + * if M < N, rows 1 to N of B contain the minimum norm solution vectors; + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= MAX(1,M,N). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgels_param_Tile + * @sa MORSE_zgels_param_Tile_Async + * @sa MORSE_cgels + * @sa MORSE_dgels + * @sa MORSE_sgels + * + ******************************************************************************/ +int MORSE_zgels_param(const libhqr_tree_t *qrtree, MORSE_enum trans, int M, int N, int NRHS, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *B, int LDB) +{ + int i, j; + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgels_param", "MORSE not initialized"); + return 
MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (trans != MorseNoTrans) { + morse_error("MORSE_zgels_param", "only MorseNoTrans supported"); + return MORSE_ERR_NOT_SUPPORTED; + } + if (M < 0) { + morse_error("MORSE_zgels_param", "illegal value of M"); + return -2; + } + if (N < 0) { + morse_error("MORSE_zgels_param", "illegal value of N"); + return -3; + } + if (NRHS < 0) { + morse_error("MORSE_zgels_param", "illegal value of NRHS"); + return -4; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgels_param", "illegal value of LDA"); + return -6; + } + if (LDB < chameleon_max(1, chameleon_max(M, N))) { + morse_error("MORSE_zgels_param", "illegal value of LDB"); + return -9; + } + /* Quick return */ + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + for (i = 0; i < chameleon_max(M, N); i++) + for (j = 0; j < NRHS; j++) + B[j*LDB+i] = 0.0; + return MORSE_SUCCESS; + } + + /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, NRHS); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + + if ( M >= N ) { +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS,*/ +/* sequence, &request);*/ +/* }*/ + } else { +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 
0, N, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS,*/ +/* sequence, &request);*/ +/* }*/ + } + + /* Call the tile interface */ + MORSE_zgels_param_Tile_Async(qrtree, MorseNoTrans, &descA, descTS, descTT, &descB, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, NRHS, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, NRHS, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgels_param_Tile - Solves overdetermined or underdetermined linear system of equations + * using the tile QR or the tile LQ factorization. + * Tile equivalent of MORSE_zgels_param(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: the linear system involves A; + * = MorseConjTrans: the linear system involves A**H. + * Currently only MorseNoTrans is supported. + * + * @param[in,out] A + * On entry, the M-by-N matrix A. 
+ * On exit, + * if M >= N, A is overwritten by details of its QR factorization as returned by + * MORSE_zgeqrf; + * if M < N, A is overwritten by details of its LQ factorization as returned by + * MORSE_zgelqf. + * + * @param[out] T + * On exit, auxiliary factorization data. + * + * @param[in,out] B + * On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; + * On exit, if return value = 0, B is overwritten by the solution vectors, stored + * columnwise: + * if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual + * sum of squares for the solution in each column is given by the sum of squares of the + * modulus of elements N+1 to M in that column; + * if M < N, rows 1 to N of B contain the minimum norm solution vectors; + * + ******************************************************************************* + * + * @return + * \return MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgels_param + * @sa MORSE_zgels_param_Tile_Async + * @sa MORSE_cgels_Tile + * @sa MORSE_dgels_Tile + * @sa MORSE_sgels_Tile + * + ******************************************************************************/ +int MORSE_zgels_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgels_param_Tile_Async(qrtree, trans, A, TS, TT, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return 
status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgels_param_Tile_Async - Solves overdetermined or underdetermined linear + * system of equations using the tile QR or the tile LQ factorization. + * Non-blocking equivalent of MORSE_zgels_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgels_param + * @sa MORSE_zgels_param_Tile + * @sa MORSE_cgels_Tile_Async + * @sa MORSE_dgels_Tile_Async + * @sa MORSE_sgels_Tile_Async + * + ******************************************************************************/ +int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_desc_t *subA; + MORSE_desc_t *subB; + MORSE_context_t *morse; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, 
MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || B->nb != B->mb) { + morse_error("MORSE_zgels_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (trans != MorseNoTrans) { + morse_error("MORSE_zgels_param_Tile", "only MorseNoTrans supported"); + return morse_request_fail(sequence, request, MORSE_ERR_NOT_SUPPORTED); + } + /* Quick return - currently NOT equivalent to LAPACK's: + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + for (i = 0; i < chameleon_max(M, N); i++) + for (j = 0; j < NRHS; j++) + B[j*LDB+i] = 0.0; + return MORSE_SUCCESS; + } + */ + if (A->m >= A->n) { + morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request); + + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); + + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); + } + else { + /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); + morse_pztile_zero(subB, 
sequence, request); + free(subB); */ + + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { + morse_pzgelqf(A, TS, sequence, request); + } + else { + morse_pzgelqfrh(A, TS, MORSE_RHBLK, sequence, request); + } + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); + morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); + + if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { + morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request); + } + else { + morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, TS, MORSE_RHBLK, sequence, request); + } + } + return MORSE_SUCCESS; +} diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index b4467c65eaa464940bdc136c70d1779443adc3e5..8a5a42cd47431a9a31ff377e9b172f1338015a03 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -110,7 +110,7 @@ int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, - MORSE_desc_t *descT, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *C, int LDC) { int NB, Am; @@ -191,7 +191,7 @@ int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, /* Call the tile interface */ MORSE_zunmqr_param_Tile_Async( - qrtree, side, trans, &descA, descT, &descC, sequence, &request); + qrtree, side, trans, &descA, descTS, descTT, &descC, sequence, &request); /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ morse_zooptile2lap(descC, C, NB, NB, LDC, N, sequence, &request); @@ -260,7 +260,7 @@ int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, * ******************************************************************************/ int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *C) + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *C) { MORSE_context_t *morse; 
MORSE_sequence_t *sequence = NULL; @@ -273,7 +273,7 @@ int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_ return MORSE_ERR_NOT_INITIALIZED; } morse_sequence_create(morse, &sequence); - MORSE_zunmqr_param_Tile_Async(qrtree, side, trans, A, T, C, sequence, &request); + MORSE_zunmqr_param_Tile_Async(qrtree, side, trans, A, TS, TT, C, sequence, &request); morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(C); @@ -313,7 +313,7 @@ int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_ ******************************************************************************/ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *C, + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request) { MORSE_context_t *morse; @@ -342,14 +342,18 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, morse_error("MORSE_zunmqr_param_Tile", "invalid first descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(T) != MORSE_SUCCESS) { + if (morse_desc_check(TS) != MORSE_SUCCESS) { morse_error("MORSE_zunmqr_param_Tile", "invalid second descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(C) != MORSE_SUCCESS) { + if (morse_desc_check(TT) != MORSE_SUCCESS) { morse_error("MORSE_zunmqr_param_Tile", "invalid third descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } + if (morse_desc_check(C) != MORSE_SUCCESS) { + morse_error("MORSE_zunmqr_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } /* Check input arguments */ if (A->nb != A->mb || C->nb != C->mb) { morse_error("MORSE_zunmqr_param_Tile", "only square tiles supported"); @@ -368,7 
+372,7 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, return MORSE_SUCCESS; */ - morse_pzunmqr_param(qrtree, side, trans, A, C, T, sequence, request); + morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, sequence, request); return MORSE_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index 0601c8ffe1902971485b4f1e7e8507a644a056c9..6decc053193b2dd1f77c020bd418ed25ca25d14c 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -164,7 +164,7 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enu MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, - MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/include/morse_z.h b/include/morse_z.h index 1c7e6896c1e6eabf414c369451fe770c3372409d..03e400778979ad402eb580af016ef4ad7f877405 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -277,15 +277,18 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, * Declarations of libhqr dependent functions. 
*/ #if defined(CHAMELEON_USE_LIBHQR) +int MORSE_zgels_param(const libhqr_tree_t *qrtree, MORSE_enum trans, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); +int MORSE_zgels_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zgeqrf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT); int MORSE_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT); int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); - int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); - int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); - int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); -int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB); -int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B); -int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t 
*B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); +int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); #endif /* defined(CHAMELEON_USE_LIBHQR) */ /** * Declarations of workspace allocation functions (tile layout) - alphabetical order diff --git a/testing/testing_zgels_param.c b/testing/testing_zgels_param.c index 85e5b429bf848bcc345e9116dc12dedc9bac5326..9cdb8290060db4abc795e6fd6dbd1473b551bb75 100644 --- a/testing/testing_zgels_param.c +++ b/testing/testing_zgels_param.c @@ -79,7 +79,6 @@ int testing_zgels_param(int argc, char **argv) int K = min(M, N); double eps; int info_ortho, info_solution, info_factorization; - int i, j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; int domino, tsrr, llvl, hlvl, qr_a, qr_p; @@ -127,7 +126,7 @@ int testing_zgels_param(int argc, char **argv) ( M >= N ) ? 
LIBHQR_QR : LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); -#if 0 +#if 1 /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); @@ -278,7 +277,7 @@ int testing_zgels_param(int argc, char **argv) MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); - MORSE_zunmqr(MorseLeft, MorseConjTrans, M, NRHS, N, A2, LDA, TS, B2, LDB); + MORSE_zunmqr_param( &qrtree, MorseLeft, MorseConjTrans, M, NRHS, N, A2, LDA, TS, TT, B2, LDB); MORSE_ztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB); } else { @@ -345,7 +344,6 @@ static int check_orthogonality(int M, int N, int LDQ, MORSE_Complex64_t *Q, doub double alpha, beta; double normQ; int info_ortho; - int i; int minMN = min(M, N); double *work = (double *)malloc(minMN*sizeof(double));