diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index c44695858f43b3160112afa17a294eddaf469bea..246c7ea3e905a2268ada2bd1a747d5aacacdb336 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -125,6 +125,7 @@ set(ZSRC pztrtri.c pzpotrimm.c pzunglq.c + pzunglq_param.c pzunglqrh.c pzungqr.c pzungqr_param.c @@ -143,6 +144,7 @@ set(ZSRC zgelqf.c zgelqf_param.c zgelqs.c + zgelqs_param.c zgeqrf.c zgeqrf_param.c zgeqrs.c @@ -175,6 +177,7 @@ set(ZSRC zsytrs.c ztrtri.c zunglq.c + zunglq_param.c zungqr.c zungqr_param.c zunmlq.c diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c index a43115496cb7a45b7535f5361bc27542f9e77d83..c0843b6664ad51079cf2b12720143b06ac005d8b 100644 --- a/compute/pzgelqf_param.c +++ b/compute/pzgelqf_param.c @@ -66,12 +66,12 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de ib = MORSE_IB; /* - * zgeqrt = A->nb * (ib+1) - * zunmqr = A->nb * ib - * ztsqrt = A->nb * (ib+1) - * zttqrt = A->nb * (ib+1) - * ztsmqr = A->nb * ib - * zttmqr = A->nb * ib + * zgelqt = A->nb * (ib+1) + * zunmlq = A->nb * ib + * ztslqt = A->nb * (ib+1) + * zttlqt = A->nb * (ib+1) + * ztsmlq = A->nb * ib + * zttmlq = A->nb * ib */ ws_worker = A->nb * (ib+1); @@ -79,8 +79,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de #if defined(CHAMELEON_USE_CUDA) /* Worker space * - * zunmqr = A->nb * ib - * ztsmqr = 2 * A->nb * ib + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib */ ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c new file mode 100644 index 0000000000000000000000000000000000000000..9d265cf441be14525eb942cbb65783498ec6745d --- /dev/null +++ b/compute/pzunglq_param.c @@ -0,0 +1,195 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Inria. All rights reserved. 
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzunglq_param.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2011-05-24 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, (m), (n) +#define Q(m,n) Q, (m), (n) +#define TS(m,n) TS, (m), (n) +#define TT(m,n) TT, (m), (n) +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, ((n)/BS), 0 +#else +#define D(m,n) A, (m), (n) +#endif + +/** + * Parallel construction of Q from the LQ reflectors stored in A (TS or TT kernel chosen per tile by the libhqr tree) - dynamic scheduling + */ +void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + MORSE_desc_t *D = NULL; + + int k, m, n, i, p; + int K; + int ldak, ldqp, ldqm; + int tempkm, tempkmin, temppn, tempnn, tempmm; + int ib; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * zunmlq = A->nb * ib + * ztsmlq = A->nb * ib + * zttmlq = A->nb * ib + */ + ws_worker = A->nb * ib; + +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= 
sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + +#if defined(CHAMELEON_COPY_DIAG) + { + /* necessary to avoid dependencies between tasks regarding the diag tile */ + int nblk = ( A->nt + BS -1 ) / BS; + D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); + morse_zdesc_alloc_diag(*D, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q); + } +#endif + + K = chameleon_min(A->mt, A->nt); + for (k = K-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = A->nt-2; i >= k; i--) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + ldqp = BLKLDD(Q, p); + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + ldqm = BLKLDD(Q, m); + MORSE_TASK_ztsmlq( + &options, + MorseRight, MorseNoTrans, + tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TS->nb, + Q( m, p), ldqm, + Q( m, n), ldqm, + A( k, n), ldak, + TS(k, n), TS->mb); + } + } + else { + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); /* leading dimension of tile row m of Q, as in the TS branch */ + MORSE_TASK_zttmlq( + &options, + MorseRight, MorseNoTrans, + tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TT->nb, + Q( m, p), ldqm, + Q( m, n), ldqm, + A( k, n), ldak, + TT(k, n), TT->mb); + } + } + } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->mt-1 ? 
A->m-p*A->mb : A->mb; + tempkmin = chameleon_min(tempkm, temppn); + ldqp = BLKLDD(Q, p); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkmin, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; ldqm = BLKLDD(Q, m); /* must be refreshed per tile row; otherwise a stale value is passed */ + MORSE_TASK_zunmlq( + &options, + MorseRight, MorseNoTrans, + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + Q( m, p), ldqm); + } + } + RUNTIME_iteration_pop(morse); + } + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + free(tiles); /* only read while submitting tasks above */ +#if defined(CHAMELEON_COPY_DIAG) + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(D); + free(D); +#endif + (void)D; +} diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c index 49b3e877f0a3fe0c08106871627a3e2e969ce357..cb49ede28a41df7418ea56bed5503107c6845f22 100644 --- a/compute/pzunmlq_param.c +++ b/compute/pzunmlq_param.c @@ -51,7 +51,7 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, int k, m, n, i, p; int ldan, ldam, ldbm, ldbn, ldak, ldbp; - int tempnn, tempkmin, tempmm, tempkn, tempkm; + int tempnn, temppn, tempkmin, tempmm, tempkn, tempkm; int ib, K; int *tiles; @@ -81,7 +81,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, #endif /* Initialisation of tiles */ - tiles = (int*)malloc((qrtree->nt)*sizeof(int)); memset( tiles, 0, (qrtree->nt)*sizeof(int) ); @@ -97,84 +96,87 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, #endif if (side == MorseLeft ) { - if (trans == MorseConjTrans) { + if (trans == MorseNoTrans) { /* - * MorseLeft / MorseConjTrans + * MorseLeft / MorseNoTrans */ for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkm = k == A->mt-1 ? 
A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { - n = qrtree->getm(qrtree, k, i); + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); - tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldbn = BLKLDD(B, n); - tempkmin = chameleon_min(tempnn, tempkm); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, - MorseUpper, tempkmin, tempnn, A->nb, - A(k, n), ldak, - D(k, n), ldak ); + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, - MorseLower, tempkmin, tempnn, + MorseLower, tempkmin, temppn, 0., 1., - D(k, n), ldak ); + D(k, p), ldak ); #endif #endif - for (m = 0; m < B->nt; m++) { - tempmm = m == B->nt-1 ? B->n-n*B->nb : B->nb; + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; MORSE_TASK_zunmlq( &options, side, trans, - tempnn, tempmm, tempkmin, ib, TS->nb, - D( k, n), ldak, - TS(k, n), TS->mb, - B( n, m), ldbn); + temppn, tempnn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( p, n), ldbp); } } + /* Setting the order of the tiles*/ libhqr_treewalk(qrtree, k, tiles); - for (i = k; i < B->mt-1; i++) { - n = tiles[i]; - p = qrtree->currpiv(qrtree, k, n); + for (i = k; i < A->nt-1; i++) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbp = BLKLDD(B, p); + ldbm = BLKLDD(B, m); /* TT or TS */ + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; - if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); MORSE_TASK_ztsmlq( &options, side, trans, - B->nb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, - B( m, p), ldbm, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, + B( p, n), ldbp, B( m, n), ldbm, - A( k, n), ldak, - TS(k, n), TS->mb); + A( k, m), ldak, + TS(k, m), TS->mb); } } else { - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zttmlq( &options, side, trans, - tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->mb, - B( m, p), ldbm, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb, + B( p, n), ldbp, B( m, n), ldbm, A( k, m), ldak, - TT(k, m), TT->mb); + TT(k, m), TS->mb); } } } @@ -182,72 +184,72 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, } } else { /* - * MorseLeft / MorseNoTrans + * MorseLeft / MorseConjTrans */ for (k = K-1; k >= 0; k--) { RUNTIME_iteration_push(morse, k); tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); /* Setting the order of the tiles*/ libhqr_treewalk(qrtree, k, tiles); - for (i = B->mt-2; i >= k; i--) { - n = tiles[i]; - p = qrtree->currpiv(qrtree, k, n); + for (i = A->nt-2; i >= k; i--) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; ldbp = BLKLDD(B, p); + ldbm = BLKLDD(B, m); /* TT or TS */ - - if(qrtree->gettype(qrtree, k, n) == 0){ - for (m = 0; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? 
B->n-n*B->nb : B->nb; MORSE_TASK_ztsmlq( &options, side, trans, - B->nb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, - B( m, p), ldbp, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, + B( p, n), ldbp, B( m, n), ldbm, - A( k, n), ldak, - TS(k, n), TS->mb); + A( k, m), ldak, + TS(k, m), TS->mb); } } else { - for (m = k; m < B->mt; m++) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; MORSE_TASK_zttmlq( &options, side, trans, B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb, - B( m, p), ldbp, + B( p, n), ldbp, B( m, n), ldbm, - A( k, n), ldak, - TT(k, n), TT->mb); + A( k, m), ldak, + TT(k, m), TT->mb); } } } for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { - m = qrtree->getm(qrtree, k, i); + p = qrtree->getm(qrtree, k, i); - tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - tempkmin = chameleon_min(tempmm, tempkn); - ldbm = BLKLDD(B, m); + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, - MorseUpper, tempkmim, tempmm, A->nb, - A(k, m), ldak, - D(k, m), ldak ); + MorseUpper, tempkmim, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, - MorseLower, tempkmin, tempmm, + MorseLower, tempkmin, temppn, 0., 1., - D(k, m), ldak ); + D(k, p), ldak ); #endif #endif for (n = 0; n < B->nt; n++) { @@ -255,59 +257,60 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_TASK_zunmlq( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( k, m), ldak, - TS(k, m), TS->mb, - B( m, n), ldbm); + temppn, tempnn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( p, n), ldbp); } } RUNTIME_iteration_pop(morse); } } } else { - if (trans == MorseConjTrans) { + if (trans == MorseNoTrans) { /* - * MorseRight / MorseConjTrans + * MorseRight / MorseNoTrans */ for (k = K-1; k >= 0; k--) { 
RUNTIME_iteration_push(morse, k); - tempkm = k == A->mt-1 ? A->m - k*A->mb : A->mb; + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); - /* Setting the order of tiles */ + /* Setting the order of the tiles*/ libhqr_treewalk(qrtree, k, tiles); - for (i = B->nt-2; i >= k; i--) { - m = tiles[i]; - p = qrtree->currpiv(qrtree, k, m); + for (i = A->nt-2; i >= k; i--) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldbp = BLKLDD(B, p); - /* TS or TT */ - if(qrtree->gettype(qrtree, k, m) == 0){ - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + /* TT or TS */ + + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); MORSE_TASK_ztsmlq( &options, side, trans, tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb, - B( p, n), ldbp, + B( m, p), ldbm, B( m, n), ldbm, A( k, n), ldak, TS(k, n), TS->mb); } } - else{ - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + else { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; MORSE_TASK_zttmlq( &options, side, trans, tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb, - B( p, n), ldbp, + B( m, p), ldbm, B( m, n), ldbm, A( k, n), ldak, TT(k, n), TT->mb); @@ -315,95 +318,94 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, } } for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { - m = qrtree->getm(qrtree, k, i); + p = qrtree->getm(qrtree, k, i); - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - tempkmin = chameleon_min(tempmm, tempkm); - ldbm = BLKLDD(B, m); + temppn = p == A->mt-1 ? 
A->m-p*A->mb : A->mb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, - MorseUpper, tempkmin, tempkmm, A->nb, - A(k, m), ldak, - D(k, m), ldak ); + MorseUpper, tempkmim, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, - MorseLower, tempkmin, tempkmm, + MorseLower, tempkmin, temppn, 0., 1., - D(k, m), ldak ); + D(k, p), ldak ); #endif #endif - for (n = 0; n < B->nt; n++) { - ldbm = BLKLDD(B, m); - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; MORSE_TASK_zunmlq( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( k, n), ldak, - TS(k, n), TS->mb, - B( m, n), ldbm); + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( m, p), ldbm); } } - RUNTIME_iteration_pop(morse); } } else { /* - * MorseRight / MorseNoTrans + * MorseRight / MorseConjTrans */ for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); - tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb; + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; ldak = BLKLDD(A, k); for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { - m = qrtree->getm(qrtree, k, i); + p = qrtree->getm(qrtree, k, i); - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - tempkmin = chameleon_min(tempmm, tempkm); - ldbm = BLKLDD(B, m); + temppn = p == A->nt-1 ? 
A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); #if defined(CHAMELEON_COPY_DIAG) MORSE_TASK_zlacpy( &options, - MorseUpper, tempkmin, tempkmm, A->nb, - A(k, m), ldak, - D(k, m), ldak ); + MorseUpper, tempkmin, tempkpn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); #if defined(CHAMELEON_USE_CUDA) MORSE_TASK_zlaset( &options, - MorseLower, tempkmin, tempkmm, + MorseLower, tempkmin, tempkpn, 0., 1., - D(k, m), ldak ); + D(k, p), ldak ); #endif #endif - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; MORSE_TASK_zunmlq( &options, side, trans, - tempmm, tempnn, tempkmin, ib, TS->nb, - D( k, m), ldan, - TS(k, m), TS->mb, - B( m, n), ldbm); + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( m, p), ldbm); } } /* Setting the order of tiles */ libhqr_treewalk(qrtree, k, tiles); - for (i = k; i < B->mt-1; i++) { - m = tiles[i]; - p = qrtree->currpiv(qrtree, k, m); + for (i = k; i < A->nt-1; i++) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - ldbm = BLKLDD(B, m); + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; ldbp = BLKLDD(B, p); - if(qrtree->gettype(qrtree, k, m) == 0){ - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; MORSE_TASK_ztsmlq( &options, side, trans, @@ -415,8 +417,8 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree, } } else { - for (n = 0; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? 
B->m-m*B->mb : B->mb; MORSE_TASK_zttmlq( &options, side, trans, diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c new file mode 100644 index 0000000000000000000000000000000000000000..41bbe18428f15e5e6530988ad63f95a64dfb300f --- /dev/null +++ b/compute/zgelqs_param.c @@ -0,0 +1,330 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgelqs_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgelqs_param - Compute a minimum-norm solution min || A*X - B || using the LQ factorization + * A = L*Q computed by MORSE_zgelqf. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= M >= 0. + * + * @param[in] NRHS + * The number of columns of B. NRHS >= 0. + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= M. + * + * @param[in] descT + * Auxiliary factorization data, computed by MORSE_zgelqf. 
+ * + * @param[in,out] B + * On entry, the M-by-NRHS right hand side matrix B. + * On exit, the N-by-NRHS solution matrix X. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= N. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgelqs_param_Tile + * @sa MORSE_zgelqs_param_Tile_Async + * @sa MORSE_cgelqs + * @sa MORSE_dgelqs + * @sa MORSE_sgelqs + * @sa MORSE_zgelqf + * + ******************************************************************************/ +int MORSE_zgelqs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *B, int LDB) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqs_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zgelqs_param", "illegal value of M"); + return -1; + } + if (N < 0 || M > N) { + morse_error("MORSE_zgelqs_param", "illegal value of N"); + return -2; + } + if (NRHS < 0) { + morse_error("MORSE_zgelqs_param", "illegal value of NRHS"); + return -3; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgelqs_param", "illegal value of LDA"); + return -5; + } + if (LDB < chameleon_max(1, N)) { + morse_error("MORSE_zgelqs_param", "illegal value of LDB"); + return -8; + } + /* Quick return */ + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + return MORSE_SUCCESS; + } + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status 
= morse_tune(MORSE_FUNC_ZGELS, M, N, NRHS); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, */ +/* sequence, &request);*/ +/* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zgelqs_param_Tile_Async(qrtree, &descA, descTS, descTT, &descB, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, NRHS, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, NRHS, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgelqs_param_Tile - Computes a minimum-norm solution using previously computed + * LQ factorization. + * Tile equivalent of MORSE_zgelqs_param(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. 
+ * + ******************************************************************************* + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] TS,TT + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in,out] B + * On entry, the M-by-NRHS right hand side matrix B. + * On exit, the N-by-NRHS solution matrix X. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgelqs_param + * @sa MORSE_zgelqs_param_Tile_Async + * @sa MORSE_cgelqs_Tile + * @sa MORSE_dgelqs_Tile + * @sa MORSE_sgelqs_Tile + * @sa MORSE_zgelqf_Tile + * + ******************************************************************************/ +int MORSE_zgelqs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgelqs_param_Tile_Async(qrtree, A, TS, TT, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgelqs_param_Tile_Async - Computes a minimum-norm solution using previously + * computed LQ factorization. + * Non-blocking equivalent of MORSE_zgelqs_param_Tile(). + * May return before the computation is finished. 
+ * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgelqs_param + * @sa MORSE_zgelqs_param_Tile + * @sa MORSE_cgelqs_Tile_Async + * @sa MORSE_dgelqs_Tile_Async + * @sa MORSE_sgelqs_Tile_Async + * @sa MORSE_zgelqf_Tile_Async + * + ******************************************************************************/ +int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_desc_t *subB; + MORSE_desc_t *subA; + MORSE_context_t *morse; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid second descriptor"); + return 
morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || B->nb != B->mb) { + morse_error("MORSE_zgelqs_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return */ +/* + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + return MORSE_SUCCESS; + } +*/ + /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); + morse_pztile_zero(subB, sequence, request); + free(subB); */ + + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); + morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); + + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); + + + return MORSE_SUCCESS; +} diff --git a/compute/zgels_param.c b/compute/zgels_param.c index 2dae80528c18b309bc0565d76f93efbfe75d1980..6d78f62c1a9c17de8ce9b7535886f6e1ecb4aa89 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -401,24 +401,15 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, morse_pztile_zero(subB, sequence, request); free(subB); */ - if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzgelqf(A, TS, sequence, request); - } - else { - morse_pzgelqfrh(A, TS, MORSE_RHBLK, sequence, request); - } + morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request); + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); 
morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); free(subA); free(subB); - if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { - morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request); - } - else { - morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, TS, MORSE_RHBLK, sequence, request); - } + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request); } return MORSE_SUCCESS; } diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c new file mode 100644 index 0000000000000000000000000000000000000000..990b511282502feea620a2bc4bb0731fffe01155 --- /dev/null +++ b/compute/zunglq_param.c @@ -0,0 +1,311 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zunglq_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zunglq_param - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the + * first M rows of a product of the elementary reflectors returned by MORSE_zgelqf. 
+ * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix Q. M >= 0. + * + * @param[in] N + * The number of columns of the matrix Q. N >= M. + * + * @param[in] K + * The number of rows of elementary tile reflectors whose product defines the matrix Q. + * M >= K >= 0. + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf_param. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[in] descTS,descTT + * Auxiliary factorization data (TS and TT descriptors), computed by MORSE_zgelqf_param with the same qrtree. + * + * @param[out] Q + * On exit, the M-by-N matrix Q. + * + * @param[in] LDQ + * The leading dimension of the array Q. LDQ >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 an argument had an illegal value (-1: M, -2: N, -3: K, -5: LDA, -8: LDQ) + * + ******************************************************************************* + * + * @sa MORSE_zunglq_param_Tile + * @sa MORSE_zunglq_param_Tile_Async + * @sa MORSE_cunglq + * @sa MORSE_dorglq + * @sa MORSE_sorglq + * @sa MORSE_zgelqf_param + * + ******************************************************************************/ +int MORSE_zunglq_param(const libhqr_tree_t *qrtree, int M, int N, int K, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *Q, int LDQ) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descQ; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunglq_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zunglq_param", "illegal value of M"); + return -1; + } + if (N < M) { + 
morse_error("MORSE_zunglq_param", "illegal value of N"); + return -2; + } + if (K < 0 || K > M) { + morse_error("MORSE_zunglq_param", "illegal value of K"); + return -3; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zunglq_param", "illegal value of LDA"); + return -5; + } + if (LDQ < chameleon_max(1, M)) { + morse_error("MORSE_zunglq_param", "illegal value of LDQ"); + return -8; + } + /* Quick return when M, N or K is zero - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */ + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M & N (tuned as MORSE_FUNC_ZGELS) */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param", "morse_tune() failed"); + return status; + } + + /* Set NB */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descQ))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N,*/ +/* sequence, &request);*/ +/* morse_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zunglq_param_Tile_Async(qrtree, &descA, descTS, descTT, &descQ, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descQ, Q, NB, NB, LDQ, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descQ); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descQ, Q, NB, NB, LDQ, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = 
sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zunglq_param_Tile - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the + * first M rows of a product of the elementary reflectors returned by MORSE_zgelqf. + * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] TS,TT + * Auxiliary factorization data descriptors, forwarded unchanged to MORSE_zunglq_param_Tile_Async. + * + * @param[out] Q + * On exit, the M-by-N matrix Q. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit (otherwise the final sequence status, or MORSE_ERR_NOT_INITIALIZED) + * + ******************************************************************************* + * + * @sa MORSE_zunglq_param + * @sa MORSE_zunglq_param_Tile_Async + * @sa MORSE_cunglq_Tile + * @sa MORSE_dorglq_Tile + * @sa MORSE_sorglq_Tile + * @sa MORSE_zgelqf_Tile + * + ******************************************************************************/ +int MORSE_zunglq_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zunglq_param_Tile_Async(qrtree, A, TS, TT, Q, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(Q); + + status = 
sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * Non-blocking equivalent of MORSE_zunglq_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zunglq_param + * @sa MORSE_zunglq_param_Tile + * @sa MORSE_cunglq_Tile_Async + * @sa MORSE_dorglq_Tile_Async + * @sa MORSE_sorglq_Tile_Async + * @sa MORSE_zgelqf_Tile_Async + * + ******************************************************************************/ +int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != 
MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(Q) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments: A and Q must use square tiles */ + if (A->nb != A->mb || Q->nb != Q->mb) { + morse_error("MORSE_zunglq_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */ +/* + if (chameleon_min(M, N) == 0) + return MORSE_SUCCESS; +*/ + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); /* NOTE(review): presumably sets Q to the identity (0 off-diagonal, 1 on diagonal) - confirm */ + morse_pzunglq_param(qrtree, A, Q, TS, TT, sequence, request); + return MORSE_SUCCESS; +} diff --git a/include/morse_z.h b/include/morse_z.h index b1784bf39cbe574f5fac2fd10e76dc0f66866eb2..32754e4f80f95f4e29f8dd17c824a2b1a8650d2c 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -283,12 +283,18 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, int MORSE_zgelqf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT); int MORSE_zgelqf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT); int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t 
*sequence, MORSE_request_t *request); +int MORSE_zgelqs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); +int MORSE_zgelqs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zgeqrf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT); int MORSE_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT); int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zgeqrs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); int MORSE_zgeqrs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zunglq_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +int MORSE_zunglq_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zungqr_param(const libhqr_tree_t 
*qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/testing/testing_zgels_param.c b/testing/testing_zgels_param.c index 6df2515fc87a6d6208763f79b53d6962246e2c28..cebf4300e5103c7286c680abe9f98a5643dabe1e 100644 --- a/testing/testing_zgels_param.c +++ b/testing/testing_zgels_param.c @@ -118,7 +118,7 @@ int testing_zgels_param(int argc, char **argv) domino = -1; /* -1 */ llvl = -1; /* -1 */ hlvl = -1; /* -1 */ - qr_a = TS->mt; /* -1 */ + qr_a = ( M >= N ) ? -1 : TS->nt; /* -1 */ qr_p = 1; /* matrix.p */ tsrr = 0; /* 0 */ @@ -126,7 +126,6 @@ int testing_zgels_param(int argc, char **argv) ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); -#if 1 /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); @@ -145,7 +144,7 @@ int testing_zgels_param(int argc, char **argv) MORSE_zungqr_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); else /* Building the economy-size Q */ - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); printf("\n"); printf("------ TESTS FOR CHAMELEON ZGELS_PARAM ROUTINE ------- \n"); @@ -171,7 +170,6 @@ int testing_zgels_param(int argc, char **argv) printf(" - TESTING ZGELS_PARAM ... 
FAILED !\n"); hres++; printf("************************************************\n"); } -#endif /*------------------------------------------------------------- * TESTING ZGEQRF + ZGEQRS or ZGELQF + ZGELQS @@ -228,8 +226,8 @@ int testing_zgels_param(int argc, char **argv) /* Morse routines */ MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); - MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); /* Check the orthogonality, factorization and the solution */ info_ortho = check_orthogonality(M, N, LDA, Q, eps); @@ -292,7 +290,7 @@ int testing_zgels_param(int argc, char **argv) MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB); - MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); }