diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 1d9b97f0432585e291caf7406c07c5d349f4335e..8d61cde9313c0a9207783c0dc6eb2a14363fef77 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -130,6 +130,7 @@ set(ZSRC pzunmlqrh.c pzunmqr.c pzunmqrrh.c + pztpqrt.c ### zgels.c zgelqf.c @@ -167,6 +168,7 @@ set(ZSRC zungqr.c zunmlq.c zunmqr.c + ztpqrt.c ################## # MIXED PRECISION ################## diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..8dd8c63351baee3cae7b090d83b27c4993ae5c0f --- /dev/null +++ b/compute/pztpqrt.c @@ -0,0 +1,151 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pztpqrt.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, m, n +#define B(m,n) B, m, n +#define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) +#define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif + +/***************************************************************************//** + * Parallel tile QR factorization - dynamic scheduling + **/ +void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_sequence_t *sequence, MORSE_request_t *request ) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + MORSE_desc_t *DIAG = NULL; + + int k, m, n; + int ldak, ldbm; + int tempkm, tempkn, tempnn, tempmm, templm; + int ib; + + /* Dimension of the first column */ + int maxm = B->m - L; + int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1); + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * zgeqrt = A->nb * (ib+1) + * zunmqr = A->nb * ib + * ztsqrt = A->nb * (ib+1) + * ztsmqr = A->nb * ib + */ + ws_worker = A->nb * (ib+1); + + /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + +#if defined(CHAMELEON_USE_MAGMA) + /* Worker space + * + * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + */ + ws_worker = max( ws_worker, ib * (ib + A->nb) ); + + /* Host space + * + * zgeqrt = ib * (A->mb+3*ib) + A->mb ) + * ztsqrt = 2 * ib * (A->nb+ib) + A->nb + */ + ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); + ws_host = max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); +#endif + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + +#if defined(CHAMELEON_COPY_DIAG) + /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ + DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); + morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); +#endif + + for (k = 0; k < A->nt; k++) { + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + ldak = BLKLDD(A, k); + + for (m = 0; m < maxmt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + templm = m == maxmt-1 ? tempmm : 0; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztpqrt( + &options, + tempmm, tempkn, templm, ib, T->nb, + A(k, k), ldak, + B(m, k), ldbm, + T(m, k), T->mb ); + + for (n = k+1; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztpmqrt( + &options, + MorseLeft, MorseConjTrans, + tempmm, tempnn, tempkm, templm, ib, T->nb, + B(m, k), ldbm, + T(m, k), T->mb, + A(k, n), ldak, + B(m, n), ldbm ); + } + } + + maxmt = min( B->mt, maxmt+1 ); + } + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(DIAG); + free(DIAG); +#endif + (void)DIAG; +} diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..ef9e4232e870a92a1ae556366c1d17a130709446 --- /dev/null +++ b/compute/ztpqrt.c @@ -0,0 +1,361 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file ztpqrt.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ****************************************************************************** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_ztpqrt - Computes a blocked QR factorization of a + * "triangular-pentagonal" matrix C, which is composed of a triangular block A + * and a pentagonal block B, using the compact representation for Q. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix B. M >= 0. + * + * @param[in] N + * The number of columns of the matrix B, and the order of the matrix + * A. N >= 0. + * + * @param[in] L + * The number of rows of the upper trapezoidal part of B. + * MIN(M,N) >= L >= 0. See Further Details. + * + * @param[in,out] A + * On entry, the upper triangular N-by-N matrix A. + * On exit, the elements on and above the diagonal of the array + * contain the upper triangular matrix R. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,N). + * + * @param[in,out] B + * On entry, the pentagonal M-by-N matrix B. The first M-L rows + * are rectangular, and the last L rows are upper trapezoidal. + * On exit, B contains the pentagonal matrix V. See Further Details. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + * @param[out] descT + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to + * solve the system of equations, or by any function to apply the Q. + * + * @par Further Details: + * ===================== + * + * The input matrix C is a (N+M)-by-N matrix + * + * C = [ A ] + * [ B ] + * + * where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal + * matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N + * upper trapezoidal matrix B2: + * + * B = [ B1 ] <- (M-L)-by-N rectangular + * [ B2 ] <- L-by-N upper trapezoidal. + * + * The upper trapezoidal matrix B2 consists of the first L rows of a + * N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, + * B is rectangular M-by-N; if M=L=N, B is upper triangular. + * + * The matrix W stores the elementary reflectors H(i) in the i-th column + * below the diagonal (of A) in the (N+M)-by-N input matrix C + * + * C = [ A ] <- upper triangular N-by-N + * [ B ] <- M-by-N pentagonal + * + * so that W can be represented as + * + * W = [ I ] <- identity, N-by-N + * [ V ] <- M-by-N, same form as B. + * + * Thus, all of information needed for W is contained on exit in B, which + * we call V above. Note that V has the same form as B; that is, + * + * V = [ V1 ] <- (M-L)-by-N rectangular + * [ V2 ] <- L-by-N upper trapezoidal. + * + * The columns of V represent the vectors which define the H(i)'s. + * + * The number of blocks is B = ceiling(N/NB), where each + * block is of order NB except for the last block, which is of order + * IB = N - (B-1)*NB. For each of the B blocks, a upper triangular block + * reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB + * for the last block) T's are stored in the NB-by-N matrix T as + * + * T = [T1 T2 ... TB]. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_ztpqrt_Tile + * @sa MORSE_ztpqrt_Tile_Async + * @sa MORSE_ctpqrt + * @sa MORSE_dtpqrt + * @sa MORSE_stpqrt + * @sa MORSE_zgeqrs + * + ******************************************************************************/ +int MORSE_ztpqrt( int M, int N, int L, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_desc_t *descT ) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + int minMN = min( M, N ); + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztpqrt", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_ztpqrt", "illegal value of M"); + return -1; + } + if (N < 0) { + morse_error("MORSE_ztpqrt", "illegal value of N"); + return -2; + } + if ((L < 0) || ((L > minMN) && (minMN > 0))) { + morse_error("MORSE_ztpqrt", "illegal value of N"); + return -3; + } + if (LDA < max(1, N)) { + morse_error("MORSE_ztpqrt", "illegal value of LDA"); + return -5; + } + if (LDB < max(1, M)) { + morse_error("MORSE_ztpqrt", "illegal value of LDB"); + return -7; + } + + /* Quick return */ + if (minMN == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request, + (morse_desc_mat_free(&(descA)), morse_desc_mat_free(&(descB))) ); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_ztpqrt_Tile_Async(L, &descA, &descB, descT, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_ztpqrt_Tile - Computes the tile QR factorization of a matrix. + * Tile equivalent of MORSE_ztpqrt(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N + * upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the + * diagonal represent the unitary matrix Q as a product of elementary reflectors stored + * by tiles. + * + * @param[out] T + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_ztpqrt + * @sa MORSE_ztpqrt_Tile_Async + * @sa MORSE_ctpqrt_Tile + * @sa MORSE_dtpqrt_Tile + * @sa MORSE_stpqrt_Tile + * @sa MORSE_zgeqrs_Tile + * + ******************************************************************************/ +int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T ) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztpqrt_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_ztpqrt_Tile_Async(L, A, B, T, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_ztpqrt_Tile_Async - Computes the tile QR factorization of a matrix. + * Non-blocking equivalent of MORSE_ztpqrt_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_ztpqrt + * @sa MORSE_ztpqrt_Tile + * @sa MORSE_ctpqrt_Tile_Async + * @sa MORSE_dtpqrt_Tile_Async + * @sa MORSE_stpqrt_Tile_Async + * @sa MORSE_zgeqrs_Tile_Async + * + ******************************************************************************/ +int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_sequence_t *sequence, MORSE_request_t *request ) +{ + MORSE_context_t *morse; + + morse = morse_context_self(); + if (morse == NULL) { + morse_error("MORSE_ztpqrt_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_ztpqrt_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_ztpqrt_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(T) != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + morse_error("MORSE_ztpqrt_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (((B->m - L) % B->mb) != 0) { + morse_error("MORSE_ztpqrt_Tile", "Triangular part must be aligned with tiles"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ + morse_pztpqrt(L, A, B, T, sequence, request); + /* } */ + /* else { */ + /* morse_pztpqrtrh(A, T, MORSE_RHBLK, sequence, request); */ + /* } */ + + return MORSE_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index fd6051a4966326ae600e9904bf7536f4d0dff4c8..d99406b1474862f19a864f56fa8b8a5a78e63eb3 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -134,6 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);