diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..571da630afd9168a1eb64a95f2f637f39ed4ed73 --- /dev/null +++ b/coreblas/compute/core_ztpmqrt.c @@ -0,0 +1,190 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_ztpmqrt.c + * + * PLASMA core_blas kernel + * PLASMA is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/coreblas.h" + +/** + ******************************************************************************* + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_ztpmqrt applies a complex orthogonal matrix Q obtained from a + * "triangular-pentagonal" complex block reflector H to a general complex matrix + * C, which consists of two blocks A and B. + * + ******************************************************************************* + * + * @param[in] side + * @arg MorseLeft : apply Q or Q**H from the Left; + * @arg MorseRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg MorseNoTrans : No transpose, apply Q; + * @arg MorseConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M + * The number of rows of the tile B. M >= 0. + * + * @param[in] N1 + * The number of columns of the tile B. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * + * @param[in] L + * The number of rows of the upper trapezoidal part of V. + * K >= L >= 0. See Further Details. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_ZTTQRT in the first k rows of its array argument V. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,K). + * + * @param[in] T + * The IB-by-N1 triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[in,out] A + * A is COMPLEX*16 array, dimension (LDA,N) if side = MorseLeft + * or (LDA,K) if SIDE = MorseRight + * On entry, the K-by-N or M-by-K matrix A. + * On exit, A is overwritten by the corresponding block of + * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * + * @param[in] LDA + * The leading dimension of the array A. LDA1 >= max(1,M1). + * If side = MorseLeft, LDA >= max(1,K); + * If side = Morseright, LDA >= max(1,M). + * + * @param[in,out] B + * On entry, the M-by-N tile B. + * On exit, B is overwritten by the corresponding block of + * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * + * @param[in] LDB + * The leading dimension of the tile B. LDB >= max(1,M). + * + * @param[out] WORK + * Workspace array of size LDWORK-by-NB. + * LDWORK = N if side =MorseLeft, or M if side = MorseRight. + * + ******************************************************************************* + * + * @par Further Details: + * ===================== + * + * The columns of the pentagonal matrix V contain the elementary reflectors + * H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a + * trapezoidal block V2: + * + * V = [V1] + * [V2]. + * + * The size of the trapezoidal block V2 is determined by the parameter L, + * where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L + * rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular; + * if L=0, there is no trapezoidal block, hence V = V1 is rectangular. + * + * If side = MorseLeft: C = [A] where A is K-by-N, B is M-by-N and V is M-by-K. + * [B] + * + * If side = MorseRight: C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K. + * + * The complex orthogonal matrix Q is formed from V and T. + * + * If trans='N' and side='L', C is on exit replaced with Q * C. + * + * If trans='C' and side='L', C is on exit replaced with Q**H * C. + * + * If trans='N' and side='R', C is on exit replaced with C * Q. + * + * If trans='C' and side='R', C is on exit replaced with C * Q**H. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ + +int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int IB, + const MORSE_Complex64_t *V, int LDV, + const MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *WORK ) +{ + int m1; + int n1; + + /* Check input arguments */ + if ((side != MorseLeft) && (side != MorseRight)) { + coreblas_error(1, "Illegal value of side"); + return -1; + } + + if ( side == MorseLeft ) { + m1 = K; + n1 = N; + ldwork = ib; + } + else { + m1 = M; + n1 = K; + ldwork = m1; + } + + /* TS case */ + if (L == 0) { + CORE_ztsmqr( side, trans, m1, n1, M, N, K, IB, + A, LDA, B, LDB, V, LDV, T, LDT, + WORK, ldwork ); + } + /* TT case */ + else if( L == M ) { + CORE_zttmqr( side, trans, m1, n1, M, N, K, IB, + A, LDA, B, LDB, V, LDV, T, LDT, + WORK, ldwork ); + } + else { + //LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK ); + coreblas_error( 3, "Illegal value of L (only 0 or M handled for now)"); + return -3; + } + + return MORSE_SUCCESS; +} diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..76a94fc2cf08e7963255988ea5e093151b6d34dd --- /dev/null +++ b/coreblas/compute/core_ztpqrt.c @@ -0,0 +1,159 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_ztpqrt.c + * + * PLASMA core_blas kernel + * PLASMA is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/lapacke.h" +#include "coreblas/include/coreblas.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_ztpqrt computes a blocked QR factorization of a complex + * "triangular-pentagonal" matrix C, which is composed of a + * triangular block A and pentagonal block B, using the compact + * WY representation for Q. + * + * C = | A | = Q * R + * | B | + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile B. M >= 0. + * + * @param[in] N + * The number of rows of the tile A1. + * The number of columns of the tiles A1 and A2. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] N + * The number of columns of the matrix B, and the order of the matrix + * A. N >= 0. + * + * @param[in] L + * The number of rows of the upper trapezoidal part of B. + * MIN(M,N) >= L >= 0. See Further Details. + * + * @param[in,out] A + * On entry, the upper triangular N-by-N matrix A. + * On exit, the elements on and above the diagonal of the array + * contain the upper triangular matrix R. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,N). + * + * @param[in,out] B + * On entry, the pentagonal M-by-N matrix B. The first M-L rows + * are rectangular, and the last L rows are upper trapezoidal. + * On exit, B contains the pentagonal matrix V. See Further Details. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * WORK is COMPLEX*16 array, dimension ((IB+1)*N) + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ +int CORE_ztpqrt( int M, int N, int L, int IB, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *WORK ) +{ + static MORSE_Complex64_t zone = 1.0; + static MORSE_Complex64_t zzero = 0.0; + + MORSE_Complex64_t alpha; + int i, ii, sb; + +#if !defined(NDEBUG) + /* Check input arguments */ + if (M < 0) { + coreblas_error(1, "Illegal value of M"); + return -1; + } + if (N < 0) { + coreblas_error(2, "Illegal value of N"); + return -2; + } + if( (L < 0) || ((L > min(M, N)) && (min(M,N) > 0))) { + coreblas_error(3, "Illegal value of L"); + return -3; + } + if (IB < 0) { + coreblas_error(4, "Illegal value of IB"); + return -4; + } + if ((LDA < max(1,N)) && (N > 0)) { + coreblas_error(6, "Illegal value of LDA"); + return -6; + } + if ((LDB < max(1,M)) && (M > 0)) { + coreblas_error(6, "Illegal value of LDB"); + return -8; + } + if ((LDT < max(1,IB)) && (IB > 0)) { + coreblas_error(6, "Illegal value of LDT"); + return -10; + } +#endif /*!defined(NDEBUG)*/ + + /* Quick return */ + if ((M == 0) || (N == 0) || (IB == 0)) + return MORSE_SUCCESS; + + if ( L == O ) { + CORE_ztsqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N ); + } + else if (L == M) { + CORE_zttqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N ); + } + else { + //LAPACKE_ztpqrt_work( LAPACK_COL_MAJOR, M, N, L, IB, A, LDA, B, LDB, T, LDT, WORK ); + coreblas_error( 3, "Illegal value of L (only 0 or M handled for now)"); + return -3; + } + return MORSE_SUCCESS; +} diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h index e944555626b2093e1f053ba0bd5a4b7c25fdebd7..345836f81bf57f97b41081a4dd956b340ccf3f9c 100644 --- a/coreblas/include/coreblas_z.h +++ b/coreblas/include/coreblas_z.h @@ -352,6 +352,18 @@ int CORE_ztstrf(int M, int N, int IB, int NB, MORSE_Complex64_t *L, int LDL, int *IPIV, MORSE_Complex64_t *WORK, int LDWORK, int *INFO); +int CORE_ztpqrt( int M, int N, int L, int IB, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *WORK ); +int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int IB, + const MORSE_Complex64_t *V, int LDV, + const MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *WORK ); int CORE_zttmqr(MORSE_enum side, MORSE_enum trans, int M1, int N1, int M2, int N2, int K, int IB, MORSE_Complex64_t *A1, int LDA1,