Commit 9c2e2baf authored by Mathieu Faverge's avatar Mathieu Faverge

Add tpqrt and tpmqrt kernels

parent 51e0bc7a
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file core_ztpmqrt.c
*
* PLASMA core_blas kernel
* PLASMA is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> c d s
*
**/
#include "coreblas/include/coreblas.h"
/**
*******************************************************************************
*
* @ingroup CORE_MORSE_Complex64_t
*
* CORE_ztpmqrt applies a complex orthogonal matrix Q obtained from a
* "triangular-pentagonal" complex block reflector H to a general complex matrix
* C, which consists of two blocks A and B.
*
*******************************************************************************
*
* @param[in] side
* @arg MorseLeft : apply Q or Q**H from the Left;
* @arg MorseRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg MorseNoTrans : No transpose, apply Q;
* @arg MorseConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M
* The number of rows of the tile B. M >= 0.
*
* @param[in] N1
* The number of columns of the tile B. N >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] L
* The number of rows of the upper trapezoidal part of V.
* K >= L >= 0. See Further Details.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in] V
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTTQRT in the first k rows of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[in] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[in,out] A
* A is COMPLEX*16 array, dimension (LDA,N) if side = MorseLeft
* or (LDA,K) if SIDE = MorseRight
* On entry, the K-by-N or M-by-K matrix A.
* On exit, A is overwritten by the corresponding block of
* Q*C or Q**H*C or C*Q or C*Q**H. See Further Details.
*
* @param[in] LDA
* The leading dimension of the array A. LDA1 >= max(1,M1).
* If side = MorseLeft, LDA >= max(1,K);
* If side = Morseright, LDA >= max(1,M).
*
* @param[in,out] B
* On entry, the M-by-N tile B.
* On exit, B is overwritten by the corresponding block of
* Q*C or Q**H*C or C*Q or C*Q**H. See Further Details.
*
* @param[in] LDB
* The leading dimension of the tile B. LDB >= max(1,M).
*
* @param[out] WORK
* Workspace array of size LDWORK-by-NB.
* LDWORK = N if side =MorseLeft, or M if side = MorseRight.
*
*******************************************************************************
*
* @par Further Details:
* =====================
*
* The columns of the pentagonal matrix V contain the elementary reflectors
* H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a
* trapezoidal block V2:
*
* V = [V1]
* [V2].
*
* The size of the trapezoidal block V2 is determined by the parameter L,
* where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L
* rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular;
* if L=0, there is no trapezoidal block, hence V = V1 is rectangular.
*
* If side = MorseLeft: C = [A] where A is K-by-N, B is M-by-N and V is M-by-K.
* [B]
*
* If side = MorseRight: C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K.
*
* The complex orthogonal matrix Q is formed from V and T.
*
* If trans='N' and side='L', C is on exit replaced with Q * C.
*
* If trans='C' and side='L', C is on exit replaced with Q**H * C.
*
* If trans='N' and side='R', C is on exit replaced with C * Q.
*
* If trans='C' and side='R', C is on exit replaced with C * Q**H.
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
******************************************************************************/
int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int IB,
const MORSE_Complex64_t *V, int LDV,
const MORSE_Complex64_t *T, int LDT,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t *WORK )
{
int m1;
int n1;
/* Check input arguments */
if ((side != MorseLeft) && (side != MorseRight)) {
coreblas_error(1, "Illegal value of side");
return -1;
}
if ( side == MorseLeft ) {
m1 = K;
n1 = N;
ldwork = ib;
}
else {
m1 = M;
n1 = K;
ldwork = m1;
}
/* TS case */
if (L == 0) {
CORE_ztsmqr( side, trans, m1, n1, M, N, K, IB,
A, LDA, B, LDB, V, LDV, T, LDT,
WORK, ldwork );
}
/* TT case */
else if( L == M ) {
CORE_zttmqr( side, trans, m1, n1, M, N, K, IB,
A, LDA, B, LDB, V, LDV, T, LDT,
WORK, ldwork );
}
else {
//LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK );
coreblas_error( 3, "Illegal value of L (only 0 or M handled for now)");
return -3;
}
return MORSE_SUCCESS;
}
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file core_ztpqrt.c
*
* PLASMA core_blas kernel
* PLASMA is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> c d s
*
**/
#include "coreblas/include/lapacke.h"
#include "coreblas/include/coreblas.h"
/**
******************************************************************************
*
* @ingroup CORE_MORSE_Complex64_t
*
* CORE_ztpqrt computes a blocked QR factorization of a complex
* "triangular-pentagonal" matrix C, which is composed of a
* triangular block A and pentagonal block B, using the compact
* WY representation for Q.
*
* C = | A | = Q * R
* | B |
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tile B. M >= 0.
*
* @param[in] N
* The number of rows of the tile A1.
* The number of columns of the tiles A1 and A2. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in] N
* The number of columns of the matrix B, and the order of the matrix
* A. N >= 0.
*
* @param[in] L
* The number of rows of the upper trapezoidal part of B.
* MIN(M,N) >= L >= 0. See Further Details.
*
* @param[in,out] A
* On entry, the upper triangular N-by-N matrix A.
* On exit, the elements on and above the diagonal of the array
* contain the upper triangular matrix R.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[in,out] B
* On entry, the pentagonal M-by-N matrix B. The first M-L rows
* are rectangular, and the last L rows are upper trapezoidal.
* On exit, B contains the pentagonal matrix V. See Further Details.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[out] WORK
* WORK is COMPLEX*16 array, dimension ((IB+1)*N)
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
******************************************************************************/
int CORE_ztpqrt( int M, int N, int L, int IB,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t *T, int LDT,
MORSE_Complex64_t *WORK )
{
static MORSE_Complex64_t zone = 1.0;
static MORSE_Complex64_t zzero = 0.0;
MORSE_Complex64_t alpha;
int i, ii, sb;
#if !defined(NDEBUG)
/* Check input arguments */
if (M < 0) {
coreblas_error(1, "Illegal value of M");
return -1;
}
if (N < 0) {
coreblas_error(2, "Illegal value of N");
return -2;
}
if( (L < 0) || ((L > min(M, N)) && (min(M,N) > 0))) {
coreblas_error(3, "Illegal value of L");
return -3;
}
if (IB < 0) {
coreblas_error(4, "Illegal value of IB");
return -4;
}
if ((LDA < max(1,N)) && (N > 0)) {
coreblas_error(6, "Illegal value of LDA");
return -6;
}
if ((LDB < max(1,M)) && (M > 0)) {
coreblas_error(6, "Illegal value of LDB");
return -8;
}
if ((LDT < max(1,IB)) && (IB > 0)) {
coreblas_error(6, "Illegal value of LDT");
return -10;
}
#endif /*!defined(NDEBUG)*/
/* Quick return */
if ((M == 0) || (N == 0) || (IB == 0))
return MORSE_SUCCESS;
if ( L == O ) {
CORE_ztsqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N );
}
else if (L == M) {
CORE_zttqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N );
}
else {
//LAPACKE_ztpqrt_work( LAPACK_COL_MAJOR, M, N, L, IB, A, LDA, B, LDB, T, LDT, WORK );
coreblas_error( 3, "Illegal value of L (only 0 or M handled for now)");
return -3;
}
return MORSE_SUCCESS;
}
......@@ -352,6 +352,18 @@ int CORE_ztstrf(int M, int N, int IB, int NB,
MORSE_Complex64_t *L, int LDL,
int *IPIV, MORSE_Complex64_t *WORK,
int LDWORK, int *INFO);
int CORE_ztpqrt( int M, int N, int L, int IB,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t *T, int LDT,
MORSE_Complex64_t *WORK );
int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int L, int IB,
const MORSE_Complex64_t *V, int LDV,
const MORSE_Complex64_t *T, int LDT,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t *WORK );
int CORE_zttmqr(MORSE_enum side, MORSE_enum trans,
int M1, int N1, int M2, int N2, int K, int IB,
MORSE_Complex64_t *A1, int LDA1,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment