Newer
Older
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2015 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_ztsmlq.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-16
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
/**
 * Applies a sequence of block reflectors to the pair of tiles (A1, A2) by
 * sweeping over the K reflectors in panels of at most IB and delegating each
 * panel to CUDA_zparfb (MorseForward, MorseRowwise storage).
 *
 * NOTE(review): the pointer arguments are presumably device pointers, as they
 * are handed straight to the CUDA block-reflector kernel -- confirm with caller.
 *
 * @param[in] side     MorseLeft or MorseRight: side on which the reflectors
 *                     are applied.
 * @param[in] trans    MorseNoTrans or MorseConjTrans.
 * @param[in] M1       Number of rows of A1. M1 >= 0.
 * @param[in] N1       Number of columns of A1. N1 >= 0.
 * @param[in] M2       Number of rows of A2. M2 >= 0, and M2 == M1 when
 *                     side == MorseRight.
 * @param[in] N2       Number of columns of A2. N2 >= 0, and N2 == N1 when
 *                     side == MorseLeft.
 * @param[in] K        Number of elementary reflectors. 0 <= K <= M1 when
 *                     side == MorseLeft, 0 <= K <= N1 when side == MorseRight.
 * @param[in] IB       Inner blocking size (panel width). IB >= 0.
 * @param[in,out] A1   First tile, leading dimension LDA1 >= max(1,M1).
 * @param[in,out] A2   Second tile, leading dimension LDA2 >= max(1,M2).
 * @param[in] V        Reflector storage, leading dimension LDV >= max(1,K).
 *                     Panel i starts at V + i.
 * @param[in] T        Triangular factors, leading dimension LDT >= max(1,IB).
 *                     Panel i starts at T + LDT*i.
 * @param[out] WORK    Workspace forwarded to CUDA_zparfb.
 * @param[in] LDWORK   Leading dimension of WORK. LDWORK >= max(1,IB) when
 *                     side == MorseLeft, LDWORK >= max(1,N1) otherwise.
 * @param[out] WORKC   Second workspace forwarded to CUDA_zparfb (not
 *                     validated here).
 * @param[in] LDWORKC  Leading dimension of WORKC (not validated here).
 *
 * @return MORSE_SUCCESS on success (including the quick-return cases where
 *         any dimension, K or IB is zero); otherwise a negative value
 *         identifying the first invalid argument:
 *         -1 side, -2 trans, -3 M1, -4 N1, -5 M2, -6 N2, -7 K, -8 IB,
 *         -10 LDA1, -12 LDA2, -14 LDV, -16 LDT, -18 LDWORK.
 **/
int CUDA_ztsmlq(
        MORSE_enum side, MORSE_enum trans,
        int M1, int N1,
        int M2, int N2,
        int K, int IB,
        cuDoubleComplex *A1, int LDA1,
        cuDoubleComplex *A2, int LDA2,
        const cuDoubleComplex *V, int LDV,
        const cuDoubleComplex *T, int LDT,
        cuDoubleComplex *WORK, int LDWORK,
        cuDoubleComplex *WORKC, int LDWORKC,
        CUBLAS_STREAM_PARAM)
{
    int i, i1, i3;
    int NW;
    int kb;
    int ic = 0;
    int jc = 0;
    int mi = M1;
    int ni = N1;

    /* Check input arguments */
    if ((side != MorseLeft) && (side != MorseRight)) {
        return -1;
    }

    /* NW is the minimum dimension of WORK */
    if (side == MorseLeft) {
        NW = IB;
    }
    else {
        /*
         * NW must be defined on this path as well: it is read by the LDWORK
         * check below, and leaving it unset made that check undefined
         * behaviour for every MorseRight call.
         * NOTE(review): N1 is believed to match the CPU reference kernel
         * (CORE_ztsmlq); confirm against CUDA_zparfb's workspace usage.
         */
        NW = N1;
    }

    if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
        return -2;
    }
    if (M1 < 0) {
        return -3;
    }
    if (N1 < 0) {
        return -4;
    }
    if ( (M2 < 0) ||
         ( (M2 != M1) && (side == MorseRight) ) ){
        return -5;
    }
    if ( (N2 < 0) ||
         ( (N2 != N1) && (side == MorseLeft) ) ){
        return -6;
    }
    if ((K < 0) ||
        ( (side == MorseLeft)  && (K > M1) ) ||
        ( (side == MorseRight) && (K > N1) ) ) {
        return -7;
    }
    if (IB < 0) {
        return -8;
    }
    if (LDA1 < max(1,M1)){
        return -10;
    }
    if (LDA2 < max(1,M2)){
        return -12;
    }
    if (LDV < max(1,K)){
        return -14;
    }
    if (LDT < max(1,IB)){
        return -16;
    }
    if (LDWORK < max(1,NW)){
        return -18;
    }

    /* Quick return (also guarantees IB != 0 before the division below) */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
        return MORSE_SUCCESS;

    /*
     * Choose the sweep direction over the panels: forward from panel 0, or
     * backward starting at the last (possibly partial) panel.
     */
    if (((side == MorseLeft)  && (trans == MorseNoTrans))
        || ((side == MorseRight) && (trans != MorseNoTrans))) {
        i1 = 0;
        i3 = IB;
    }
    else {
        i1 = ((K-1) / IB)*IB;
        i3 = -IB;
    }

    /* CUDA_zparfb is called with the opposite transposition flag */
    if (trans == MorseNoTrans) {
        trans = MorseConjTrans;
    }
    else {
        trans = MorseNoTrans;
    }

    for(i = i1; (i > -1) && (i < K); i += i3) {
        /* Width of the current panel; the last one may be narrower than IB */
        kb = min(IB, K-i);

        if (side == MorseLeft) {
            /*
             * H or H' is applied to C(i:m,1:n)
             */
            mi = M1 - i;
            ic = i;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            ni = N1 - i;
            jc = i;
        }

        /*
         * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
         */
        CUDA_zparfb(
            side, trans, MorseForward, MorseRowwise,
            mi, ni, M2, N2, kb, 0,
            A1 + LDA1*jc+ic, LDA1,
            A2, LDA2,
            V + i, LDV,
            T + LDT*i, LDT,
            WORK, LDWORK, WORKC, LDWORKC, CUBLAS_STREAM_VALUE );
    }
    return MORSE_SUCCESS;
}