/**
 *
 * @file cuda_zunmlqt.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon cuda_zunmlqt GPU kernel
 *
 * @version 0.9.2
 * @author Florent Pruvost
 * @date 2015-09-17
 * @precisions normal z -> c d s
 *
 */
20
#include <stddef.h>
#include "cudablas.h"
21

22
int
23
CUDA_zunmlqt(cham_side_t side, cham_trans_t trans,
24 25 26 27 28 29
             int M, int N, int K, int IB,
             const cuDoubleComplex *A,    int LDA,
             const cuDoubleComplex *T,    int LDT,
             cuDoubleComplex *C,    int LDC,
             cuDoubleComplex *WORK, int LDWORK,
             CUBLAS_STREAM_PARAM )
30 31 32 33 34 35 36 37 38 39
{
    int i, kb;
    int i1, i3;
    int nq, nw;
    int ic = 0;
    int jc = 0;
    int ni = N;
    int mi = M;

    /* Check input arguments */
40
    if ((side != ChamLeft) && (side != ChamRight)) {
41 42 43 44 45
        return -1;
    }
    /*
     * NQ is the order of Q and NW is the minimum dimension of WORK
     */
46
    if (side == ChamLeft) {
47 48 49 50 51 52 53 54
        nq = M;
        nw = N;
    }
    else {
        nq = N;
        nw = M;
    }

55
    if ((trans != ChamNoTrans) && (trans != ChamConjTrans)) {
56 57 58 59 60 61 62 63 64 65 66 67 68 69
        return -2;
    }
    if (M < 0) {
        return -3;
    }
    if (N < 0) {
        return -4;
    }
    if ((K < 0) || (K > nq)) {
        return -5;
    }
    if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
        return -6;
    }
70
    if ((LDA < chameleon_max(1,K)) && (K > 0)) {
71 72
        return -8;
    }
73
    if ((LDC < chameleon_max(1,M)) && (M > 0)) {
74 75
        return -12;
    }
76
    if ((LDWORK < chameleon_max(1,nw)) && (nw > 0)) {
77 78 79 80 81
        return -14;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (K == 0))
82
        return CHAMELEON_SUCCESS;
83

84 85
    if (((side == ChamLeft) && (trans == ChamNoTrans))
        || ((side == ChamRight) && (trans != ChamNoTrans))) {
86 87 88 89 90 91 92 93
        i1 = 0;
        i3 = IB;
    }
    else {
        i1 = ( ( K-1 ) / IB )*IB;
        i3 = -IB;
    }

94 95
    if( trans == ChamNoTrans) {
        trans = ChamConjTrans;
96 97
    }
    else {
98
        trans = ChamNoTrans;
99 100 101
    }

    for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
102
        kb = chameleon_min(IB, K-i);
103

104
        if (side == ChamLeft) {
105 106 107 108 109 110 111 112 113 114 115 116 117 118
            /*
             * H or H' is applied to C(i:m,1:n)
             */
            mi = M - i;
            ic = i;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            ni = N - i;
            jc = i;
        }

119
        CUDA_zlarfb( side, trans, ChamDirForward, ChamRowwise,
120 121 122 123 124
                     mi, ni, kb,
                     A + LDA * i  + i,  LDA,
                     T + LDT * i,       LDT,
                     C + LDC * jc + ic, LDC,
                     WORK, LDWORK, CUBLAS_STREAM_VALUE);
125
    }
126
    return CHAMELEON_SUCCESS;
127
}
128