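/*
 * NOTE: the lines below are navigation text from the web page this file was
 * copied from; they are not part of the C source.
 *
 *   Mentions légales du service   (French: "Legal notice for the service")
 *   Skip to content
 *   Snippets Groups Projects
 *   cuda_ztsmlq.c 3.57 KiB
 *   Newer Older
 */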
/**
 *
 * @copyright (c) 2009-2014 The University of Tennessee and The University
 *                          of Tennessee Research Foundation.
 *                          All rights reserved.
 * @copyright (c) 2012-2015 Inria. All rights reserved.
 * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file cuda_ztsmlq.c
 *
 *  MORSE cudablas kernel
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver,
 *  and INRIA Bordeaux Sud-Ouest
 *
 * @author Florent Pruvost
 * @date 2015-09-16
 * @precisions normal z -> c d s
 *
 **/
#include "cudablas/include/cudablas.h"

int CUDA_ztsmlq(
        MORSE_enum side, MORSE_enum trans,
        int M1, int N1,
        int M2, int N2,
        int K, int IB,
              cuDoubleComplex *A1,    int LDA1,
              cuDoubleComplex *A2,    int LDA2,
        const cuDoubleComplex *V,     int LDV,
        const cuDoubleComplex *T,     int LDT,
              cuDoubleComplex *WORK,  int LDWORK,
              cuDoubleComplex *WORKC, int LDWORKC,
        CUBLAS_STREAM_PARAM)
{
    int i, i1, i3;
    int NW;
    int kb;
    int ic = 0;
    int jc = 0;
    int mi = M1;
    int ni = N1;

    /* Check input arguments */
    if ((side != MorseLeft) && (side != MorseRight)) {
        return -1;
    }

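    /* NW is the minimum dimension of WORK.
     * NOTE(review): NW is declared without an initializer and is read by the
     * LDWORK check further down, but no assignment to NW is visible in this
     * listing -- confirm against the upstream file that it is set here. */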
    if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
        return -2;
    }
    if (M1 < 0) {
        return -3;
    }
    if (N1 < 0) {
        return -4;
    }
    if ( (M2 < 0) ||
         ( (M2 != M1) && (side == MorseRight) ) ){
         ( (N2 != N1) && (side == MorseLeft) ) ){
        ( (side == MorseLeft)  && (K > M1) ) ||
        ( (side == MorseRight) && (K > N1) ) ) {
        return -7;
    }
    if (IB < 0) {
        return -8;
    }
    if (LDA1 < max(1,M1)){
        return -10;
    }
    if (LDA2 < max(1,M2)){
        return -12;
    }
    if (LDV < max(1,K)){
        return -14;
    }
    if (LDT < max(1,IB)){
        return -16;
    }
    if (LDWORK < max(1,NW)){
        return -18;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
    if (((side == MorseLeft) && (trans == MorseNoTrans))
        || ((side == MorseRight) && (trans != MorseNoTrans))) {
    if (trans == MorseNoTrans) {
        trans = MorseConjTrans;
    }

    for(i = i1; (i > -1) && (i < K); i += i3) {
        kb = min(IB, K-i);

            /*
             * H or H' is applied to C(i:m,1:n)
             */
            mi = M1 - i;
            ic = i;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            ni = N1 - i;
            jc = i;
        }

        /*
         * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
         */
        CUDA_zparfb(
                side, trans, MorseForward, MorseRowwise,
                mi, ni, M2, N2, kb, 0,
                A1 + LDA1*jc+ic, LDA1,
                A2, LDA2,
                V + i, LDV,
                T + LDT*i, LDT,
                WORK, LDWORK, WORKC, LDWORKC, CUBLAS_STREAM_VALUE );