diff --git a/cudablas/compute/cuda_zherfb.c b/cudablas/compute/cuda_zherfb.c new file mode 100644 index 0000000000000000000000000000000000000000..4563d938864b54b4ec0bec0fb9d7c07f0c806790 --- /dev/null +++ b/cudablas/compute/cuda_zherfb.c @@ -0,0 +1,92 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file cuda_zherfb.c + * + * MORSE cudablas kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver, + * and INRIA Bordeaux Sud-Ouest + * + * @author Florent Pruvost + * @date 2015-09-16 + * @precisions normal z -> c d s + * + **/ +#include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" + +int +CUDA_zherfb( MORSE_enum uplo, int n, + int k, int ib, int nb, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *T, int ldt, + cuDoubleComplex *C, int ldc, + cuDoubleComplex *WORK, int ldwork, + CUBLAS_STREAM_PARAM ) +{ + /* Check input arguments */ + if ((uplo != MorseUpper) && (uplo != MorseLower)) { + cudablas_error(1, "Illegal value of uplo"); + return -1; + } + if (n < 0) { + cudablas_error(2, "Illegal value of n"); + return -2; + } + if (k < 0) { + cudablas_error(3, "Illegal value of k"); + return -3; + } + if (ib < 0) { + cudablas_error(4, "Illegal value of ib"); + return -4; + } + if (nb < 0) { + cudablas_error(5, "Illegal value of nb"); + return -5; + } + if ( (lda < chameleon_max(1,n)) && (n > 0) ) { + cudablas_error(7, "Illegal value of lda"); + return -7; + } + if ( (ldt < chameleon_max(1,ib)) && (ib > 0) ) { + cudablas_error(9, "Illegal value of ldt"); + return -9; + } + if ( (ldc < chameleon_max(1,n)) && (n > 0) ) { + cudablas_error(11, "Illegal value of ldc"); + return -11; + } + + if (uplo == MorseLower) { + /* Left */ + CUDA_zunmqrt( MorseLeft, MorseConjTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + /* Right */ + CUDA_zunmqrt( MorseRight, MorseNoTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + } + else { + /* Right */ + CUDA_zunmlqt( MorseRight, MorseConjTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + /* Left */ + CUDA_zunmlqt( MorseLeft, MorseNoTrans, n, n, k, ib, + A, lda, T, ldt, C, ldc, WORK, ldwork, + CUBLAS_STREAM_VALUE ); + } + return 0; +} diff --git a/cudablas/compute/cuda_ztpmqrt.c b/cudablas/compute/cuda_ztpmqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..b9f6afac4cabd9a66c017c29e16abad91e7a2b97 --- /dev/null +++ b/cudablas/compute/cuda_ztpmqrt.c @@ -0,0 +1,83 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file cuda_ztpmqrt.c + * + * MORSE cudablas kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver, + * and INRIA Bordeaux Sud-Ouest + * + * @author Florent Pruvost + * @date 2015-09-16 + * @precisions normal z -> c d s + * + **/ +#include "cudablas/include/cudablas.h" +#include "cudablas/include/cudablas_z.h" + +int +CUDA_ztpmqrt( MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int IB, + const cuDoubleComplex *V, int LDV, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *A, int LDA, + cuDoubleComplex *B, int LDB, + cuDoubleComplex *WORK, + CUBLAS_STREAM_PARAM ) +{ + int m1, n1, ldwork, ldworkc, ws; + + /* Check input arguments */ + if ((side != MorseLeft) && (side != MorseRight)) { + cudablas_error(1, "Illegal value of side"); + return -1; + } + + if ( side == MorseLeft ) { + m1 = K; + n1 = N; + ldwork = IB; + ldworkc = M; + ws = K * n1; + } + else { + m1 = M; + n1 = K; + ldwork = m1; + ldworkc = IB; + ws = m1 * K; + } + + /* TS case */ + if (L == 0) { + CUDA_ztsmqr( side, trans, m1, n1, M, N, K, IB, + A, LDA, B, LDB, V, LDV, T, LDT, + WORK, ldwork, WORK + ws, ldworkc, + CUBLAS_STREAM_VALUE ); + } + /* TT case */ + else if( L == M ) { + cudablas_error(-6, "TTMQRT not available on GPU yet\n" ); + return -6; + /* CUDA_zttmqr( side, trans, m1, n1, M, N, K, IB, */ + /* A, LDA, B, LDB, V, LDV, T, LDT, */ + /* WORK, ldwork ); */ + } + else { + cudablas_error(-6, "TPMQRT not available on GPU yet\n" ); + return -6; + //LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK ); + } + + return MORSE_SUCCESS; +}