Commit a8954d6f authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Add missing cuda kernels

parent 5639e1b8
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zherfb.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-16
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#include "cudablas/include/cudablas_z.h"
/**
 * Two-sided update of C on the GPU, built from two successive one-sided
 * kernel calls that both operate on C with dimensions (n, n):
 *   - uplo == MorseLower: CUDA_zunmqrt from the left (MorseConjTrans), then
 *     CUDA_zunmqrt from the right (MorseNoTrans);
 *   - uplo == MorseUpper: CUDA_zunmlqt from the right (MorseConjTrans), then
 *     CUDA_zunmlqt from the left (MorseNoTrans).
 *
 * @param[in]     uplo    MorseLower or MorseUpper; selects the kernel pair.
 * @param[in]     n       Dimension passed as both row and column count of C.
 *                        Must be >= 0.
 * @param[in]     k       Forwarded to the sub-kernels. Must be >= 0.
 * @param[in]     ib      Forwarded to the sub-kernels. Must be >= 0.
 * @param[in]     nb      Only validated (>= 0); not used otherwise here.
 * @param[in]     A, lda  Forwarded to the sub-kernels; lda >= max(1,n) is
 *                        required when n > 0.
 * @param[in]     T, ldt  Forwarded to the sub-kernels; ldt >= max(1,ib) is
 *                        required when ib > 0.
 * @param[in,out] C, ldc  Matrix updated by the sub-kernels; ldc >= max(1,n)
 *                        is required when n > 0.
 * @param         WORK, ldwork
 *                        Workspace forwarded untouched to the sub-kernels
 *                        (not validated here).
 *
 * @return 0 on success; -i if the i-th argument of this function is illegal;
 *         otherwise the non-zero status reported by the failing sub-kernel.
 *         NOTE(review): a propagated sub-kernel status refers to the
 *         sub-kernel's own argument numbering, not to this function's.
 */
int
CUDA_zherfb( MORSE_enum uplo, int n,
             int k, int ib, int nb,
             const cuDoubleComplex *A, int lda,
             const cuDoubleComplex *T, int ldt,
             cuDoubleComplex *C, int ldc,
             cuDoubleComplex *WORK, int ldwork,
             CUBLAS_STREAM_PARAM )
{
    int info;

    /* Check input arguments */
    if ((uplo != MorseUpper) && (uplo != MorseLower)) {
        cudablas_error(1, "Illegal value of uplo");
        return -1;
    }
    if (n < 0) {
        cudablas_error(2, "Illegal value of n");
        return -2;
    }
    if (k < 0) {
        cudablas_error(3, "Illegal value of k");
        return -3;
    }
    if (ib < 0) {
        cudablas_error(4, "Illegal value of ib");
        return -4;
    }
    if (nb < 0) {
        cudablas_error(5, "Illegal value of nb");
        return -5;
    }
    if ( (lda < chameleon_max(1,n)) && (n > 0) ) {
        cudablas_error(7, "Illegal value of lda");
        return -7;
    }
    if ( (ldt < chameleon_max(1,ib)) && (ib > 0) ) {
        cudablas_error(9, "Illegal value of ldt");
        return -9;
    }
    if ( (ldc < chameleon_max(1,n)) && (n > 0) ) {
        cudablas_error(11, "Illegal value of ldc");
        return -11;
    }

    if (uplo == MorseLower) {
        /* Left */
        info = CUDA_zunmqrt( MorseLeft, MorseConjTrans, n, n, k, ib,
                             A, lda, T, ldt, C, ldc, WORK, ldwork,
                             CUBLAS_STREAM_VALUE );
        if (info != 0) {
            /* Do not apply the second side on top of a failed first update */
            return info;
        }
        /* Right */
        info = CUDA_zunmqrt( MorseRight, MorseNoTrans, n, n, k, ib,
                             A, lda, T, ldt, C, ldc, WORK, ldwork,
                             CUBLAS_STREAM_VALUE );
    }
    else {
        /* Right */
        info = CUDA_zunmlqt( MorseRight, MorseConjTrans, n, n, k, ib,
                             A, lda, T, ldt, C, ldc, WORK, ldwork,
                             CUBLAS_STREAM_VALUE );
        if (info != 0) {
            /* Do not apply the second side on top of a failed first update */
            return info;
        }
        /* Left */
        info = CUDA_zunmlqt( MorseLeft, MorseNoTrans, n, n, k, ib,
                             A, lda, T, ldt, C, ldc, WORK, ldwork,
                             CUBLAS_STREAM_VALUE );
    }

    /* 0 when both updates succeeded, otherwise the second kernel's status */
    return info;
}
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_ztpmqrt.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-16
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#include "cudablas/include/cudablas_z.h"
/**
 * GPU front-end for the "triangular-pentagonal" Q application. Only the
 * L == 0 case is implemented: it is forwarded to CUDA_ztsmqr. Any other
 * value of L is rejected with an error.
 *
 * @param[in]     side    MorseLeft or MorseRight.
 * @param[in]     trans   Forwarded unchanged to CUDA_ztsmqr.
 * @param[in]     M, N, K Dimensions forwarded to CUDA_ztsmqr; with
 *                        side == MorseLeft the first tile is passed as
 *                        (K, N), otherwise as (M, K).
 * @param[in]     L       Must be 0. L == M and every other non-zero value
 *                        are reported as not available on GPU.
 * @param[in]     IB      Forwarded to CUDA_ztsmqr; also used as a workspace
 *                        leading dimension.
 * @param[in]     V, LDV  Forwarded to CUDA_ztsmqr.
 * @param[in]     T, LDT  Forwarded to CUDA_ztsmqr.
 * @param[in,out] A, LDA  Forwarded to CUDA_ztsmqr as its first matrix.
 * @param[in,out] B, LDB  Forwarded to CUDA_ztsmqr as its second matrix.
 * @param         WORK    Workspace, split into two consecutive buffers: the
 *                        first starts at WORK, the second at WORK + ws.
 *
 * @return MORSE_SUCCESS-compatible status of CUDA_ztsmqr when L == 0;
 *         -1 if side is illegal; -6 if L selects an unsupported case.
 */
int
CUDA_ztpmqrt( MORSE_enum side, MORSE_enum trans,
              int M, int N, int K, int L, int IB,
              const cuDoubleComplex *V, int LDV,
              const cuDoubleComplex *T, int LDT,
              cuDoubleComplex *A, int LDA,
              cuDoubleComplex *B, int LDB,
              cuDoubleComplex *WORK,
              CUBLAS_STREAM_PARAM )
{
    int m1, n1, ldwork, ldworkc, ws;

    /* Check input arguments */
    if ((side != MorseLeft) && (side != MorseRight)) {
        cudablas_error(1, "Illegal value of side");
        return -1;
    }

    /* Only the TS case (L == 0) is available; reject everything else.
     * L is the 6th argument, hence index 6 / return value -6. */
    if (L != 0) {
        if (L == M) {
            /* TT case */
            cudablas_error(6, "TTMQRT not available on GPU yet\n" );
            /* CUDA_zttmqr( side, trans, m1, n1, M, N, K, IB, */
            /*              A, LDA, B, LDB, V, LDV, T, LDT,   */
            /*              WORK, ldwork );                   */
        }
        else {
            cudablas_error(6, "TPMQRT not available on GPU yet\n" );
            //LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK );
        }
        return -6;
    }

    /* Workspace layout and dimensions of the first tile depend on the side.
     * NOTE(review): ws (offset of the second buffer) is K*n1 / m1*K while the
     * first buffer's leading dimension is IB / m1 — confirm this matches the
     * size of the workspace allocated by the caller. */
    if ( side == MorseLeft ) {
        m1 = K;
        n1 = N;
        ldwork  = IB;
        ldworkc = M;
        ws = K * n1;
    }
    else {
        m1 = M;
        n1 = K;
        ldwork  = m1;
        ldworkc = IB;
        ws = m1 * K;
    }

    /* TS case: propagate the kernel status instead of assuming success */
    return CUDA_ztsmqr( side, trans, m1, n1, M, N, K, IB,
                        A, LDA, B, LDB, V, LDV, T, LDT,
                        WORK, ldwork, WORK + ws, ldworkc,
                        CUBLAS_STREAM_VALUE );
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment