Mentions légales du service

Skip to content
Snippets Groups Projects
Commit a8954d6f authored by Mathieu Faverge
Browse files

Add missing cuda kernels

parent 5639e1b8
No related branches found
No related tags found
1 merge request: !5 "Missing CUDA kernels, and fix many warnings"
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_zherfb.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-16
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#include "cudablas/include/cudablas_z.h"
/**
 * CUDA_zherfb - validates its arguments, then updates C in place through two
 * successive calls to a blocked "apply Q" kernel, using the factors stored in
 * A and T.
 *
 * Kernel selection:
 *  - uplo == MorseLower: CUDA_zunmqrt( MorseLeft,  MorseConjTrans, ... )
 *                        followed by
 *                        CUDA_zunmqrt( MorseRight, MorseNoTrans,   ... )
 *  - uplo == MorseUpper: CUDA_zunmlqt( MorseRight, MorseConjTrans, ... )
 *                        followed by
 *                        CUDA_zunmlqt( MorseLeft,  MorseNoTrans,   ... )
 * Both calls of a pair receive the same (n, n, k, ib) sizes and the same
 * A, T, C, WORK buffers.
 *
 * @param[in]     uplo    MorseUpper or MorseLower; anything else is rejected.
 * @param[in]     n       Size forwarded as both dimensions of C. n >= 0.
 * @param[in]     k       Forwarded to the kernels. k >= 0.
 * @param[in]     ib      Forwarded to the kernels. ib >= 0.
 * @param[in]     nb      Only validated here (nb >= 0); not forwarded.
 * @param[in]     A       Forwarded to the kernels.
 * @param[in]     lda     Must be >= max(1,n) when n > 0.
 * @param[in]     T       Forwarded to the kernels.
 * @param[in]     ldt     Must be >= max(1,ib) when ib > 0.
 * @param[in,out] C       Matrix updated by the kernels.
 * @param[in]     ldc     Must be >= max(1,n) when n > 0.
 * @param[in,out] WORK    Workspace forwarded to the kernels.
 * @param[in]     ldwork  Forwarded to the kernels; not validated here.
 *
 * The trailing CUBLAS_STREAM_PARAM argument is forwarded unchanged through
 * CUBLAS_STREAM_VALUE.
 *
 * @return 0 once both kernel calls have been issued (their return values are
 *         not inspected), or -i when the i-th argument is illegal
 *         (i in {1, 2, 3, 4, 5, 7, 9, 11}).
 */
int
CUDA_zherfb( MORSE_enum uplo, int n,
             int k, int ib, int nb,
             const cuDoubleComplex *A, int lda,
             const cuDoubleComplex *T, int ldt,
             cuDoubleComplex *C, int ldc,
             cuDoubleComplex *WORK, int ldwork,
             CUBLAS_STREAM_PARAM )
{
    /*
     * Argument validation: each failure reports the 1-based position of the
     * offending argument and returns its negation.
     */
    if ( !((uplo == MorseUpper) || (uplo == MorseLower)) ) {
        cudablas_error(1, "Illegal value of uplo");
        return -1;
    }
    if ( n < 0 ) {
        cudablas_error(2, "Illegal value of n");
        return -2;
    }
    if ( k < 0 ) {
        cudablas_error(3, "Illegal value of k");
        return -3;
    }
    if ( ib < 0 ) {
        cudablas_error(4, "Illegal value of ib");
        return -4;
    }
    if ( nb < 0 ) {
        cudablas_error(5, "Illegal value of nb");
        return -5;
    }

    /* Leading dimensions are only constrained for non-empty problems. */
    if ( (n > 0) && (lda < chameleon_max(1,n)) ) {
        cudablas_error(7, "Illegal value of lda");
        return -7;
    }
    if ( (ib > 0) && (ldt < chameleon_max(1,ib)) ) {
        cudablas_error(9, "Illegal value of ldt");
        return -9;
    }
    if ( (n > 0) && (ldc < chameleon_max(1,n)) ) {
        cudablas_error(11, "Illegal value of ldc");
        return -11;
    }

    /* uplo is known to be MorseUpper or MorseLower from here on. */
    if ( uplo != MorseLower ) {
        /* Upper: right-side call first, then left-side call */
        CUDA_zunmlqt( MorseRight, MorseConjTrans, n, n, k, ib,
                      A, lda, T, ldt, C, ldc, WORK, ldwork,
                      CUBLAS_STREAM_VALUE );
        CUDA_zunmlqt( MorseLeft, MorseNoTrans, n, n, k, ib,
                      A, lda, T, ldt, C, ldc, WORK, ldwork,
                      CUBLAS_STREAM_VALUE );
    }
    else {
        /* Lower: left-side call first, then right-side call */
        CUDA_zunmqrt( MorseLeft, MorseConjTrans, n, n, k, ib,
                      A, lda, T, ldt, C, ldc, WORK, ldwork,
                      CUBLAS_STREAM_VALUE );
        CUDA_zunmqrt( MorseRight, MorseNoTrans, n, n, k, ib,
                      A, lda, T, ldt, C, ldc, WORK, ldwork,
                      CUBLAS_STREAM_VALUE );
    }

    return 0;
}
/**
*
* @copyright (c) 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cuda_ztpmqrt.c
*
* MORSE cudablas kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver,
* and INRIA Bordeaux Sud-Ouest
*
* @author Florent Pruvost
* @date 2015-09-16
* @precisions normal z -> c d s
*
**/
#include "cudablas/include/cudablas.h"
#include "cudablas/include/cudablas_z.h"
/**
 * CUDA_ztpmqrt - dispatches a "TP" apply-Q update to the matching GPU kernel
 * depending on L.
 *
 * Only L == 0 (the "TS" case) is implemented: the call is forwarded to
 * CUDA_ztsmqr. Any other value of L is rejected with -6, with a message
 * distinguishing L == M ("TT" case) from the general case.
 *
 * WORK is split into two consecutive workspaces handed to CUDA_ztsmqr:
 *  - side == MorseLeft : first part has leading dimension IB and spans
 *                        K * N elements; second part has leading dimension M.
 *  - side == MorseRight: first part has leading dimension M and spans
 *                        M * K elements; second part has leading dimension IB.
 * NOTE(review): the total size WORK must provide is not checked here and
 * depends on what CUDA_ztsmqr needs for its second workspace - confirm
 * against the callers.
 *
 * @param[in]     side   MorseLeft or MorseRight; anything else is rejected.
 * @param[in]     trans  Forwarded to CUDA_ztsmqr (not validated here).
 * @param[in]     M, N, K, IB  Sizes forwarded to CUDA_ztsmqr.
 * @param[in]     L      Selects the variant; must be 0 for now.
 * @param[in]     V, LDV Forwarded to CUDA_ztsmqr.
 * @param[in]     T, LDT Forwarded to CUDA_ztsmqr.
 * @param[in,out] A, LDA Forwarded to CUDA_ztsmqr.
 * @param[in,out] B, LDB Forwarded to CUDA_ztsmqr.
 * @param[in,out] WORK   Workspace, partitioned as described above.
 *
 * The trailing CUBLAS_STREAM_PARAM argument is forwarded unchanged through
 * CUBLAS_STREAM_VALUE.
 *
 * @return MORSE_SUCCESS after the TS kernel has been issued (its return value
 *         is not inspected), -1 if side is illegal, -6 if L selects a variant
 *         that is not available on GPU.
 */
int
CUDA_ztpmqrt( MORSE_enum side, MORSE_enum trans,
              int M, int N, int K, int L, int IB,
              const cuDoubleComplex *V, int LDV,
              const cuDoubleComplex *T, int LDT,
              cuDoubleComplex *A, int LDA,
              cuDoubleComplex *B, int LDB,
              cuDoubleComplex *WORK,
              CUBLAS_STREAM_PARAM )
{
    int m1, n1, ldwork, ldworkc, ws;

    /* Check input arguments */
    if ((side != MorseLeft) && (side != MorseRight)) {
        cudablas_error(1, "Illegal value of side");
        return -1;
    }

    /* Sizes of the first operand and layout of the two workspaces in WORK */
    if ( side == MorseLeft ) {
        m1      = K;
        n1      = N;
        ldwork  = IB;
        ldworkc = M;
        ws      = K * n1;
    }
    else {
        m1      = M;
        n1      = K;
        ldwork  = m1;
        ldworkc = IB;
        ws      = m1 * K;
    }

    /* TS case */
    if (L == 0) {
        CUDA_ztsmqr( side, trans, m1, n1, M, N, K, IB,
                     A, LDA, B, LDB, V, LDV, T, LDT,
                     WORK, ldwork, WORK + ws, ldworkc,
                     CUBLAS_STREAM_VALUE );
    }
    /* TT case */
    else if( L == M ) {
        /*
         * L is the 6th argument: report its positive position and return the
         * negated value, as done for the side check above.
         */
        cudablas_error(6, "TTMQRT not available on GPU yet\n" );
        return -6;
    }
    /* General TP case */
    else {
        cudablas_error(6, "TPMQRT not available on GPU yet\n" );
        return -6;
    }

    return MORSE_SUCCESS;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment