Commit d363ff0a authored by Mathieu Faverge's avatar Mathieu Faverge

Upgrade kernels to compile with API v2 (even with this crazy change in TRSM API .....)

parent e50caa38
......@@ -65,12 +65,20 @@ if( CHAMELEON_USE_MAGMA )
)
endif()
precisions_rules_py(CUDABLAS_SRCS_GENERATED "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
precisions_rules_py(
CUDABLAS_SRCS_GENERATED "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
set(CUDABLAS_SRCS
${CUDABLAS_SRCS_GENERATED}
${CUDABLAS_SRCS_GENERATED}
)
if (CHAMELEON_USE_CUBLAS_V2)
set(CUDABLAS_SRCS
${CUDABLAS_SRCS}
cudaglobal.c
)
endif (CHAMELEON_USE_CUBLAS_V2)
# Compile step
# ------------
......
......@@ -35,7 +35,7 @@ int CUDA_zgemm(MORSE_enum transa, MORSE_enum transb,
CUBLAS_STREAM_PARAM)
{
cublasZgemm(CUBLAS_HANDLE
morse_lapack_const(transa), morse_lapack_const(transb),
morse_cublas_const(transa), morse_cublas_const(transb),
m, n, k,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
......
......@@ -35,7 +35,7 @@ int CUDA_zhemm(MORSE_enum side, MORSE_enum uplo,
CUBLAS_STREAM_PARAM)
{
cublasZhemm(CUBLAS_HANDLE
morse_lapack_const(side), morse_lapack_const(uplo),
morse_cublas_const(side), morse_cublas_const(uplo),
m, n,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
......
......@@ -35,7 +35,7 @@ int CUDA_zher2k(MORSE_enum uplo, MORSE_enum trans,
CUBLAS_STREAM_PARAM)
{
cublasZher2k(CUBLAS_HANDLE
morse_lapack_const(uplo), morse_lapack_const(trans),
morse_cublas_const(uplo), morse_cublas_const(trans),
n, k,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
......
......@@ -33,11 +33,11 @@ int CUDA_zherk( MORSE_enum uplo, MORSE_enum trans,
cuDoubleComplex *B, int ldb,
CUBLAS_STREAM_PARAM)
{
cublasZherk(
morse_lapack_const(uplo), morse_lapack_const(trans),
n, k,
*alpha, A, lda,
*beta, B, ldb);
cublasZherk( CUBLAS_HANDLE
morse_cublas_const(uplo), morse_cublas_const(trans),
n, k,
CUBLAS_VALUE(alpha), A, lda,
CUBLAS_VALUE(beta), B, ldb);
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
......
......@@ -103,23 +103,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
// W = C^H V
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseConjTrans), morse_lapack_const(notransV),
morse_cublas_const(MorseConjTrans), morse_cublas_const(notransV),
N, K, M,
CUBLAS_SADDR(zone), C, LDC,
V, LDV,
CUBLAS_SADDR(zzero), WORK, LDWORK );
// W = W T^H = C^H V T^H
cublasZtrmm( CUBLAS_HANDLE
morse_lapack_const(MorseRight), morse_lapack_const(uplo),
morse_lapack_const(transT), morse_lapack_const(MorseNonUnit),
N, K, CUBLAS_SADDR(zone),
T, LDT,
WORK, LDWORK);
CUDA_ztrmm( MorseRight, uplo, transT, MorseNonUnit,
N, K,
CUBLAS_SADDR(zone), T, LDT,
WORK, LDWORK,
CUBLAS_STREAM_VALUE );
// C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(notransV), morse_lapack_const(MorseConjTrans),
morse_cublas_const(notransV), morse_cublas_const(MorseConjTrans),
M, N, K,
CUBLAS_SADDR(mzone), V, LDV,
WORK, LDWORK,
......@@ -131,23 +130,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
// W = C V
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(notransV),
morse_cublas_const(MorseNoTrans), morse_cublas_const(notransV),
M, K, N,
CUBLAS_SADDR(zone), C, LDC,
V, LDV,
CUBLAS_SADDR(zzero), WORK, LDWORK );
// W = W T = C V T
cublasZtrmm( CUBLAS_HANDLE
morse_lapack_const(MorseRight), morse_lapack_const(uplo),
morse_lapack_const(trans), morse_lapack_const(MorseNonUnit),
M, K, CUBLAS_SADDR(zone),
T, LDT,
WORK, LDWORK);
CUDA_ztrmm( MorseRight, uplo, trans, MorseNonUnit,
M, K,
CUBLAS_SADDR(zone), T, LDT,
WORK, LDWORK,
CUBLAS_STREAM_VALUE );
// C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(transV),
morse_cublas_const(MorseNoTrans), morse_cublas_const(transV),
M, N, K,
CUBLAS_SADDR(mzone), WORK, LDWORK,
V, LDV,
......
......@@ -243,7 +243,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
transA2 = storev == MorseColumnwise ? MorseNoTrans : MorseConjTrans;
cublasZgemm(CUBLAS_HANDLE
morse_lapack_const(transW), morse_lapack_const(MorseNoTrans),
morse_cublas_const(transW), morse_cublas_const(MorseNoTrans),
K, N1, M2,
CUBLAS_SADDR(zone),
V /* K*M2 */, LDV,
......@@ -253,14 +253,11 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
if (WORKC == NULL) {
/* W = op(T) * W */
cublasZtrmm( CUBLAS_HANDLE
morse_lapack_const(MorseLeft), morse_lapack_const(MorseUpper),
morse_lapack_const(trans), morse_lapack_const(MorseNonUnit),
K, N2,
CUBLAS_SADDR(zone),
T, LDT,
WORK, LDWORK);
CUDA_ztrmm( MorseLeft, MorseUpper, trans, MorseNonUnit,
K, N2,
CUBLAS_SADDR(zone), T, LDT,
WORK, LDWORK,
CUBLAS_STREAM_VALUE );
/* A1 = A1 - W = A1 - op(T) * W */
for(j = 0; j < N1; j++) {
......@@ -272,7 +269,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
/* A2 = A2 - op(V) * W */
cublasZgemm(CUBLAS_HANDLE
morse_lapack_const(transA2), morse_lapack_const(MorseNoTrans),
morse_cublas_const(transA2), morse_cublas_const(MorseNoTrans),
M2, N2, K,
CUBLAS_SADDR(mzone), V /* M2*K */, LDV,
WORK /* K*N2 */, LDWORK,
......@@ -281,7 +278,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
} else {
/* Wc = V * op(T) */
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(transA2), morse_lapack_const(trans),
morse_cublas_const(transA2), morse_cublas_const(trans),
M2, K, K,
CUBLAS_SADDR(zone), V, LDV,
T, LDT,
......@@ -289,7 +286,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
/* A1 = A1 - opt(T) * W */
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(trans), morse_lapack_const(MorseNoTrans),
morse_cublas_const(trans), morse_cublas_const(MorseNoTrans),
K, N1, K,
CUBLAS_SADDR(mzone), T, LDT,
WORK, LDWORK,
......@@ -297,7 +294,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
/* A2 = A2 - Wc * W */
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(MorseNoTrans),
morse_cublas_const(MorseNoTrans), morse_cublas_const(MorseNoTrans),
M2, N2, K,
CUBLAS_SADDR(mzone), WORKC, LDWORKC,
WORK, LDWORK,
......@@ -328,7 +325,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
transA2 = storev == MorseColumnwise ? MorseConjTrans : MorseNoTrans;
cublasZgemm(CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(transW),
morse_cublas_const(MorseNoTrans), morse_cublas_const(transW),
M1, K, N2,
CUBLAS_SADDR(zone), A2 /* M1*N2 */, LDA2,
V /* N2*K */, LDV,
......@@ -336,14 +333,11 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
if (WORKC == NULL) {
/* W = W * op(T) */
cublasZtrmm( CUBLAS_HANDLE
morse_lapack_const(MorseRight), morse_lapack_const(MorseUpper),
morse_lapack_const(trans), morse_lapack_const(MorseNonUnit),
M2, K,
CUBLAS_SADDR(zone),
T, LDT,
WORK, LDWORK);
CUDA_ztrmm( MorseRight, MorseUpper, trans, MorseNonUnit,
M2, K,
CUBLAS_SADDR(zone), T, LDT,
WORK, LDWORK,
CUBLAS_STREAM_VALUE );
/* A1 = A1 - W = A1 - W * op(T) */
for(j = 0; j < K; j++) {
......@@ -355,7 +349,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
/* A2 = A2 - W * op(V) */
cublasZgemm(CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(transA2),
morse_cublas_const(MorseNoTrans), morse_cublas_const(transA2),
M2, N2, K,
CUBLAS_SADDR(mzone), WORK /* M2*K */, LDWORK,
V /* K*N2 */, LDV,
......@@ -364,7 +358,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
} else {
/* A1 = A1 - W * opt(T) */
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(trans),
morse_cublas_const(MorseNoTrans), morse_cublas_const(trans),
M1, K, K,
CUBLAS_SADDR(mzone), WORK, LDWORK,
T, LDT,
......@@ -372,7 +366,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
/* Wc = op(T) * V */
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(trans), morse_lapack_const(transA2),
morse_cublas_const(trans), morse_cublas_const(transA2),
K, N2, K,
CUBLAS_SADDR(zone), T, LDT,
V, LDV,
......@@ -380,7 +374,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans,
/* A2 = A2 - W * Wc */
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(MorseNoTrans),
morse_cublas_const(MorseNoTrans), morse_cublas_const(MorseNoTrans),
M2, N2, K,
CUBLAS_SADDR(mzone), WORK, LDWORK,
WORKC, LDWORKC,
......
......@@ -35,7 +35,7 @@ int CUDA_zsymm(MORSE_enum side, MORSE_enum uplo,
CUBLAS_STREAM_PARAM)
{
cublasZsymm(CUBLAS_HANDLE
morse_lapack_const(side), morse_lapack_const(uplo),
morse_cublas_const(side), morse_cublas_const(uplo),
m, n,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
......
......@@ -36,7 +36,7 @@ int CUDA_zsyr2k(
CUBLAS_STREAM_PARAM)
{
cublasZsyr2k(CUBLAS_HANDLE
morse_lapack_const(uplo), morse_lapack_const(trans),
morse_cublas_const(uplo), morse_cublas_const(trans),
n, k,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
......
......@@ -34,7 +34,7 @@ int CUDA_zsyrk(MORSE_enum uplo, MORSE_enum trans,
CUBLAS_STREAM_PARAM)
{
cublasZsyrk(CUBLAS_HANDLE
morse_lapack_const(uplo), morse_lapack_const(trans),
morse_cublas_const(uplo), morse_cublas_const(trans),
n, k,
CUBLAS_VALUE(alpha), A, lda,
CUBLAS_VALUE(beta), C, ldc);
......
......@@ -34,13 +34,29 @@ int CUDA_ztrmm(
cuDoubleComplex *B, int ldb,
CUBLAS_STREAM_PARAM)
{
cublasZtrmm(CUBLAS_HANDLE
morse_lapack_const(side), morse_lapack_const(uplo),
morse_lapack_const(transa), morse_lapack_const(diag),
#if defined(CHAMELEON_USE_CUBLAS_V2)
cublasZtrmm(
CUBLAS_HANDLE
morse_cublas_const(side), morse_cublas_const(uplo),
morse_cublas_const(transa), morse_cublas_const(diag),
m, n,
CUBLAS_VALUE(alpha), A, lda,
B, ldb,
B, ldb);
#else
cublasZtrmm(
CUBLAS_HANDLE
morse_cublas_const(side), morse_cublas_const(uplo),
morse_cublas_const(transa), morse_cublas_const(diag),
m, n,
CUBLAS_VALUE(alpha), A, lda,
B, ldb);
#endif
assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
return MORSE_SUCCESS;
......
......@@ -34,8 +34,8 @@ int CUDA_ztrsm(MORSE_enum side, MORSE_enum uplo,
CUBLAS_STREAM_PARAM)
{
cublasZtrsm(CUBLAS_HANDLE
morse_lapack_const(side), morse_lapack_const(uplo),
morse_lapack_const(transa), morse_lapack_const(diag),
morse_cublas_const(side), morse_cublas_const(uplo),
morse_cublas_const(transa), morse_cublas_const(diag),
m, n,
CUBLAS_VALUE(alpha), A, lda,
B, ldb);
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file cudaglobal.c
*
* MORSE auxiliary routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2017-04-06
*
**/
#include "cudablas/include/cudablas.h"
/*******************************************************************************
* LAPACK Constants
**/
int morse_cublas_constants[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, // 100
0, // 101: MorseRowMajor
0, // 102: MorseColMajor
0, 0, 0, 0, 0, 0, 0, 0,
CUBLAS_OP_N, // 111: MorseNoTrans
CUBLAS_OP_T, // 112: MorseTrans
CUBLAS_OP_C, // 113: MorseConjTrans
0, 0, 0, 0, 0, 0, 0,
CUBLAS_FILL_MODE_UPPER, // 121: MorseUpper
CUBLAS_FILL_MODE_LOWER, // 122: MorseLower
0, // 123: MorseUpperLower
0, 0, 0, 0, 0, 0, 0,
CUBLAS_DIAG_NON_UNIT, // 131: MorseNonUnit
CUBLAS_DIAG_UNIT, // 132: MorseUnit
0, 0, 0, 0, 0, 0, 0, 0,
CUBLAS_SIDE_LEFT, // 141: MorseLeft
CUBLAS_SIDE_RIGHT, // 142: MorseRight
0, 0, 0, 0, 0, 0, 0, 0,
0, // 151:
0, // 152:
0, // 153:
0, // 154:
0, // 155:
0, // 156:
0, // 157: MorseEps
0, // 158:
0, // 159:
0, // 160:
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, // 171: MorseOneNorm
0, // 172: MorseRealOneNorm
0, // 173: MorseTwoNorm
0, // 174: MorseFrobeniusNorm
0, // 175: MorseInfNorm
0, // 176: MorseRealInfNorm
0, // 177: MorseMaxNorm
0, // 178: MorseRealMaxNorm
0, // 179
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, // 200
0, // 201: MorseDistUniform
0, // 202: MorseDistSymmetric
0, // 203: MorseDistNormal
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, // 240
0, // 241 MorseHermGeev
0, // 242 MorseHermPoev
0, // 243 MorseNonsymPosv
0, // 244 MorseSymPosv
0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, // 290
0, // 291 MorseNoPacking
0, // 292 MorsePackSubdiag
0, // 293 MorsePackSupdiag
0, // 294 MorsePackColumn
0, // 295 MorsePackRow
0, // 296 MorsePackLowerBand
0, // 297 MorsePackUpeprBand
0, // 298 MorsePackAll
0, // 299
0, // 300
0, // 301 MorseNoVec
0, // 302 MorseVec
0, // 303 MorseIvec
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, // 390
0, // 391
0, // 392
0, 0, 0, 0, 0, 0, 0, 0,
0, // 401
0, // 402
0, 0, 0, 0, 0, 0, 0, 0 // Remember to add a coma!
};
......@@ -41,7 +41,9 @@
#if defined(CHAMELEON_USE_CUBLAS_V2)
#include <cublas.h>
#include <cublas_v2.h>
#define CUBLAS_STREAM_PARAM cublasHandle_t handle
#define CUBLAS_STREAM_VALUE handle
#define CUBLAS_HANDLE handle,
......@@ -96,4 +98,12 @@
extern char *morse_lapack_constants[];
#define morse_lapack_const(morse_const) morse_lapack_constants[morse_const][0]
extern int morse_cublas_constants[];
#if defined(CHAMELEON_USE_CUBLAS_V2)
#define morse_cublas_const(morse_const) morse_cublas_constants[morse_const]
#else
#define morse_cublas_const(morse_const) morse_lapack_constants[morse_const][0]
#endif
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment