diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt index 06d25bd476f7d7f5286e40eefd583cee53d2de2a..39a9d20be5bb4f73d6892417af809b2db0f9be90 100644 --- a/cudablas/compute/CMakeLists.txt +++ b/cudablas/compute/CMakeLists.txt @@ -65,12 +65,20 @@ if( CHAMELEON_USE_MAGMA ) ) endif() -precisions_rules_py(CUDABLAS_SRCS_GENERATED "${ZSRC}" - PRECISIONS "${CHAMELEON_PRECISION}") +precisions_rules_py( + CUDABLAS_SRCS_GENERATED "${ZSRC}" + PRECISIONS "${CHAMELEON_PRECISION}") set(CUDABLAS_SRCS - ${CUDABLAS_SRCS_GENERATED} + ${CUDABLAS_SRCS_GENERATED} + ) + +if (CHAMELEON_USE_CUBLAS_V2) + set(CUDABLAS_SRCS + ${CUDABLAS_SRCS} + cudaglobal.c ) +endif (CHAMELEON_USE_CUBLAS_V2) # Compile step # ------------ diff --git a/cudablas/compute/cuda_zgemm.c b/cudablas/compute/cuda_zgemm.c index abc0a24f95a186cf080294ad61446e2ddd01f6f3..c5d0503b531443ab37b7dbeb127b3af672ec0250 100644 --- a/cudablas/compute/cuda_zgemm.c +++ b/cudablas/compute/cuda_zgemm.c @@ -35,7 +35,7 @@ int CUDA_zgemm(MORSE_enum transa, MORSE_enum transb, CUBLAS_STREAM_PARAM) { cublasZgemm(CUBLAS_HANDLE - morse_lapack_const(transa), morse_lapack_const(transb), + morse_cublas_const(transa), morse_cublas_const(transb), m, n, k, CUBLAS_VALUE(alpha), A, lda, B, ldb, diff --git a/cudablas/compute/cuda_zhemm.c b/cudablas/compute/cuda_zhemm.c index 2727223796d4791d27b25ca424678c4bcb5437c7..5cb93159e26ddbe5ddc3fb2608f1436b7dcf9f79 100644 --- a/cudablas/compute/cuda_zhemm.c +++ b/cudablas/compute/cuda_zhemm.c @@ -35,7 +35,7 @@ int CUDA_zhemm(MORSE_enum side, MORSE_enum uplo, CUBLAS_STREAM_PARAM) { cublasZhemm(CUBLAS_HANDLE - morse_lapack_const(side), morse_lapack_const(uplo), + morse_cublas_const(side), morse_cublas_const(uplo), m, n, CUBLAS_VALUE(alpha), A, lda, B, ldb, diff --git a/cudablas/compute/cuda_zher2k.c b/cudablas/compute/cuda_zher2k.c index 204a41e64eb8c9d669a224bf7d8574229173ab0f..206d0d50dc8e13dbf6aa55b528808088d057a11a 100644 --- a/cudablas/compute/cuda_zher2k.c +++ b/cudablas/compute/cuda_zher2k.c @@ -35,7 +35,7 @@ int CUDA_zher2k(MORSE_enum uplo, MORSE_enum trans, CUBLAS_STREAM_PARAM) { cublasZher2k(CUBLAS_HANDLE - morse_lapack_const(uplo), morse_lapack_const(trans), + morse_cublas_const(uplo), morse_cublas_const(trans), n, k, CUBLAS_VALUE(alpha), A, lda, B, ldb, diff --git a/cudablas/compute/cuda_zherk.c b/cudablas/compute/cuda_zherk.c index 1d36498a6e26447f03517321b393a13ebac5da97..f5eec6d1721efefdce0f5a8ce974887b00c9b1c6 100644 --- a/cudablas/compute/cuda_zherk.c +++ b/cudablas/compute/cuda_zherk.c @@ -33,11 +33,11 @@ int CUDA_zherk( MORSE_enum uplo, MORSE_enum trans, cuDoubleComplex *B, int ldb, CUBLAS_STREAM_PARAM) { - cublasZherk( - morse_lapack_const(uplo), morse_lapack_const(trans), - n, k, - *alpha, A, lda, - *beta, B, ldb); + cublasZherk( CUBLAS_HANDLE + morse_cublas_const(uplo), morse_cublas_const(trans), + n, k, + CUBLAS_VALUE(alpha), A, lda, + CUBLAS_VALUE(beta), B, ldb); assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); diff --git a/cudablas/compute/cuda_zlarfb.c b/cudablas/compute/cuda_zlarfb.c index dac0174395c3fd5f5ebbb9e37afc43c809b2ee60..0faee149e21d22aa9d4092da6aa5485997c5e0a1 100644 --- a/cudablas/compute/cuda_zlarfb.c +++ b/cudablas/compute/cuda_zlarfb.c @@ -103,23 +103,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, // W = C^H V cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(MorseConjTrans), morse_lapack_const(notransV), + morse_cublas_const(MorseConjTrans), morse_cublas_const(notransV), N, K, M, CUBLAS_SADDR(zone), C, LDC, V, LDV, CUBLAS_SADDR(zzero), WORK, LDWORK ); // W = W T^H = C^H V T^H - cublasZtrmm( CUBLAS_HANDLE - morse_lapack_const(MorseRight), morse_lapack_const(uplo), - morse_lapack_const(transT), morse_lapack_const(MorseNonUnit), - N, K, CUBLAS_SADDR(zone), - T, LDT, - WORK, LDWORK); + CUDA_ztrmm( MorseRight, uplo, transT, MorseNonUnit, + N, K, + CUBLAS_SADDR(zone), T, LDT, + WORK, LDWORK, + CUBLAS_STREAM_VALUE ); // C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(notransV), morse_lapack_const(MorseConjTrans), + morse_cublas_const(notransV), morse_cublas_const(MorseConjTrans), M, N, K, CUBLAS_SADDR(mzone), V, LDV, WORK, LDWORK, @@ -131,23 +130,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, // W = C V cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(notransV), + morse_cublas_const(MorseNoTrans), morse_cublas_const(notransV), M, K, N, CUBLAS_SADDR(zone), C, LDC, V, LDV, CUBLAS_SADDR(zzero), WORK, LDWORK ); // W = W T = C V T - cublasZtrmm( CUBLAS_HANDLE - morse_lapack_const(MorseRight), morse_lapack_const(uplo), - morse_lapack_const(trans), morse_lapack_const(MorseNonUnit), - M, K, CUBLAS_SADDR(zone), - T, LDT, - WORK, LDWORK); + CUDA_ztrmm( MorseRight, uplo, trans, MorseNonUnit, + M, K, + CUBLAS_SADDR(zone), T, LDT, + WORK, LDWORK, + CUBLAS_STREAM_VALUE ); // C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(transV), + morse_cublas_const(MorseNoTrans), morse_cublas_const(transV), M, N, K, CUBLAS_SADDR(mzone), WORK, LDWORK, V, LDV, diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c index edca0c8d7e13b14995ac2c74857cdfa5106e38ca..e3e29f9569d37451b8741861a238315ed122115b 100644 --- a/cudablas/compute/cuda_zparfb.c +++ b/cudablas/compute/cuda_zparfb.c @@ -243,7 +243,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, transA2 = storev == MorseColumnwise ? MorseNoTrans : MorseConjTrans; cublasZgemm(CUBLAS_HANDLE - morse_lapack_const(transW), morse_lapack_const(MorseNoTrans), + morse_cublas_const(transW), morse_cublas_const(MorseNoTrans), K, N1, M2, CUBLAS_SADDR(zone), V /* K*M2 */, LDV, @@ -253,14 +253,11 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, if (WORKC == NULL) { /* W = op(T) * W */ - cublasZtrmm( CUBLAS_HANDLE - morse_lapack_const(MorseLeft), morse_lapack_const(MorseUpper), - morse_lapack_const(trans), morse_lapack_const(MorseNonUnit), - K, N2, - CUBLAS_SADDR(zone), - T, LDT, - WORK, LDWORK); - + CUDA_ztrmm( MorseLeft, MorseUpper, trans, MorseNonUnit, + K, N2, + CUBLAS_SADDR(zone), T, LDT, + WORK, LDWORK, + CUBLAS_STREAM_VALUE ); /* A1 = A1 - W = A1 - op(T) * W */ for(j = 0; j < N1; j++) { @@ -272,7 +269,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, /* A2 = A2 - op(V) * W */ cublasZgemm(CUBLAS_HANDLE - morse_lapack_const(transA2), morse_lapack_const(MorseNoTrans), + morse_cublas_const(transA2), morse_cublas_const(MorseNoTrans), M2, N2, K, CUBLAS_SADDR(mzone), V /* M2*K */, LDV, WORK /* K*N2 */, LDWORK, @@ -281,7 +278,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, } else { /* Wc = V * op(T) */ cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(transA2), morse_lapack_const(trans), + morse_cublas_const(transA2), morse_cublas_const(trans), M2, K, K, CUBLAS_SADDR(zone), V, LDV, T, LDT, @@ -289,7 +286,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, /* A1 = A1 - opt(T) * W */ cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(trans), morse_lapack_const(MorseNoTrans), + morse_cublas_const(trans), morse_cublas_const(MorseNoTrans), K, N1, K, CUBLAS_SADDR(mzone), T, LDT, WORK, LDWORK, @@ -297,7 +294,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, /* A2 = A2 - Wc * W */ cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(MorseNoTrans), + morse_cublas_const(MorseNoTrans), morse_cublas_const(MorseNoTrans), M2, N2, K, CUBLAS_SADDR(mzone), WORKC, LDWORKC, WORK, LDWORK, @@ -328,7 +325,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, transA2 = storev == MorseColumnwise ? MorseConjTrans : MorseNoTrans; cublasZgemm(CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(transW), + morse_cublas_const(MorseNoTrans), morse_cublas_const(transW), M1, K, N2, CUBLAS_SADDR(zone), A2 /* M1*N2 */, LDA2, V /* N2*K */, LDV, @@ -336,14 +333,11 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, if (WORKC == NULL) { /* W = W * op(T) */ - cublasZtrmm( CUBLAS_HANDLE - morse_lapack_const(MorseRight), morse_lapack_const(MorseUpper), - morse_lapack_const(trans), morse_lapack_const(MorseNonUnit), - M2, K, - CUBLAS_SADDR(zone), - T, LDT, - WORK, LDWORK); - + CUDA_ztrmm( MorseRight, MorseUpper, trans, MorseNonUnit, + M2, K, + CUBLAS_SADDR(zone), T, LDT, + WORK, LDWORK, + CUBLAS_STREAM_VALUE ); /* A1 = A1 - W = A1 - W * op(T) */ for(j = 0; j < K; j++) { @@ -355,7 +349,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, /* A2 = A2 - W * op(V) */ cublasZgemm(CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(transA2), + morse_cublas_const(MorseNoTrans), morse_cublas_const(transA2), M2, N2, K, CUBLAS_SADDR(mzone), WORK /* M2*K */, LDWORK, V /* K*N2 */, LDV, @@ -364,7 +358,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, } else { /* A1 = A1 - W * opt(T) */ cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(trans), + morse_cublas_const(MorseNoTrans), morse_cublas_const(trans), M1, K, K, CUBLAS_SADDR(mzone), WORK, LDWORK, T, LDT, @@ -372,7 +366,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, /* Wc = op(T) * V */ cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(trans), morse_lapack_const(transA2), + morse_cublas_const(trans), morse_cublas_const(transA2), K, N2, K, CUBLAS_SADDR(zone), T, LDT, V, LDV, @@ -380,7 +374,7 @@ CUDA_zparfb(MORSE_enum side, MORSE_enum trans, /* A2 = A2 - W * Wc */ cublasZgemm( CUBLAS_HANDLE - morse_lapack_const(MorseNoTrans), morse_lapack_const(MorseNoTrans), + morse_cublas_const(MorseNoTrans), morse_cublas_const(MorseNoTrans), M2, N2, K, CUBLAS_SADDR(mzone), WORK, LDWORK, WORKC, LDWORKC, diff --git a/cudablas/compute/cuda_zsymm.c b/cudablas/compute/cuda_zsymm.c index 4b448dd0371f070123d7700f29c6c89efc9e5b0e..5213565edb79207844345170f9de2cb8549a7d1b 100644 --- a/cudablas/compute/cuda_zsymm.c +++ b/cudablas/compute/cuda_zsymm.c @@ -35,7 +35,7 @@ int CUDA_zsymm(MORSE_enum side, MORSE_enum uplo, CUBLAS_STREAM_PARAM) { cublasZsymm(CUBLAS_HANDLE - morse_lapack_const(side), morse_lapack_const(uplo), + morse_cublas_const(side), morse_cublas_const(uplo), m, n, CUBLAS_VALUE(alpha), A, lda, B, ldb, diff --git a/cudablas/compute/cuda_zsyr2k.c b/cudablas/compute/cuda_zsyr2k.c index 64e4f4be11b3f0da94b2e0700129aa5c16d77a6f..c80babbad1e968f721881b630a18515ce25de51a 100644 --- a/cudablas/compute/cuda_zsyr2k.c +++ b/cudablas/compute/cuda_zsyr2k.c @@ -36,7 +36,7 @@ int CUDA_zsyr2k( CUBLAS_STREAM_PARAM) { cublasZsyr2k(CUBLAS_HANDLE - morse_lapack_const(uplo), morse_lapack_const(trans), + morse_cublas_const(uplo), morse_cublas_const(trans), n, k, CUBLAS_VALUE(alpha), A, lda, B, ldb, diff --git a/cudablas/compute/cuda_zsyrk.c b/cudablas/compute/cuda_zsyrk.c index b7e10c547d766c61c1799952b7b473053e6db62e..11d18ea1556b15b3511ad20b50451c5cd76c06d3 100644 --- a/cudablas/compute/cuda_zsyrk.c +++ b/cudablas/compute/cuda_zsyrk.c @@ -34,7 +34,7 @@ int CUDA_zsyrk(MORSE_enum uplo, MORSE_enum trans, CUBLAS_STREAM_PARAM) { cublasZsyrk(CUBLAS_HANDLE - morse_lapack_const(uplo), morse_lapack_const(trans), + morse_cublas_const(uplo), morse_cublas_const(trans), n, k, CUBLAS_VALUE(alpha), A, lda, CUBLAS_VALUE(beta), C, ldc); diff --git a/cudablas/compute/cuda_ztrmm.c b/cudablas/compute/cuda_ztrmm.c index 7d87dfb397aff7e6ccb657338b9c777fc62b2836..349a6f98a02597c81b7dbec7cf395b3cd8498ffc 100644 --- a/cudablas/compute/cuda_ztrmm.c +++ b/cudablas/compute/cuda_ztrmm.c @@ -34,13 +34,29 @@ int CUDA_ztrmm( cuDoubleComplex *B, int ldb, CUBLAS_STREAM_PARAM) { - cublasZtrmm(CUBLAS_HANDLE - morse_lapack_const(side), morse_lapack_const(uplo), - morse_lapack_const(transa), morse_lapack_const(diag), + +#if defined(CHAMELEON_USE_CUBLAS_V2) + + cublasZtrmm( + CUBLAS_HANDLE + morse_cublas_const(side), morse_cublas_const(uplo), + morse_cublas_const(transa), morse_cublas_const(diag), m, n, CUBLAS_VALUE(alpha), A, lda, + B, ldb, B, ldb); +#else + + cublasZtrmm( + CUBLAS_HANDLE + morse_cublas_const(side), morse_cublas_const(uplo), + morse_cublas_const(transa), morse_cublas_const(diag), + m, n, + CUBLAS_VALUE(alpha), A, lda, + B, ldb); +#endif + assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); return MORSE_SUCCESS; diff --git a/cudablas/compute/cuda_ztrsm.c b/cudablas/compute/cuda_ztrsm.c index 6953ccc7080073de22bb3cb4ac8582eaa80f0120..ae485b751b79ea71b04fbe9ce34c3aab08d97b36 100644 --- a/cudablas/compute/cuda_ztrsm.c +++ b/cudablas/compute/cuda_ztrsm.c @@ -34,8 +34,8 @@ int CUDA_ztrsm(MORSE_enum side, MORSE_enum uplo, CUBLAS_STREAM_PARAM) { cublasZtrsm(CUBLAS_HANDLE - morse_lapack_const(side), morse_lapack_const(uplo), - morse_lapack_const(transa), morse_lapack_const(diag), + morse_cublas_const(side), morse_cublas_const(uplo), + morse_cublas_const(transa), morse_cublas_const(diag), m, n, CUBLAS_VALUE(alpha), A, lda, B, ldb); diff --git a/cudablas/compute/cudaglobal.c b/cudablas/compute/cudaglobal.c new file mode 100644 index 0000000000000000000000000000000000000000..597501e9e45318c282226525dc2367afece94873 --- /dev/null +++ b/cudablas/compute/cudaglobal.c @@ -0,0 +1,127 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ +/** + * + * @file cudaglobal.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2017-04-06 + * + **/ +#include "cudablas/include/cudablas.h" + +/******************************************************************************* + * LAPACK Constants + **/ +int morse_cublas_constants[] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 100 + 0, // 101: MorseRowMajor + 0, // 102: MorseColMajor + 0, 0, 0, 0, 0, 0, 0, 0, + CUBLAS_OP_N, // 111: MorseNoTrans + CUBLAS_OP_T, // 112: MorseTrans + CUBLAS_OP_C, // 113: MorseConjTrans + 0, 0, 0, 0, 0, 0, 0, + CUBLAS_FILL_MODE_UPPER, // 121: MorseUpper + CUBLAS_FILL_MODE_LOWER, // 122: MorseLower + 0, // 123: MorseUpperLower + 0, 0, 0, 0, 0, 0, 0, + CUBLAS_DIAG_NON_UNIT, // 131: MorseNonUnit + CUBLAS_DIAG_UNIT, // 132: MorseUnit + 0, 0, 0, 0, 0, 0, 0, 0, + CUBLAS_SIDE_LEFT, // 141: MorseLeft + CUBLAS_SIDE_RIGHT, // 142: MorseRight + 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 151: + 0, // 152: + 0, // 153: + 0, // 154: + 0, // 155: + 0, // 156: + 0, // 157: MorseEps + 0, // 158: + 0, // 159: + 0, // 160: + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 171: MorseOneNorm + 0, // 172: MorseRealOneNorm + 0, // 173: MorseTwoNorm + 0, // 174: MorseFrobeniusNorm + 0, // 175: MorseInfNorm + 0, // 176: MorseRealInfNorm + 0, // 177: MorseMaxNorm + 0, // 178: MorseRealMaxNorm + 0, // 179 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 200 + 0, // 201: MorseDistUniform + 0, // 202: MorseDistSymmetric + 0, // 203: MorseDistNormal + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 240 + 0, // 241 MorseHermGeev + 0, // 242 MorseHermPoev + 0, // 243 MorseNonsymPosv + 0, // 244 MorseSymPosv + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 290 + 0, // 291 MorseNoPacking + 0, // 292 MorsePackSubdiag + 0, // 293 MorsePackSupdiag + 0, // 294 MorsePackColumn + 0, // 295 MorsePackRow + 0, // 296 MorsePackLowerBand + 0, // 297 MorsePackUpeprBand + 0, // 298 MorsePackAll + 0, // 299 + 0, // 300 + 0, // 301 MorseNoVec + 0, // 302 MorseVec + 0, // 303 MorseIvec + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 390 + 0, // 391 + 0, // 392 + 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 401 + 0, // 402 + 0, 0, 0, 0, 0, 0, 0, 0 // Remember to add a coma! +}; diff --git a/cudablas/include/cudablas.h b/cudablas/include/cudablas.h index d6c2004fe9bfa69e0b7ddb1fe9accb2f23f521e1..b181fc8332a36c1024f80c97d175e29f4ad1e0ae 100644 --- a/cudablas/include/cudablas.h +++ b/cudablas/include/cudablas.h @@ -41,7 +41,9 @@ #if defined(CHAMELEON_USE_CUBLAS_V2) +#include <cublas.h> #include <cublas_v2.h> + #define CUBLAS_STREAM_PARAM cublasHandle_t handle #define CUBLAS_STREAM_VALUE handle #define CUBLAS_HANDLE handle, @@ -96,4 +98,12 @@ extern char *morse_lapack_constants[]; #define morse_lapack_const(morse_const) morse_lapack_constants[morse_const][0] +extern int morse_cublas_constants[]; + +#if defined(CHAMELEON_USE_CUBLAS_V2) +#define morse_cublas_const(morse_const) morse_cublas_constants[morse_const] +#else +#define morse_cublas_const(morse_const) morse_lapack_constants[morse_const][0] +#endif + #endif