Commit 18416866 authored by Mathieu Faverge's avatar Mathieu Faverge

It seems that almost everything is back

parent 3df078f5
......@@ -161,15 +161,15 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/cmake_modules/")
endif()
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set(CHAMELEON_ENABLE_MPI OFF CACHE BOOL "Tells if MPI might be supported by the runtime")
set(CHAMELEON_ENABLE_MPI OFF CACHE INTERNAL "Tells if MPI might be supported by the runtime")
if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
set(CHAMELEON_ENABLE_MPI ON)
set(CHAMELEON_ENABLE_MPI ON FORCE)
endif()
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set(CHAMELEON_ENABLE_CUDA OFF CACHE BOOL "Tells if CUDA might be supported by the runtime")
set(CHAMELEON_ENABLE_CUDA OFF CACHE INTERNAL "Tells if CUDA might be supported by the runtime")
if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
set(CHAMELEON_ENABLE_CUDA ON)
set(CHAMELEON_ENABLE_CUDA ON FORCE)
endif()
# Additional options
......
......@@ -26,6 +26,18 @@
#
###
option(CHAMELEON_COPY_DIAG
"This options enables the duplication of the diagonal tiles in some algorithm to avoid anti-dependencies on lower/upper triangular parts (Might be useful to StarPU)" ON)
mark_as_advanced(CHAMELEON_COPY_DIAG)
if ( CHAMELEON_SCHED_QUARK )
# No need for those extra diagonale tiles
set( CHAMELEON_COPY_DIAG OFF )
endif()
if (CHAMELEON_COPY_DIAG)
add_definitions(-DCHAMELEON_COPY_DIAG)
endif()
# Define the list of sources
# --------------------------
......
......@@ -53,7 +53,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
int k, m, n;
int ldak, ldam;
int tempkm, tempkn, tempmm, tempnn;
int ib;
int ib, minMT;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -62,6 +62,12 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
ib = MORSE_IB;
if (A->m > A->n) {
minMT = A->nt;
} else {
minMT = A->mt;
}
/*
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
......
......@@ -87,9 +87,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
*/
ws_worker = max( ws_worker, ib * (ib + A->nb) );
ws_worker = max( ws_worker, ib * A->nb * 2 );
#endif
#if defined(CHAMELEON_USE_MAGMA)
/* Host space
*
* zgeqrt = ib * (A->nb+3*ib) + A->nb )
......
......@@ -143,13 +143,6 @@
#define MORSE_MPI_SIZE morse->mpi_comm_size
#endif
/*******************************************************************************
* Activate copy of diagonal tile (StarPU only) for some tile algorithms (pz)
**/
#if defined(CHAMELEON_SCHED_STARPU)
#define CHAMELEON_COPY_DIAG
#endif
/*******************************************************************************
* IPT internal define
**/
......
......@@ -102,7 +102,7 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_seq
void morse_pzgetrf_nopiv(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzgetrf_reclap(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzgetrf_rectil(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhbcpy_t2bl(MORSE_enum uplo, MORSE_desc_t *A, MORSE_Complex64_t *AB, int LDAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhbcpy_t2bl(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *AB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhegst(MORSE_enum itype, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
#ifdef COMPLEX
void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -145,12 +145,13 @@ int MORSE_Finalize(void)
#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION)
magma_finalize();
#endif
morse_context_destroy();
#if defined(CHAMELEON_USE_MPI)
if (!morse->mpi_outer_init)
MPI_Finalize();
#endif
morse_context_destroy();
return MORSE_SUCCESS;
}
......
......@@ -177,6 +177,7 @@ inline static int morse_getrankof_2d(const MORSE_desc_t *desc, int m, int n)
**/
inline static int morse_getrankof_2d_diag(const MORSE_desc_t *desc, int m, int n)
{
assert( n == 0 );
return (m % desc->p) * desc->q + (m % desc->q);
}
......
......@@ -27,33 +27,40 @@
# ------------------------------------------------------
set(CUDABLAS_SRCS_GENERATED "")
set(ZSRC
cuda_zgelqt.c
cuda_zgemerge.c
cuda_zgemm.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zhemm.c
cuda_zher2k.c
cuda_zherk.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_zsymm.c
cuda_zsyr2k.c
cuda_zsyrk.c
cuda_ztrmm.c
cuda_ztrsm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsmlq.c
cuda_ztsmqr.c
)
if( CHAMELEON_USE_MAGMA )
set(ZSRC
${ZSRC}
cuda_zgelqt.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsqrt.c
cuda_ztstrf.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
endif()
precisions_rules_py(CUDABLAS_SRCS_GENERATED "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
......
......@@ -24,28 +24,15 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
int M, int N, cuDoubleComplex *A, int LDA,
cuDoubleComplex *B, int LDB, CUBLAS_STREAM_PARAM)
int
CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
int M, int N,
cuDoubleComplex *A, int LDA,
cuDoubleComplex *B, int LDB,
CUBLAS_STREAM_PARAM)
{
int i, j;
magmaDoubleComplex *cola, *colb;
cublasHandle_t handle;
cublasStatus_t stat;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cuDoubleComplex *cola, *colb;
if (M < 0) {
return -1;
......@@ -60,55 +47,9 @@ int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
return -7;
}
if (side == MagmaLeft){
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(handle, i+1, cola, 1, colb, 1);
cudaMemcpyAsync(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}else{
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(handle, M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zgemerge(
magma_side_t side, magma_diag_t diag,
magma_int_t M, magma_int_t N,
magmaDoubleComplex *A, magma_int_t LDA,
magmaDoubleComplex *B, magma_int_t LDB,
CUstream stream)
{
int i, j;
magmaDoubleComplex *cola, *colb;
if (M < 0) {
return -1;
}
if (N < 0) {
return -2;
}
if ( (LDA < max(1,M)) && (M > 0) ) {
return -5;
}
if ( (LDB < max(1,M)) && (M > 0) ) {
return -7;
}
CUBLAS_GET_STREAM;
if (side == MagmaLeft){
if (side == MorseLeft){
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
......@@ -128,5 +69,3 @@ int CUDA_zgemerge(
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
......@@ -55,7 +55,7 @@ int CUDA_ztsmlq(
NW = IB;
}
else {
NW = M1;
NW = N1;
}
if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
......
......@@ -194,8 +194,8 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -129,11 +129,9 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
B, ldb,
beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum transA;
......@@ -172,9 +170,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum transA;
......@@ -213,9 +209,9 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -151,10 +151,9 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
WORK = TAU + max( m, n );
CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
{
MORSE_starpu_ws_t *h_work;
......@@ -196,8 +195,8 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -133,10 +133,8 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
{
int m;
......@@ -163,8 +161,8 @@ static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -171,13 +171,12 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
}
#endif
}
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
{
int m;
......@@ -228,8 +227,8 @@ static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#endif //!defined(CHAMELEON_SIMULATION)
/*
......
......@@ -119,13 +119,11 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
}
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
{
int m;
......@@ -142,8 +140,8 @@ static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
CUDA_zgetrf_nopiv( m, n, dA, lda, &info );
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -99,11 +99,9 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
B, LDB,
beta, C, LDC);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -143,9 +141,7 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -183,9 +179,9 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -96,11 +96,9 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
CORE_zher2k(uplo, trans,
n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -135,9 +133,7 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -171,9 +167,9 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -90,11 +90,9 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
alpha, A, lda,
beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -132,9 +130,7 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -168,9 +164,9 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -73,10 +73,8 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
CORE_zlauum(uplo, N, A, LDA);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zlauum_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -91,8 +89,8 @@ static void cl_zlauum_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -79,10 +79,8 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
CORE_zpotrf(uplo, n, A, lda, &info);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_MAGMA
#if !defined(CHAMELEON_SIMULATION)
static void cl_zpotrf_cuda_func(void *descr[], void *cl_arg)
{
cudaStream_t stream[2], currentt_stream;
......@@ -115,8 +113,8 @@ static void cl_zpotrf_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -170,10 +170,8 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zssssm_cuda_func(void *descr[], void *cl_arg)
{
int m1;
......@@ -213,8 +211,8 @@ static void cl_zssssm_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -99,11 +99,9 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
B, LDB,
beta, C, LDC);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -141,9 +139,7 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -181,9 +177,9 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -96,11 +96,9 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
CORE_zsyr2k(uplo, trans,
n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -135,9 +133,7 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -171,9 +167,9 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -91,11 +91,9 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
alpha, A, lda,