From 47383a750859a4561c7dbc30e4821928ef58aeb7 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 16 Feb 2022 10:16:39 +0100
Subject: [PATCH] cuda: Remove the CHAMELEON_USE_CUBLAS_V2 define to always
 enforce the use of the cuBLAS v2 API

---
 CMakeLists.txt                               |  6 ------
 control/common.h                             |  4 ----
 cudablas/compute/CMakeLists.txt              | 16 ++--------------
 cudablas/compute/cuda_zgeadd.c               |  4 ----
 cudablas/compute/cuda_ztrmm.c                | 13 -------------
 cudablas/include/cudablas.h                  | 19 -------------------
 include/chameleon/config.h.in                |  1 -
 runtime/starpu/codelets/codelet_zgeadd.c     |  6 +++---
 runtime/starpu/include/chameleon_starpu.h.in | 11 ++---------
 9 files changed, 7 insertions(+), 73 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b064cf151..45403fd80 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -520,12 +520,6 @@ int main(void) {
   HAVE_FALLTHROUGH
   )
 
-# Add option to exploit cublas API v2
-# -----------------------------------
-cmake_dependent_option(CHAMELEON_USE_CUBLAS_V2
-  "Enable cublas API v2" ON
-  "CHAMELEON_USE_CUDA;CHAMELEON_SCHED_STARPU" OFF)
-
 # Fix a problem on Mac OS X when building shared libraries
 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     set(CMAKE_SHARED_LINKER_FLAGS "-undefined dynamic_lookup")
diff --git a/control/common.h b/control/common.h
index ed2daacc0..7eab1f417 100644
--- a/control/common.h
+++ b/control/common.h
@@ -48,12 +48,8 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_runtime_api.h>
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 #include <cublas.h>
 #include <cublas_v2.h>
-#else
-#include <cublas.h>
-#endif
 #endif
 
 #if defined(CHAMELEON_USE_OPENCL) && !defined(CHAMELEON_SIMULATION)
diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt
index c61bc0496..a34736036 100644
--- a/cudablas/compute/CMakeLists.txt
+++ b/cudablas/compute/CMakeLists.txt
@@ -29,6 +29,7 @@
 # ------------------------------------------------------
 set(CUDABLAS_SRCS_GENERATED "")
 set(ZSRC
+    cuda_zgeadd.c
     cuda_zgemerge.c
     cuda_zgemm.c
     cuda_zhemm.c
@@ -52,13 +53,6 @@ set(ZSRC
     cuda_zunmqrt.c
     )
 
-if( CHAMELEON_USE_CUBLAS_V2 )
-  set(ZSRC
-    ${ZSRC}
-    cuda_zgeadd.c
-    )
-endif( CHAMELEON_USE_CUBLAS_V2 )
-
 # Former MAGMA files that are no longer supported
 # if( CHAMELEON_USE_MAGMA )
 #   set(ZSRC
@@ -83,15 +77,9 @@ precisions_rules_py(
 
 set(CUDABLAS_SRCS
   ${CUDABLAS_SRCS_GENERATED}
+  cudaglobal.c
   )
 
-if (CHAMELEON_USE_CUBLAS_V2)
-  set(CUDABLAS_SRCS
-    ${CUDABLAS_SRCS}
-    cudaglobal.c
-    )
-endif (CHAMELEON_USE_CUBLAS_V2)
-
 # Force generation of sources
 # ---------------------------
 add_custom_target(cudablas_sources ALL SOURCES ${CUDABLAS_SRCS})
diff --git a/cudablas/compute/cuda_zgeadd.c b/cudablas/compute/cuda_zgeadd.c
index 1c917cf20..b77093207 100644
--- a/cudablas/compute/cuda_zgeadd.c
+++ b/cudablas/compute/cuda_zgeadd.c
@@ -19,10 +19,6 @@
  */
 #include "cudablas.h"
 
-#if !defined(CHAMELEON_USE_CUBLAS_V2)
-#error "This file requires cublas api v2 support"
-#endif
-
 /**
  ******************************************************************************
  *
diff --git a/cudablas/compute/cuda_ztrmm.c b/cudablas/compute/cuda_ztrmm.c
index a054413e7..67e730097 100644
--- a/cudablas/compute/cuda_ztrmm.c
+++ b/cudablas/compute/cuda_ztrmm.c
@@ -30,8 +30,6 @@ int CUDA_ztrmm(
     CUBLAS_STREAM_PARAM)
 {
 
-#if defined(CHAMELEON_USE_CUBLAS_V2)
-
     cublasZtrmm(
         CUBLAS_HANDLE
         chameleon_cublas_const(side), chameleon_cublas_const(uplo),
@@ -41,17 +39,6 @@ int CUDA_ztrmm(
         B, ldb,
         B, ldb);
 
-#else
-
-    cublasZtrmm(
-        CUBLAS_HANDLE
-        chameleon_cublas_const(side), chameleon_cublas_const(uplo),
-        chameleon_cublas_const(transa), chameleon_cublas_const(diag),
-        m, n,
-        CUBLAS_VALUE(alpha), A, lda,
-                             B, ldb);
-#endif
-
     assert( CUBLAS_STATUS_SUCCESS == cublasGetError() );
 
     return CHAMELEON_SUCCESS;
diff --git a/cudablas/include/cudablas.h b/cudablas/include/cudablas.h
index ad581009f..f4492db17 100644
--- a/cudablas/include/cudablas.h
+++ b/cudablas/include/cudablas.h
@@ -36,8 +36,6 @@
 #include <cuda.h>
 #include <cuComplex.h>
 
-#if defined(CHAMELEON_USE_CUBLAS_V2)
-
 #include <cublas.h>
 #include <cublas_v2.h>
 
@@ -50,18 +48,6 @@
     cudaStream_t stream;                        \
     cublasGetStream( handle, &stream )
 
-#else
-
-#include <cublas.h>
-#define CUBLAS_STREAM_PARAM cudaStream_t stream
-#define CUBLAS_STREAM_VALUE stream
-#define CUBLAS_HANDLE
-#define CUBLAS_SADDR(_a_) (_a_)
-#define CUBLAS_VALUE(_a_) (*(_a_))
-#define CUBLAS_GET_STREAM
-
-#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
-
 /**
  * CHAMELEON types and constants
  */
@@ -95,12 +81,7 @@ extern char *chameleon_lapack_constants[];
 #define chameleon_lapack_const(chameleon_const) chameleon_lapack_constants[chameleon_const][0]
 
 extern int chameleon_cublas_constants[];
-
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 #define chameleon_cublas_const(chameleon_const) chameleon_cublas_constants[chameleon_const]
-#else
-#define chameleon_cublas_const(chameleon_const) chameleon_lapack_constants[chameleon_const][0]
-#endif
 
 END_C_DECLS
 
diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in
index 2b7e3e4bc..9f62ff985 100644
--- a/include/chameleon/config.h.in
+++ b/include/chameleon/config.h.in
@@ -51,7 +51,6 @@
 /* GPU Support */
 #cmakedefine CHAMELEON_USE_CUDA
 #cmakedefine CHAMELEON_USE_CUBLAS
-#cmakedefine CHAMELEON_USE_CUBLAS_V2
 
 /* Hmat-oss */
 #cmakedefine CHAMELEON_USE_HMAT
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 7d7a0045a..d20ccca13 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -44,7 +44,7 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
     return;
 }
 
-#ifdef CHAMELEON_USE_CUBLAS_V2
+#ifdef CHAMELEON_USE_CUBLAS
 static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
 {
     cham_trans_t trans;
@@ -74,13 +74,13 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
+#endif /* defined(CHAMELEON_USE_CUBLAS) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_USE_CUBLAS_V2)
+#if defined(CHAMELEON_USE_CUBLAS)
 CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
 #else
 CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func)
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index 315610817..b3a3e8e70 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -63,11 +63,9 @@
 
 #include <cublas.h>
 #include <starpu_cublas.h>
-#if defined(CHAMELEON_USE_CUBLAS_V2)
 #include <cublas_v2.h>
 #include <starpu_cublas_v2.h>
 #endif
-#endif
 
 #if defined(CHAMELEON_SIMULATION)
 # if !defined(STARPU_SIMGRID)
@@ -128,16 +126,11 @@ typedef struct starpu_option_request_s {
 #endif
 
 /*
- * cuBlasAPI v2 - StarPU enable the support for cublas handle
+ * cuBLAS API - StarPU enables the support for the cublas handle
  */
-#if defined(CHAMELEON_USE_CUDA) && defined(CHAMELEON_USE_CUBLAS_V2)
+#if defined(CHAMELEON_USE_CUDA)
 #define RUNTIME_getStream(_stream_)                             \
     cublasHandle_t _stream_ = starpu_cublas_get_local_handle();
-#else
-#define RUNTIME_getStream(_stream_)                             \
-    cudaStream_t _stream_ = starpu_cuda_get_local_stream();     \
-    cublasSetKernelStream( stream );
-
 #endif
 
 /*
-- 
GitLab