From 1e33a0fd38342c4b16f49104e567e19a69173530 Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Mon, 25 Apr 2022 11:20:24 +0200
Subject: [PATCH] Validate first algorithm gemm cblas and blas interfaces.

---
 cmake_modules/local_subs.py            |   1 +
 compute/CMakeLists.txt                 |  18 +++-
 compute/lapack_api/lapack_api_common.c |  39 +++++++
 compute/lapack_api/lapack_api_common.h |  34 ++++++
 compute/lapack_api/lapack_zgemm.c      | 143 +++++++++++++++++++++++++
 include/CMakeLists.txt                 |   9 +-
 include/chameleon.h                    |   5 +
 include/chameleon/chameleon_zlapack.h  |  37 +++++++
 testing/chameleon_ztesting.c           |   2 +-
 testing/testing_zgemm.c                |  19 +++-
 10 files changed, 298 insertions(+), 9 deletions(-)
 create mode 100644 compute/lapack_api/lapack_api_common.c
 create mode 100644 compute/lapack_api/lapack_api_common.h
 create mode 100644 compute/lapack_api/lapack_zgemm.c
 create mode 100644 include/chameleon/chameleon_zlapack.h

diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index 879a3c3fb..34a5bc904 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -81,6 +81,7 @@ subs = {
         ('',                     'ORGLQ',                'ORGLQ',                'UNGLQ',                'UNGLQ'               ),
         ('',                     'SYEV',                 'SYEV',                 'HEEV',                 'HEEV'                ),
         ('',                     'SYG',                  'SYG',                  'HEG',                  'HEG'                 ),
+        ('',                     'const float ',         'const double ',        'const void \*',        'const void \*'       ),
     ]
     + _extra_blas
     + _extra_BLAS
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 9c0ceed42..cda965e6b 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -264,10 +264,22 @@ precisions_rules_py(CONTROL_SRCS_GENERATED "${ZSRC}"
                     PRECISIONS "${CHAMELEON_PRECISION}"
                     TARGETDIR "control" )
 
+set(LAPACK_API_SRCS_GENERATED "") # presumably filled by precisions_rules_py() below, like CONTROL_SRCS_GENERATED
+set(ZSRC # z-precision sources of the BLAS/CBLAS-like API, used as input of the precision generation
+  lapack_api/lapack_zgemm.c
+  )
+precisions_rules_py(LAPACK_API_SRCS_GENERATED "${ZSRC}"
+  PRECISIONS "${CHAMELEON_PRECISION}")
+set(LAPACK_API_SRCS
+  ${LAPACK_API_SRCS_GENERATED}
+  lapack_api/lapack_api_common.c # precision-independent helpers, not passed through precisions_rules_py()
+  )
+
 set(CHAMELEON_SRCS
     ${CHAMELEON_CONTROL}
     ${CHAMELEON_SRCS_GENERATED}
     ${CONTROL_SRCS_GENERATED}
+    ${LAPACK_API_SRCS}
    )
 
 # Generate the chameleon fortran sources for all possible precisions
@@ -309,7 +321,7 @@ add_dependencies(chameleon
 )
 
 target_include_directories(chameleon PUBLIC
-  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/lapack_api>
   $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include>
   $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include>
   $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/control>
@@ -329,7 +341,9 @@ elseif(CHAMELEON_SCHED_QUARK)
 elseif(CHAMELEON_SCHED_OPENMP)
   target_link_libraries(chameleon PUBLIC chameleon_openmp)
 endif()
-if (NOT CHAMELEON_SIMULATION)
+if (CHAMELEON_SIMULATION) # coreblas is not linked here, but chameleon_zlapack.h still includes coreblas/cblas_wrapper.h
+  target_include_directories(chameleon PUBLIC $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/coreblas/include>)
+else()
   target_link_libraries(chameleon PUBLIC coreblas)
 endif()
 target_link_libraries(chameleon PUBLIC hqr)
diff --git a/compute/lapack_api/lapack_api_common.c b/compute/lapack_api/lapack_api_common.c
new file mode 100644
index 000000000..5e98934bf
--- /dev/null
+++ b/compute/lapack_api/lapack_api_common.c
@@ -0,0 +1,39 @@
+/**
+ *
+ * @file lapack_api_common.c
+ *
+ * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                 Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon blas/lapack and cblas/lapack api common functions
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Florent Pruvost
+ * @date 2022-04-22
+ *
+ */
+#include "lapack_api_common.h"
+
+/**
+ * @brief Map a Fortran BLAS transpose character onto its CBLAS counterpart.
+ * @param[in] trans Points to the BLAS transpose flag; upper and lower case are accepted.
+ * @return CblasNoTrans ('N'), CblasTrans ('T') or CblasConjTrans ('C'). Any other
+ * character is reported through chameleon_error and yields CHAMELEON_ERR_ILLEGAL_VALUE.
+ */
+int chameleon_blastocblas_trans(const char* trans)
+{
+    switch ( *trans ) {
+    case 'N': case 'n':
+        return CblasNoTrans;
+    case 'T': case 't':
+        return CblasTrans;
+    case 'C': case 'c':
+        return CblasConjTrans;
+    default:
+        chameleon_error("chameleon_blastocblas_trans", "illegal value of BLAS transpose parameter");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+}
diff --git a/compute/lapack_api/lapack_api_common.h b/compute/lapack_api/lapack_api_common.h
new file mode 100644
index 000000000..25eb831a0
--- /dev/null
+++ b/compute/lapack_api/lapack_api_common.h
@@ -0,0 +1,34 @@
+/**
+ *
+ * @file lapack_api_common.h
+ *
+ * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux.
+ *                 All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon blas/lapack and cblas/lapack api common internal functions
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Florent Pruvost
+ * @date 2022-04-22
+ *
+ */
+#ifndef _lapack_api_common_h_
+#define _lapack_api_common_h_
+
+#include "chameleon.h"
+#include "chameleon/mangling.h"
+#include "control/auxiliary.h"
+
+/**
+ *
+ * @defgroup CHAMELEON_LAPACK_API
+ * @brief Linear algebra routines exposed to users. LAPACK matrix data storage
+ *
+ */
+
+int chameleon_blastocblas_trans(const char* trans); /* BLAS 'N'/'T'/'C' character -> Cblas*Trans value */
+
+#endif /* _lapack_api_common_h_ */
diff --git a/compute/lapack_api/lapack_zgemm.c b/compute/lapack_api/lapack_zgemm.c
new file mode 100644
index 000000000..f36600a4c
--- /dev/null
+++ b/compute/lapack_api/lapack_zgemm.c
@@ -0,0 +1,143 @@
+/**
+ *
+ * @file lapack_zgemm.c
+ *
+ * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                 Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon blas and cblas api for gemm
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Florent Pruvost
+ * @date 2022-04-22
+ * @precisions normal z -> s d c
+ *
+ */
+
+#include "lapack_api_common.h"
+
+/* Fortran BLAS interface */
+
+#define CHAMELEON_blas_zgemm CHAMELEON_GLOBAL( chameleon_blas_zgemm, CHAMELEON_BLAS_ZGEMM )
+void CHAMELEON_blas_zgemm ( const char* transa, const char* transb,
+                            const int* m, const int* n, const int* k,
+                            const CHAMELEON_Complex64_t* alpha, const CHAMELEON_Complex64_t* a, const int* lda,
+                                                                const CHAMELEON_Complex64_t* b, const int* ldb,
+                            const CHAMELEON_Complex64_t* beta,  CHAMELEON_Complex64_t* c, const int* ldc )
+{
+    /* Every argument arrives by address: translate the flags, dereference the sizes, forward to the C entry point */
+    const int cblas_transa = chameleon_blastocblas_trans( transa );
+    const int cblas_transb = chameleon_blastocblas_trans( transb );
+
+    CHAMELEON_cblas_zgemm( CblasColMajor, cblas_transa, cblas_transb, *m, *n, *k,
+                           CBLAS_SADDR(*alpha), a, *lda, b, *ldb,
+                           CBLAS_SADDR(*beta), c, *ldc );
+}
+
+/* C CBLAS interface */
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_LAPACK_API
+ *
+ *  CHAMELEON_cblas_zgemm - Performs one of the matrix-matrix operations
+ *
+ *    \f[ C = \alpha [op( A )\times op( B )] + \beta C \f],
+ *
+ *  where op( X ) is one of
+ *
+ *    op( X ) = X  or op( X ) = X' or op( X ) = conjg( X' )
+ *
+ *  alpha and beta are scalars, and A, B and C  are matrices, with op( A )
+ *  an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+ *
+ *******************************************************************************
+ *
+ * @param[in] Order
+ *          Storage order of the matrices. Must be CblasColMajor, otherwise an error is reported and nothing is computed.
+ *
+ * @param[in] TransA
+ *          Specifies whether the matrix A is transposed, not transposed or conjugate transposed:
+ *          = CblasNoTrans:   A is not transposed;
+ *          = CblasTrans:     A is transposed;
+ *          = CblasConjTrans: A is conjugate transposed.
+ *
+ * @param[in] TransB
+ *          Specifies whether the matrix B is transposed, not transposed or conjugate transposed:
+ *          = CblasNoTrans:   B is not transposed;
+ *          = CblasTrans:     B is transposed;
+ *          = CblasConjTrans: B is conjugate transposed.
+ *
+ * @param[in] M
+ *          M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
+ *
+ * @param[in] N
+ *          N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
+ *
+ * @param[in] K
+ *          K specifies the number of columns of op( A ) and the number of rows of op( B ). K >= 0.
+ *
+ * @param[in] alpha
+ *          alpha specifies the scalar alpha (passed by address in complex precisions, by value in real ones)
+ *
+ * @param[in] A
+ *          A is a lda-by-ka matrix, where ka is K when  TransA = CblasNoTrans,
+ *          and is  M  otherwise.
+ *
+ * @param[in] lda
+ *          The leading dimension of the array A. lda >= max(1,M) if TransA = CblasNoTrans, max(1,K) otherwise.
+ *
+ * @param[in] B
+ *          B is a ldb-by-kb matrix, where kb is N when  TransB = CblasNoTrans,
+ *          and is  K  otherwise.
+ *
+ * @param[in] ldb
+ *          The leading dimension of the array B. ldb >= max(1,K) if TransB = CblasNoTrans, max(1,N) otherwise.
+ *
+ * @param[in] beta
+ *          beta specifies the scalar beta (same passing convention as alpha)
+ *
+ * @param[in,out] C
+ *          C is a ldc-by-N matrix.
+ *          On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C )
+ *
+ * @param[in] ldc
+ *          The leading dimension of the array C. ldc >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgemm
+ * @sa CHAMELEON_cblas_cgemm
+ * @sa CHAMELEON_cblas_dgemm
+ * @sa CHAMELEON_cblas_sgemm
+ *
+ */
+void CHAMELEON_cblas_zgemm( const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB,
+                            const int M, const int N, const int K,
+                            const void *alpha, const CHAMELEON_Complex64_t *A, const int lda,
+                                               const CHAMELEON_Complex64_t *B, const int ldb,
+                            const void *beta,        CHAMELEON_Complex64_t *C, const int ldc )
+{
+    if (Order != CblasColMajor){
+        chameleon_error("CHAMELEON_cblas_zgemm", "illegal value of order");
+        return; /* the arrays would be read with the wrong layout: do not compute on them */
+    }
+
+    /* Complex precisions receive alpha/beta by address, the generated real precisions by value */
+#if defined(PRECISION_z) || defined(PRECISION_c)
+    CHAMELEON_Complex64_t alphac = *(const CHAMELEON_Complex64_t *)alpha;
+    CHAMELEON_Complex64_t betac = *(const CHAMELEON_Complex64_t *)beta;
+#else
+    CHAMELEON_Complex64_t alphac = alpha;
+    CHAMELEON_Complex64_t betac = beta;
+#endif
+
+    CHAMELEON_zgemm( (cham_trans_t)TransA, (cham_trans_t)TransB, M, N, K,
+                     alphac, (CHAMELEON_Complex64_t *)A, lda,
+                     (CHAMELEON_Complex64_t *)B, ldb,
+                     betac, (CHAMELEON_Complex64_t *)C, ldc );
+}
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index d76c36523..004ea4ab1 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -30,10 +30,11 @@
 # ----------------------------------------------------------
 set(CHAMELEON_HDRS_GENERATED "")
 set(ZHDR
-    chameleon/chameleon_z.h
-    chameleon/chameleon_zc.h
-    chameleon/tasks_z.h
-    chameleon/tasks_zc.h
+  chameleon/chameleon_z.h
+  chameleon/chameleon_zc.h
+  chameleon/chameleon_zlapack.h
+  chameleon/tasks_z.h
+  chameleon/tasks_zc.h
 )
 
 precisions_rules_py(CHAMELEON_HDRS_GENERATED "${ZHDR}"
diff --git a/include/chameleon.h b/include/chameleon.h
index 5aadfb8d4..58cce2ed5 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -68,6 +68,11 @@ typedef struct gepdf_info_s {
 #include "chameleon/chameleon_zc.h"
 #include "chameleon/chameleon_ds.h"
 
+#include "chameleon/chameleon_zlapack.h"
+#include "chameleon/chameleon_clapack.h"
+#include "chameleon/chameleon_dlapack.h"
+#include "chameleon/chameleon_slapack.h"
+
 BEGIN_C_DECLS
 
 /* ****************************************************************************
diff --git a/include/chameleon/chameleon_zlapack.h b/include/chameleon/chameleon_zlapack.h
new file mode 100644
index 000000000..761fed935
--- /dev/null
+++ b/include/chameleon/chameleon_zlapack.h
@@ -0,0 +1,37 @@
+/**
+ *
+ * @file chameleon_zlapack.h
+ *
+ * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon blas/lapack and cblas/lapack api functions
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Florent Pruvost
+ * @date 2022-04-26
+ * @precisions normal z -> c d s
+ *
+ */
+#ifndef _chameleon_zlapack_h_
+#define _chameleon_zlapack_h_
+
+#include "coreblas/cblas_wrapper.h"
+
+BEGIN_C_DECLS
+
+/** Declarations of math functions (LAPACK layout, Cblas/Lapacke interface) - alphabetical order.
+ *  Arguments follow the order of the matching CBLAS routine; an Order other than CblasColMajor is reported as an error.
+ */
+void CHAMELEON_cblas_zgemm( const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB,
+                            const int M, const int N, const int K,
+                            const void *alpha, const CHAMELEON_Complex64_t *A, const int lda,
+                                               const CHAMELEON_Complex64_t *B, const int ldb,
+                            const void *beta,        CHAMELEON_Complex64_t *C, const int ldc );
+
+END_C_DECLS
+
+#endif /* _chameleon_zlapack_h_ */
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index 61ae3d8f5..e2baddbcd 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -48,7 +48,7 @@ parameter_t parameters[] = {
     { "async",    "Switch to the Async interface",                        's', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "splitsub", "Split the task submission and execution stages",       'S', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "generic",  "Switch to the non optimized generic algorithms",       -35, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
-    { "api",      "Select the API to test (0: Descriptors, 1: Standard)", -36, PARAM_OPTION, 1, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
+    { "api",      "Select the API to test (0: Descriptors, 1: Standard, 2: Lapack)", -36, PARAM_OPTION, 1, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
 #endif
 
     { NULL, "Machine parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c
index 07ef9233c..e08a07624 100644
--- a/testing/testing_zgemm.c
+++ b/testing/testing_zgemm.c
@@ -144,6 +144,7 @@ testing_zgemm_std( run_arg_list_t *args, int check )
     int        hres      = 0;
 
     /* Read arguments */
+    int          api    = parameters_getvalue_int( "api" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     cham_trans_t transA = run_arg_get_trans( args, "transA", ChamNoTrans );
     cham_trans_t transB = run_arg_get_trans( args, "transB", ChamNoTrans );
@@ -199,12 +200,26 @@ testing_zgemm_std( run_arg_list_t *args, int check )
     /* Calculate the product */
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     testing_start( &test_data );
-    cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, 
+    cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
                         CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC );
     testing_stop( &test_data, flops_zgemm( M, N, K ) );
 #else
     testing_start( &test_data );
-    hres = CHAMELEON_zgemm( transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC );
+    switch ( api ) {
+    case 1:
+        hres = CHAMELEON_zgemm( transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC );
+        break;
+    case 2:
+        CHAMELEON_cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
+                               CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC );
+        break;
+    default:
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr,
+                     "SKIPPED: This function can only be used with the option --api 1 or --api 2.\n" );
+        }
+        return -1;
+    }
     test_data.hres = hres;
     testing_stop( &test_data, flops_zgemm( M, N, K ) );
 
-- 
GitLab