From 1e33a0fd38342c4b16f49104e567e19a69173530 Mon Sep 17 00:00:00 2001 From: Florent Pruvost <florent.pruvost@inria.fr> Date: Mon, 25 Apr 2022 11:20:24 +0200 Subject: [PATCH] Validate first algorithm gemm cblas and blas interfaces. --- cmake_modules/local_subs.py | 1 + compute/CMakeLists.txt | 18 +++- compute/lapack_api/lapack_api_common.c | 39 +++++++ compute/lapack_api/lapack_api_common.h | 34 ++++++ compute/lapack_api/lapack_zgemm.c | 143 +++++++++++++++++++++++++ include/CMakeLists.txt | 9 +- include/chameleon.h | 5 + include/chameleon/chameleon_zlapack.h | 37 +++++++ testing/chameleon_ztesting.c | 2 +- testing/testing_zgemm.c | 19 +++- 10 files changed, 298 insertions(+), 9 deletions(-) create mode 100644 compute/lapack_api/lapack_api_common.c create mode 100644 compute/lapack_api/lapack_api_common.h create mode 100644 compute/lapack_api/lapack_zgemm.c create mode 100644 include/chameleon/chameleon_zlapack.h diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index 879a3c3fb..34a5bc904 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -81,6 +81,7 @@ subs = { ('', 'ORGLQ', 'ORGLQ', 'UNGLQ', 'UNGLQ' ), ('', 'SYEV', 'SYEV', 'HEEV', 'HEEV' ), ('', 'SYG', 'SYG', 'HEG', 'HEG' ), + ('', 'const float ', 'const double ', 'const void \*', 'const void \*' ), ] + _extra_blas + _extra_BLAS diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 9c0ceed42..cda965e6b 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -264,10 +264,22 @@ precisions_rules_py(CONTROL_SRCS_GENERATED "${ZSRC}" PRECISIONS "${CHAMELEON_PRECISION}" TARGETDIR "control" ) +set(LAPACK_API_SRCS_GENERATED "") +set(ZSRC + lapack_api/lapack_zgemm.c + ) +precisions_rules_py(LAPACK_API_SRCS_GENERATED "${ZSRC}" + PRECISIONS "${CHAMELEON_PRECISION}") +set(LAPACK_API_SRCS + ${LAPACK_API_SRCS_GENERATED} + lapack_api/lapack_api_common.c + ) + set(CHAMELEON_SRCS ${CHAMELEON_CONTROL} ${CHAMELEON_SRCS_GENERATED} ${CONTROL_SRCS_GENERATED} + 
${LAPACK_API_SRCS} ) # Generate the chameleon fortran sources for all possible precisions @@ -309,7 +321,7 @@ add_dependencies(chameleon ) target_include_directories(chameleon PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/lapack_api> $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include> $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include> $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/control> @@ -329,7 +341,9 @@ elseif(CHAMELEON_SCHED_QUARK) elseif(CHAMELEON_SCHED_OPENMP) target_link_libraries(chameleon PUBLIC chameleon_openmp) endif() -if (NOT CHAMELEON_SIMULATION) +if (CHAMELEON_SIMULATION) + target_include_directories(chameleon PUBLIC $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/coreblas/include>) +else() target_link_libraries(chameleon PUBLIC coreblas) endif() target_link_libraries(chameleon PUBLIC hqr) diff --git a/compute/lapack_api/lapack_api_common.c b/compute/lapack_api/lapack_api_common.c new file mode 100644 index 000000000..5e98934bf --- /dev/null +++ b/compute/lapack_api/lapack_api_common.c @@ -0,0 +1,39 @@ +/** + * + * @file lapack_api_common.c + * + * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon blas/lapack and cblas/lapack api common functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-22 + * + */ +#include "lapack_api_common.h" + +/** + * @brief Convert a BLAS transpose character into the matching CBLAS transpose + * value; only the first character is read and upper/lower case are both accepted. + * @param[in] trans Pointer to the BLAS transpose character ('N', 'T' or 'C', either case) + * @return CblasNoTrans, CblasTrans or CblasConjTrans; for any other character the error is + * reported through chameleon_error() and CHAMELEON_ERR_ILLEGAL_VALUE is returned. 
+ */ +int chameleon_blastocblas_trans(const char* trans) +{ + if ( (*trans == 'N') || (*trans == 'n') ) { + return CblasNoTrans; + } else if ( (*trans == 'T') || (*trans == 't') ) { + return CblasTrans; + } else if ( (*trans == 'C') || (*trans == 'c') ) { + return CblasConjTrans; + } else { + chameleon_error("chameleon_blastocblas_trans", "illegal value of BLAS transpose parameter"); + return CHAMELEON_ERR_ILLEGAL_VALUE; + } +} diff --git a/compute/lapack_api/lapack_api_common.h b/compute/lapack_api/lapack_api_common.h new file mode 100644 index 000000000..25eb831a0 --- /dev/null +++ b/compute/lapack_api/lapack_api_common.h @@ -0,0 +1,34 @@ +/** + * + * @file lapack_api_common.h + * + * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. + * All rights reserved. + * + *** + * + * @brief Chameleon blas/lapack and cblas/lapack api common internal functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-22 + * + */ +#ifndef _lapack_api_common_h_ +#define _lapack_api_common_h_ + +#include "chameleon.h" +#include "chameleon/mangling.h" +#include "control/auxiliary.h" + +/** + * + * @defgroup CHAMELEON_LAPACK_API + * @brief Linear algebra routines exposed to users. LAPACK matrix data storage + * + */ + +int chameleon_blastocblas_trans(const char* value); + +#endif /* _lapack_api_common_h_ */ diff --git a/compute/lapack_api/lapack_zgemm.c b/compute/lapack_api/lapack_zgemm.c new file mode 100644 index 000000000..f36600a4c --- /dev/null +++ b/compute/lapack_api/lapack_zgemm.c @@ -0,0 +1,143 @@ +/** + * + * @file lapack_zgemm.c + * + * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon blas and cblas api for gemm + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-22 + * @precisions normal z -> s d c + * + */ + +#include "lapack_api_common.h" + +/* Fortran BLAS interface: every argument is received by address, the transpose characters are converted with chameleon_blastocblas_trans() and the call is forwarded to CHAMELEON_cblas_zgemm() with CblasColMajor ordering */ + +#define CHAMELEON_blas_zgemm CHAMELEON_GLOBAL( chameleon_blas_zgemm, CHAMELEON_BLAS_ZGEMM ) +void CHAMELEON_blas_zgemm ( const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const CHAMELEON_Complex64_t* alpha, const CHAMELEON_Complex64_t* a, const int* lda, + const CHAMELEON_Complex64_t* b, const int* ldb, + const CHAMELEON_Complex64_t* beta, CHAMELEON_Complex64_t* c, const int* ldc ) +{ + CHAMELEON_cblas_zgemm( CblasColMajor, + chameleon_blastocblas_trans(transa), + chameleon_blastocblas_trans(transb), + *m, *n, *k, + CBLAS_SADDR(*alpha), a, *lda, + b, *ldb, + CBLAS_SADDR(*beta), c, *ldc ); +} + +/* C CBLAS interface */ + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_LAPACK_API + * + * CHAMELEON_cblas_zgemm - Performs one of the matrix-matrix operations + * + * \f[ C = \alpha [op( A )\times op( B )] + \beta C \f], + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ) + * + * alpha and beta are scalars, and A, B and C are matrices, with op( A ) + * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + * + ******************************************************************************* + * + * @param[in] transA + * Specifies whether the matrix A is transposed, not transposed or conjugate transposed: + * = ChamNoTrans: A is not transposed; + * = ChamTrans: A is transposed; + * = ChamConjTrans: A is conjugate transposed. 
+ * + * @param[in] transB + * Specifies whether the matrix B is transposed, not transposed or conjugate transposed: + * = ChamNoTrans: B is not transposed; + * = ChamTrans: B is transposed; + * = ChamConjTrans: B is conjugate transposed. + * + * @param[in] M + * M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0. + * + * @param[in] N + * N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0. + * + * @param[in] K + * K specifies the number of columns of the matrix op( A ) and the number of rows of + * the matrix op( B ). K >= 0. + * + * @param[in] alpha + * alpha specifies the scalar alpha + * + * @param[in] A + * A is an LDA-by-ka matrix, where ka is K when transA = ChamNoTrans, + * and is M otherwise. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M) when transA = ChamNoTrans, and LDA >= max(1,K) otherwise. + * + * @param[in] B + * B is an LDB-by-kb matrix, where kb is N when transB = ChamNoTrans, + * and is K otherwise. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,K) when transB = ChamNoTrans, and LDB >= max(1,N) otherwise. + * + * @param[in] beta + * beta specifies the scalar beta + * + * @param[in,out] C + * C is an LDC-by-N matrix. + * On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ) + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). 
+ * + ******************************************************************************* + * + * @note No value is returned: an Order other than CblasColMajor is reported through chameleon_error() and the call returns without computing anything. + * + ******************************************************************************* + * + * @sa CHAMELEON_zgemm + * @sa CHAMELEON_cblas_cgemm + * @sa CHAMELEON_cblas_dgemm + * @sa CHAMELEON_cblas_sgemm + * + */ +void CHAMELEON_cblas_zgemm( const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, + const int M, const int N, const int K, + const void *alpha, const CHAMELEON_Complex64_t *A, const int lda, + const CHAMELEON_Complex64_t *B, const int ldb, + const void *beta, CHAMELEON_Complex64_t *C, const int ldc ) +{ + if (Order != CblasColMajor){ + chameleon_error("CHAMELEON_cblas_zgemm", "illegal value of order"); + return; /* only column-major storage is handled: do not run the product on a rejected layout */ + } +#if defined(PRECISION_z) || defined(PRECISION_c) + CHAMELEON_Complex64_t alphac = *(CHAMELEON_Complex64_t *)alpha; + CHAMELEON_Complex64_t betac = *(CHAMELEON_Complex64_t *)beta; +#else + CHAMELEON_Complex64_t alphac = alpha; + CHAMELEON_Complex64_t betac = beta; +#endif + + CHAMELEON_zgemm( (cham_trans_t)TransA, (cham_trans_t)TransB, M, N, K, + alphac, (CHAMELEON_Complex64_t *)A, lda, + (CHAMELEON_Complex64_t *)B, ldb, + betac, (CHAMELEON_Complex64_t *)C, ldc ); +} diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index d76c36523..004ea4ab1 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -30,10 +30,11 @@ # ---------------------------------------------------------- set(CHAMELEON_HDRS_GENERATED "") set(ZHDR - chameleon/chameleon_z.h - chameleon/chameleon_zc.h - chameleon/tasks_z.h - chameleon/tasks_zc.h + chameleon/chameleon_z.h + chameleon/chameleon_zc.h + chameleon/chameleon_zlapack.h + chameleon/tasks_z.h + chameleon/tasks_zc.h ) precisions_rules_py(CHAMELEON_HDRS_GENERATED "${ZHDR}" diff --git a/include/chameleon.h b/include/chameleon.h index 5aadfb8d4..58cce2ed5 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -68,6 +68,11 @@ typedef 
struct gepdf_info_s { #include "chameleon/chameleon_zc.h" #include "chameleon/chameleon_ds.h" +#include "chameleon/chameleon_zlapack.h" +#include "chameleon/chameleon_clapack.h" +#include "chameleon/chameleon_dlapack.h" +#include "chameleon/chameleon_slapack.h" + BEGIN_C_DECLS /* **************************************************************************** diff --git a/include/chameleon/chameleon_zlapack.h b/include/chameleon/chameleon_zlapack.h new file mode 100644 index 000000000..761fed935 --- /dev/null +++ b/include/chameleon/chameleon_zlapack.h @@ -0,0 +1,37 @@ +/** + * + * @file chameleon_zlapack.h + * + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon blas/lapack and cblas/lapack api functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-26 + * @precisions normal z -> c d s + * + */ +#ifndef _chameleon_zlapack_h_ +#define _chameleon_zlapack_h_ + +#include "coreblas/cblas_wrapper.h" + +BEGIN_C_DECLS + +/** + * Declarations of math functions (LAPACK layout, Cblas/Lapacke interface) - alphabetical order + */ +void CHAMELEON_cblas_zgemm( const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, + const int M, const int N, const int K, + const void *alpha, const CHAMELEON_Complex64_t *A, const int lda, + const CHAMELEON_Complex64_t *B, const int ldb, + const void *beta, CHAMELEON_Complex64_t *C, const int ldc ); + +END_C_DECLS + +#endif /* _chameleon_zlapack_h_ */ diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index 61ae3d8f5..e2baddbcd 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -48,7 +48,7 @@ parameter_t parameters[] = { { "async", "Switch to the Async interface", 's', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, { "splitsub", "Split the task submission and execution stages", 'S', PARAM_OPTION, 0, 0, 
TestValInt, {0}, NULL, pread_int, sprint_int }, { "generic", "Switch to the non optimized generic algorithms", -35, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, - { "api", "Select the API to test (0: Descriptors, 1: Standard)", -36, PARAM_OPTION, 1, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "api", "Select the API to test (0: Descriptors, 1: Standard, 2: Lapack)", -36, PARAM_OPTION, 1, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, #endif { NULL, "Machine parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c index 07ef9233c..e08a07624 100644 --- a/testing/testing_zgemm.c +++ b/testing/testing_zgemm.c @@ -144,6 +144,7 @@ testing_zgemm_std( run_arg_list_t *args, int check ) int hres = 0; /* Read arguments */ + int api = parameters_getvalue_int( "api" ); int nb = run_arg_get_int( args, "nb", 320 ); cham_trans_t transA = run_arg_get_trans( args, "transA", ChamNoTrans ); cham_trans_t transB = run_arg_get_trans( args, "transB", ChamNoTrans ); @@ -199,12 +200,26 @@ testing_zgemm_std( run_arg_list_t *args, int check ) /* Calculate the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC ); testing_stop( &test_data, flops_zgemm( M, N, K ) ); #else testing_start( &test_data ); - hres = CHAMELEON_zgemm( transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ); + switch ( api ) { + case 1: + hres = CHAMELEON_zgemm( transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ); + break; + case 2: + CHAMELEON_cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC ); + break; + default: + if ( 
CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, + "SKIPPED: This function can only be used with the option --api 1 or --api 2.\n" ); + } + return -1; + } test_data.hres = hres; testing_stop( &test_data, flops_zgemm( M, N, K ) ); -- GitLab