diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index 879a3c3fbd919d27e0b9668b4dbc0e6b84ff3334..34a5bc904d05debd04053b4452611b87df3d94be 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -81,6 +81,7 @@ subs = { ('', 'ORGLQ', 'ORGLQ', 'UNGLQ', 'UNGLQ' ), ('', 'SYEV', 'SYEV', 'HEEV', 'HEEV' ), ('', 'SYG', 'SYG', 'HEG', 'HEG' ), + ('', 'const float ', 'const double ', 'const void \*', 'const void \*' ), ] + _extra_blas + _extra_BLAS diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 9c0ceed426a0bbed8395a24af6772b44310fd22d..cda965e6b024e17120369f9b52350e6daf030dc4 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -264,10 +264,22 @@ precisions_rules_py(CONTROL_SRCS_GENERATED "${ZSRC}" PRECISIONS "${CHAMELEON_PRECISION}" TARGETDIR "control" ) +set(LAPACK_API_SRCS_GENERATED "") +set(ZSRC + lapack_api/lapack_zgemm.c + ) +precisions_rules_py(LAPACK_API_SRCS_GENERATED "${ZSRC}" + PRECISIONS "${CHAMELEON_PRECISION}") +set(LAPACK_API_SRCS + ${LAPACK_API_SRCS_GENERATED} + lapack_api/lapack_api_common.c + ) + set(CHAMELEON_SRCS ${CHAMELEON_CONTROL} ${CHAMELEON_SRCS_GENERATED} ${CONTROL_SRCS_GENERATED} + ${LAPACK_API_SRCS} ) # Generate the chameleon fortran sources for all possible precisions @@ -309,7 +321,7 @@ add_dependencies(chameleon ) target_include_directories(chameleon PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/lapack_api> $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include> $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include> $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/control> @@ -329,7 +341,9 @@ elseif(CHAMELEON_SCHED_QUARK) elseif(CHAMELEON_SCHED_OPENMP) target_link_libraries(chameleon PUBLIC chameleon_openmp) endif() -if (NOT CHAMELEON_SIMULATION) +if (CHAMELEON_SIMULATION) + target_include_directories(chameleon PUBLIC $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/coreblas/include>) +else() target_link_libraries(chameleon 
PUBLIC coreblas) endif() target_link_libraries(chameleon PUBLIC hqr) diff --git a/compute/lapack_api/lapack_api_common.c b/compute/lapack_api/lapack_api_common.c new file mode 100644 index 0000000000000000000000000000000000000000..5e98934bf969f0b4af014064f307b19eef355403 --- /dev/null +++ b/compute/lapack_api/lapack_api_common.c @@ -0,0 +1,39 @@ +/** + * + * @file lapack_api_common.c + * + * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon blas/lapack and cblas/lapack api common functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-22 + * + */ +#include "lapack_api_common.h" + +/** + * @brief Convert the input char BLAS trans parameter to a compatible parameter + * for the Cblas API. Only the first character pointed to is read, and it is matched case-insensitively. + * @param[in] trans Pointer to the BLAS trans character: 'N'/'n', 'T'/'t' or 'C'/'c'. It is dereferenced without a NULL check. + * @return The CBLAS equivalent parameter (CblasNoTrans, CblasTrans or + * CblasConjTrans). For any other character the problem is reported through chameleon_error() and CHAMELEON_ERR_ILLEGAL_VALUE is returned instead. + */ +int chameleon_blastocblas_trans(const char* trans) +{ + if ( (*trans == 'N') || (*trans == 'n') ) { + return CblasNoTrans; + } else if ( (*trans == 'T') || (*trans == 't') ) { + return CblasTrans; + } else if ( (*trans == 'C') || (*trans == 'c') ) { + return CblasConjTrans; + } else { + chameleon_error("chameleon_blastocblas_trans", "illegal value of BLAS transpose parameter"); + return CHAMELEON_ERR_ILLEGAL_VALUE; + } +} diff --git a/compute/lapack_api/lapack_api_common.h b/compute/lapack_api/lapack_api_common.h new file mode 100644 index 0000000000000000000000000000000000000000..25eb831a0f1bc0ed0d8facf5f0569b8eb72b94df --- /dev/null +++ b/compute/lapack_api/lapack_api_common.h @@ -0,0 +1,34 @@ +/** + * + * @file lapack_api_common.h + * + * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. + * All rights reserved. 
+ * + *** + * + * @brief Chameleon blas/lapack and cblas/lapack api common internal functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-22 + * + */ +#ifndef _lapack_api_common_h_ +#define _lapack_api_common_h_ + +#include "chameleon.h" +#include "chameleon/mangling.h" +#include "control/auxiliary.h" + +/** + * + * @defgroup CHAMELEON_LAPACK_API + * @brief Linear algebra routines exposed to users. LAPACK matrix data storage + * + */ + +int chameleon_blastocblas_trans(const char* value); + +#endif /* _lapack_api_common_h_ */ diff --git a/compute/lapack_api/lapack_zgemm.c b/compute/lapack_api/lapack_zgemm.c new file mode 100644 index 0000000000000000000000000000000000000000..f36600a4c43b34450b81d123b5cbc80964bda4cc --- /dev/null +++ b/compute/lapack_api/lapack_zgemm.c @@ -0,0 +1,143 @@ +/** + * + * @file lapack_zgemm.c + * + * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon blas and cblas api for gemm + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-22 + * @precisions normal z -> s d c + * + */ + +#include "lapack_api_common.h" + +/* Fortran BLAS interface: the symbol name is produced by CHAMELEON_GLOBAL, every argument arrives by reference, the trans characters are converted with chameleon_blastocblas_trans() and the call is forwarded to CHAMELEON_cblas_zgemm() with CblasColMajor. */ + +#define CHAMELEON_blas_zgemm CHAMELEON_GLOBAL( chameleon_blas_zgemm, CHAMELEON_BLAS_ZGEMM ) +void CHAMELEON_blas_zgemm ( const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const CHAMELEON_Complex64_t* alpha, const CHAMELEON_Complex64_t* a, const int* lda, + const CHAMELEON_Complex64_t* b, const int* ldb, + const CHAMELEON_Complex64_t* beta, CHAMELEON_Complex64_t* c, const int* ldc ) +{ + CHAMELEON_cblas_zgemm( CblasColMajor, + chameleon_blastocblas_trans(transa), + chameleon_blastocblas_trans(transb), + *m, *n, *k, + CBLAS_SADDR(*alpha), a, *lda, + b, *ldb, + CBLAS_SADDR(*beta), c, *ldc ); +} + +/* C CBLAS interface: dimensions are taken by value and the work is delegated to CHAMELEON_zgemm() */ + +/** + 
******************************************************************************** + * + * @ingroup CHAMELEON_LAPACK_API + * + * CHAMELEON_cblas_zgemm - Performs one of the matrix-matrix operations + * + * \f[ C = \alpha [op( A )\times op( B )] + \beta C \f], + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ) + * + * alpha and beta are scalars, and A, B and C are matrices, with op( A ) + * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + * + ******************************************************************************* + * + * @param[in] Order Storage layout of A, B and C. Only CblasColMajor is supported: any other value is reported through chameleon_error() and the routine returns without touching C. + * + * @param[in] TransA + * Specifies whether the matrix A is transposed, not transposed or conjugate transposed: + * = CblasNoTrans: A is not transposed; + * = CblasTrans: A is transposed; + * = CblasConjTrans: A is conjugate transposed. + * + * @param[in] TransB + * Specifies whether the matrix B is transposed, not transposed or conjugate transposed: + * = CblasNoTrans: B is not transposed; + * = CblasTrans: B is transposed; + * = CblasConjTrans: B is conjugate transposed. + * + * @param[in] M + * M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0. + * + * @param[in] N + * N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0. + * + * @param[in] K + * K specifies the number of columns of the matrix op( A ) and the number of rows of + * the matrix op( B ). K >= 0. + * + * @param[in] alpha + * alpha specifies the scalar alpha. It is passed by address in the complex precisions and by value in the real ones. + * + * @param[in] A + * A is a lda-by-ka matrix, where ka is K when TransA = CblasNoTrans, + * and is M otherwise. + * + * @param[in] lda + * The leading dimension of the array A. lda >= max(1,M) when TransA = CblasNoTrans, lda >= max(1,K) otherwise. + * + * @param[in] B + * B is a ldb-by-kb matrix, where kb is N when TransB = CblasNoTrans, + * and is K otherwise. + * + * @param[in] ldb + * The leading dimension of the array B. ldb >= max(1,K) when TransB = CblasNoTrans, ldb >= max(1,N) otherwise. + * + * @param[in] beta + * beta specifies the scalar beta. It is passed by address in the complex precisions and by value in the real ones. + * + * @param[in,out] C + * C is a ldc-by-N matrix. 
+ * On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ) + * + * @param[in] ldc + * The leading dimension of the array C. ldc >= max(1,M). + * + ******************************************************************************* + * + * @sa CHAMELEON_zgemm + * @sa CHAMELEON_cblas_cgemm + * @sa CHAMELEON_cblas_dgemm + * @sa CHAMELEON_cblas_sgemm + * + */ +void CHAMELEON_cblas_zgemm( const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, + const int M, const int N, const int K, + const void *alpha, const CHAMELEON_Complex64_t *A, const int lda, + const CHAMELEON_Complex64_t *B, const int ldb, + const void *beta, CHAMELEON_Complex64_t *C, const int ldc ) +{ + if (Order != CblasColMajor){ + chameleon_error("CHAMELEON_cblas_zgemm", "illegal value of order"); + return; /* row-major data must not be handed to the column-major kernel */ + } + +#if defined(PRECISION_z) || defined(PRECISION_c) + CHAMELEON_Complex64_t alphac = *(const CHAMELEON_Complex64_t *)alpha; + CHAMELEON_Complex64_t betac = *(const CHAMELEON_Complex64_t *)beta; +#else + CHAMELEON_Complex64_t alphac = alpha; + CHAMELEON_Complex64_t betac = beta; +#endif + + CHAMELEON_zgemm( (cham_trans_t)TransA, (cham_trans_t)TransB, M, N, K, + alphac, (CHAMELEON_Complex64_t *)A, lda, + (CHAMELEON_Complex64_t *)B, ldb, + betac, (CHAMELEON_Complex64_t *)C, ldc ); +} diff --git a/coreblas/compute/core_dzasum.c b/coreblas/compute/core_dzasum.c index 1b9f19d648dc6e307cfd564a32c8f23bd07d6774..9f8ec032637163436ad98ee903c872dcf5805bfd 100644 --- a/coreblas/compute/core_dzasum.c +++ b/coreblas/compute/core_dzasum.c @@ -20,7 +20,6 @@ * @precisions normal z -> c d s * */ -#include "coreblas/cblas.h" #include <math.h> #include "coreblas.h" diff --git a/coreblas/compute/core_zgessm.c b/coreblas/compute/core_zgessm.c index 476701dbca77b58df2a72b25284be9da176f7deb..4e0b1fcb2bc11d798a9c5d543347d01246d21ba1 100644 --- 
a/coreblas/compute/core_zgessm.c +++ b/coreblas/compute/core_zgessm.c @@ -24,7 +24,6 @@ * @precisions normal z -> c d s * */ -#include "coreblas/cblas.h" #include "coreblas/lapacke.h" #include "coreblas.h" diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c index d97162b398190e7597c765cec9d56fc140c6555b..dce017c09cb88c1683b413f6111ea1205af12ec0 100644 --- a/coreblas/compute/core_zlascal.c +++ b/coreblas/compute/core_zlascal.c @@ -20,7 +20,6 @@ * */ #include "coreblas.h" -#include "coreblas/cblas.h" #include <math.h> /** diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c index d3c750fdbd92953ecf519b6759cfb3d4ac25ab4c..28ee83d22d95a24c2160380cb69d40176f9722d2 100644 --- a/coreblas/compute/core_zpamm.c +++ b/coreblas/compute/core_zpamm.c @@ -23,7 +23,6 @@ * @precisions normal z -> c d s * */ -#include "coreblas/cblas.h" #include "coreblas/lapacke.h" #include "coreblas.h" diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c index 05775accf2e45a8d26485e31a3636a9e456c121d..9787f5172ad3a233bb7e66a4945872964733058c 100644 --- a/coreblas/compute/core_zparfb.c +++ b/coreblas/compute/core_zparfb.c @@ -18,7 +18,6 @@ * @precisions normal z -> c d s * */ -#include "coreblas/cblas.h" #include "coreblas/lapacke.h" #include "coreblas.h" diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c index 7999e6634a74027bca6e2a67f3caf54dc73636d3..8a0b80244e979686e0c2dca7ccf869216b852878 100644 --- a/coreblas/compute/core_zpemv.c +++ b/coreblas/compute/core_zpemv.c @@ -23,7 +23,6 @@ * @precisions normal z -> c d s * */ -#include "coreblas/cblas.h" #include "coreblas/lapacke.h" #include "coreblas.h" diff --git a/coreblas/compute/core_zssssm.c b/coreblas/compute/core_zssssm.c index b579d3f3f94eafc56847aa9179dfca7a90bfbb17..3778f5df0d2f5a70332654e67925d875ac3b9ece 100644 --- a/coreblas/compute/core_zssssm.c +++ b/coreblas/compute/core_zssssm.c @@ -24,7 +24,6 @@ * @precisions normal z -> c d s * */ 
-#include "coreblas/cblas.h" #include "coreblas.h" /** diff --git a/coreblas/compute/core_ztrasm.c b/coreblas/compute/core_ztrasm.c index 2c89c76152adf7a977584003e49e2aee5459619f..60767885cf1f9fdca0c311da5957882d34ac1a8d 100644 --- a/coreblas/compute/core_ztrasm.c +++ b/coreblas/compute/core_ztrasm.c @@ -20,7 +20,6 @@ * @precisions normal z -> c d s * */ -#include "coreblas/cblas.h" #include <math.h> #include "coreblas.h" diff --git a/coreblas/compute/core_ztstrf.c b/coreblas/compute/core_ztstrf.c index 05c89a0fb4ffe0da4fea262b2d88135283bcf9ec..b3fd8a96c957654457ec6eb2d35dfdb3ed0635dd 100644 --- a/coreblas/compute/core_ztstrf.c +++ b/coreblas/compute/core_ztstrf.c @@ -25,7 +25,6 @@ * */ #include "coreblas.h" -#include "coreblas/cblas.h" #include <math.h> /** diff --git a/coreblas/include/coreblas.h b/coreblas/include/coreblas.h index 50a31094db4cf663eb6d6e4364f9d0b0dbc0631a..ec609fdf3bf840a3998bada0e5afe50e4e4a3bd8 100644 --- a/coreblas/include/coreblas.h +++ b/coreblas/include/coreblas.h @@ -29,14 +29,7 @@ #include <string.h> #include <assert.h> -/** - * CBLAS requires for scalar arguments to be passed - * by address rather than by value - */ -#ifndef CBLAS_SADDR -#define CBLAS_SADDR( _val_ ) &(_val_) -#endif -#include "coreblas/cblas.h" +#include "coreblas/cblas_wrapper.h" /** * CHAMELEON types and constants @@ -68,14 +61,6 @@ END_C_DECLS assert(0); \ } while(0) -/** - * CBlas enum - */ -#define CBLAS_TRANSPOSE enum CBLAS_TRANSPOSE -#define CBLAS_UPLO enum CBLAS_UPLO -#define CBLAS_DIAG enum CBLAS_DIAG -#define CBLAS_SIDE enum CBLAS_SIDE - /** * LAPACK Constants */ diff --git a/coreblas/include/coreblas/cblas_wrapper.h b/coreblas/include/coreblas/cblas_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..f35a7d60dfd526083e7047edb770936584192172 --- /dev/null +++ b/coreblas/include/coreblas/cblas_wrapper.h @@ -0,0 +1,42 @@ +/** + * + * @file cblas_wrapper.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * 
Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon cblas header wrapper + * + * @version 1.2.0 + * @author Cedric Castagnede + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * + */ +#ifndef _cblas_wrapper_h_ +#define _cblas_wrapper_h_ + +/** + * CBLAS requires for scalar arguments to be passed + * by address rather than by value + */ +#ifndef CBLAS_SADDR +#define CBLAS_SADDR( _val_ ) &(_val_) +#endif +#include "coreblas/cblas.h" + +/** + * CBlas enum + */ +#define CBLAS_ORDER enum CBLAS_ORDER +#define CBLAS_TRANSPOSE enum CBLAS_TRANSPOSE +#define CBLAS_UPLO enum CBLAS_UPLO +#define CBLAS_DIAG enum CBLAS_DIAG +#define CBLAS_SIDE enum CBLAS_SIDE + +#endif /* _cblas_wrapper_h_ */ diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index d76c36523ea2728a24cb26d17bfcc001603c93eb..004ea4ab197088e24513799d18ba89eb22b84602 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -30,10 +30,11 @@ # ---------------------------------------------------------- set(CHAMELEON_HDRS_GENERATED "") set(ZHDR - chameleon/chameleon_z.h - chameleon/chameleon_zc.h - chameleon/tasks_z.h - chameleon/tasks_zc.h + chameleon/chameleon_z.h + chameleon/chameleon_zc.h + chameleon/chameleon_zlapack.h + chameleon/tasks_z.h + chameleon/tasks_zc.h ) precisions_rules_py(CHAMELEON_HDRS_GENERATED "${ZHDR}" diff --git a/include/chameleon.h b/include/chameleon.h index 5aadfb8d4802f7154b783c80db3fb7e2e08e561b..58cce2ed51d08bf8fe11c4d4f079715dc9216b83 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -68,6 +68,11 @@ typedef struct gepdf_info_s { #include "chameleon/chameleon_zc.h" #include "chameleon/chameleon_ds.h" +#include "chameleon/chameleon_zlapack.h" +#include "chameleon/chameleon_clapack.h" +#include "chameleon/chameleon_dlapack.h" +#include "chameleon/chameleon_slapack.h" + BEGIN_C_DECLS /* 
**************************************************************************** diff --git a/include/chameleon/chameleon_zlapack.h b/include/chameleon/chameleon_zlapack.h new file mode 100644 index 0000000000000000000000000000000000000000..761fed935b7f6e1f064e17293309e8fcd2f3250b --- /dev/null +++ b/include/chameleon/chameleon_zlapack.h @@ -0,0 +1,37 @@ +/** + * + * @file chameleon_zlapack.h + * + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon blas/lapack and cblas/lapack api functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-04-26 + * @precisions normal z -> c d s + * + */ +#ifndef _chameleon_zlapack_h_ +#define _chameleon_zlapack_h_ + +#include "coreblas/cblas_wrapper.h" + +BEGIN_C_DECLS + +/** + * Declarations of math functions (LAPACK layout, Cblas/Lapacke interface) - alphabetical order + */ +void CHAMELEON_cblas_zgemm( const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, + const int M, const int N, const int K, + const void *alpha, const CHAMELEON_Complex64_t *A, const int lda, + const CHAMELEON_Complex64_t *B, const int ldb, + const void *beta, CHAMELEON_Complex64_t *C, const int ldc ); + +END_C_DECLS + +#endif /* _chameleon_zlapack_h_ */ diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index 61ae3d8f50059f4df87c04965bfe1e8f9dc8fbd3..e2baddbcd8e31fd6390c7e565616f54e1c5b9488 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -48,7 +48,7 @@ parameter_t parameters[] = { { "async", "Switch to the Async interface", 's', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, { "splitsub", "Split the task submission and execution stages", 'S', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, { "generic", "Switch to the non optimized generic algorithms", -35, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, 
pread_int, sprint_int }, - { "api", "Select the API to test (0: Descriptors, 1: Standard)", -36, PARAM_OPTION, 1, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "api", "Select the API to test (0: Descriptors, 1: Standard, 2: Lapack)", -36, PARAM_OPTION, 1, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, #endif { NULL, "Machine parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c index 07ef9233c0c94966408795c2d825ddbb5080d18d..e08a076249a9f234f8582331a1e212a31a4338d3 100644 --- a/testing/testing_zgemm.c +++ b/testing/testing_zgemm.c @@ -144,6 +144,7 @@ testing_zgemm_std( run_arg_list_t *args, int check ) int hres = 0; /* Read arguments */ + int api = parameters_getvalue_int( "api" ); int nb = run_arg_get_int( args, "nb", 320 ); cham_trans_t transA = run_arg_get_trans( args, "transA", ChamNoTrans ); cham_trans_t transB = run_arg_get_trans( args, "transB", ChamNoTrans ); @@ -199,12 +200,26 @@ testing_zgemm_std( run_arg_list_t *args, int check ) /* Calculate the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC ); testing_stop( &test_data, flops_zgemm( M, N, K ) ); #else testing_start( &test_data ); - hres = CHAMELEON_zgemm( transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ); + switch ( api ) { + case 1: + hres = CHAMELEON_zgemm( transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ); + break; + case 2: + CHAMELEON_cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC ); + break; + default: + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, + "SKIPPED: This function can only be used with the option 
--api 1 or --api 2.\n" ); + } + return -1; + } test_data.hres = hres; testing_stop( &test_data, flops_zgemm( M, N, K ) );