diff --git a/control/context.c b/control/context.c index 38d8920367593e569a6ed79ab4ca010447f4da1b..91b5434aa2035aac1cdc521aaaa4e86c17bfa084 100644 --- a/control/context.c +++ b/control/context.c @@ -83,6 +83,7 @@ MORSE_context_t *morse_context_create() morse->parallel_enabled = MORSE_FALSE; morse->profiling_enabled = MORSE_FALSE; morse->progress_enabled = MORSE_FALSE; + morse->gemm3m_enabled = MORSE_FALSE; morse->householder = MORSE_FLAT_HOUSEHOLDER; morse->translation = MORSE_OUTOFPLACE; @@ -132,6 +133,7 @@ int morse_context_destroy(){ * @arg MORSE_AUTOTUNING autotuning for tile size and inner block size. * @arg MORSE_PROFILING_MODE activate profiling of kernels * @arg MORSE_PROGRESS activate progress indicator + * @arg MORSE_GEMM3M Use z/cgemm3m for complex matrix-matrix products * ******************************************************************************* * @@ -166,6 +168,13 @@ int MORSE_Enable(MORSE_enum option) case MORSE_PROGRESS: morse->progress_enabled = MORSE_TRUE; break; + case MORSE_GEMM3M: +#ifdef CBLAS_HAS_ZGEMM3M + morse->gemm3m_enabled = MORSE_TRUE; +#else + morse_error("MORSE_Enable", "cannot enable GEMM3M (not available in cblas)"); +#endif + break; /* case MORSE_PARALLEL: */ /* morse->parallel_enabled = MORSE_TRUE; */ /* break; */ @@ -197,6 +206,7 @@ int MORSE_Enable(MORSE_enum option) * @arg MORSE_AUTOTUNING autotuning for tile size and inner block size. 
* @arg MORSE_PROFILING_MODE deactivate profiling of kernels * @arg MORSE_PROGRESS deactivate progress indicator + * @arg MORSE_GEMM3M deactivate use of z/cgemm3m for complex matrix-matrix products * ******************************************************************************* * @@ -230,6 +240,9 @@ int MORSE_Disable(MORSE_enum option) case MORSE_PROGRESS: morse->progress_enabled = MORSE_FALSE; break; + case MORSE_GEMM3M: + morse->gemm3m_enabled = MORSE_FALSE; + break; case MORSE_PARALLEL_MODE: morse->parallel_enabled = MORSE_FALSE; break; diff --git a/coreblas/compute/core_zgemm.c b/coreblas/compute/core_zgemm.c index fe98b80a993a666c06b1e29a4d2156da6328df38..b5b7e4d4bc37ac0f8ac8d0203d3a91b2060f5182 100644 --- a/coreblas/compute/core_zgemm.c +++ b/coreblas/compute/core_zgemm.c @@ -42,6 +42,18 @@ void CORE_zgemm(MORSE_enum transA, int transB, const MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC) { +#ifdef CBLAS_HAS_ZGEMM3M + MORSE_context_t *morse = morse_context_self(); + if (morse->gemm3m_enabled) + cblas_zgemm3m( + CblasColMajor, + (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, + M, N, K, + CBLAS_SADDR(alpha), A, LDA, + B, LDB, + CBLAS_SADDR(beta), C, LDC); + else +#endif cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, diff --git a/include/morse_constants.h b/include/morse_constants.h index 39e3c460535c3a6a8a5fab2b9020b181cc9e9ea8..8ffe90dc0868c8d574dd5fde01cc92fe63724fba 100644 --- a/include/morse_constants.h +++ b/include/morse_constants.h @@ -131,6 +131,7 @@ #define MORSE_PARALLEL_MODE 6 #define MORSE_BOUND 7 #define MORSE_PROGRESS 8 +#define MORSE_GEMM3M 9 /** **************************************************************************** * MORSE constants - configuration parameters diff --git a/include/morse_struct.h b/include/morse_struct.h index 2958ce3d021bc68e2b5a91195b1b8dd7580a43b0..397d96e1e332171a8e2eef5eb7eff4073b43ca52 100644 --- a/include/morse_struct.h +++ b/include/morse_struct.h @@ -132,6 
+132,7 @@ typedef struct morse_context_s { MORSE_bool parallel_enabled; MORSE_bool profiling_enabled; MORSE_bool progress_enabled; + MORSE_bool gemm3m_enabled; MORSE_enum householder; // "domino" (flat) or tree-based (reduction) Householder MORSE_enum translation; // In place or Out of place layout conversion diff --git a/timing/timing.c b/timing/timing.c index 796441d6bf9adfcedb0f5500a7c891f7df186c5b..1c83dbd161498577226a1ea55e3cc15d544ed800 100644 --- a/timing/timing.c +++ b/timing/timing.c @@ -349,6 +349,7 @@ show_help(char *prog_name) { " --[a]sync Enable/Disable synchronous calls in wrapper function such as POTRI. (default: async)\n" " --[no]check Check result (default: nocheck)\n" " --[no]progress Display progress indicator (default: noprogress)\n" + " --[no]gemm3m Use gemm3m complex method (default: nogemm3m)\n" " --[no]inv Check on inverse (default: noinv)\n" " --[no]warmup Perform a warmup run to pre-load libraries (default: warmup)\n" " --[no]trace Enable/Disable trace generation (default: notrace)\n" @@ -487,6 +488,7 @@ main(int argc, char *argv[]) { iparam[IPARAM_NMPI ] = 1; iparam[IPARAM_P ] = 1; iparam[IPARAM_Q ] = 1; + iparam[IPARAM_GEMM3M ] = 0; iparam[IPARAM_PROGRESS ] = 0; iparam[IPARAM_PROFILE ] = 0; iparam[IPARAM_PRINT_ERRORS ] = 0; @@ -526,6 +528,10 @@ main(int argc, char *argv[]) { iparam[IPARAM_TRACE] = 1; } else if (startswith( argv[i], "--notrace" )) { iparam[IPARAM_TRACE] = 0; + } else if (startswith( argv[i], "--gemm3m" )) { + iparam[IPARAM_GEMM3M] = 1; + } else if (startswith( argv[i], "--nogemm3m" )) { + iparam[IPARAM_GEMM3M] = 0; } else if (startswith( argv[i], "--progress" )) { iparam[IPARAM_PROGRESS] = 1; } else if (startswith( argv[i], "--noprogress" )) { @@ -637,6 +643,9 @@ main(int argc, char *argv[]) { if (iparam[IPARAM_PROGRESS] == 1) MORSE_Enable(MORSE_PROGRESS); + if (iparam[IPARAM_GEMM3M] == 1) + MORSE_Enable(MORSE_GEMM3M); + #if defined(CHAMELEON_USE_MPI) MORSE_Comm_size( &nbnode ); iparam[IPARAM_NMPI] = nbnode; diff --git 
a/timing/timing.h b/timing/timing.h index 4f8771bb6be45316752cf6649eeb989fb508c380..a1636491c6664b49868265fb622b623b449649a9 100644 --- a/timing/timing.h +++ b/timing/timing.h @@ -48,6 +48,7 @@ enum iparam_timing { IPARAM_Q, /* Parameter for 2D cyclic distribution */ IPARAM_PROGRESS, /* Use a progress indicator during computations */ + IPARAM_GEMM3M, /* Use GEMM3M for complex matrix-matrix products */ /* Added for StarPU version */ IPARAM_PROFILE, IPARAM_PRINT_ERRORS,