Commit 747c7935 authored by Guillaume Sylvand's avatar Guillaume Sylvand

Add possibility to use z/cgemm3m for complex mat-mat products

This routine, available in MKL, does a product in 6n^3 ops instead of 8n^3
but is interesting only for "large enough" matrices (to be tested...)
Potentially, we gain 25 % in all complex computations.
It could be interesting to look for it / implement it in cuda.

!!! Note that the flop counters are not updated         !!!
!!! In C/Z accuracy, most flops counter should be x0.75 !!!

IT is OFF by default
It is activated with MORSE_Enable(MORSE_GEMM3M)
In the timing routines, it is activated with --gemm3m
parent 92a3c4a1
......@@ -83,6 +83,7 @@ MORSE_context_t *morse_context_create()
morse->parallel_enabled = MORSE_FALSE;
morse->profiling_enabled = MORSE_FALSE;
morse->progress_enabled = MORSE_FALSE;
morse->gemm3m_enabled = MORSE_FALSE;
morse->householder = MORSE_FLAT_HOUSEHOLDER;
morse->translation = MORSE_OUTOFPLACE;
......@@ -132,6 +133,7 @@ int morse_context_destroy(){
* @arg MORSE_AUTOTUNING autotuning for tile size and inner block size.
* @arg MORSE_PROFILING_MODE activate profiling of kernels
* @arg MORSE_PROGRESS activate progress indicator
* @arg MORSE_GEMM3M Use z/cgemm3m for complexe matrix-matrix products
*
*******************************************************************************
*
......@@ -166,6 +168,13 @@ int MORSE_Enable(MORSE_enum option)
case MORSE_PROGRESS:
morse->progress_enabled = MORSE_TRUE;
break;
case MORSE_GEMM3M:
#ifdef CBLAS_HAS_ZGEMM3M
morse->gemm3m_enabled = MORSE_TRUE;
#else
morse_error("MORSE_Enable", "cannot enable GEMM3M (not available in cblas)");
#endif
break;
/* case MORSE_PARALLEL: */
/* morse->parallel_enabled = MORSE_TRUE; */
/* break; */
......@@ -197,6 +206,7 @@ int MORSE_Enable(MORSE_enum option)
* @arg MORSE_AUTOTUNING autotuning for tile size and inner block size.
* @arg MORSE_PROFILING_MODE deactivate profiling of kernels
* @arg MORSE_PROGRESS deactivate progress indicator
* @arg MORSE_GEMM3M Use z/cgemm3m for complexe matrix-matrix products
*
*******************************************************************************
*
......@@ -230,6 +240,9 @@ int MORSE_Disable(MORSE_enum option)
case MORSE_PROGRESS:
morse->progress_enabled = MORSE_FALSE;
break;
case MORSE_GEMM3M:
morse->gemm3m_enabled = MORSE_FALSE;
break;
case MORSE_PARALLEL_MODE:
morse->parallel_enabled = MORSE_FALSE;
break;
......
......@@ -42,6 +42,18 @@ void CORE_zgemm(MORSE_enum transA, int transB,
const MORSE_Complex64_t *B, int LDB,
MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC)
{
#ifdef CBLAS_HAS_ZGEMM3M
MORSE_context_t *morse = morse_context_self();
if (morse->gemm3m_enabled)
cblas_zgemm3m(
CblasColMajor,
(CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
M, N, K,
CBLAS_SADDR(alpha), A, LDA,
B, LDB,
CBLAS_SADDR(beta), C, LDC);
else
#endif
cblas_zgemm(
CblasColMajor,
(CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
......
......@@ -131,6 +131,7 @@
#define MORSE_PARALLEL_MODE 6
#define MORSE_BOUND 7
#define MORSE_PROGRESS 8
#define MORSE_GEMM3M 9
/** ****************************************************************************
* MORSE constants - configuration parameters
......
......@@ -132,6 +132,7 @@ typedef struct morse_context_s {
MORSE_bool parallel_enabled;
MORSE_bool profiling_enabled;
MORSE_bool progress_enabled;
MORSE_bool gemm3m_enabled;
MORSE_enum householder; // "domino" (flat) or tree-based (reduction) Householder
MORSE_enum translation; // In place or Out of place layout conversion
......
......@@ -349,6 +349,7 @@ show_help(char *prog_name) {
" --[a]sync Enable/Disable synchronous calls in wrapper function such as POTRI. (default: async)\n"
" --[no]check Check result (default: nocheck)\n"
" --[no]progress Display progress indicator (default: noprogress)\n"
" --[no]gemm3m Use gemm3m complex method (default: nogemm3m)\n"
" --[no]inv Check on inverse (default: noinv)\n"
" --[no]warmup Perform a warmup run to pre-load libraries (default: warmup)\n"
" --[no]trace Enable/Disable trace generation (default: notrace)\n"
......@@ -487,6 +488,7 @@ main(int argc, char *argv[]) {
iparam[IPARAM_NMPI ] = 1;
iparam[IPARAM_P ] = 1;
iparam[IPARAM_Q ] = 1;
iparam[IPARAM_GEMM3M ] = 0;
iparam[IPARAM_PROGRESS ] = 0;
iparam[IPARAM_PROFILE ] = 0;
iparam[IPARAM_PRINT_ERRORS ] = 0;
......@@ -526,6 +528,10 @@ main(int argc, char *argv[]) {
iparam[IPARAM_TRACE] = 1;
} else if (startswith( argv[i], "--notrace" )) {
iparam[IPARAM_TRACE] = 0;
} else if (startswith( argv[i], "--gemm3m" )) {
iparam[IPARAM_GEMM3M] = 1;
} else if (startswith( argv[i], "--nogemm3m" )) {
iparam[IPARAM_GEMM3M] = 0;
} else if (startswith( argv[i], "--progress" )) {
iparam[IPARAM_PROGRESS] = 1;
} else if (startswith( argv[i], "--noprogress" )) {
......@@ -637,6 +643,9 @@ main(int argc, char *argv[]) {
if (iparam[IPARAM_PROGRESS] == 1)
MORSE_Enable(MORSE_PROGRESS);
if (iparam[IPARAM_GEMM3M] == 1)
MORSE_Enable(MORSE_GEMM3M);
#if defined(CHAMELEON_USE_MPI)
MORSE_Comm_size( &nbnode );
iparam[IPARAM_NMPI] = nbnode;
......
......@@ -48,6 +48,7 @@ enum iparam_timing {
IPARAM_Q, /* Parameter for 2D cyclic distribution */
IPARAM_PROGRESS, /* Use a progress indicator during computations */
IPARAM_GEMM3M, /* Use GEMM3M for complex matrix vector products */
/* Added for StarPU version */
IPARAM_PROFILE,
IPARAM_PRINT_ERRORS,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment