diff --git a/CMakeLists.txt b/CMakeLists.txt
index f90423ad4ea2e96edb6190820b5c2e5a8c875e2b..44b3fff3f6699f82de1df192d6980aa0f137c7b0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -306,6 +306,9 @@ cmake_dependent_option(CHAMELEON_TESTINGS_VENDOR "Generate testings of the blas
 option(CHAMELEON_KERNELS_MT "Use multithreaded kernels (e.g. intel MKL MT)" OFF)
 
 #------------------------------------------------------------------------------
+# Option for the maximum batch size
+set(CHAMELEON_BATCH_SIZE 10 CACHE STRING "Maximum size for the batched kernels")
+
 ###############################################################################
 # Build dependency HQR library #
 ################################
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index b15e25b114494243b96ffe0ad809824e350167ba..47d98cf7862754bdb691ab326afc7818adf65d3a 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -89,6 +89,16 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
     }
 
     ws->batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 1 );
+    /* Clamp the value read from the environment to the compile-time maximum */
+    if ( ws->batch_size > CHAMELEON_BATCH_SIZE ) {
+        chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" );
+        ws->batch_size = CHAMELEON_BATCH_SIZE;
+    }
+    /* Batching is disabled in distributed runs: test the number of processes, not the local rank, so that every process takes the same decision */
+    if ( (ws->batch_size > 1) && (CHAMELEON_Comm_size() > 1) ) {
+        chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE is unavailable in distributed, value forced to 1\n" );
+        ws->batch_size = 1;
+    }
 
     /* Allocation of U for permutation of the panels */
     if ( ws->alg == ChamGetrfNoPivPerColumn ) {
diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in
index c9ddebbfbd6f88e546adf73c1ec7dda84448578e..49885a8993781dfd8e0454862f91792c96e21688 100644
--- a/include/chameleon/config.h.in
+++ b/include/chameleon/config.h.in
@@ -79,6 +79,9 @@
 /* chameleon compute */
 #cmakedefine CHAMELEON_COPY_DIAG
 
+/* Define the maximum batch size for kernels using it */
+#define CHAMELEON_BATCH_SIZE @CHAMELEON_BATCH_SIZE@
+
 /* chameleon runtime starpu */
 #cmakedefine CHAMELEON_ENABLE_PRUNING_STATS
 