From 9541c0e37eda234258d9b96f333ee2368ea4cc0e Mon Sep 17 00:00:00 2001 From: Alycia Lisito <alycia.lisito@inria.fr> Date: Wed, 13 Mar 2024 15:59:30 +0100 Subject: [PATCH] zgetrf batched: Add a cmake option to set the maximum batch size --- CMakeLists.txt | 3 +++ compute/zgetrf.c | 8 ++++++++ include/chameleon/config.h.in | 3 +++ 3 files changed, 14 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f90423ad4..44b3fff3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -306,6 +306,9 @@ cmake_dependent_option(CHAMELEON_TESTINGS_VENDOR "Generate testings of the blas option(CHAMELEON_KERNELS_MT "Use multithreaded kernels (e.g. intel MKL MT)" OFF) #------------------------------------------------------------------------------ +# Option for the maximum batch size +set(CHAMELEON_BATCH_SIZE 10 CACHE STRING "Maximum size for the batched kernels") + ############################################################################### # Build dependency HQR library # ################################ diff --git a/compute/zgetrf.c b/compute/zgetrf.c index b15e25b11..47d98cf78 100644 --- a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -89,6 +89,14 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) } ws->batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 1 ); + if ( ws->batch_size > CHAMELEON_BATCH_SIZE ) { + chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" ); + ws->batch_size = CHAMELEON_BATCH_SIZE; + } + if ( (ws->batch_size > 1) && (CHAMELEON_Comm_size() > 1) ) { + chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE is unavailable in distributed, value forced to 1\n" ); + ws->batch_size = 1; + } /* Allocation of U for permutation of the panels */ if ( ws->alg == ChamGetrfNoPivPerColumn ) { diff --git a/include/chameleon/config.h.in 
b/include/chameleon/config.h.in index c9ddebbfb..49885a899 100644 --- a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -79,6 +79,9 @@ /* chameleon compute */ #cmakedefine CHAMELEON_COPY_DIAG +/* Define the maximum batch size for kernels using it */ +#define CHAMELEON_BATCH_SIZE @CHAMELEON_BATCH_SIZE@ + /* chameleon runtime starpu */ #cmakedefine CHAMELEON_ENABLE_PRUNING_STATS -- GitLab