From 9541c0e37eda234258d9b96f333ee2368ea4cc0e Mon Sep 17 00:00:00 2001
From: Alycia Lisito <alycia.lisito@inria.fr>
Date: Wed, 13 Mar 2024 15:59:30 +0100
Subject: [PATCH] zgetrf batched: Add a CMake option to set the maximum
 batch size

---
 CMakeLists.txt                | 3 +++
 compute/zgetrf.c              | 8 ++++++++
 include/chameleon/config.h.in | 3 +++
 3 files changed, 14 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f90423ad4..44b3fff3f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -306,6 +306,9 @@ cmake_dependent_option(CHAMELEON_TESTINGS_VENDOR "Generate testings of the blas
 option(CHAMELEON_KERNELS_MT "Use multithreaded kernels (e.g. intel MKL MT)" OFF)
 #------------------------------------------------------------------------------
 
+# Option for the maximum batch size
+set(CHAMELEON_BATCH_SIZE 10 CACHE STRING "Maximum size for the batched kernels")
+
 ###############################################################################
 # Build dependency HQR library #
 ################################
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index b15e25b11..47d98cf78 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -89,6 +89,14 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
     }
 
     ws->batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 1 );
+    if ( ws->batch_size > CHAMELEON_BATCH_SIZE ) {
+        chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" );
+        ws->batch_size = CHAMELEON_BATCH_SIZE;
+    }
+    if ( (ws->batch_size > 1) && (CHAMELEON_Comm_rank() > 1) ) {
+        chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE is unavailable in distributed, value forced to 1\n" );
+        ws->batch_size = 1;
+    }
 
     /* Allocation of U for permutation of the panels */
     if ( ws->alg == ChamGetrfNoPivPerColumn ) {
diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in
index c9ddebbfb..49885a899 100644
--- a/include/chameleon/config.h.in
+++ b/include/chameleon/config.h.in
@@ -79,6 +79,9 @@
 /* chameleon compute */
 #cmakedefine CHAMELEON_COPY_DIAG
 
+/* Maximum batch size for the batched kernels (set by the CHAMELEON_BATCH_SIZE CMake option) */
+#define CHAMELEON_BATCH_SIZE @CHAMELEON_BATCH_SIZE@
+
 /* chameleon runtime starpu */
 #cmakedefine CHAMELEON_ENABLE_PRUNING_STATS
 
-- 
GitLab