From 7982e209431d8220a689d53d8852ed4e6ffe04ee Mon Sep 17 00:00:00 2001
From: Alycia Lisito <alycia.lisito@inria.fr>
Date: Fri, 23 Feb 2024 11:21:14 +0100
Subject: [PATCH] zgetrf: correct filter

---
 compute/pzgetrf.c                              | 10 ++++++----
 control/descriptor_helpers.c                   | 18 ++++++++++++++++++
 include/chameleon/descriptor_helpers.h         |  8 ++++++++
 .../starpu/codelets/codelet_zgetrf_blocked.c   |  6 +++---
 .../starpu/codelets/codelet_zgetrf_percol.c    |  5 ++---
 5 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 83d9accfd..108c4a201 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -131,7 +131,6 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
      * Algorithm per column with pivoting
      */
     for (h=0; h<=minmn; h++){
-
         INSERT_TASK_zgetrf_percol_diag(
             options,
             h, k * A->mb,
@@ -139,8 +138,6 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
             ipiv );
 
         for (m = k+1; m < A->mt; m++) {
-            //tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-
             INSERT_TASK_zgetrf_percol_offdiag(
                 options,
                 h, m * A->mb,
@@ -193,7 +190,6 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
                 ipiv );
 
             for (m = k+1; m < A->mt; m++) {
-
                 INSERT_TASK_zgetrf_blocked_offdiag(
                     options,
                     j, m * A->mb, ws->ib,
@@ -371,7 +367,13 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
         RUNTIME_iteration_push( chamctxt, k );
 
         options.priority = A->nt;
+        /*
+         * Do the panel factorization only if the current process contributes to
+         * block column k.
+         */
+        options.forcesub = chameleon_involved_in_panelk_2dbc( A, k );
         chameleon_pzgetrf_panel_facto( ws, A, IPIV, k, &options );
+        options.forcesub = 0;
 
         for (n = k+1; n < A->nt; n++) {
             options.priority = A->nt-n;
diff --git a/control/descriptor_helpers.c b/control/descriptor_helpers.c
index 775748014..9cae18835 100644
--- a/control/descriptor_helpers.c
+++ b/control/descriptor_helpers.c
@@ -82,6 +82,24 @@ int chameleon_getrankof_2d_diag( const CHAM_desc_t *A, int m, int n )
     return (mm % A->p) * A->q + (mm % A->q);
 }
 
+/**
+ * @brief Test if the current MPI process is involved in the panel k for 2DBC distributions.
+ *
+ * @param[in] A
+ *        The matrix descriptor; only its myrank and q fields are read.
+ *
+ * @param[in] k
+ *        The index of the panel (block column) to test.
+ *
+ * @return 1 if A->myrank and k are congruent modulo A->q, i.e. the current MPI
+ *         process contributes to the panel k; 0 otherwise.
+ *
+ */
+int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k ) {
+    int myrank = A->myrank;
+    return ( myrank % A->q == k % A->q );
+}
+
 /**
  * @brief Initializes a custom distribution based on an external file.
  *
diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h
index 5716b2582..da79d0486 100644
--- a/include/chameleon/descriptor_helpers.h
+++ b/include/chameleon/descriptor_helpers.h
@@ -57,6 +57,14 @@ int chameleon_getrankof_custom_init   ( custom_dist_t **dist, const char *filena
 int chameleon_getrankof_custom_destroy( custom_dist_t **dist );
 int chameleon_getrankof_custom        ( const CHAM_desc_t *A, int m, int n );
 
+/**
+ * @}
+ * @name Panel involvement functions
+ * @{
+ */
+
+int chameleon_involved_in_panelk_2dbc( const CHAM_desc_t *A, int k );
+
 /**
  * @}
  * @name Block address functions
diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
index 03f4e5553..832132f4f 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
@@ -114,7 +114,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
@@ -197,7 +197,7 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
@@ -275,7 +275,7 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(U, Um, Un);
+    CHAMELEON_ACCESS_RW( U, Um, Un );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 4b2de691a..7829c3f3e 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -92,7 +92,7 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
@@ -159,10 +159,9 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
     void (*callback)(void*) = options->profiling ? cl_zgetrf_percol_offdiag_callback : NULL;
     char *cl_name = "zgetrf_percol_offdiag";
 
-
     /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_ACCESS_RW( A, Am, An );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     /* Refine name */
-- 
GitLab