From 1bef67ec6d1a7da4949114480d934d8fad220795 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Mon, 9 Dec 2024 22:59:11 +0100
Subject: [PATCH] getrf: Add a bcast version of the workspace copy, with the
 possibility to switch to the ring bcast after ringswitch iterations

---
 compute/pzgetrf.c | 49 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index b4f1de9ff..8c41f81d5 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -518,29 +518,48 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws,
     CHAM_context_t  *chamctxt = chameleon_context_self();
     int m, tempmm, tempkn, q;
     int lookahead = chamctxt->lookahead;
-    int lq        = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 1);
-    int myp       = A->myrank / chameleon_desc_datadist_get_iparam(A, 1);
+    int P         = chameleon_desc_datadist_get_iparam(A, 0);
+    int Q         = chameleon_desc_datadist_get_iparam(A, 1);
+    int lq        = (k % lookahead) * Q;
+    int myp       = A->myrank / Q;
 
     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
 
-    for ( m = k+1; m < A->mt; m++ ) {
-        if ( m % chameleon_desc_datadist_get_iparam(A, 0) != myp ) continue;
-
-        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-        INSERT_TASK_zlacpy(
-            options,
-            ChamUpperLower, tempmm, tempkn,
-            A( m, k ),
-            Wl( m, ( k % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ) );
+    if ( k >= ws->ringswitch ) {
+        for ( m = k+1; m < A->mt; m++ ) {
+            if ( ( m % P ) != myp ) continue;
 
-        for ( q = 1; q < chameleon_desc_datadist_get_iparam(A, 1); q++ ) {
+            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempmm, tempkn,
-                Wl( m, ( ( k + q - 1 ) % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ),
-                Wl( m, ( ( k + q )     % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ) );
+                A( m, k ),
+                Wl( m, ( k % Q ) + lq ) );
+
+            for ( q = 1; q < Q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempmm, tempkn,
+                    Wl( m, ( ( k + q - 1 ) % Q ) + lq ),
+                    Wl( m, ( ( k + q )     % Q ) + lq ) );
+            }
+            RUNTIME_data_flush( options->sequence, A(m, k) );
+        }
+    }
+    else {
+        for ( m = k+1; m < A->mt; m++ ) {
+            if ( ( m % P ) != myp ) continue;
+
+            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+            for ( q = 0; q < Q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempmm, tempkn,
+                    A( m, k ),
+                    Wl( m, ( ( k + q )% Q ) + lq ) );
+            }
+            RUNTIME_data_flush( options->sequence, A(m, k) );
         }
-        RUNTIME_data_flush( options->sequence, A(m, k) );
     }
 }
 
-- 
GitLab