diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index b4f1de9ff1848677a1177a609abe3d1c788ef46e..8c41f81d550fa77e1c5bb0fd6d6dc59af3b0dea4 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -518,29 +518,48 @@ chameleon_pzgetrf_panel_update_ws( struct chameleon_pzgetrf_s *ws,
     CHAM_context_t  *chamctxt = chameleon_context_self();
     int m, tempmm, tempkn, q;
     int lookahead = chamctxt->lookahead;
-    int lq        = (k % lookahead) * chameleon_desc_datadist_get_iparam(A, 1);
-    int myp       = A->myrank / chameleon_desc_datadist_get_iparam(A, 1);
+    int P         = chameleon_desc_datadist_get_iparam(A, 0);
+    int Q         = chameleon_desc_datadist_get_iparam(A, 1);
+    int lq        = (k % lookahead) * Q;
+    int myp       = A->myrank / Q;
 
     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
 
-    for ( m = k+1; m < A->mt; m++ ) {
-        if ( m % chameleon_desc_datadist_get_iparam(A, 0) != myp ) continue;
-
-        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-        INSERT_TASK_zlacpy(
-            options,
-            ChamUpperLower, tempmm, tempkn,
-            A( m, k ),
-            Wl( m, ( k % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ) );
+    if ( k >= ws->ringswitch ) {
+        for ( m = k+1; m < A->mt; m++ ) {
+            if ( ( m % P ) != myp ) continue;
 
-        for ( q = 1; q < chameleon_desc_datadist_get_iparam(A, 1); q++ ) {
+            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempmm, tempkn,
-                Wl( m, ( ( k + q - 1 ) % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ),
-                Wl( m, ( ( k + q )     % chameleon_desc_datadist_get_iparam(A, 1) ) + lq ) );
+                A( m, k ),
+                Wl( m, ( k % Q ) + lq ) );
+
+            for ( q = 1; q < Q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempmm, tempkn,
+                    Wl( m, ( ( k + q - 1 ) % Q ) + lq ),
+                    Wl( m, ( ( k + q )     % Q ) + lq ) );
+            }
+            RUNTIME_data_flush( options->sequence, A(m, k) );
+        }
+    }
+    else {
+        for ( m = k+1; m < A->mt; m++ ) {
+            if ( ( m % P ) != myp ) continue;
+
+            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+            for ( q = 0; q < Q; q++ ) {
+                INSERT_TASK_zlacpy(
+                    options,
+                    ChamUpperLower, tempmm, tempkn,
+                    A( m, k ),
+                    Wl( m, ( ( k + q )% Q ) + lq ) );
+            }
+            RUNTIME_data_flush( options->sequence, A(m, k) );
         }
-        RUNTIME_data_flush( options->sequence, A(m, k) );
     }
 }
 
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index 9c3377bda312d3abe14a5d5dce92cbd26e547386..514e89d3e375a38d487efcddf6aee07505660f00 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -27,6 +27,7 @@
  *
  */
 #include "control/common.h"
+#include <limits.h>
 
 /**
  ********************************************************************************
@@ -103,6 +104,8 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
         ws->batch_size = CHAMELEON_BATCH_SIZE;
     }
 
+    ws->ringswitch = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_RINGSWITCH", INT_MAX );
+
     /* Allocation of U for permutation of the panels */
     if ( ws->alg == ChamGetrfNoPivPerColumn ) {
         chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
diff --git a/control/compute_z.h b/control/compute_z.h
index f1002522d6f430c4cbf58251c39c1604526f4dac..b75c303a1131e5c9f142803e66a227fe0c04de91 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -44,12 +44,13 @@ struct chameleon_pzgemm_s {
  */
 struct chameleon_pzgetrf_s {
     cham_getrf_t   alg;
-    int            ib;         /**< Internal blocking parameter */
-    int            batch_size; /**< Batch size for the panel    */
+    int            ib;         /**< Internal blocking parameter                   */
+    int            batch_size; /**< Batch size for the panel                      */
+    int            ringswitch; /**< Panel index from which ring bcast is used     */
     CHAM_desc_t    U;
-    CHAM_desc_t    Up; /**< Workspace used for the panel factorization    */
-    CHAM_desc_t    Wu; /**< Workspace used for the permutation and update */
-    CHAM_desc_t    Wl; /**< Workspace used the update                     */
+    CHAM_desc_t    Up;         /**< Workspace used for the panel factorization    */
+    CHAM_desc_t    Wu;         /**< Workspace used for the permutation and update */
+    CHAM_desc_t    Wl;         /**< Workspace used for the update                 */
     int           *proc_involved;
     unsigned int   involved;
     int            np_involved;