diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 6db9a8a40148a67fcaeda74f9a718c949b12e59d..4347f5710f23d5ff450f9eb77bf7f7f5c9f41205 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -150,7 +150,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
         }
 
         /* Reduce globally (between MPI processes) */
-        INSERT_TASK_zipiv_allreduce( A, options, ipiv, ws->proc_involved, k, h, tempkn );
+        INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, h, tempkn, ws );
     }
 
     /* Flush temporary data used for the pivoting */
@@ -196,7 +196,7 @@ chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws,
         }
         INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, clargs, ipiv );
 
-        INSERT_TASK_zipiv_allreduce( A, options, ipiv, ws->proc_involved, k, h, tempkn );
+        INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, h, tempkn, ws );
     }
 
     free( clargs );
@@ -250,7 +250,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
 
             assert( j <= minmn );
             /* Reduce globally (between MPI processes) */
-            INSERT_TASK_zipiv_allreduce( A, options, ipiv, ws->proc_involved, k, j, tempkn );
+            INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, j, tempkn, ws );
 
             if ( ( b < (nbblock-1) ) && ( h == hmax-1 ) ) {
                 INSERT_TASK_zgetrf_blocked_trsm(
@@ -312,7 +312,7 @@ chameleon_pzgetrf_panel_facto_blocked_batched( struct chameleon_pzgetrf_s *ws,
 
             assert( j <= minmn );
             /* Reduce globally (between MPI processes) */
-            INSERT_TASK_zipiv_allreduce( A, options, ipiv, ws->proc_involved, k, j, tempkn );
+            INSERT_TASK_zipiv_allreduce( options, A, ipiv, k, j, tempkn, ws );
 
             if ( (b < (nbblock-1)) && (h == hmax-1) ) {
                 INSERT_TASK_zgetrf_blocked_trsm(
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 5f1bbcd322293e3104fca313974096bcf711de71..402c92a3f6d9dcb4ff8e1039b64b62ef051a4a7c 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -575,13 +575,13 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ws );
 
-void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
-                                  const RUNTIME_option_t *options,
+void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
+                                  CHAM_desc_t            *A,
                                   CHAM_ipiv_t            *ipiv,
-                                  int                    *proc_involved,
                                   int                     k,
                                   int                     h,
-                                  int                     n );
+                                  int                     n,
+                                  void                   *ws );
 
 /**
  ********************************************************************************
diff --git a/runtime/openmp/codelets/codelet_zipiv_allreduce.c b/runtime/openmp/codelets/codelet_zipiv_allreduce.c
index b088283254cd64e1bada1628939436327b8a2789..197842ea3e96fdba1a9e1d67152a8a5b3e6196ea 100644
--- a/runtime/openmp/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/openmp/codelets/codelet_zipiv_allreduce.c
@@ -17,19 +17,19 @@
  */
 #include "chameleon_openmp.h"
 
-void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
-                                  const RUNTIME_option_t *options,
+void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
+                                  CHAM_desc_t            *A,
                                   CHAM_ipiv_t            *ipiv,
-                                  int                    *proc_involved,
                                   int                     k,
                                   int                     h,
-                                  int                     n )
+                                  int                     n,
+                                  void                   *ws )
 {
-    (void)A;
     (void)options;
+    (void)A;
     (void)ipiv;
-    (void)proc_involved;
     (void)k;
     (void)h;
     (void)n;
+    (void)ws;
 }
diff --git a/runtime/parsec/codelets/codelet_zipiv_allreduce.c b/runtime/parsec/codelets/codelet_zipiv_allreduce.c
index 75e0611647a464cad9c37e59a5619ebefaae19ed..d6bd3f4c06baf9b1c44e4db6971c88c09acd432f 100644
--- a/runtime/parsec/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/parsec/codelets/codelet_zipiv_allreduce.c
@@ -17,19 +17,19 @@
  */
 #include "chameleon_parsec.h"
 
-void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
-                                  const RUNTIME_option_t *options,
+void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
+                                  CHAM_desc_t            *A,
                                   CHAM_ipiv_t            *ipiv,
-                                  int                    *proc_involved,
                                   int                     k,
                                   int                     h,
-                                  int                     n )
+                                  int                     n,
+                                  void                   *ws )
 {
-    (void)A;
     (void)options;
+    (void)A;
     (void)ipiv;
-    (void)proc_involved;
     (void)k;
     (void)h;
     (void)n;
+    (void)ws;
 }
diff --git a/runtime/quark/codelets/codelet_zipiv_allreduce.c b/runtime/quark/codelets/codelet_zipiv_allreduce.c
index e88269e931f3f210282a1382d44a6ff9516c7453..0186fd142b67d08dcfca01e9b8184b471362ce1c 100644
--- a/runtime/quark/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/quark/codelets/codelet_zipiv_allreduce.c
@@ -17,19 +17,19 @@
  */
 #include "chameleon_quark.h"
 
-void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
-                                  const RUNTIME_option_t *options,
+void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
+                                  CHAM_desc_t            *A,
                                   CHAM_ipiv_t            *ipiv,
-                                  int                    *proc_involved,
                                   int                     k,
                                   int                     h,
-                                  int                     n )
+                                  int                     n,
+                                  void                   *ws )
 {
-    (void)A;
     (void)options;
+    (void)A;
     (void)ipiv;
-    (void)proc_involved;
     (void)k;
     (void)h;
     (void)n;
+    (void)ws;
 }
diff --git a/runtime/starpu/codelets/codelet_zipiv_allreduce.c b/runtime/starpu/codelets/codelet_zipiv_allreduce.c
index a81f0d08eef1fb94b6846606b5e63aae64ab075c..48ecdd0c33fa07f9cfd326f775b0f31fb48a67b8 100644
--- a/runtime/starpu/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/starpu/codelets/codelet_zipiv_allreduce.c
@@ -22,18 +22,18 @@
 struct cl_redux_args_t {
     int h;
     int n;
-    int k;
 };
 
-static void cl_zipiv_allreduce_cpu_func( void *descr[], void *cl_arg )
+static void
+zipiv_allreduce_cpu_func( cppi_interface_t *cppi_me,
+                          cppi_interface_t *cppi_src,
+                          int               h,
+                          int               n )
 {
-    struct cl_redux_args_t *clargs      = (struct cl_redux_args_t *) cl_arg;
-    cppi_interface_t       *cppi_me     = ((cppi_interface_t *) descr[0]);
-    cppi_interface_t       *cppi_src    = ((cppi_interface_t *) descr[1]);
-    CHAM_pivot_t           *nextpiv_me  = &(cppi_me->pivot);
-    CHAM_pivot_t           *nextpiv_src = &(cppi_src->pivot);
-    CHAMELEON_Complex64_t  *pivrow_me   = (CHAMELEON_Complex64_t *)(nextpiv_me->pivrow);
-    CHAMELEON_Complex64_t  *pivrow_src  = (CHAMELEON_Complex64_t *)(nextpiv_src->pivrow);
+    CHAM_pivot_t          *nextpiv_me  = &(cppi_me->pivot);
+    CHAM_pivot_t          *nextpiv_src = &(cppi_src->pivot);
+    CHAMELEON_Complex64_t *pivrow_me   = (CHAMELEON_Complex64_t *)(nextpiv_me->pivrow);
+    CHAMELEON_Complex64_t *pivrow_src  = (CHAMELEON_Complex64_t *)(nextpiv_src->pivrow);
 
     cppi_display_dbg( cppi_me,  stderr, "Global redux Inout: ");
     cppi_display_dbg( cppi_src, stderr, "Global redux Input: ");
@@ -43,33 +43,42 @@ static void cl_zipiv_allreduce_cpu_func( void *descr[], void *cl_arg )
     assert( cppi_me->flttype   == cppi_src->flttype   );
     assert( cppi_me->arraysize == cppi_src->arraysize );
 
-    if ( cabs( pivrow_src[ clargs->h ] ) > cabs( pivrow_me[ clargs->h ] ) ) {
+    if ( cabs( pivrow_src[ h ] ) > cabs( pivrow_me[ h ] ) ) {
         nextpiv_me->blkm0  = nextpiv_src->blkm0;
         nextpiv_me->blkidx = nextpiv_src->blkidx;
-        cblas_zcopy( clargs->n, pivrow_src, 1, pivrow_me, 1 );
+        cblas_zcopy( n, pivrow_src, 1, pivrow_me, 1 );
     }
 
     /* Let's copy the diagonal row if needed */
     if ( ( cppi_src->has_diag == 1 ) &&
          ( cppi_me->has_diag  == -1 ) )
     {
-        cblas_zcopy( clargs->n, nextpiv_src->diagrow, 1, nextpiv_me->diagrow, 1 );
-        assert( cppi_src->arraysize == sizeof(CHAMELEON_Complex64_t) * clargs->n );
+        cblas_zcopy( n, nextpiv_src->diagrow, 1, nextpiv_me->diagrow, 1 );
+        assert( cppi_src->arraysize == sizeof(CHAMELEON_Complex64_t) * n );
         cppi_me->has_diag = 1;
     }
 
     cppi_display_dbg( cppi_me,  stderr, "Global redux Inout(After): ");
 }
 
+/* CPU codelet entry point: unpacks descr[] and cl_arg, then runs the pivot reduction. */
+static void cl_zipiv_allreduce_cpu_func( void *descr[], void *cl_arg )
+{
+    struct cl_redux_args_t *args  = cl_arg;   /* carries h and n */
+    cppi_interface_t       *inout = descr[0]; /* forwarded as cppi_me  */
+    cppi_interface_t       *input = descr[1]; /* forwarded as cppi_src */
+    zipiv_allreduce_cpu_func( inout, input, args->h, args->n );
+}
+
 CODELETS_CPU( zipiv_allreduce, cl_zipiv_allreduce_cpu_func )
 
-void
-INSERT_TASK_zipiv_allreduce_send( CHAM_ipiv_t *ipiv,
-                                  int          me,
-                                  int          dst,
-                                  int          k,
-                                  int          h,
-                                  const RUNTIME_option_t *options )
+static void
+INSERT_TASK_zipiv_allreduce_send( const RUNTIME_option_t *options,
+                                  CHAM_ipiv_t            *ipiv,
+                                  int                     me,
+                                  int                     dst,
+                                  int                     k,
+                                  int                     h )
 {
     rt_starpu_insert_task(
         NULL,
@@ -79,20 +88,19 @@ INSERT_TASK_zipiv_allreduce_send( CHAM_ipiv_t *ipiv,
         0 );
 }
 
-void
-INSERT_TASK_zipiv_allreduce_recv( CHAM_ipiv_t *ipiv,
-                                  int          me,
-                                  int          src,
-                                  int          k,
-                                  int          h,
-                                  int          n,
-                                  const RUNTIME_option_t *options )
+static void
+INSERT_TASK_zipiv_allreduce_recv( const RUNTIME_option_t *options,
+                                  CHAM_ipiv_t            *ipiv,
+                                  int                     me,
+                                  int                     src,
+                                  int                     k,
+                                  int                     h,
+                                  int                     n )
 {
     struct cl_redux_args_t *clargs;
-    clargs = malloc( sizeof( struct cl_redux_args_t ) );
+    clargs    = malloc( sizeof( struct cl_redux_args_t ) );
     clargs->h = h;
     clargs->n = n;
-    clargs->k = k;
 
     rt_starpu_insert_task(
         &cl_zipiv_allreduce,
@@ -106,16 +114,17 @@ INSERT_TASK_zipiv_allreduce_recv( CHAM_ipiv_t *ipiv,
     starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( ipiv, src, k, h ) );
 }
 
-void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
-                                  const RUNTIME_option_t *options,
-                                  CHAM_ipiv_t            *ipiv,
-                                  int                    *proc_involved,
-                                  int                     k,
-                                  int                     h,
-                                  int                     n )
+static void
+zipiv_allreduce_chameleon_starpu_task( const RUNTIME_option_t *options,
+                                       CHAM_desc_t            *A,
+                                       CHAM_ipiv_t            *ipiv,
+                                       int                    *proc_involved,
+                                       int                     k,
+                                       int                     h,
+                                       int                     n )
 {
-    int np_involved   = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k);
-    int np_iter       = np_involved;
+    int np_involved = chameleon_min( chameleon_desc_datadist_get_iparam(A, 0), A->mt - k);
+    int np_iter     = np_involved;
     int p_recv, p_send, me;
     int shift = 1;
 
@@ -140,29 +149,48 @@ void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
             p_send = proc_involved[ ( me + shift               ) % np_involved ];
             p_recv = proc_involved[ ( me - shift + np_involved ) % np_involved ];
 
-            INSERT_TASK_zipiv_allreduce_send( ipiv, A->myrank, p_send, k, h,    options );
-            INSERT_TASK_zipiv_allreduce_recv( ipiv, A->myrank, p_recv, k, h, n, options );
+            INSERT_TASK_zipiv_allreduce_send( options, ipiv, A->myrank, p_send, k, h    );
+            INSERT_TASK_zipiv_allreduce_recv( options, ipiv, A->myrank, p_recv, k, h, n );
 
             shift   = shift << 1;
             np_iter = chameleon_ceil( np_iter, 2 );
         }
     }
 }
+
+/* Submits the pivot all-reduce using the algorithm selected in the getrf workspace ws. */
+void INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
+                                  CHAM_desc_t            *A,
+                                  CHAM_ipiv_t            *ipiv,
+                                  int                     k,
+                                  int                     h,
+                                  int                     n,
+                                  void                   *ws )
+{
+    /* ws is untyped in the prototype; it is read here as a struct chameleon_pzgetrf_s. */
+    struct chameleon_pzgetrf_s *getrf_ws = ws;
+    switch( getrf_ws->alg_allreduce ) {
+    case ChamStarPUTasks:
+    default:
+        zipiv_allreduce_chameleon_starpu_task( options, A, ipiv, getrf_ws->proc_involved, k, h, n );
+    }
+}
 #else
-void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
-                                  const RUNTIME_option_t *options,
-                                  CHAM_ipiv_t            *ipiv,
-                                  int                    *proc_involved,
-                                  int                     k,
-                                  int                     h,
-                                  int                     n )
+void
+INSERT_TASK_zipiv_allreduce( const RUNTIME_option_t *options,
+                             CHAM_desc_t            *A,
+                             CHAM_ipiv_t            *ipiv,
+                             int                     k,
+                             int                     h,
+                             int                     n,
+                             void                   *ws )
 {
     if ( h > 0 ) {
         starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) );
     }
 
     (void)options;
-    (void)proc_involved;
+    (void)ws;
     (void)n;
 }
 #endif