diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 64552d74636c4919a6283f4a856a10eb5c2f05f2..99152bc04575e72ab83819df54472161b57406f6 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -118,7 +118,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
                                       RUNTIME_option_t           *options )
 {
     int m, h;
-    int tempkm, tempkn, minmn;
+    int tempkm, tempkn, tempmm, minmn;
 
     tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
@@ -133,14 +133,15 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
     for (h=0; h<=minmn; h++){
         INSERT_TASK_zgetrf_percol_diag(
             options,
-            h, k * A->mb,
+            tempkm, tempkn, h, k * A->mb,
             A(k, k),
             ipiv );
 
         for (m = k+1; m < A->mt; m++) {
+            tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
             INSERT_TASK_zgetrf_percol_offdiag(
                 options,
-                h, m * A->mb,
+                tempmm, tempkn, h, m * A->mb,
                 A(m, k),
                 ipiv );
         }
@@ -164,7 +165,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
                                        RUNTIME_option_t           *options )
 {
     int m, h, b, nbblock;
-    int tempkm, tempkn, minmn;
+    int tempkm, tempkn, tempmm, minmn;
 
     tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
@@ -185,14 +186,15 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
 
             INSERT_TASK_zgetrf_blocked_diag(
                 options,
-                j, k * A->mb, ws->ib,
+                tempkm, tempkn, j, k * A->mb, ws->ib,
                 A(k, k), Up(k, k),
                 ipiv );
 
             for (m = k+1; m < A->mt; m++) {
+                tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
                 INSERT_TASK_zgetrf_blocked_offdiag(
                     options,
-                    j, m * A->mb, ws->ib,
+                    tempmm, tempkn, j, m * A->mb, ws->ib,
                     A(m, k), Up(k, k),
                     ipiv );
             }
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 3d2d6de0dda97e18c0ab12a91dc680412372c610..eb855ec34be8d002a9d99ab63276b32c722edb4b 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -508,23 +508,23 @@ void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
                                            const CHAM_desc_t *U, int Um, int Un );
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ws );
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ws );
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ws );
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ws );
diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
index 8df527007f8d004fdc73019de014e2361b6954a0..2c6daa18d9bda1f7ff433305aa98ad77f648b4b5 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
@@ -29,7 +29,7 @@ CHAMELEON_CL_CB( zgetrf_blocked_trsm,    cti_handle_get_m(task->handles[0]), 0,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                    h, m0, ib;
+    int                    m, n, h, m0, ib;
     RUNTIME_sequence_t    *sequence;
     RUNTIME_request_t     *request;
     CHAM_tile_t           *tileA;
@@ -40,7 +40,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t *U   = NULL;
     int                    ldu = -1;;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0, &ib,
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib,
                                 &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
@@ -67,7 +67,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
     nextpiv->h        = h;
     nextpiv->has_diag = 1;
 
-    CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, ib,
+    CORE_zgetrf_panel_diag( m, n, h, m0, ib,
                             CHAM_tile_get_ptr( tileA ), tileA->ld,
                             U, ldu,
                             ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -87,7 +87,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU( zgetrf_blocked_diag, cl_zgetrf_blocked_diag_cpu_func );
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ipiv )
@@ -123,6 +123,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &ib,                  sizeof(int),
@@ -146,7 +148,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                    h, m0, ib;
+    int                    m, n, h, m0, ib;
     RUNTIME_sequence_t    *sequence;
     RUNTIME_request_t     *request;
     CHAM_tile_t           *tileA;
@@ -156,7 +158,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t *U   = NULL;
     int                    ldu = -1;;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0, &ib, &sequence, &request );
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib, &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
     nextpiv = (cppi_interface_t*) descr[1];
@@ -169,7 +171,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
 
     nextpiv->h = h; /* Initialize in case it uses a copy */
 
-    CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, ib,
+    CORE_zgetrf_panel_offdiag( m, n, h, m0, ib,
                                CHAM_tile_get_ptr(tileA), tileA->ld,
                                U, ldu,
                                &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -182,7 +184,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zgetrf_blocked_offdiag, cl_zgetrf_blocked_offdiag_cpu_func)
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ipiv )
@@ -206,6 +208,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &ib,                  sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 104eee81e51ab032f78534e6b37f9b1340c23f96..5d3f83b6ce046a72135c8f513c8cc23822159595 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -28,7 +28,7 @@ CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                 h, m0;
+    int                 m, n, h, m0;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t  *request;
     CHAM_tile_t        *tileA;
@@ -36,8 +36,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
     cppi_interface_t   *nextpiv;
     cppi_interface_t   *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0,
-                                &sequence, &request );
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
     ipiv    = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -58,7 +57,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
     nextpiv->h        = h;
     nextpiv->has_diag = 1;
 
-    CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, tileA->n,
+    CORE_zgetrf_panel_diag( m, n, h, m0, tileA->n,
                             CHAM_tile_get_ptr( tileA ), tileA->ld,
                             NULL, -1,
                             ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -66,7 +65,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
     if ( h > 0 ) {
         cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " );
     }
-    if ( h < tileA->n ) {
+    if ( h < n ) {
         cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " );
     }
 }
@@ -78,7 +77,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func );
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
@@ -101,6 +100,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t*),
@@ -122,14 +123,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                 h, m0;
+    int                 m, n, h, m0;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t  *request;
     CHAM_tile_t        *tileA;
     cppi_interface_t   *nextpiv;
     cppi_interface_t   *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0, &sequence, &request );
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
     nextpiv = (cppi_interface_t*) descr[1];
@@ -137,7 +138,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 
     nextpiv->h = h; /* Initialize in case it uses a copy */
 
-    CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, tileA->n,
+    CORE_zgetrf_panel_offdiag( m, n, h, m0, tileA->n,
                                CHAM_tile_get_ptr(tileA), tileA->ld,
                                NULL, -1,
                                &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -150,7 +151,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
@@ -170,6 +171,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t *),