diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 64552d74636c4919a6283f4a856a10eb5c2f05f2..99152bc04575e72ab83819df54472161b57406f6 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -118,7 +118,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
                                       RUNTIME_option_t           *options )
 {
     int m, h;
-    int tempkm, tempkn, minmn;
+    int tempkm, tempkn, tempmm, minmn;
 
     tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
@@ -133,14 +133,15 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
     for (h=0; h<=minmn; h++){
         INSERT_TASK_zgetrf_percol_diag(
             options,
-            h, k * A->mb,
+            tempkm, tempkn, h, k * A->mb,
             A(k, k),
             ipiv );
 
         for (m = k+1; m < A->mt; m++) {
+            tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
             INSERT_TASK_zgetrf_percol_offdiag(
                 options,
-                h, m * A->mb,
+                tempmm, tempkn, h, m * A->mb,
                 A(m, k),
                 ipiv );
         }
@@ -164,7 +165,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
                                        RUNTIME_option_t           *options )
 {
     int m, h, b, nbblock;
-    int tempkm, tempkn, minmn;
+    int tempkm, tempkn, tempmm, minmn;
 
     tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
@@ -185,14 +186,15 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
 
             INSERT_TASK_zgetrf_blocked_diag(
                 options,
-                j, k * A->mb, ws->ib,
+                tempkm, tempkn, j, k * A->mb, ws->ib,
                 A(k, k), Up(k, k),
                 ipiv );
 
             for (m = k+1; m < A->mt; m++) {
+                tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
                 INSERT_TASK_zgetrf_blocked_offdiag(
                     options,
-                    j, m * A->mb, ws->ib,
+                    tempmm, tempkn, j, m * A->mb, ws->ib,
                     A(m, k), Up(k, k),
                     ipiv );
             }
diff --git a/control/context.c b/control/context.c
index 18e7a719bed3e5f838c9f897d3355e4eae9cb068..566b5461ba10d68ca1d799515f96c61b7fb529ab 100644
--- a/control/context.c
+++ b/control/context.c
@@ -371,13 +371,8 @@ int CHAMELEON_Set( int param, int value )
                 return CHAMELEON_ERR_ILLEGAL_VALUE;
             }
             if (value > chamctxt->nb) {
-                chameleon_error("CHAMELEON_Set", "inner block larger than tile");
-                return CHAMELEON_ERR_ILLEGAL_VALUE;
+                chameleon_warning("CHAMELEON_Set", "inner block larger than tile");
             }
-            /* if (chamctxt->nb % value != 0) { */
-            /*     chameleon_error("CHAMELEON_Set", "inner block does not divide tile"); */
-            /*     return CHAMELEON_ERR_ILLEGAL_VALUE; */
-            /* } */
             chamctxt->ib = value;
             /* Limit ib to nb */
             chamctxt->ib = chameleon_min( chamctxt->nb, chamctxt->ib );
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 3d2d6de0dda97e18c0ab12a91dc680412372c610..eb855ec34be8d002a9d99ab63276b32c722edb4b 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -508,23 +508,23 @@ void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
                                            const CHAM_desc_t *U, int Um, int Un );
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ws );
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ws );
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ws );
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ws );
diff --git a/runtime/openmp/codelets/codelet_zgetrf_blocked.c b/runtime/openmp/codelets/codelet_zgetrf_blocked.c
index 72f2bc651d4efd70c2ad97098f1de7d50aa36efb..9fc38048ce6351cd56942f0e988119c091090954 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_blocked.c
@@ -21,13 +21,15 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)ib;
@@ -41,13 +43,15 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)ib;
diff --git a/runtime/openmp/codelets/codelet_zgetrf_percol.c b/runtime/openmp/codelets/codelet_zgetrf_percol.c
index 0310cfa976b7c68bf07db30818c4d730792e9d78..3577c0c8a7801369593d45fa6e9e6d965fe4daf2 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_percol.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_percol.c
@@ -22,12 +22,14 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)A;
@@ -37,12 +39,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)A;
diff --git a/runtime/parsec/codelets/codelet_zgetrf_blocked.c b/runtime/parsec/codelets/codelet_zgetrf_blocked.c
index fc23c4ac278a77536f470730b36d79ed4d5f66de..db1d66722df93036cf0a8b21cd664bfd906cdb74 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_blocked.c
@@ -21,13 +21,15 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)ib;
@@ -41,13 +43,15 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)ib;
diff --git a/runtime/parsec/codelets/codelet_zgetrf_percol.c b/runtime/parsec/codelets/codelet_zgetrf_percol.c
index c3366cc9ab3e2553da18103135600d93de50ef26..d151073b520ab6556b0929751b6528e7c8a673f6 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_percol.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_percol.c
@@ -22,12 +22,14 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)A;
@@ -37,12 +39,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)A;
diff --git a/runtime/quark/codelets/codelet_zgetrf_blocked.c b/runtime/quark/codelets/codelet_zgetrf_blocked.c
index 608a36cb78efaa2ccedee8d291935a05c89113f1..46ba11ba318242440b0d76bead4cf0b0153efe7d 100644
--- a/runtime/quark/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/quark/codelets/codelet_zgetrf_blocked.c
@@ -21,13 +21,15 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)ib;
@@ -41,13 +43,15 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)ib;
diff --git a/runtime/quark/codelets/codelet_zgetrf_percol.c b/runtime/quark/codelets/codelet_zgetrf_percol.c
index b218e84f6d76905d312e28a1e7d6ad83af213c6c..3ce5d23ae0dc4e0301a9da1484f2e59b77d73e16 100644
--- a/runtime/quark/codelets/codelet_zgetrf_percol.c
+++ b/runtime/quark/codelets/codelet_zgetrf_percol.c
@@ -22,12 +22,14 @@
 #include "chameleon/tasks_z.h"
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)A;
@@ -37,12 +39,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
     assert( 0 );
     (void)options;
+    (void)m;
+    (void)n;
     (void)h;
     (void)m0;
     (void)A;
diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
index 8df527007f8d004fdc73019de014e2361b6954a0..2c6daa18d9bda1f7ff433305aa98ad77f648b4b5 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
@@ -29,7 +29,7 @@ CHAMELEON_CL_CB( zgetrf_blocked_trsm,    cti_handle_get_m(task->handles[0]), 0,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                    h, m0, ib;
+    int                    m, n, h, m0, ib;
     RUNTIME_sequence_t    *sequence;
     RUNTIME_request_t     *request;
     CHAM_tile_t           *tileA;
@@ -40,7 +40,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t *U   = NULL;
     int                    ldu = -1;;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0, &ib,
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib,
                                 &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
@@ -67,7 +67,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
     nextpiv->h        = h;
     nextpiv->has_diag = 1;
 
-    CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, ib,
+    CORE_zgetrf_panel_diag( m, n, h, m0, ib,
                             CHAM_tile_get_ptr( tileA ), tileA->ld,
                             U, ldu,
                             ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -87,7 +87,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU( zgetrf_blocked_diag, cl_zgetrf_blocked_diag_cpu_func );
 
 void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
-                                      int h, int m0, int ib,
+                                      int m, int n, int h, int m0, int ib,
                                       CHAM_desc_t *A, int Am, int An,
                                       CHAM_desc_t *U, int Um, int Un,
                                       CHAM_ipiv_t *ipiv )
@@ -123,6 +123,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &ib,                  sizeof(int),
@@ -146,7 +148,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                    h, m0, ib;
+    int                    m, n, h, m0, ib;
     RUNTIME_sequence_t    *sequence;
     RUNTIME_request_t     *request;
     CHAM_tile_t           *tileA;
@@ -156,7 +158,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t *U   = NULL;
     int                    ldu = -1;;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0, &ib, &sequence, &request );
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib, &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
     nextpiv = (cppi_interface_t*) descr[1];
@@ -169,7 +171,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
 
     nextpiv->h = h; /* Initialize in case it uses a copy */
 
-    CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, ib,
+    CORE_zgetrf_panel_offdiag( m, n, h, m0, ib,
                                CHAM_tile_get_ptr(tileA), tileA->ld,
                                U, ldu,
                                &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -182,7 +184,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zgetrf_blocked_offdiag, cl_zgetrf_blocked_offdiag_cpu_func)
 
 void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
-                                         int h, int m0, int ib,
+                                         int m, int n, int h, int m0, int ib,
                                          CHAM_desc_t *A, int Am, int An,
                                          CHAM_desc_t *U, int Um, int Un,
                                          CHAM_ipiv_t *ipiv )
@@ -206,6 +208,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &ib,                  sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 104eee81e51ab032f78534e6b37f9b1340c23f96..5d3f83b6ce046a72135c8f513c8cc23822159595 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -28,7 +28,7 @@ CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                 h, m0;
+    int                 m, n, h, m0;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t  *request;
     CHAM_tile_t        *tileA;
@@ -36,8 +36,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
     cppi_interface_t   *nextpiv;
     cppi_interface_t   *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0,
-                                &sequence, &request );
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
     ipiv    = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -58,7 +57,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
     nextpiv->h        = h;
     nextpiv->has_diag = 1;
 
-    CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, tileA->n,
+    CORE_zgetrf_panel_diag( m, n, h, m0, tileA->n,
                             CHAM_tile_get_ptr( tileA ), tileA->ld,
                             NULL, -1,
                             ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -66,7 +65,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
     if ( h > 0 ) {
         cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " );
     }
-    if ( h < tileA->n ) {
+    if ( h < n ) {
         cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " );
     }
 }
@@ -78,7 +77,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func );
 
 void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
-                                     int h, int m0,
+                                     int m, int n, int h, int m0,
                                      CHAM_desc_t *A, int Am, int An,
                                      CHAM_ipiv_t *ipiv )
 {
@@ -101,6 +100,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t*),
@@ -122,14 +123,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 {
-    int                 h, m0;
+    int                 m, n, h, m0;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t  *request;
     CHAM_tile_t        *tileA;
     cppi_interface_t   *nextpiv;
     cppi_interface_t   *prevpiv;
 
-    starpu_codelet_unpack_args( cl_arg, &h, &m0, &sequence, &request );
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
 
     tileA   = cti_interface_get(descr[0]);
     nextpiv = (cppi_interface_t*) descr[1];
@@ -137,7 +138,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 
     nextpiv->h = h; /* Initialize in case it uses a copy */
 
-    CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, tileA->n,
+    CORE_zgetrf_panel_offdiag( m, n, h, m0, tileA->n,
                                CHAM_tile_get_ptr(tileA), tileA->ld,
                                NULL, -1,
                                &(nextpiv->pivot), &(prevpiv->pivot) );
@@ -150,7 +151,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
 
 void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
-                                        int h, int m0,
+                                        int m, int n, int h, int m0,
                                         CHAM_desc_t *A, int Am, int An,
                                         CHAM_ipiv_t *ipiv )
 {
@@ -170,6 +171,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
+        STARPU_VALUE,             &m,                   sizeof(int),
+        STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &h,                   sizeof(int),
         STARPU_VALUE,             &m0,                  sizeof(int),
         STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t *),
diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c
index 634378e8dc41bb41b9a0610fa7642711c9371f10..48be66e17652b487c246c2eec0dd8211d7890b36 100644
--- a/runtime/starpu/control/runtime_descriptor_ipiv.c
+++ b/runtime/starpu/control/runtime_descriptor_ipiv.c
@@ -356,4 +356,6 @@ void RUNTIME_ipiv_gather( const RUNTIME_sequence_t *sequence,
             starpu_data_unregister( ipiv_dst );
         }
     }
+
+    chameleon_starpu_tag_release( tag );
 }
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index db00fe81fec1f17da40a6b0e82509994404ecb8d..b429b5c02563d4d34889b94db36ddc4636663619 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -85,6 +85,20 @@ if (NOT CHAMELEON_SIMULATION)
           add_test( test_${cat}_${prec}${test} ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/${test}.in )
         endforeach()
 
+        if ( CHAMELEON_SCHED_STARPU )
+            add_test( test_${cat}_${prec}getrf_nopivpercol ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 --diag=ChamUnit -f input/getrf_nopiv.in )
+            set_tests_properties( test_${cat}_${prec}getrf_nopivpercol
+                                PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=nopivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=1" )
+
+            add_test( test_${cat}_${prec}getrf_ppivpercol ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf_nopiv.in )
+            set_tests_properties( test_${cat}_${prec}getrf_ppivpercol
+                                PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=1" )
+
+            add_test( test_${cat}_${prec}getrf_ppiv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in )
+            set_tests_properties( test_${cat}_${prec}getrf_ppiv
+                                PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=1" )
+        endif()
+
         list( REMOVE_ITEM TESTSTMP print gepdf_qr )
 
         foreach( test ${TESTSTMP} )
diff --git a/testing/input/getrf.in b/testing/input/getrf.in
new file mode 100644
index 0000000000000000000000000000000000000000..c0c99c52c69b54daa2f915d3fb636ad805b88b84
--- /dev/null
+++ b/testing/input/getrf.in
@@ -0,0 +1,17 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Parameters that are not given receive their default values
+
+# GETRF
+
+# nb: Tile size
+# ib: Inner block size (limited to nb)
+# m: Number of rows of the matrix A
+# n: Number of columns of the matrix A
+# lda: Leading dimension of matrix A
+
+op = getrf
+nb = 16, 17
+ib = 16, 5
+m = 13, 17, 35
+n = 15, 19, 33
+lda = 41