diff --git a/compute/pzlange.c b/compute/pzlange.c
index 2bb14d7a2124b4a8328597063b601adc8d38d716..bb7d6b17636ad02787dd7094f45d8b5bda32054a 100644
--- a/compute/pzlange.c
+++ b/compute/pzlange.c
@@ -72,11 +72,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
             }
 
             if ( m >= P ) {
-                INSERT_TASK_dgeadd(
-                    options,
-                    ChamNoTrans, 1, tempnn, A->nb,
-                    1.0, W( Wcol, m,   n ), 1,
-                    1.0, W( Wcol, m%P, n ), 1 );
+                INSERT_TASK_daxpy(
+                    options, tempnn, 1.,
+                    W( Wcol, m,   n ), 1,
+                    W( Wcol, m%P, n ), 1 );
             }
         }
 
@@ -85,11 +84,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
          *  For each i, W(i, n) = reduce( W(0..P-1, n) )
          */
         for(m = 1; m < P; m++) {
-            INSERT_TASK_dgeadd(
-                options,
-                ChamNoTrans, 1, tempnn, A->nb,
-                1.0, W( Wcol, m, n ), 1,
-                1.0, W( Wcol, 0, n ), 1 );
+            INSERT_TASK_daxpy(
+                options, tempnn, 1.,
+                W( Wcol, m, n ), 1,
+                W( Wcol, 0, n ), 1 );
         }
 
         INSERT_TASK_dlange(
@@ -165,11 +163,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
             }
 
             if ( n >= Q ) {
-                INSERT_TASK_dgeadd(
-                    options,
-                    ChamNoTrans, tempmm, 1, A->mb,
-                    1.0, W( Wcol, m, n  ), tempmm,
-                    1.0, W( Wcol, m, n%Q), tempmm );
+                INSERT_TASK_daxpy(
+                    options, tempmm, 1.,
+                    W( Wcol, m, n   ), 1,
+                    W( Wcol, m, n%Q ), 1 );
             }
         }
 
@@ -178,11 +175,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
          *  For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
          */
         for(n = 1; n < Q; n++) {
-            INSERT_TASK_dgeadd(
-                options,
-                ChamNoTrans, tempmm, 1, A->mb,
-                1.0, W( Wcol, m, n), tempmm,
-                1.0, W( Wcol, m, 0), tempmm );
+            INSERT_TASK_daxpy(
+                options, tempmm, 1.,
+                W( Wcol, m, n ), 1,
+                W( Wcol, m, 0 ), 1 );
         }
 
         INSERT_TASK_dlange(
@@ -407,11 +403,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
     case ChamOneNorm:
         RUNTIME_options_ws_alloc( &options, 1, 0 );
 
-        chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb,
+        chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, 1, A->nb, A->nb,
                              workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q,
                              NULL, NULL, NULL );
         wcol_init = 1;
 
+        /*
+         * Use the global allocator for Welt, otherwise flush may free the data before the result is read.
+         */
         chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
                              A->p, worknt, 0, 0, A->p, worknt, A->p, A->q,
                              NULL, NULL, NULL );
@@ -424,7 +423,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
     case ChamInfNorm:
         RUNTIME_options_ws_alloc( &options, A->mb, 0 );
 
-        chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, A->mb, 1, A->mb,
+        chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb,
                              workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q,
                              NULL, NULL, NULL );
         wcol_init = 1;
@@ -522,7 +521,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
     }
     CHAMELEON_Desc_Flush( &Welt, sequence );
     CHAMELEON_Desc_Flush( A, sequence );
-    RUNTIME_sequence_wait(chamctxt, sequence);
+    RUNTIME_sequence_wait( chamctxt, sequence );
 
     *result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ));
 
diff --git a/compute/pzlansy.c b/compute/pzlansy.c
index 5763df96dc4c25fd7bc097699ec354e59b62a28d..6a9f56fb1ec6ef894b8f08fdad3854bd3edb69ad 100644
--- a/compute/pzlansy.c
+++ b/compute/pzlansy.c
@@ -81,11 +81,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
 
         for(n = Q; n < NT; n++) {
-            INSERT_TASK_dgeadd(
-                options,
-                ChamNoTrans, tempmm, 1, A->nb,
-                1.0, W( Wcol, m, n  ), tempmm,
-                1.0, W( Wcol, m, n%Q), tempmm );
+            INSERT_TASK_daxpy(
+                options, tempmm, 1.,
+                W( Wcol, m, n   ), 1,
+                W( Wcol, m, n%Q ), 1 );
         }
 
         /**
@@ -93,11 +92,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
          *  For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
          */
         for(n = 1; n < Q; n++) {
-            INSERT_TASK_dgeadd(
-                options,
-                ChamNoTrans, tempmm, 1, A->mb,
-                1.0, W( Wcol, m, n), tempmm,
-                1.0, W( Wcol, m, 0), tempmm );
+            INSERT_TASK_daxpy(
+                options, tempmm, 1.,
+                W( Wcol, m, n ), 1,
+                W( Wcol, m, 0 ), 1 );
         }
 
         INSERT_TASK_dlange(
@@ -334,11 +332,14 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
     case ChamInfNorm:
         RUNTIME_options_ws_alloc( &options, 1, 0 );
 
-        chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, A->mb, 1, A->mb,
+        chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble, A->mb, 1, A->mb,
                              workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q,
                              NULL, NULL, NULL );
         wcol_init = 1;
 
+        /*
+         * Use the global allocator for Welt, otherwise flush may free the data before the result is read.
+         */
         chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, 1, 1,
                              workmt, A->q, 0, 0, workmt, A->q, A->p, A->q,
                              NULL, NULL, NULL );
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index 48dbc13eaab3b75b2a75b01c567c68524041680e..02b740b3b397ec8ce8f8ffc4c372a4984c51dd0c 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -466,6 +466,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     RUNTIME_data_flush( sequence, T(k, n) );
                 }
 
+                /* Restore the original location of the tiles */
+                for (m = 0; m < B->mt; m++) {
+                    RUNTIME_data_migrate( sequence, B( m, k ),
+                                          B->get_rankof( B, m, k ) );
+                }
+
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index 772bfdf48f4310ef9a96a7de427fdc7c35a09f49..a11c5f247a6ac17d0602a0f985e707d78bc43c71 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -467,6 +467,12 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     RUNTIME_data_flush( sequence, T(n, k) );
                 }
 
+                /* Restore the original location of the tiles */
+                for (m = 0; m < B->mt; m++) {
+                    RUNTIME_data_migrate( sequence, B(m, k),
+                                          B->get_rankof( B, m, k ) );
+                }
+
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
diff --git a/compute/zlaset.c b/compute/zlaset.c
index 2b03272b7499713ffa3e4a5bfb39f825fb818387..7001e66a2a328fbd57839a9c03b0082eaf86ac7f 100644
--- a/compute/zlaset.c
+++ b/compute/zlaset.c
@@ -266,7 +266,7 @@ int CHAMELEON_zlaset_Tile_Async( cham_uplo_t uplo,
         return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
     }
     /* Check input arguments */
-    if (A->nb != A->mb) {
+    if ( (alpha != beta) && (A->nb != A->mb) ) {
         chameleon_error("CHAMELEON_zlaset_Tile_Async", "only square tiles supported");
         return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
     }
diff --git a/control/compute_z.h b/control/compute_z.h
index 3229f1389f86287bdd0fd355f3934c25f67a99ab..3bacf06a19dff7c57d2f315634e9040034c1763b 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -134,7 +134,7 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree,
 static inline int
 chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int q ) {
     int diag_m = chameleon_min( m, n );
-    return chameleon_desc_init( descA, CHAMELEON_MAT_ALLOC_GLOBAL,
+    return chameleon_desc_init( descA, CHAMELEON_MAT_ALLOC_TILE,
                                 ChamComplexDouble, nb, nb, nb*nb,
                                 diag_m, nb, 0, 0, diag_m, nb, p, q,
                                 chameleon_getaddr_diag,
@@ -145,7 +145,7 @@ chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int
 #define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \
     {                                                                   \
         int rc;                                                         \
-        rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_GLOBAL, \
+        rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_TILE, \
                                   ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \
                                   (m), (n), (i), (j), (m), (n), 1, 1,   \
                                   NULL, NULL, NULL );                   \
@@ -174,7 +174,7 @@ chameleon_zlap2tile( CHAM_context_t *chamctxt,
 
     if ( CHAMELEON_TRANSLATION == ChamOutOfPlace ) {
         /* Initialize the tile descriptor */
-        chameleon_desc_init( descAt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, mb, nb, (mb)*(nb),
+        chameleon_desc_init( descAt, CHAMELEON_MAT_ALLOC_TILE, ChamComplexDouble, mb, nb, (mb)*(nb),
                              lm, ln, 0, 0, m, n, 1, 1,
                              chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, NULL );
 
@@ -235,6 +235,7 @@ chameleon_ztile2lap( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t
 static inline void
 chameleon_ztile2lap_cleanup( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t *descAt )
 {
+    (void)chamctxt; /* chamctxt is not referenced by this cleanup */
     chameleon_desc_destroy( descAl );
     chameleon_desc_destroy( descAt );
 }
diff --git a/control/descriptor.c b/control/descriptor.c
index c27fe5749f2164bd558fedffc9d9f0ec9208552e..06e52cdec39845cccc8f57af889fd1ee9f3e5c3a 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -226,26 +226,32 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat,
     /* The matrix is alocated tile by tile with out of core */
     desc->ooc = 0;
 
-    // Matrix address
-    if ( mat == CHAMELEON_MAT_ALLOC_GLOBAL ) {
-        rc = chameleon_desc_mat_alloc( desc );
+    switch ( (intptr_t)mat ) {
+    case (intptr_t)CHAMELEON_MAT_ALLOC_TILE:
+        if ( chamctxt->scheduler == RUNTIME_SCHED_STARPU ) {
+            /* Let's use the allocation on the fly as in OOC */
+            desc->get_blkaddr = chameleon_getaddr_null;
+            desc->mat = NULL;
+            break;
+        }
+        /* Fall through - otherwise we switch back to the full allocation */
 
-        desc->alloc_mat = 1;
-        desc->use_mat   = 1;
-    }
-    else if ( mat == CHAMELEON_MAT_ALLOC_TILE ) {
-        //chameleon_error( "chameleon_desc_init", "CHAMELEON_MAT_ALLOC_TILE is not available yet" );
-        //desc->mat = NULL;
+    case (intptr_t)CHAMELEON_MAT_ALLOC_GLOBAL:
         rc = chameleon_desc_mat_alloc( desc );
+        desc->alloc_mat = 1;
         desc->use_mat   = 1;
+        break;
 
-        desc->alloc_mat = 1;
-    }
-    else if ( mat == CHAMELEON_MAT_OOC ) {
+    case (intptr_t)CHAMELEON_MAT_OOC:
+        if ( chamctxt->scheduler != RUNTIME_SCHED_STARPU ) {
+            chameleon_error("CHAMELEON_Desc_Create", "CHAMELEON Out-of-Core descriptors are supported only with StarPU");
+            return CHAMELEON_ERR_NOT_SUPPORTED;
+        }
         desc->mat = NULL;
         desc->ooc = 1;
-    }
-    else {
+        break;
+
+    default:
         /* memory of the matrix is handled by users */
         desc->mat     = mat;
         desc->use_mat = 1;
diff --git a/control/workspace.c b/control/workspace.c
index e743e33dbb8d40461a70bb0288d1cb824d656c21..8039447fbd09b0a93610ae1a2344eaf8198ddc1c 100644
--- a/control/workspace.c
+++ b/control/workspace.c
@@ -74,7 +74,8 @@ int chameleon_alloc_ibnb_tile(int M, int N, cham_tasktype_t func, int type, CHAM
     lm = IB * MT;
     ln = NB * NT;
 
-    return CHAMELEON_Desc_Create( desc, NULL, type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q );
+    return CHAMELEON_Desc_Create( desc, CHAMELEON_MAT_ALLOC_TILE, type, IB, NB, IB*NB,
+                                  lm, ln, 0, 0, lm, ln, p, q );
 }
 
 /**
@@ -119,7 +120,8 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc
     /* TODO: Fix the distribution for IPIV */
     *IPIV = (int*)malloc( size );
 
-    return CHAMELEON_Desc_Create( desc, NULL, type, IB, NB, IB*NB, lm, ln, 0, 0, lm, ln, p, q );
+    return CHAMELEON_Desc_Create( desc, CHAMELEON_MAT_ALLOC_TILE, type, IB, NB, IB*NB,
+                                  lm, ln, 0, 0, lm, ln, p, q );
 }
 
 /**
diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c
index cb9f67b494d0d911c3eaedc2340980eb24921eb7..7a2a74ca07a08a234c95db214485f12c46097f42 100644
--- a/coreblas/compute/core_zgelqt.c
+++ b/coreblas/compute/core_zgelqt.c
@@ -67,7 +67,7 @@
  *         The leading dimension of the array A.  LDA >= max(1,M).
  *
  * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
+ *         The IB-by-M triangular factor T of the block reflector.
  *         T is upper triangular by block (economic storage);
  *         The rest of the array is not referenced.
  *
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index ca0ae0e21faba5d75b9bcecc86ee09ab99d78174..4fa07c2b4cb4df1146f85baa3ad7340c7ad58b10 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -483,6 +483,8 @@ INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                     const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
+    (void)m1;
+    (void)n1;
     return INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, 0, ib, nb,
                                 V, Vm, Vn, ldv, T, Tm, Tn, ldt,
                                 A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
@@ -497,6 +499,8 @@ INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                     const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
+    (void)m1;
+    (void)n1;
     return INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, 0, ib, nb,
                                 V, Vm, Vn, ldv, T, Tm, Tn, ldt,
                                 A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
@@ -511,6 +515,8 @@ INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                     const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
+    (void)m1;
+    (void)n1;
     return INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, n2, ib, nb,
                                 V, Vm, Vn, ldv, T, Tm, Tn, ldt,
                                 A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
@@ -525,6 +531,8 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
                     const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
+    (void)m1;
+    (void)n1;
     return INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, m2, ib, nb,
                                 V, Vm, Vn, ldv, T, Tm, Tn, ldt,
                                 A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c
index 71a9bddceef10244b194948d3a7c3623168ea03c..3341a8f01532ea77a638799a7557c7414cb1299f 100644
--- a/runtime/openmp/codelets/codelet_zgelqt.c
+++ b/runtime/openmp/codelets/codelet_zgelqt.c
@@ -98,10 +98,13 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
+
+#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
     {
       CHAMELEON_Complex64_t TAU[ws_size];
       CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n );
-      CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
+
+      CORE_zlaset( ChamUpperLower, ib, m, 0., 0., ptrT, ldt );
+      CORE_zgelqt( m, n, ib, ptrA, lda, ptrT, ldt, TAU, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c
index a097637736103f1b1e1e90a4f6e72407c5ce68b9..6428375b2e9b2ef6c20e6bbc8f803e986e4a1ef2 100644
--- a/runtime/openmp/codelets/codelet_zgeqrt.c
+++ b/runtime/openmp/codelets/codelet_zgeqrt.c
@@ -99,10 +99,13 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
+
+#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
     {
       CHAMELEON_Complex64_t TAU[ws_size];
       CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n);
-      CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
+
+      CORE_zlaset( ChamUpperLower, ib, n, 0., 0., ptrT, ldt );
+      CORE_zgeqrt( m, n, ib, ptrA, lda, ptrT, ldt, TAU, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c
index 1acb66066910c0626cb731b76f5b8987c2beaac6..4bb4f16f030a9f5aa58a4dc9e66f28cefc1b4a39 100644
--- a/runtime/openmp/codelets/codelet_ztplqt.c
+++ b/runtime/openmp/codelets/codelet_ztplqt.c
@@ -31,9 +31,13 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0])
+
+#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
     {
       CHAMELEON_Complex64_t work[ws_size];
+
+      CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt);
+
       CORE_ztplqt( M, N, L, ib,
                    ptrA, lda, ptrB, ldb, ptrT, ldt, work );
     }
diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c
index 17917cc7b7791955707edfe3f608ea18e1247705..7381f6ebdc7682b4f2fc73c976d5e00f4cf21bf6 100644
--- a/runtime/openmp/codelets/codelet_ztpqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpqrt.c
@@ -30,9 +30,13 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0])
+
+#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
     {
       CHAMELEON_Complex64_t tmp[ws_size];
+
+      CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt);
+
       CORE_ztpqrt( M, N, L, ib,
           ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
     }
diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c
index 6e159eddcb2ec8e267b3bf5963e66fc8d50ac283..4ef5b5b7a6dbe1f5d385a775e580d6a13199f082 100644
--- a/runtime/parsec/codelets/codelet_zgelqt.c
+++ b/runtime/parsec/codelets/codelet_zgelqt.c
@@ -98,6 +98,7 @@ CORE_zgelqt_parsec( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &m, &n, &ib, &A, &lda, &T, &ldt, &TAU, &WORK );
 
+    CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt );
     CORE_zgelqt( m, n, ib, A, lda, T, ldt, TAU, WORK );
 
     (void)context;
diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c
index d4e9cc529cfa6bf86c233bcd6b5620109588403b..53ac8ac042386c01281c123e0488819bb299ee90 100644
--- a/runtime/parsec/codelets/codelet_zgeqrt.c
+++ b/runtime/parsec/codelets/codelet_zgeqrt.c
@@ -99,6 +99,7 @@ CORE_zgeqrt_parsec ( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &m, &n, &ib, &A, &lda, &T, &ldt, &TAU, &WORK );
 
+    CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt );
     CORE_zgeqrt( m, n, ib, A, lda, T, ldt, TAU, WORK );
 
     (void)context;
diff --git a/runtime/parsec/codelets/codelet_ztplqt.c b/runtime/parsec/codelets/codelet_ztplqt.c
index 3da524a420219a1befb61e46ab679342267061ae..96a2209258ebd44d6513866b6cfeef4508fcbf34 100644
--- a/runtime/parsec/codelets/codelet_ztplqt.c
+++ b/runtime/parsec/codelets/codelet_ztplqt.c
@@ -40,6 +40,7 @@ CORE_ztplqt_parsec( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &M, &N, &L, &ib, &A, &lda, &B, &ldb, &T, &ldt, &WORK );
 
+    CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt );
     CORE_ztplqt( M, N, L, ib,
                  A, lda, B, ldb, T, ldt, WORK );
 
diff --git a/runtime/parsec/codelets/codelet_ztpqrt.c b/runtime/parsec/codelets/codelet_ztpqrt.c
index ace7a3bf9df149ac89d5d34dd80334ce0c75b75d..f2308aa5bd3be488289b770b94bcb720e91a49ef 100644
--- a/runtime/parsec/codelets/codelet_ztpqrt.c
+++ b/runtime/parsec/codelets/codelet_ztpqrt.c
@@ -40,6 +40,7 @@ CORE_ztpqrt_parsec( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &M, &N, &L, &ib, &A, &lda, &B, &ldb, &T, &ldt, &WORK );
 
+    CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt );
     CORE_ztpqrt( M, N, L, ib,
                  A, lda, B, ldb, T, ldt, WORK );
 
diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c
index 7b1e5a47df394c9ba38fb2fa811e7d9211a7de7a..240773c983156402febe2242d86d5a0b560bfeee 100644
--- a/runtime/quark/codelets/codelet_zgelqt.c
+++ b/runtime/quark/codelets/codelet_zgelqt.c
@@ -40,6 +40,7 @@ void CORE_zgelqt_quark(Quark *quark)
     CHAMELEON_Complex64_t *WORK;
 
     quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK);
+    CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt );
     CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK);
 }
 
diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c
index 010a24653b20bf4c17e68b050facc5cd91565cfa..09ed24eef4662df09ea5e8fb59029ed8d8cf46df 100644
--- a/runtime/quark/codelets/codelet_zgeqrt.c
+++ b/runtime/quark/codelets/codelet_zgeqrt.c
@@ -40,6 +40,7 @@ void CORE_zgeqrt_quark(Quark *quark)
     CHAMELEON_Complex64_t *WORK;
 
     quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK);
+    CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt );
     CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
 }
 
diff --git a/runtime/quark/codelets/codelet_ztplqt.c b/runtime/quark/codelets/codelet_ztplqt.c
index f0e51b3754d6460ea9d3be4a8e9d4f826c8a997f..98b153433cd0382c262247eb2867ffd3229611a6 100644
--- a/runtime/quark/codelets/codelet_ztplqt.c
+++ b/runtime/quark/codelets/codelet_ztplqt.c
@@ -39,6 +39,7 @@ CORE_ztplqt_quark( Quark *quark )
     quark_unpack_args_11( quark, M, N, L, ib,
                           A, lda, B, ldb, T, ldt, WORK );
 
+    CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt ); /* T is ib-by-M for tplqt */
     CORE_ztplqt( M, N, L, ib,
                  A, lda, B, ldb, T, ldt, WORK );
 }
diff --git a/runtime/quark/codelets/codelet_ztpqrt.c b/runtime/quark/codelets/codelet_ztpqrt.c
index 24ce98e124023f90184379c211200d21693ed503..b508e548c371ce446c30dd6e40ec8ed4f63ed23c 100644
--- a/runtime/quark/codelets/codelet_ztpqrt.c
+++ b/runtime/quark/codelets/codelet_ztpqrt.c
@@ -39,6 +39,7 @@ CORE_ztpqrt_quark( Quark *quark )
     quark_unpack_args_11( quark, M, N, L, ib,
                           A, lda, B, ldb, T, ldt, WORK );
 
+    CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt );
     CORE_ztpqrt( M, N, L, ib,
                  A, lda, B, ldb, T, ldt, WORK );
 }
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 68d435d03483c2d344fb76b1ebb66e0d2e16112d..8ffad6e1ada200916e25ef6e685898e1c8c53c08 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -26,6 +26,36 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/* StarPU CPU kernel for zgelqt: zeroes an ib-by-m region of T, then runs CORE_zgelqt. */
+static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
+{
+    CHAMELEON_Complex64_t *A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex64_t *T   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t *TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */
+    CHAMELEON_Complex64_t *WORK;
+    CHAMELEON_starpu_ws_t *h_work;
+    int m, n, ib;
+    int lda, ldt;
+
+    /* Retrieve the scalar arguments packed in cl_arg. */
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
+
+    /* The scratch buffer stores TAU in its first max(m,n) entries; WORK follows. */
+    WORK = TAU + chameleon_max( m, n );
+
+    /* Zero the ib-by-m part of T before the factorization writes into it. */
+    CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt );
+
+    CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -87,7 +117,6 @@
  *          \retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
                        const CHAM_desc_t *A, int Am, int An, int lda,
@@ -123,33 +152,3 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *TAU, *WORK;
-
-    A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
-
-    WORK = TAU + chameleon_max( m, n );
-    CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 0719010b62340bb9f42bbbb6998c198cc07b6717..205da5e35aa7be306588b42be3471a9cc302cc6a 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -35,7 +35,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                       cham_trans_t transA, cham_trans_t transB,
                       int m, int n, int k, int nb,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                                               const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                                                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc)
 {
     (void)nb;
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index eaa24263791ddafc5f64be669d4f9676fdc048d5..bee5168f95baef5c9e0b4ea8da3d4adbe6c43625 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -26,6 +26,37 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+/* StarPU CPU kernel for zgeqrt:
+ * zeroes an ib-by-n region of T, then runs CORE_zgeqrt. */
+static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
+{
+    CHAMELEON_Complex64_t *A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex64_t *T   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAMELEON_Complex64_t *TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */
+    CHAMELEON_Complex64_t *WORK;
+    CHAMELEON_starpu_ws_t *h_work;
+    int m, n, ib;
+    int lda, ldt;
+
+    /* Retrieve the scalar arguments packed in cl_arg. */
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
+
+    /* The scratch buffer stores TAU in its first max(m,n) entries; WORK follows. */
+    WORK = TAU + chameleon_max( m, n );
+
+    /* Zero the ib-by-n part of T before the factorization writes into it. */
+    CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt );
+
+    CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -88,7 +119,6 @@
  *          \retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
                        const CHAM_desc_t *A, int Am, int An, int lda,
@@ -124,33 +154,3 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *TAU, *WORK;
-
-    A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
-
-    WORK = TAU + chameleon_max( m, n );
-    CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index f689d82bcb332dc40e32d31c2a43d7cf09398eee..9ab611908607b4440277005c057598e35d530825 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -24,10 +24,10 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zlange(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlange( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, int M, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)NB;
     struct starpu_codelet *codelet = &cl_zlange;
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index c2f771e69fd77a8b35e5c54c94b565b6a4aa7aba..44615d5c3bef1f0b958de2c89cfbf41181ae9a49 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -43,6 +43,7 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
                                 &lda, &ldb, &ldt );
 
+    CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt );
     CORE_ztplqt( M, N, L, ib,
                  A, lda, B, ldb, T, ldt, WORK );
 }
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index bfddf9d4b7859da85baf54fe12925fc797d6f5af..6fbd0afe65501a497ceda71c9c6f40444a50369a 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -43,6 +43,7 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib,
                                 &lda, &ldb, &ldt );
 
+    CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt );
     CORE_ztpqrt( M, N, L, ib,
                  A, lda, B, ldb, T, ldt, WORK );
 }
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 92b63ce46f24f9b33f2499e88f55b218c9f1756e..c8ffd2e6b2cf32005f7838c6d4bee3e6e72d526b 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -238,7 +238,7 @@ void RUNTIME_desc_destroy( CHAM_desc_t *desc )
             for (m = 0; m < lmt; m++)
             {
                 if (*handle != NULL) {
-                    starpu_data_unregister(*handle);
+                    starpu_data_unregister_submit(*handle);
                 }
                 handle++;
             }
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index a7a308326b5787eb76e3c0f16f40ca0e4c0cc44f..8c833bd181864ea62e39f471003dd916aced9e09 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -49,9 +49,9 @@ int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, siz
     int ret = 0;
     if ( worker_size > 0 ) {
         options->ws_wsize = worker_size;
-        starpu_vector_data_register((starpu_data_handle_t*)(&(options->ws_worker)),
-                                    -1, (uintptr_t)NULL,
-                                    worker_size, sizeof(char));
+        starpu_matrix_data_register( (starpu_data_handle_t*)(&(options->ws_worker)),
+                                     -1, (uintptr_t)NULL,
+                                     worker_size, worker_size, 1, sizeof(char));
     }
     if ( host_size > 0 ) {
         options->ws_hsize = host_size;
diff --git a/testing/testing_zgels.c b/testing/testing_zgels.c
index 9abcde8cf3e5a46b807cb96a8ab68abf19d4aa76..6316ab1f5236c2367ea4613fb5251834ea7cbde9 100644
--- a/testing/testing_zgels.c
+++ b/testing/testing_zgels.c
@@ -103,7 +103,6 @@ int testing_zgels(int argc, char **argv)
     }
 
     CHAMELEON_Alloc_Workspace_zgels(M, N, &T, 1, 1);
-    memset(T->mat, 0, (T->llm*T->lln)*sizeof(CHAMELEON_Complex64_t));
     eps = LAPACKE_dlamch_work('e');
 
     /*----------------------------------------------------------
diff --git a/testing/testing_zgels_hqr.c b/testing/testing_zgels_hqr.c
index 67101b034580137d1a6318f9ffc2e1be03af5d2e..91b6d78d24e48a8c050218e56f0ed0def870d0db 100644
--- a/testing/testing_zgels_hqr.c
+++ b/testing/testing_zgels_hqr.c
@@ -99,8 +99,6 @@ int testing_zgels_hqr(int argc, char **argv)
 
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, 1, 1);
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, 1, 1);
-    memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(CHAMELEON_Complex64_t));
-    memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(CHAMELEON_Complex64_t));
 
     eps = LAPACKE_dlamch_work( 'e' );
 
diff --git a/testing/testing_zgels_systolic.c b/testing/testing_zgels_systolic.c
index 53176ceecb6317b75c8942b30d98f0e5d98c0fd6..7862ee0fd1ae36dcb8044c57ddd0cebb439e2fd0 100644
--- a/testing/testing_zgels_systolic.c
+++ b/testing/testing_zgels_systolic.c
@@ -93,8 +93,6 @@ int testing_zgels_systolic(int argc, char **argv)
 
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, 1, 1);
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, 1, 1);
-    memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(CHAMELEON_Complex64_t));
-    memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(CHAMELEON_Complex64_t));
 
     eps = LAPACKE_dlamch_work( 'e' );
 
diff --git a/timing/time_zgelqf.c b/timing/time_zgelqf.c
index e2c709b7009d9403af0f9ab6c301ebd95621b20d..45c69f04605ac578a9adf044b7d4e091de3b1422 100644
--- a/timing/time_zgelqf.c
+++ b/timing/time_zgelqf.c
@@ -44,7 +44,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels(M, N, &T, P, Q);
-    memset(T->mat, 0, (T->llm*T->lln)*sizeof(ChamComplexDouble));
 
     /* Save AT in lapack layout for check */
     PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N );
diff --git a/timing/time_zgelqf_tile.c b/timing/time_zgelqf_tile.c
index f79ee5a857662bfc3e2a5e2863582b4fdf6dd4af..bc4723bafcdf0c1675163d4ceec68c0c6bfbee1e 100644
--- a/timing/time_zgelqf_tile.c
+++ b/timing/time_zgelqf_tile.c
@@ -45,7 +45,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q);
-    memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble));
 
     /* CHAMELEON ZGEQRF */
     START_TIMING();
diff --git a/timing/time_zgels.c b/timing/time_zgels.c
index 77bbbe667091accf09b8c2c9539d76d2439c715d..30a3ad5e1ef20419a21c1a066d5928901b5b8864 100644
--- a/timing/time_zgels.c
+++ b/timing/time_zgels.c
@@ -26,7 +26,7 @@
 #include "timing_zauxiliary.h"
 
 static int
-RunTest(int *iparam, double *dparam, chameleon_time_t *t_) 
+RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 {
     CHAM_desc_t *T;
     PASTE_CODE_IPARAM_LOCALS( iparam );
@@ -47,7 +47,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
     CHAMELEON_zplrnt( M, NRHS, x, LDB, 5673 );
 
     CHAMELEON_Alloc_Workspace_zgels(M, N, &T, P, Q);
-    memset(T->mat, 0, (T->llm*T->lln)*sizeof(ChamComplexDouble));
 
     /* Save A and b  */
     if (check) {
@@ -58,13 +57,13 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
     START_TIMING();
     CHAMELEON_zgels( ChamNoTrans, M, N, NRHS, A, LDA, T, x, LDB );
     STOP_TIMING();
-    
+
     /* Check the solution */
     if (check)
     {
         dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, b, x, LDB,
-                                              &(dparam[IPARAM_ANORM]), 
-                                              &(dparam[IPARAM_BNORM]), 
+                                              &(dparam[IPARAM_ANORM]),
+                                              &(dparam[IPARAM_BNORM]),
                                               &(dparam[IPARAM_XNORM]));
         free(Acpy); free(b);
     }
diff --git a/timing/time_zgels_tile.c b/timing/time_zgels_tile.c
index 6e0d300fa252053914c6ed29d0065fa0b30769e2..0d628287bf3df8a03f4b992913399bc69419b8bd 100644
--- a/timing/time_zgels_tile.c
+++ b/timing/time_zgels_tile.c
@@ -25,7 +25,7 @@
 #include "./timing.c"
 
 static int
-RunTest(int *iparam, double *dparam, chameleon_time_t *t_) 
+RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 {
     CHAM_desc_t *descT;
     PASTE_CODE_IPARAM_LOCALS( iparam );
@@ -46,7 +46,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q);
-    memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble));
 
     /* Save A and B for check */
     if (check == 1){
diff --git a/timing/time_zgeqrf.c b/timing/time_zgeqrf.c
index 70353b2ca1daea834c78b34cd4d80c721ddf83c2..89e3534e40e2a50908d50807b83a39a71855ba57 100644
--- a/timing/time_zgeqrf.c
+++ b/timing/time_zgeqrf.c
@@ -44,7 +44,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels(M, N, &T, P, Q);
-    memset(T->mat, 0, (T->llm*T->lln)*sizeof(ChamComplexDouble));
 
     /* Save AT in lapack layout for check */
     PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N );
diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c
index 725597fa0b0256c9dc0071aca22a888adf0ce493..6b4f60459eb30a8f66227f10dc9d6e99ffb709fa 100644
--- a/timing/time_zgeqrf_hqr.c
+++ b/timing/time_zgeqrf_hqr.c
@@ -51,9 +51,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, P, Q);
-    memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(ChamComplexDouble));
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, P, Q);
-    memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(ChamComplexDouble));
 
     /* Save AT in lapack layout for check */
     PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N );
diff --git a/timing/time_zgeqrf_hqr_tile.c b/timing/time_zgeqrf_hqr_tile.c
index 2b30953e0062de9537d75423bc0cd32210fbfbed..3af4530fda76f4f84e0b4b47cf9c8694d8beb177 100644
--- a/timing/time_zgeqrf_hqr_tile.c
+++ b/timing/time_zgeqrf_hqr_tile.c
@@ -58,9 +58,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TS, P, Q);
-    memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(ChamComplexDouble));
     CHAMELEON_Alloc_Workspace_zgels(M, N, &TT, P, Q);
-    memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(ChamComplexDouble));
 
     /* Initialize matrix */
     matrix.mt = TS->mt;
diff --git a/timing/time_zgeqrf_tile.c b/timing/time_zgeqrf_tile.c
index b35782a698828c262c27b3630885bec1821880ca..dc257558bca9ee6864e9077f851f33256e5d6c4e 100644
--- a/timing/time_zgeqrf_tile.c
+++ b/timing/time_zgeqrf_tile.c
@@ -45,7 +45,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q);
-    memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble));
 
     /* CHAMELEON ZGEQRF */
     START_TIMING();
diff --git a/timing/time_zgeqrs_tile.c b/timing/time_zgeqrs_tile.c
index 3018a74b2c5a7ab7d1db198bb10a08a4e6c96c12..78b008c1ad6f4b80f434cc588acd05f103e8672c 100644
--- a/timing/time_zgeqrs_tile.c
+++ b/timing/time_zgeqrs_tile.c
@@ -48,7 +48,6 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
 
     /* Allocate Workspace */
     CHAMELEON_Alloc_Workspace_zgels_Tile(M, N, &descT, P, Q);
-    memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble));
 
     /* CHAMELEON ZGEQRF */
     CHAMELEON_zgeqrf_Tile( descA, descT );