diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 68a77fe561d452f473a2eef963774b4c3675bcc0..4c447adeeb43bac8f9d59709d3afee92efb4fbc0 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -66,8 +66,10 @@ set(ZSRC
+    pzlascal.c
+    zlascal.c
diff --git a/compute/pzlascal.c b/compute/pzlascal.c
new file mode 100644
index 0000000000000000000000000000000000000000..524c113022d68ea916d744087e8d3444ff25d64a
--- /dev/null
+++ b/compute/pzlascal.c
@@ -0,0 +1,103 @@
+ *
+ * @file pzlascal.c
+ *
+ *  MORSE auxiliary routines
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Dalal Sukkari
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "control/common.h"
+#define A(m, n) A,  m,  n
+ *  Parallel scale of a matrix A
+ **/
+/*
+ * morse_pzlascal - insert the tile tasks that scale A by alpha.
+ *
+ * One MORSE_TASK_zlascal is submitted per tile touched by the part of A
+ * selected by uplo: diagonal tiles are scaled with the requested triangular
+ * shape, off-diagonal tiles are scaled in full (MorseUpperLower).
+ *
+ *   uplo     - MorseLower, MorseUpper; any other value scales every tile.
+ *   alpha    - scaling factor forwarded to each task.
+ *   A        - tile descriptor of the matrix to scale.
+ *   sequence - sequence the tasks belong to; nothing is submitted when its
+ *              status is not MORSE_SUCCESS.
+ *   request  - request handle forwarded to RUNTIME_options_init().
+ *
+ * Row extents (tempmm, tempnm) are derived from the row tile size A->mb and
+ * column extents (tempnn, tempmn) from the column tile size A->nb, so the
+ * routine stays correct when tiles are not square.
+ */
+void morse_pzlascal(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A,
+                    MORSE_sequence_t *sequence, MORSE_request_t *request)
+    MORSE_context_t *morse;
+    MORSE_option_t options;
+    int tempmm, tempnn, tempmn, tempnm;
+    int m, n;
+    int ldam, ldan;
+    /* Number of diagonal tiles */
+    int minmnt = min(A->mt, A->nt);
+    morse = morse_context_self();
+    /* Do not submit anything on a sequence that already failed */
+    if (sequence->status != MORSE_SUCCESS)
+        return;
+    RUNTIME_options_init(&options, morse, sequence, request);
+    switch(uplo) {
+    case MorseLower:
+        for (n = 0; n < minmnt; n++) {
+            /* Last tile row/column may be smaller than a full tile */
+            tempnm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
+            tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+            ldan = BLKLDD(A, n);
+            /* Diagonal tile: only its lower part belongs to the trapezoid */
+            MORSE_TASK_zlascal(
+                &options,
+                MorseLower, tempnm, tempnn, A->mb,
+                alpha, A(n, n), ldan);
+            /* Tiles below the diagonal are scaled entirely */
+            for (m = n+1; m < A->mt; m++) {
+                tempmm = m == A->mt-1 ? A->m-A->mb*m : A->mb;
+                ldam = BLKLDD(A, m);
+                MORSE_TASK_zlascal(
+                    &options,
+                    MorseUpperLower, tempmm, tempnn, A->mb,
+                    alpha, A(m, n), ldam);
+            }
+        }
+        break;
+    case MorseUpper:
+        for (m = 0; m < minmnt; m++) {
+            tempmm = m == A->mt-1 ? A->m-A->mb*m : A->mb;
+            tempmn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
+            ldam = BLKLDD(A, m);
+            /* Diagonal tile: only its upper part belongs to the trapezoid */
+            MORSE_TASK_zlascal(
+                &options,
+                MorseUpper, tempmm, tempmn, A->mb,
+                alpha, A(m, m), ldam);
+            /* Tiles right of the diagonal are scaled entirely */
+            for (n = m+1; n < A->nt; n++) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+                MORSE_TASK_zlascal(
+                    &options,
+                    MorseUpperLower, tempmm, tempnn, A->mb,
+                    alpha, A(m, n), ldam);
+            }
+        }
+        break;
+    case MorseUpperLower:
+    default:
+        /* General matrix: every tile is scaled entirely */
+        for (m = 0; m < A->mt; m++) {
+            tempmm = m == A->mt-1 ? A->m-A->mb*m : A->mb;
+            ldam = BLKLDD(A, m);
+            for (n = 0; n < A->nt; n++) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+                MORSE_TASK_zlascal(
+                    &options,
+                    MorseUpperLower, tempmm, tempnn, A->mb,
+                    alpha, A(m, n), ldam);
+            }
+        }
+    }
+    RUNTIME_options_ws_free(&options);
+    RUNTIME_options_finalize(&options, morse);
+    MORSE_TASK_dataflush_all();
diff --git a/compute/zlascal.c b/compute/zlascal.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c7d2f36224eee1e22e46ebad2e2c99606c94080
--- /dev/null
+++ b/compute/zlascal.c
@@ -0,0 +1,281 @@
+ *
+ * @file zlascal.c
+ *
+ *  MORSE computational routines
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Dalal Sukkari
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "control/common.h"
+ *
+ * @ingroup MORSE_Complex64_t
+ *
+ *  MORSE_zlascal - Scales a matrix by the scalar alpha as in
+ *  ScaLAPACK pzlascal().
+ *
+ *    \f[ A = \alpha A \f],
+ *
+ *  alpha is a scalar, and A a general, upper or lower trapezoidal matrix.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Specifies the shape of A:
+ *          = MorseUpperLower: A is a general matrix.
+ *          = MorseUpper: A is an upper trapezoidal matrix.
+ *          = MorseLower: A is a lower trapezoidal matrix.
+ *
+ * @param[in] M
+ *          M specifies the number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          N specifies the number of columns of the matrix A. N >= 0.
+ *
+ * @param[in] alpha
+ *          alpha specifies the scalar alpha
+ *
+ * @param[in,out] A
+ *          A is a LDA-by-N matrix.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zlascal_Tile
+ * @sa MORSE_clascal
+ * @sa MORSE_dlascal
+ * @sa MORSE_slascal
+ *
+ ******************************************************************************/
+int MORSE_zlascal(MORSE_enum uplo, int M, int N,
+                   MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA) /* Blocking entry point on a plain array A: validates the arguments, runs MORSE_zlascal_Tile_Async on a descriptor built from A, waits, and returns the sequence status. */
+    int NB;
+    int status;
+    MORSE_desc_t descA;
+    MORSE_context_t *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zlascal", "MORSE not initialized"); /* NOTE(review): no return follows; presumably morse_fatal_error() does not return - confirm */
+    }
+    /* Check input arguments; each failure returns minus the position of the offending argument */
+    if (uplo != MorseUpper && uplo != MorseLower && uplo != MorseUpperLower) {
+        morse_error("MORSE_zlascal", "illegal value of uplo");
+        return -1;
+    }
+    if (M < 0) {
+        morse_error("MORSE_zlascal", "illegal value of M");
+        return -2;
+    }
+    if (N < 0) {
+        morse_error("MORSE_zlascal", "illegal value of N");
+        return -3;
+    }
+    if (LDA < max(1, M)) {
+        morse_error("MORSE_zlascal", "illegal value of LDA");
+        return -6; /* LDA is the sixth argument */
+    }
+    /* Quick return: empty matrix, or alpha == 1 which leaves A unchanged */
+    if (M == 0 || N == 0 ||
+        (alpha == (MORSE_Complex64_t)1.0))
+        return MORSE_SUCCESS;
+    /* Tune the tile size from M and N; the MORSE_FUNC_ZGEMM tuning entry is reused here */
+    status = morse_tune(MORSE_FUNC_ZGEMM, M, N, 0);
+    if (status != MORSE_SUCCESS) {
+        morse_error("MORSE_zlascal", "morse_tune() failed");
+        return status;
+    }
+    /* Tile size used for both dimensions of descA (presumably the value selected by morse_tune() - confirm) */
+    NB = MORSE_NB;
+    morse_sequence_create(morse, &sequence);
+        morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request,
+                             morse_desc_mat_free(&(descA)) ); /* builds descA from the array A; the name suggests an out-of-place LAPACK-to-tile conversion - TODO confirm */
+/*    } else {*/
+/*        morse_ziplap2tile( descA, A, NB, NB, LDA, N , 0, 0, M, N,*/
+/*                            sequence, &request);*/
+/*    }*/
+    /* Call the tile interface */
+    MORSE_zlascal_Tile_Async(
+        uplo, alpha, &descA, sequence, &request);
+        morse_zooptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request); /* presumably copies the scaled tiles back into A - TODO confirm */
+        RUNTIME_sequence_wait(morse, sequence); /* block until the sequence has completed */
+        morse_desc_mat_free(&descA);
+/*    } else {*/
+/*        morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/
+/*        morse_dynamic_sync();*/
+/*    }*/
+    status = sequence->status; /* read the status before the sequence is destroyed */
+    morse_sequence_destroy(morse, sequence);
+    return status;
+ *
+ * @ingroup MORSE_Complex64_t_Tile
+ *
+ *  MORSE_zlascal_Tile - Scales a matrix by the scalar alpha as in
+ *  ScaLAPACK pzlascal().
+ *
+ *    \f[ A = \alpha A \f],
+ *
+ *  alpha is a scalar, and A a general, upper or lower trapezoidal matrix.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Specifies the shape of A:
+ *          = MorseUpperLower: A is a general matrix.
+ *          = MorseUpper: A is an upper trapezoidal matrix.
+ *          = MorseLower: A is a lower trapezoidal matrix.
+ *
+ * @param[in] alpha
+ *          alpha specifies the scalar alpha
+ *
+ * @param[in,out] A
+ *          Descriptor of the matrix A. The part of A selected by uplo is
+ *          scaled by alpha.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zlascal
+ * @sa MORSE_zlascal_Tile_Async
+ * @sa MORSE_clascal_Tile
+ * @sa MORSE_dlascal_Tile
+ * @sa MORSE_slascal_Tile
+ *
+ ******************************************************************************/
+int MORSE_zlascal_Tile(MORSE_enum uplo,
+                        MORSE_Complex64_t alpha, MORSE_desc_t *A) /* Blocking tile interface: runs MORSE_zlascal_Tile_Async on a private sequence, waits for it, and returns the sequence status. */
+    MORSE_context_t *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
+    int status;
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zlascal_Tile", "MORSE not initialized"); /* NOTE(review): no return follows; presumably morse_fatal_error() does not return - confirm */
+    }
+    morse_sequence_create(morse, &sequence); /* sequence created and destroyed inside this call */
+    MORSE_zlascal_Tile_Async(uplo, alpha, A, sequence, &request); /* argument checking is done by the asynchronous variant */
+    RUNTIME_sequence_wait(morse, sequence); /* block until the sequence has completed */
+    status = sequence->status; /* read the status before the sequence is destroyed */
+    morse_sequence_destroy(morse, sequence);
+    return status;
+ *
+ * @ingroup MORSE_Complex64_t_Tile_Async
+ *
+ *  MORSE_zlascal_Tile_Async - Scales a matrix by the scalar alpha as in
+ *  ScaLAPACK pzlascal().
+ *  Non-blocking equivalent of MORSE_zlascal_Tile().
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zlascal
+ * @sa MORSE_zlascal_Tile
+ * @sa MORSE_clascal_Tile_Async
+ * @sa MORSE_dlascal_Tile_Async
+ * @sa MORSE_slascal_Tile_Async
+ *
+ ******************************************************************************/
+/*
+ * Parameters not listed in the header above:
+ *   uplo  - MorseUpper, MorseLower or MorseUpperLower; any other value fails
+ *           the request with MORSE_ERR_ILLEGAL_VALUE.
+ *   alpha - scaling factor; alpha == 1 returns immediately.
+ *   A     - tile descriptor of the matrix to scale; its start indexes (i, j)
+ *           must be multiples of the tile sizes (mb, nb).
+ *
+ * Returns MORSE_SUCCESS once the tasks are submitted (or on a quick return),
+ * MORSE_ERR_UNALLOCATED for a NULL sequence/request, and otherwise the value
+ * of morse_request_fail() for a flushed sequence or an illegal argument.
+ */
+int MORSE_zlascal_Tile_Async(MORSE_enum uplo,
+                              MORSE_Complex64_t alpha, MORSE_desc_t *A,
+                              MORSE_sequence_t *sequence, MORSE_request_t *request)
+    MORSE_context_t *morse;
+    morse = morse_context_self();
+    if (morse == NULL) {
+        /* NOTE(review): no return follows; presumably morse_fatal_error() does not return - confirm */
+        morse_fatal_error("MORSE_zlascal_Tile_Async", "MORSE not initialized");
+    }
+    if (sequence == NULL) {
+        morse_fatal_error("MORSE_zlascal_Tile_Async", "NULL sequence");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        morse_fatal_error("MORSE_zlascal_Tile_Async", "NULL request");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    /* Check sequence status: refuse to queue work on a sequence that already failed */
+    if (sequence->status == MORSE_SUCCESS)
+        request->status = MORSE_SUCCESS;
+    else
+        return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
+    /* Check descriptors for correctness */
+    if (morse_desc_check(A) != MORSE_SUCCESS) {
+        morse_error("MORSE_zlascal_Tile_Async", "invalid first descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments; messages carry this function's own name */
+    if (uplo != MorseUpper && uplo != MorseLower && uplo != MorseUpperLower) {
+        morse_error("MORSE_zlascal_Tile_Async", "illegal value of uplo");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if ( (A->i%A->mb != 0) || (A->j%A->nb != 0) ) {
+        morse_error("MORSE_zlascal_Tile_Async", "start indexes have to be multiple of tile size");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    /* Quick return: empty matrix, or alpha == 1 which leaves A unchanged */
+    if ( (A->m == 0) || (A->n == 0) ||
+         (alpha == (MORSE_Complex64_t)1.0) )
+        return MORSE_SUCCESS;
+    morse_pzlascal( uplo, alpha, A, sequence, request);
+    return MORSE_SUCCESS;
diff --git a/control/compute_z.h b/control/compute_z.h
index cf34dcd1afd11c792691da2ce837c1d97daea5e7..78656825adbe77b371ca03fcd30fcf93b3ca3da4 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -118,6 +118,7 @@ void morse_pzlanhe(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *re
 void morse_pzlansy(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzlascal(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlaset( MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlaset2(MORSE_enum uplo, MORSE_Complex64_t alpha,                          MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlaswp(MORSE_desc_t *B, int *IPIV, int inc, MORSE_sequence_t *sequence, MORSE_request_t *request);
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index 6dc104b74a527d790c76b5f0e9b3bc584cb1d8f3..a0253041cf5b526e957538c3f3c2c4a8a3bb1bb3 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -33,6 +33,7 @@ set(ZSRC
+    core_zlascal.c
diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c
new file mode 100644
index 0000000000000000000000000000000000000000..bc17934d3b06cec9c58fe7dc95456f7d6cd5460c
--- /dev/null
+++ b/coreblas/compute/core_zlascal.c
@@ -0,0 +1,104 @@
+ * @file core_zlascal.c
+ *
+ *  MORSE computational routines
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Dalal Sukkari
+ * @date 2015-11-05
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "coreblas/include/coreblas.h"
+#include "coreblas/include/cblas.h"
+#include <math.h>
+ *******************************************************************************
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ *  CORE_zlascal scales a two-dimensional matrix A. Unlike
+ *  CORE_zlascl(), no check is performed to prevent under/overflow. This should
+ *  have been done at a higher level.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Specifies the shape of A:
+ *          = MorseUpperLower: A is a general matrix.
+ *          = MorseUpper: A is an upper trapezoidal matrix.
+ *          = MorseLower: A is a lower trapezoidal matrix.
+ *
+ * @param[in] m is the number of rows of the matrix A. m >= 0
+ *
+ * @param[in] n is the number of columns of the matrix A. n >= 0
+ *
+ * @param[in] alpha
+ *            The scalar factor.
+ *
+ * @param[in,out] A is the matrix to be multiplied by alpha
+ *
+ * @param[in] lda is the leading dimension of the array A. lda >= max(1,m).
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ ******************************************************************************/
+CORE_zlascal( MORSE_enum uplo, int m, int n,
+              MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int lda )
+    int i;
+    /* Argument checks: each failure reports and returns minus the argument position */
+    if ( (uplo != MorseUpperLower) &&
+         (uplo != MorseUpper)      &&
+         (uplo != MorseLower))
+    {
+        coreblas_error(1, "illegal value of uplo");
+        return -1;
+    }
+    if (m < 0) {
+        coreblas_error(2, "Illegal value of m");
+        return -2;
+    }
+    if (n < 0) {
+        coreblas_error(3, "Illegal value of n");
+        return -3;
+    }
+    if ( (lda < max(1,m)) && (m > 0) ) {
+        coreblas_error(6, "Illegal value of lda");
+        return -6;
+    }
+    /* A is addressed column by column: column i starts at A + i*lda */
+    switch ( uplo ) {
+    case MorseUpper:
+        /* Column i of an upper trapezoid holds rows 0..min(i, m-1) */
+        for(i=0; i<n; i++) {
+            cblas_zscal( min( i+1, m ), CBLAS_SADDR(alpha), A+i*lda, 1 );
+        }
+        break;
+    case MorseLower:
+        /* Column i of a lower trapezoid holds rows i..m-1: start on the
+         * diagonal entry and scale the m-i entries from there. Columns with
+         * i >= m hold no entry of the trapezoid and are left untouched. */
+        for(i=0; i<min( m, n ); i++) {
+            cblas_zscal( m-i, CBLAS_SADDR(alpha), A+i*lda+i, 1 );
+        }
+        break;
+    default:
+        if (m == lda) {
+            /* Columns are contiguous: scale the whole m*n block in one call */
+            cblas_zscal( m*n, CBLAS_SADDR(alpha), A, 1 );
+        }
+        else {
+            for(i=0; i<n; i++) {
+                cblas_zscal( m, CBLAS_SADDR(alpha), A+i*lda, 1 );
+            }
+        }
+    }
+    return MORSE_SUCCESS;
diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c
index aadcbbe5005334a4f987e40bf09ba2cdd3bebb49..47a4f09c6348a51a108253cd0d289a58a0fad0fc 100644
--- a/coreblas/compute/core_ztsmlq.c
+++ b/coreblas/compute/core_ztsmlq.c
@@ -259,10 +259,10 @@ int CORE_ztsmlq(MORSE_enum side, MORSE_enum trans,
             side, trans, MorseForward, MorseRowwise,
             mi, ni, M2, N2, kb, 0,
-            &A1[LDA1*jc+ic], LDA1,
+            A1 + LDA1 * jc + ic, LDA1,
             A2, LDA2,
-            &V[i], LDV,
-            &T[LDT*i], LDT,
+            V + i,       LDV,
+            T + i * LDT, LDT,
             WORK, LDWORK);
     return MORSE_SUCCESS;
diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h
index fd21173de2f4b639dd058e353e01271925de3bad..4c6ee39cae6b975da25162e7edb852db83cb484a 100644
--- a/coreblas/include/coreblas_z.h
+++ b/coreblas/include/coreblas_z.h
@@ -63,6 +63,8 @@ int CORE_zgeadd(MORSE_enum trans, int M, int N,
                 const MORSE_Complex64_t *A, int LDA,
                       MORSE_Complex64_t beta,
                       MORSE_Complex64_t *B, int LDB);
+int CORE_zlascal( MORSE_enum uplo, int m, int n,
+              MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int lda );
 int  CORE_zgelqt(int M, int N, int IB,
                  MORSE_Complex64_t *A, int LDA,
                  MORSE_Complex64_t *T, int LDT,
diff --git a/cudablas/compute/cuda_zgelqt.c b/cudablas/compute/cuda_zgelqt.c
index ff18cc171eba1573d417cedcd925400e719d737a..87307f51076498abd5547c37ad23871adb6c921b 100644
--- a/cudablas/compute/cuda_zgelqt.c
+++ b/cudablas/compute/cuda_zgelqt.c
@@ -26,17 +26,17 @@
 int CUDA_zgelqt(
-        magma_int_t m, magma_int_t n, magma_int_t nb,
-        magmaDoubleComplex *da, magma_int_t ldda,
-        magmaDoubleComplex *v,  magma_int_t ldv,
-        magmaDoubleComplex *dt, magma_int_t lddt,
-        magmaDoubleComplex *t,  magma_int_t ldt,
-        magmaDoubleComplex *dd,
-        magmaDoubleComplex *d,  magma_int_t ldd,
-        magmaDoubleComplex *tau,
-        magmaDoubleComplex *hwork,
-        magmaDoubleComplex *dwork,
-        CUstream stream)
+    magma_int_t m, magma_int_t n, magma_int_t nb,
+    magmaDoubleComplex *da, magma_int_t ldda,
+    magmaDoubleComplex *v,  magma_int_t ldv,
+    magmaDoubleComplex *dt, magma_int_t lddt,
+    magmaDoubleComplex *t,  magma_int_t ldt,
+    magmaDoubleComplex *dd,
+    magmaDoubleComplex *d,  magma_int_t ldd,
+    magmaDoubleComplex *tau,
+    magmaDoubleComplex *hwork,
+    magmaDoubleComplex *dwork,
+    CUstream stream)
 #define da_ref(a_1,a_2) ( da+(a_2)*(ldda) + (a_1))
 #define v_ref(a_1,a_2)  ( v+(a_2)*(ldv) + (a_1))
@@ -47,17 +47,17 @@ int CUDA_zgelqt(
     double _Complex one=1.;
     if (m < 0) {
-    return -1;
+        return -1;
     } else if (n < 0) {
-    return -2;
+        return -2;
     } else if (ldda < max(1,m)) {
-    return -4;
+        return -4;
     k = min(m,n);
     if (k == 0) {
-    hwork[0] = *((magmaDoubleComplex*) &one);
-    return MAGMA_SUCCESS;
+        hwork[0] = *((magmaDoubleComplex*) &one);
+        return MAGMA_SUCCESS;
     /* lower parts of little T must be zero: memset to 0 for simplicity */
@@ -92,7 +92,7 @@ int CUDA_zgelqt(
             magma_queue_sync( stream );
             /* Form the triangular factor of the block reflector on the host
-            H = H'(i+ib-1) . . . H(i+1) H(i) */
+             H = H'(i+ib-1) . . . H(i+1) H(i) */
             CORE_zgelqt(ib, cols, ib,
                         (double _Complex*) v_ref(0,i), ib,
                         (double _Complex*) t_ref(0,0), ib,
@@ -100,7 +100,7 @@ int CUDA_zgelqt(
                         (double _Complex*) hwork);
             /* put 0s in the lower triangular part of a panel (and 1s on the
-              diagonal); copy the lower triangular in d */
+             diagonal); copy the lower triangular in d */
             CORE_zgesplit(MorseRight, MorseUnit, ib, min(ib,cols),
                           (double _Complex*) v_ref(0,i), ib,
                           (double _Complex*) d, ib);
diff --git a/cudablas/compute/cuda_zgeqrt.c b/cudablas/compute/cuda_zgeqrt.c
index 1ba8caddc26adb0be4db9bd7b15365d83f96a876..6215333e63be8308ca7c80d34a2bd71896e08f7e 100644
--- a/cudablas/compute/cuda_zgeqrt.c
+++ b/cudablas/compute/cuda_zgeqrt.c
@@ -26,17 +26,17 @@
 int CUDA_zgeqrt(
-        magma_int_t m, magma_int_t n, magma_int_t nb,
-        magmaDoubleComplex *da, magma_int_t ldda,
-        magmaDoubleComplex *v,  magma_int_t ldv,
-        magmaDoubleComplex *dt, magma_int_t lddt,
-        magmaDoubleComplex *t,  magma_int_t ldt,
-        magmaDoubleComplex *dd,
-        magmaDoubleComplex *d,  magma_int_t ldd,
-        magmaDoubleComplex *tau,
-        magmaDoubleComplex *hwork,
-        magmaDoubleComplex *dwork,
-        CUstream stream)
+    magma_int_t m, magma_int_t n, magma_int_t nb,
+    magmaDoubleComplex *da, magma_int_t ldda,
+    magmaDoubleComplex *v,  magma_int_t ldv,
+    magmaDoubleComplex *dt, magma_int_t lddt,
+    magmaDoubleComplex *t,  magma_int_t ldt,
+    magmaDoubleComplex *dd,
+    magmaDoubleComplex *d,  magma_int_t ldd,
+    magmaDoubleComplex *tau,
+    magmaDoubleComplex *hwork,
+    magmaDoubleComplex *dwork,
+    CUstream stream)
 #define da_ref(a_1,a_2) ( da+(a_2)*(ldda) + (a_1))
 #define v_ref(a_1,a_2)  ( v+(a_2)*(ldv) + (a_1))
@@ -45,7 +45,6 @@ int CUDA_zgeqrt(
     int i, k, ib, old_i, old_ib, rows, cols;
     double _Complex one=1.;
-    int i1, i2;
     if (m < 0) {
         return -1;
@@ -62,7 +61,7 @@ int CUDA_zgeqrt(
     /* lower parts of little T must be zero: memset to 0 for simplicity */
-    memset(t_ref(0,0), 0, nb*nb*sizeof(magmaDoubleComplex));
+    memset(t_ref(0,0), 0, nb*n*sizeof(magmaDoubleComplex));
     cudaMemsetAsync(dt_ref(0,0), 0, nb*n*sizeof(magmaDoubleComplex), stream);
     if ( (nb > 1) && (nb < k) ) {
@@ -101,7 +100,7 @@ int CUDA_zgeqrt(
                         (double _Complex*) hwork);
             /* Put 0s in the upper triangular part of a panel (and 1s on the
-               diagonal); copy the upper triangular in d. */
+             diagonal); copy the upper triangular in d. */
             CORE_zgesplit(MorseLeft, MorseUnit, min(rows,ib), ib,
                           (double _Complex*) v_ref(i, 0), ldv,
                           (double _Complex*) d, ib);
diff --git a/cudablas/compute/cuda_ztsmlq.c b/cudablas/compute/cuda_ztsmlq.c
index 0a44d580020444d72d8740f8989aa73f0245ded7..6c525138af35fc407cb20b26da094d53b804a5b5 100644
--- a/cudablas/compute/cuda_ztsmlq.c
+++ b/cudablas/compute/cuda_ztsmlq.c
@@ -55,7 +55,7 @@ int CUDA_ztsmlq(
         NW = IB;
     else {
-        NW = N1;
+        NW = M1;
     if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
diff --git a/include/morse_kernels.h b/include/morse_kernels.h
index 0680893add97d4c6ca45f27cdf902ea3e3cf7cc5..2d29233992f8ea1e80d7f262caf5b89437f1f5a9 100644
--- a/include/morse_kernels.h
+++ b/include/morse_kernels.h
@@ -69,6 +69,7 @@ typedef enum morse_kernel_e {
diff --git a/include/morse_z.h b/include/morse_z.h
index 8335b892513343c1adef902a5c7142122f44b9b4..3d3a2dbf578468d5086cd0895ecd4f91d621c82b 100644
--- a/include/morse_z.h
+++ b/include/morse_z.h
@@ -81,6 +81,7 @@ double MORSE_zlanhe(MORSE_enum norm, MORSE_enum uplo, int N, MORSE_Complex64_t *
 double MORSE_zlansy(MORSE_enum norm, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA);
 double MORSE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, int M, int N, MORSE_Complex64_t *A, int LDA);
+int MORSE_zlascal(MORSE_enum uplo, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA);
 int MORSE_zlaset(MORSE_enum uplo, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_Complex64_t *A, int LDA);
 //int MORSE_zlaswp(int N, MORSE_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
 //int MORSE_zlaswpc(int N, MORSE_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
@@ -157,6 +158,7 @@ double MORSE_zlanhe_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A);
 double MORSE_zlansy_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A);
 double MORSE_zlantr_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A);
+int MORSE_zlascal_Tile(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A);
 int MORSE_zlaset_Tile(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_desc_t *A);
 //int MORSE_zlaswp_Tile(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX);
 //int MORSE_zlaswpc_Tile(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX);
@@ -230,6 +232,7 @@ int MORSE_zlanhe_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, d
 int MORSE_zlansy_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A, double *value, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zlantr_Tile_Async(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A, double *value, MORSE_sequence_t *sequence, MORSE_request_t *request);
+int MORSE_zlascal_Tile_Async(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zlaset_Tile_Async(MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_Complex64_t beta, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 //int MORSE_zlaswp_Tile_Async(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX, MORSE_sequence_t *sequence, MORSE_request_t *request);
 //int MORSE_zlaswpc_Tile_Async(MORSE_desc_t *A, int K1, int K2, int *IPIV, int INCX, MORSE_sequence_t *sequence, MORSE_request_t *request);
diff --git a/include/runtime_z.h b/include/runtime_z.h
index 1d5c16016b888e7726c3f681349a00ce8c1b7100..3ed09528ba9d80d1a42c2b5bc4248da91c7991e7 100644
--- a/include/runtime_z.h
+++ b/include/runtime_z.h
@@ -50,6 +50,11 @@ void MORSE_TASK_zgeadd(const MORSE_option_t *options,
                        MORSE_enum trans, int m, int n, int nb,
                        MORSE_Complex64_t alpha, const MORSE_desc_t *A, int Am, int An, int lda,
                        MORSE_Complex64_t beta,  const MORSE_desc_t *B, int Bm, int Bn, int ldb);
+void MORSE_TASK_zlascal(const MORSE_option_t *options,
+                        MORSE_enum uplo,
+                        int m, int n, int nb,
+                        MORSE_Complex64_t alpha,
+                        const MORSE_desc_t *A, int Am, int An, int lda);
 void MORSE_TASK_zbrdalg(const MORSE_option_t *options,
                         MORSE_enum uplo,
                         int N, int NB,
diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt
index 1a9bef5d8d610a98653af83b4c040d6cdb46b5b3..e67975d6b8b507587fe9978088d4c0d9244ddc05 100644
--- a/runtime/quark/CMakeLists.txt
+++ b/runtime/quark/CMakeLists.txt
@@ -10,8 +10,8 @@
 #  @file CMakeLists.txt
-#  @project MORSE
-#  MORSE is a software package provided by:
+#  @project CHAMELEON
+#  CHAMELEON is a software package provided by:
 #     Inria Bordeaux - Sud-Ouest,
 #     Univ. of Tennessee,
 #     King Abdullah Univesity of Science and Technology
@@ -22,6 +22,7 @@
 #  @author Cedric Castagnede
 #  @author Emmanuel Agullo
 #  @author Mathieu Faverge
+#  @author Florent Pruvost
 #  @date 13-07-2012
@@ -107,6 +108,7 @@ set(ZSRC
     # LAPACK
+    codelets/codelet_zlascal.c
diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c
index 1d950a22e1905703fd06efc7ce5aefc2afcfbf7a..875471e893bde9d5784a203c6816e947ee89a426 100644
--- a/runtime/quark/codelets/codelet_zaxpy.c
+++ b/runtime/quark/codelets/codelet_zaxpy.c
@@ -31,8 +31,8 @@ void MORSE_TASK_zaxpy(const MORSE_option_t *options,
                       const MORSE_desc_t *A, int Am, int An, int incA,
                       const MORSE_desc_t *B, int Bm, int Bn, int incB)
-	quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zaxpy_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                        &M,         VALUE,
         sizeof(MORSE_Complex64_t),          alpha,      VALUE,
diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c
index 3fb14a416ad1efb7df68fcb26204013ceed4ab81..a0cc3b1f88ceb70d2c13dea1bd613d1384a742c0 100644
--- a/runtime/quark/codelets/codelet_zgelqt.c
+++ b/runtime/quark/codelets/codelet_zgelqt.c
@@ -31,7 +31,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c
index 0d817631ffe3acebf408cce9064f06e260a1b059..d1906b28da022d7e311541ae8d25b8caf9b3ca8f 100644
--- a/runtime/quark/codelets/codelet_zgeqrt.c
+++ b/runtime/quark/codelets/codelet_zgeqrt.c
@@ -31,7 +31,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c
index 799b7ac4bc1a01efc5cdf01c791cee68aa44fe7f..2b2fd9da49a5ace36c50cb0c38f6d8b5d7cbefb9 100644
--- a/runtime/quark/codelets/codelet_zlacpy.c
+++ b/runtime/quark/codelets/codelet_zlacpy.c
@@ -36,6 +36,19 @@
  * @ingroup CORE_MORSE_Complex64_t
+static inline void CORE_zlacpy_quark(Quark *quark) /* QUARK task body: unpacks the seven task arguments and forwards them to CORE_zlacpy. */
+    MORSE_enum uplo;
+    int M;
+    int N;
+    MORSE_Complex64_t *A;
+    int LDA;
+    MORSE_Complex64_t *B;
+    int LDB;
+    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB); /* order must match the argument list given when the task was inserted */
+    CORE_zlacpy(uplo, M, N, A, LDA, B, LDB);
 void MORSE_TASK_zlacpy(const MORSE_option_t *options,
                        MORSE_enum uplo, int m, int n, int nb,
@@ -55,18 +68,3 @@ void MORSE_TASK_zlacpy(const MORSE_option_t *options,
-void CORE_zlacpy_quark(Quark *quark)
-    MORSE_enum uplo;
-    int M;
-    int N;
-    MORSE_Complex64_t *A;
-    int LDA;
-    MORSE_Complex64_t *B;
-    int LDB;
-    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB);
-    CORE_zlacpy(uplo, M, N, A, LDA, B, LDB);
diff --git a/runtime/quark/codelets/codelet_zlascal.c b/runtime/quark/codelets/codelet_zlascal.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab96da473c760287503e0a995452e0ba75203d52
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zlascal.c
@@ -0,0 +1,70 @@
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014, 2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+ *
+ * @file codelet_zlascal.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.5.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for MORSE 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+static inline void CORE_zlascal_quark(Quark *quark)
+    MORSE_enum uplo;
+    int M;
+    int N;
+    MORSE_Complex64_t alpha;
+    MORSE_Complex64_t *A;
+    int LDA;
+    quark_unpack_args_6(quark, uplo, M, N, alpha, A, LDA);
+    CORE_zlascal(uplo, M, N, alpha, A, LDA);
+/*
+ * Insert a QUARK task running CORE_zlascal_quark on tile (Am, An) of A.
+ * Scalar arguments are passed by address and copied by QUARK (VALUE);
+ * the tile itself is declared INOUT since it is scaled in place.
+ */
+void MORSE_TASK_zlascal(const MORSE_option_t *options,
+                        MORSE_enum uplo,
+                        int m, int n, int nb,
+                        MORSE_Complex64_t alpha,
+                        const MORSE_desc_t *A, int Am, int An, int lda)
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    QUARK_Insert_Task(opt->quark, CORE_zlascal_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),                 &uplo,  VALUE,
+        sizeof(int),                        &m,     VALUE,
+        sizeof(int),                        &n,     VALUE,
+        sizeof(MORSE_Complex64_t),          &alpha, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A, MORSE_Complex64_t, Am, An),             INOUT,
+        sizeof(int),                        &lda,   VALUE,
+        0);
diff --git a/runtime/quark/codelets/codelet_ztslqt.c b/runtime/quark/codelets/codelet_ztslqt.c
index e59162cb369a1bb1a55732f8b5baf51618964e2e..1f2e181b7e360ac03ecd406e934d9522937e6452 100644
--- a/runtime/quark/codelets/codelet_ztslqt.c
+++ b/runtime/quark/codelets/codelet_ztslqt.c
@@ -30,10 +30,8 @@
 #include "runtime/quark/include/morse_quark.h"
-#undef REAL
-#define COMPLEX
  * @ingroup CORE_MORSE_Complex64_t
@@ -119,11 +117,11 @@ void MORSE_TASK_ztslqt(const MORSE_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT | QUARK_REGION_D | QUARK_REGION_L,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_L | LOCALITY,
         sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT,
         sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
+        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    OUTPUT,
         sizeof(int),                        &ldt,   VALUE,
         sizeof(MORSE_Complex64_t)*nb,       NULL,          SCRATCH,
         sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
diff --git a/runtime/quark/codelets/codelet_ztsmlq.c b/runtime/quark/codelets/codelet_ztsmlq.c
index 645630434f22698efa7e3037f70203bb64ed18fd..81dc27110abc0f7c761542fba7e4adafd2c395cf 100644
--- a/runtime/quark/codelets/codelet_ztsmlq.c
+++ b/runtime/quark/codelets/codelet_ztsmlq.c
@@ -32,7 +32,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
diff --git a/runtime/quark/codelets/codelet_ztsmqr.c b/runtime/quark/codelets/codelet_ztsmqr.c
index ade4d922ea8dfe3898312e2c1daeb9552396edb4..aaff66e2830d3a352d9858a652025ce14e88d4d5 100644
--- a/runtime/quark/codelets/codelet_ztsmqr.c
+++ b/runtime/quark/codelets/codelet_ztsmqr.c
@@ -32,7 +32,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
diff --git a/runtime/quark/codelets/codelet_ztsqrt.c b/runtime/quark/codelets/codelet_ztsqrt.c
index aeaf06fd4beb98199a9f0312dc4fa3d954abfe15..76546d31e37c9a07f8e77ab1a2f61ffddbf52d13 100644
--- a/runtime/quark/codelets/codelet_ztsqrt.c
+++ b/runtime/quark/codelets/codelet_ztsqrt.c
@@ -30,10 +30,8 @@
 #include "runtime/quark/include/morse_quark.h"
-#undef REAL
-#define COMPLEX
  * @ingroup CORE_MORSE_Complex64_t
@@ -108,11 +106,11 @@ void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT | QUARK_REGION_D | QUARK_REGION_U | LOCALITY,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT | QUARK_REGION_D | QUARK_REGION_U | LOCALITY,
         sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT,
         sizeof(int),                        &lda2,  VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
+        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    OUTPUT,
         sizeof(int),                        &ldt,   VALUE,
         sizeof(MORSE_Complex64_t)*nb,       NULL,          SCRATCH,
         sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
diff --git a/runtime/quark/codelets/codelet_zttlqt.c b/runtime/quark/codelets/codelet_zttlqt.c
index 423708140a7f35fbe9f0aa8f9ec9bff90aa9382e..98b42550badcdc8053a720b467e32b04562ca32f 100644
--- a/runtime/quark/codelets/codelet_zttlqt.c
+++ b/runtime/quark/codelets/codelet_zttlqt.c
@@ -30,10 +30,8 @@
 #include "runtime/quark/include/morse_quark.h"
-#undef REAL
-#define COMPLEX
  * @ingroup CORE_MORSE_Complex64_t
@@ -120,9 +118,9 @@ void MORSE_TASK_zttlqt(const MORSE_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT/**/,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
         sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT/**/|LOCALITY,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
         sizeof(int),                        &lda2,  VALUE,
         sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
         sizeof(int),                        &ldt,   VALUE,
diff --git a/runtime/quark/codelets/codelet_zttmlq.c b/runtime/quark/codelets/codelet_zttmlq.c
index 8d9c9a3d27b472ee37620a79999d146fff6a75b5..cab48f44c9701c937ceeb596b87498a3bd4a7bdf 100644
--- a/runtime/quark/codelets/codelet_zttmlq.c
+++ b/runtime/quark/codelets/codelet_zttmlq.c
@@ -30,7 +30,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
@@ -102,7 +102,7 @@
  * @param[in] LDV
  *         The leading dimension of the array V. LDV >= max(1,K).
- * @param[out] T
+ * @param[in] T
  *         The IB-by-N1 triangular factor T of the block reflector.
  *         T is upper triangular by block (economic storage);
  *         The rest of the array is not referenced.
diff --git a/runtime/quark/codelets/codelet_zttmqr.c b/runtime/quark/codelets/codelet_zttmqr.c
index 15b93b4b4478c4ec0142a3d9071aaa7f460d7169..849a5454d18cd3961205a47d95cf08cd63552314 100644
--- a/runtime/quark/codelets/codelet_zttmqr.c
+++ b/runtime/quark/codelets/codelet_zttmqr.c
@@ -30,7 +30,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
diff --git a/runtime/quark/codelets/codelet_zttqrt.c b/runtime/quark/codelets/codelet_zttqrt.c
index e6ed35f0b17d92959a998febb3b1dd00465c9220..06106fc5a84299a1874116eb04670662470ecd69 100644
--- a/runtime/quark/codelets/codelet_zttqrt.c
+++ b/runtime/quark/codelets/codelet_zttqrt.c
@@ -30,10 +30,8 @@
 #include "runtime/quark/include/morse_quark.h"
-#undef REAL
-#define COMPLEX
  * @ingroup CORE_MORSE_Complex64_t
@@ -120,9 +118,9 @@ void MORSE_TASK_zttqrt(const MORSE_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT/**/,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),            INOUT,
         sizeof(int),                        &lda1,  VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT/**/|LOCALITY,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
         sizeof(int),                        &lda2,  VALUE,
         sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             OUTPUT,
         sizeof(int),                        &ldt,   VALUE,
diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c
index 939933714fb24567eadd5b67cfc81219dfed2469..1ae6b0899667469aad7ca8ba0376430622e50af2 100644
--- a/runtime/quark/codelets/codelet_zunmlq.c
+++ b/runtime/quark/codelets/codelet_zunmlq.c
@@ -32,7 +32,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
@@ -126,20 +126,20 @@ void MORSE_TASK_zunmlq(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zunmlq_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &side,  VALUE,
-        sizeof(MORSE_enum),                &trans, VALUE,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(int),                        &k,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A, MORSE_Complex64_t, Am, An),             INPUT | QUARK_REGION_U,
-        sizeof(int),                        &lda,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),             INPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn),             INOUT,
-        sizeof(int),                        &ldc,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,    NULL,          SCRATCH,
-        sizeof(int),                        &nb,    VALUE,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(int),                     &k,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT | QUARK_REGION_U,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), INOUT,
+        sizeof(int),                     &ldc,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  NULL,      SCRATCH,
+        sizeof(int),                     &nb,    VALUE,
diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c
index fb9f0f10ddd5493046e570eeb82731e796d28d65..01e9f150b909659302bd941f3cca8d80edc79154 100644
--- a/runtime/quark/codelets/codelet_zunmqr.c
+++ b/runtime/quark/codelets/codelet_zunmqr.c
@@ -8,7 +8,7 @@
- /**
  * @file codelet_zunmqr.c
@@ -31,7 +31,7 @@
 #include "runtime/quark/include/morse_quark.h"
  * @ingroup CORE_MORSE_Complex64_t
@@ -126,20 +126,20 @@ void MORSE_TASK_zunmqr(const MORSE_option_t *options,
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zunmqr_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &side,  VALUE,
-        sizeof(MORSE_enum),                &trans, VALUE,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(int),                        &k,     VALUE,
-        sizeof(int),                        &ib,    VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,   RTBLKADDR(A, MORSE_Complex64_t, Am, An),      INPUT | QUARK_REGION_L,
-        sizeof(int),                        &lda,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,   RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),      INPUT,
-        sizeof(int),                        &ldt,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,   RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn),      INOUT,
-        sizeof(int),                        &ldc,   VALUE,
-        sizeof(MORSE_Complex64_t)*ib*nb,   NULL,   SCRATCH,
-        sizeof(int),                        &nb,    VALUE,
+        sizeof(MORSE_enum),              &side,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(int),                     &k,     VALUE,
+        sizeof(int),                     &ib,    VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT | QUARK_REGION_L,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT,
+        sizeof(int),                     &ldt,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), INOUT,
+        sizeof(int),                     &ldc,   VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  NULL,      SCRATCH,
+        sizeof(int),                     &nb,    VALUE,
@@ -166,4 +166,3 @@ void CORE_zunmqr_quark(Quark *quark)
     CORE_zunmqr(side, trans, m, n, k, ib,
                 A, lda, T, ldt, C, ldc, WORK, ldwork);
diff --git a/runtime/quark/control/runtime_descriptor.c b/runtime/quark/control/runtime_descriptor.c
index 5ce2b288bef23e67d1aba6eea5d0417a618ab5eb..6e4dfa9d20ede9b54e254972890e5d297df4b53e 100644
--- a/runtime/quark/control/runtime_descriptor.c
+++ b/runtime/quark/control/runtime_descriptor.c
@@ -16,7 +16,7 @@
  *  MORSE is a software package provided by Univ. of Tennessee,
  *  Univ. of California Berkeley and Univ. of Colorado Denver
- * @version 
+ * @version
  * @author Vijay Joshi
  * @author Cedric Castagnede
  * @date 2012-09-15
diff --git a/runtime/quark/include/core_blas_dag.h b/runtime/quark/include/core_blas_dag.h
index 0ab24fcce0a1db092447abb2fabb0f6005f1056d..68358d10f2061f06e2fa83f02b56d28aafcb2248 100644
--- a/runtime/quark/include/core_blas_dag.h
+++ b/runtime/quark/include/core_blas_dag.h
@@ -37,6 +37,7 @@
 #define DAG_CORE_AXPY       DAG_SET_PROPERTIES( "AXPY"      , "white"   )
 #define DAG_CORE_BUILD      DAG_SET_PROPERTIES( "BUILD"     , "white"   )
 #define DAG_CORE_GEADD      DAG_SET_PROPERTIES( "GEADD"     , "white"   )
+#define DAG_CORE_LASCAL     DAG_SET_PROPERTIES( "LASCAL"    , "white"   )
 #define DAG_CORE_GELQT      DAG_SET_PROPERTIES( "GELQT"     , "green"   )
 #define DAG_CORE_GEMM       DAG_SET_PROPERTIES( "GEMM"      , "yellow"  )
 #define DAG_CORE_GEQRT      DAG_SET_PROPERTIES( "GEQRT"     , "green"   )
diff --git a/runtime/quark/include/quark_zblas.h b/runtime/quark/include/quark_zblas.h
index 36d4aaa2c7eadd07b97892f55b00b8f2c61501d8..e5fe96bac53a1ab8c8eea44c212fc75d5309788f 100644
--- a/runtime/quark/include/quark_zblas.h
+++ b/runtime/quark/include/quark_zblas.h
@@ -69,7 +69,6 @@ void CORE_zher2k_quark(Quark *quark);
 void CORE_zhegst_quark(Quark *quark);
 void CORE_zherfb_quark(Quark *quark);
 void CORE_zhessq_quark(Quark *quark);
-void CORE_zlacpy_quark(Quark *quark);
 void CORE_zlatro_quark(Quark *quark);
 void CORE_zlange_quark(Quark *quark);
 void CORE_zlange_max_quark(Quark *quark);
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index cfe20135d19b109390ea8d32c1a905d13ac7e49a..764e55567b562b90594252c8911d6150e22dfe2d 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -27,6 +27,7 @@
+cmake_minimum_required(VERSION 2.8)
 # check if magma_dgetrf_incpiv_gpu is accessible in libmagma and activate it in chameleon
@@ -83,10 +84,10 @@ precisions_rules_py(RUNTIME_COMMON_GENERATED "${ZSRC}"
                     TARGETDIR "control")
-    control/runtime_control.c
+    control/runtime_async.c
+    control/runtime_control.c
-    control/runtime_async.c
@@ -127,6 +128,7 @@ set(ZSRC
     # LAPACK
+    codelets/codelet_zlascal.c
@@ -171,7 +173,6 @@ set(ZSRC
 precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
                     PRECISIONS "${CHAMELEON_PRECISION}"
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 010334af9e332f9e0072709143c9e2bb5715f886..fa66cff4d9f3be011bfbe7e7a51f6c29ebdde7e4 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -31,6 +31,7 @@
 CHAMELEON_CL_CB(zasum,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N);
 CHAMELEON_CL_CB(zaxpy,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0,                                      M);
 CHAMELEON_CL_CB(zgeadd,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N);
+CHAMELEON_CL_CB(zlascal,       starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N);
 CHAMELEON_CL_CB(zgelqt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      (4./3.)*M*N*K);
 CHAMELEON_CL_CB(zgemm,         starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), starpu_matrix_get_ny(task->handles[0]),     2. *M*N*K); /* If A^t, computation is wrong */
 CHAMELEON_CL_CB(zgeqrt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      (4./3.)*M*M*N);
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a2fc682195bd31af7de870004d650c8071680f6
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -0,0 +1,109 @@
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+ *
+ * @file codelet_zlascal.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.5.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for MORSE 1.0.0
+ * @author Dalal Sukkari
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ *  CORE_zlascal scales the matrix A by the scalar alpha.
+ *
+ *       A <- alpha * A
+ *
+ *******************************************************************************
+ *
+ * @param[in] M
+ *          Number of rows of the matrix A.
+ *
+ * @param[in] N
+ *          Number of columns of the matrix A.
+ *
+ * @param[in] alpha
+ *          Scalar factor of A.
+ *
+ * @param[in,out] A
+ *          Matrix of size LDA-by-N. On exit, A is overwritten by alpha * A.
+ *
+ * @param[in] LDA
+ *          Leading dimension of the array A. LDA >= max(1,M)
+ *
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ ******************************************************************************/
+void MORSE_TASK_zlascal(const MORSE_option_t *options,
+                        MORSE_enum uplo,
+                        int m, int n, int nb,
+                        MORSE_Complex64_t alpha,
+                        const MORSE_desc_t *A, int Am, int An, int lda)
+    (void)nb;
+    struct starpu_codelet *codelet = &cl_zlascal;
+    void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL;
+    if ( morse_desc_islocal( A, Am, An ))
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE,    &uplo,              sizeof(MORSE_enum),
+            STARPU_VALUE,    &m,                  sizeof(int),
+            STARPU_VALUE,    &n,                  sizeof(int),
+            STARPU_VALUE,    &alpha,              sizeof(MORSE_Complex64_t),
+            STARPU_RW,         RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE,    &lda,                sizeof(int),
+            STARPU_PRIORITY,  options->priority,
+            STARPU_CALLBACK,  callback,
+            0);
+    }
+static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
+    MORSE_enum uplo;
+    int M;
+    int N;
+    MORSE_Complex64_t alpha;
+    MORSE_Complex64_t *A;
+    int LDA;
+    A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
+    CORE_zlascal(uplo, M, N, alpha, A, LDA);
+    return;
+ * Codelet definition
+ */
+CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 33433cf0224292a78b110563b3222400846693e6..7af0ac2e49860997f9a7c1d190b4c2765ace7bd0 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -88,6 +88,7 @@ ZCODELETS_HEADER(unmqr)
  * Auxiliary functions
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 6c50399235d7087513f857f397a91cdf10b9357f..ccb9f5b675e919c9059a6fcc3ef343d671d045fd 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -26,7 +26,7 @@ foreach(cat  ${TEST_CATEGORIES})
         add_test(test_${cat}_${prec}syrk  ./${prec}${TEST_CMD_${cat}} SYRK  1.0 -2.0 600 500 650 625)
         add_test(test_${cat}_${prec}syr2k ./${prec}${TEST_CMD_${cat}} SYR2K 1.0 -2.0 600 500 650 625 700)
-        if ( prec STREQUAL "c" OR prec STREQUAL "z" )
+        if ( ${prec} STREQUAL "c" OR ${prec} STREQUAL "z" )
           add_test(test_${cat}_${prec}hemm  ./${prec}${TEST_CMD_${cat}} HEMM      1.0 -2.0 600 500 650 625 600)
           add_test(test_${cat}_${prec}herk  ./${prec}${TEST_CMD_${cat}} HERK      1.0 -2.0 600 500 650 625)
           add_test(test_${cat}_${prec}her2k ./${prec}${TEST_CMD_${cat}} HER2K     1.0 -2.0 600 500 650 625 700)