From 5715a0a813df0ba8b196a38bec14f488584859c1 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 7 Nov 2018 19:05:50 +0100
Subject: [PATCH] Update StarPU codelets

---
 runtime/starpu/codelets/codelet_ztpmlqt.c     | 18 ++++++++-----
 runtime/starpu/codelets/codelet_ztpmqrt.c     | 27 +++++++++++--------
 runtime/starpu/codelets/codelet_ztsmlq.c      | 25 ++++++++---------
 .../starpu/codelets/codelet_ztsmlq_hetra1.c   |  6 ++---
 runtime/starpu/codelets/codelet_ztsmqr.c      | 27 ++++++++++---------
 runtime/starpu/codelets/codelet_zttmqr.c      | 23 +++++++++-------
 6 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 714b45762..8dffa4ff2 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -11,7 +11,7 @@
  *
  * @version 1.0.0
  * @author Mathieu Faverge
- * @date 2016-12-15
+ * @date 2018-11-07
  * @precisions normal z -> s d c
  *
  */
@@ -37,6 +37,7 @@ static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t *B;
     int ldb;
     CHAMELEON_Complex64_t *WORK;
+    size_t lwork;
 
     V    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
     T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -45,13 +46,15 @@ static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg)
     WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
 
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
-                                &ldv, &ldt, &lda, &ldb );
+                                &ldv, &ldt, &lda, &ldb, &lwork );
 
     CORE_ztpmlqt( side, trans, M, N, K, L, ib,
                   V, ldv, T, ldt, A, lda, B, ldb, WORK );
+
+    (void)lwork;
 }
 
-#if defined(CHAMELEON_USE_CUDA) && 0
+#if defined(CHAMELEON_USE_CUDA)
 static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
 {
     cham_side_t side;
@@ -70,6 +73,7 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *B;
     int ldb;
     cuDoubleComplex *W;
+    size_t lwork;
 
     V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -78,14 +82,14 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
     W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
 
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
-                                &ldv, &ldt, &lda, &ldb );
+                                &ldv, &ldt, &lda, &ldb, &lwork );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztpmlqt(
             side, trans, M, N, K, L, ib,
             V, ldv, T, ldt, A, lda, B, ldb,
-            W, stream );
+            W, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -97,8 +101,7 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztpmlqt, 5, cl_ztpmlqt_cpu_func)
-//CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
 
 void
 INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
@@ -136,6 +139,7 @@ INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
         STARPU_VALUE, &lda,   sizeof(int),
         STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE, &ldb,   sizeof(int),
+        STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index 40f83ab2d..6684e59f8 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -11,7 +11,7 @@
  *
  * @version 1.0.0
  * @author Mathieu Faverge
- * @date 2016-12-15
+ * @date 2018-11-07
  * @precisions normal z -> s d c
  *
  */
@@ -37,6 +37,7 @@ static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t *B;
     int ldb;
     CHAMELEON_Complex64_t *WORK;
+    size_t lwork;
 
     V    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
     T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -45,10 +46,12 @@ static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
     WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
 
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
-                                &ldv, &ldt, &lda, &ldb );
+                                &ldv, &ldt, &lda, &ldb, &lwork );
 
     CORE_ztpmqrt( side, trans, M, N, K, L, ib,
                   V, ldv, T, ldt, A, lda, B, ldb, WORK );
+
+    (void)lwork;
 }
 
 
@@ -71,22 +74,23 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
     cuDoubleComplex *B;
     int ldb;
     cuDoubleComplex *W;
+    size_t lwork;
 
     V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-    W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
+    W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 3*ib*nb */
 
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
-                                &ldv, &ldt, &lda, &ldb );
+                                &ldv, &ldt, &lda, &ldb, &lwork );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztpmqrt(
             side, trans, M, N, K, L, ib,
             V, ldv, T, ldt, A, lda, B, ldb,
-            W, stream );
+            W, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -102,12 +106,12 @@ CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYN
 
 void
 INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                     cham_side_t side, cham_trans_t trans,
+                     int M, int N, int K, int L, int ib, int nb,
+                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                     const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                     const CHAM_desc_t *A, int Am, int An, int lda,
+                     const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztpmqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
@@ -136,6 +140,7 @@ INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
         STARPU_VALUE, &lda,   sizeof(int),
         STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE, &ldb,   sizeof(int),
+        STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztsmlq.c b/runtime/starpu/codelets/codelet_ztsmlq.c
index b0a2e38ec..ad1d0a7a0 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq.c
@@ -21,7 +21,7 @@
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
- * @date 2010-11-15
+ * @date 2018-11-07
  * @precisions normal z -> c d s
  *
  */
@@ -165,9 +165,10 @@ void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options,
         STARPU_VALUE,    &ldv,               sizeof(int),
         STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
         STARPU_VALUE,    &ldt,               sizeof(int),
-        /* max( ib*nb, 2*ib*nb ) */
+        /* max( ib*nb, 3*ib*nb ) */
         STARPU_SCRATCH,   options->ws_worker,
-        STARPU_VALUE,    &ldwork,            sizeof(int),
+        STARPU_VALUE,    &ldwork,              sizeof(int),
+        STARPU_VALUE,    &(options->ws_wsize), sizeof(size_t),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
@@ -198,6 +199,7 @@ static void cl_ztsmlq_cpu_func(void *descr[], void *cl_arg)
     int ldt;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
+    size_t lwork;
 
     A1   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -206,10 +208,12 @@ static void cl_ztsmlq_cpu_func(void *descr[], void *cl_arg)
     WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
 
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
-                               &lda1, &lda2, &ldv, &ldt, &ldwork);
+                               &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
 
     CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+
+    (void)lwork;
 }
 
 #if defined(CHAMELEON_USE_CUDA)
@@ -231,9 +235,9 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
     int ldv;
     cuDoubleComplex *T;
     int ldt;
-    cuDoubleComplex *W, *WC;
+    cuDoubleComplex *W;
     int ldwork;
-    int ldworkc;
+    size_t lwork;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -241,17 +245,14 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
     T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
     W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
-                               &lda1, &lda2, &ldv, &ldt, &ldwork);
-
-    WC = W + ib * ldwork;
-    ldworkc = (side == ChamLeft) ? m1 : ib;
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
+                                &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztsmlq( side, trans, m1, n1, m2, n2, k, ib,
                       A1, lda1, A2, lda2, V, ldv, T, ldt,
-                      W, ldwork, WC, ldworkc, stream );
+                      W, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index 8996121be..d68e2bebf 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -15,7 +15,7 @@
  * @author Hatem Ltaief
  * @author Mathieu Faverge
  * @author Azzam Haidar
- * @date 2010-11-15
+ * @date 2018-11-07
  * @precisions normal z -> c d s
  *
  */
@@ -106,8 +106,8 @@ static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
     T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
     WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
-                               &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork);
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
+                                &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork);
     CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
                        ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
 }
diff --git a/runtime/starpu/codelets/codelet_ztsmqr.c b/runtime/starpu/codelets/codelet_ztsmqr.c
index c38a8fbd1..349aa129e 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr.c
@@ -21,7 +21,7 @@
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
- * @date 2010-11-15
+ * @date 2018-11-07
  * @precisions normal z -> c d s
  *
  */
@@ -169,6 +169,7 @@ void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options,
         /* max( ib*nb, 2*ib*nb ) */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_VALUE,    &ldwork,            sizeof(int),
+        STARPU_VALUE,   &(options->ws_wsize), sizeof(size_t),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_USE_MPI)
@@ -202,6 +203,7 @@ static void cl_ztsmqr_cpu_func(void *descr[], void *cl_arg)
     int ldt;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
+    size_t lwork;
 
     A1   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -210,10 +212,12 @@ static void cl_ztsmqr_cpu_func(void *descr[], void *cl_arg)
     WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
 
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
-                               &lda1, &lda2, &ldv, &ldt, &ldwork);
+                               &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork);
 
     CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+
+    (void)lwork;
 }
 
 #if defined(CHAMELEON_USE_CUDA)
@@ -235,9 +239,9 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
     int ldv;
     cuDoubleComplex *T;
     int ldt;
-    cuDoubleComplex *W, *WC;
+    cuDoubleComplex *W;
     int ldwork;
-    int ldworkc;
+    size_t lwork;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -245,22 +249,21 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
     T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
     W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
-                               &lda1, &lda2, &ldv, &ldt, &ldwork);
-
-    WC = W + ib * (side == ChamLeft ? m1 : n1);
-    ldworkc = (side == ChamLeft) ? m2 : ib;
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
+                                &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztsmqr(
-            side, trans, m1, n1, m2, n2, k, ib,
-            A1, lda1, A2, lda2, V, ldv, T, ldt,
-            W, ldwork, WC, ldworkc, stream );
+        side, trans, m1, n1, m2, n2, k, ib,
+        A1, lda1, A2, lda2, V, ldv, T, ldt,
+        W, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
 #endif
+
+    (void)ldwork;
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
diff --git a/runtime/starpu/codelets/codelet_zttmqr.c b/runtime/starpu/codelets/codelet_zttmqr.c
index d485d16b9..0f88020fc 100644
--- a/runtime/starpu/codelets/codelet_zttmqr.c
+++ b/runtime/starpu/codelets/codelet_zttmqr.c
@@ -19,7 +19,7 @@
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
- * @date 2010-11-15
+ * @date 2018-11-07
  * @precisions normal z -> c d s
  *
  */
@@ -167,6 +167,7 @@ void INSERT_TASK_zttmqr(const RUNTIME_option_t *options,
         /* max( ib*nb, 2*ib*nb ) */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_VALUE,    &ldwork,            sizeof(int),
+        STARPU_VALUE,    &(options->ws_wsize), sizeof(size_t),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_USE_MPI)
@@ -200,6 +201,7 @@ static void cl_zttmqr_cpu_func(void *descr[], void *cl_arg)
     int ldt;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
+    size_t lwork;
 
     A1   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -208,10 +210,12 @@ static void cl_zttmqr_cpu_func(void *descr[], void *cl_arg)
     WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
 
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
-                               &lda1, &lda2, &ldv, &ldt, &ldwork);
+                               &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
 
     CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib,
                 A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+
+    (void)lwork;
 }
 
 #if defined(CHAMELEON_USE_CUDA)
@@ -233,9 +237,9 @@ static void cl_zttmqr_cuda_func(void *descr[], void *cl_arg)
     int ldv;
     cuDoubleComplex *T;
     int ldt;
-    cuDoubleComplex *W, *WC;
+    cuDoubleComplex *W;
     int ldwork;
-    int ldworkc;
+    size_t lwork;
 
     A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
     A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -243,22 +247,21 @@ static void cl_zttmqr_cuda_func(void *descr[], void *cl_arg)
     T  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
     W  = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
-                               &lda1, &lda2, &ldv, &ldt, &ldwork);
-
-    WC = W + ib * (side == ChamLeft ? m1 : n1);
-    ldworkc = (side == ChamLeft) ? m2 : ib;
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
+                                &lda1, &lda2, &ldv, &ldt, &ldwork, &lwork );
 
     RUNTIME_getStream(stream);
 
     CUDA_zttmqr(
             side, trans, m1, n1, m2, n2, k, ib,
             A1, lda1, A2, lda2, V, ldv, T, ldt,
-            W, ldwork, WC, ldworkc, stream );
+            W, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
 #endif
+
+    (void)ldwork;
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
-- 
GitLab