From a59ca4f3574db6eb782a838b7827d7cc6edd4201 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 28 Sep 2022 22:20:13 +0200
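Subject: [PATCH] mixed-precision: update files in runtimes

Switch the zlag2c/clag2z codelets of the OpenMP, PaRSEC, Quark and StarPU
runtimes to the mixed-precision (zc) headers, and pass the info argument
to CORE_zlag2c/TCORE_zlag2c in the OpenMP, PaRSEC and StarPU codelets.

For StarPU, the zlag2c codelet declaration moves from runtime_codelet_z.h
to the new runtime_codelet_zc.h, which also declares clag2z, and the
callbacks are defined in the new codelet_zccallback.c. For Quark, the
clag2z codelet now unpacks 4 arguments and INSERT_TASK_clag2z declares the
opt variable it uses.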

---
 runtime/openmp/codelets/codelet_zlag2c.c     | 13 +++--
 runtime/parsec/codelets/codelet_zlag2c.c     | 28 ++++-----
 runtime/quark/codelets/codelet_zlag2c.c      | 61 ++++++++++----------
 runtime/starpu/codelets/codelet_zccallback.c | 26 +++++++++
 runtime/starpu/codelets/codelet_zlag2c.c     |  5 +-
 runtime/starpu/include/runtime_codelet_z.h   |  5 --
 runtime/starpu/include/runtime_codelet_zc.h  | 44 ++++++++++++++
 7 files changed, 128 insertions(+), 54 deletions(-)
 create mode 100644 runtime/starpu/codelets/codelet_zccallback.c
 create mode 100644 runtime/starpu/include/runtime_codelet_zc.h

diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c
index 369b5dd05..30a7c94e8 100644
--- a/runtime/openmp/codelets/codelet_zlag2c.c
+++ b/runtime/openmp/codelets/codelet_zlag2c.c
@@ -17,8 +17,8 @@
  *
  */
 #include "chameleon_openmp.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_ztile.h"
+#include "chameleon/tasks_zc.h"
+#include "coreblas/coreblas_zctile.h"
 
 void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
                          int m, int n, int nb,
@@ -26,9 +26,12 @@ void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
-    CHAMELEON_Complex32_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 #pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
-    TCORE_zlag2c( m, n, tileA, tileB );
+    {
+        int info = 0;
+        TCORE_zlag2c( m, n, tileA, tileB, &info );
+    }
 
     (void)options;
     (void)nb;
@@ -39,7 +42,7 @@ void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex32_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 #pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     TCORE_clag2z( m, n, tileA, tileB );
diff --git a/runtime/parsec/codelets/codelet_zlag2c.c b/runtime/parsec/codelets/codelet_zlag2c.c
index 24255ade3..6df975c11 100644
--- a/runtime/parsec/codelets/codelet_zlag2c.c
+++ b/runtime/parsec/codelets/codelet_zlag2c.c
@@ -16,17 +16,18 @@
  * @author Florent Pruvost
  * @author Mathieu Faverge
  * @date 2022-02-22
- * @precisions normal z -> c d s
+ * @precisions mixed zc -> ds
  *
  */
 #include "chameleon_parsec.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "chameleon/tasks_zc.h"
+#include "coreblas/coreblas_zc.h"
 
 static inline int
 CORE_zlag2c_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
 {
+    int info = 0;
     int m;
     int n;
     CHAMELEON_Complex64_t *A;
@@ -37,16 +38,16 @@ CORE_zlag2c_parsec( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &m, &n, &A, &lda, &B, &ldb );
 
-    CORE_zlag2c( m, n, A, lda, B, ldb );
+    CORE_zlag2c( m, n, A, lda, B, ldb, &info );
 
     (void)context;
     return PARSEC_HOOK_RETURN_DONE;
 }
 
-void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
     CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
@@ -68,7 +69,8 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
  *
  */
 static inline int
-CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task)
+CORE_clag2z_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
 {
     int m;
     int n;
@@ -86,10 +88,10 @@ CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task)
     return PARSEC_HOOK_RETURN_DONE;
 }
 
-void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
     CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
diff --git a/runtime/quark/codelets/codelet_zlag2c.c b/runtime/quark/codelets/codelet_zlag2c.c
index 336540606..2c6bccb84 100644
--- a/runtime/quark/codelets/codelet_zlag2c.c
+++ b/runtime/quark/codelets/codelet_zlag2c.c
@@ -21,10 +21,10 @@
  *
  */
 #include "chameleon_quark.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_ztile.h"
+#include "chameleon/tasks_zc.h"
+#include "coreblas/coreblas_zctile.h"
 
-void CORE_zlag2c_quark(Quark *quark)
+void CORE_zlag2c_quark( Quark *quark )
 {
     int m;
     int n;
@@ -34,50 +34,53 @@ void CORE_zlag2c_quark(Quark *quark)
     RUNTIME_request_t *request;
     int info;
 
-    quark_unpack_args_6(quark, m, n, tileA, tileB, sequence, request);
+    quark_unpack_args_6( quark, m, n, tileA, tileB, sequence, request );
     TCORE_zlag2c( m, n, tileA, tileB, &info );
     if ( (sequence->status != CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, info );
     }
 }
 
-void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LAG2C;
-    QUARK_Insert_Task(opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt,
-                      sizeof(int),                        &m,         VALUE,
-                      sizeof(int),                        &n,         VALUE,
-                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-                      sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn),                 OUTPUT,
-                      sizeof(RUNTIME_sequence_t*),           &(options->sequence),  VALUE,
-                      sizeof(RUNTIME_request_t*),            &(options->request),   VALUE,
-                      0);
+    QUARK_Insert_Task( opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt,
+                       sizeof(int),                        &m,         VALUE,
+                       sizeof(int),                        &n,         VALUE,
+                       sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+                       sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn),                 OUTPUT,
+                       sizeof(RUNTIME_sequence_t*),           &(options->sequence),  VALUE,
+                       sizeof(RUNTIME_request_t*),            &(options->request),   VALUE,
+                       0 );
 }
 
-void CORE_clag2z_quark(Quark *quark)
+void CORE_clag2z_quark( Quark *quark )
 {
     int m;
     int n;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
 
-    quark_unpack_args_6(quark, m, n, tileA, tileB);
-    TCORE_clag2z( m, n, tileA, tileB);
+    quark_unpack_args_4( quark, m, n, tileA, tileB );
+    TCORE_clag2z( m, n, tileA, tileB );
 }
 
-void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    QUARK_Insert_Task(opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt,
-                      sizeof(int),                        &m,     VALUE,
-                      sizeof(int),                        &n,     VALUE,
-                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),             INPUT,
-                      sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
-                      0);
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_LAG2C;
+
+    QUARK_Insert_Task( opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt,
+                       sizeof(int),                        &m,     VALUE,
+                       sizeof(int),                        &n,     VALUE,
+                       sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),             INPUT,
+                       sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
+                       0 );
 }
diff --git a/runtime/starpu/codelets/codelet_zccallback.c b/runtime/starpu/codelets/codelet_zccallback.c
new file mode 100644
index 000000000..40bdd7dc9
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zccallback.c
@@ -0,0 +1,26 @@
+/**
+ *
+ * @file starpu/codelet_zccallback.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zc callback StarPU codelet
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Cedric Augonnet
+ * @author Florent Pruvost
+ * @date 2022-02-22
+ * @precisions mixed zc -> ds
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_zc.h"
+
+CHAMELEON_CL_CB(zlag2c,        cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0,                                      M*N)
+CHAMELEON_CL_CB(clag2z,        cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0,                                      M*N)
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index a259373a2..3ca3d7e70 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -25,11 +25,12 @@
  *
  */
 #include "chameleon_starpu.h"
-#include "runtime_codelet_z.h"
+#include "runtime_codelet_zc.h"
 
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
 {
+    int info = 0;
     int m;
     int n;
     CHAM_tile_t *tileA;
@@ -39,7 +40,7 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
     tileB = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &m, &n);
-    TCORE_zlag2c( m, n, tileA, tileB);
+    TCORE_zlag2c( m, n, tileA, tileB, &info );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index bd823f410..13c2cdb8b 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -105,11 +105,6 @@ CODELETS_HEADER(zlatro);
 CODELETS_HEADER(zplssq);
 CODELETS_HEADER(zplssq2);
 
-/*
- * MIXED PRECISION functions
- */
-CODELETS_HEADER(zlag2c);
-
 /*
  * DZ functions
  */
diff --git a/runtime/starpu/include/runtime_codelet_zc.h b/runtime/starpu/include/runtime_codelet_zc.h
new file mode 100644
index 000000000..d357630c5
--- /dev/null
+++ b/runtime/starpu/include/runtime_codelet_zc.h
@@ -0,0 +1,44 @@
+/**
+ *
+ * @file starpu/runtime_codelet_zc.h
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU mixed-precision (zc) codelets header
+ *
+ * @version 1.2.0
+ * @author Cedric Augonnet
+ * @author Mathieu Faverge
+ * @author Cedric Castagnede
+ * @author Florent Pruvost
+ * @date 2022-02-22
+ * @precisions mixed zc -> ds
+ *
+ */
+#ifndef _runtime_codelet_zc_h_
+#define _runtime_codelet_zc_h_
+
+#include <stdio.h>
+#include "runtime_codelets.h"
+
+#include "chameleon/tasks_zc.h"
+#if !defined(CHAMELEON_SIMULATION)
+#include "coreblas/coreblas_zc.h"
+#include "coreblas/coreblas_zctile.h"
+#if defined(CHAMELEON_USE_CUDA)
+#include "cudablas.h"
+#endif
+#endif
+
+/*
+ * MIXED PRECISION functions
+ */
+CODELETS_HEADER(zlag2c);
+CODELETS_HEADER(clag2z);
+
+#endif /* _runtime_codelet_zc_h_ */
-- 
GitLab