From ad566f2e19ef469c5230240300b9c4b53e723511 Mon Sep 17 00:00:00 2001
From: Alycia Lisito <alycia.lisito@inria.fr>
Date: Thu, 17 Mar 2022 15:55:48 +0100
Subject: [PATCH] driver: add zplgtr in order to initiate trapezoidal matrix
 with the std api

---
 cmake_modules/local_subs.py     |   1 +
 compute/CMakeLists.txt          |   1 +
 compute/zplgtr.c                | 306 ++++++++++++++++++++++++++++++++
 include/chameleon/chameleon_z.h |   3 +
 4 files changed, 311 insertions(+)
 create mode 100644 compute/zplgtr.c

diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index dde6c21e1..879a3c3fb 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -24,6 +24,7 @@ _extra_blas = [
     ('',                     'spocon',               'dpocon',               'cpocon',               'zpocon'              ),
     ('',                     'strasm',               'dtrasm',               'ctrasm',               'ztrasm'              ),
     ('',                     'sgecfi',               'dgecfi',               'cgecfi',               'zgecfi'              ),
+    ('',                     'splgtr',               'dplgtr',               'cplgtr',               'zplgtr'              ),
     ('',                     'splrnk',               'dplrnk',               'cplrnk',               'zplrnk'              ),
     ('',                     'splssq',               'dplssq',               'cplssq',               'zplssq'              ),
     ('',                     'sy2sb',                'sy2sb' ,               'he2hb',                'he2hb'               ),
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 53bf35743..aef8d85d2 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -160,6 +160,7 @@ set(ZSRC
     zlauum.c
     zplghe.c
     zplgsy.c
+    zplgtr.c
     zplrnt.c
     zplrnk.c
     zposv.c
diff --git a/compute/zplgtr.c b/compute/zplgtr.c
new file mode 100644
index 000000000..e77d4cc65
--- /dev/null
+++ b/compute/zplgtr.c
@@ -0,0 +1,306 @@
+/**
+ *
+ * @file zplgtr.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zplgtr wrappers
+ *
+ * @version 1.2.0
+ * @author Alycia Lisito
+ * @date 2022-03-17
+ * @precisions normal z -> c d s
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Generate a random trapezoidal matrix.
+ *
+ *******************************************************************************
+ *
+ * @param[in] bump
+ *          The value to add to the diagonal to be sure
+ *          to have a positive definite matrix if needed.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper trapezoidal or lower trapezoidal:
+ *          = ChamUpper: Only the upper trapezoidal part of A is generated;
+ *          = ChamLower: Only the lower trapezoidal part of A is generated;
+ *          = ChamUpperLower: the full A is generated. Note that in that case
+ *          the matrix is symmetric, and it is thus not recommended to call
+ *          this function.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[out] A
+ *          On exit, the random trapezoidal matrix A generated.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] seed
+ *          The seed used in the random generation.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zplgtr_Tile
+ * @sa CHAMELEON_zplgtr_Tile_Async
+ * @sa CHAMELEON_cplgtr
+ * @sa CHAMELEON_dplgtr
+ * @sa CHAMELEON_splgtr
+ * @sa CHAMELEON_zplgtr
+ *
+ */
+int CHAMELEON_zplgtr( CHAMELEON_Complex64_t bump, cham_uplo_t uplo,
+                      int M, int N, CHAMELEON_Complex64_t *A, int LDA,
+                      unsigned long long int seed )
+{
+    int NB;
+    int status;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zplgtr", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if ( N < 0 ) {
+        chameleon_error("CHAMELEON_zplgtr", "illegal value of N");
+        return -2;
+    }
+    if ( M < 0 ) {
+        chameleon_error("CHAMELEON_zplgtr", "illegal value of M");
+        return -3;
+    }
+    if ( LDA < chameleon_max(1, M) ) {
+        chameleon_error("CHAMELEON_zplgtr", "illegal value of LDA");
+        return -4;
+    }
+    /* Quick return */
+    if ( chameleon_max(0, N) == 0 || chameleon_max(0, M) == 0 )
+        return CHAMELEON_SUCCESS;
+
+    /* Tune NB depending on M, N & NRHS; Set NBNB */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zplgtr", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NT */
+    NB = CHAMELEON_NB;
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescOutput, uplo,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    CHAMELEON_zplgtr_Tile_Async( bump, uplo, &descAt, seed, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescOutput, uplo, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Generate a random trapezoidal matrix. Tile equivalent of CHAMELEON_zplgtr().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] bump
+ *          The value to add to the diagonal to be sure
+ *          to have a positive definite matrix if needed.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper trapezoidal or lower trapezoidal:
+ *          = ChamUpper: Only the upper trapezoidal part of A is generated;
+ *          = ChamLower: Only the lower trapezoidal part of A is generated;
+ *          = ChamUpperLower: the full A is generated. Note that in that case
+ *          the matrix is symmetric, and it is thus not recommended to call
+ *          this function.
+ *
+ * @param[out] A
+ *          On exit, the random trapezoidal matrix A generated.
+ *
+ * @param[in] seed
+ *          The seed used in the random generation.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zplgtr
+ * @sa CHAMELEON_zplgtr_Tile_Async
+ * @sa CHAMELEON_cplgtr_Tile
+ * @sa CHAMELEON_dplgtr_Tile
+ * @sa CHAMELEON_splgtr_Tile
+ * @sa CHAMELEON_zplgtr_Tile
+ *
+ */
+int CHAMELEON_zplgtr_Tile( CHAMELEON_Complex64_t bump, cham_uplo_t uplo,
+                           CHAM_desc_t *A,
+                           unsigned long long int seed )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zplgtr_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zplgtr_Tile_Async( bump, uplo, A, seed, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Generate a random trapezoidal matrix. Non-blocking equivalent of CHAMELEON_zplgtr_Tile().
+ *
+ * May return before the computation is finished.
+ * Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] bump
+ *          The value to add to the diagonal to be sure
+ *          to have a positive definite matrix if needed.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper trapezoidal or lower trapezoidal:
+ *          = ChamUpper: Only the upper trapezoidal part of A is generated;
+ *          = ChamLower: Only the lower trapezoidal part of A is generated;
+ *          = ChamUpperLower: the full A is generated. Note that in that case
+ *          the matrix is symmetric, and it is thus not recommended to call
+ *          this function.
+ *
+ * @param[out] A
+ *          On exit, the random trapezoidal matrix A generated.
+ *
+ * @param[in] seed
+ *          The seed used in the random generation.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zplgtr
+ * @sa CHAMELEON_zplgtr_Tile
+ * @sa CHAMELEON_cplgtr_Tile_Async
+ * @sa CHAMELEON_dplgtr_Tile_Async
+ * @sa CHAMELEON_splgtr_Tile_Async
+ * @sa CHAMELEON_zplgtr_Tile_Async
+ * @sa CHAMELEON_zplgtr_Tile_Async
+ *
+ */
+int CHAMELEON_zplgtr_Tile_Async( CHAMELEON_Complex64_t  bump,
+                                 cham_uplo_t            uplo,
+                                 CHAM_desc_t           *A,
+                                 unsigned long long int seed,
+                                 RUNTIME_sequence_t    *sequence,
+                                 RUNTIME_request_t     *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zplgtr_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_zplgtr_Tile", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_zplgtr_Tile", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zplgtr_Tile", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if (A->nb != A->mb) {
+        chameleon_error("CHAMELEON_zplgtr_Tile", "only square tiles supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if (chameleon_min( A->m, A->n ) == 0)
+        return CHAMELEON_SUCCESS;
+
+    chameleon_pzplgsy( bump, uplo, A, A->m, A->i, A->j, seed, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 2ea083dbf..3a800ec85 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -79,6 +79,7 @@ int CHAMELEON_zlatms( int M, int N, cham_dist_t idist, unsigned long long int se
 int CHAMELEON_zlauum(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
 int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
+int CHAMELEON_zplgtr( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
 int CHAMELEON_zplrnt( int M, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
 int CHAMELEON_zplrnk( int M, int N, int K, CHAMELEON_Complex64_t *C, int LDC, unsigned long long int seedA, unsigned long long int seedB );
 int CHAMELEON_zpoinv(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
@@ -158,6 +159,7 @@ int CHAMELEON_zlatms_Tile( cham_dist_t idist, unsigned long long int seed, cham_
 int CHAMELEON_zlauum_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
 int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed );
 int CHAMELEON_zplgsy_Tile(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed );
+int CHAMELEON_zplgtr_Tile(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed );
 int CHAMELEON_zplrnt_Tile(CHAM_desc_t *A, unsigned long long int seed );
 int CHAMELEON_zplrnk_Tile(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB );
 int CHAMELEON_zpoinv_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
@@ -233,6 +235,7 @@ int CHAMELEON_zlatms_Tile_Async( cham_dist_t idist, unsigned long long int seed,
 int CHAMELEON_zlauum_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zplgsy_Tile_Async(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+int CHAMELEON_zplgtr_Tile_Async(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zplrnt_Tile_Async(CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zplrnk_Tile_Async(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zpoinv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-- 
GitLab