From 9c2e2baf7419b9c623743d4415080c45bfbb2d35 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 15 Dec 2016 22:34:35 +0100 Subject: [PATCH 1/8] Add tpqrt and tpmqrt kernels --- coreblas/compute/core_ztpmqrt.c | 190 ++++++++++++++++++++++++++++++++ coreblas/compute/core_ztpqrt.c | 159 ++++++++++++++++++++++++++ coreblas/include/coreblas_z.h | 12 ++ 3 files changed, 361 insertions(+) create mode 100644 coreblas/compute/core_ztpmqrt.c create mode 100644 coreblas/compute/core_ztpqrt.c diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c new file mode 100644 index 000000000..571da630a --- /dev/null +++ b/coreblas/compute/core_ztpmqrt.c @@ -0,0 +1,190 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_ztpmqrt.c + * + * PLASMA core_blas kernel + * PLASMA is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/coreblas.h" + +/** + ******************************************************************************* + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_ztpmqrt applies a complex orthogonal matrix Q obtained from a + * "triangular-pentagonal" complex block reflector H to a general complex matrix + * C, which consists of two blocks A and B. + * + ******************************************************************************* + * + * @param[in] side + * @arg MorseLeft : apply Q or Q**H from the Left; + * @arg MorseRight : apply Q or Q**H from the Right. 
+ * + * @param[in] trans + * @arg MorseNoTrans : No transpose, apply Q; + * @arg MorseConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M + * The number of rows of the tile B. M >= 0. + * + * @param[in] N + * The number of columns of the tile B. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * + * @param[in] L + * The number of rows of the upper trapezoidal part of V. + * K >= L >= 0. See Further Details. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_ZTTQRT in the first k rows of its array argument V. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,K). + * + * @param[in] T + * The IB-by-K triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[in,out] A + * A is COMPLEX*16 array, dimension (LDA,N) if side = MorseLeft + * or (LDA,K) if side = MorseRight + * On entry, the K-by-N or M-by-K matrix A. + * On exit, A is overwritten by the corresponding block of + * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * + * @param[in] LDA + * The leading dimension of the array A. + * If side = MorseLeft, LDA >= max(1,K); + * If side = MorseRight, LDA >= max(1,M). + * + * @param[in,out] B + * On entry, the M-by-N tile B. + * On exit, B is overwritten by the corresponding block of + * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * + * @param[in] LDB + * The leading dimension of the tile B. LDB >= max(1,M). + * + * @param[out] WORK + * Workspace array of size LDWORK-by-NB. + * LDWORK = IB if side = MorseLeft, or M if side = MorseRight. 
+ * + ******************************************************************************* + * + * @par Further Details: + * ===================== + * + * The columns of the pentagonal matrix V contain the elementary reflectors + * H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a + * trapezoidal block V2: + * + * V = [V1] + * [V2]. + * + * The size of the trapezoidal block V2 is determined by the parameter L, + * where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L + * rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular; + * if L=0, there is no trapezoidal block, hence V = V1 is rectangular. + * + * If side = MorseLeft: C = [A] where A is K-by-N, B is M-by-N and V is M-by-K. + * [B] + * + * If side = MorseRight: C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K. + * + * The complex orthogonal matrix Q is formed from V and T. + * + * If trans='N' and side='L', C is on exit replaced with Q * C. + * + * If trans='C' and side='L', C is on exit replaced with Q**H * C. + * + * If trans='N' and side='R', C is on exit replaced with C * Q. + * + * If trans='C' and side='R', C is on exit replaced with C * Q**H. 
+ * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ + +int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int IB, + const MORSE_Complex64_t *V, int LDV, + const MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *WORK ) +{ + int m1; + int n1; + + /* Check input arguments */ + if ((side != MorseLeft) && (side != MorseRight)) { + coreblas_error(1, "Illegal value of side"); + return -1; + } + + if ( side == MorseLeft ) { + m1 = K; + n1 = N; + ldwork = ib; + } + else { + m1 = M; + n1 = K; + ldwork = m1; + } + + /* TS case */ + if (L == 0) { + CORE_ztsmqr( side, trans, m1, n1, M, N, K, IB, + A, LDA, B, LDB, V, LDV, T, LDT, + WORK, ldwork ); + } + /* TT case */ + else if( L == M ) { + CORE_zttmqr( side, trans, m1, n1, M, N, K, IB, + A, LDA, B, LDB, V, LDV, T, LDT, + WORK, ldwork ); + } + else { + //LAPACKE_ztpmqrt_work( LAPACK_COL_MAJOR, M, N, K, L, IB, V, LDV, T, LDT, A, LDA, B, LDB, WORK ); + coreblas_error( 6, "Illegal value of L (only 0 or M handled for now)"); + return -6; /* L is the 6th argument of CORE_ztpmqrt */ + } + + return MORSE_SUCCESS; +} diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c new file mode 100644 index 000000000..76a94fc2c --- /dev/null +++ b/coreblas/compute/core_ztpqrt.c @@ -0,0 +1,159 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file core_ztpqrt.c + * + * PLASMA core_blas kernel + * PLASMA is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. 
of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> c d s + * + **/ +#include "coreblas/include/lapacke.h" +#include "coreblas/include/coreblas.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_MORSE_Complex64_t + * + * CORE_ztpqrt computes a blocked QR factorization of a complex + * "triangular-pentagonal" matrix C, which is composed of a + * triangular block A and pentagonal block B, using the compact + * WY representation for Q. + * + * C = | A | = Q * R + * | B | + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile B. M >= 0. + * + * @param[in] N + * The number of rows of the tile A1. + * The number of columns of the tiles A1 and A2. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] N + * The number of columns of the matrix B, and the order of the matrix + * A. N >= 0. + * + * @param[in] L + * The number of rows of the upper trapezoidal part of B. + * MIN(M,N) >= L >= 0. See Further Details. + * + * @param[in,out] A + * On entry, the upper triangular N-by-N matrix A. + * On exit, the elements on and above the diagonal of the array + * contain the upper triangular matrix R. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,N). + * + * @param[in,out] B + * On entry, the pentagonal M-by-N matrix B. The first M-L rows + * are rectangular, and the last L rows are upper trapezoidal. + * On exit, B contains the pentagonal matrix V. See Further Details. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. 
+ * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * WORK is COMPLEX*16 array, dimension ((IB+1)*N) + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************/ +int CORE_ztpqrt( int M, int N, int L, int IB, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *WORK ) +{ + static MORSE_Complex64_t zone = 1.0; + static MORSE_Complex64_t zzero = 0.0; + + MORSE_Complex64_t alpha; + int i, ii, sb; + +#if !defined(NDEBUG) + /* Check input arguments */ + if (M < 0) { + coreblas_error(1, "Illegal value of M"); + return -1; + } + if (N < 0) { + coreblas_error(2, "Illegal value of N"); + return -2; + } + if( (L < 0) || ((L > min(M, N)) && (min(M,N) > 0))) { + coreblas_error(3, "Illegal value of L"); + return -3; + } + if (IB < 0) { + coreblas_error(4, "Illegal value of IB"); + return -4; + } + if ((LDA < max(1,N)) && (N > 0)) { + coreblas_error(6, "Illegal value of LDA"); + return -6; + } + if ((LDB < max(1,M)) && (M > 0)) { + coreblas_error(8, "Illegal value of LDB"); /* reported index matches the -8 return */ + return -8; + } + if ((LDT < max(1,IB)) && (IB > 0)) { + coreblas_error(10, "Illegal value of LDT"); /* reported index matches the -10 return */ + return -10; + } +#endif /*!defined(NDEBUG)*/ + + /* Quick return */ + if ((M == 0) || (N == 0) || (IB == 0)) + return MORSE_SUCCESS; + + if ( L == O ) { + CORE_ztsqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N ); + } + else if (L == M) { + CORE_zttqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N ); + } + else { + //LAPACKE_ztpqrt_work( LAPACK_COL_MAJOR, M, N, L, IB, A, LDA, B, LDB, T, LDT, WORK ); + coreblas_error( 3, "Illegal value of L (only 0 or M handled for now)"); + return -3; + } + return MORSE_SUCCESS; +} diff --git 
a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h index e94455562..345836f81 100644 --- a/coreblas/include/coreblas_z.h +++ b/coreblas/include/coreblas_z.h @@ -352,6 +352,18 @@ int CORE_ztstrf(int M, int N, int IB, int NB, MORSE_Complex64_t *L, int LDL, int *IPIV, MORSE_Complex64_t *WORK, int LDWORK, int *INFO); +int CORE_ztpqrt( int M, int N, int L, int IB, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *WORK ); +int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int IB, + const MORSE_Complex64_t *V, int LDV, + const MORSE_Complex64_t *T, int LDT, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_Complex64_t *WORK ); int CORE_zttmqr(MORSE_enum side, MORSE_enum trans, int M1, int N1, int M2, int N2, int K, int IB, MORSE_Complex64_t *A1, int LDA1, -- GitLab From 3b69d027e31ec84bc0af7f341934b19710c166bc Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 15 Dec 2016 22:35:22 +0100 Subject: [PATCH 2/8] Add task interfaces to tpqrt/tpmqrt tasks --- include/morse_z.h | 3 +++ include/runtime_z.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/morse_z.h b/include/morse_z.h index f01b0c778..07c81112a 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -102,6 +102,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64 int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC); int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); +int MORSE_ztpqrt( int M, int N, int L, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int 
LDB, MORSE_desc_t *descT ); int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_ztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); @@ -179,6 +180,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C); int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); +int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T ); int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B); int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_ztrsm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B); @@ -253,6 +255,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t 
*sequence, MORSE_request_t *request); int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_ztrsm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/include/runtime_z.h b/include/runtime_z.h index df6b2f553..d4e434a8c 100644 --- a/include/runtime_z.h +++ b/include/runtime_z.h @@ -315,6 +315,18 @@ void MORSE_TASK_zswptr_ontile(const MORSE_option_t *options, const MORSE_desc_t descA, const MORSE_desc_t *Aij, int Aijm, int Aijn, int i1, int i2, int *ipiv, int inc, const MORSE_desc_t *Akk, int Akkm, int Akkn, int ldak); +void MORSE_TASK_ztpmqrt(const MORSE_option_t *options, + MORSE_enum side, MORSE_enum trans, + int m, int n, int k, int l, int ib, int nb, + const MORSE_desc_t *V, int Vm, int Vn, int ldv, + const MORSE_desc_t *T, int Tm, int Tn, int ldt, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb ); +void MORSE_TASK_ztpqrt(const MORSE_option_t *options, + int m, int n, int l, int ib, int nb, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb, + const MORSE_desc_t *T, int Tm, int Tn, int ldt ); 
void MORSE_TASK_ztrdalg(const MORSE_option_t *options, MORSE_enum uplo, int N, int NB, -- GitLab From 9224a46894beec01ed0cd13e812a315691eb4424 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 15 Dec 2016 22:47:55 +0100 Subject: [PATCH 3/8] Add compilation of the kernels --- coreblas/compute/CMakeLists.txt | 2 ++ coreblas/compute/core_ztpmqrt.c | 5 ++--- coreblas/compute/core_ztpqrt.c | 8 +------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 82813231b..d24bc295a 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -73,6 +73,8 @@ set(ZSRC core_zsyrk.c core_zsyssq.c core_zsytf2_nopiv.c + core_ztpqrt.c + core_ztpmqrt.c core_ztradd.c core_ztrasm.c core_ztrmm.c diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index 571da630a..ee44b8cf8 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -148,8 +148,7 @@ int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t *WORK ) { - int m1; - int n1; + int m1, n1, ldwork; /* Check input arguments */ if ((side != MorseLeft) && (side != MorseRight)) { @@ -160,7 +159,7 @@ int CORE_ztpmqrt( MORSE_enum side, MORSE_enum trans, if ( side == MorseLeft ) { m1 = K; n1 = N; - ldwork = ib; + ldwork = IB; } else { m1 = M; diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c index 76a94fc2c..504951a0c 100644 --- a/coreblas/compute/core_ztpqrt.c +++ b/coreblas/compute/core_ztpqrt.c @@ -102,12 +102,6 @@ int CORE_ztpqrt( int M, int N, int L, int IB, MORSE_Complex64_t *T, int LDT, MORSE_Complex64_t *WORK ) { - static MORSE_Complex64_t zone = 1.0; - static MORSE_Complex64_t zzero = 0.0; - - MORSE_Complex64_t alpha; - int i, ii, sb; - #if !defined(NDEBUG) /* Check input arguments */ if (M < 0) { @@ -144,7 +138,7 @@ int CORE_ztpqrt( int M, int N, int L, int IB, if 
((M == 0) || (N == 0) || (IB == 0)) return MORSE_SUCCESS; - if ( L == O ) { + if ( L == 0 ) { CORE_ztsqrt( M, N, IB, A, LDA, B, LDB, T, LDT, WORK, WORK+N ); } else if (L == M) { -- GitLab From eb02ab84224ada97e65fc71cd43b3d3727473021 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 15 Dec 2016 22:52:10 +0100 Subject: [PATCH 4/8] Add now driver and parallel implementation --- compute/CMakeLists.txt | 2 + compute/pztpqrt.c | 151 +++++++++++++++++ compute/ztpqrt.c | 361 +++++++++++++++++++++++++++++++++++++++++ control/compute_z.h | 1 + 4 files changed, 515 insertions(+) create mode 100644 compute/pztpqrt.c create mode 100644 compute/ztpqrt.c diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 1d9b97f04..8d61cde93 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -130,6 +130,7 @@ set(ZSRC pzunmlqrh.c pzunmqr.c pzunmqrrh.c + pztpqrt.c ### zgels.c zgelqf.c @@ -167,6 +168,7 @@ set(ZSRC zungqr.c zunmlq.c zunmqr.c + ztpqrt.c ################## # MIXED PRECISION ################## diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c new file mode 100644 index 000000000..8dd8c6335 --- /dev/null +++ b/compute/pztpqrt.c @@ -0,0 +1,151 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pztpqrt.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. 
of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, m, n +#define B(m,n) B, m, n +#define T(m,n) T, m, n +#if defined(CHAMELEON_COPY_DIAG) +#define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) A, k, k +#endif + +/***************************************************************************//** + * Parallel tile QR factorization - dynamic scheduling + **/ +void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_sequence_t *sequence, MORSE_request_t *request ) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + MORSE_desc_t *DIAG = NULL; + + int k, m, n; + int ldak, ldbm; + int tempkm, tempkn, tempnn, tempmm, templm; + int ib; + + /* Dimension of the first column */ + int maxm = B->m - L; + int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1); + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * zgeqrt = A->nb * (ib+1) + * zunmqr = A->nb * ib + * ztsqrt = A->nb * (ib+1) + * ztsmqr = A->nb * ib + */ + ws_worker = A->nb * (ib+1); + + /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + +#if defined(CHAMELEON_USE_MAGMA) + /* Worker space + * + * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) + */ + ws_worker = max( ws_worker, ib * (ib + A->nb) ); + + /* Host space + * + * zgeqrt = ib * (A->mb+3*ib) + A->mb ) + * ztsqrt = 2 * ib * (A->nb+ib) + A->nb + */ + ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); + ws_host = max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); +#endif + + ws_worker *= 
sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + +#if defined(CHAMELEON_COPY_DIAG) + /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ + DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); + morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); +#endif + + for (k = 0; k < A->nt; k++) { + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + ldak = BLKLDD(A, k); + + for (m = 0; m < maxmt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + templm = m == maxmt-1 ? tempmm : 0; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztpqrt( + &options, + tempmm, tempkn, templm, ib, T->nb, + A(k, k), ldak, + B(m, k), ldbm, + T(m, k), T->mb ); + + for (n = k+1; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztpmqrt( + &options, + MorseLeft, MorseConjTrans, + tempmm, tempnn, tempkm, templm, ib, T->nb, + B(m, k), ldbm, + T(m, k), T->mb, + A(k, n), ldak, + B(m, n), ldbm ); + } + } + + maxmt = min( B->mt, maxmt+1 ); + } + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(DIAG); + free(DIAG); +#endif + (void)DIAG; +} diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c new file mode 100644 index 000000000..ef9e4232e --- /dev/null +++ b/compute/ztpqrt.c @@ -0,0 +1,361 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file ztpqrt.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. 
of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ****************************************************************************** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_ztpqrt - Computes a blocked QR factorization of a + * "triangular-pentagonal" matrix C, which is composed of a triangular block A + * and a pentagonal block B, using the compact representation for Q. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix B. M >= 0. + * + * @param[in] N + * The number of columns of the matrix B, and the order of the matrix + * A. N >= 0. + * + * @param[in] L + * The number of rows of the upper trapezoidal part of B. + * MIN(M,N) >= L >= 0. See Further Details. + * + * @param[in,out] A + * On entry, the upper triangular N-by-N matrix A. + * On exit, the elements on and above the diagonal of the array + * contain the upper triangular matrix R. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,N). + * + * @param[in,out] B + * On entry, the pentagonal M-by-N matrix B. The first M-L rows + * are rectangular, and the last L rows are upper trapezoidal. + * On exit, B contains the pentagonal matrix V. See Further Details. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + * @param[out] descT + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to + * solve the system of equations, or by any function to apply the Q. 
+ * + * @par Further Details: + * ===================== + * + * The input matrix C is a (N+M)-by-N matrix + * + * C = [ A ] + * [ B ] + * + * where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal + * matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N + * upper trapezoidal matrix B2: + * + * B = [ B1 ] <- (M-L)-by-N rectangular + * [ B2 ] <- L-by-N upper trapezoidal. + * + * The upper trapezoidal matrix B2 consists of the first L rows of a + * N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, + * B is rectangular M-by-N; if M=L=N, B is upper triangular. + * + * The matrix W stores the elementary reflectors H(i) in the i-th column + * below the diagonal (of A) in the (N+M)-by-N input matrix C + * + * C = [ A ] <- upper triangular N-by-N + * [ B ] <- M-by-N pentagonal + * + * so that W can be represented as + * + * W = [ I ] <- identity, N-by-N + * [ V ] <- M-by-N, same form as B. + * + * Thus, all of information needed for W is contained on exit in B, which + * we call V above. Note that V has the same form as B; that is, + * + * V = [ V1 ] <- (M-L)-by-N rectangular + * [ V2 ] <- L-by-N upper trapezoidal. + * + * The columns of V represent the vectors which define the H(i)'s. + * + * The number of blocks is B = ceiling(N/NB), where each + * block is of order NB except for the last block, which is of order + * IB = N - (B-1)*NB. For each of the B blocks, a upper triangular block + * reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB + * for the last block) T's are stored in the NB-by-N matrix T as + * + * T = [T1 T2 ... TB]. 
+ * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_ztpqrt_Tile + * @sa MORSE_ztpqrt_Tile_Async + * @sa MORSE_ctpqrt + * @sa MORSE_dtpqrt + * @sa MORSE_stpqrt + * @sa MORSE_zgeqrs + * + ******************************************************************************/ +int MORSE_ztpqrt( int M, int N, int L, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB, + MORSE_desc_t *descT ) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + int minMN = min( M, N ); + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztpqrt", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_ztpqrt", "illegal value of M"); + return -1; + } + if (N < 0) { + morse_error("MORSE_ztpqrt", "illegal value of N"); + return -2; + } + if ((L < 0) || ((L > minMN) && (minMN > 0))) { + morse_error("MORSE_ztpqrt", "illegal value of L"); + return -3; + } + if (LDA < max(1, N)) { + morse_error("MORSE_ztpqrt", "illegal value of LDA"); + return -5; + } + if (LDB < max(1, M)) { + morse_error("MORSE_ztpqrt", "illegal value of LDB"); + return -7; + } + + /* Quick return */ + if (minMN == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 
0, N, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request, + (morse_desc_mat_free(&(descA)), morse_desc_mat_free(&(descB))) ); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_ztpqrt_Tile_Async(L, &descA, &descB, descT, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_ztpqrt_Tile - Computes the tile QR factorization of a matrix. + * Tile equivalent of MORSE_ztpqrt(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N + * upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the + * diagonal represent the unitary matrix Q as a product of elementary reflectors stored + * by tiles. + * + * @param[out] T + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. 
+ * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_ztpqrt + * @sa MORSE_ztpqrt_Tile_Async + * @sa MORSE_ctpqrt_Tile + * @sa MORSE_dtpqrt_Tile + * @sa MORSE_stpqrt_Tile + * @sa MORSE_zgeqrs_Tile + * + ******************************************************************************/ +int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T ) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztpqrt_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_ztpqrt_Tile_Async(L, A, B, T, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/***************************************************************************//** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_ztpqrt_Tile_Async - Computes the tile QR factorization of a matrix. + * Non-blocking equivalent of MORSE_ztpqrt_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). 
+ * + ******************************************************************************* + * + * @sa MORSE_ztpqrt + * @sa MORSE_ztpqrt_Tile + * @sa MORSE_ctpqrt_Tile_Async + * @sa MORSE_dtpqrt_Tile_Async + * @sa MORSE_stpqrt_Tile_Async + * @sa MORSE_zgeqrs_Tile_Async + * + ******************************************************************************/ +int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, + MORSE_sequence_t *sequence, MORSE_request_t *request ) +{ + MORSE_context_t *morse; + + morse = morse_context_self(); + if (morse == NULL) { + morse_error("MORSE_ztpqrt_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_ztpqrt_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_ztpqrt_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(T) != MORSE_SUCCESS) { + morse_error("MORSE_ztpqrt_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + morse_error("MORSE_ztpqrt_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* The alignment test below is satisfied by some negative or oversized L, so reject those first */ + if ((L < 0) || (L > B->m)) { + morse_error("MORSE_ztpqrt_Tile", "illegal value of L"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (((B->m - L) % B->mb) != 0) { + morse_error("MORSE_ztpqrt_Tile", "Triangular
part must be aligned with tiles"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ + morse_pztpqrt(L, A, B, T, sequence, request); + /* } */ + /* else { */ + /* morse_pztpqrtrh(A, T, MORSE_RHBLK, sequence, request); */ + /* } */ + + return MORSE_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index fd6051a49..d99406b14 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -134,6 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); -- GitLab From 22869cafe224b2ea75a63cc543f36d292bc388e9 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Fri, 16 Dec 2016 00:06:47 +0100 Subject: [PATCH 5/8] Add codelets in all three runtimes 
--- coreblas/compute/core_ztpmqrt.c | 2 +- runtime/CMakeLists.txt | 78 +++++++++ runtime/parsec/CMakeLists.txt | 72 +------- runtime/parsec/codelets/codelet_ztpmqrt.c | 102 +++++++++++ runtime/parsec/codelets/codelet_ztpqrt.c | 85 ++++++++++ runtime/quark/CMakeLists.txt | 72 +------- runtime/quark/codelets/codelet_ztpmqrt.c | 84 +++++++++ runtime/quark/codelets/codelet_ztpqrt.c | 72 ++++++++ runtime/starpu/CMakeLists.txt | 72 +------- runtime/starpu/codelets/codelet_zcallback.c | 4 +- runtime/starpu/codelets/codelet_ztpmqrt.c | 159 ++++++++++++++++++ runtime/starpu/codelets/codelet_ztpqrt.c | 99 +++++++++++ .../starpu/include/runtime_codelet_profile.h | 2 +- runtime/starpu/include/runtime_codelet_z.h | 2 + runtime/starpu/include/runtime_codelets.h | 2 +- runtime/starpu/include/runtime_workspace.h | 8 +- 16 files changed, 694 insertions(+), 221 deletions(-) create mode 100644 runtime/parsec/codelets/codelet_ztpmqrt.c create mode 100644 runtime/parsec/codelets/codelet_ztpqrt.c create mode 100644 runtime/quark/codelets/codelet_ztpmqrt.c create mode 100644 runtime/quark/codelets/codelet_ztpqrt.c create mode 100644 runtime/starpu/codelets/codelet_ztpmqrt.c create mode 100644 runtime/starpu/codelets/codelet_ztpqrt.c diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index ee44b8cf8..2241b5d39 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -98,7 +98,7 @@ * * @param[out] WORK * Workspace array of size LDWORK-by-NB. - * LDWORK = N if side =MorseLeft, or M if side = MorseRight. + * LDWORK = N if side = MorseLeft, or M if side = MorseRight. 
* ******************************************************************************* * diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index b0d76eeba..0224e3644 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -26,6 +26,84 @@ # ### +# List of codelets required by all runtimes +# ----------------------------------------- +set(CODELETS_ZSRC + codelets/codelet_ztile_zero.c + codelets/codelet_zasum.c + ################## + # BLAS 1 + ################## + codelets/codelet_zaxpy.c + ################## + # BLAS 3 + ################## + codelets/codelet_zgemm.c + codelets/codelet_zhemm.c + codelets/codelet_zher2k.c + codelets/codelet_zherk.c + codelets/codelet_zsymm.c + codelets/codelet_zsyr2k.c + codelets/codelet_zsyrk.c + codelets/codelet_ztrmm.c + codelets/codelet_ztrsm.c + ################## + # LAPACK + ################## + codelets/codelet_zgeadd.c + codelets/codelet_zlascal.c + codelets/codelet_zgelqt.c + codelets/codelet_zgeqrt.c + codelets/codelet_zgessm.c + codelets/codelet_zgessq.c + codelets/codelet_zgetrf.c + codelets/codelet_zgetrf_incpiv.c + codelets/codelet_zgetrf_nopiv.c + codelets/codelet_zhe2ge.c + codelets/codelet_zherfb.c + codelets/codelet_zhessq.c + codelets/codelet_zlacpy.c + codelets/codelet_zlange.c + codelets/codelet_zlanhe.c + codelets/codelet_zlansy.c + codelets/codelet_zlantr.c + codelets/codelet_zlaset2.c + codelets/codelet_zlaset.c + codelets/codelet_zlatro.c + codelets/codelet_zlauum.c + codelets/codelet_zplghe.c + codelets/codelet_zplgsy.c + codelets/codelet_zplrnt.c + codelets/codelet_zplssq.c + codelets/codelet_zpotrf.c + codelets/codelet_zssssm.c + codelets/codelet_zsyssq.c + codelets/codelet_zsytrf_nopiv.c + codelets/codelet_ztpqrt.c + codelets/codelet_ztpmqrt.c + codelets/codelet_ztradd.c + codelets/codelet_ztrasm.c + codelets/codelet_ztrssq.c + codelets/codelet_ztrtri.c + codelets/codelet_ztslqt.c + codelets/codelet_ztsmlq.c + codelets/codelet_ztsmqr.c + codelets/codelet_ztsmlq_hetra1.c + 
codelets/codelet_ztsmqr_hetra1.c + codelets/codelet_ztsqrt.c + codelets/codelet_ztstrf.c + codelets/codelet_zttlqt.c + codelets/codelet_zttmlq.c + codelets/codelet_zttmqr.c + codelets/codelet_zttqrt.c + codelets/codelet_zunmlq.c + codelets/codelet_zunmqr.c + ################## + # BUILD + ################## + codelets/codelet_zbuild.c + ) + # Check for the subdirectories # ---------------------------- if( CHAMELEON_SCHED_QUARK ) diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt index a19890afe..872c19c48 100644 --- a/runtime/parsec/CMakeLists.txt +++ b/runtime/parsec/CMakeLists.txt @@ -88,77 +88,7 @@ set(RUNTIME_COMMON # ------------------------------------------------------ set(RUNTIME_SRCS_GENERATED "") set(ZSRC - codelets/codelet_ztile_zero.c - codelets/codelet_zasum.c - ################## - # BLAS 1 - ################## - codelets/codelet_zaxpy.c - ################## - # BLAS 3 - ################## - codelets/codelet_zgemm.c - codelets/codelet_zhemm.c - codelets/codelet_zher2k.c - codelets/codelet_zherk.c - codelets/codelet_zsymm.c - codelets/codelet_zsyr2k.c - codelets/codelet_zsyrk.c - codelets/codelet_ztrmm.c - codelets/codelet_ztrsm.c - ################## - # LAPACK - ################## - codelets/codelet_zgeadd.c - codelets/codelet_zlascal.c - codelets/codelet_zgelqt.c - codelets/codelet_zgeqrt.c - codelets/codelet_zgessm.c - codelets/codelet_zgessq.c - codelets/codelet_zgetrf.c - codelets/codelet_zgetrf_incpiv.c - codelets/codelet_zgetrf_nopiv.c - codelets/codelet_zhe2ge.c - codelets/codelet_zherfb.c - codelets/codelet_zhessq.c - codelets/codelet_zlacpy.c - codelets/codelet_zlange.c - codelets/codelet_zlanhe.c - codelets/codelet_zlansy.c - codelets/codelet_zlantr.c - codelets/codelet_zlaset2.c - codelets/codelet_zlaset.c - codelets/codelet_zlatro.c - codelets/codelet_zlauum.c - codelets/codelet_zplghe.c - codelets/codelet_zplgsy.c - codelets/codelet_zplrnt.c - codelets/codelet_zplssq.c - codelets/codelet_zpotrf.c - 
codelets/codelet_zssssm.c - codelets/codelet_zsyssq.c - codelets/codelet_zsytrf_nopiv.c - codelets/codelet_ztradd.c - codelets/codelet_ztrasm.c - codelets/codelet_ztrssq.c - codelets/codelet_ztrtri.c - codelets/codelet_ztslqt.c - codelets/codelet_ztsmlq.c - codelets/codelet_ztsmqr.c - codelets/codelet_ztsmlq_hetra1.c - codelets/codelet_ztsmqr_hetra1.c - codelets/codelet_ztsqrt.c - codelets/codelet_ztstrf.c - codelets/codelet_zttlqt.c - codelets/codelet_zttmlq.c - codelets/codelet_zttmqr.c - codelets/codelet_zttqrt.c - codelets/codelet_zunmlq.c - codelets/codelet_zunmqr.c - ################## - # BUILD - ################## - codelets/codelet_zbuild.c + ${CODELETS_ZSRC} ) precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}" diff --git a/runtime/parsec/codelets/codelet_ztpmqrt.c b/runtime/parsec/codelets/codelet_ztpmqrt.c new file mode 100644 index 000000000..612e9d54a --- /dev/null +++ b/runtime/parsec/codelets/codelet_ztpmqrt.c @@ -0,0 +1,102 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztpmqrt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ.
of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "runtime/parsec/include/morse_parsec.h" + +static int +CORE_ztpmqrt_parsec(dague_execution_unit_t *context, + dague_execution_context_t *this_task) +{ + MORSE_enum *side; + MORSE_enum *trans; + int *M; + int *N; + int *K; + int *L; + int *ib; + const MORSE_Complex64_t *V; + int *ldv; + const MORSE_Complex64_t *T; + int *ldt; + MORSE_Complex64_t *A; + int *lda; + MORSE_Complex64_t *B; + int *ldb; + MORSE_Complex64_t *WORK; + + dague_dtd_unpack_args( + this_task, + UNPACK_VALUE, &side, + UNPACK_VALUE, &trans, + UNPACK_VALUE, &M, + UNPACK_VALUE, &N, + UNPACK_VALUE, &K, + UNPACK_VALUE, &L, + UNPACK_VALUE, &ib, + UNPACK_DATA, &V, + UNPACK_VALUE, &ldv, + UNPACK_DATA, &T, + UNPACK_VALUE, &ldt, + UNPACK_DATA, &A, + UNPACK_VALUE, &lda, + UNPACK_DATA, &B, + UNPACK_VALUE, &ldb, + UNPACK_SCRATCH, &WORK ); + + CORE_ztpmqrt( *side, *trans, *M, *N, *K, *L, *ib, + V, *ldv, T, *ldt, A, *lda, B, *ldb, WORK ); + + return 0; +} + +void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, + MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int ib, int nb, + const MORSE_desc_t *V, int Vm, int Vn, int ldv, + const MORSE_desc_t *T, int Tm, int Tn, int ldt, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb ) +{ + dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt); + + dague_insert_task( + DAGUE_dtd_handle, CORE_ztpmqrt_parsec, "tpmqrt", + sizeof(MORSE_enum), &side, VALUE, + sizeof(MORSE_enum), &trans, VALUE, + sizeof(int), &M, VALUE, + sizeof(int), &N, VALUE, + sizeof(int), &K, VALUE, + sizeof(int), &L, VALUE, + sizeof(int), &ib, VALUE, + PASSED_BY_REF, RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT | REGION_FULL, + sizeof(int), &ldv, VALUE, + PASSED_BY_REF, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT | REGION_FULL, + sizeof(int), &ldt, 
VALUE, + PASSED_BY_REF, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | REGION_FULL, + sizeof(int), &lda, VALUE, + PASSED_BY_REF, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | REGION_FULL, + sizeof(int), &ldb, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, + 0); +} diff --git a/runtime/parsec/codelets/codelet_ztpqrt.c b/runtime/parsec/codelets/codelet_ztpqrt.c new file mode 100644 index 000000000..a0b3f6e06 --- /dev/null +++ b/runtime/parsec/codelets/codelet_ztpqrt.c @@ -0,0 +1,85 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztpqrt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "runtime/parsec/include/morse_parsec.h" + +static int +CORE_ztpqrt_parsec(dague_execution_unit_t *context, + dague_execution_context_t *this_task) +{ + int *M; + int *N; + int *L; + int *ib; + MORSE_Complex64_t *A; + int *lda; + MORSE_Complex64_t *B; + int *ldb; + MORSE_Complex64_t *T; + int *ldt; + MORSE_Complex64_t *WORK; + + dague_dtd_unpack_args( + this_task, + UNPACK_VALUE, &M, + UNPACK_VALUE, &N, + UNPACK_VALUE, &L, + UNPACK_VALUE, &ib, + UNPACK_DATA, &A, + UNPACK_VALUE, &lda, + UNPACK_DATA, &B, + UNPACK_VALUE, &ldb, + UNPACK_DATA, &T, + UNPACK_VALUE, &ldt, + UNPACK_SCRATCH, &WORK ); + + CORE_ztpqrt( *M, *N, *L, *ib, + A, *lda, B, *ldb, T, *ldt, WORK ); + + return 0; +} + +void MORSE_TASK_ztpqrt( const MORSE_option_t *options, + int M, int N, int L, int ib, int nb, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb, + const 
MORSE_desc_t *T, int Tm, int Tn, int ldt ) +{ + dague_dtd_handle_t* DAGUE_dtd_handle = (dague_dtd_handle_t *)(options->sequence->schedopt); + + dague_insert_task( + DAGUE_dtd_handle, CORE_ztpqrt_parsec, "tpqrt", + sizeof(int), &M, VALUE, + sizeof(int), &N, VALUE, + sizeof(int), &L, VALUE, + sizeof(int), &ib, VALUE, + PASSED_BY_REF, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | REGION_U | REGION_D, + sizeof(int), &lda, VALUE, + PASSED_BY_REF, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT | REGION_FULL, + sizeof(int), &ldb, VALUE, + PASSED_BY_REF, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INOUT | REGION_FULL, + sizeof(int), &ldt, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, + 0); +} diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt index 9366a00d7..fa7952a15 100644 --- a/runtime/quark/CMakeLists.txt +++ b/runtime/quark/CMakeLists.txt @@ -86,77 +86,7 @@ set(RUNTIME_COMMON # ------------------------------------------------------ set(RUNTIME_SRCS_GENERATED "") set(ZSRC - codelets/codelet_ztile_zero.c - codelets/codelet_zasum.c - ################## - # BLAS 1 - ################## - codelets/codelet_zaxpy.c - ################## - # BLAS 3 - ################## - codelets/codelet_zgemm.c - codelets/codelet_zhemm.c - codelets/codelet_zher2k.c - codelets/codelet_zherk.c - codelets/codelet_zsymm.c - codelets/codelet_zsyr2k.c - codelets/codelet_zsyrk.c - codelets/codelet_ztrmm.c - codelets/codelet_ztrsm.c - ################## - # LAPACK - ################## - codelets/codelet_zgeadd.c - codelets/codelet_zlascal.c - codelets/codelet_zgelqt.c - codelets/codelet_zgeqrt.c - codelets/codelet_zgessm.c - codelets/codelet_zgessq.c - codelets/codelet_zgetrf.c - codelets/codelet_zgetrf_incpiv.c - codelets/codelet_zgetrf_nopiv.c - codelets/codelet_zhe2ge.c - codelets/codelet_zherfb.c - codelets/codelet_zhessq.c - codelets/codelet_zlacpy.c - codelets/codelet_zlange.c - codelets/codelet_zlanhe.c - codelets/codelet_zlansy.c - 
codelets/codelet_zlantr.c - codelets/codelet_zlaset2.c - codelets/codelet_zlaset.c - codelets/codelet_zlatro.c - codelets/codelet_zlauum.c - codelets/codelet_zplghe.c - codelets/codelet_zplgsy.c - codelets/codelet_zplrnt.c - codelets/codelet_zplssq.c - codelets/codelet_zpotrf.c - codelets/codelet_zssssm.c - codelets/codelet_zsyssq.c - codelets/codelet_zsytrf_nopiv.c - codelets/codelet_ztradd.c - codelets/codelet_ztrasm.c - codelets/codelet_ztrssq.c - codelets/codelet_ztrtri.c - codelets/codelet_ztslqt.c - codelets/codelet_ztsmlq.c - codelets/codelet_ztsmqr.c - codelets/codelet_ztsmlq_hetra1.c - codelets/codelet_ztsmqr_hetra1.c - codelets/codelet_ztsqrt.c - codelets/codelet_ztstrf.c - codelets/codelet_zttlqt.c - codelets/codelet_zttmlq.c - codelets/codelet_zttmqr.c - codelets/codelet_zttqrt.c - codelets/codelet_zunmlq.c - codelets/codelet_zunmqr.c - ################## - # BUILD - ################## - codelets/codelet_zbuild.c + ${CODELETS_ZSRC} ) precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}" diff --git a/runtime/quark/codelets/codelet_ztpmqrt.c b/runtime/quark/codelets/codelet_ztpmqrt.c new file mode 100644 index 000000000..25bd5ac83 --- /dev/null +++ b/runtime/quark/codelets/codelet_ztpmqrt.c @@ -0,0 +1,84 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztpmqrt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ.
of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "runtime/quark/include/morse_quark.h" + +static void +CORE_ztpmqrt_quark( Quark *quark ) +{ + MORSE_enum side; + MORSE_enum trans; + int M; + int N; + int K; + int L; + int ib; + const MORSE_Complex64_t *V; + int ldv; + const MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *WORK; + + quark_unpack_args_16( quark, side, trans, M, N, K, L, ib, + V, ldv, T, ldt, A, lda, B, ldb, WORK ); + + CORE_ztpmqrt( side, trans, M, N, K, L, ib, + V, ldv, T, ldt, A, lda, B, ldb, WORK ); +} + +void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, + MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int ib, int nb, + const MORSE_desc_t *V, int Vm, int Vn, int ldv, + const MORSE_desc_t *T, int Tm, int Tn, int ldt, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb ) +{ + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_TSMQR; + + QUARK_Insert_Task( + opt->quark, CORE_ztpmqrt_quark, (Quark_Task_Flags*)opt, + sizeof(MORSE_enum), &side, VALUE, + sizeof(MORSE_enum), &trans, VALUE, + sizeof(int), &M, VALUE, + sizeof(int), &N, VALUE, + sizeof(int), &K, VALUE, + sizeof(int), &L, VALUE, + sizeof(int), &ib, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( V, MORSE_Complex64_t, Vm, Vn ), INPUT, + sizeof(int), &ldv, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, RTBLKADDR( T, MORSE_Complex64_t, Tm, Tn ), INPUT, + sizeof(int), &ldt, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT, + sizeof(int), &lda, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT, + sizeof(int), &ldb, VALUE, + sizeof(MORSE_Complex64_t)*ib*nb, NULL, SCRATCH, + 0); +} diff --git a/runtime/quark/codelets/codelet_ztpqrt.c 
b/runtime/quark/codelets/codelet_ztpqrt.c new file mode 100644 index 000000000..9b7e09876 --- /dev/null +++ b/runtime/quark/codelets/codelet_ztpqrt.c @@ -0,0 +1,72 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztpqrt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "runtime/quark/include/morse_quark.h" + +static void +CORE_ztpqrt_quark( Quark *quark ) +{ + int M; + int N; + int L; + int ib; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *WORK; + + quark_unpack_args_11( quark, M, N, L, ib, + A, lda, B, ldb, T, ldt, WORK ); + + CORE_ztpqrt( M, N, L, ib, + A, lda, B, ldb, T, ldt, WORK ); +} + +void MORSE_TASK_ztpqrt( const MORSE_option_t *options, + int M, int N, int L, int ib, int nb, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb, + const MORSE_desc_t *T, int Tm, int Tn, int ldt ) +{ + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_TSQRT; + + QUARK_Insert_Task( + opt->quark, CORE_ztpqrt_quark, (Quark_Task_Flags*)opt, + sizeof(int), &M, VALUE, + sizeof(int), &N, VALUE, + sizeof(int), &L, VALUE, + sizeof(int), &ib, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( A, MORSE_Complex64_t, Am, An ), INOUT | QUARK_REGION_U | QUARK_REGION_D, + sizeof(int), &lda, VALUE, + sizeof(MORSE_Complex64_t)*nb*nb, RTBLKADDR( B, MORSE_Complex64_t, Bm, Bn ), INOUT, + sizeof(int), &ldb, VALUE, + sizeof(MORSE_Complex64_t)*nb*ib, RTBLKADDR( T, 
MORSE_Complex64_t, Tm, Tn ), OUTPUT, + sizeof(int), &ldt, VALUE, + sizeof(MORSE_Complex64_t)*(ib+1)*nb, NULL, SCRATCH, + 0); +} diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 08956acaf..b2748379d 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -106,77 +106,7 @@ set_source_files_properties(control/runtime_profiling.c PROPERTIES COMPILE_FLAGS set(RUNTIME_SRCS_GENERATED "") set(ZSRC codelets/codelet_zcallback.c - codelets/codelet_ztile_zero.c - codelets/codelet_zasum.c - ################## - # BLAS 1 - ################## - codelets/codelet_zaxpy.c - ################## - # BLAS 3 - ################## - codelets/codelet_zgemm.c - codelets/codelet_zhemm.c - codelets/codelet_zher2k.c - codelets/codelet_zherk.c - codelets/codelet_zsymm.c - codelets/codelet_zsyr2k.c - codelets/codelet_zsyrk.c - codelets/codelet_ztrmm.c - codelets/codelet_ztrsm.c - ################## - # LAPACK - ################## - codelets/codelet_zgeadd.c - codelets/codelet_zlascal.c - codelets/codelet_zgelqt.c - codelets/codelet_zgeqrt.c - codelets/codelet_zgessm.c - codelets/codelet_zgessq.c - codelets/codelet_zgetrf.c - codelets/codelet_zgetrf_incpiv.c - codelets/codelet_zgetrf_nopiv.c - codelets/codelet_zhe2ge.c - codelets/codelet_zherfb.c - codelets/codelet_zhessq.c - codelets/codelet_zlacpy.c - codelets/codelet_zlange.c - codelets/codelet_zlanhe.c - codelets/codelet_zlansy.c - codelets/codelet_zlantr.c - codelets/codelet_zlaset2.c - codelets/codelet_zlaset.c - codelets/codelet_zlatro.c - codelets/codelet_zlauum.c - codelets/codelet_zplghe.c - codelets/codelet_zplgsy.c - codelets/codelet_zplrnt.c - codelets/codelet_zplssq.c - codelets/codelet_zpotrf.c - codelets/codelet_zssssm.c - codelets/codelet_zsyssq.c - codelets/codelet_zsytrf_nopiv.c - codelets/codelet_ztradd.c - codelets/codelet_ztrasm.c - codelets/codelet_ztrssq.c - codelets/codelet_ztrtri.c - codelets/codelet_ztslqt.c - codelets/codelet_ztsmlq.c - codelets/codelet_ztsmqr.c 
- codelets/codelet_ztsmlq_hetra1.c - codelets/codelet_ztsmqr_hetra1.c - codelets/codelet_ztsqrt.c - codelets/codelet_ztstrf.c - codelets/codelet_zttlqt.c - codelets/codelet_zttmlq.c - codelets/codelet_zttmqr.c - codelets/codelet_zttqrt.c - codelets/codelet_zunmlq.c - codelets/codelet_zunmqr.c - ################## - # BUILD - ################## - codelets/codelet_zbuild.c + ${CODELETS_ZSRC} ) precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}" diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index 8af4ec546..bb26aa301 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -67,7 +67,9 @@ CHAMELEON_CL_CB(zssssm, starpu_matrix_get_nx(task->handles[0]), starpu_ma CHAMELEON_CL_CB(zsymm, starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0, 2.*M*M *N); CHAMELEON_CL_CB(zsyr2k, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+2.*M*N)*M); CHAMELEON_CL_CB(zsyrk, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, ( 1.+ M)*M*N); -CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1)); +CHAMELEON_CL_CB(ztpqrt, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]), 2.*M*N*K); +CHAMELEON_CL_CB(ztpmqrt, starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]), 4.*M*N*K); +CHAMELEON_CL_CB(ztrasm, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, 0.5*M*(M+1)); CHAMELEON_CL_CB(ztrmm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N); CHAMELEON_CL_CB(ztrsm, starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0, M*M*N); CHAMELEON_CL_CB(ztrtri, starpu_matrix_get_nx(task->handles[0]), 
starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M *M*M); diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c new file mode 100644 index 000000000..98188588e --- /dev/null +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -0,0 +1,159 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztpmqrt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "runtime/starpu/include/morse_starpu.h" +#include "runtime/starpu/include/runtime_codelet_z.h" + +void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, + MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int L, int ib, int nb, + const MORSE_desc_t *V, int Vm, int Vn, int ldv, + const MORSE_desc_t *T, int Tm, int Tn, int ldt, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb ) +{ + struct starpu_codelet *codelet = &cl_ztpmqrt; + void (*callback)(void*) = options->profiling ? 
cl_ztpmqrt_callback : NULL; + + /* Submit the task only when at least one of the four tiles is local */ + if ( morse_desc_islocal( A, Am, An ) || + morse_desc_islocal( B, Bm, Bn ) || + morse_desc_islocal( V, Vm, Vn ) || + morse_desc_islocal( T, Tm, Tn ) ) + { + /* The STARPU_VALUE arguments must be packed in the order the codelet functions unpack them */ + starpu_insert_task( + codelet, + STARPU_VALUE, &side, sizeof(MORSE_enum), + STARPU_VALUE, &trans, sizeof(MORSE_enum), + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, &N, sizeof(int), + STARPU_VALUE, &K, sizeof(int), + STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn), + STARPU_VALUE, &ldv, sizeof(int), + STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + /* Other options */ + STARPU_SCRATCH, options->ws_worker, + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + /* NOTE(review): execution_rank is not declared in this function - confirm the MPI build */ + STARPU_EXECUTE_ON_NODE, execution_rank, +#endif +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "ztpmqrt", +#endif + 0); + } +} + + +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg) +{ + MORSE_enum side; + MORSE_enum trans; + int M; + int N; + int K; + int L; + int ib; + const MORSE_Complex64_t *V; + int ldv; + const MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *WORK; + + V = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); + WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ + + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, + &ldv, &ldt, &lda, &ldb ); + + CORE_ztpmqrt( side, trans, M, N, K, L, ib, + V, ldv, T, ldt, A, lda, B, ldb, WORK ); +} + + +#if
defined(CHAMELEON_USE_CUDA) +static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) +{ + MORSE_enum side; + MORSE_enum trans; + int M; + int N; + int K; + int L; + int k; + int ib; + const cuDoubleComplex *V; + int ldv; + const cuDoubleComplex *T; + int ldt; + cuDoubleComplex *A; + int lda; + cuDoubleComplex *B; + int ldb; + cuDoubleComplex *W; + CUstream stream; + + V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); + T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); + A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); + B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); + W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */ + + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, + &ldv, &ldt, &lda, &ldb ); + + stream = starpu_cuda_get_local_stream(); + cublasSetKernelStream( stream ); + + CUDA_ztpmqrt( + side, trans, M, N, K, L, ib, + A, lda, B, ldb, V, ldv, T, ldt, + W, stream ); + +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); +#endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ +#endif /* !defined(CHAMELEON_SIMULATION) */ + + +/* + * Codelet definition + */ +CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC) diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c new file mode 100644 index 000000000..b6da13320 --- /dev/null +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -0,0 +1,99 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file codelet_ztpqrt.c + * + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. 
of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "runtime/starpu/include/morse_starpu.h" +#include "runtime/starpu/include/runtime_codelet_z.h" + +void MORSE_TASK_ztpqrt( const MORSE_option_t *options, + int M, int N, int L, int ib, int nb, + const MORSE_desc_t *A, int Am, int An, int lda, + const MORSE_desc_t *B, int Bm, int Bn, int ldb, + const MORSE_desc_t *T, int Tm, int Tn, int ldt ) +{ + struct starpu_codelet *codelet = &cl_ztpqrt; + void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL; + + /* Submit the task only when at least one of the three tiles is local */ + if ( morse_desc_islocal( A, Am, An ) || + morse_desc_islocal( B, Bm, Bn ) || + morse_desc_islocal( T, Tm, Tn ) ) + { + /* The STARPU_VALUE arguments must be packed in the order cl_ztpqrt_cpu_func unpacks them */ + starpu_insert_task( + codelet, + STARPU_VALUE, &M, sizeof(int), + STARPU_VALUE, &N, sizeof(int), + STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), + STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), + STARPU_VALUE, &lda, sizeof(int), + STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), + STARPU_VALUE, &ldb, sizeof(int), + STARPU_RW, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), + STARPU_VALUE, &ldt, sizeof(int), + /* Other options */ + STARPU_SCRATCH, options->ws_worker, + STARPU_PRIORITY, options->priority, + STARPU_CALLBACK, callback, +#if defined(CHAMELEON_USE_MPI) + /* NOTE(review): execution_rank is not declared in this function - confirm the MPI build */ + STARPU_EXECUTE_ON_NODE, execution_rank, +#endif +#if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "ztpqrt", +#endif + 0); + } +} + + +#if !defined(CHAMELEON_SIMULATION) +static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) +{ + int M; + int N; + int L; + int ib; + MORSE_Complex64_t *A; + int lda; + MORSE_Complex64_t *B; + int ldb; + MORSE_Complex64_t *T; + int ldt; + MORSE_Complex64_t *WORK; + + A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); + T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); + WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + + starpu_codelet_unpack_args(
cl_arg, &M, &N, &L, &ib, + &lda, &ldb, &ldt ); + + CORE_ztpqrt( M, N, L, ib, + A, lda, B, ldb, T, ldt, WORK ); +} +#endif /* !defined(CHAMELEON_SIMULATION) */ + + +/* + * Codelet definition + */ +CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) diff --git a/runtime/starpu/include/runtime_codelet_profile.h b/runtime/starpu/include/runtime_codelet_profile.h index 99303041a..67942fc01 100644 --- a/runtime/starpu/include/runtime_codelet_profile.h +++ b/runtime/starpu/include/runtime_codelet_profile.h @@ -119,6 +119,6 @@ extern struct starpu_perfmodel cl_##name##_fake; \ void cl_##name##_callback(); \ void profiling_display_##name##_info(void); \ - void estimate_##name##_sustained_peak(double *res); + void estimate_##name##_sustained_peak(double *res) #endif /* __CODELET_PROFILE_H__ */ diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 0da29addb..16de7ea01 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -73,6 +73,8 @@ ZCODELETS_HEADER(syssq) ZCODELETS_HEADER(trasm) ZCODELETS_HEADER(trssq) ZCODELETS_HEADER(trtri) +ZCODELETS_HEADER(tpqrt) +ZCODELETS_HEADER(tpmqrt) ZCODELETS_HEADER(tslqt) ZCODELETS_HEADER(tsmlq) ZCODELETS_HEADER(tsmqr) diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h index cf0a3bb31..14b1c8e56 100644 --- a/runtime/starpu/include/runtime_codelets.h +++ b/runtime/starpu/include/runtime_codelets.h @@ -87,7 +87,7 @@ #define CODELETS_ALL_HEADER(name) \ - CHAMELEON_CL_CB_HEADER(name) \ + CHAMELEON_CL_CB_HEADER(name); \ void cl_##name##_load_fake_model(void); \ void cl_##name##_restore_model(void); \ extern struct starpu_codelet cl_##name; \ diff --git a/runtime/starpu/include/runtime_workspace.h b/runtime/starpu/include/runtime_workspace.h index e1bd1859d..a7d25d38e 100644 --- a/runtime/starpu/include/runtime_workspace.h +++ b/runtime/starpu/include/runtime_workspace.h @@ -26,10 +26,10 @@ #ifndef 
_MORSE_STARPU_WORKSPACE_H_ #define _MORSE_STARPU_WORKSPACE_H_ -/* - * Allocate workspace in host memory: CPU for any worker +/* + * Allocate workspace in host memory: CPU for any worker * or allocate workspace in worker's memory: main memory for cpu workers, - * and embedded memory for CUDA devices. + * and embedded memory for CUDA devices. */ #define MORSE_HOST_MEM 0 #define MORSE_WORKER_MEM 1 @@ -48,7 +48,7 @@ typedef struct morse_starpu_ws_s MORSE_starpu_ws_t; * (eg. MORSE_CUDA|MORSE_CPU for all CPU and GPU workers). The * memory_location argument indicates whether this should be a buffer in host * memory or in worker's memory (MORSE_HOST_MEM or MORSE_WORKER_MEM). This function - * returns 0 upon successful completion. + * returns 0 upon successful completion. */ int RUNTIME_starpu_ws_alloc ( MORSE_starpu_ws_t **workspace, size_t size, int which_workers, int memory_location); int RUNTIME_starpu_ws_free ( MORSE_starpu_ws_t *workspace); -- GitLab From dfc3fae8bfee9c6810095f89f6c3237fbf177174 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 21 Dec 2016 00:18:46 +0100 Subject: [PATCH 6/8] Cleanup and silent warnings --- compute/pztpqrt.c | 19 ------------------- compute/ztpqrt.c | 7 +++++-- coreblas/compute/core_ztpmqrt.c | 6 +++--- runtime/starpu/codelets/codelet_ztpmqrt.c | 2 ++ runtime/starpu/codelets/codelet_ztpqrt.c | 2 ++ 5 files changed, 12 insertions(+), 24 deletions(-) diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c index 8dd8c6335..0b825d5c2 100644 --- a/compute/pztpqrt.c +++ b/compute/pztpqrt.c @@ -27,11 +27,6 @@ #define A(m,n) A, m, n #define B(m,n) B, m, n #define T(m,n) T, m, n -#if defined(CHAMELEON_COPY_DIAG) -#define DIAG(k) DIAG, k, 0 -#else -#define DIAG(k) A, k, k -#endif /***************************************************************************//** * Parallel tile QR factorization - dynamic scheduling @@ -43,7 +38,6 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, 
MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; - MORSE_desc_t *DIAG = NULL; int k, m, n; int ldak, ldbm; @@ -101,12 +95,6 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); -#if defined(CHAMELEON_COPY_DIAG) - /* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */ - DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); - morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q); -#endif - for (k = 0; k < A->nt; k++) { tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; @@ -141,11 +129,4 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); - -#if defined(CHAMELEON_COPY_DIAG) - MORSE_Sequence_Wait(sequence); - morse_desc_mat_free(DIAG); - free(DIAG); -#endif - (void)DIAG; } diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c index ef9e4232e..6efe33bbb 100644 --- a/compute/ztpqrt.c +++ b/compute/ztpqrt.c @@ -215,7 +215,8 @@ int MORSE_ztpqrt( int M, int N, int L, return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile * @@ -268,6 +269,7 @@ int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T morse_sequence_create(morse, &sequence); MORSE_ztpqrt_Tile_Async(L, A, B, T, sequence, &request); morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(B); status = sequence->status; @@ -275,7 +277,8 @@ int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T return status; } -/***************************************************************************//** +/** + 
******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile_Async * diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index 2241b5d39..b9addb167 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -46,7 +46,7 @@ * @param[in] M * The number of rows of the tile B. M >= 0. * - * @param[in] N1 + * @param[in] N * The number of columns of the tile B. N >= 0. * * @param[in] K @@ -63,7 +63,7 @@ * @param[in] V * The i-th row must contain the vector which defines the * elementary reflector H(i), for i = 1,2,...,k, as returned by - * CORE_ZTTQRT in the first k rows of its array argument V. + * CORE_ZTPQRT in the first k rows of its array argument V. * * @param[in] LDV * The leading dimension of the array V. LDV >= max(1,K). @@ -84,7 +84,7 @@ * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. * * @param[in] LDA - * The leading dimension of the array A. LDA1 >= max(1,M1). + * The leading dimension of the array A. LDA >= max(1,M). * If side = MorseLeft, LDA >= max(1,K); * If side = Morseright, LDA >= max(1,M). 
* diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index 98188588e..7afc9e1bf 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -69,6 +69,8 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, #endif 0); } + + (void)ib; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index b6da13320..06ee745ed 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -61,6 +61,8 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options, #endif 0); } + + (void)ib; (void)nb; } -- GitLab From 0dd5b28803c5aae948aa81ca3586e953379587c4 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 21 Dec 2016 00:20:41 +0100 Subject: [PATCH 7/8] Add ztpgqrt function to generate Q from ztpqrt --- compute/CMakeLists.txt | 2 + compute/pztpgqrt.c | 106 ++++++++++++ compute/ztpgqrt.c | 371 +++++++++++++++++++++++++++++++++++++++++ control/compute_z.h | 1 + include/morse_z.h | 3 + 5 files changed, 483 insertions(+) create mode 100644 compute/pztpgqrt.c create mode 100644 compute/ztpgqrt.c diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 8d61cde93..9dcc70224 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -130,6 +130,7 @@ set(ZSRC pzunmlqrh.c pzunmqr.c pzunmqrrh.c + pztpgqrt.c pztpqrt.c ### zgels.c @@ -168,6 +169,7 @@ set(ZSRC zungqr.c zunmlq.c zunmqr.c + ztpgqrt.c ztpqrt.c ################## # MIXED PRECISION diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c new file mode 100644 index 000000000..723dbf369 --- /dev/null +++ b/compute/pztpgqrt.c @@ -0,0 +1,106 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + **/ + +/** + * + * @file pztpgqrt.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define V(m,n) V, m, n +#define T(m,n) T, m, n +#define A(m,n) A, m, n +#define B(m,n) B, m, n + +/***************************************************************************//** + * Parallel tile QR factorization - dynamic scheduling + **/ +void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request ) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n; + int ldak, ldvm, ldbm; + int tempkn, tempnn, tempmm, templm; + int ib; + + /* Dimension of the first column */ + int maxm = B->m - L; + int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1); + int maxmtk; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * ztsmqr = A->nb * ib + */ + ws_worker = A->nb * ib; + + /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = max( ws_worker, ib * A->nb * 2 ); +#endif + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + for (k = V->nt-1; k >= 0; k--) { + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + ldak = BLKLDD(A, k); + + maxmtk = min( B->mt, maxmt+k ) - 1; + for (m = maxmtk; m > -1; m--) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + templm = m == maxmtk ? 
tempmm : 0; + ldvm = BLKLDD(V, m); + ldbm = BLKLDD(B, m); + + for (n = k; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztpmqrt( + &options, + MorseLeft, MorseConjTrans, + tempmm, tempnn, tempkn, templm, ib, T->nb, + V(m, k), ldvm, + T(m, k), T->mb, + A(k, n), ldak, + B(m, n), ldbm ); + } + } + } + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); +} diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c new file mode 100644 index 000000000..6a4ef6d7c --- /dev/null +++ b/compute/ztpgqrt.c @@ -0,0 +1,371 @@ +/** + * + * @copyright (c) 2009-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file ztpgqrt.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.9.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ****************************************************************************** + * + * @ingroup MORSE_Complex64_t + * + * MORSE_ztpgqrt - Generates a partial Q matrix formed with a blocked QR + * factorization of a "triangular-pentagonal" matrix C, which is composed of an + * unused triangular block and a pentagonal block V, using the compact + * representation for Q. See MORSE_ztpqrt() to generate V. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrices B, and V. M >= 0. + * + * @param[in] N + * The number of columns of the matrices B, and A. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q in the matrix V. 
+ * + * @param[in] L + * The number of rows of the upper trapezoidal part of V. + * MIN(M,K) >= L >= 0. See Further Details. + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * MORSE_ztpqrt() in the first k rows of its array argument V. + * V is a matrix of size M-by-K. The first M-L rows + * are rectangular, and the last L rows are upper trapezoidal. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,M). + * + * @param[in] descT + * On entry, auxiliary factorization data, required by MORSE_zgeqrs to + * solve the system of equations, or by any function to apply the Q. + * + * @param[in,out] A + * A is COMPLEX*16 array, dimension (LDA,N) + * On entry, the K-by-N matrix A. + * On exit, A is overwritten by the corresponding block of + * Q*A. See Further Details. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,K). + * + * @param[in,out] B + * On entry, the pentagonal M-by-N matrix B. + * On exit, B contains Q. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,M). + * + * @par Further Details: + * ===================== + * + * The input matrix Q is a (K+M)-by-N matrix + * + * Q = [ A ] + * [ B ] + * + * where A is an identity matrix, and B is a M-by-N matrix of 0. + * V is a matrix of Householder reflectors with a pentagonal shape consisting of a + * (M-L)-by-K rectangular matrix V1 on top of a L-by-K + * upper trapezoidal matrix V2: + * + * V = [ V1 ] <- (M-L)-by-K rectangular + * [ V2 ] <- L-by-K upper trapezoidal. + * + * The upper trapezoidal matrix V2 consists of the first L rows of a + * K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0, + * V is rectangular M-by-K; if M=L=K, V is upper triangular.
+ * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_ztpgqrt_Tile + * @sa MORSE_ztpgqrt_Tile_Async + * @sa MORSE_ctpgqrt + * @sa MORSE_dtpgqrt + * @sa MORSE_stpgqrt + * @sa MORSE_zgeqrs + * + ******************************************************************************/ +int MORSE_ztpgqrt( int M, int N, int K, int L, + MORSE_Complex64_t *V, int LDV, + MORSE_desc_t *descT, + MORSE_Complex64_t *A, int LDA, + MORSE_Complex64_t *B, int LDB ) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB, descV; + int minMK = min( M, K ); + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztpgqrt", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_ztpgqrt", "illegal value of M"); + return -1; + } + if (N < 0) { + morse_error("MORSE_ztpgqrt", "illegal value of N"); + return -2; + } + if (K < 0) { + morse_error("MORSE_ztpgqrt", "illegal value of K"); + return -3; + } + if ((L < 0) || ((L > minMK) && (minMK > 0))) { + morse_error("MORSE_ztpgqrt", "illegal value of L"); + return -4; + } + if (LDV < max(1, M)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDV"); + return -6; + } + if (LDA < max(1, K)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDA"); + return -9; + } + if (LDB < max(1, M)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDB"); + return -11; + } + + /* Quick return */ + if (minMK == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, K, 0); + if (status != MORSE_SUCCESS) { +
morse_error("MORSE_ztpgqrt", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descV, V, NB, NB, LDV, K, 0, 0, M, K, sequence, &request, /* V is addressed with its own leading dimension LDV */ + morse_desc_mat_free(&(descV)) ); + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request, + (morse_desc_mat_free(&(descV)), + morse_desc_mat_free(&(descA))) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request, + (morse_desc_mat_free(&(descV)), + morse_desc_mat_free(&(descA)), + morse_desc_mat_free(&(descB))) ); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_ztpgqrt_Tile_Async(L, &descV, descT, &descA, &descB, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descV); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descV, V, NB, NB, LDV, K, sequence, &request);*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_ztpgqrt_Tile - Generates a partial Q matrix formed with a blocked QR + * factorization of a "triangular-pentagonal" matrix C, which is composed of an + * unused triangular block and a pentagonal block V, using the compact + * representation for Q.
See MORSE_ztpqrt() to generate V. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N + * upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the + * diagonal represent the unitary matrix Q as a product of elementary reflectors stored + * by tiles. + * + * @param[out] T + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_ztpgqrt + * @sa MORSE_ztpgqrt_Tile_Async + * @sa MORSE_ctpgqrt_Tile + * @sa MORSE_dtpgqrt_Tile + * @sa MORSE_stpgqrt_Tile + * @sa MORSE_zgeqrs_Tile + * + ******************************************************************************/ +int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B ) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_ztpgqrt_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_ztpgqrt_Tile_Async(L, V, T, A, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_ztpgqrt_Tile_Async - Generates a partial Q matrix formed with a blocked QR + * 
factorization of a "triangular-pentagonal" matrix C, which is composed of an + * unused triangular block and a pentagonal block V, using the compact + * representation for Q. See MORSE_ztpqrt() to generate V. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_ztpgqrt + * @sa MORSE_ztpgqrt_Tile + * @sa MORSE_ctpgqrt_Tile_Async + * @sa MORSE_dtpgqrt_Tile_Async + * @sa MORSE_stpgqrt_Tile_Async + * @sa MORSE_zgeqrs_Tile_Async + * + ******************************************************************************/ +int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request ) +{ + MORSE_context_t *morse; + + morse = morse_context_self(); + if (morse == NULL) { + morse_error("MORSE_ztpgqrt_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_ztpgqrt_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_ztpgqrt_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness (numbered in argument order: V, T, A, B) */ + if (morse_desc_check(V) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(T) != MORSE_SUCCESS) { +
morse_error("MORSE_ztpgqrt_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + morse_error("MORSE_ztpgqrt_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (((B->m - L) % B->mb) != 0) { + morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + + /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ + morse_pztpgqrt(L, V, T, A, B, sequence, request); + /* } */ + /* else { */ + /* morse_pztpgqrtrh(A, T, MORSE_RHBLK, sequence, request); */ + /* } */ + + return MORSE_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index d99406b14..122120e48 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -134,6 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/include/morse_z.h b/include/morse_z.h index 07c81112a..2718a6bea 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -102,6 +102,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64 int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC); int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); +int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V, int LDV, MORSE_desc_t *descT, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB ); int MORSE_ztpqrt( int M, int N, int L, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_desc_t *descT ); int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); @@ -180,6 +181,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, int MORSE_zsyr2k_Tile(MORSE_enum uplo, 
MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C); int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); +int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B ); int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T ); int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B); int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B); @@ -255,6 +257,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request ); int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int 
MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); -- GitLab From 1d9d4d50e74891f32acf7fe3341160e7ff282f0e Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 21 Dec 2016 00:39:32 +0100 Subject: [PATCH 8/8] Update morce_cmake --- cmake_modules/morse_cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index bf123957d..ace7c7b0c 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit bf123957de0f13775792b9ee8f788c02ee87ca55 +Subproject commit ace7c7b0ce506774db62a6fc5a3e178c8822bf91 -- GitLab