diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 1285994bbbc7e54e149a863ceb7289b8feb82993..bec6c5aaf474aae765f84ac8f60e6d00c950fc62 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -68,8 +68,9 @@ set(ZSRC core_zlanhe.c core_zlansy.c core_zlantr.c - core_zlaset2.c core_zlaset.c + core_zlaset2.c + core_zlaswp.c core_zlatro.c core_zlauum.c core_zpamm.c diff --git a/coreblas/compute/core_zlaswp.c b/coreblas/compute/core_zlaswp.c new file mode 100644 index 0000000000000000000000000000000000000000..e28c82b1785c778b704eccab116b2be81bd93859 --- /dev/null +++ b/coreblas/compute/core_zlaswp.c @@ -0,0 +1,223 @@ +/** + * + * @file core_zlaswp.c + * + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon core_zlaswp_get and core_zlaswp_set CPU kernels (getrf with partial pivoting) + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Matthieu Kuhn + * @date 2023-08-31 + * @precisions normal z -> c d s + * + */ +#include "coreblas/lapacke.h" +#include "coreblas.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zlaswp_get extracts the rows from the tile A that have been selected as + * pivot into the tile B. + * + ******************************************************************************* + * + * @param[in] m0 + * The index of the first row of the tile A into the larger matrix it + * belongs to. + * + * @param[in] m + * The number of rows of the matrix A. + * + * @param[in] n + * The number of columns of the matrices A and B. + * + * @param[in] k + * The number of rows of the matrix B. This is the number of potential + * pivots that can be extracted from A. + * + * @param[in] A + * On entry, the matrix A of dimension lda-by-n where to extract the + * pivot rows if some are selected in the range m0..m0+m-1. 
+ * + * @param[in] lda + * The leading dimension of the array A. lda >= max(1,m). + * + * @param[inout] B + * On entry, a matrix of size ldb-by-n with 0s or already collected + * rows. + * On exit, B is filled with the selected rows from A, such that for + * each row i with m0 <= perm[i] < m0+m, B[i] = A[perm[i]-m0]. Other rows of B are left untouched. + * + * @param[in] ldb + * The leading dimension of the array B. ldb >= max(1,k). + * + * @param[in] perm + * The permutation array of dimension k. perm[i] is the index, in the same numbering as m0, of the row copied into row i of B. + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * + */ +int +CORE_zlaswp_get( int m0, int m, int n, int k, + const CHAMELEON_Complex64_t *A, int lda, + CHAMELEON_Complex64_t *B, int ldb, + const int *perm ) +{ + int i; + + /* Check input arguments */ + if (m0 < 0) { + coreblas_error(1, "Illegal value of m0"); + return -1; + } + if (m < 0) { + coreblas_error(2, "Illegal value of m"); + return -2; + } + if (n < 0) { + coreblas_error(3, "Illegal value of n"); + return -3; + } + if (k < 0) { + coreblas_error(4, "Illegal value of k"); + return -4; + } + if ((lda < chameleon_max(1,m)) && (m > 0)) { + coreblas_error(6, "Illegal value of lda"); + return -6; + } + if ((ldb < chameleon_max(1,k)) && (k > 0)) { + coreblas_error(8, "Illegal value of ldb"); + return -8; + } + + /* Quick return */ + if ((m == 0) || (n == 0) || (k == 0)) { + return CHAMELEON_SUCCESS; + } + + for( i=0; i<k; i++ ) + { + int idx = perm[i] - m0; /* Row of A, local to this tile, selected for row i of B */ + + if ( ( idx >= 0 ) && (idx < m ) ) /* Ignore rows that fall outside this tile */ + { + cblas_zcopy( n, A + idx, lda, + B + i, ldb ); /* Copy one row of n entries, strided by the leading dimensions */ + } + } + + return CHAMELEON_SUCCESS; +} + +/** + ****************************************************************************** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zlaswp_set copies the rows from the tile A into the tile B when they are + * the destination of the pivoted rows. 
+ * + ******************************************************************************* + * + * @param[in] m0 + * The index of the first row of the tile B into the larger matrix it + * belongs to. + * + * @param[in] m + * The number of rows of the matrix B. + * + * @param[in] n + * The number of columns of the matrices A and B. + * + * @param[in] k + * The number of rows of the matrix A. This is the number of potential + * pivots that can be inserted into B. + * + * @param[in] A + * On entry, the matrix A of dimension lda-by-n where to read the + * pivoted rows. + * + * @param[in] lda + * The leading dimension of the array A. lda >= max(1,k). + * + * @param[inout] B + * On entry, a matrix of size ldb-by-n that may require some pivoted rows. + * On exit, B is updated with the pivoted rows it needs to receive, such that for + * each row i with m0 <= invp[i] < m0+m, B[invp[i]-m0] = A[i]. + * + * @param[in] ldb + * The leading dimension of the array B. ldb >= max(1,m). + * + * @param[in] invp + * The inverse permutation array of dimension k. 
+ * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * + */ +int +CORE_zlaswp_set( int m0, int m, int n, int k, + const CHAMELEON_Complex64_t *A, int lda, + CHAMELEON_Complex64_t *B, int ldb, + const int *invp ) +{ + int i; + + /* Check input arguments */ + if (m0 < 0) { + coreblas_error(1, "Illegal value of m0"); + return -1; + } + if (m < 0) { + coreblas_error(2, "Illegal value of m"); + return -2; + } + if (n < 0) { + coreblas_error(3, "Illegal value of n"); + return -3; + } + if (k < 0) { + coreblas_error(4, "Illegal value of k"); + return -4; + } + if ((lda < chameleon_max(1,k)) && (k > 0)) { + coreblas_error(6, "Illegal value of lda"); + return -6; + } + if ((ldb < chameleon_max(1,m)) && (m > 0)) { + coreblas_error(8, "Illegal value of ldb"); + return -8; + } + + /* Quick return */ + if ((m == 0) || (n == 0) || (k == 0)) { + return CHAMELEON_SUCCESS; + } + + for( i=0; i<k; i++ ) + { + int idx = invp[i] - m0; /* Row of B, local to this tile, that receives row i of A */ + + if ( ( idx >= 0 ) && (idx < m ) ) /* Ignore destinations that fall outside this tile */ + { + cblas_zcopy( n, A + i, lda, + B + idx, ldb ); /* Copy one row of n entries, strided by the leading dimensions */ + } + } + + return CHAMELEON_SUCCESS; +} diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c index 6de9b2c63cd546ddb8216fa0f5c0bdfd522fdb1d..0383290fa13b9b02bb321c939d73fa72720ad2ad 100644 --- a/coreblas/compute/core_ztile.c +++ b/coreblas/compute/core_ztile.c @@ -9,11 +9,11 @@ * * @brief Chameleon CPU kernel interface from CHAM_tile_t layout to the real one. 
* - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @date 2023-08-31 * @precisions normal z -> c d s * */ @@ -464,6 +464,24 @@ TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CH CORE_zlaset2( uplo, n1, n2, alpha, CHAM_tile_get_ptr( A ), A->ld ); } +int +TCORE_zlaswp_get( int m0, int m, int n, int k, CHAM_tile_t *A, CHAM_tile_t *B, const int *perm ) +{ /* Tile wrapper: forwards to CORE_zlaswp_get with each tile's data pointer and leading dimension */ + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + return CORE_zlaswp_get( m0, m, n, k, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, perm ); +} + +int +TCORE_zlaswp_set( int m0, int m, int n, int k, CHAM_tile_t *A, CHAM_tile_t *B, const int *invp ) +{ /* Tile wrapper: forwards to CORE_zlaswp_set with each tile's data pointer and leading dimension */ + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + return CORE_zlaswp_set( m0, m, n, k, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, invp ); +} + int TCORE_zlatro( cham_uplo_t uplo, cham_trans_t trans, diff --git a/coreblas/compute/core_ztile_empty.c b/coreblas/compute/core_ztile_empty.c index ef1a09065093133f160a9b2891a202e8b82a3d6c..036c8326311cb1646690e9becc3c539158e33036 100644 --- a/coreblas/compute/core_ztile_empty.c +++ b/coreblas/compute/core_ztile_empty.c @@ -356,6 +356,24 @@ TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CH assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); } +int +TCORE_zlaswp_get( int m0, int m, int n, int k, CHAM_tile_t *A, CHAM_tile_t *B, const int *perm ) +{ /* Stub: traces the call and checks the tile formats, but moves no data */ + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + return 0; +} + +int +TCORE_zlaswp_set( int m0, 
int m, int n, int k, CHAM_tile_t *A, CHAM_tile_t *B, const int *invp ) +{ /* Stub: traces the call and checks the tile formats, but moves no data */ + coreblas_kernel_trace( A, B ); + assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) ); + return 0; +} + int TCORE_zlatro( cham_uplo_t uplo, cham_trans_t trans, diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h index 74382a8df5509d1ca8f2a9420fd7415a58e9dfc1..e5e68e2989f11f56517714e2692adb4ab95771a9 100644 --- a/coreblas/include/coreblas/coreblas_z.h +++ b/coreblas/include/coreblas/coreblas_z.h @@ -11,7 +11,7 @@ * * @brief Chameleon CPU CHAMELEON_Complex64_t kernels header * - * @version 1.2.0 + * @version 1.3.0 * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Jakub Kurzak @@ -22,7 +22,7 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Matthieu Kuhn - * @date 2022-02-22 + * @date 2023-08-31 * @precisions normal z -> c d s * */ @@ -178,6 +178,14 @@ void CORE_zlaset2(cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *tileA, int ldtilea); void CORE_zlaswp(int N, CHAMELEON_Complex64_t *A, int LDA, int I1, int I2, const int *IPIV, int INC); +int CORE_zlaswp_get( int m0, int m, int n, int k, + const CHAMELEON_Complex64_t *A, int lda, + CHAMELEON_Complex64_t *B, int ldb, + const int *perm ); +int CORE_zlaswp_set( int m0, int m, int n, int k, + const CHAMELEON_Complex64_t *A, int lda, + CHAMELEON_Complex64_t *B, int ldb, + const int *invp ); int CORE_zlaswp_ontile( CHAM_desc_t descA, int i1, int i2, const int *ipiv, int inc); int CORE_zlaswpc_ontile(CHAM_desc_t descA, int i1, int i2, const int *ipiv, int inc); int CORE_zlatro(cham_uplo_t uplo, cham_trans_t trans, diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h index 74cd413168d44653f27c408eee7dea77eca603ad..88d80d053b12e792beb61ea38d3a9171b3d8b2eb 100644 --- 
a/coreblas/include/coreblas/coreblas_ztile.h +++ b/coreblas/include/coreblas/coreblas_ztile.h @@ -7,11 +7,11 @@ * * @brief Chameleon CPU kernel CHAM_tile_t interface * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @date 2023-08-31 * @precisions normal z -> c d s * */ @@ -54,6 +54,8 @@ void TCORE_zlantr( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, int int TCORE_zlascal( cham_uplo_t uplo, int m, int n, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A ); void TCORE_zlaset( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_tile_t *A ); void TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A ); +int TCORE_zlaswp_get( int m0, int m, int n, int k, CHAM_tile_t *A, CHAM_tile_t *B, const int *perm ); +int TCORE_zlaswp_set( int m0, int m, int n, int k, CHAM_tile_t *A, CHAM_tile_t *B, const int *invp ); int TCORE_zlatro( cham_uplo_t uplo, cham_trans_t trans, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B ); void TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A ); #if defined(PRECISION_z) || defined(PRECISION_c)