diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index 227afc04908a49920f6917668f9fa5b7b0d7fd85..02af5df38a90e034eebb9438238440267a1a3355 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -43,6 +43,7 @@ _extra_blas = [
     ('', 'scesca', 'dcesca', 'ccesca', 'zcesca' ),
     ('', 'sgesum', 'dgesum', 'cgesum', 'zgesum' ),
     ('', 'sgersum', 'dgersum', 'cgersum', 'zgersum' ),
+    ('', 'sprint', 'dprint', 'cprint', 'zprint' ),
 ]
 
 _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 0106e546cf95b7b0875761e6de3b4209bcf91f4a..bed4df6f140c9449d7c7201bf4249373c14d6146 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -253,6 +253,7 @@ set(ZSRC
     ##################
     zgenm2.c
     pzgenm2.c
+    zprint.c
     )
 
 precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
diff --git a/compute/zprint.c b/compute/zprint.c
new file mode 100644
index 0000000000000000000000000000000000000000..3dbd715e2c9120918817caa944836222ce344292
--- /dev/null
+++ b/compute/zprint.c
@@ -0,0 +1,260 @@
+/**
+ *
+ * @file zprint.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zprint wrappers
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2022-02-22
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+#if !defined(CHAMELEON_SIMULATION)
+#include <coreblas/coreblas_z.h>
+#endif
+
+/*
+ * Static variable to know how to handle the data within the kernel
+ * This assumes that only one runtime is enabled at a time.
+ */
+static RUNTIME_id_t zprint_runtime_id = RUNTIME_SCHED_STARPU;
+
+/*
+ * Arguments forwarded to the zprint() map operator through op_args.
+ */
+struct zprint_args_s {
+    FILE       *file;   /* Output stream forwarded to CORE_zprint() */
+    const char *header; /* Label forwarded to CORE_zprint()         */
+};
+
+/*
+ * Map operator printing the tile (m, n) of descA through CORE_zprint().
+ *
+ * The tile dimensions are clipped on the last row and column of tiles, and
+ * op_args must point to a struct zprint_args_s. Nothing is printed when
+ * CHAMELEON_SIMULATION is defined. Always returns 0.
+ */
+static inline int
+zprint( const CHAM_desc_t *descA,
+        cham_uplo_t uplo, int m, int n,
+        CHAM_tile_t *tileA, void *op_args )
+{
+    CHAMELEON_Complex64_t *A;
+    struct zprint_args_s *options = (struct zprint_args_s *)op_args;
+
+    int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb;
+    int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb;
+    int lda;
+
+    /* With PaRSEC, tileA is reinterpreted as the raw data pointer */
+    if ( zprint_runtime_id == RUNTIME_SCHED_PARSEC ) {
+        A   = (CHAMELEON_Complex64_t*)tileA;
+        lda = descA->get_blkldd( descA, m );
+    }
+    else {
+        A   = CHAM_tile_get_ptr( tileA );
+        lda = tileA->ld;
+    }
+
+#if !defined(CHAMELEON_SIMULATION)
+    CORE_zprint( options->file, options->header, uplo,
+                 tempmm, tempnn, m, n, A, lda );
+#endif
+
+    return 0;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Print a matrix tile by tile.
+ *
+ *******************************************************************************
+ *
+ * @param[in] file
+ *          The stream to print to. If NULL, stdout is used.
+ *
+ * @param[in] header
+ *          The label printed in the banner of each tile.
+ *
+ * @param[in] uplo
+ *          The part of A to consider. It is forwarded to the layout
+ *          conversion, to chameleon_pmap() and to the printing kernel.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zprint_Tile
+ * @sa CHAMELEON_cprint
+ * @sa CHAMELEON_dprint
+ * @sa CHAMELEON_sprint
+ *
+ */
+int CHAMELEON_zprint( FILE *file, const char *header,
+                      cham_uplo_t uplo, int M, int N,
+                      CHAMELEON_Complex64_t *A, int LDA )
+{
+    int NB;
+    int status;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+    struct zprint_args_s options = {
+        .file = file,
+        .header = header,
+    };
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zprint", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments: -i flags the i-th argument of the function */
+    if (M < 0) {
+        chameleon_error("CHAMELEON_zprint", "illegal value of M");
+        return -4;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_zprint", "illegal value of N");
+        return -5;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_zprint", "illegal value of LDA");
+        return -7;
+    }
+
+    /* Quick return: nothing to print */
+    if (chameleon_min(N, M) == 0) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    /* Tune NB depending on M, N & NRHS; Set NBNB */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zprint", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NT */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, uplo,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    zprint_runtime_id = chamctxt->scheduler;
+    chameleon_pmap( uplo, &descAt, zprint, &options, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInput, uplo, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    /* Report the status of the submitted tasks, not the tuning one */
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Tile equivalent of CHAMELEON_zprint().
+ *
+ * Operates on matrices stored by tiles.
+ * All matrices are passed through descriptors.
+ * All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] file
+ *          The stream to print to. If NULL, stdout is used.
+ *
+ * @param[in] header
+ *          The label printed in the banner of each tile.
+ *
+ * @param[in] uplo
+ *          The part of A to consider. It is forwarded to chameleon_pmap()
+ *          and to the printing kernel.
+ *
+ * @param[in] A
+ *          On entry, the input matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zprint
+ * @sa CHAMELEON_cprint_Tile
+ * @sa CHAMELEON_dprint_Tile
+ * @sa CHAMELEON_sprint_Tile
+ *
+ */
+int CHAMELEON_zprint_Tile( FILE *file, const char *header,
+                           cham_uplo_t uplo, CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    struct zprint_args_s options = {
+        .file = file,
+        .header = header,
+    };
+    int status;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zprint_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    zprint_runtime_id = chamctxt->scheduler;
+    chameleon_pmap( uplo, A, zprint, &options, sequence, &request );
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
diff --git a/coreblas/compute/CMakeLists.txt
b/coreblas/compute/CMakeLists.txt
index d880039fe2d83e112d882414c24bec1a1e24ec69..40baddc598df9da3ce04b58a54b7fe645435b564 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -107,6 +107,7 @@ set(ZSRC
     core_zttqrt.c
     core_zunmlq.c
     core_zunmqr.c
+    core_zprint.c
     )
 if( CHAMELEON_USE_HMAT )
     list( APPEND ZSRC
diff --git a/coreblas/compute/core_zprint.c b/coreblas/compute/core_zprint.c
new file mode 100644
index 0000000000000000000000000000000000000000..0e4fdc887f0c47ca85d4900e1ff52341e9f8bc83
--- /dev/null
+++ b/coreblas/compute/core_zprint.c
@@ -0,0 +1,89 @@
+/**
+ *
+ * @file core_zprint.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_zprint CPU kernel
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-03-03
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas/lapacke.h"
+#include "coreblas.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief Print a tile stored in column-major order on a stream.
+ *
+ * A banner made of the label, the tile coordinates, the tile address and the
+ * leading dimension is printed first, followed by one line per row.
+ *
+ *******************************************************************************
+ *
+ * @param[in] file
+ *          The stream to print to. If NULL, stdout is used.
+ *
+ * @param[in] header
+ *          The label printed in the banner. If NULL, an empty label is used.
+ *
+ * @param[in] uplo
+ *          Currently ignored: all the M-by-N entries are printed.
+ *
+ * @param[in] M
+ *          The number of rows of the tile A.
+ *
+ * @param[in] N
+ *          The number of columns of the tile A.
+ *
+ * @param[in] Am
+ *          The row index of the tile, only printed in the banner.
+ *
+ * @param[in] An
+ *          The column index of the tile, only printed in the banner.
+ *
+ * @param[in] A
+ *          The M-by-N tile, read as A[j*lda + i].
+ *
+ * @param[in] lda
+ *          The leading dimension of the tile A.
+ *
+ */
+void
+CORE_zprint( FILE *file, const char *header,
+             cham_uplo_t uplo, int M, int N, int Am, int An,
+             const CHAMELEON_Complex64_t *A, int lda )
+{
+    FILE       *output = (file == NULL) ? stdout : file;
+    const char *label  = (header == NULL) ? "" : header;
+    int i, j;
+
+    (void)uplo;
+
+    fflush( output );
+    /* %p requires a pointer to void, and %s a non-NULL string */
+    fprintf( output, "--- %10s (%2d, %2d) / %p, %d:\n",
+             label, Am, An, (void *)A, lda );
+    for(i=0; i<M; i++) {
+        fprintf( output, " " );
+        for(j=0; j<N; j++) {
+#if defined(PRECISION_z) || defined(PRECISION_c)
+            fprintf( output, " (% e, % e)",
+                     creal( A[j*lda + i] ),
+                     cimag( A[j*lda + i] ));
+#else
+            fprintf( output, " % e", A[j*lda + i] );
+#endif
+        }
+        fprintf( output, "\n" );
+    }
+    fprintf( output, "-------------------------\n" );
+    fflush( output );
+}
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index 986d587f80bf16abe4a1e6479aeeb88e09a4f686..6de9b2c63cd546ddb8216fa0f5c0bdfd522fdb1d 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -1072,3 +1072,17 @@ TCORE_zgram( cham_uplo_t uplo,
     return CORE_zgram( uplo, M, N, Mt, Nt, CHAM_tile_get_ptr( Di ), Di->ld, CHAM_tile_get_ptr( Dj ), Dj->ld,
                        CHAM_tile_get_ptr( D ), CHAM_tile_get_ptr( A ), A->ld );
 }
+
+/*
+ * Tile wrapper of CORE_zprint(): prints the full-rank tile A using its own
+ * data pointer and leading dimension.
+ */
+void
+TCORE_zprint( FILE *file, const char *header,
+              cham_uplo_t uplo, int M, int N,
+              int Am, int An, const CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zprint( file, header, uplo, M, N, Am, An, CHAM_tile_get_ptr( A ), A->ld );
+}
diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h
index b0bcf3bdbbf7dfc8e465aa3f77a29451c66fe03a..050a7968769a5c37a138faee18ef0a577ee6be19 100644
--- a/coreblas/include/coreblas/coreblas_z.h
+++ b/coreblas/include/coreblas/coreblas_z.h
@@ -393,4 +393,8 @@ int CORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt,
                 const double *D,
                 double *A, int LDA );
 
+void CORE_zprint( FILE *file, const char *header,
+                  cham_uplo_t uplo, int m, int n, int Am, int An,
+                  const CHAMELEON_Complex64_t *A, int lda );
+
 #endif /* _coreblas_z_h_ */
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h index
aa66fdd751657288475070c6dec606ff08fd714a..74cd413168d44653f27c408eee7dea77eca603ad 100644 --- a/coreblas/include/coreblas/coreblas_ztile.h +++ b/coreblas/include/coreblas/coreblas_ztile.h @@ -86,5 +86,6 @@ int TCORE_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, in int TCORE_zgesum( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sum ); int TCORE_zcesca( int center, int scale, cham_store_t axis, int M, int N, int Mt, int Nt, const CHAM_tile_t *Gi, const CHAM_tile_t *Gj, const CHAM_tile_t *G, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, CHAM_tile_t *A ); int TCORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, const CHAM_tile_t *D, CHAM_tile_t *A ); +void TCORE_zprint( FILE *file, const char *header, cham_uplo_t uplo, int M, int N, int Am, int An, const CHAM_tile_t *A ); #endif /* _coreblas_ztile_h_ */ diff --git a/include/chameleon.h b/include/chameleon.h index 4f75ba8306c7da129df957100edf313cb121987c..c0a9ea9a9f6896be35a78ffab0b2e264c2bf6772 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -27,6 +27,7 @@ * CHAMELEON types and constants */ #include "chameleon/config.h" +#include <stdio.h> #include "chameleon/constants.h" #include "chameleon/types.h" #include "chameleon/struct.h" @@ -37,7 +38,6 @@ */ #include "chameleon/runtime.h" - /* **************************************************************************** * CHAMELEON Simulation mode */ diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index c36e6289dcfc7dd19122cd671f25a66717c586f2..e587d015c4239a90fb10c228d9bb41faaca7dda1 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -382,6 +382,9 @@ int CHAMELEON_zgram( cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA int CHAMELEON_zgram_Tile( cham_uplo_t uplo, CHAM_desc_t *A ); int CHAMELEON_zgram_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, void *user_ws, RUNTIME_sequence_t *sequence, 
RUNTIME_request_t *request ); +int CHAMELEON_zprint( FILE *file, const char *header, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, int LDA ); +int CHAMELEON_zprint_Tile( FILE *file, const char *header, cham_uplo_t uplo, CHAM_desc_t *descA ); + END_C_DECLS #endif /* _chameleon_z_h_ */