diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c409a3fa0b3b6e2f2ed5d47cac533051e0c3512..8b00b6d5e759a5ee7f44a0d2b69333bf630d9e5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ option(BUILD_SHARED_LIBS "Build shared libraries" OFF) # Define precision supported by CHAMELEON # ----------------------------------------- set( RP_CHAMELEON_DICTIONNARY ${CMAKE_SOURCE_DIR}/cmake_modules/local_subs.py ) -set( RP_CHAMELEON_PRECISIONS "s;d;c;z" ) +set( RP_CHAMELEON_PRECISIONS "s;d;c;z;ds;zc" ) include(RulesPrecisions) # Check that at least one option CHAMELEON_PREC_ is set to ON # count number of CHAMELEON_PREC_ sets to ON diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index 0f404e1a52472500e2ada61ac251bff26bbb7c6d..54c37c11fd3b4b756a0d3f52c336b5441276e813 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -50,16 +50,18 @@ subs = { # ------------------------------------------------------------ # replacements applied to mixed precision files. 'mixed' : [ + ('dstile', 'zctile' ), # double/single, double/single-complex #'12345678901234567890', '12345678901234567890') (r'\bdouble', r'\bCHAMELEON_Complex64_t'), - (r'\bChamRealDouble', r'\bChamComplexDouble' ), + (r'\bChamRealDouble', r'\bChamComplexDouble' ), (r'\bfloat', r'\bCHAMELEON_Complex32_t'), - (r'\bChamRealFloat', r'\bChamComplexFloat' ), - (r'\breal\b', r'\bcomplex\b' ), + (r'\bChamRealFloat', r'\bChamComplexFloat' ), + (r'\breal\b', r'\bcomplex\b' ), - ('dsgels', 'zcgels' ), - ('dsorgesv', 'zcungesv' ), + ('dsgels', 'zcgels' ), + ('dsorgesv', 'zcungesv' ), + ('codelet_ds', 'codelet_zc' ), ], # ------------------------------------------------------------ # replacements applied to mixed precision files. 
diff --git a/compute/pzlag2c.c b/compute/pzlag2c.c index bc28fcf1165846645f139ba432d40a6aad19e174..f6d01a2469d2f3e414c8474e50eec79d229b058d 100644 --- a/compute/pzlag2c.c +++ b/compute/pzlag2c.c @@ -24,23 +24,54 @@ */ #include "control/common.h" -#define A(m,n) A, m, n -#define B(m,n) B, m, n -#define SA(m,n) SA, m, n -#define SB(m,n) SB, m, n +#define A( _m_, _n_ ) A, (_m_), (_n_) +#define B( _m_, _n_ ) B, (_m_), (_n_) + /** * */ +void chameleon_pclag2z( CHAM_desc_t *A, CHAM_desc_t *B, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + + int tempmm, tempnn; + int m, n; + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) { + return; + } + RUNTIME_options_init(&options, chamctxt, sequence, request); + + for(m = 0; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + + for(n = 0; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb; + + INSERT_TASK_clag2z( + &options, + tempmm, tempnn, A->mb, + A(m, n), + B(m, n)); + } + } + + RUNTIME_options_finalize(&options, chamctxt); +} + /** * */ -void chameleon_pclag2z(CHAM_desc_t *SA, CHAM_desc_t *B, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) +void chameleon_pzlag2c( CHAM_desc_t *A, CHAM_desc_t *B, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; RUNTIME_option_t options; - int X, Y; + int tempmm, tempnn; int m, n; chamctxt = chameleon_context_self(); @@ -49,16 +80,19 @@ void chameleon_pclag2z(CHAM_desc_t *SA, CHAM_desc_t *B, } RUNTIME_options_init(&options, chamctxt, sequence, request); - for(m = 0; m < SA->mt; m++) { - X = m == SA->mt-1 ? SA->m-m*SA->mb : SA->mb; - for(n = 0; n < SA->nt; n++) { - Y = n == SA->nt-1 ? SA->n-n*SA->nb : SA->nb; - INSERT_TASK_clag2z( + for(m = 0; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb; + + for(n = 0; n < A->nt; n++) { + tempnn = n == A->nt-1 ? 
A->n - n * A->nb : A->nb; + + INSERT_TASK_zlag2c( &options, - X, Y, SA->mb, - SA(m, n), + tempmm, tempnn, A->mb, + A(m, n), B(m, n)); } } + RUNTIME_options_finalize(&options, chamctxt); } diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index dc2aac9e239f0d7136f548016105225de76a72c8..d4c8cb065d537cec4ee00355391a30f6a4b8afc5 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -116,10 +116,12 @@ endif() if ( CHAMELEON_NO_KERNELS ) list( APPEND ZSRC core_ztile_empty.c + core_zctile_empty.c ) else() list( APPEND ZSRC core_ztile.c + core_zctile.c ) endif() diff --git a/coreblas/compute/core_zctile.c b/coreblas/compute/core_zctile.c new file mode 100644 index 0000000000000000000000000000000000000000..55dd865158bfdbc3d4070c8da1dee7d0783f8ce8 --- /dev/null +++ b/coreblas/compute/core_zctile.c @@ -0,0 +1,41 @@ +/** + * + * @file core_zctile.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @brief Chameleon CPU kernel interface from CHAM_tile_t layout to the real one. 
+ * + * @version 1.2.0 + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#include "coreblas.h" +#include "coreblas/coreblas_zctile.h" + +void +TCORE_clag2z( int M, int N, + const CHAM_tile_t *A, + CHAM_tile_t *B ) +{ + coreblas_kernel_trace( A, B ); + assert( A->format & CHAMELEON_TILE_FULLRANK ); + assert( B->format & CHAMELEON_TILE_FULLRANK ); + CORE_clag2z( M, N, A->mat, A->ld, B->mat, B->ld ); +} + +void +TCORE_zlag2c( int M, int N, + const CHAM_tile_t *A, + CHAM_tile_t *B, int *info ) +{ + coreblas_kernel_trace( A, B ); + assert( A->format & CHAMELEON_TILE_FULLRANK ); + assert( B->format & CHAMELEON_TILE_FULLRANK ); + CORE_zlag2c( M, N, A->mat, A->ld, B->mat, B->ld, info ); +} diff --git a/coreblas/compute/core_zctile_empty.c b/coreblas/compute/core_zctile_empty.c new file mode 100644 index 0000000000000000000000000000000000000000..6e265d6f56e90d669c0e105170d947aacad03a30 --- /dev/null +++ b/coreblas/compute/core_zctile_empty.c @@ -0,0 +1,39 @@ +/** + * + * @file core_zctile_empty.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @brief Chameleon CPU kernel interface from CHAM_tile_t layout to the real one. 
+ * + * @version 1.2.0 + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#include "coreblas.h" +#include "coreblas/coreblas_zctile.h" + +void +TCORE_clag2z( int M, int N, + const CHAM_tile_t *A, + CHAM_tile_t *B ) +{ + coreblas_kernel_trace( A, B ); + assert( A->format & CHAMELEON_TILE_FULLRANK ); + assert( B->format & CHAMELEON_TILE_FULLRANK ); +} + +void +TCORE_zlag2c( int M, int N, + const CHAM_tile_t *A, + CHAM_tile_t *B, int *info ) +{ + coreblas_kernel_trace( A, B ); + assert( A->format & CHAMELEON_TILE_FULLRANK ); + assert( B->format & CHAMELEON_TILE_FULLRANK ); +} diff --git a/coreblas/include/CMakeLists.txt b/coreblas/include/CMakeLists.txt index aea8f77b606d71323f230a718ed22e4a0273c364..acd970620f0b741447b597e00eafb4de3c1e5b03 100644 --- a/coreblas/include/CMakeLists.txt +++ b/coreblas/include/CMakeLists.txt @@ -33,6 +33,7 @@ set(ZHDR coreblas/coreblas_z.h coreblas/coreblas_zc.h coreblas/coreblas_ztile.h + coreblas/coreblas_zctile.h ) if( CHAMELEON_USE_HMAT ) list( APPEND ZHDR diff --git a/coreblas/include/coreblas/coreblas_zc.h b/coreblas/include/coreblas/coreblas_zc.h index 0e62bef4d60cd0eff9880d99e85c764d7da1f318..74b4b68ae962affe10b4b7c9ea55cdcf480a22ec 100644 --- a/coreblas/include/coreblas/coreblas_zc.h +++ b/coreblas/include/coreblas/coreblas_zc.h @@ -12,8 +12,6 @@ * @brief Chameleon CPU complex mixed precision kernels header * * @version 1.2.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Jakub Kurzak * @author Hatem Ltaief * @author Mathieu Faverge @@ -30,11 +28,11 @@ /** * Declarations of serial kernels - alphabetical order */ -void CORE_clag2z(int m, int n, - const CHAMELEON_Complex32_t *A, int lda, - CHAMELEON_Complex64_t *B, int ldb); -void CORE_zlag2c(int m, int n, - const CHAMELEON_Complex64_t *A, int lda, - CHAMELEON_Complex32_t *B, int ldb, int *info); +void CORE_clag2z( int m, int n, + const CHAMELEON_Complex32_t *A, int lda, + 
CHAMELEON_Complex64_t *B, int ldb ); +void CORE_zlag2c( int m, int n, + const CHAMELEON_Complex64_t *A, int lda, + CHAMELEON_Complex32_t *B, int ldb, int *info ); #endif /* _coreblas_zc_h_ */ diff --git a/coreblas/include/coreblas/coreblas_zctile.h b/coreblas/include/coreblas/coreblas_zctile.h new file mode 100644 index 0000000000000000000000000000000000000000..ccf4f82f315e1667834ca95a4c11557440844908 --- /dev/null +++ b/coreblas/include/coreblas/coreblas_zctile.h @@ -0,0 +1,23 @@ +/** + * + * @file coreblas_zctile.h + * + * @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800 ), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @brief Chameleon CPU kernel CHAM_tile_t interface + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#ifndef _coreblas_zctile_h_ +#define _coreblas_zctile_h_ + +void TCORE_clag2z( int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B ); +void TCORE_zlag2c( int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B, int *info ); + +#endif /* _coreblas_zctile_h_ */ diff --git a/include/chameleon/descriptor_helpers.h b/include/chameleon/descriptor_helpers.h index 575ed18a3afba3994512177c680a7fdedbd683d8..534ba8152ea885fb19a0934e88f5672733be5417 100644 --- a/include/chameleon/descriptor_helpers.h +++ b/include/chameleon/descriptor_helpers.h @@ -4,7 +4,7 @@ * * @copyright 2009-2014 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. 
* *** diff --git a/include/chameleon/tasks_zc.h b/include/chameleon/tasks_zc.h index 6e2b83469d899bbb1932a5199afe614ba3693a7e..ce6b5ad441a38bd332d80a9b91f438946ba9112b 100644 --- a/include/chameleon/tasks_zc.h +++ b/include/chameleon/tasks_zc.h @@ -31,12 +31,12 @@ * Declarations of QUARK wrappers (called by CHAMELEON) - alphabetical order */ void INSERT_TASK_clag2z( const RUNTIME_option_t *options, - int m, int n, int nb, - CHAM_desc_t *A, int Am, int An, int lda, - CHAM_desc_t *B, int Bm, int Bn, int ldb ); + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ); void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, - int m, int n, int nb, - CHAM_desc_t *A, int Am, int An, int lda, - CHAM_desc_t *B, int Bm, int Bn, int ldb ); + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ); #endif /* _chameleon_tasks_zc_h_ */ diff --git a/lapack_api/include/lapack_api_common.h b/lapack_api/include/lapack_api_common.h index c298574fbc46808d995b1d1352c07fe16fa9c121..88368c6b96ba0aad5b73385762d1d557878d3e32 100644 --- a/lapack_api/include/lapack_api_common.h +++ b/lapack_api/include/lapack_api_common.h @@ -2,8 +2,8 @@ * * @file lapack_api_common.h * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. - * All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_api_common.c b/lapack_api/src/lapack_api_common.c index f1773234e73dbac9afa92bb2941c46bf31975916..684add4206c538e24a354225c141ef9400c24a4c 100644 --- a/lapack_api/src/lapack_api_common.c +++ b/lapack_api/src/lapack_api_common.c @@ -2,8 +2,8 @@ * * @file lapack_api_common.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. 
All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zgemm.c b/lapack_api/src/lapack_zgemm.c index 86c22af92f60b11566271c80d2294b3ae4b6b910..002f598132557fb915c72542ac2a778dff924320 100644 --- a/lapack_api/src/lapack_zgemm.c +++ b/lapack_api/src/lapack_zgemm.c @@ -2,8 +2,8 @@ * * @file lapack_zgemm.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zhemm.c b/lapack_api/src/lapack_zhemm.c index fbd99b5744e0d1e27c4745b841db2e271dca39b6..aaa015f4940edebf8293fa6a369472537fce569e 100644 --- a/lapack_api/src/lapack_zhemm.c +++ b/lapack_api/src/lapack_zhemm.c @@ -2,8 +2,8 @@ * * @file lapack_zhemm.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zher2k.c b/lapack_api/src/lapack_zher2k.c index 4fd98ae53b69ad3739209479cfba5cdd1fd5dd5f..bc7113586cfd6b9d9c8249288218c7d1dcfa8980 100644 --- a/lapack_api/src/lapack_zher2k.c +++ b/lapack_api/src/lapack_zher2k.c @@ -2,8 +2,8 @@ * * @file lapack_zher2k.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zherk.c b/lapack_api/src/lapack_zherk.c index 10345bb85606ab9f82440110a2bc546b5ef5195d..1da89edb1ac3b4238a478b2a3ed7eedf07e00db4 100644 --- a/lapack_api/src/lapack_zherk.c +++ b/lapack_api/src/lapack_zherk.c @@ -2,8 +2,8 @@ * * @file lapack_zherk.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
+ * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zlacpy.c b/lapack_api/src/lapack_zlacpy.c index e027ebef9ec3c28be50f1ff3a12bfd036cf9c22c..768a4e2d4aaf19d60b2c194d89bcbb0441edcda4 100644 --- a/lapack_api/src/lapack_zlacpy.c +++ b/lapack_api/src/lapack_zlacpy.c @@ -2,8 +2,8 @@ * * @file lapack_zlacpy.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zlaset.c b/lapack_api/src/lapack_zlaset.c index adc5036c02b0a517e44da314459fa52038b94423..7b6b76fb0ed204d8084872717652f5f9b3bce89e 100644 --- a/lapack_api/src/lapack_zlaset.c +++ b/lapack_api/src/lapack_zlaset.c @@ -2,8 +2,8 @@ * * @file lapack_zlaset.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zlauum.c b/lapack_api/src/lapack_zlauum.c index d4c46cba4ae229ab0cee7f4d1bbe9f039c1cd4a0..14ab0ccde4e7c5a0c38fd1c3f93005e78c683ece 100644 --- a/lapack_api/src/lapack_zlauum.c +++ b/lapack_api/src/lapack_zlauum.c @@ -2,8 +2,8 @@ * * @file lapack_zlauum.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
* *** * diff --git a/lapack_api/src/lapack_zposv.c b/lapack_api/src/lapack_zposv.c index 4894b1a3ee06fd82997715eb3525175d5c3eb080..9c8e3c6c9b5ab5d51bcfe5e329a0dbd396bf6ccf 100644 --- a/lapack_api/src/lapack_zposv.c +++ b/lapack_api/src/lapack_zposv.c @@ -2,8 +2,8 @@ * * @file lapack_zposv.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zpotrf.c b/lapack_api/src/lapack_zpotrf.c index de9d20e9eb423169ee21ea6b997e5513373dac29..a8a5471e7f72d106baa33c5057750106f9a57927 100644 --- a/lapack_api/src/lapack_zpotrf.c +++ b/lapack_api/src/lapack_zpotrf.c @@ -2,8 +2,8 @@ * * @file lapack_zpotrf.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zpotri.c b/lapack_api/src/lapack_zpotri.c index d7595dfd73203fca75f426024650fca906102b57..f5ecc40960e70dbd4dafe9a52e89f170653358ef 100644 --- a/lapack_api/src/lapack_zpotri.c +++ b/lapack_api/src/lapack_zpotri.c @@ -2,8 +2,8 @@ * * @file lapack_zpotri.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zpotrs.c b/lapack_api/src/lapack_zpotrs.c index d8fe9a5ce2e6adf11c41ef66a472a8e4d7ff3a15..acff42247364d248a509086c5c9063628689e348 100644 --- a/lapack_api/src/lapack_zpotrs.c +++ b/lapack_api/src/lapack_zpotrs.c @@ -2,8 +2,8 @@ * * @file lapack_zpotrs.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
+ * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zsymm.c b/lapack_api/src/lapack_zsymm.c index 5bcc133b92dfe8a112bce29335650b2866e12c23..dbea3da8264220d44c11c6106435d5904d7667b7 100644 --- a/lapack_api/src/lapack_zsymm.c +++ b/lapack_api/src/lapack_zsymm.c @@ -2,8 +2,8 @@ * * @file lapack_zsymm.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zsyr2k.c b/lapack_api/src/lapack_zsyr2k.c index 10908fad9bf3a5be177c3f40621209dd6932f306..3019b0b8766846e4e7acc42c2e6c3de33bfc2f2e 100644 --- a/lapack_api/src/lapack_zsyr2k.c +++ b/lapack_api/src/lapack_zsyr2k.c @@ -2,8 +2,8 @@ * * @file lapack_zsyr2k.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_zsyrk.c b/lapack_api/src/lapack_zsyrk.c index 812c5f78d8cab521ded7bfefff68d6a324c00156..d9b36bfb124796d4fb727b543031123e54f48526 100644 --- a/lapack_api/src/lapack_zsyrk.c +++ b/lapack_api/src/lapack_zsyrk.c @@ -2,8 +2,8 @@ * * @file lapack_zsyrk.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
* *** * diff --git a/lapack_api/src/lapack_ztrmm.c b/lapack_api/src/lapack_ztrmm.c index f35081d9d18ddd501d036727af12e6777d2f9079..6a5160aaeb10a5cd88b09a5cffe715c02cb5664e 100644 --- a/lapack_api/src/lapack_ztrmm.c +++ b/lapack_api/src/lapack_ztrmm.c @@ -2,8 +2,8 @@ * * @file lapack_ztrmm.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_ztrsm.c b/lapack_api/src/lapack_ztrsm.c index f00521ef65a8f4795fddf468ca14925f2ed686e0..96d94855b6b794c344e70a013c7fe1e560505877 100644 --- a/lapack_api/src/lapack_ztrsm.c +++ b/lapack_api/src/lapack_ztrsm.c @@ -2,8 +2,8 @@ * * @file lapack_ztrsm.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * *** * diff --git a/lapack_api/src/lapack_ztrtri.c b/lapack_api/src/lapack_ztrtri.c index acc665efeebfd2ca95ec72c26d7b848f2de5bb33..3681f54038b19ec44d3be52ca15eed259bcbb5c2 100644 --- a/lapack_api/src/lapack_ztrtri.c +++ b/lapack_api/src/lapack_ztrtri.c @@ -2,8 +2,8 @@ * * @file lapack_ztrtri.c * - * @copyright 2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. + * @copyright 2022-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
* *** * diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index dec860e8e112b70e2e40798ebc4feda3e16ab46c..37b34b2750e5523e2b731d3bde0814ab722715c6 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -30,6 +30,7 @@ # List of codelets required by all runtimes # ----------------------------------------- set(CODELETS_ZSRC + codelets/codelet_zlag2c.c codelets/codelet_dlag2z.c codelets/codelet_dzasum.c ################## diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c index 369b5dd059103901ebb0977444dfe6c33fd85891..30a7c94e8d037f305fde9944ad346d740cb25f6a 100644 --- a/runtime/openmp/codelets/codelet_zlag2c.c +++ b/runtime/openmp/codelets/codelet_zlag2c.c @@ -17,8 +17,8 @@ * */ #include "chameleon_openmp.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_ztile.h" +#include "chameleon/tasks_zc.h" +#include "coreblas/coreblas_zctile.h" void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, int m, int n, int nb, @@ -26,9 +26,12 @@ void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn ) { CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); - CHAMELEON_Complex32_t *tileB = B->get_blktile( B, Bm, Bn ); + CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn ); #pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] ) - TCORE_zlag2c( m, n, tileA, tileB ); + { + int info = 0; + TCORE_zlag2c( m, n, tileA, tileB, &info ); + } (void)options; (void)nb; @@ -39,7 +42,7 @@ void INSERT_TASK_clag2z( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *B, int Bm, int Bn ) { - CHAMELEON_Complex32_t *tileA = A->get_blktile( A, Am, An ); + CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn ); #pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] ) TCORE_clag2z( m, n, tileA, tileB ); diff --git 
a/runtime/parsec/codelets/codelet_zlag2c.c b/runtime/parsec/codelets/codelet_zlag2c.c index 24255ade3f3e1dacfa3f9c491eadd4ef3b13d056..6df975c1144166e9b815b4f2caa50c2f9e4f9167 100644 --- a/runtime/parsec/codelets/codelet_zlag2c.c +++ b/runtime/parsec/codelets/codelet_zlag2c.c @@ -16,17 +16,18 @@ * @author Florent Pruvost * @author Mathieu Faverge * @date 2022-02-22 - * @precisions normal z -> c d s + * @precisions mixed zc -> ds * */ #include "chameleon_parsec.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_z.h" +#include "chameleon/tasks_zc.h" +#include "coreblas/coreblas_zc.h" static inline int CORE_zlag2c_parsec( parsec_execution_stream_t *context, parsec_task_t *this_task ) { + int info; int m; int n; CHAMELEON_Complex64_t *A; @@ -37,16 +38,16 @@ CORE_zlag2c_parsec( parsec_execution_stream_t *context, parsec_dtd_unpack_args( this_task, &m, &n, &A, &lda, &B, &ldb ); - CORE_zlag2c( m, n, A, lda, B, ldb ); + CORE_zlag2c( m, n, A, lda, B, ldb, &info ); (void)context; return PARSEC_HOOK_RETURN_DONE; } -void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); @@ -68,7 +69,8 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, * */ static inline int -CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task) +CORE_clag2z_parsec( parsec_execution_stream_t *context, + parsec_task_t *this_task ) { int m; int n; @@ -86,10 +88,10 @@ CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task) return PARSEC_HOOK_RETURN_DONE; } -void INSERT_TASK_clag2z(const RUNTIME_option_t *options, - int m, int n, int 
nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_clag2z( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); CHAM_tile_t *tileA = A->get_blktile( A, Am, An ); diff --git a/runtime/quark/codelets/codelet_zlag2c.c b/runtime/quark/codelets/codelet_zlag2c.c index 336540606b8c3536aa2bd73407233c847174d52c..2c6bccb84afe9983a28162d3d860fdce0cc1faed 100644 --- a/runtime/quark/codelets/codelet_zlag2c.c +++ b/runtime/quark/codelets/codelet_zlag2c.c @@ -21,10 +21,10 @@ * */ #include "chameleon_quark.h" -#include "chameleon/tasks_z.h" -#include "coreblas/coreblas_ztile.h" +#include "chameleon/tasks_zc.h" +#include "coreblas/coreblas_zctile.h" -void CORE_zlag2c_quark(Quark *quark) +void CORE_zlag2c_quark( Quark *quark ) { int m; int n; @@ -34,50 +34,53 @@ void CORE_zlag2c_quark(Quark *quark) RUNTIME_request_t *request; int info; - quark_unpack_args_6(quark, m, n, tileA, tileB, sequence, request); + quark_unpack_args_6( quark, m, n, tileA, tileB, sequence, request ); TCORE_zlag2c( m, n, tileA, tileB, &info ); if ( (sequence->status != CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, info ); } } -void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zlag2c( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LAG2C; - QUARK_Insert_Task(opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, - 
sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn), OUTPUT, - sizeof(RUNTIME_sequence_t*), &(options->sequence), VALUE, - sizeof(RUNTIME_request_t*), &(options->request), VALUE, - 0); + QUARK_Insert_Task( opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, + sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn), OUTPUT, + sizeof(RUNTIME_sequence_t*), &(options->sequence), VALUE, + sizeof(RUNTIME_request_t*), &(options->request), VALUE, + 0 ); } -void CORE_clag2z_quark(Quark *quark) +void CORE_clag2z_quark( Quark *quark ) { int m; int n; CHAM_tile_t *tileA; CHAM_tile_t *tileB; - quark_unpack_args_6(quark, m, n, tileA, tileB); - TCORE_clag2z( m, n, tileA, tileB); + quark_unpack_args_4( quark, m, n, tileA, tileB ); + TCORE_clag2z( m, n, tileA, tileB ); } -void INSERT_TASK_clag2z(const RUNTIME_option_t *options, - int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_clag2z( const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { - QUARK_Insert_Task(opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt, - sizeof(int), &m, VALUE, - sizeof(int), &n, VALUE, - sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An), INPUT, - sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, - 0); + quark_option_t *opt = (quark_option_t*)(options->schedopt); + DAG_CORE_LAG2C; + + QUARK_Insert_Task( opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt, + sizeof(int), &m, VALUE, + sizeof(int), &n, VALUE, + sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An), INPUT, + sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, + 0 ); } diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 
cf460a8d1d0bb531b4b170975196ee5331589ac0..3863d96a0d834ea9517461c51f10fa0fa347427f 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -162,6 +162,7 @@ configure_file("include/chameleon_starpu.h.in" set(RUNTIME_HDRS_GENERATED "") set(ZHDR include/runtime_codelet_z.h + include/runtime_codelet_zc.h ) precisions_rules_py(RUNTIME_HDRS_GENERATED "${ZHDR}" @@ -233,6 +234,7 @@ set_source_files_properties(control/runtime_profiling.c PROPERTIES COMPILE_FLAGS set(RUNTIME_SRCS_GENERATED "") set(ZSRC codelets/codelet_zcallback.c + codelets/codelet_zccallback.c ${CODELETS_ZSRC} ) diff --git a/runtime/starpu/codelets/codelet_zccallback.c b/runtime/starpu/codelets/codelet_zccallback.c new file mode 100644 index 0000000000000000000000000000000000000000..40bdd7dc9f733333ea3bcacdb75a320cd3e214e1 --- /dev/null +++ b/runtime/starpu/codelets/codelet_zccallback.c @@ -0,0 +1,26 @@ +/** + * + * @file starpu/codelet_zccallback.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zc callback StarPU codelet + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Cedric Augonnet + * @author Florent Pruvost + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#include "chameleon_starpu.h" +#include "runtime_codelet_zc.h" + +CHAMELEON_CL_CB(zlag2c, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*N) +CHAMELEON_CL_CB(clag2z, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*N) diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index 8c652200e8ba0f3eb2e752a257a6917ac8b989b0..d05ff0b3e85b6eb44dfa54bd54d7072d99187983 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -84,8 +84,9 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, B, Bm, Bn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, B, Bm, Bn ); + return; } struct starpu_codelet *codelet = &cl_zgeadd; diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 9e1574d7abd7fe95b0c47904aa44f83744f0e9a1..726c01f4715c84050fc5a01171ba9577dc34f602 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -104,8 +104,9 @@ void INSERT_TASK_zgemm_Astat( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. 
) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zgemm_args_s *clargs = NULL; @@ -191,8 +192,9 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zgemm_args_s *clargs = NULL; diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c index b3cfebbd93d958e206b56907cb42bb3ebaec6df5..9682c03fb666414f98bb1b6ed104eee3f78ad519 100644 --- a/runtime/starpu/codelets/codelet_zgersum.c +++ b/runtime/starpu/codelets/codelet_zgersum.c @@ -36,6 +36,7 @@ cl_zgersum_redux_cpu_func( void *descr[], void *cl_arg ) TCORE_zgeadd( ChamNoTrans, tileA->m, tileA->n, 1., tileB, 1., tileA ); + (void)cl_arg; return; } @@ -59,6 +60,7 @@ cl_zgersum_redux_cuda_func( void *descr[], void *cl_arg ) &zone, tileA->mat, tileA->ld, handle ); + (void)cl_arg; return; } #endif /* defined(CHAMELEON_USE_CUDA) */ @@ -67,7 +69,7 @@ cl_zgersum_redux_cuda_func( void *descr[], void *cl_arg ) /* * Codelet definition */ -CODELETS( zgersum_redux, cl_zgersum_redux_cpu_func, cl_zgersum_redux_cuda_func, STARPU_CUDA_ASYNC ); +CODELETS( zgersum_redux, cl_zgersum_redux_cpu_func, cl_zgersum_redux_cuda_func, STARPU_CUDA_ASYNC ) #if !defined(CHAMELEON_SIMULATION) static void @@ -103,7 +105,7 @@ cl_zgersum_init_cuda_func( void *descr[], void *cl_arg ) /* * Codelet definition */ -CODELETS( zgersum_init, cl_zgersum_init_cpu_func, cl_zgersum_init_cuda_func, STARPU_CUDA_ASYNC ); +CODELETS( zgersum_init, cl_zgersum_init_cpu_func, cl_zgersum_init_cuda_func, STARPU_CUDA_ASYNC ) void RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An ) diff --git 
a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 4451431f4c075feca2d6d6f6170098be3cf1d3cc..baf094206c5bc9d0952d167ad287d7a564058a96 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -101,8 +101,9 @@ void INSERT_TASK_zhemm_Astat( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zhemm_args_s *clargs = NULL; @@ -187,8 +188,9 @@ void INSERT_TASK_zhemm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zhemm_args_s *clargs = NULL; diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 9fa3a1f782aa51f49a542d9d7da2bda3bf3aede9..0bd71121a8db1feb978181341e71e5dad8a44d6e 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -97,8 +97,9 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options, double beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. 
) { - return INSERT_TASK_zlascal( options, uplo, n, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + return; } (void)nb; diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index a83d212a6cb7319f4b14f18c561d5f07aed9bc65..864b492e72c140e8b0bbbd0de05b97bd34fea7db 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -88,8 +88,9 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options, double beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, uplo, n, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zherk_args_s *clargs = NULL; diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index a259373a2e9b1b2a96af14c1f1829dfad864f4b3..3ca3d7e702ce34cd25ae67fbf7aa5049fc8ed503 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -25,11 +25,12 @@ * */ #include "chameleon_starpu.h" -#include "runtime_codelet_z.h" +#include "runtime_codelet_zc.h" #if !defined(CHAMELEON_SIMULATION) static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) { + int info = 0; int m; int n; CHAM_tile_t *tileA; @@ -39,7 +40,7 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) tileB = cti_interface_get(descr[1]); starpu_codelet_unpack_args(cl_arg, &m, &n); - TCORE_zlag2c( m, n, tileA, tileB); + TCORE_zlag2c( m, n, tileA, tileB, &info ); } #endif /* !defined(CHAMELEON_SIMULATION) */ diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 986292de9dcbc550e0d6b22b8553bec1a8e2a661..aacfbbb3fb0f0d9c09d01f3f6dd888546d2182ea 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -57,8 +57,9 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t 
*options, const CHAM_desc_t *A, int Am, int An ) { if ( alpha == 0. ) { - return INSERT_TASK_zlaset( options, uplo, m, n, - alpha, alpha, A, Am, An ); + INSERT_TASK_zlaset( options, uplo, m, n, + alpha, alpha, A, Am, An ); + return; } else if ( alpha == 1. ) { return; diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 62b8de470b7c36f0cb82ecf826c0444707289d71..d641fa8c18b14cadd7a018be6712ca24e98ef8f6 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -101,8 +101,9 @@ void INSERT_TASK_zsymm_Astat( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zsymm_args_s *clargs = NULL; @@ -187,8 +188,9 @@ void INSERT_TASK_zsymm( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zsymm_args_s *clargs = NULL; diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 169936ca95ff61153ffc582a73e63d0d21319887..d4c8686a4c248052435f344667bac610c0c1cdd5 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -96,8 +96,9 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { if ( alpha == 0. 
) { - return INSERT_TASK_zlascal( options, uplo, n, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + return; } (void)nb; diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 86a95d034f5fab0cf1e12eec399a17b4c4f556e9..d6fa9a62a47affecb1a7d1e8716c259ee4b18282 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -88,8 +88,9 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn ) { if ( alpha == 0. ) { - return INSERT_TASK_zlascal( options, uplo, n, n, nb, - beta, C, Cm, Cn ); + INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + return; } struct cl_zsyrk_args_s *clargs = NULL; diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index f1522ecc06f82b994a50d407a0530b2a7ca4ea29..6b6d3e34db9123bc42f3f92fa305436ded1e8caf 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -61,8 +61,9 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { if ( alpha == 0. 
) { - return INSERT_TASK_zlascal( options, uplo, m, n, nb, - beta, B, Bm, Bn ); + INSERT_TASK_zlascal( options, uplo, m, n, nb, + beta, B, Bm, Bn ); + return; } struct cl_ztradd_args_s *clargs = NULL; diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index bd823f41066f1958b5396b10a7bc4589de887b23..13c2cdb8be8986ddf130eabe040b9b43802d4fa8 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -105,11 +105,6 @@ CODELETS_HEADER(zlatro); CODELETS_HEADER(zplssq); CODELETS_HEADER(zplssq2); -/* - * MIXED PRECISION functions - */ -CODELETS_HEADER(zlag2c); - /* * DZ functions */ diff --git a/runtime/starpu/include/runtime_codelet_zc.h b/runtime/starpu/include/runtime_codelet_zc.h new file mode 100644 index 0000000000000000000000000000000000000000..d357630c56398808658adbbb035fd275d69db911 --- /dev/null +++ b/runtime/starpu/include/runtime_codelet_zc.h @@ -0,0 +1,44 @@ +/** + * + * @file starpu/runtime_codelet_zc.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon StarPU CHAMELEON_Complex64_t codelets header + * + * @version 1.2.0 + * @author Cedric Augonnet + * @author Mathieu Faverge + * @author Cedric Castagnede + * @author Florent Pruvost + * @date 2022-02-22 + * @precisions mixed zc -> ds + * + */ +#ifndef _runtime_codelet_zc_h_ +#define _runtime_codelet_zc_h_ + +#include <stdio.h> +#include "runtime_codelets.h" + +#include "chameleon/tasks_zc.h" +#if !defined(CHAMELEON_SIMULATION) +#include "coreblas/coreblas_zc.h" +#include "coreblas/coreblas_zctile.h" +#if defined(CHAMELEON_USE_CUDA) +#include "cudablas.h" +#endif +#endif + +/* + * MIXED PRECISION functions + */ +CODELETS_HEADER(zlag2c); +CODELETS_HEADER(clag2z); + +#endif /* _runtime_codelet_zc_h_ */ diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index ae8d74f73579b4fe851cf31d558d2cb07108f0bc..3391dca0e688d0eb4c612a0174e1dde5bb96b3f7 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -130,6 +130,10 @@ set(ZSRC # ----------------------------------------------- foreach(_precision ${CHAMELEON_PRECISION} ) + if ( "${_precision}" STREQUAL "ds" OR "${_precision}" STREQUAL "zc" ) + continue() + endif() + precisions_rules_py(${_precision}SRC_GENERATED "${ZSRC}" PRECISIONS "${_precision}" ) diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 502add004a084acfc4fdbef08151bca551285c98..7c979a91710167fff8c41bd92d58a62804f7e5ec 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -21,6 +21,10 @@ endif() if (NOT CHAMELEON_SIMULATION) foreach(prec ${RP_CHAMELEON_PRECISIONS}) + if ( "${prec}" STREQUAL "ds" OR "${prec}" STREQUAL "zc" ) + continue() + endif() + set (CMD ./chameleon_${prec}testing) #