From c060cfd10900d2fce4e1d42d188d9bf064d839d3 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Fri, 19 May 2023 11:48:28 -0400 Subject: [PATCH] constants: Split the flttype field as a bitmask of datatype, datasize and mixed --- CMakeLists.txt | 2 +- cmake_modules/local_subs.py | 7 +++- cmake_modules/morse_cmake | 2 +- control/auxiliary.c | 14 ++++--- control/descriptor.c | 20 ++++++---- control/descriptor.h | 6 +-- include/chameleon.h | 6 +-- include/chameleon/constants.h | 75 +++++++++++++++++++++++++++++++---- include/chameleon/types.h | 15 ++++++- 9 files changed, 115 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 13b5ea5fa..796cb3fa2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ endif() # set project version number set(CHAMELEON_VERSION_MAJOR 1) -set(CHAMELEON_VERSION_MINOR 2) +set(CHAMELEON_VERSION_MINOR 3) set(CHAMELEON_VERSION_MICRO 0) set(CHAMELEON_VERSION "${CHAMELEON_VERSION_MAJOR}.${CHAMELEON_VERSION_MINOR}.${CHAMELEON_VERSION_MICRO}") diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index 02af5df38..2e8cb7106 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -6,12 +6,12 @@ @copyright 2019-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. - @version 1.2.0 + @version 1.3.0 @author Mathieu Faverge @author Florent Pruvost @author Nathalie Furmento @author Alycia Lisito - @date 2022-02-22 + @date 2023-07-04 """ _extra_blas = [ @@ -73,6 +73,9 @@ subs = { ('int', 'float', 'double', 'CHAMELEON_Complex32_t', r'\bCHAMELEON_Complex64_t'), ('ChamPattern', 'ChamRealFloat', 'ChamRealDouble', 'ChamComplexFloat', r'\bChamComplexDouble' ), ('ChamPattern', 'ChamRealFloat', 'ChamRealDouble', 'ChamRealFloat', r'\bChamRealDouble' ), + ('ChamPattern', 'ChamRealFloat', 'ChamRealFloat', 'ChamComplexFloat', r'\bChamComplexFloat' ), + ('ChamPattern', 'ChamRealFloat', 'ChamRealFloat', 'ChamRealFloat', r'\bChamRealFloat' ), + ('ChamPattern', 'ChamRealHalf', 'ChamRealHalf', 'ChamComplexHalf', r'\bChamComplexHalf' ), ('int', 'float', 'double', 'complex32', 'complex64' ), ('Int', 'Float', 'Double', 'Complex32', 'Complex64' ), ('Int', 'HMAT_SIMPLE_PRECISION','HMAT_DOUBLE_PRECISION','HMAT_SIMPLE_COMPLEX', 'HMAT_DOUBLE_COMPLEX'), diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index b9e9ec800..332bf7de0 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit b9e9ec80030a349e1c768f8aec2879d1a6ed28a9 +Subproject commit 332bf7de07c7eaf85cf370236d5a209b83a58dd7 diff --git a/control/auxiliary.c b/control/auxiliary.c index 1d1b9147f..d299d8fa8 100644 --- a/control/auxiliary.c +++ b/control/auxiliary.c @@ -11,7 +11,7 @@ * * @brief Chameleon auxiliary routines * - * @version 1.2.0 + * @version 1.3.0 * @author Jakub Kurzak * @author Piotr Luszczek * @author Emmanuel Agullo @@ -19,7 +19,7 @@ * @author Florent Pruvost * @author Guillaume Sylvand * @author Mathieu Faverge - * @date 2022-02-22 + * @date 2023-07-04 * *** * @@ -187,13 +187,17 @@ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro) * @retval Element size in bytes * */ -int CHAMELEON_Element_Size(int type) +int CHAMELEON_Element_Size( cham_flttype_t type ) { - switch(type) { + switch( cham_get_flttype(type) ) { case ChamByte: return 1; - case ChamInteger: return sizeof(int); + case ChamInteger16: return sizeof(int16_t); + case ChamInteger32: return sizeof(int32_t); + case ChamInteger64: return sizeof(int64_t); + case ChamRealHalf: return 2; case ChamRealFloat: return sizeof(float); case ChamRealDouble: return sizeof(double); + case ChamComplexHalf: return 4; case ChamComplexFloat: return 2*sizeof(float); case ChamComplexDouble: return 2*sizeof(double); default: chameleon_fatal_error("CHAMELEON_Element_Size", "undefined type"); diff --git a/control/descriptor.c b/control/descriptor.c index 1005a04cf..d17748972 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -11,14 +11,14 @@ * * @brief Chameleon descriptors routines * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost * @author Guillaume Sylvand * @author Raphael Boucherie * @author Samuel Thibault - * @date 2022-12-13 + * @date 2023-07-04 * *** * @@ -376,11 +376,14 @@ int chameleon_desc_check(const CHAM_desc_t *desc) chameleon_error("chameleon_desc_check", "NULL matrix pointer"); return CHAMELEON_ERR_UNALLOCATED; } - if (desc->dtyp != ChamInteger && - desc->dtyp != ChamRealFloat && - desc->dtyp != ChamRealDouble && - desc->dtyp != ChamComplexFloat && - desc->dtyp != ChamComplexDouble ) { + if ( (desc->dtyp != ChamInteger ) && + (desc->dtyp != ChamRealHalf ) && + (desc->dtyp != ChamRealFloat ) && + (desc->dtyp != ChamRealDouble ) && + (desc->dtyp != ChamComplexHalf ) && + (desc->dtyp != ChamComplexFloat ) && + (desc->dtyp != ChamComplexDouble ) ) + { chameleon_error("chameleon_desc_check", "invalid matrix type"); return CHAMELEON_ERR_ILLEGAL_VALUE; } @@ -435,8 +438,11 @@ CHAMELEON_Desc_SubMatrix( CHAM_desc_t *descA, int i, int j, int m, int n ) * * @param[in] dtyp * Data type of the matrix: + * @arg ChamInteger: integer (i), + * @arg ChamRealHalf: half precision real (H), * @arg ChamRealFloat: single precision real (S), * @arg ChamRealDouble: double precision real (D), + * @arg ChamComplexHalf: half precision complex (), * @arg ChamComplexFloat: single precision complex (C), * @arg ChamComplexDouble: double precision complex (Z). * diff --git a/control/descriptor.h b/control/descriptor.h index cff3b76b9..96bf0bf0b 100644 --- a/control/descriptor.h +++ b/control/descriptor.h @@ -11,7 +11,7 @@ * * @brief Chameleon descriptor header * - * @version 1.2.0 + * @version 1.3.0 * @author Jakub Kurzak * @author Mathieu Faverge * @author Cedric Castagnede @@ -19,7 +19,7 @@ * @author Guillaume Sylvand * @author Raphael Boucherie * @author Samuel Thibault - * @date 2022-02-22 + * @date 2023-07-04 * */ #ifndef _chameleon_descriptor_h_ @@ -76,7 +76,7 @@ int chameleon_desc_check ( const CHAM_desc_t *desc ); /** * Internal function to return address of block (m,n) with m,n = block indices */ -inline static CHAM_tile_t *chameleon_desc_gettile(const CHAM_desc_t *A, int m, int n) +inline static CHAM_tile_t *chameleon_desc_gettile(const CHAM_desc_t *A, int m, int n ) { size_t mm = m + A->i / A->mb; size_t nn = n + A->j / A->nb; diff --git a/include/chameleon.h b/include/chameleon.h index 77b6544f0..b6ff6913c 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -11,13 +11,13 @@ * * @brief Chameleon main header * - * @version 1.2.0 + * @version 1.3.0 * @author Mathieu Faverge * @author Cedric Augonnet * @author Cedric Castagnede * @author Florent Pruvost * @author Philippe Virouleau - * @date 2022-02-22 + * @date 2023-07-04 * */ #ifndef _chameleon_h_ @@ -120,7 +120,7 @@ int CHAMELEON_Lapack_to_Tile( void *Af77, int LDA, CHAM_desc_t *A ) __attribute_ int CHAMELEON_Tile_to_Lapack( CHAM_desc_t *A, void *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_Desc2Lap() instead"))); /* Descriptor */ -int CHAMELEON_Element_Size(int type); +int CHAMELEON_Element_Size( cham_flttype_t type ); int CHAMELEON_Desc_Create_User( CHAM_desc_t **desc, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, int lm, int ln, int i, int j, int m, int n, int p, int q, diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index cff1f5652..fe50552f4 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -11,14 +11,14 @@ * * @brief Chameleon global constants * - * @version 1.2.0 + * @version 1.3.0 * @author Cedric Augonnet * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito * @author Terry Cojean - * @date 2022-02-22 + * @date 2023-07-04 * */ #ifndef _chameleon_constants_h_ @@ -36,15 +36,74 @@ /** * @brief Matrix floating point arithmetic */ +typedef enum chameleon_arithmetic_e { + Cham8 = 0, + ChamHalf = 1, + ChamSingle = 2, + ChamDouble = 3, +} cham_arithmetic_t; + +#define CHAM_ARITHMETIC_MASK 0b11 + +typedef enum chameleon_ftype_e { + ChamInt = 0, + ChamReal = 1, + ChamComplex = 2, +} cham_ftype_t; + +#define CHAM_FTYPE_MASK 0b1100 +#define CHAM_MIXED_MASK 0b10000 + +#define cham_get_arith( _ftype_ ) ( (_ftype_) & CHAM_ARITHMETIC_MASK ) +#define cham_get_ftype( _ftype_ ) (( (_ftype_) & CHAM_FTYPE_MASK ) >> 2 ) +#define cham_get_flttype( _ftype_ ) ( (_ftype_) & (CHAM_FTYPE_MASK | CHAM_ARITHMETIC_MASK) ) +#define cham_is_mixed( _ftype_ ) ( (_ftype_) & CHAM_MIXED_MASK ) + +#define cham_clean_mixed( _ftype_ ) ( (_ftype_) & ~CHAM_MIXED_MASK ) + +#define CHAMELEON_FLTTYPE( _ftype_, _arithmetic_ ) ( ((_ftype_) << 2) | (_arithmetic_) ) + typedef enum chameleon_flttype_e { - ChamByte = 0, - ChamInteger = 1, - ChamRealFloat = 2, - ChamRealDouble = 3, - ChamComplexFloat = 4, - ChamComplexDouble = 5, + ChamByte = CHAMELEON_FLTTYPE( ChamInt, Cham8 ), + ChamInteger16 = CHAMELEON_FLTTYPE( ChamInt, ChamHalf ), + ChamInteger = CHAMELEON_FLTTYPE( ChamInt, ChamSingle ), + ChamInteger32 = CHAMELEON_FLTTYPE( ChamInt, ChamSingle ), + ChamInteger64 = CHAMELEON_FLTTYPE( ChamInt, ChamDouble ), + ChamRealHalf = CHAMELEON_FLTTYPE( ChamReal, ChamHalf ), + ChamRealFloat = CHAMELEON_FLTTYPE( ChamReal, ChamSingle ), + ChamRealDouble = CHAMELEON_FLTTYPE( ChamReal, ChamDouble ), + ChamComplexHalf = CHAMELEON_FLTTYPE( ChamComplex, ChamHalf ), + ChamComplexFloat = CHAMELEON_FLTTYPE( ChamComplex, ChamSingle ), + ChamComplexDouble = CHAMELEON_FLTTYPE( ChamComplex, ChamDouble ), + ChamRealDoubleMixed = ChamRealDouble | CHAM_MIXED_MASK, + ChamComplexDoubleMixed = ChamComplexDouble | CHAM_MIXED_MASK, } cham_flttype_t; +#define ChamComplexSingle ChamComplexFloat +#define ChamRealSingle ChamRealFloat + +#define ChamConvert( in, out ) ( cham_clean_mixed(in) | (cham_clean_mixed(out) << 5) ) + +#define ChamConvertComplexDoubleToDouble ChamConvert( ChamComplexDouble, ChamComplexDouble ) +#define ChamConvertComplexDoubleToSingle ChamConvert( ChamComplexDouble, ChamComplexSingle ) +#define ChamConvertComplexDoubleToHalf ChamConvert( ChamComplexDouble, ChamComplexHalf ) +#define ChamConvertComplexSingleToDouble ChamConvert( ChamComplexSingle, ChamComplexDouble ) +#define ChamConvertComplexSingleToSingle ChamConvert( ChamComplexSingle, ChamComplexSingle ) +#define ChamConvertComplexSingleToHalf ChamConvert( ChamComplexSingle, ChamComplexHalf ) +#define ChamConvertComplexHalfToDouble ChamConvert( ChamComplexHalf, ChamComplexDouble ) +#define ChamConvertComplexHalfToSingle ChamConvert( ChamComplexHalf, ChamComplexSingle ) +#define ChamConvertComplexHalfToHalf ChamConvert( ChamComplexHalf, ChamComplexHalf ) + +#define ChamConvertRealDoubleToDouble ChamConvert( ChamRealDouble, ChamRealDouble ) +#define ChamConvertRealDoubleToSingle ChamConvert( ChamRealDouble, ChamRealSingle ) +#define ChamConvertRealDoubleToHalf ChamConvert( ChamRealDouble, ChamRealHalf ) +#define ChamConvertRealSingleToDouble ChamConvert( ChamRealSingle, ChamRealDouble ) +#define ChamConvertRealSingleToSingle ChamConvert( ChamRealSingle, ChamRealSingle ) +#define ChamConvertRealSingleToHalf ChamConvert( ChamRealSingle, ChamRealHalf ) +#define ChamConvertRealHalfToDouble ChamConvert( ChamRealHalf, ChamRealDouble ) +#define ChamConvertRealHalfToSingle ChamConvert( ChamRealHalf, ChamRealSingle ) +#define ChamConvertRealHalfToHalf ChamConvert( ChamRealHalf, ChamRealHalf ) + /** * @brief Matrix tile storage */ diff --git a/include/chameleon/types.h b/include/chameleon/types.h index a174db70e..4d46a65fe 100644 --- a/include/chameleon/types.h +++ b/include/chameleon/types.h @@ -11,14 +11,14 @@ * * @brief Chameleon basic datatypes header * - * @version 1.2.0 + * @version 1.3.0 * @author Cedric Augonnet * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost * @author Lucas Barros de Assis * @author Thomas Mijieux - * @date 2022-02-22 + * @date 2023-07-04 * */ #ifndef _chameleon_types_h_ @@ -104,6 +104,17 @@ typedef int8_t cham_bool_t; #endif #endif /* CHAMELEON_COMPLEX_CPP */ +/** + * Half precision on GPUs + */ +#if defined(__cplusplus) +typedef __half CHAMELEON_Real16_t; +#else +/* use short for cuda older than 7.5 and non-cuda files + * corresponding routines would not work anyway since there is no half precision */ +typedef short CHAMELEON_Real16_t; +#endif + /** * CHAMELEON Deprecated attribute */ -- GitLab