From c060cfd10900d2fce4e1d42d188d9bf064d839d3 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Fri, 19 May 2023 11:48:28 -0400
Subject: [PATCH] constants: Split the flttype field as a bitmask of datatype,
 datasize and mixed

---
 CMakeLists.txt                |  2 +-
 cmake_modules/local_subs.py   |  7 +++-
 cmake_modules/morse_cmake     |  2 +-
 control/auxiliary.c           | 14 ++++---
 control/descriptor.c          | 20 ++++++----
 control/descriptor.h          |  6 +--
 include/chameleon.h           |  6 +--
 include/chameleon/constants.h | 75 +++++++++++++++++++++++++++++++----
 include/chameleon/types.h     | 15 ++++++-
 9 files changed, 115 insertions(+), 32 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13b5ea5fa..796cb3fa2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,7 @@ endif()
 
 # set project version number
 set(CHAMELEON_VERSION_MAJOR 1)
-set(CHAMELEON_VERSION_MINOR 2)
+set(CHAMELEON_VERSION_MINOR 3)
 set(CHAMELEON_VERSION_MICRO 0)
 set(CHAMELEON_VERSION "${CHAMELEON_VERSION_MAJOR}.${CHAMELEON_VERSION_MINOR}.${CHAMELEON_VERSION_MICRO}")
 
diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index 02af5df38..2e8cb7106 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -6,12 +6,12 @@
  @copyright 2019-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
                       Univ. Bordeaux. All rights reserved.
 
- @version 1.2.0
+ @version 1.3.0
  @author Mathieu Faverge
  @author Florent Pruvost
  @author Nathalie Furmento
  @author Alycia Lisito
- @date 2022-02-22
+ @date 2023-07-04
 
 """
 _extra_blas = [
@@ -73,6 +73,9 @@ subs = {
         ('int',                  'float',                'double',               'CHAMELEON_Complex32_t', r'\bCHAMELEON_Complex64_t'),
         ('ChamPattern',          'ChamRealFloat',        'ChamRealDouble',       'ChamComplexFloat',    r'\bChamComplexDouble' ),
         ('ChamPattern',          'ChamRealFloat',        'ChamRealDouble',       'ChamRealFloat',       r'\bChamRealDouble'    ),
+        ('ChamPattern',          'ChamRealFloat',        'ChamRealFloat',        'ChamComplexFloat',    r'\bChamComplexFloat'  ),
+        ('ChamPattern',          'ChamRealFloat',        'ChamRealFloat',        'ChamRealFloat',       r'\bChamRealFloat'     ),
+        ('ChamPattern',          'ChamRealHalf',         'ChamRealHalf',         'ChamComplexHalf',     r'\bChamComplexHalf'   ),
         ('int',                  'float',                'double',               'complex32',             'complex64'          ),
         ('Int',                  'Float',                'Double',               'Complex32',             'Complex64'          ),
         ('Int',                  'HMAT_SIMPLE_PRECISION','HMAT_DOUBLE_PRECISION','HMAT_SIMPLE_COMPLEX',   'HMAT_DOUBLE_COMPLEX'),
diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake
index b9e9ec800..332bf7de0 160000
--- a/cmake_modules/morse_cmake
+++ b/cmake_modules/morse_cmake
@@ -1 +1 @@
-Subproject commit b9e9ec80030a349e1c768f8aec2879d1a6ed28a9
+Subproject commit 332bf7de07c7eaf85cf370236d5a209b83a58dd7
diff --git a/control/auxiliary.c b/control/auxiliary.c
index 1d1b9147f..d299d8fa8 100644
--- a/control/auxiliary.c
+++ b/control/auxiliary.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon auxiliary routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Jakub Kurzak
  * @author Piotr Luszczek
  * @author Emmanuel Agullo
@@ -19,7 +19,7 @@
  * @author Florent Pruvost
  * @author Guillaume Sylvand
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2023-07-04
  *
  ***
  *
@@ -187,13 +187,17 @@ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro)
  * @retval Element size in bytes
  *
  */
-int CHAMELEON_Element_Size(int type)
+int CHAMELEON_Element_Size( cham_flttype_t type )
 {
-    switch(type) {
+    switch( cham_get_flttype(type) ) {
         case ChamByte:          return          1;
-        case ChamInteger:       return   sizeof(int);
+        case ChamInteger16:     return   sizeof(int16_t);
+        case ChamInteger32:     return   sizeof(int32_t);
+        case ChamInteger64:     return   sizeof(int64_t);
+        case ChamRealHalf:      return   2;
         case ChamRealFloat:     return   sizeof(float);
         case ChamRealDouble:    return   sizeof(double);
+        case ChamComplexHalf:   return   4;
         case ChamComplexFloat:  return 2*sizeof(float);
         case ChamComplexDouble: return 2*sizeof(double);
         default: chameleon_fatal_error("CHAMELEON_Element_Size", "undefined type");
diff --git a/control/descriptor.c b/control/descriptor.c
index 1005a04cf..d17748972 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon descriptors routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Guillaume Sylvand
  * @author Raphael Boucherie
  * @author Samuel Thibault
- * @date 2022-12-13
+ * @date 2023-07-04
  *
  ***
  *
@@ -376,11 +376,14 @@ int chameleon_desc_check(const CHAM_desc_t *desc)
         chameleon_error("chameleon_desc_check", "NULL matrix pointer");
         return CHAMELEON_ERR_UNALLOCATED;
     }
-    if (desc->dtyp != ChamInteger &&
-        desc->dtyp != ChamRealFloat &&
-        desc->dtyp != ChamRealDouble &&
-        desc->dtyp != ChamComplexFloat &&
-        desc->dtyp != ChamComplexDouble  ) {
+    if ( (desc->dtyp != ChamInteger       ) &&
+         (desc->dtyp != ChamRealHalf      ) &&
+         (desc->dtyp != ChamRealFloat     ) &&
+         (desc->dtyp != ChamRealDouble    ) &&
+         (desc->dtyp != ChamComplexHalf   ) &&
+         (desc->dtyp != ChamComplexFloat  ) &&
+         (desc->dtyp != ChamComplexDouble ) )
+    {
         chameleon_error("chameleon_desc_check", "invalid matrix type");
         return CHAMELEON_ERR_ILLEGAL_VALUE;
     }
@@ -435,8 +438,11 @@ CHAMELEON_Desc_SubMatrix( CHAM_desc_t *descA, int i, int j, int m, int n )
  *
  * @param[in] dtyp
  *          Data type of the matrix:
+ *          @arg ChamInteger:       integer (i),
+ *          @arg ChamRealHalf:      half precision real (H),
  *          @arg ChamRealFloat:     single precision real (S),
  *          @arg ChamRealDouble:    double precision real (D),
+ *          @arg ChamComplexHalf:   half precision complex (),
  *          @arg ChamComplexFloat:  single precision complex (C),
  *          @arg ChamComplexDouble: double precision complex (Z).
  *
diff --git a/control/descriptor.h b/control/descriptor.h
index cff3b76b9..96bf0bf0b 100644
--- a/control/descriptor.h
+++ b/control/descriptor.h
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon descriptor header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Jakub Kurzak
  * @author Mathieu Faverge
  * @author Cedric Castagnede
@@ -19,7 +19,7 @@
  * @author Guillaume Sylvand
  * @author Raphael Boucherie
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-04
  *
  */
 #ifndef _chameleon_descriptor_h_
@@ -76,7 +76,7 @@ int          chameleon_desc_check    ( const CHAM_desc_t *desc );
 /**
  *  Internal function to return address of block (m,n) with m,n = block indices
  */
-inline static CHAM_tile_t *chameleon_desc_gettile(const CHAM_desc_t *A, int m, int n)
+inline static CHAM_tile_t *chameleon_desc_gettile(const CHAM_desc_t *A, int m, int n )
 {
     size_t mm = m + A->i / A->mb;
     size_t nn = n + A->j / A->nb;
diff --git a/include/chameleon.h b/include/chameleon.h
index 77b6544f0..b6ff6913c 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -11,13 +11,13 @@
  *
  * @brief Chameleon main header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Cedric Augonnet
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Philippe Virouleau
- * @date 2022-02-22
+ * @date 2023-07-04
  *
  */
 #ifndef _chameleon_h_
@@ -120,7 +120,7 @@ int CHAMELEON_Lapack_to_Tile( void *Af77, int LDA, CHAM_desc_t *A ) __attribute_
 int CHAMELEON_Tile_to_Lapack( CHAM_desc_t *A, void *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_Desc2Lap() instead")));
 
 /* Descriptor */
-int CHAMELEON_Element_Size(int type);
+int CHAMELEON_Element_Size( cham_flttype_t type );
 
 int CHAMELEON_Desc_Create_User( CHAM_desc_t **desc, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
                                 int lm, int ln, int i, int j, int m, int n, int p, int q,
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index cff1f5652..fe50552f4 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon global constants
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Terry Cojean
- * @date 2022-02-22
+ * @date 2023-07-04
  *
  */
 #ifndef _chameleon_constants_h_
@@ -36,15 +36,74 @@
 /**
  * @brief Matrix floating point arithmetic
  */
+typedef enum chameleon_arithmetic_e {
+    Cham8      = 0, /**< Arithmetic codes occupy bits 0-1 of a cham_flttype_t value */
+    ChamHalf   = 1,
+    ChamSingle = 2,
+    ChamDouble = 3,
+} cham_arithmetic_t;
+
+#define CHAM_ARITHMETIC_MASK 0x03 /* bits 0-1; hex because 0b literals are non-standard before C23 */
+
+typedef enum chameleon_ftype_e {
+    ChamInt     = 0, /**< Family codes occupy bits 2-3 of a cham_flttype_t value */
+    ChamReal    = 1,
+    ChamComplex = 2,
+} cham_ftype_t;
+
+#define CHAM_FTYPE_MASK 0x0c /* bits 2-3 */
+#define CHAM_MIXED_MASK 0x10 /* bit 4    */
+
+#define cham_get_arith( _ftype_ )   ( (_ftype_) & CHAM_ARITHMETIC_MASK ) /* cham_arithmetic_t code */
+#define cham_get_ftype( _ftype_ )   (( (_ftype_) & CHAM_FTYPE_MASK ) >> 2 ) /* cham_ftype_t code */
+#define cham_get_flttype( _ftype_ ) ( (_ftype_) & (CHAM_FTYPE_MASK | CHAM_ARITHMETIC_MASK) ) /* bits 0-3 only */
+#define cham_is_mixed( _ftype_ )    ( (_ftype_) & CHAM_MIXED_MASK ) /* non-zero (0x10) when the mixed bit is set */
+
+#define cham_clean_mixed( _ftype_ )    ( (_ftype_) & ~CHAM_MIXED_MASK ) /* same value with bit 4 cleared */
+
+#define CHAMELEON_FLTTYPE( _ftype_, _arithmetic_ ) ( ((_ftype_) << 2) | (_arithmetic_) ) /* family in bits 2-3, arithmetic in bits 0-1 */
+
 typedef enum chameleon_flttype_e {
-    ChamByte          = 0,
-    ChamInteger       = 1,
-    ChamRealFloat     = 2,
-    ChamRealDouble    = 3,
-    ChamComplexFloat  = 4,
-    ChamComplexDouble = 5,
+    ChamByte               = CHAMELEON_FLTTYPE( ChamInt,     Cham8      ), /**< = 0                            */
+    ChamInteger16          = CHAMELEON_FLTTYPE( ChamInt,     ChamHalf   ), /**< = 1                            */
+    ChamInteger            = CHAMELEON_FLTTYPE( ChamInt,     ChamSingle ), /**< = 2, same value as ChamInteger32 */
+    ChamInteger32          = CHAMELEON_FLTTYPE( ChamInt,     ChamSingle ), /**< = 2                            */
+    ChamInteger64          = CHAMELEON_FLTTYPE( ChamInt,     ChamDouble ), /**< = 3                            */
+    ChamRealHalf           = CHAMELEON_FLTTYPE( ChamReal,    ChamHalf   ), /**< = 5                            */
+    ChamRealFloat          = CHAMELEON_FLTTYPE( ChamReal,    ChamSingle ), /**< = 6 (was 2 before this change)  */
+    ChamRealDouble         = CHAMELEON_FLTTYPE( ChamReal,    ChamDouble ), /**< = 7 (was 3 before this change)  */
+    ChamComplexHalf        = CHAMELEON_FLTTYPE( ChamComplex, ChamHalf   ), /**< = 9                            */
+    ChamComplexFloat       = CHAMELEON_FLTTYPE( ChamComplex, ChamSingle ), /**< = 10 (was 4 before this change) */
+    ChamComplexDouble      = CHAMELEON_FLTTYPE( ChamComplex, ChamDouble ), /**< = 11 (was 5 before this change) */
+    ChamRealDoubleMixed    = ChamRealDouble    | CHAM_MIXED_MASK, /**< = 23, ChamRealDouble with the mixed bit set    */
+    ChamComplexDoubleMixed = ChamComplexDouble | CHAM_MIXED_MASK, /**< = 27, ChamComplexDouble with the mixed bit set */
 } cham_flttype_t;
 
+#define ChamComplexSingle ChamComplexFloat
+#define ChamRealSingle    ChamRealFloat
+
+#define ChamConvert( in, out ) ( cham_clean_mixed(in) | (cham_clean_mixed(out) << 5) ) /* 'in' kept in the low bits, 'out' shifted left by 5; the mixed bit is cleared on both */
+
+#define ChamConvertComplexDoubleToDouble ChamConvert( ChamComplexDouble, ChamComplexDouble )
+#define ChamConvertComplexDoubleToSingle ChamConvert( ChamComplexDouble, ChamComplexSingle )
+#define ChamConvertComplexDoubleToHalf   ChamConvert( ChamComplexDouble, ChamComplexHalf   )
+#define ChamConvertComplexSingleToDouble ChamConvert( ChamComplexSingle, ChamComplexDouble )
+#define ChamConvertComplexSingleToSingle ChamConvert( ChamComplexSingle, ChamComplexSingle )
+#define ChamConvertComplexSingleToHalf   ChamConvert( ChamComplexSingle, ChamComplexHalf   )
+#define ChamConvertComplexHalfToDouble   ChamConvert( ChamComplexHalf,   ChamComplexDouble )
+#define ChamConvertComplexHalfToSingle   ChamConvert( ChamComplexHalf,   ChamComplexSingle )
+#define ChamConvertComplexHalfToHalf     ChamConvert( ChamComplexHalf,   ChamComplexHalf   )
+
+#define ChamConvertRealDoubleToDouble ChamConvert( ChamRealDouble, ChamRealDouble )
+#define ChamConvertRealDoubleToSingle ChamConvert( ChamRealDouble, ChamRealSingle )
+#define ChamConvertRealDoubleToHalf   ChamConvert( ChamRealDouble, ChamRealHalf   )
+#define ChamConvertRealSingleToDouble ChamConvert( ChamRealSingle, ChamRealDouble )
+#define ChamConvertRealSingleToSingle ChamConvert( ChamRealSingle, ChamRealSingle )
+#define ChamConvertRealSingleToHalf   ChamConvert( ChamRealSingle, ChamRealHalf   )
+#define ChamConvertRealHalfToDouble   ChamConvert( ChamRealHalf,   ChamRealDouble )
+#define ChamConvertRealHalfToSingle   ChamConvert( ChamRealHalf,   ChamRealSingle )
+#define ChamConvertRealHalfToHalf     ChamConvert( ChamRealHalf,   ChamRealHalf   )
+
 /**
  * @brief Matrix tile storage
  */
diff --git a/include/chameleon/types.h b/include/chameleon/types.h
index a174db70e..4d46a65fe 100644
--- a/include/chameleon/types.h
+++ b/include/chameleon/types.h
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon basic datatypes header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Lucas Barros de Assis
  * @author Thomas Mijieux
- * @date 2022-02-22
+ * @date 2023-07-04
  *
  */
 #ifndef _chameleon_types_h_
@@ -104,6 +104,17 @@ typedef int8_t cham_bool_t;
     #endif
 #endif /* CHAMELEON_COMPLEX_CPP */
 
+/**
+ * Half precision on GPUs: CHAMELEON_Real16_t is __half in C++ translation units and short in C ones.
+ */
+#if defined(__cplusplus)
+typedef __half CHAMELEON_Real16_t; /* NOTE(review): __half is not declared in the visible lines; presumably a CUDA/HIP fp16 header must be included first - confirm */
+#else
+/* Use short for CUDA older than 7.5 and for non-CUDA files; the corresponding routines would not work
+ * anyway since there is no half precision. NOTE(review): this branch is selected by __cplusplus, not by a CUDA macro - confirm. */
+typedef short  CHAMELEON_Real16_t;
+#endif
+
 /**
  *  CHAMELEON Deprecated attribute
  */
-- 
GitLab