diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6715cc43f625a0f7e810f6401938e4ced372a364..f62ddbdf0139e68c4a5a56a6ab37847562ca39b4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,8 @@
 #  @author Samuel Thibault
 #  @author Guillaume Sylvand
 #  @author Alycia Lisito
-#  @date 2022-02-22
+#  @author Loris Lucido
+#  @date 2023-01-30
 #
 ###
 cmake_minimum_required(VERSION 3.3)
@@ -188,6 +189,11 @@ if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
      set(CHAMELEON_ENABLE_CUDA ON FORCE)
 endif()
 
+set(CHAMELEON_ENABLE_HIP OFF CACHE INTERNAL "Tells if HIP might be supported by the runtime")
+if ( CHAMELEON_SCHED_STARPU )
+     set(CHAMELEON_ENABLE_HIP ON FORCE)
+endif()
+
 # Additional options
 # ------------------
 
@@ -217,6 +223,20 @@ if (CHAMELEON_ENABLE_CUDA AND NOT CHAMELEON_USE_CUDA)
     message("-- ${BoldGreen}CHAMELEON_USE_CUDA is set to OFF, turn it ON to use CUDA (unsupported by Quark)${ColourReset}")
 endif()
 
+cmake_dependent_option(CHAMELEON_USE_HIP_CUDA
+                    "Enable HIP kernels with CUDA backend" OFF
+                    "CHAMELEON_ENABLE_HIP" OFF)
+cmake_dependent_option(CHAMELEON_USE_HIP_ROC
+                    "Enable HIP kernels with ROCclr backend" OFF
+                    "CHAMELEON_ENABLE_HIP" OFF)
+
+set(CHAMELEON_USE_HIP OFF CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only of one of the CHAMELEON_USE_HIP{CUDA,ROC} is enabled")
+if( CHAMELEON_USE_HIP_CUDA OR CHAMELEON_USE_HIP_ROC )
+  set(CHAMELEON_USE_HIP ON)
+else()
+  set(CHAMELEON_USE_HIP OFF)
+endif()
+
 # Enable Hmat-OSS kernels
 option(CHAMELEON_USE_HMAT "Enable hmat kernels" OFF)
 cmake_dependent_option(CHAMELEON_HMAT_EXTERNAL
@@ -405,6 +425,121 @@ if(NOT CHAMELEON_SIMULATION)
 
     endif(CHAMELEON_USE_CUDA)
 
+    # CHAMELEON depends on HIP/HIPBLAS
+    #----------------------------------
+    if (CHAMELEON_USE_HIP)
+        if (CHAMELEON_USE_HIP_ROC)
+
+            find_package(HIP REQUIRED)
+            find_package(hipblas REQUIRED)
+
+        elseif(CHAMELEON_USE_HIP_CUDA)
+
+            # for CUDA backend we need a user-provided hipblas library as the one
+            # shipped with ROCm only works with AMD GPU
+            set(CHAMELEON_HIPBLAS_PATH "" CACHE PATH "Directory of hipblas installation with CUDA support")
+            if(NOT CHAMELEON_HIPBLAS_PATH)
+              message(FATAL_ERROR "CHAMELEON_USE_HIP_CUDA requires you to set"
+                " CHAMELEON_HIPBLAS_PATH to a hipblas installation compiled with CUDA")
+            endif()
+
+            if(NOT DEFINED HIP_PATH)
+                if(DEFINED ENV{HIP_PATH})
+                    set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
+                else()
+                    message(FATAL_ERROR "Please set HIP_PATH to your HIP installation.")
+                endif()
+            endif()
+
+            find_package(CUDA REQUIRED)
+
+            # location of FindHIP.cmake depending on how HIP_PATH is set
+            # HIP_PATH points to /opt/rocm-<version>
+            set(CMAKE_MODULE_PATH "${HIP_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
+            # HIP_PATH points to /opt/rocm-<version>/hip
+            set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+
+            # We use MODULE keyword to force using FindHIP.cmake instead of
+            # hip-config.cmake which doesn't support CUDA backend
+            find_package(HIP REQUIRED MODULE)
+            list( APPEND HIP_INCLUDE_DIRS "${HIP_PATH}/include")
+
+            if(CHAMELEON_HIPBLAS_PATH)
+                list(APPEND HIPBLAS_INCLUDE_DIRS "${CHAMELEON_HIPBLAS_PATH}/include")
+                list(APPEND HIPBLAS_LIBRARIES "${CHAMELEON_HIPBLAS_PATH}/lib/libhipblas.so")
+            else()
+                message(FATAL_ERROR "Please set CHAMELEON_HIPBLAS_PATH to your HIPBLAS installation.")
+            endif()
+        endif()
+
+        if (HIP_FOUND)
+            message("-- ${Blue}Add definition CHAMELEON_USE_HIP"
+                " - Activate HIP in Chameleon${ColourReset}")
+            # create imported target because not provided with old cmake
+            add_library(HIP::HIP INTERFACE IMPORTED)
+            add_library(HIP::HIPBLAS INTERFACE IMPORTED)
+
+            if (CHAMELEON_USE_HIP_CUDA)
+                target_compile_definitions(HIP::HIP INTERFACE "__HIP_PLATFORM_NVIDIA__")
+                set(HIP_INCLUDE_DIRS "${HIP_INCLUDE_DIRS};${CUDA_INCLUDE_DIRS}")
+                set(HIP_LIBRARIES "${HIP_LIBRARIES};${CUDA_LIBRARIES}")
+            endif()
+
+            if (HIP_INCLUDE_DIRS)
+                set_target_properties(HIP::HIP PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HIP_INCLUDE_DIRS}")
+                if (HIPBLAS_INCLUDE_DIRS)
+                    set_target_properties(HIP::HIPBLAS PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HIPBLAS_INCLUDE_DIRS}")
+                else()
+                    message(WARNING "CHAMELEON_USE_HIP requires"
+                        "\n   HIPBLAS_INCLUDE_DIRS to be found. Be sure you have"
+                        "\n   hipblas headers with your distribution of HIP.")
+                endif()
+            else()
+                message(WARNING "CHAMELEON_USE_HIP requires"
+                    "\n   HIP_INCLUDE_DIRS to be found. Be sure you have"
+                    "\n   hip headers with your distribution of HIP.")
+            endif()
+            if (HIP_LIBRARIES)
+                set_target_properties(HIP::HIP PROPERTIES INTERFACE_LINK_LIBRARIES "${HIP_LIBRARIES}")
+                if (HIPBLAS_LIBRARIES)
+                    set_target_properties(HIP::HIPBLAS PROPERTIES INTERFACE_LINK_LIBRARIES "${HIPBLAS_LIBRARIES}")
+                    target_link_libraries(HIP::HIPBLAS INTERFACE HIP::HIP)
+                    message("-- ${Blue}Add definition CHAMELEON_USE_HIPBLAS"
+                        " - Use GPU kernels from hipblas${ColourReset}")
+                else()
+                    message(FATAL_ERROR "CHAMELEON_USE_HIP requires"
+                        "\n   HIPBLAS_LIBRARIES to be found. Be sure you have"
+                        "\n   libhipblas with your distribution of HIP.")
+                endif()
+            else()
+                message(FATAL_ERROR "CHAMELEON_USE_HIP requires"
+                    "\n   HIP_LIBRARIES to be found. Be sure you have"
+                    "\n   libamdhip64 with your distribution of ROCm.")
+            endif()
+            if (CHAMELEON_USE_HIP_CUDA AND NOT CUDA_LIBRARIES)
+                message(FATAL_ERROR "CHAMELEON_USE_HIP_CUDA requires"
+                "\n   CUDA_LIBRARIES to be found. Be sure you have"
+                "\n   libcuda with your distribution of CUDA.")
+            endif()
+
+            message("-- HIP PATH:              ${HIP_PATH}")
+            message("-- HIP version:           ${HIP_VERSION_STRING}")
+            message("-- HIP platform:          ${HIP_PLATFORM}")
+            message("-- HIP runtime:           ${HIP_RUNTIME}")
+            message("-- HIP compiler:          ${HIP_COMPILER}")
+            message("-- HIP include path:      ${HIP_INCLUDE_DIRS}")
+            message("-- HIP libraries:         ${HIP_LIBRARIES}")
+            message("-- HIPBLAS include path:  ${HIPBLAS_INCLUDE_DIRS}")
+            message("-- HIPBLAS libraries:     ${HIPBLAS_LIBRARIES}")
+
+            morse_export_imported_target(HIP HIP hip chameleon)
+            morse_export_imported_target(HIP HIPBLAS hipblas chameleon)
+        else(HIP_FOUND)
+            message(FATAL_ERROR "CHAMELEON_USE_HIP requires HIP to be found")
+        endif (HIP_FOUND)
+
+    endif(CHAMELEON_USE_HIP)
+
     # CHAMELEON depends on MPI
     #-------------------------
     if (CHAMELEON_USE_MPI)
@@ -565,6 +700,9 @@ if (NOT CHAMELEON_SIMULATION)
   if(CHAMELEON_USE_CUDA)
     add_subdirectory(cudablas)
   endif()
+  if(CHAMELEON_USE_HIP)
+    add_subdirectory(hipblas)
+  endif()
 
 endif()
 
diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index 22b9048e0f586e80aaea116a9e8a3a3a5b8a5822..9202310090400c24b0f9773acd6476eb39da6466 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -53,4 +53,8 @@ if(NOT BUILDNAME)
     set(BUILDNAME "${BUILDNAME}-CUDA")
   endif(CHAMELEON_USE_CUDA)
 
+  if(CHAMELEON_USE_HIP)
+    set(BUILDNAME "${BUILDNAME}-HIP")
+  endif(CHAMELEON_USE_HIP)
+
 endif()
diff --git a/cmake_modules/PrintOpts.cmake b/cmake_modules/PrintOpts.cmake
index 777d0c37f90c5c552ca79283dd31fb8a5303fa90..f50f01c7b16bad42db97504fb2353e1f274d627a 100644
--- a/cmake_modules/PrintOpts.cmake
+++ b/cmake_modules/PrintOpts.cmake
@@ -20,7 +20,8 @@
 # @version 1.2.0
 #  @author Florent Pruvost
 #  @author Mathieu Faverge
-#  @date 2022-02-22
+#  @author Loris Lucido
+#  @date 2023-01-30
 #
 ###
 
@@ -74,6 +75,8 @@ set(dep_message "${dep_message}"
 "\n"
 "       Implementation paradigm\n"
 "       CUDA ................: ${CHAMELEON_USE_CUDA}\n"
+"       HIP-ROC .............: ${CHAMELEON_USE_HIP_ROC}\n"
+"       HIP-CUDA ............: ${CHAMELEON_USE_HIP_CUDA}\n"
 "       MPI .................: ${CHAMELEON_USE_MPI}\n"
 "\n"
 "       Runtime specific\n"
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 081868c78db8635c2c9473b36bbea1bf1ffb4e8a..59a24ff8eb4eb9e02cf765c1429b3cc4db7c009f 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -25,7 +25,8 @@
 #  @author Guillaume Sylvand
 #  @author Raphael Boucherie
 #  @author Alycia Lisito
-#  @date 2022-02-22
+#  @author Loris Lucido
+#  @date 2023-01-30
 #
 ###
 
@@ -344,6 +345,12 @@ if (CHAMELEON_USE_CUDA)
     target_link_libraries(chameleon PUBLIC CUDA::CUBLAS)
   endif()
 endif()
+if (CHAMELEON_USE_HIP)
+  if (NOT CHAMELEON_SIMULATION)
+    target_link_libraries(chameleon PUBLIC hipblas)
+    target_link_libraries(chameleon PUBLIC HIP::HIPBLAS)
+  endif()
+endif()
 target_link_libraries(chameleon PUBLIC MORSE::M)
 
 set_property(TARGET chameleon PROPERTY Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/include")
diff --git a/control/control.c b/control/control.c
index d2ffcd37ca245f8c2741cb0b8e77eae35539fc82..374ec1c873b68005d31746575135b94b2c2805cd 100644
--- a/control/control.c
+++ b/control/control.c
@@ -19,7 +19,8 @@
  * @author Philippe Virouleau
  * @author Samuel Thibault
  * @author Philippe Swartvagher
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @date 2023-01-30
  *
  ***
  *
@@ -65,7 +66,7 @@ int __chameleon_init(int cores, int gpus)
  * @param[in] ncpus
  *          Number of cores to use.
  *
- * @param[in] ncudas
+ * @param[in] ngpus
  *          Number of cuda devices to use.
  *
  * @param[in] nthreads_per_worker
@@ -76,7 +77,7 @@ int __chameleon_init(int cores, int gpus)
  * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int __chameleon_initpar(int ncpus, int ncudas, int nthreads_per_worker)
+int __chameleon_initpar(int ncpus, int ngpus, int nthreads_per_worker)
 {
     CHAM_context_t *chamctxt;
 
@@ -113,14 +114,14 @@ int __chameleon_initpar(int ncpus, int ncudas, int nthreads_per_worker)
 #  endif
 #endif
 
-#if !defined(CHAMELEON_USE_CUDA)
-    if ( ncudas != 0 ) {
-        chameleon_warning("CHAMELEON_Init", "CHAMELEON_USE_CUDA is not defined, ncudas is forced to 0");
-        ncudas = 0;
+#if !defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_USE_HIP)
+    if ( ngpus != 0 ) {
+        chameleon_warning("CHAMELEON_Init", "CHAMELEON_USE_CUDA or CHAMELEON_USE_HIP are not defined, ngpus is forced to 0");
+        ngpus = 0;
     }
 #endif
 
-    return RUNTIME_init( chamctxt, ncpus, ncudas, nthreads_per_worker );
+    return RUNTIME_init( chamctxt, ncpus, ngpus, nthreads_per_worker );
 }
 
 /**
diff --git a/hipblas/CMakeLists.txt b/hipblas/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87a384f7b4a14f7e28ba534b14fa7cbc738e1c6d
--- /dev/null
+++ b/hipblas/CMakeLists.txt
@@ -0,0 +1,36 @@
+###
+#
+# @file CMakeLists.txt
+#
+# @copyright 2009-2014 The University of Tennessee and The University of
+#                      Tennessee Research Foundation. All rights reserved.
+# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+#                      Univ. Bordeaux. All rights reserved.
+#
+###
+#
+#  @project CHAMELEON
+#  CHAMELEON is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+# @version 1.2.0
+#  @author Cedric Castagnede
+#  @author Emmanuel Agullo
+#  @author Mathieu Faverge
+#  @author Florent Pruvost
+#  @author Loris Lucido
+#  @date 2023-01-30
+#
+###
+
+add_subdirectory(include)
+add_subdirectory(compute)
+add_subdirectory(eztrace_module)
+
+###
+### END CMakeLists.txt
+###
diff --git a/hipblas/compute/CMakeLists.txt b/hipblas/compute/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..632049557a2206d22f83e7432268dd39f42ed303
--- /dev/null
+++ b/hipblas/compute/CMakeLists.txt
@@ -0,0 +1,91 @@
+###
+#
+# @file CMakeLists.txt
+#
+# @copyright 2009-2014 The University of Tennessee and The University of
+#                      Tennessee Research Foundation. All rights reserved.
+# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+#                      Univ. Bordeaux. All rights reserved.
+#
+###
+#
+#  @project CHAMELEON
+#  CHAMELEON is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+# @version 1.2.0
+#  @author Florent Pruvost
+#  @author Guillaume Sylvand
+#  @author Mathieu Faverge
+#  @author Loris Lucido
+#  @date 2023-01-30
+#
+###
+
+# Generate the chameleon sources for all possible precisions
+# ------------------------------------------------------
+set(HIPBLAS_SRCS_GENERATED "")
+set(ZSRC
+    hip_zgemm.c
+    hip_zhemm.c
+    hip_zher2k.c
+    hip_zherk.c
+    hip_zsymm.c
+    hip_zsyr2k.c
+    hip_zsyrk.c
+    hip_ztrmm.c
+    hip_ztrsm.c
+    )
+
+precisions_rules_py(
+  HIPBLAS_SRCS_GENERATED "${ZSRC}"
+  PRECISIONS "${CHAMELEON_PRECISION}")
+
+set(HIPBLAS_SRCS
+  ${HIPBLAS_SRCS_GENERATED}
+  hipglobal.c
+  )
+
+# Force generation of sources
+# ---------------------------
+add_custom_target(hipblas_sources ALL SOURCES ${HIPBLAS_SRCS})
+set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};hipblas_sources" CACHE INTERNAL "List of targets of sources")
+
+# Compile step
+# ------------
+add_library(hipblas ${HIPBLAS_SRCS})
+set_target_properties(hipblas PROPERTIES VERSION ${CHAMELEON_VERSION})
+set_target_properties(hipblas PROPERTIES SOVERSION ${CHAMELEON_VERSION_MAJOR})
+add_dependencies(hipblas hipblas_include hipblas_sources)
+target_include_directories(hipblas PUBLIC
+  $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/hipblas/include>
+  $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/hipblas/include>
+  $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include>
+  $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include>
+  $<INSTALL_INTERFACE:include>)
+set_property(TARGET hipblas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+
+target_link_libraries(hipblas PRIVATE coreblas HIP::HIPBLAS)
+target_link_libraries(hipblas PUBLIC MORSE::M)
+
+# export target coreblas
+install(EXPORT hipblasTargets
+        NAMESPACE CHAMELEON::
+        DESTINATION lib/cmake/chameleon
+        )
+
+# installation
+# ------------
+install(TARGETS hipblas
+        EXPORT hipblasTargets
+        ARCHIVE DESTINATION lib
+        LIBRARY DESTINATION lib
+        )
+
+###
+### END CMakeLists.txt
+###
diff --git a/hipblas/compute/hip_zgemm.c b/hipblas/compute/hip_zgemm.c
new file mode 100644
index 0000000000000000000000000000000000000000..8afd4b869f15431cae942946e1e565086b0349d5
--- /dev/null
+++ b/hipblas/compute/hip_zgemm.c
@@ -0,0 +1,46 @@
+/**
+ *
+ * @file hip_zgemm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zgemm GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @author Loris Lucido
+ * @date 2023-01-30
+ * @precisions normal z -> c d s
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zgemm( cham_trans_t transa, cham_trans_t transb,
+           int m, int n, int k,
+           const hipDoubleComplex *alpha,
+           const hipDoubleComplex *A, int lda,
+           const hipDoubleComplex *B, int ldb,
+           const hipDoubleComplex *beta,
+           hipDoubleComplex *C, int ldc,
+           hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZgemm( handle,
+                       chameleon_hipblas_const(transa), chameleon_hipblas_const(transb),
+                       m, n, k,
+                       HIPBLAS_VALUE(alpha), A, lda,
+                                             B, ldb,
+                       HIPBLAS_VALUE(beta),  C, ldc );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_zhemm.c b/hipblas/compute/hip_zhemm.c
new file mode 100644
index 0000000000000000000000000000000000000000..30d722430f1660fc470380358237f7b43fa3b165
--- /dev/null
+++ b/hipblas/compute/hip_zhemm.c
@@ -0,0 +1,45 @@
+/**
+ *
+ * @file hip_zhemm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zhemm GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zhemm( cham_side_t side, cham_uplo_t uplo,
+           int m, int n,
+           const hipDoubleComplex *alpha,
+           const hipDoubleComplex *A, int lda,
+           const hipDoubleComplex *B, int ldb,
+           const hipDoubleComplex *beta,
+           hipDoubleComplex *C, int ldc,
+           hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZhemm( handle,
+                       chameleon_hipblas_const(side), chameleon_hipblas_const(uplo),
+                       m, n,
+                       HIPBLAS_VALUE(alpha), A, lda,
+                                             B, ldb,
+                       HIPBLAS_VALUE(beta),  C, ldc );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_zher2k.c b/hipblas/compute/hip_zher2k.c
new file mode 100644
index 0000000000000000000000000000000000000000..601df7fe38fab3009b3fb60a37228c50839077e6
--- /dev/null
+++ b/hipblas/compute/hip_zher2k.c
@@ -0,0 +1,45 @@
+/**
+ *
+ * @file hip_zher2k.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zher2k GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zher2k( cham_uplo_t uplo, cham_trans_t trans,
+            int n, int k,
+            const hipDoubleComplex *alpha,
+            const hipDoubleComplex *A, int lda,
+            const hipDoubleComplex *B, int ldb,
+            const double *beta,
+            hipDoubleComplex *C, int ldc,
+            hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZher2k( handle,
+                        chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans),
+                        n, k,
+                        HIPBLAS_VALUE(alpha), A, lda,
+                                              B, ldb,
+                        HIPBLAS_VALUE(beta),  C, ldc );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_zherk.c b/hipblas/compute/hip_zherk.c
new file mode 100644
index 0000000000000000000000000000000000000000..56693b30767031443f99b8f1993f64710e9a3317
--- /dev/null
+++ b/hipblas/compute/hip_zherk.c
@@ -0,0 +1,43 @@
+/**
+ *
+ * @file hip_zherk.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zherk GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zherk( cham_uplo_t uplo, cham_trans_t trans,
+           int n, int k,
+           const double *alpha,
+           const hipDoubleComplex *A, int lda,
+           const double *beta,
+           hipDoubleComplex *B, int ldb,
+           hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZherk( handle,
+                       chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans),
+                       n, k,
+                       HIPBLAS_VALUE(alpha), A, lda,
+                       HIPBLAS_VALUE(beta),  B, ldb );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_zsymm.c b/hipblas/compute/hip_zsymm.c
new file mode 100644
index 0000000000000000000000000000000000000000..146412570c0fcb0a1bb3d2a1c57438c25d0e2962
--- /dev/null
+++ b/hipblas/compute/hip_zsymm.c
@@ -0,0 +1,45 @@
+/**
+ *
+ * @file hip_zsymm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zsymm GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zsymm( cham_side_t side, cham_uplo_t uplo,
+            int m, int n,
+            const hipDoubleComplex *alpha,
+            const hipDoubleComplex *A, int lda,
+            const hipDoubleComplex *B, int ldb,
+            const hipDoubleComplex *beta,
+            hipDoubleComplex *C, int ldc,
+            hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZsymm( handle,
+                      chameleon_hipblas_const(side), chameleon_hipblas_const(uplo),
+                      m, n,
+                      HIPBLAS_VALUE(alpha), A, lda,
+                                           B, ldb,
+                      HIPBLAS_VALUE(beta),  C, ldc );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_zsyr2k.c b/hipblas/compute/hip_zsyr2k.c
new file mode 100644
index 0000000000000000000000000000000000000000..5bef9817e5a6951bfc4cbd301f2b3e4b4111db7e
--- /dev/null
+++ b/hipblas/compute/hip_zsyr2k.c
@@ -0,0 +1,45 @@
+/**
+ *
+ * @file hip_zsyr2k.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zsyr2k GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zsyr2k( cham_uplo_t uplo, cham_trans_t trans,
+            int n, int k,
+            const hipDoubleComplex *alpha,
+            const hipDoubleComplex *A, int lda,
+            const hipDoubleComplex *B, int ldb,
+            const hipDoubleComplex *beta,
+            hipDoubleComplex *C, int ldc,
+            hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZsyr2k( handle,
+                        chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans),
+                        n, k,
+                        HIPBLAS_VALUE(alpha), A, lda,
+                                              B, ldb,
+                        HIPBLAS_VALUE(beta),  C, ldc );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_zsyrk.c b/hipblas/compute/hip_zsyrk.c
new file mode 100644
index 0000000000000000000000000000000000000000..44838f78bed6ee161288976ed621f4276b600955
--- /dev/null
+++ b/hipblas/compute/hip_zsyrk.c
@@ -0,0 +1,43 @@
+/**
+ *
+ * @file hip_zsyrk.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_zsyrk GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_zsyrk( cham_uplo_t uplo, cham_trans_t trans,
+           int n, int k,
+           const hipDoubleComplex *alpha,
+           const hipDoubleComplex *A, int lda,
+           const hipDoubleComplex *beta,
+           hipDoubleComplex *B, int ldb,
+           hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZsyrk( handle,
+                       chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans),
+                       n, k,
+                       HIPBLAS_VALUE(alpha), A, lda,
+                       HIPBLAS_VALUE(beta),  B, ldb );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hip_ztrmm.c b/hipblas/compute/hip_ztrmm.c
new file mode 100644
index 0000000000000000000000000000000000000000..9cc52790420eb2cb9593d3ddbef21bd41e5cd292
--- /dev/null
+++ b/hipblas/compute/hip_ztrmm.c
@@ -0,0 +1,45 @@
+/**
+ *
+ * @file hip_ztrmm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_ztrmm GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_ztrmm( cham_side_t side, cham_uplo_t uplo,
+           cham_trans_t transa, cham_diag_t diag,
+           int m, int n,
+           const hipDoubleComplex *alpha,
+           const hipDoubleComplex *A, int lda,
+           hipDoubleComplex *B, int ldb,
+           hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZtrmm( handle,
+                       chameleon_hipblas_const(side), chameleon_hipblas_const(uplo),
+                       chameleon_hipblas_const(transa), chameleon_hipblas_const(diag),
+                       m, n,
+                       HIPBLAS_VALUE(alpha), A, lda,
+                       B, ldb );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
+
diff --git a/hipblas/compute/hip_ztrsm.c b/hipblas/compute/hip_ztrsm.c
new file mode 100644
index 0000000000000000000000000000000000000000..cba7629c396f56c119a54d88076f55e526089f7f
--- /dev/null
+++ b/hipblas/compute/hip_ztrsm.c
@@ -0,0 +1,45 @@
+/**
+ *
+ * @file hip_ztrsm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hip_ztrsm GPU kernel
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @author Loris Lucido
+ * @date 2023-01-30
+ * @precisions normal z -> c d s
+ *
+ */
+#include "hipblas.h"
+
+int
+HIP_ztrsm( cham_side_t side, cham_uplo_t uplo,
+           cham_trans_t transa, cham_diag_t diag,
+           int m, int n,
+           const hipDoubleComplex *alpha,
+           const hipDoubleComplex *A, int lda,
+           hipDoubleComplex *B, int ldb,
+           hipblasHandle_t handle )
+{
+    hipblasStatus_t rc;
+
+    rc = hipblasZtrsm( handle,
+                       chameleon_hipblas_const(side), chameleon_hipblas_const(uplo),
+                       chameleon_hipblas_const(transa), chameleon_hipblas_const(diag),
+                       m, n,
+                       HIPBLAS_VALUE(alpha), A, lda,
+                       B, ldb );
+
+    assert( rc == HIPBLAS_STATUS_SUCCESS );
+    (void)rc;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/hipblas/compute/hipglobal.c b/hipblas/compute/hipglobal.c
new file mode 100644
index 0000000000000000000000000000000000000000..3c9fce60274ea6d317354cea67b7396e7fc5f65e
--- /dev/null
+++ b/hipblas/compute/hipglobal.c
@@ -0,0 +1,128 @@
+/**
+ *
+ * @file hipglobal.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon global hipblas variables and functions
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Loris Lucido
+ * @date 2023-01-30
+ *
+ */
+#include "hipblas.h"
+
+/**
+ *  LAPACK Constants
+ */
+int chameleon_hipblas_constants[] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 100
+    0,                      // 101: ChamRowMajor
+    0,                      // 102: ChamColMajor
+    0, 0, 0, 0, 0, 0, 0, 0,
+    HIPBLAS_OP_N,            // 111: ChamNoTrans
+    HIPBLAS_OP_T,            // 112: ChamTrans
+    HIPBLAS_OP_C,            // 113: ChamConjTrans
+    0, 0, 0, 0, 0, 0, 0,
+    HIPBLAS_FILL_MODE_UPPER, // 121: ChamUpper
+    HIPBLAS_FILL_MODE_LOWER, // 122: ChamLower
+    0,                      // 123: ChamUpperLower
+    0, 0, 0, 0, 0, 0, 0,
+    HIPBLAS_DIAG_NON_UNIT,   // 131: ChamNonUnit
+    HIPBLAS_DIAG_UNIT,       // 132: ChamUnit
+    0, 0, 0, 0, 0, 0, 0, 0,
+    HIPBLAS_SIDE_LEFT,       // 141: ChamLeft
+    HIPBLAS_SIDE_RIGHT,      // 142: ChamRight
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 151:
+    0,                      // 152:
+    0,                      // 153:
+    0,                      // 154:
+    0,                      // 155:
+    0,                      // 156:
+    0,                      // 157: ChamEps
+    0,                      // 158:
+    0,                      // 159:
+    0,                      // 160:
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 171: ChamOneNorm
+    0,                      // 172: ChamRealOneNorm
+    0,                      // 173: ChamTwoNorm
+    0,                      // 174: ChamFrobeniusNorm
+    0,                      // 175: ChamInfNorm
+    0,                      // 176: ChamRealInfNorm
+    0,                      // 177: ChamMaxNorm
+    0,                      // 178: ChamRealMaxNorm
+    0,                      // 179
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 200
+    0,                      // 201: ChamDistUniform
+    0,                      // 202: ChamDistSymmetric
+    0,                      // 203: ChamDistNormal
+    0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 240
+    0,                      // 241 ChamHermGeev
+    0,                      // 242 ChamHermPoev
+    0,                      // 243 ChamNonsymPosv
+    0,                      // 244 ChamSymPosv
+    0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 290
+    0,                      // 291 ChamNoPacking
+    0,                      // 292 ChamPackSubdiag
+    0,                      // 293 ChamPackSupdiag
+    0,                      // 294 ChamPackColumn
+    0,                      // 295 ChamPackRow
+    0,                      // 296 ChamPackLowerBand
+    0,                      // 297 ChamPackUpeprBand
+    0,                      // 298 ChamPackAll
+    0,                      // 299
+    0,                      // 300
+    0,                      // 301 ChamNoVec
+    0,                      // 302 ChamVec, ChamSVDvrange
+    0,                      // 303 ChamIvec, ChamSVDirange
+    0,                      // 304 ChamAllVec, ChamSVDall
+    0,                      // 305 ChamSVec
+    0,                      // 306 ChamOVec
+    0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 390
+    0,                      // 391 Forward
+    0,                      // 392 Backward
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0,                      // 401 Columnwise
+    0,                      // 402 Rowwise
+    0, 0, 0, 0, 0, 0, 0, 0  // Remember to add a coma!
+};
diff --git a/hipblas/eztrace_module/CMakeLists.txt b/hipblas/eztrace_module/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f46c2e28d69199403a00cb16d60a2497ff9db207
--- /dev/null
+++ b/hipblas/eztrace_module/CMakeLists.txt
@@ -0,0 +1,79 @@
+###
+#
+# @file CMakeLists.txt
+#
+# @copyright 2009-2014 The University of Tennessee and The University of
+#                      Tennessee Research Foundation. All rights reserved.
+# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+#                      Univ. Bordeaux. All rights reserved.
+#
+###
+#
+#  @project CHAMELEON
+#  CHAMELEON is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+# @version 1.2.0
+#  @author Florent Pruvost
+#  @author Mathieu Faverge
+#  @date 2022-02-22
+#
+###
+
+if (NOT EZTRACE_FOUND)
+    find_package(EZTRACE)
+endif()
+
+if (EZTRACE_FOUND AND EZTRACE_DIR_FOUND)
+
+    set(EZTRACE_eztrace_create_plugin_DIR "EZTRACE_eztrace_create_plugin_DIR-NOTFOUND")
+    find_path(EZTRACE_eztrace_create_plugin_DIR
+      NAMES eztrace_create_plugin
+      HINTS ${EZTRACE_DIR_FOUND}/bin)
+    mark_as_advanced(EZTRACE_eztrace_create_plugin_DIR)
+
+    if (EZTRACE_eztrace_create_plugin_DIR)
+
+        set(EZTRACE_CREATE_PLUGIN "${EZTRACE_eztrace_create_plugin_DIR}/eztrace_create_plugin")
+
+        add_custom_command(
+            OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/output
+            COMMAND ${EZTRACE_CREATE_PLUGIN}
+            ARGS ${CMAKE_CURRENT_SOURCE_DIR}/hipblas_eztrace_module
+            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hipblas_eztrace_module
+            )
+        add_custom_target(
+            eztrace-module-chameleon_hip-dir ALL
+            DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/output
+            )
+        add_custom_command(
+            OUTPUT libeztrace-convert-chameleon_hip.so
+            COMMAND make
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/output
+            DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/output
+            )
+        add_custom_target(
+            eztrace-module-chameleon_hip-libs ALL
+            DEPENDS libeztrace-convert-chameleon_hip.so
+            )
+        # installation
+        # ------------
+        install(
+            FILES
+            ${CMAKE_CURRENT_BINARY_DIR}/output/libeztrace-autostart-chameleon_hip.so
+            ${CMAKE_CURRENT_BINARY_DIR}/output/libeztrace-chameleon_hip.so
+            ${CMAKE_CURRENT_BINARY_DIR}/output/libeztrace-convert-chameleon_hip.so
+            DESTINATION ${EZTRACE_LIBRARY_DIRS}
+            )
+
+    endif (EZTRACE_eztrace_create_plugin_DIR)
+
+endif (EZTRACE_FOUND AND EZTRACE_DIR_FOUND)
+
+###
+### END CMakeLists.txt
+###
diff --git a/hipblas/eztrace_module/hipblas_eztrace_module b/hipblas/eztrace_module/hipblas_eztrace_module
new file mode 100644
index 0000000000000000000000000000000000000000..2170a72dab2640f20cd67908f8ff4d0768d59c1c
--- /dev/null
+++ b/hipblas/eztrace_module/hipblas_eztrace_module
@@ -0,0 +1,266 @@
+BEGIN_MODULE
+NAME chameleon_hip
+DESC "Module for Chameleon HIP functions"
+ID 7771
+
+int HIP_cgemm(
+        void* transa, void* transb,
+        int m, int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_chemm(
+        void* side, void* uplo,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_cher2k(
+        void* uplo, void* trans,
+        int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        float *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_cherk(
+        void* uplo, void* trans,
+        int n, int k,
+        float *alpha,
+        const void *A, int lda,
+        float *beta,
+        void *B, int ldb,
+        void* stream);
+int HIP_csymm(
+        void* side, void* uplo,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_csyr2k(
+        void* uplo, void* trans,
+        int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_csyrk(
+        void* uplo, void* trans,
+        int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_ctrmm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        void *B, int ldb,
+        void* stream);
+int HIP_ctrsm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        void *B, int ldb,
+        void* stream);
+
+int HIP_dgemm(
+        void* transa, void* transb,
+        int m, int n, int k,
+        double *alpha,
+        const double *A, int lda,
+        const double *B, int ldb,
+        double *beta,
+        double *C, int ldc,
+        void* stream);
+int HIP_dsymm(
+        void* side, void* uplo,
+        int m, int n,
+        double *alpha,
+        const double *A, int lda,
+        const double *B, int ldb,
+        double *beta,
+        double *C, int ldc,
+        void* stream);
+int HIP_dsyr2k(
+        void* uplo, void* trans,
+        int n, int k,
+        double *alpha,
+        const double *A, int lda,
+        const double *B, int ldb,
+        double *beta,
+        double *C, int ldc,
+        void* stream);
+int HIP_dsyrk(
+        void* uplo, void* trans,
+        int n, int k,
+        double *alpha,
+        const double *A, int lda,
+        double *beta,
+        double *B, int ldb,
+        void* stream);
+int HIP_dtrmm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        double *alpha,
+        const double *A, int lda,
+        double *B, int ldb,
+        void* stream);
+int HIP_dtrsm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        double *alpha,
+        const double *A, int lda,
+        double *B, int ldb,
+        void* stream);
+
+int HIP_sgemm(
+        void* transa, void* transb,
+        int m, int n, int k,
+        float *alpha,
+        const float *A, int lda,
+        const float *B, int ldb,
+        float *beta,
+        float *C, int ldc,
+        void* stream);
+int HIP_ssymm(
+        void* side, void* uplo,
+        int m, int n,
+        float *alpha,
+        const float *A, int lda,
+        const float *B, int ldb,
+        float *beta,
+        float *C, int ldc,
+        void* stream);
+int HIP_ssyr2k(
+        void* uplo, void* trans,
+        int n, int k,
+        float *alpha,
+        const float *A, int lda,
+        const float *B, int ldb,
+        float *beta,
+        float *C, int ldc,
+        void* stream);
+int HIP_ssyrk(
+        void* uplo, void* trans,
+        int n, int k,
+        float *alpha,
+        const float *A, int lda,
+        float *beta,
+        float *B, int ldb,
+        void* stream);
+int HIP_strmm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        float *alpha,
+        const float *A, int lda,
+        float *B, int ldb,
+        void* stream);
+int HIP_strsm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        float *alpha,
+        const float *A, int lda,
+        float *B, int ldb,
+        void* stream);
+
+int HIP_zgemm(
+        void* transa, void* transb,
+        int m, int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_zhemm(
+        void* side, void* uplo,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_zher2k(
+        void* uplo, void* trans,
+        int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        double *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_zherk(
+        void* uplo, void* trans,
+        int n, int k,
+        double *alpha,
+        const void *A, int lda,
+        double *beta,
+        void *B, int ldb,
+        void* stream);
+int HIP_zsymm(
+        void* side, void* uplo,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_zsyr2k(
+        void* uplo, void* trans,
+        int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        const void *B, int ldb,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_zsyrk(
+        void* uplo, void* trans,
+        int n, int k,
+        void *alpha,
+        const void *A, int lda,
+        void *beta,
+        void *C, int ldc,
+        void* stream);
+int HIP_ztrmm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        void *B, int ldb,
+        void* stream);
+int HIP_ztrsm(
+        void* side, void* uplo,
+        void* transa, void* diag,
+        int m, int n,
+        void *alpha,
+        const void *A, int lda,
+        void *B, int ldb,
+        void* stream);
+
+END_MODULE
diff --git a/hipblas/include/CMakeLists.txt b/hipblas/include/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ec1b8e3b459333670f40b5c2389a37e6c435f1fb
--- /dev/null
+++ b/hipblas/include/CMakeLists.txt
@@ -0,0 +1,66 @@
+###
+#
+# @file CMakeLists.txt
+#
+# @copyright 2009-2014 The University of Tennessee and The University of
+#                      Tennessee Research Foundation. All rights reserved.
+# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+#                      Univ. Bordeaux. All rights reserved.
+#
+###
+#
+#  @project CHAMELEON
+#  CHAMELEON is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+# @version 1.2.0
+#  @author Florent Pruvost
+#  @author Mathieu Faverge
+#  @author Loris Lucido
+#  @date 2023-01-30
+#
+###
+
+# Generate header files
+# ---------------------
+set(HIPBLAS_HDRS_GENERATED "")
+set(ZHDR
+    hipblas/hipblas_z.h
+)
+precisions_rules_py(
+  HIPBLAS_HDRS_GENERATED "${ZHDR}"
+  TARGETDIR hipblas
+  PRECISIONS "s;d;c;z;zc;ds" )
+
+# Define the list of headers
+# --------------------------
+set(HIPBLAS_HDRS
+    hipblas.h
+    )
+
+# Add generated headers
+# ---------------------
+foreach( hdr_file ${HIPBLAS_HDRS_GENERATED} )
+  list(APPEND HIPBLAS_HDRS ${CMAKE_CURRENT_BINARY_DIR}/${hdr_file})
+endforeach()
+
+# Force generation of headers
+# ---------------------------
+add_custom_target(hipblas_include ALL SOURCES ${HIPBLAS_HDRS})
+set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};hipblas_include" CACHE INTERNAL "List of targets of sources")
+
+# Installation
+# ------------
+install( FILES hipblas.h
+         DESTINATION include )
+
+install( FILES ${HIPBLAS_HDRS}
+         DESTINATION include/hipblas )
+
+###
+### END CMakeLists.txt
+###
diff --git a/hipblas/include/hipblas.h b/hipblas/include/hipblas.h
new file mode 100644
index 0000000000000000000000000000000000000000..333bbc2d91801f05404d0655ec6e15848edf849f
--- /dev/null
+++ b/hipblas/include/hipblas.h
@@ -0,0 +1,82 @@
+/**
+ *
+ * @file hipblas.h
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon GPU kernels main header
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @author Nathalie Furmento
+ * @author Loris Lucido
+ * @date 2023-01-30
+ * @precisions normal z -> c d s
+ *
+ */
+#ifndef _hipblas_h_
+#define _hipblas_h_
+
+#include "chameleon/config.h"
+
+#if !defined(CHAMELEON_USE_HIP)
+#error "This file should not be included"
+#endif
+
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+
+#include <hip/hip_runtime.h>
+#include <hip/hip_complex.h>
+
+#include <hipblas/hipblas.h>
+
+#define HIPBLAS_SADDR(_a_) (&(_a_))
+#define HIPBLAS_VALUE(_a_) (_a_)
+
+/**
+ * CHAMELEON types and constants
+ */
+#include "chameleon/types.h"
+#include "chameleon/struct.h"
+#include "chameleon/constants.h"
+
+/**
+ * HIP BLAS headers
+ */
+BEGIN_C_DECLS
+
+#include "hipblas/hipblas_z.h"
+#include "hipblas/hipblas_d.h"
+#include "hipblas/hipblas_c.h"
+#include "hipblas/hipblas_s.h"
+
+END_C_DECLS
+
+/**
+ * Coreblas Error
+ */
+#define hipblas_error(k, str) fprintf(stderr, "%s: Parameter %d / %s\n", __func__, k, str)
+
+/**
+ *  LAPACK Constants
+ */
+BEGIN_C_DECLS
+
+extern char *chameleon_lapack_constants[];
+#define chameleon_lapack_const(chameleon_const) chameleon_lapack_constants[chameleon_const][0]
+
+extern int chameleon_hipblas_constants[];
+#define chameleon_hipblas_const(chameleon_const) chameleon_hipblas_constants[chameleon_const]
+
+END_C_DECLS
+
+#endif /* _hipblas_h_ */
diff --git a/hipblas/include/hipblas/hipblas_z.h b/hipblas/include/hipblas/hipblas_z.h
new file mode 100644
index 0000000000000000000000000000000000000000..647a0a7e5b68e8afa198b289a667eb0038e286cd
--- /dev/null
+++ b/hipblas/include/hipblas/hipblas_z.h
@@ -0,0 +1,38 @@
+/**
+ *
+ * @file hipblas_z.h
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon GPU CHAMELEON_Complex64_t kernels header
+ *
+ * @version 1.2.0
+ * @author Florent Pruvost
+ * @author Mathieu Faverge
+ * @author Loris Lucido
+ * @date 2023-01-30
+ * @precisions normal z -> c d s
+ *
+ */
+#ifndef _hipblas_z_h_
+#define _hipblas_z_h_
+
+/**
+ *  Declarations of hip kernels - alphabetical order
+ */
+int HIP_zgemm(  cham_trans_t transa, cham_trans_t transb, int m, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle );
+int HIP_zhemm(  cham_side_t side, cham_uplo_t uplo, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle );
+int HIP_zher2k( cham_uplo_t uplo, cham_trans_t trans, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const double *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle );
+int HIP_zherk(  cham_uplo_t uplo, cham_trans_t trans, int n, int k, const double *alpha, const hipDoubleComplex *A, int lda, const double *beta, hipDoubleComplex *B, int ldb, hipblasHandle_t handle );
+int HIP_zsymm(  cham_side_t side, cham_uplo_t uplo, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle );
+int HIP_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle );
+int HIP_zsyrk(  cham_uplo_t uplo, cham_trans_t trans, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle );
+int HIP_ztrmm(  cham_side_t side, cham_uplo_t uplo, cham_trans_t transa, cham_diag_t diag, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, hipDoubleComplex *B, int ldb, hipblasHandle_t handle );
+int HIP_ztrsm(  cham_side_t side, cham_uplo_t uplo, cham_trans_t transa, cham_diag_t diag, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, hipDoubleComplex *B, int ldb, hipblasHandle_t handle );
+
+#endif /* _hipblas_z_h_ */
diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in
index 2b02d4bf4c9a08d2969bde5bd89bf0d162278112..34b3f3febe2558bd3eca466e7b9fcdd21d8dbeb1 100644
--- a/include/chameleon/config.h.in
+++ b/include/chameleon/config.h.in
@@ -16,7 +16,8 @@
  * @author Mathieu Faverge
  * @author Philippe Virouleau
  * @author Raphael Boucherie
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @date 2023-01-30
  *
  */
 #ifndef CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED
@@ -54,6 +55,7 @@
 /* GPU Support */
 #cmakedefine CHAMELEON_USE_CUDA
 #cmakedefine CHAMELEON_USE_CUBLAS
+#cmakedefine CHAMELEON_USE_HIP
 
 /* Hmat-oss */
 #cmakedefine CHAMELEON_USE_HMAT
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 2b4d0a1db651921f94fe509cd5976800e69c162a..0968de66d35b777f2efbbc2d75ebfdbc9ccdb475 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -24,7 +24,9 @@
 #  @author Florent Pruvost
 #  @author Samuel Thibault
 #  @author Matthieu Kuhn
-#  @date 2022-02-22
+#  @author Loris Lucido
+#  @author Terry Cojean
+#  @date 2023-01-30
 #
 ###
 cmake_minimum_required(VERSION 3.1)
@@ -147,6 +149,13 @@ if ( STARPU_FOUND )
     endif()
   endif()
 
+  if (CHAMELEON_USE_HIP AND NOT CHAMELEON_SIMULATION)
+    check_function_exists(starpu_hipblas_get_local_handle HAVE_STARPU_HIPBLAS_GET_LOCAL_HANDLE)
+    if ( NOT HAVE_STARPU_HIPBLAS_GET_LOCAL_HANDLE )
+      message(FATAL_ERROR "The detected StarPU library does not include hipblas support. Please disable CHAMELEON_USE_HIP_{CUDA,ROC} or provide a StarPU library with hipblas support.")
+    endif()
+  endif()
+
   morse_cmake_required_unset()
 endif ( STARPU_FOUND )
 
@@ -268,6 +277,9 @@ if (NOT CHAMELEON_SIMULATION)
   if (CHAMELEON_USE_CUDA)
     add_dependencies(chameleon_starpu cudablas_include)
   endif()
+  if (CHAMELEON_USE_HIP)
+    add_dependencies(chameleon_starpu hipblas_include)
+  endif()
 endif()
 
 target_include_directories(chameleon_starpu PUBLIC
@@ -294,6 +306,9 @@ if (NOT CHAMELEON_SIMULATION)
   if(CHAMELEON_USE_CUDA)
     target_link_libraries(chameleon_starpu PRIVATE cudablas)
   endif(CHAMELEON_USE_CUDA)
+  if(CHAMELEON_USE_HIP)
+    target_link_libraries(chameleon_starpu PRIVATE hipblas)
+  endif(CHAMELEON_USE_HIP)
 endif(NOT CHAMELEON_SIMULATION)
 target_link_libraries(chameleon_starpu PRIVATE MORSE::M)
 
@@ -307,6 +322,9 @@ if (NOT CHAMELEON_SIMULATION)
   if (CHAMELEON_USE_CUDA)
     target_link_libraries(chameleon_starpu PUBLIC CUDA::CUBLAS)
   endif()
+  if (CHAMELEON_USE_HIP)
+    target_link_libraries(chameleon_starpu PUBLIC HIP::HIPBLAS)
+  endif()
 endif()
 
 # export target
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index bc972bcf54562388b6c091049c21d972b79cadf9..bd0823a6f425e600243e83e1e51794d70209f08b 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -22,7 +22,9 @@
  * @author Gwenole Lucas
  * @author Philippe Swartvagher
  * @author Lucas Nesi
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @author Terry Cojean
+ * @date 2023-01-30
  * @precisions normal z -> c d s
  *
  */
@@ -61,7 +63,7 @@ cl_zgemm_cpu_func( void *descr[], void *cl_arg )
                  clargs->beta,  tileC );
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void
 cl_zgemm_cuda_func( void *descr[], void *cl_arg )
 {
@@ -90,12 +92,48 @@ cl_zgemm_cuda_func( void *descr[], void *cl_arg )
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_zgemm_hip_func( void *descr[], void *cl_arg )
+{
+    struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)cl_arg;
+    hipblasHandle_t          handle = starpu_hipblas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileC->format & CHAMELEON_TILE_FULLRANK );
+
+    HIP_zgemm(
+        clargs->transA, clargs->transB,
+        clargs->m, clargs->n, clargs->k,
+        (hipDoubleComplex*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        (hipDoubleComplex*)&(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+
+    return;
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zgemm, cl_zgemm_cpu_func, cl_zgemm_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_zgemm_Astat( const RUNTIME_option_t *options,
                               cham_trans_t transA, cham_trans_t transB,
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index b7f70ca6868e5a5e8c1543c5ff1c51204be7ec60..cff474bbf6c75e4fdb1108895d6ee76e8a72f0e8 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -57,7 +57,7 @@ cl_zhemm_cpu_func( void *descr[], void *cl_arg )
                  clargs->beta,  tileC );
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void
 cl_zhemm_cuda_func( void *descr[], void *cl_arg )
 {
@@ -86,12 +86,46 @@ cl_zhemm_cuda_func( void *descr[], void *cl_arg )
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_zhemm_hip_func( void *descr[], void *cl_arg )
+{
+    struct cl_zhemm_args_s *clargs = (struct cl_zhemm_args_s *)cl_arg;
+    hipblasHandle_t         handle = starpu_hipblas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileC->format & CHAMELEON_TILE_FULLRANK );
+
+    HIP_zhemm(
+        clargs->side, clargs->uplo,
+        clargs->m, clargs->n,
+        (hipDoubleComplex*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        (hipDoubleComplex*)&(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zhemm, cl_zhemm_cpu_func, cl_zhemm_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( zhemm, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_zhemm_Astat( const RUNTIME_option_t *options,
                               cham_side_t side, cham_uplo_t uplo,
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 38babbe5a230611d595f4716c170f1a5618ad420..fa102852615bac660a9ea9f40367faa10cfc4f3a 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -48,7 +48,7 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
                 n, k, alpha, tileA, tileB, beta, tileC);
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
 {
     cublasHandle_t handle = starpu_cublas_get_local_handle();
@@ -75,13 +75,46 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
                  &beta,  tileC->mat, tileC->ld,
                  handle );
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void cl_zher2k_hip_func(void *descr[], void *cl_arg)
+{
+    hipblasHandle_t handle = starpu_hipblas_get_local_handle();
+    cham_uplo_t uplo;
+    cham_trans_t trans;
+    int n;
+    int k;
+    hipDoubleComplex alpha;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    double beta;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
+
+    HIP_zher2k( uplo, trans,
+                 n, k,
+                 &alpha, tileA->mat, tileA->ld,
+                         tileB->mat, tileB->ld,
+                 &beta,  tileC->mat, tileC->ld,
+                 handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS(zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zher2k, cl_zher2k_cpu_func, cl_zher2k_hip_func, STARPU_HIP_ASYNC )
+#else
+CODELETS( zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 894f0b0269754ba9e0c0171efda6abdaa59b34b0..29301b83097c8b61383c00861777c35917ac27cd 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -74,12 +74,38 @@ cl_zherk_cuda_func(void *descr[], void *cl_arg)
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_zherk_hip_func(void *descr[], void *cl_arg)
+{
+    hipblasHandle_t handle = starpu_hipblas_get_local_handle();
+    struct cl_zherk_args_s *clargs = (struct cl_zherk_args_s *)cl_arg;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileC = cti_interface_get(descr[1]);
+
+    HIP_zherk(
+        clargs->uplo, clargs->trans, clargs->n, clargs->k,
+        &(clargs->alpha),
+        tileA->mat, tileA->ld,
+        &(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zherk, cl_zherk_cpu_func, cl_zherk_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_zherk( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_trans_t trans,
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 9fad20e13bdc75a10cde4bffb998343d71503fdb..54750f0b3f7de2f76083163b9a881db77d8066e3 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -58,7 +58,7 @@ cl_zsymm_cpu_func( void *descr[], void *cl_arg )
                  clargs->beta,  tileC );
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void
 cl_zsymm_cuda_func( void *descr[], void *cl_arg )
 {
@@ -87,12 +87,46 @@ cl_zsymm_cuda_func( void *descr[], void *cl_arg )
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_zsymm_hip_func( void *descr[], void *cl_arg )
+{
+    struct cl_zsymm_args_s *clargs = (struct cl_zsymm_args_s *)cl_arg;
+    hipblasHandle_t         handle = starpu_hipblas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileC->format & CHAMELEON_TILE_FULLRANK );
+
+    HIP_zsymm(
+        clargs->side, clargs->uplo,
+        clargs->m, clargs->n,
+        (hipDoubleComplex*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        (hipDoubleComplex*)&(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zsymm, cl_zsymm_cpu_func, cl_zsymm_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( zsymm, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_zsymm_Astat( const RUNTIME_option_t *options,
                               cham_side_t side, cham_uplo_t uplo,
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index af47c87edf546b3a4d3005081c4acff8d61161fe..7d5ce81aadff62dd3866bee1a4ee9ecc63b354a0 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -48,7 +48,7 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
                  n, k, alpha, tileA, tileB, beta, tileC);
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
 {
     cublasHandle_t handle = starpu_cublas_get_local_handle();
@@ -75,13 +75,46 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
                  &beta,  tileC->mat, tileC->ld,
                  handle );
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void cl_zsyr2k_hip_func(void *descr[], void *cl_arg)
+{
+    hipblasHandle_t handle = starpu_hipblas_get_local_handle();
+    cham_uplo_t uplo;
+    cham_trans_t trans;
+    int n;
+    int k;
+    hipDoubleComplex alpha;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    hipDoubleComplex beta;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
+
+    HIP_zsyr2k( uplo, trans,
+                 n, k,
+                 &alpha, tileA->mat, tileA->ld,
+                         tileB->mat, tileB->ld,
+                 &beta,  tileC->mat, tileC->ld,
+                 handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS(zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC)
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_hip_func, STARPU_HIP_ASYNC )
+#else
+CODELETS( zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 8d8dcdbe752e256a40ec8c914378dbc6356ba91f..d9b7e84fd00336b445a05eeac648957f61eb51e7 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -75,12 +75,38 @@ cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_zsyrk_hip_func(void *descr[], void *cl_arg)
+{
+    hipblasHandle_t handle = starpu_hipblas_get_local_handle();
+    struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileC = cti_interface_get(descr[1]);
+
+    HIP_zsyrk(
+        clargs->uplo, clargs->trans, clargs->n, clargs->k,
+        (hipDoubleComplex*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        (hipDoubleComplex*)&(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_trans_t trans,
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index e51b81ab7b9b45028b7f63aa00f1643be0a948c6..3bb14d38bef1119024293da7ecdfb6f58a67c12f 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -53,7 +53,7 @@ cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
                  clargs->m, clargs->n, clargs->alpha, tileA, tileB );
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void
 cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 {
@@ -74,12 +74,38 @@ cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_ztrmm_hip_func(void *descr[], void *cl_arg)
+{
+    struct cl_ztrmm_args_s *clargs = (struct cl_ztrmm_args_s *)cl_arg;
+    hipblasHandle_t         handle = starpu_hipblas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    HIP_ztrmm(
+        clargs->side, clargs->uplo, clargs->transA, clargs->diag,
+        clargs->m, clargs->n,
+        (hipDoubleComplex*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 0196649684a4652ff5686bc060aad7e7879a1f2b..19e83c01a0010bcd1d495932ab7c67e1ac3dc139 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -20,7 +20,9 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Gwenole Lucas
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @author Terry Cojean
+ * @date 2023-01-30
  * @precisions normal z -> c d s
  *
  */
@@ -54,7 +56,7 @@ cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
                  clargs->m, clargs->n, clargs->alpha, tileA, tileB );
 }
 
-#ifdef CHAMELEON_USE_CUDA
+#if defined(CHAMELEON_USE_CUDA)
 static void
 cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
 {
@@ -75,12 +77,40 @@ cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
         handle );
 }
 #endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_ztrsm_hip_func(void *descr[], void *cl_arg)
+{
+    struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg;
+    hipblasHandle_t         handle = starpu_hipblas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    HIP_ztrsm(
+        clargs->side, clargs->uplo, clargs->transA, clargs->diag,
+        clargs->m, clargs->n,
+        (hipDoubleComplex*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
+
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
+
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_hip_func, STARPU_HIP_ASYNC )
+#else
 CODELETS( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC )
+#endif
 
 void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c
index c83ab1283339f214c91c61f7643d75874e67658e..e2cdb399f401872d4712cb7894d350155994c098 100644
--- a/runtime/starpu/control/runtime_context.c
+++ b/runtime/starpu/control/runtime_context.c
@@ -16,6 +16,7 @@
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
+ * @author Terry Cojean
  * @date 2022-02-22
  *
  */
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index 5a36b8a6f0a2643f148b09b90b900b9a323caa27..d527cd56b307ad2dc033bf7ecc878a26cddff523 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -19,7 +19,9 @@
  * @author Philippe Swartvagher
  * @author Samuel Thibault
  * @author Matthieu Kuhn
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @author Terry Cojean
+ * @date 2023-01-30
  *
  */
 #include "chameleon_starpu.h"
@@ -221,8 +223,12 @@ int RUNTIME_init( CHAM_context_t *chamctxt,
                                              );
 #endif
 
-#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA)
     starpu_cublas_init();
+#elif defined(CHAMELEON_USE_HIP)
+    starpu_hipblas_init();
+#endif
 #endif
 
     starpu_cham_tile_interface_init();
@@ -249,6 +255,9 @@ void RUNTIME_finalize( CHAM_context_t *chamctxt )
 #if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
     starpu_cublas_shutdown();
 #endif
+#if defined(CHAMELEON_USE_HIP) && !defined(CHAMELEON_SIMULATION)
+    starpu_hipblas_shutdown();
+#endif
 
 #if defined(CHAMELEON_USE_MPI)
     starpu_mpi_shutdown();
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 72dd1b81d81b6830edc807e5fdb4a48c9276bd90..385adc0df81a1a87de81971fe3590853b5214287 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -19,7 +19,8 @@
  * @author Guillaume Sylvand
  * @author Raphael Boucherie
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @date 2023-01-30
  *
  */
 #include "chameleon_starpu.h"
@@ -138,6 +139,26 @@ void RUNTIME_free( void  *ptr,
 #endif
 }
 
+#if defined(CHAMELEON_USE_CUDA)
+
+#define gpuError_t              cudaError_t
+#define gpuHostRegister         cudaHostRegister
+#define gpuHostUnregister       cudaHostUnregister
+#define gpuHostRegisterPortable cudaHostRegisterPortable
+#define gpuSuccess              cudaSuccess
+#define gpuGetErrorString       cudaGetErrorString
+
+#elif defined(CHAMELEON_USE_HIP)
+
+#define gpuError_t              hipError_t
+#define gpuHostRegister         hipHostRegister
+#define gpuHostUnregister       hipHostUnregister
+#define gpuHostRegisterPortable hipHostRegisterPortable
+#define gpuSuccess              hipSuccess
+#define gpuGetErrorString       hipGetErrorString
+
+#endif
+
 /**
  *  Create data descriptor
  */
@@ -155,25 +176,27 @@ void RUNTIME_desc_create( CHAM_desc_t *desc )
     desc->schedopt = (void*)calloc(lnt*lmt,sizeof(starpu_data_handle_t));
     assert(desc->schedopt);
 
-#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
     /*
-     * Register allocated memory as CUDA pinned memory
+     * Register allocated memory as GPU pinned memory
      */
     if ( (desc->use_mat == 1) && (desc->register_mat == 1) )
     {
         int64_t eltsze = CHAMELEON_Element_Size(desc->dtyp);
         size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze;
-        cudaError_t rc;
+        gpuError_t rc;
 
         /* Register the matrix as pinned memory */
-        rc = cudaHostRegister( desc->mat, size, cudaHostRegisterPortable );
-        if ( rc != cudaSuccess )
+        rc = gpuHostRegister( desc->mat, size, gpuHostRegisterPortable );
+        if ( rc != gpuSuccess )
         {
             /* Disable the unregister as register failed */
             desc->register_mat = 0;
-            chameleon_warning("RUNTIME_desc_create(StarPU): cudaHostRegister - ", cudaGetErrorString( rc ));
+            chameleon_warning("RUNTIME_desc_create(StarPU): gpuHostRegister - ", gpuGetErrorString( rc ));
         }
     }
+#endif
 #endif
 
     if (desc->ooc) {
@@ -247,18 +270,20 @@ void RUNTIME_desc_destroy( CHAM_desc_t *desc )
             }
         }
 
-#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
         if ( (desc->use_mat == 1) && (desc->register_mat == 1) )
         {
             /* Unmap the pinned memory associated to the matrix */
-            if (cudaHostUnregister(desc->mat) != cudaSuccess)
+            if (gpuHostUnregister(desc->mat) != gpuSuccess)
             {
                 chameleon_warning("RUNTIME_desc_destroy(StarPU)",
-                                  "cudaHostUnregister failed to unregister the "
+                                  "gpuHostUnregister failed to unregister the "
                                   "pinned memory associated to the matrix");
             }
         }
-#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif
+#endif
 
         free(desc->schedopt);
     }
diff --git a/runtime/starpu/control/runtime_workspace.c b/runtime/starpu/control/runtime_workspace.c
index fb209146e3c7e44b4b129379bf23536620322cf9..ad9b5cd9fd0d0abb8f7760b9a66c0936473f1281 100644
--- a/runtime/starpu/control/runtime_workspace.c
+++ b/runtime/starpu/control/runtime_workspace.c
@@ -15,11 +15,26 @@
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @date 2023-01-30
  *
  */
 #include "chameleon_starpu.h"
 
+#if defined(CHAMELEON_USE_CUDA)
+#define GPU_WORKER_TYPE STARPU_CUDA_WORKER
+#define gpuMallocHost   cudaMallocHost
+#define gpuMalloc       cudaMalloc
+#define gpuFreeHost     cudaFreeHost
+#define gpuFree         cudaFree
+#elif defined(CHAMELEON_USE_HIP)
+#define GPU_WORKER_TYPE STARPU_HIP_WORKER
+#define gpuMallocHost   hipMallocHost
+#define gpuMalloc       hipMalloc
+#define gpuFreeHost     hipFreeHost
+#define gpuFree         hipFree
+#endif
+
 static void RUNTIME_allocate_workspace_on_workers(void *arg)
 {
     struct chameleon_starpu_ws_s *workspace = arg;
@@ -28,9 +43,10 @@ static void RUNTIME_allocate_workspace_on_workers(void *arg)
 
     int id = starpu_worker_get_id();
 
-#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
     type = starpu_worker_get_type(id);
-    if (type == STARPU_CUDA_WORKER)
+    if ( type == GPU_WORKER_TYPE )
     {
         int memory_location = workspace->memory_location;
 
@@ -39,14 +55,15 @@ static void RUNTIME_allocate_workspace_on_workers(void *arg)
             /* Use pinned memory because the kernel is very likely
              * to transfer these data between the CPU and the GPU.
              * */
-            cudaMallocHost(&workspace->workspaces[id], workspace->size);
+            gpuMallocHost( &workspace->workspaces[id], workspace->size );
         }
         else {
             /* Allocate on the device */
-            cudaMalloc(&workspace->workspaces[id], workspace->size);
+            gpuMalloc( &workspace->workspaces[id], workspace->size );
         }
     }
     else
+#endif
 #endif
     {
         /* This buffer should only be used within the CPU kernel, so
@@ -54,7 +71,7 @@ static void RUNTIME_allocate_workspace_on_workers(void *arg)
         workspace->workspaces[id] = malloc(workspace->size);
     }
 
-    assert(workspace->workspaces[id]);
+    assert( workspace->workspaces[id] );
 }
 
 
@@ -65,24 +82,26 @@ static void RUNTIME_free_workspace_on_workers(void *arg)
     (void)type;
     int id = starpu_worker_get_id();
 
-#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
-    type = starpu_worker_get_type(id);
-    if (type == STARPU_CUDA_WORKER)
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
+    type = starpu_worker_get_type( id );
+    if ( type == GPU_WORKER_TYPE )
     {
         int memory_location = workspace->memory_location;
 
         if (memory_location == CHAMELEON_HOST_MEM)
         {
-            cudaFreeHost(workspace->workspaces[id]);
+            gpuFreeHost( workspace->workspaces[id] );
         }
         else {
-            cudaFree(workspace->workspaces[id]);
+            gpuFree( workspace->workspaces[id] );
         }
     }
     else
+#endif
 #endif
     {
-        free(workspace->workspaces[id]);
+        free( workspace->workspaces[id] );
     }
 
     workspace->workspaces[id] = NULL;
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index e2d964ccc98ae105c4bd79ceac832f77c8ea89e2..e92e23387f1f20912e3d28f910047c2d9629dfd4 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -17,7 +17,9 @@
  * @author Florent Pruvost
  * @author Philippe Swartvagher
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @author Terry Cojean
+ * @date 2023-01-30
  *
  */
 #ifndef _chameleon_starpu_h_
@@ -71,6 +73,14 @@
 #include <starpu_cublas_v2.h>
 #endif
 
+#if defined(CHAMELEON_USE_HIP) && !defined(CHAMELEON_SIMULATION)
+#include <starpu_scheduler.h>
+#include <starpu_hip.h>
+
+#include <hipblas/hipblas.h>
+#include <starpu_hipblas.h>
+#endif
+
 #if defined(CHAMELEON_SIMULATION)
 # if !defined(STARPU_SIMGRID)
 #  error "Starpu was not built with simgrid support (--enable-simgrid). Can not run Chameleon with simulation support."
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 82afe7b42cea6f37c0bd2ede6aeaf0041048fa34..673d8c5d7a6ea287ed2fa4e948c6d8820f35a00a 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -17,7 +17,8 @@
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Alycia Lisito
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @date 2023-01-30
  * @precisions normal z -> c d s
  *
  */
@@ -31,11 +32,17 @@
 #if !defined(CHAMELEON_SIMULATION)
 #include "coreblas/coreblas_z.h"
 #include "coreblas/coreblas_ztile.h"
+
 #if defined(CHAMELEON_USE_CUDA)
 #include "cudablas.h"
 #endif
+
+#if defined(CHAMELEON_USE_HIP)
+#include "hipblas.h"
 #endif
 
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
 /*
  * BLAS 1 functions
  */
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index d5c923355299277f32747c76f5534bf3edbd5ce0..00808fc52fec20078744d3603bec4cb2be680a24 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -16,7 +16,8 @@
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @author Loris Lucido
+ * @date 2023-01-30
  *
  */
 #ifndef _runtime_codelets_h_
@@ -25,14 +26,31 @@
 #include "chameleon/config.h"
 #include "runtime_codelet_profile.h"
 
-//#undef STARPU_CUDA_ASYNC
-#ifdef STARPU_CUDA_ASYNC
+#if defined(STARPU_CUDA_ASYNC)
 #define CODELET_CUDA_FLAGS(flags) .cuda_flags = {(flags)},
 #else
 #define CODELET_CUDA_FLAGS(flags)
 #endif
 
-#define CODELETS_ALL(cl_name, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \
+#if defined(STARPU_HIP_ASYNC)
+#define CODELET_HIP_FLAGS(flags) .hip_flags = {(flags)},
+#else
+#define CODELET_HIP_FLAGS(flags)
+#endif
+
+#if defined(CHAMELEON_USE_CUDA)
+#define CODELET_GPU_FIELDS( gpu_func_name, gpu_flags )                 \
+        CODELET_CUDA_FLAGS( gpu_flags )                                \
+        .cuda_func = ((gpu_func_name)),
+#elif defined(CHAMELEON_USE_HIP)
+#define CODELET_GPU_FIELDS( gpu_func_name, gpu_flags )                 \
+        CODELET_HIP_FLAGS( gpu_flags )                                 \
+        .hip_funcs = {(gpu_func_name)},
+#else
+#define CODELET_GPU_FIELDS( gpu_func_name, gpu_flags )
+#endif
+
+#define CODELETS_ALL(cl_name, cpu_func_name, gpu_func_name, _original_location_, gpu_flags) \
     struct starpu_perfmodel cl_##cl_name##_fake = {                     \
         .type   = STARPU_HISTORY_BASED,                                 \
         .symbol = "fake_"#cl_name                                       \
@@ -46,8 +64,7 @@
     struct starpu_codelet cl_##cl_name = {                              \
         .where     = (_original_location_),                             \
         .cpu_func  = ((cpu_func_name)),                                 \
-        CODELET_CUDA_FLAGS(cuda_flags)                                  \
-        .cuda_func = ((cuda_func_name)),                                \
+        CODELET_GPU_FIELDS( gpu_func_name, gpu_flags )                  \
         .nbuffers  = STARPU_VARIABLE_NBUFFERS,                          \
         .model     = &cl_##cl_name##_model,                             \
         .name      = #cl_name                                           \
@@ -77,8 +94,13 @@
     CODELETS_ALL( name, cpu_func_name, NULL, STARPU_CPU, 0 )
 #endif
 
-#define CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_ALL( name, cpu_func_name, cuda_func_name, STARPU_CPU  | STARPU_CUDA, cuda_flags )
+#if defined(CHAMELEON_USE_HIP)
+#define CODELETS_GPU(name, cpu_func_name, gpu_func_name, gpu_flags) \
+    CODELETS_ALL( name, cpu_func_name, gpu_func_name, STARPU_CPU | STARPU_HIP, gpu_flags )
+#else
+#define CODELETS_GPU(name, cpu_func_name, gpu_func_name, gpu_flags) \
+    CODELETS_ALL( name, cpu_func_name, gpu_func_name, STARPU_CPU | STARPU_CUDA, gpu_flags )
+#endif
 
 #define CODELETS_ALL_HEADER(name)                            \
      CHAMELEON_CL_CB_HEADER(name);                           \
@@ -89,29 +111,35 @@
      void cl_##name##_restore_where(void)
 
 #if defined(CHAMELEON_SIMULATION)
-#if defined(CHAMELEON_USE_CUDA)
-#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_GPU(name, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, cuda_flags)
+
+#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
+#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \
+    CODELETS_GPU(name, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, gpu_flags)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #else
-#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
+#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \
     CODELETS_CPU(name, (starpu_cpu_func_t) 1)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #endif
-#elif defined(CHAMELEON_USE_CUDA)
-#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags)
+
+#else /* defined(CHAMELEON_SIMULATION) */
+
+#if defined(CHAMELEON_USE_CUDA) //|| defined(CHAMELEON_USE_HIP)
+#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \
+    CODELETS_GPU(name, cpu_func_name, gpu_func_name, gpu_flags)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #else
-#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
+#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \
     CODELETS_CPU(name, cpu_func_name)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #endif
 
+#endif
+
 CODELETS_HEADER(map);
 
 #endif /* _runtime_codelets_h_ */
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 7c979a91710167fff8c41bd92d58a62804f7e5ec..9dcacf75e7ed0f0ff4699ab38e7d7497f471f227 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -11,6 +11,9 @@ endif()
 if (CHAMELEON_USE_CUDA AND CUDA_FOUND)
   set(N_GPUS 0 1)
 endif()
+if (CHAMELEON_USE_HIP AND HIP_FOUND)
+  set(N_GPUS 0 1)
+endif()
 if (CHAMELEON_SIMULATION)
   set(TEST_CATEGORIES simushm)
   if (CHAMELEON_USE_CUDA)