diff --git a/CMakeLists.txt b/CMakeLists.txt index 6715cc43f625a0f7e810f6401938e4ced372a364..f62ddbdf0139e68c4a5a56a6ab37847562ca39b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,8 @@ # @author Samuel Thibault # @author Guillaume Sylvand # @author Alycia Lisito -# @date 2022-02-22 +# @author Loris Lucido +# @date 2023-01-30 # ### cmake_minimum_required(VERSION 3.3) @@ -188,6 +189,11 @@ if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU ) set(CHAMELEON_ENABLE_CUDA ON FORCE) endif() +set(CHAMELEON_ENABLE_HIP OFF CACHE INTERNAL "Tells if HIP might be supported by the runtime") +if ( CHAMELEON_SCHED_STARPU ) + set(CHAMELEON_ENABLE_HIP ON FORCE) +endif() + # Additional options # ------------------ @@ -217,6 +223,20 @@ if (CHAMELEON_ENABLE_CUDA AND NOT CHAMELEON_USE_CUDA) message("-- ${BoldGreen}CHAMELEON_USE_CUDA is set to OFF, turn it ON to use CUDA (unsupported by Quark)${ColourReset}") endif() +cmake_dependent_option(CHAMELEON_USE_HIP_CUDA + "Enable HIP kernels with CUDA backend" OFF + "CHAMELEON_ENABLE_HIP" OFF) +cmake_dependent_option(CHAMELEON_USE_HIP_ROC + "Enable HIP kernels with ROCclr backend" OFF + "CHAMELEON_ENABLE_HIP" OFF) + +set(CHAMELEON_USE_HIP OFF CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only of one of the CHAMELEON_USE_HIP{CUDA,ROC} is enabled") +if( CHAMELEON_USE_HIP_CUDA OR CHAMELEON_USE_HIP_ROC ) + set(CHAMELEON_USE_HIP ON) +else() + set(CHAMELEON_USE_HIP OFF) +endif() + # Enable Hmat-OSS kernels option(CHAMELEON_USE_HMAT "Enable hmat kernels" OFF) cmake_dependent_option(CHAMELEON_HMAT_EXTERNAL @@ -405,6 +425,121 @@ if(NOT CHAMELEON_SIMULATION) endif(CHAMELEON_USE_CUDA) + # CHAMELEON depends on HIP/HIPBLAS + #---------------------------------- + if (CHAMELEON_USE_HIP) + if (CHAMELEON_USE_HIP_ROC) + + find_package(HIP REQUIRED) + find_package(hipblas REQUIRED) + + elseif(CHAMELEON_USE_HIP_CUDA) + + # for CUDA backend we need a user-provided hipblas library as the one + # shipped with ROCm only works with AMD GPU + set(CHAMELEON_HIPBLAS_PATH "" CACHE PATH "Directory of hipblas installation with CUDA support") + if(NOT CHAMELEON_HIPBLAS_PATH) + message(FATAL_ERROR "CHAMELEON_USE_HIP_CUDA requires you to set" + " CHAMELEON_HIPBLAS_PATH to a hipblas installation compiled with CUDA") + endif() + + if(NOT DEFINED HIP_PATH) + if(DEFINED ENV{HIP_PATH}) + set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + else() + message(FATAL_ERROR "Please set HIP_PATH to your HIP installation.") + endif() + endif() + + find_package(CUDA REQUIRED) + + # location of FindHIP.cmake depending on how HIP_PATH is set + # HIP_PATH points to /opt/rocm-<version> + set(CMAKE_MODULE_PATH "${HIP_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH}) + # HIP_PATH points to /opt/rocm-<version>/hip + set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) + + # We use MODULE keyword to force using FindHIP.cmake instead of + # hip-config.cmake which doesn't support CUDA backend + find_package(HIP REQUIRED MODULE) + list( APPEND HIP_INCLUDE_DIRS "${HIP_PATH}/include") + + if(CHAMELEON_HIPBLAS_PATH) + list(APPEND HIPBLAS_INCLUDE_DIRS "${CHAMELEON_HIPBLAS_PATH}/include") + list(APPEND HIPBLAS_LIBRARIES "${CHAMELEON_HIPBLAS_PATH}/lib/libhipblas.so") + else() + message(FATAL_ERROR "Please set CHAMELEON_HIPBLAS_PATH to your HIPBLAS installation.") + endif() + endif() + + if (HIP_FOUND) + message("-- ${Blue}Add definition CHAMELEON_USE_HIP" + " - Activate HIP in Chameleon${ColourReset}") + # create imported target because not provided with old cmake + add_library(HIP::HIP INTERFACE IMPORTED) + add_library(HIP::HIPBLAS INTERFACE IMPORTED) + + if (CHAMELEON_USE_HIP_CUDA) + target_compile_definitions(HIP::HIP INTERFACE "__HIP_PLATFORM_NVIDIA__") + set(HIP_INCLUDE_DIRS "${HIP_INCLUDE_DIRS};${CUDA_INCLUDE_DIRS}") + set(HIP_LIBRARIES "${HIP_LIBRARIES};${CUDA_LIBRARIES}") + endif() + + if (HIP_INCLUDE_DIRS) + set_target_properties(HIP::HIP PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HIP_INCLUDE_DIRS}") + if (HIPBLAS_INCLUDE_DIRS) + set_target_properties(HIP::HIPBLAS PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HIPBLAS_INCLUDE_DIRS}") + else() + message(WARNING "CHAMELEON_USE_HIP requires" + "\n HIPBLAS_INCLUDE_DIRS to be found. Be sure you have" + "\n hipblas headers with your distribution of HIP.") + endif() + else() + message(WARNING "CHAMELEON_USE_HIP requires" + "\n HIP_INCLUDE_DIRS to be found. Be sure you have" + "\n hip headers with your distribution of HIP.") + endif() + if (HIP_LIBRARIES) + set_target_properties(HIP::HIP PROPERTIES INTERFACE_LINK_LIBRARIES "${HIP_LIBRARIES}") + if (HIPBLAS_LIBRARIES) + set_target_properties(HIP::HIPBLAS PROPERTIES INTERFACE_LINK_LIBRARIES "${HIPBLAS_LIBRARIES}") + target_link_libraries(HIP::HIPBLAS INTERFACE HIP::HIP) + message("-- ${Blue}Add definition CHAMELEON_USE_HIPBLAS" + " - Use GPU kernels from hipblas${ColourReset}") + else() + message(FATAL_ERROR "CHAMELEON_USE_HIP requires" + "\n HIPBLAS_LIBRARIES to be found. Be sure you have" + "\n libhipblas with your distribution of HIP.") + endif() + else() + message(FATAL_ERROR "CHAMELEON_USE_HIP requires" + "\n HIP_LIBRARIES to be found. Be sure you have" + "\n libamdhip64 with your distribution of ROCm.") + endif() + if (CHAMELEON_USE_HIP_CUDA AND NOT CUDA_LIBRARIES) + message(FATAL_ERROR "CHAMELEON_USE_HIP_CUDA requires" + "\n CUDA_LIBRARIES to be found. Be sure you have" + "\n libcuda with your distribution of CUDA.") + endif() + + message("-- HIP PATH: ${HIP_PATH}") + message("-- HIP version: ${HIP_VERSION_STRING}") + message("-- HIP platform: ${HIP_PLATFORM}") + message("-- HIP runtime: ${HIP_RUNTIME}") + message("-- HIP compiler: ${HIP_COMPILER}") + message("-- HIP include path: ${HIP_INCLUDE_DIRS}") + message("-- HIP libraries: ${HIP_LIBRARIES}") + message("-- HIPBLAS include path: ${HIPBLAS_INCLUDE_DIRS}") + message("-- HIPBLAS libraries: ${HIPBLAS_LIBRARIES}") + + morse_export_imported_target(HIP HIP hip chameleon) + morse_export_imported_target(HIP HIPBLAS hipblas chameleon) + else(HIP_FOUND) + message(FATAL_ERROR "CHAMELEON_USE_HIP requires HIP to be found") + endif (HIP_FOUND) + + endif(CHAMELEON_USE_HIP) + # CHAMELEON depends on MPI #------------------------- if (CHAMELEON_USE_MPI) @@ -565,6 +700,9 @@ if (NOT CHAMELEON_SIMULATION) if(CHAMELEON_USE_CUDA) add_subdirectory(cudablas) endif() + if(CHAMELEON_USE_HIP) + add_subdirectory(hipblas) + endif() endif() diff --git a/CTestConfig.cmake b/CTestConfig.cmake index 22b9048e0f586e80aaea116a9e8a3a3a5b8a5822..9202310090400c24b0f9773acd6476eb39da6466 100644 --- a/CTestConfig.cmake +++ b/CTestConfig.cmake @@ -53,4 +53,8 @@ if(NOT BUILDNAME) set(BUILDNAME "${BUILDNAME}-CUDA") endif(CHAMELEON_USE_CUDA) + if(CHAMELEON_USE_HIP) + set(BUILDNAME "${BUILDNAME}-HIP") + endif(CHAMELEON_USE_HIP) + endif() diff --git a/cmake_modules/PrintOpts.cmake b/cmake_modules/PrintOpts.cmake index 777d0c37f90c5c552ca79283dd31fb8a5303fa90..f50f01c7b16bad42db97504fb2353e1f274d627a 100644 --- a/cmake_modules/PrintOpts.cmake +++ b/cmake_modules/PrintOpts.cmake @@ -20,7 +20,8 @@ # @version 1.2.0 # @author Florent Pruvost # @author Mathieu Faverge -# @date 2022-02-22 +# @author Loris Lucido +# @date 2023-01-30 # ### @@ -74,6 +75,8 @@ set(dep_message "${dep_message}" "\n" " Implementation paradigm\n" " CUDA ................: ${CHAMELEON_USE_CUDA}\n" +" HIP-ROC .............: ${CHAMELEON_USE_HIP_ROC}\n" +" HIP-CUDA ............: ${CHAMELEON_USE_HIP_CUDA}\n" " MPI .................: ${CHAMELEON_USE_MPI}\n" "\n" " Runtime specific\n" diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 081868c78db8635c2c9473b36bbea1bf1ffb4e8a..59a24ff8eb4eb9e02cf765c1429b3cc4db7c009f 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -25,7 +25,8 @@ # @author Guillaume Sylvand # @author Raphael Boucherie # @author Alycia Lisito -# @date 2022-02-22 +# @author Loris Lucido +# @date 2023-01-30 # ### @@ -344,6 +345,12 @@ if (CHAMELEON_USE_CUDA) target_link_libraries(chameleon PUBLIC CUDA::CUBLAS) endif() endif() +if (CHAMELEON_USE_HIP) + if (NOT CHAMELEON_SIMULATION) + target_link_libraries(chameleon PUBLIC hipblas) + target_link_libraries(chameleon PUBLIC HIP::HIPBLAS) + endif() +endif() target_link_libraries(chameleon PUBLIC MORSE::M) set_property(TARGET chameleon PROPERTY Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/include") diff --git a/control/control.c b/control/control.c index d2ffcd37ca245f8c2741cb0b8e77eae35539fc82..374ec1c873b68005d31746575135b94b2c2805cd 100644 --- a/control/control.c +++ b/control/control.c @@ -19,7 +19,8 @@ * @author Philippe Virouleau * @author Samuel Thibault * @author Philippe Swartvagher - * @date 2022-02-22 + * @author Loris Lucido + * @date 2023-01-30 * *** * @@ -65,7 +66,7 @@ int __chameleon_init(int cores, int gpus) * @param[in] ncpus * Number of cores to use. * - * @param[in] ncudas + * @param[in] ngpus * Number of cuda devices to use. * * @param[in] nthreads_per_worker @@ -76,7 +77,7 @@ int __chameleon_init(int cores, int gpus) * @retval CHAMELEON_SUCCESS successful exit * */ -int __chameleon_initpar(int ncpus, int ncudas, int nthreads_per_worker) +int __chameleon_initpar(int ncpus, int ngpus, int nthreads_per_worker) { CHAM_context_t *chamctxt; @@ -113,14 +114,14 @@ int __chameleon_initpar(int ncpus, int ncudas, int nthreads_per_worker) # endif #endif -#if !defined(CHAMELEON_USE_CUDA) - if ( ncudas != 0 ) { - chameleon_warning("CHAMELEON_Init", "CHAMELEON_USE_CUDA is not defined, ncudas is forced to 0"); - ncudas = 0; +#if !defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_USE_HIP) + if ( ngpus != 0 ) { + chameleon_warning("CHAMELEON_Init", "CHAMELEON_USE_CUDA or CHAMELEON_USE_HIP are not defined, ngpus is forced to 0"); + ngpus = 0; } #endif - return RUNTIME_init( chamctxt, ncpus, ncudas, nthreads_per_worker ); + return RUNTIME_init( chamctxt, ncpus, ngpus, nthreads_per_worker ); } /** diff --git a/hipblas/CMakeLists.txt b/hipblas/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..87a384f7b4a14f7e28ba534b14fa7cbc738e1c6d --- /dev/null +++ b/hipblas/CMakeLists.txt @@ -0,0 +1,36 @@ +### +# +# @file CMakeLists.txt +# +# @copyright 2009-2014 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.2.0 +# @author Cedric Castagnede +# @author Emmanuel Agullo +# @author Mathieu Faverge +# @author Florent Pruvost +# @author Loris Lucido +# @date 2023-01-30 +# +### + +add_subdirectory(include) +add_subdirectory(compute) +add_subdirectory(eztrace_module) + +### +### END CMakeLists.txt +### diff --git a/hipblas/compute/CMakeLists.txt b/hipblas/compute/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..632049557a2206d22f83e7432268dd39f42ed303 --- /dev/null +++ b/hipblas/compute/CMakeLists.txt @@ -0,0 +1,91 @@ +### +# +# @file CMakeLists.txt +# +# @copyright 2009-2014 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.2.0 +# @author Florent Pruvost +# @author Guillaume Sylvand +# @author Mathieu Faverge +# @author Loris Lucido +# @date 2023-01-30 +# +### + +# Generate the chameleon sources for all possible precisions +# ------------------------------------------------------ +set(HIPBLAS_SRCS_GENERATED "") +set(ZSRC + hip_zgemm.c + hip_zhemm.c + hip_zher2k.c + hip_zherk.c + hip_zsymm.c + hip_zsyr2k.c + hip_zsyrk.c + hip_ztrmm.c + hip_ztrsm.c + ) + +precisions_rules_py( + HIPBLAS_SRCS_GENERATED "${ZSRC}" + PRECISIONS "${CHAMELEON_PRECISION}") + +set(HIPBLAS_SRCS + ${HIPBLAS_SRCS_GENERATED} + hipglobal.c + ) + +# Force generation of sources +# --------------------------- +add_custom_target(hipblas_sources ALL SOURCES ${HIPBLAS_SRCS}) +set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};hipblas_sources" CACHE INTERNAL "List of targets of sources") + +# Compile step +# ------------ +add_library(hipblas ${HIPBLAS_SRCS}) +set_target_properties(hipblas PROPERTIES VERSION ${CHAMELEON_VERSION}) +set_target_properties(hipblas PROPERTIES SOVERSION ${CHAMELEON_VERSION_MAJOR}) +add_dependencies(hipblas hipblas_include hipblas_sources) +target_include_directories(hipblas PUBLIC + $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/hipblas/include> + $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/hipblas/include> + $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include> + $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include> + $<INSTALL_INTERFACE:include>) +set_property(TARGET hipblas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") + +target_link_libraries(hipblas PRIVATE coreblas HIP::HIPBLAS) +target_link_libraries(hipblas PUBLIC MORSE::M) + +# export target coreblas +install(EXPORT hipblasTargets + NAMESPACE CHAMELEON:: + DESTINATION lib/cmake/chameleon + ) + +# installation +# ------------ +install(TARGETS hipblas + EXPORT hipblasTargets + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + ) + +### +### END CMakeLists.txt +### diff --git a/hipblas/compute/hip_zgemm.c b/hipblas/compute/hip_zgemm.c new file mode 100644 index 0000000000000000000000000000000000000000..8afd4b869f15431cae942946e1e565086b0349d5 --- /dev/null +++ b/hipblas/compute/hip_zgemm.c @@ -0,0 +1,46 @@ +/** + * + * @file hip_zgemm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zgemm GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @author Loris Lucido + * @date 2023-01-30 + * @precisions normal z -> c d s + * + */ +#include "hipblas.h" + +int +HIP_zgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *B, int ldb, + const hipDoubleComplex *beta, + hipDoubleComplex *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZgemm( handle, + chameleon_hipblas_const(transa), chameleon_hipblas_const(transb), + m, n, k, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_zhemm.c b/hipblas/compute/hip_zhemm.c new file mode 100644 index 0000000000000000000000000000000000000000..30d722430f1660fc470380358237f7b43fa3b165 --- /dev/null +++ b/hipblas/compute/hip_zhemm.c @@ -0,0 +1,45 @@ +/** + * + * @file hip_zhemm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zhemm GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c + * + */ +#include "hipblas.h" + +int +HIP_zhemm( cham_side_t side, cham_uplo_t uplo, + int m, int n, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *B, int ldb, + const hipDoubleComplex *beta, + hipDoubleComplex *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZhemm( handle, + chameleon_hipblas_const(side), chameleon_hipblas_const(uplo), + m, n, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_zher2k.c b/hipblas/compute/hip_zher2k.c new file mode 100644 index 0000000000000000000000000000000000000000..601df7fe38fab3009b3fb60a37228c50839077e6 --- /dev/null +++ b/hipblas/compute/hip_zher2k.c @@ -0,0 +1,45 @@ +/** + * + * @file hip_zher2k.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zher2k GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c + * + */ +#include "hipblas.h" + +int +HIP_zher2k( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *B, int ldb, + const double *beta, + hipDoubleComplex *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZher2k( handle, + chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans), + n, k, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_zherk.c b/hipblas/compute/hip_zherk.c new file mode 100644 index 0000000000000000000000000000000000000000..56693b30767031443f99b8f1993f64710e9a3317 --- /dev/null +++ b/hipblas/compute/hip_zherk.c @@ -0,0 +1,43 @@ +/** + * + * @file hip_zherk.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zherk GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c + * + */ +#include "hipblas.h" + +int +HIP_zherk( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const double *alpha, + const hipDoubleComplex *A, int lda, + const double *beta, + hipDoubleComplex *B, int ldb, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZherk( handle, + chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans), + n, k, + HIPBLAS_VALUE(alpha), A, lda, + HIPBLAS_VALUE(beta), B, ldb ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_zsymm.c b/hipblas/compute/hip_zsymm.c new file mode 100644 index 0000000000000000000000000000000000000000..146412570c0fcb0a1bb3d2a1c57438c25d0e2962 --- /dev/null +++ b/hipblas/compute/hip_zsymm.c @@ -0,0 +1,45 @@ +/** + * + * @file hip_zsymm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zsymm GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c d s + * + */ +#include "hipblas.h" + +int +HIP_zsymm( cham_side_t side, cham_uplo_t uplo, + int m, int n, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *B, int ldb, + const hipDoubleComplex *beta, + hipDoubleComplex *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZsymm( handle, + chameleon_hipblas_const(side), chameleon_hipblas_const(uplo), + m, n, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_zsyr2k.c b/hipblas/compute/hip_zsyr2k.c new file mode 100644 index 0000000000000000000000000000000000000000..5bef9817e5a6951bfc4cbd301f2b3e4b4111db7e --- /dev/null +++ b/hipblas/compute/hip_zsyr2k.c @@ -0,0 +1,45 @@ +/** + * + * @file hip_zsyr2k.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zsyr2k GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c d s + * + */ +#include "hipblas.h" + +int +HIP_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *B, int ldb, + const hipDoubleComplex *beta, + hipDoubleComplex *C, int ldc, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZsyr2k( handle, + chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans), + n, k, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb, + HIPBLAS_VALUE(beta), C, ldc ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_zsyrk.c b/hipblas/compute/hip_zsyrk.c new file mode 100644 index 0000000000000000000000000000000000000000..44838f78bed6ee161288976ed621f4276b600955 --- /dev/null +++ b/hipblas/compute/hip_zsyrk.c @@ -0,0 +1,43 @@ +/** + * + * @file hip_zsyrk.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_zsyrk GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c d s + * + */ +#include "hipblas.h" + +int +HIP_zsyrk( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + const hipDoubleComplex *beta, + hipDoubleComplex *B, int ldb, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZsyrk( handle, + chameleon_hipblas_const(uplo), chameleon_hipblas_const(trans), + n, k, + HIPBLAS_VALUE(alpha), A, lda, + HIPBLAS_VALUE(beta), B, ldb ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hip_ztrmm.c b/hipblas/compute/hip_ztrmm.c new file mode 100644 index 0000000000000000000000000000000000000000..9cc52790420eb2cb9593d3ddbef21bd41e5cd292 --- /dev/null +++ b/hipblas/compute/hip_ztrmm.c @@ -0,0 +1,45 @@ +/** + * + * @file hip_ztrmm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_ztrmm GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @date 2022-02-22 + * @precisions normal z -> c d s + * + */ +#include "hipblas.h" + +int +HIP_ztrmm( cham_side_t side, cham_uplo_t uplo, + cham_trans_t transa, cham_diag_t diag, + int m, int n, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + hipDoubleComplex *B, int ldb, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZtrmm( handle, + chameleon_hipblas_const(side), chameleon_hipblas_const(uplo), + chameleon_hipblas_const(transa), chameleon_hipblas_const(diag), + m, n, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} + diff --git a/hipblas/compute/hip_ztrsm.c b/hipblas/compute/hip_ztrsm.c new file mode 100644 index 0000000000000000000000000000000000000000..cba7629c396f56c119a54d88076f55e526089f7f --- /dev/null +++ b/hipblas/compute/hip_ztrsm.c @@ -0,0 +1,45 @@ +/** + * + * @file hip_ztrsm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon hip_ztrsm GPU kernel + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @author Loris Lucido + * @date 2023-01-30 + * @precisions normal z -> c d s + * + */ +#include "hipblas.h" + +int +HIP_ztrsm( cham_side_t side, cham_uplo_t uplo, + cham_trans_t transa, cham_diag_t diag, + int m, int n, + const hipDoubleComplex *alpha, + const hipDoubleComplex *A, int lda, + hipDoubleComplex *B, int ldb, + hipblasHandle_t handle ) +{ + hipblasStatus_t rc; + + rc = hipblasZtrsm( handle, + chameleon_hipblas_const(side), chameleon_hipblas_const(uplo), + chameleon_hipblas_const(transa), chameleon_hipblas_const(diag), + m, n, + HIPBLAS_VALUE(alpha), A, lda, + B, ldb ); + + assert( rc == HIPBLAS_STATUS_SUCCESS ); + (void)rc; + return CHAMELEON_SUCCESS; +} diff --git a/hipblas/compute/hipglobal.c b/hipblas/compute/hipglobal.c new file mode 100644 index 0000000000000000000000000000000000000000..3c9fce60274ea6d317354cea67b7396e7fc5f65e --- /dev/null +++ b/hipblas/compute/hipglobal.c @@ -0,0 +1,128 @@ +/** + * + * @file hipglobal.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon global hipblas variables and functions + * + * @version 1.2.0 + * @author Mathieu Faverge + * @author Loris Lucido + * @date 2023-01-30 + * + */ +#include "hipblas.h" + +/** + * LAPACK Constants + */ +int chameleon_hipblas_constants[] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 100 + 0, // 101: ChamRowMajor + 0, // 102: ChamColMajor + 0, 0, 0, 0, 0, 0, 0, 0, + HIPBLAS_OP_N, // 111: ChamNoTrans + HIPBLAS_OP_T, // 112: ChamTrans + HIPBLAS_OP_C, // 113: ChamConjTrans + 0, 0, 0, 0, 0, 0, 0, + HIPBLAS_FILL_MODE_UPPER, // 121: ChamUpper + HIPBLAS_FILL_MODE_LOWER, // 122: ChamLower + 0, // 123: ChamUpperLower + 0, 0, 0, 0, 0, 0, 0, + HIPBLAS_DIAG_NON_UNIT, // 131: ChamNonUnit + HIPBLAS_DIAG_UNIT, // 132: ChamUnit + 0, 0, 0, 0, 0, 0, 0, 0, + HIPBLAS_SIDE_LEFT, // 141: ChamLeft + HIPBLAS_SIDE_RIGHT, // 142: ChamRight + 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 151: + 0, // 152: + 0, // 153: + 0, // 154: + 0, // 155: + 0, // 156: + 0, // 157: ChamEps + 0, // 158: + 0, // 159: + 0, // 160: + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 171: ChamOneNorm + 0, // 172: ChamRealOneNorm + 0, // 173: ChamTwoNorm + 0, // 174: ChamFrobeniusNorm + 0, // 175: ChamInfNorm + 0, // 176: ChamRealInfNorm + 0, // 177: ChamMaxNorm + 0, // 178: ChamRealMaxNorm + 0, // 179 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 200 + 0, // 201: ChamDistUniform + 0, // 202: ChamDistSymmetric + 0, // 203: ChamDistNormal + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 240 + 0, // 241 ChamHermGeev + 0, // 242 ChamHermPoev + 0, // 243 ChamNonsymPosv + 0, // 244 ChamSymPosv + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 290 + 0, // 291 ChamNoPacking + 0, // 292 ChamPackSubdiag + 0, // 293 ChamPackSupdiag + 0, // 294 ChamPackColumn + 0, // 295 ChamPackRow + 0, // 296 ChamPackLowerBand + 0, // 297 ChamPackUpeprBand + 0, // 298 ChamPackAll + 0, // 299 + 0, // 300 + 0, // 301 ChamNoVec + 0, // 302 ChamVec, ChamSVDvrange + 0, // 303 ChamIvec, ChamSVDirange + 0, // 304 ChamAllVec, ChamSVDall + 0, // 305 ChamSVec + 0, // 306 ChamOVec + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 390 + 0, // 391 Forward + 0, // 392 Backward + 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 401 Columnwise + 0, // 402 Rowwise + 0, 0, 0, 0, 0, 0, 0, 0 // Remember to add a coma! +}; diff --git a/hipblas/eztrace_module/CMakeLists.txt b/hipblas/eztrace_module/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f46c2e28d69199403a00cb16d60a2497ff9db207 --- /dev/null +++ b/hipblas/eztrace_module/CMakeLists.txt @@ -0,0 +1,79 @@ +### +# +# @file CMakeLists.txt +# +# @copyright 2009-2014 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.2.0 +# @author Florent Pruvost +# @author Mathieu Faverge +# @date 2022-02-22 +# +### + +if (NOT EZTRACE_FOUND) + find_package(EZTRACE) +endif() + +if (EZTRACE_FOUND AND EZTRACE_DIR_FOUND) + + set(EZTRACE_eztrace_create_plugin_DIR "EZTRACE_eztrace_create_plugin_DIR-NOTFOUND") + find_path(EZTRACE_eztrace_create_plugin_DIR + NAMES eztrace_create_plugin + HINTS ${EZTRACE_DIR_FOUND}/bin) + mark_as_advanced(EZTRACE_eztrace_create_plugin_DIR) + + if (EZTRACE_eztrace_create_plugin_DIR) + + set(EZTRACE_CREATE_PLUGIN "${EZTRACE_eztrace_create_plugin_DIR}/eztrace_create_plugin") + + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/output + COMMAND ${EZTRACE_CREATE_PLUGIN} + ARGS ${CMAKE_CURRENT_SOURCE_DIR}/hipblas_eztrace_module + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hipblas_eztrace_module + ) + add_custom_target( + eztrace-module-chameleon_hip-dir ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/output + ) + add_custom_command( + OUTPUT libeztrace-convert-chameleon_hip.so + COMMAND make + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/output + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/output + ) + add_custom_target( + eztrace-module-chameleon_hip-libs ALL + DEPENDS libeztrace-convert-chameleon_hip.so + ) + # installation + # ------------ + install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/output/libeztrace-autostart-chameleon_hip.so + ${CMAKE_CURRENT_BINARY_DIR}/output/libeztrace-chameleon_hip.so + ${CMAKE_CURRENT_BINARY_DIR}/output/libeztrace-convert-chameleon_hip.so + DESTINATION ${EZTRACE_LIBRARY_DIRS} + ) + + endif (EZTRACE_eztrace_create_plugin_DIR) + +endif (EZTRACE_FOUND AND EZTRACE_DIR_FOUND) + +### +### END CMakeLists.txt +### diff --git a/hipblas/eztrace_module/hipblas_eztrace_module b/hipblas/eztrace_module/hipblas_eztrace_module new file mode 100644 index 0000000000000000000000000000000000000000..2170a72dab2640f20cd67908f8ff4d0768d59c1c --- /dev/null +++ b/hipblas/eztrace_module/hipblas_eztrace_module @@ -0,0 +1,266 @@ +BEGIN_MODULE +NAME chameleon_hip +DESC "Module for Chameleon HIP functions" +ID 7771 + +int HIP_cgemm( + void* transa, void* transb, + int m, int n, int k, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_chemm( + void* side, void* uplo, + int m, int n, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_cher2k( + void* uplo, void* trans, + int n, int k, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + float *beta, + void *C, int ldc, + void* stream); +int HIP_cherk( + void* uplo, void* trans, + int n, int k, + float *alpha, + const void *A, int lda, + float *beta, + void *B, int ldb, + void* stream); +int HIP_csymm( + void* side, void* uplo, + int m, int n, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_csyr2k( + void* uplo, void* trans, + int n, int k, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_csyrk( + void* uplo, void* trans, + int n, int k, + void *alpha, + const void *A, int lda, + void *beta, + void *C, int ldc, + void* stream); +int HIP_ctrmm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + void *alpha, + const void *A, int lda, + void *B, int ldb, + void* stream); +int HIP_ctrsm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + void *alpha, + const void *A, int lda, + void *B, int ldb, + void* stream); + +int HIP_dgemm( + void* transa, void* transb, + int m, int n, int k, + double *alpha, + const double *A, int lda, + const double *B, int ldb, + double *beta, + double *C, int ldc, + void* stream); +int HIP_dsymm( + void* side, void* uplo, + int m, int n, + double *alpha, + const double *A, int lda, + const double *B, int ldb, + double *beta, + double *C, int ldc, + void* stream); +int HIP_dsyr2k( + void* uplo, void* trans, + int n, int k, + double *alpha, + const double *A, int lda, + const double *B, int ldb, + double *beta, + double *C, int ldc, + void* stream); +int HIP_dsyrk( + void* uplo, void* trans, + int n, int k, + double *alpha, + const double *A, int lda, + double *beta, + double *B, int ldb, + void* stream); +int HIP_dtrmm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + double *alpha, + const double *A, int lda, + double *B, int ldb, + void* stream); +int HIP_dtrsm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + double *alpha, + const double *A, int lda, + double *B, int ldb, + void* stream); + +int HIP_sgemm( + void* transa, void* transb, + int m, int n, int k, + float *alpha, + const float *A, int lda, + const float *B, int ldb, + float *beta, + float *C, int ldc, + void* stream); +int HIP_ssymm( + void* side, void* uplo, + int m, int n, + float *alpha, + const float *A, int lda, + const float *B, int ldb, + float *beta, + float *C, int ldc, + void* stream); +int HIP_ssyr2k( + void* uplo, void* trans, + int n, int k, + float *alpha, + const float *A, int lda, + const float *B, int ldb, + float *beta, + float *C, int ldc, + void* stream); +int HIP_ssyrk( + void* uplo, void* trans, + int n, int k, + float *alpha, + const float *A, int lda, + float *beta, + float *B, int ldb, + void* stream); +int HIP_strmm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + float *alpha, + const float *A, int lda, + float *B, int ldb, + void* stream); +int HIP_strsm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + float *alpha, + const float *A, int lda, + float *B, int ldb, + void* stream); + +int HIP_zgemm( + void* transa, void* transb, + int m, int n, int k, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_zhemm( + void* side, void* uplo, + int m, int n, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_zher2k( + void* uplo, void* trans, + int n, int k, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + double *beta, + void *C, int ldc, + void* stream); +int HIP_zherk( + void* uplo, void* trans, + int n, int k, + double *alpha, + const void *A, int lda, + double *beta, + void *B, int ldb, + void* stream); +int HIP_zsymm( + void* side, void* uplo, + int m, int n, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_zsyr2k( + void* uplo, void* trans, + int n, int k, + void *alpha, + const void *A, int lda, + const void *B, int ldb, + void *beta, + void *C, int ldc, + void* stream); +int HIP_zsyrk( + void* uplo, void* trans, + int n, int k, + void *alpha, + const void *A, int lda, + void *beta, + void *C, int ldc, + void* stream); +int HIP_ztrmm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + void *alpha, + const void *A, int lda, + void *B, int ldb, + void* stream); +int HIP_ztrsm( + void* side, void* uplo, + void* transa, void* diag, + int m, int n, + void *alpha, + const void *A, int lda, + void *B, int ldb, + void* stream); + +END_MODULE diff --git a/hipblas/include/CMakeLists.txt b/hipblas/include/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec1b8e3b459333670f40b5c2389a37e6c435f1fb --- /dev/null +++ b/hipblas/include/CMakeLists.txt @@ -0,0 +1,66 @@ +### +# +# @file CMakeLists.txt +# +# @copyright 2009-2014 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.2.0 +# @author Florent Pruvost +# @author Mathieu Faverge +# @author Loris Lucido +# @date 2023-01-30 +# +### + +# Generate header files +# --------------------- +set(HIPBLAS_HDRS_GENERATED "") +set(ZHDR + hipblas/hipblas_z.h +) +precisions_rules_py( + HIPBLAS_HDRS_GENERATED "${ZHDR}" + TARGETDIR hipblas + PRECISIONS "s;d;c;z;zc;ds" ) + +# Define the list of headers +# -------------------------- +set(HIPBLAS_HDRS + hipblas.h + ) + +# Add generated headers +# --------------------- +foreach( hdr_file ${HIPBLAS_HDRS_GENERATED} ) + list(APPEND HIPBLAS_HDRS ${CMAKE_CURRENT_BINARY_DIR}/${hdr_file}) +endforeach() + +# Force generation of headers +# --------------------------- +add_custom_target(hipblas_include ALL SOURCES ${HIPBLAS_HDRS}) +set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};hipblas_include" CACHE INTERNAL "List of targets of sources") + +# Installation +# ------------ +install( FILES hipblas.h + DESTINATION include ) + +install( FILES ${HIPBLAS_HDRS} + DESTINATION include/hipblas ) + +### +### END CMakeLists.txt +### diff --git a/hipblas/include/hipblas.h b/hipblas/include/hipblas.h new file mode 100644 index 0000000000000000000000000000000000000000..333bbc2d91801f05404d0655ec6e15848edf849f --- /dev/null +++ b/hipblas/include/hipblas.h @@ -0,0 +1,82 @@ +/** + * + * @file hipblas.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon GPU kernels main header + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @author Nathalie Furmento + * @author Loris Lucido + * @date 2023-01-30 + * @precisions normal z -> c d s + * + */ +#ifndef _hipblas_h_ +#define _hipblas_h_ + +#include "chameleon/config.h" + +#if !defined(CHAMELEON_USE_HIP) +#error "This file should not be included" +#endif + +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <assert.h> + +#include <hip/hip_runtime.h> +#include <hip/hip_complex.h> + +#include <hipblas/hipblas.h> + +#define HIPBLAS_SADDR(_a_) (&(_a_)) +#define HIPBLAS_VALUE(_a_) (_a_) + +/** + * CHAMELEON types and constants + */ +#include "chameleon/types.h" +#include "chameleon/struct.h" +#include "chameleon/constants.h" + +/** + * HIP BLAS headers + */ +BEGIN_C_DECLS + +#include "hipblas/hipblas_z.h" +#include "hipblas/hipblas_d.h" +#include "hipblas/hipblas_c.h" +#include "hipblas/hipblas_s.h" + +END_C_DECLS + +/** + * Coreblas Error + */ +#define hipblas_error(k, str) fprintf(stderr, "%s: Parameter %d / %s\n", __func__, k, str) + +/** + * LAPACK Constants + */ +BEGIN_C_DECLS + +extern char *chameleon_lapack_constants[]; +#define chameleon_lapack_const(chameleon_const) chameleon_lapack_constants[chameleon_const][0] + +extern int chameleon_hipblas_constants[]; +#define chameleon_hipblas_const(chameleon_const) chameleon_hipblas_constants[chameleon_const] + +END_C_DECLS + +#endif /* _hipblas_h_ */ diff --git a/hipblas/include/hipblas/hipblas_z.h b/hipblas/include/hipblas/hipblas_z.h new file mode 100644 index 0000000000000000000000000000000000000000..647a0a7e5b68e8afa198b289a667eb0038e286cd --- /dev/null +++ b/hipblas/include/hipblas/hipblas_z.h @@ -0,0 +1,38 @@ +/** + * + * @file hipblas_z.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon GPU CHAMELEON_Complex64_t kernels header + * + * @version 1.2.0 + * @author Florent Pruvost + * @author Mathieu Faverge + * @author Loris Lucido + * @date 2023-01-30 + * @precisions normal z -> c d s + * + */ +#ifndef _hipblas_z_h_ +#define _hipblas_z_h_ + +/** + * Declarations of hip kernels - alphabetical order + */ +int HIP_zgemm( cham_trans_t transa, cham_trans_t transb, int m, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle ); +int HIP_zhemm( cham_side_t side, cham_uplo_t uplo, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle ); +int HIP_zher2k( cham_uplo_t uplo, cham_trans_t trans, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const double *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle ); +int HIP_zherk( cham_uplo_t uplo, cham_trans_t trans, int n, int k, const double *alpha, const hipDoubleComplex *A, int lda, const double *beta, hipDoubleComplex *B, int ldb, hipblasHandle_t handle ); +int HIP_zsymm( cham_side_t side, cham_uplo_t uplo, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle ); +int HIP_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle ); +int HIP_zsyrk( cham_uplo_t uplo, cham_trans_t trans, int n, int k, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc, hipblasHandle_t handle ); +int HIP_ztrmm( cham_side_t side, cham_uplo_t uplo, cham_trans_t transa, cham_diag_t diag, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, hipDoubleComplex *B, int ldb, hipblasHandle_t handle ); +int HIP_ztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t transa, cham_diag_t diag, int m, int n, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, hipDoubleComplex *B, int ldb, hipblasHandle_t handle ); + +#endif /* _hipblas_z_h_ */ diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in index 2b02d4bf4c9a08d2969bde5bd89bf0d162278112..34b3f3febe2558bd3eca466e7b9fcdd21d8dbeb1 100644 --- a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -16,7 +16,8 @@ * @author Mathieu Faverge * @author Philippe Virouleau * @author Raphael Boucherie - * @date 2022-02-22 + * @author Loris Lucido + * @date 2023-01-30 * */ #ifndef CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED @@ -54,6 +55,7 @@ /* GPU Support */ #cmakedefine CHAMELEON_USE_CUDA #cmakedefine CHAMELEON_USE_CUBLAS +#cmakedefine CHAMELEON_USE_HIP /* Hmat-oss */ #cmakedefine CHAMELEON_USE_HMAT diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 2b4d0a1db651921f94fe509cd5976800e69c162a..0968de66d35b777f2efbbc2d75ebfdbc9ccdb475 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -24,7 +24,9 @@ # @author Florent Pruvost # @author Samuel Thibault # @author Matthieu Kuhn -# @date 2022-02-22 +# @author Loris Lucido +# @author Terry Cojean +# @date 2023-01-30 # ### cmake_minimum_required(VERSION 3.1) @@ -147,6 +149,13 @@ if ( STARPU_FOUND ) endif() endif() + if (CHAMELEON_USE_HIP AND NOT CHAMELEON_SIMULATION) + check_function_exists(starpu_hipblas_get_local_handle HAVE_STARPU_HIPBLAS_GET_LOCAL_HANDLE) + if ( NOT HAVE_STARPU_HIPBLAS_GET_LOCAL_HANDLE ) + message(FATAL_ERROR "The detected StarPU library does not include hipblas support. Please disable CHAMELEON_USE_HIP_{CUDA,ROC} or provide a StarPU library with hipblas support.") + endif() + endif() + morse_cmake_required_unset() endif ( STARPU_FOUND ) @@ -268,6 +277,9 @@ if (NOT CHAMELEON_SIMULATION) if (CHAMELEON_USE_CUDA) add_dependencies(chameleon_starpu cudablas_include) endif() + if (CHAMELEON_USE_HIP) + add_dependencies(chameleon_starpu hipblas_include) + endif() endif() target_include_directories(chameleon_starpu PUBLIC @@ -294,6 +306,9 @@ if (NOT CHAMELEON_SIMULATION) if(CHAMELEON_USE_CUDA) target_link_libraries(chameleon_starpu PRIVATE cudablas) endif(CHAMELEON_USE_CUDA) + if(CHAMELEON_USE_HIP) + target_link_libraries(chameleon_starpu PRIVATE hipblas) + endif(CHAMELEON_USE_HIP) endif(NOT CHAMELEON_SIMULATION) target_link_libraries(chameleon_starpu PRIVATE MORSE::M) @@ -307,6 +322,9 @@ if (NOT CHAMELEON_SIMULATION) if (CHAMELEON_USE_CUDA) target_link_libraries(chameleon_starpu PUBLIC CUDA::CUBLAS) endif() + if (CHAMELEON_USE_HIP) + target_link_libraries(chameleon_starpu PUBLIC HIP::HIPBLAS) + endif() endif() # export target diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index bc972bcf54562388b6c091049c21d972b79cadf9..bd0823a6f425e600243e83e1e51794d70209f08b 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -22,7 +22,9 @@ * @author Gwenole Lucas * @author Philippe Swartvagher * @author Lucas Nesi - * @date 2022-02-22 + * @author Loris Lucido + * @author Terry Cojean + * @date 2023-01-30 * @precisions normal z -> c d s * */ @@ -61,7 +63,7 @@ cl_zgemm_cpu_func( void *descr[], void *cl_arg ) clargs->beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_zgemm_cuda_func( void *descr[], void *cl_arg ) { @@ -90,12 +92,48 @@ cl_zgemm_cuda_func( void *descr[], void *cl_arg ) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_zgemm_hip_func( void *descr[], void *cl_arg ) +{ + struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)cl_arg; + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + tileC = cti_interface_get(descr[2]); + + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); + assert( tileB->format & CHAMELEON_TILE_FULLRANK ); + assert( tileC->format & CHAMELEON_TILE_FULLRANK ); + + HIP_zgemm( + clargs->transA, clargs->transB, + clargs->m, clargs->n, clargs->k, + (hipDoubleComplex*)&(clargs->alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + (hipDoubleComplex*)&(clargs->beta), + tileC->mat, tileC->ld, + handle ); + + return; +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zgemm, cl_zgemm_cpu_func, cl_zgemm_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_zgemm_Astat( const RUNTIME_option_t *options, cham_trans_t transA, cham_trans_t transB, diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index b7f70ca6868e5a5e8c1543c5ff1c51204be7ec60..cff474bbf6c75e4fdb1108895d6ee76e8a72f0e8 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -57,7 +57,7 @@ cl_zhemm_cpu_func( void *descr[], void *cl_arg ) clargs->beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_zhemm_cuda_func( void *descr[], void *cl_arg ) { @@ -86,12 +86,46 @@ cl_zhemm_cuda_func( void *descr[], void *cl_arg ) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_zhemm_hip_func( void *descr[], void *cl_arg ) +{ + struct cl_zhemm_args_s *clargs = (struct cl_zhemm_args_s *)cl_arg; + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + tileC = cti_interface_get(descr[2]); + + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); + assert( tileB->format & CHAMELEON_TILE_FULLRANK ); + assert( tileC->format & CHAMELEON_TILE_FULLRANK ); + + HIP_zhemm( + clargs->side, clargs->uplo, + clargs->m, clargs->n, + (hipDoubleComplex*)&(clargs->alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + (hipDoubleComplex*)&(clargs->beta), + tileC->mat, tileC->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zhemm, cl_zhemm_cpu_func, cl_zhemm_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( zhemm, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_zhemm_Astat( const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 38babbe5a230611d595f4716c170f1a5618ad420..fa102852615bac660a9ea9f40367faa10cfc4f3a 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -48,7 +48,7 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) n, k, alpha, tileA, tileB, beta, tileC); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) { cublasHandle_t handle = starpu_cublas_get_local_handle(); @@ -75,13 +75,46 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) &beta, tileC->mat, tileC->ld, handle ); } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void cl_zher2k_hip_func(void *descr[], void *cl_arg) +{ + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + cham_uplo_t uplo; + cham_trans_t trans; + int n; + int k; + hipDoubleComplex alpha; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + double beta; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + tileC = cti_interface_get(descr[2]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); + + HIP_zher2k( uplo, trans, + n, k, + &alpha, tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + &beta, tileC->mat, tileC->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC) +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zher2k, cl_zher2k_cpu_func, cl_zher2k_hip_func, STARPU_HIP_ASYNC ) +#else +CODELETS( zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC ) +#endif /** * diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 894f0b0269754ba9e0c0171efda6abdaa59b34b0..29301b83097c8b61383c00861777c35917ac27cd 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -74,12 +74,38 @@ cl_zherk_cuda_func(void *descr[], void *cl_arg) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_zherk_hip_func(void *descr[], void *cl_arg) +{ + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + struct cl_zherk_args_s *clargs = (struct cl_zherk_args_s *)cl_arg; + CHAM_tile_t *tileA; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileC = cti_interface_get(descr[1]); + + HIP_zherk( + clargs->uplo, clargs->trans, clargs->n, clargs->k, + &(clargs->alpha), + tileA->mat, tileA->ld, + &(clargs->beta), + tileC->mat, tileC->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zherk, cl_zherk_cpu_func, cl_zherk_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_zherk( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 9fad20e13bdc75a10cde4bffb998343d71503fdb..54750f0b3f7de2f76083163b9a881db77d8066e3 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -58,7 +58,7 @@ cl_zsymm_cpu_func( void *descr[], void *cl_arg ) clargs->beta, tileC ); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_zsymm_cuda_func( void *descr[], void *cl_arg ) { @@ -87,12 +87,46 @@ cl_zsymm_cuda_func( void *descr[], void *cl_arg ) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_zsymm_hip_func( void *descr[], void *cl_arg ) +{ + struct cl_zsymm_args_s *clargs = (struct cl_zsymm_args_s *)cl_arg; + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + tileC = cti_interface_get(descr[2]); + + assert( tileA->format & CHAMELEON_TILE_FULLRANK ); + assert( tileB->format & CHAMELEON_TILE_FULLRANK ); + assert( tileC->format & CHAMELEON_TILE_FULLRANK ); + + HIP_zsymm( + clargs->side, clargs->uplo, + clargs->m, clargs->n, + (hipDoubleComplex*)&(clargs->alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + (hipDoubleComplex*)&(clargs->beta), + tileC->mat, tileC->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zsymm, cl_zsymm_cpu_func, cl_zsymm_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( zsymm, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_zsymm_Astat( const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index af47c87edf546b3a4d3005081c4acff8d61161fe..7d5ce81aadff62dd3866bee1a4ee9ecc63b354a0 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -48,7 +48,7 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) n, k, alpha, tileA, tileB, beta, tileC); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) { cublasHandle_t handle = starpu_cublas_get_local_handle(); @@ -75,13 +75,46 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) &beta, tileC->mat, tileC->ld, handle ); } -#endif /* CHAMELEON_USE_CUDA */ +#endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void cl_zsyr2k_hip_func(void *descr[], void *cl_arg) +{ + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + cham_uplo_t uplo; + cham_trans_t trans; + int n; + int k; + hipDoubleComplex alpha; + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + hipDoubleComplex beta; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + tileC = cti_interface_get(descr[2]); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); + + HIP_zsyr2k( uplo, trans, + n, k, + &alpha, tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + &beta, tileC->mat, tileC->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -CODELETS(zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC) +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_hip_func, STARPU_HIP_ASYNC ) +#else +CODELETS( zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC ) +#endif /** * diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 8d8dcdbe752e256a40ec8c914378dbc6356ba91f..d9b7e84fd00336b445a05eeac648957f61eb51e7 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -75,12 +75,38 @@ cl_zsyrk_cuda_func(void *descr[], void *cl_arg) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_zsyrk_hip_func(void *descr[], void *cl_arg) +{ + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + struct cl_zsyrk_args_s *clargs = (struct cl_zsyrk_args_s *)cl_arg; + CHAM_tile_t *tileA; + CHAM_tile_t *tileC; + + tileA = cti_interface_get(descr[0]); + tileC = cti_interface_get(descr[1]); + + HIP_zsyrk( + clargs->uplo, clargs->trans, clargs->n, clargs->k, + (hipDoubleComplex*)&(clargs->alpha), + tileA->mat, tileA->ld, + (hipDoubleComplex*)&(clargs->beta), + tileC->mat, tileC->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index e51b81ab7b9b45028b7f63aa00f1643be0a948c6..3bb14d38bef1119024293da7ecdfb6f58a67c12f 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -53,7 +53,7 @@ cl_ztrmm_cpu_func(void *descr[], void *cl_arg) clargs->m, clargs->n, clargs->alpha, tileA, tileB ); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) { @@ -74,12 +74,38 @@ cl_ztrmm_cuda_func(void *descr[], void *cl_arg) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_ztrmm_hip_func(void *descr[], void *cl_arg) +{ + struct cl_ztrmm_args_s *clargs = (struct cl_ztrmm_args_s *)cl_arg; + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + + HIP_ztrmm( + clargs->side, clargs->uplo, clargs->transA, clargs->diag, + clargs->m, clargs->n, + (hipDoubleComplex*)&(clargs->alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 0196649684a4652ff5686bc060aad7e7879a1f2b..19e83c01a0010bcd1d495932ab7c67e1ac3dc139 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -20,7 +20,9 @@ * @author Lucas Barros de Assis * @author Florent Pruvost * @author Gwenole Lucas - * @date 2022-02-22 + * @author Loris Lucido + * @author Terry Cojean + * @date 2023-01-30 * @precisions normal z -> c d s * */ @@ -54,7 +56,7 @@ cl_ztrsm_cpu_func(void *descr[], void *cl_arg) clargs->m, clargs->n, clargs->alpha, tileA, tileB ); } -#ifdef CHAMELEON_USE_CUDA +#if defined(CHAMELEON_USE_CUDA) static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) { @@ -75,12 +77,40 @@ cl_ztrsm_cuda_func(void *descr[], void *cl_arg) handle ); } #endif /* defined(CHAMELEON_USE_CUDA) */ + +#if defined(CHAMELEON_USE_HIP) +static void +cl_ztrsm_hip_func(void *descr[], void *cl_arg) +{ + struct cl_ztrsm_args_s *clargs = (struct cl_ztrsm_args_s*)cl_arg; + hipblasHandle_t handle = starpu_hipblas_get_local_handle(); + CHAM_tile_t *tileA; + CHAM_tile_t *tileB; + + tileA = cti_interface_get(descr[0]); + tileB = cti_interface_get(descr[1]); + + HIP_ztrsm( + clargs->side, clargs->uplo, clargs->transA, clargs->diag, + clargs->m, clargs->n, + (hipDoubleComplex*)&(clargs->alpha), + tileA->mat, tileA->ld, + tileB->mat, tileB->ld, + handle ); +} +#endif /* defined(CHAMELEON_USE_HIP) */ + #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ + +#if defined(CHAMELEON_USE_HIP) +CODELETS_GPU( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_hip_func, STARPU_HIP_ASYNC ) +#else CODELETS( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC ) +#endif void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c index c83ab1283339f214c91c61f7643d75874e67658e..e2cdb399f401872d4712cb7894d350155994c098 100644 --- a/runtime/starpu/control/runtime_context.c +++ b/runtime/starpu/control/runtime_context.c @@ -16,6 +16,7 @@ * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost + * @author Terry Cojean * @date 2022-02-22 * */ diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c index 5a36b8a6f0a2643f148b09b90b900b9a323caa27..d527cd56b307ad2dc033bf7ecc878a26cddff523 100644 --- a/runtime/starpu/control/runtime_control.c +++ b/runtime/starpu/control/runtime_control.c @@ -19,7 +19,9 @@ * @author Philippe Swartvagher * @author Samuel Thibault * @author Matthieu Kuhn - * @date 2022-02-22 + * @author Loris Lucido + * @author Terry Cojean + * @date 2023-01-30 * */ #include "chameleon_starpu.h" @@ -221,8 +223,12 @@ int RUNTIME_init( CHAM_context_t *chamctxt, ); #endif -#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) +#if !defined(CHAMELEON_SIMULATION) +#if defined(CHAMELEON_USE_CUDA) starpu_cublas_init(); +#elif defined(CHAMELEON_USE_HIP) + starpu_hipblas_init(); +#endif #endif starpu_cham_tile_interface_init(); @@ -249,6 +255,9 @@ void RUNTIME_finalize( CHAM_context_t *chamctxt ) #if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) starpu_cublas_shutdown(); #endif +#if defined(CHAMELEON_USE_HIP) && !defined(CHAMELEON_SIMULATION) + starpu_hipblas_shutdown(); +#endif #if defined(CHAMELEON_USE_MPI) starpu_mpi_shutdown(); diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index 72dd1b81d81b6830edc807e5fdb4a48c9276bd90..385adc0df81a1a87de81971fe3590853b5214287 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -19,7 +19,8 @@ * @author Guillaume Sylvand * @author Raphael Boucherie * @author Samuel Thibault - * @date 2022-02-22 + * @author Loris Lucido + * @date 2023-01-30 * */ #include "chameleon_starpu.h" @@ -138,6 +139,26 @@ void RUNTIME_free( void *ptr, #endif } +#if defined(CHAMELEON_USE_CUDA) + +#define gpuError_t cudaError_t +#define gpuHostRegister cudaHostRegister +#define gpuHostUnregister cudaHostUnregister +#define gpuHostRegisterPortable cudaHostRegisterPortable +#define gpuSuccess cudaSuccess +#define gpuGetErrorString cudaGetErrorString + +#elif defined(CHAMELEON_USE_HIP) + +#define gpuError_t hipError_t +#define gpuHostRegister hipHostRegister +#define gpuHostUnregister hipHostUnregister +#define gpuHostRegisterPortable hipHostRegisterPortable +#define gpuSuccess hipSuccess +#define gpuGetErrorString hipGetErrorString + +#endif + /** * Create data descriptor */ @@ -155,25 +176,27 @@ void RUNTIME_desc_create( CHAM_desc_t *desc ) desc->schedopt = (void*)calloc(lnt*lmt,sizeof(starpu_data_handle_t)); assert(desc->schedopt); -#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) +#if !defined(CHAMELEON_SIMULATION) +#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP) /* - * Register allocated memory as CUDA pinned memory + * Register allocated memory as GPU pinned memory */ if ( (desc->use_mat == 1) && (desc->register_mat == 1) ) { int64_t eltsze = CHAMELEON_Element_Size(desc->dtyp); size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze; - cudaError_t rc; + gpuError_t rc; /* Register the matrix as pinned memory */ - rc = cudaHostRegister( desc->mat, size, cudaHostRegisterPortable ); - if ( rc != cudaSuccess ) + rc = gpuHostRegister( desc->mat, size, gpuHostRegisterPortable ); + if ( rc != gpuSuccess ) { /* Disable the unregister as register failed */ desc->register_mat = 0; - chameleon_warning("RUNTIME_desc_create(StarPU): cudaHostRegister - ", cudaGetErrorString( rc )); + chameleon_warning("RUNTIME_desc_create(StarPU): gpuHostRegister - ", gpuGetErrorString( rc )); } } +#endif #endif if (desc->ooc) { @@ -247,18 +270,20 @@ void RUNTIME_desc_destroy( CHAM_desc_t *desc ) } } -#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) +#if !defined(CHAMELEON_SIMULATION) +#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP) if ( (desc->use_mat == 1) && (desc->register_mat == 1) ) { /* Unmap the pinned memory associated to the matrix */ - if (cudaHostUnregister(desc->mat) != cudaSuccess) + if (gpuHostUnregister(desc->mat) != gpuSuccess) { chameleon_warning("RUNTIME_desc_destroy(StarPU)", - "cudaHostUnregister failed to unregister the " + "gpuHostUnregister failed to unregister the " "pinned memory associated to the matrix"); } } -#endif /* defined(CHAMELEON_USE_CUDA) */ +#endif +#endif free(desc->schedopt); } diff --git a/runtime/starpu/control/runtime_workspace.c b/runtime/starpu/control/runtime_workspace.c index fb209146e3c7e44b4b129379bf23536620322cf9..ad9b5cd9fd0d0abb8f7760b9a66c0936473f1281 100644 --- a/runtime/starpu/control/runtime_workspace.c +++ b/runtime/starpu/control/runtime_workspace.c @@ -15,11 +15,26 @@ * @author Cedric Augonnet * @author Mathieu Faverge * @author Florent Pruvost - * @date 2022-02-22 + * @author Loris Lucido + * @date 2023-01-30 * */ #include "chameleon_starpu.h" +#if defined(CHAMELEON_USE_CUDA) +#define GPU_WORKER_TYPE STARPU_CUDA_WORKER +#define gpuMallocHost cudaMallocHost +#define gpuMalloc cudaMalloc +#define gpuFreeHost cudaFreeHost +#define gpuFree cudaFree +#elif defined(CHAMELEON_USE_HIP) +#define GPU_WORKER_TYPE STARPU_HIP_WORKER +#define gpuMallocHost hipMallocHost +#define gpuMalloc hipMalloc +#define gpuFreeHost hipFreeHost +#define gpuFree hipFree +#endif + static void RUNTIME_allocate_workspace_on_workers(void *arg) { struct chameleon_starpu_ws_s *workspace = arg; @@ -28,9 +43,10 @@ static void RUNTIME_allocate_workspace_on_workers(void *arg) int id = starpu_worker_get_id(); -#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) +#if !defined(CHAMELEON_SIMULATION) +#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP) type = starpu_worker_get_type(id); - if (type == STARPU_CUDA_WORKER) + if ( type == GPU_WORKER_TYPE ) { int memory_location = workspace->memory_location; @@ -39,14 +55,15 @@ static void RUNTIME_allocate_workspace_on_workers(void *arg) /* Use pinned memory because the kernel is very likely * to transfer these data between the CPU and the GPU. * */ - cudaMallocHost(&workspace->workspaces[id], workspace->size); + gpuMallocHost( &workspace->workspaces[id], workspace->size ); } else { /* Allocate on the device */ - cudaMalloc(&workspace->workspaces[id], workspace->size); + gpuMalloc( &workspace->workspaces[id], workspace->size ); } } else +#endif #endif { /* This buffer should only be used within the CPU kernel, so @@ -54,7 +71,7 @@ static void RUNTIME_allocate_workspace_on_workers(void *arg) workspace->workspaces[id] = malloc(workspace->size); } - assert(workspace->workspaces[id]); + assert( workspace->workspaces[id] ); } @@ -65,24 +82,26 @@ static void RUNTIME_free_workspace_on_workers(void *arg) (void)type; int id = starpu_worker_get_id(); -#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) - type = starpu_worker_get_type(id); - if (type == STARPU_CUDA_WORKER) +#if !defined(CHAMELEON_SIMULATION) +#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP) + type = starpu_worker_get_type( id ); + if ( type == GPU_WORKER_TYPE ) { int memory_location = workspace->memory_location; if (memory_location == CHAMELEON_HOST_MEM) { - cudaFreeHost(workspace->workspaces[id]); + gpuFreeHost( workspace->workspaces[id] ); } else { - cudaFree(workspace->workspaces[id]); + gpuFree( workspace->workspaces[id] ); } } else +#endif #endif { - free(workspace->workspaces[id]); + free( workspace->workspaces[id] ); } workspace->workspaces[id] = NULL; diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index e2d964ccc98ae105c4bd79ceac832f77c8ea89e2..e92e23387f1f20912e3d28f910047c2d9629dfd4 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -17,7 +17,9 @@ * @author Florent Pruvost * @author Philippe Swartvagher * @author Samuel Thibault - * @date 2022-02-22 + * @author Loris Lucido + * @author Terry Cojean + * @date 2023-01-30 * */ #ifndef _chameleon_starpu_h_ @@ -71,6 +73,14 @@ #include <starpu_cublas_v2.h> #endif +#if defined(CHAMELEON_USE_HIP) && !defined(CHAMELEON_SIMULATION) +#include <starpu_scheduler.h> +#include <starpu_hip.h> + +#include <hipblas/hipblas.h> +#include <starpu_hipblas.h> +#endif + #if defined(CHAMELEON_SIMULATION) # if !defined(STARPU_SIMGRID) # error "Starpu was not built with simgrid support (--enable-simgrid). Can not run Chameleon with simulation support." diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index 82afe7b42cea6f37c0bd2ede6aeaf0041048fa34..673d8c5d7a6ea287ed2fa4e948c6d8820f35a00a 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -17,7 +17,8 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @author Loris Lucido + * @date 2023-01-30 * @precisions normal z -> c d s * */ @@ -31,11 +32,17 @@ #if !defined(CHAMELEON_SIMULATION) #include "coreblas/coreblas_z.h" #include "coreblas/coreblas_ztile.h" + #if defined(CHAMELEON_USE_CUDA) #include "cudablas.h" #endif + +#if defined(CHAMELEON_USE_HIP) +#include "hipblas.h" #endif +#endif /* !defined(CHAMELEON_SIMULATION) */ + /* * BLAS 1 functions */ diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h index d5c923355299277f32747c76f5534bf3edbd5ce0..00808fc52fec20078744d3603bec4cb2be680a24 100644 --- a/runtime/starpu/include/runtime_codelets.h +++ b/runtime/starpu/include/runtime_codelets.h @@ -16,7 +16,8 @@ * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost - * @date 2022-02-22 + * @author Loris Lucido + * @date 2023-01-30 * */ #ifndef _runtime_codelets_h_ @@ -25,14 +26,31 @@ #include "chameleon/config.h" #include "runtime_codelet_profile.h" -//#undef STARPU_CUDA_ASYNC -#ifdef STARPU_CUDA_ASYNC +#if defined(STARPU_CUDA_ASYNC) #define CODELET_CUDA_FLAGS(flags) .cuda_flags = {(flags)}, #else #define CODELET_CUDA_FLAGS(flags) #endif -#define CODELETS_ALL(cl_name, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \ +#if defined(STARPU_HIP_ASYNC) +#define CODELET_HIP_FLAGS(flags) .hip_flags = {(flags)}, +#else +#define CODELET_HIP_FLAGS(flags) +#endif + +#if defined(CHAMELEON_USE_CUDA) +#define CODELET_GPU_FIELDS( gpu_func_name, gpu_flags ) \ + CODELET_CUDA_FLAGS( gpu_flags ) \ + .cuda_func = ((gpu_func_name)), +#elif defined(CHAMELEON_USE_HIP) +#define CODELET_GPU_FIELDS( gpu_func_name, gpu_flags ) \ + CODELET_HIP_FLAGS( gpu_flags ) \ + .hip_funcs = {(gpu_func_name)}, +#else +#define CODELET_GPU_FIELDS( gpu_func_name, gpu_flags ) +#endif + +#define CODELETS_ALL(cl_name, cpu_func_name, gpu_func_name, _original_location_, gpu_flags) \ struct starpu_perfmodel cl_##cl_name##_fake = { \ .type = STARPU_HISTORY_BASED, \ .symbol = "fake_"#cl_name \ @@ -46,8 +64,7 @@ struct starpu_codelet cl_##cl_name = { \ .where = (_original_location_), \ .cpu_func = ((cpu_func_name)), \ - CODELET_CUDA_FLAGS(cuda_flags) \ - .cuda_func = ((cuda_func_name)), \ + CODELET_GPU_FIELDS( gpu_func_name, gpu_flags ) \ .nbuffers = STARPU_VARIABLE_NBUFFERS, \ .model = &cl_##cl_name##_model, \ .name = #cl_name \ @@ -77,8 +94,13 @@ CODELETS_ALL( name, cpu_func_name, NULL, STARPU_CPU, 0 ) #endif -#define CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_ALL( name, cpu_func_name, cuda_func_name, STARPU_CPU | STARPU_CUDA, cuda_flags ) +#if defined(CHAMELEON_USE_HIP) +#define CODELETS_GPU(name, cpu_func_name, gpu_func_name, gpu_flags) \ + CODELETS_ALL( name, cpu_func_name, gpu_func_name, STARPU_CPU | STARPU_HIP, gpu_flags ) +#else +#define CODELETS_GPU(name, cpu_func_name, gpu_func_name, gpu_flags) \ + CODELETS_ALL( name, cpu_func_name, gpu_func_name, STARPU_CPU | STARPU_CUDA, gpu_flags ) +#endif #define CODELETS_ALL_HEADER(name) \ CHAMELEON_CL_CB_HEADER(name); \ @@ -89,29 +111,35 @@ void cl_##name##_restore_where(void) #if defined(CHAMELEON_SIMULATION) -#if defined(CHAMELEON_USE_CUDA) -#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_GPU(name, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, cuda_flags) + +#if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP) +#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \ + CODELETS_GPU(name, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, gpu_flags) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #else -#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ +#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \ CODELETS_CPU(name, (starpu_cpu_func_t) 1) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #endif -#elif defined(CHAMELEON_USE_CUDA) -#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags) + +#else /* defined(CHAMELEON_SIMULATION) */ + +#if defined(CHAMELEON_USE_CUDA) //|| defined(CHAMELEON_USE_HIP) +#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \ + CODELETS_GPU(name, cpu_func_name, gpu_func_name, gpu_flags) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #else -#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ +#define CODELETS(name, cpu_func_name, gpu_func_name, gpu_flags) \ CODELETS_CPU(name, cpu_func_name) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #endif +#endif + CODELETS_HEADER(map); #endif /* _runtime_codelets_h_ */ diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 7c979a91710167fff8c41bd92d58a62804f7e5ec..9dcacf75e7ed0f0ff4699ab38e7d7497f471f227 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -11,6 +11,9 @@ endif() if (CHAMELEON_USE_CUDA AND CUDA_FOUND) set(N_GPUS 0 1) endif() +if (CHAMELEON_USE_HIP AND HIP_FOUND) + set(N_GPUS 0 1) +endif() if (CHAMELEON_SIMULATION) set(TEST_CATEGORIES simushm) if (CHAMELEON_USE_CUDA)