diff --git a/CMakeLists.txt b/CMakeLists.txt index ff59bbf70240ad7667b0c7c91a8ef3c6b7f3eae8..4d3df32a13a6dc0243d500da86cd7ba7a26a1fc6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ option(CHAMELEON_USE_MIGRATE # ----------------------------- # Create a list of possible runtime -set(CHAMELEON_SCHED_list PARSEC STARPU QUARK +set(CHAMELEON_SCHED_list PARSEC STARPU QUARK OPENMP CACHE INTERNAL "List of available runtimes" ) set( CHAMELEON_SCHED_PARSEC OFF CACHE INTERNAL @@ -120,6 +120,9 @@ set(CHAMELEON_SCHED_STARPU OFF CACHE INTERNAL set(CHAMELEON_SCHED_QUARK OFF CACHE INTERNAL "Enable Quark scheduler as the default runtime (Conflict with other CHAMELEON_SCHED_* options)") +set(CHAMELEON_SCHED_OPENMP OFF CACHE INTERNAL + "Enable OpenMP scheduler as the default runtime + (Conflict with other CHAMELEON_SCHED_* options)") set( CHAMELEON_SCHED STARPU CACHE STRING "Choose the chameleon internal runtime from ${CHAMELEON_SCHED_list}") @@ -154,6 +157,7 @@ if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU ) set(CHAMELEON_ENABLE_MPI ON FORCE) endif() +# FIXME: with OpenMP-target we should enable CUDA too # Use intermediate variable since cmake_dependent_option doesn't have OR conditions set(CHAMELEON_ENABLE_CUDA OFF CACHE INTERNAL "Tells if CUDA might be supported by the runtime") if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU ) @@ -892,6 +896,133 @@ if( CHAMELEON_SCHED_QUARK ) endif() +if( CHAMELEON_SCHED_OPENMP ) + + # Detect OpenMP through CMake's FindOpenMP module; REQUIRED aborts the 
+ # configuration when the compiler has no OpenMP support + find_package(OpenMP REQUIRED) + + # hwloc is a distinct package and needs its own call: find_package() + # accepts a single package name, not a list of packages + find_package(HWLOC) + + # Add definition and include_dir if found + # TODO + if ( OPENMP_FOUND ) + message("-- ${Blue}Add definition CHAMELEON_SCHED_OPENMP" + " - Activate OpenMP in Chameleon${ColourReset}") + #if (STARPU_INCLUDE_DIRS_DEP) + #include_directories(${STARPU_INCLUDE_DIRS_DEP}) + #set(CMAKE_REQUIRED_INCLUDES "${STARPU_INCLUDE_DIRS_DEP}") + #endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}") + #if(STARPU_LDFLAGS_OTHER_DEP) + #set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STARPU_LDFLAGS_OTHER_DEP}") + #set(CMAKE_REQUIRED_LDFLAGS "${STARPU_LDFLAGS_OTHER_DEP}") + #endif() + #if(STARPU_LIBRARY_DIRS_DEP) + #list(APPEND CMAKE_INSTALL_RPATH "${STARPU_LIBRARY_DIRS_DEP}") + #endif() + #if (STARPU_LIBRARIES_DEP) + #list(INSERT CHAMELEON_DEP 0 ${STARPU_LIBRARIES_DEP}) + #set(CMAKE_REQUIRED_LIBRARIES "${STARPU_LIBRARIES_DEP}") + #endif() + #if (CHAMELEON_SIMULATION) + #list(APPEND CMAKE_REQUIRED_FLAGS "-include" "starpu_simgrid_wrap.h") + #endif() + #string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + #check_function_exists(starpu_data_idle_prefetch_on_node HAVE_STARPU_IDLE_PREFETCH) + #if ( HAVE_STARPU_IDLE_PREFETCH ) + #message("-- ${Blue}Add definition HAVE_STARPU_IDLE_PREFETCH${ColourReset}") + #endif() + #check_function_exists(starpu_iteration_push HAVE_STARPU_ITERATION_PUSH) + #if ( HAVE_STARPU_ITERATION_PUSH ) + #message("-- ${Blue}Add definition HAVE_STARPU_ITERATION_PUSH${ColourReset}") + #endif() + #check_function_exists(starpu_data_wont_use HAVE_STARPU_DATA_WONT_USE) + #if ( HAVE_STARPU_DATA_WONT_USE ) + #message("-- ${Blue}Add definition HAVE_STARPU_DATA_WONT_USE${ColourReset}") + #endif() + #check_function_exists(starpu_data_set_coordinates HAVE_STARPU_DATA_SET_COORDINATES) + #if ( HAVE_STARPU_DATA_SET_COORDINATES 
) + #message("-- ${Blue}Add definition HAVE_STARPU_DATA_SET_COORDINATES${ColourReset}") + #endif() + #check_function_exists(starpu_malloc_on_node_set_default_flags HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS) + #if ( HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS ) + #message("-- ${Blue}Add definition HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS${ColourReset}") + #endif() + #check_function_exists(starpu_mpi_data_migrate HAVE_STARPU_MPI_DATA_MIGRATE) + #if ( HAVE_STARPU_MPI_DATA_MIGRATE ) + #message("-- ${Blue}Add definition HAVE_STARPU_MPI_DATA_MIGRATE${ColourReset}") + #elseif(CHAMELEON_USE_MIGRATE) + #set(CHAMELEON_USE_MIGRATE "OFF") + #message("-- ${Blue}CHAMELEON_USE_MIGRATE is turned OFF because starpu_mpi_data_migrate not found${ColourReset}") + #endif() + else ( OPENMP_FOUND ) + #if(MORSE_VERBOSE_FIND_PACKAGE) + #if(NOT HWLOC_FOUND OR NOT HWLOC_LIBRARIES) + #if (NOT HWLOC_hwloc.h_DIRS) + #Print_Find_Header_Status(hwloc hwloc.h) + #endif () + #if (NOT HWLOC_hwloc_LIBRARY) + #Print_Find_Library_Status(hwloc libhwloc) + #endif () + #endif() + #if(CHAMELEON_ENABLE_TRACING AND (NOT FXT_FOUND OR NOT FXT_LIBRARIES)) + #if (NOT FXT_fxt.h_DIRS) + #Print_Find_Header_Status(fxt fxt.h) + #endif () + #if (NOT FXT_fxt_LIBRARY) + #Print_Find_Library_Status(fxt libfxt) + #endif () + #endif() + #if(CHAMELEON_SIMULATION AND (NOT SIMGRID_FOUND OR NOT SIMGRID_LIBRARIES)) + #if (NOT SIMGRID_simgrid.h_DIRS) + #Print_Find_Header_Status(simgrid simgrid.h) + #endif () + #if (NOT SIMGRID_simgrid_LIBRARY) + #Print_Find_Library_Status(simgrid libsimgrid) + #endif () + #endif() + #if( (NOT STARPU_SHM_FOUND) OR (NOT STARPU_SHM_LIBRARIES) OR + #( STARPU_LOOK_FOR_MPI AND (NOT STARPU_MPI_FOUND OR NOT STARPU_MPI_LIBRARIES) ) + #) + #foreach(starpu_hdr ${STARPU_hdrs_to_find}) + #if (NOT STARPU_${starpu_hdr}_INCLUDE_DIRS) + #Print_Find_Header_Status(starpu ${starpu_hdr}) + #endif () + #endforeach() + #if(STARPU_VERSION_STRING) + #foreach(starpu_lib ${STARPU_libs_to_find}) + #if (NOT 
STARPU_${starpu_lib}_LIBRARY) + #Print_Find_Library_Status(starpu ${starpu_lib}) + #endif () + #endforeach() + #endif () + #endif () + #else(MORSE_VERBOSE_FIND_PACKAGE) + #message(WARNING "StarPU library has not been found and MORSE_VERBOSE_FIND_PACKAGE is set to OFF." + #" Try to activate MORSE_VERBOSE_FIND_PACKAGE option (-DMORSE_VERBOSE_FIND_PACKAGE=ON) to get some hints for the detection") + #endif(MORSE_VERBOSE_FIND_PACKAGE) + #if(NOT HWLOC_FOUND OR NOT HWLOC_LIBRARIES) + #message(FATAL_ERROR "hwloc library is required but has not been found") + #endif() + #if(CHAMELEON_SIMULATION AND (NOT SIMGRID_FOUND OR NOT SIMGRID_LIBRARIES)) + #message(FATAL_ERROR "SimGrid library is required but has not been found") + #endif() + #if(CHAMELEON_ENABLE_TRACING AND (NOT FXT_FOUND OR NOT FXT_LIBRARIES)) + #message(FATAL_ERROR "FxT library is required but has not been found") + #endif() + #if( (NOT STARPU_SHM_FOUND) OR (NOT STARPU_SHM_LIBRARIES) OR + #( STARPU_LOOK_FOR_MPI AND (NOT STARPU_MPI_FOUND OR NOT STARPU_MPI_LIBRARIES) ) + #) + #message(FATAL_ERROR "StarPU library is required but has not been found") + #endif() + endif ( OPENMP_FOUND ) + +endif( CHAMELEON_SCHED_OPENMP ) + # getopt check_include_files(getopt.h CHAMELEON_HAVE_GETOPT_H) if (CHAMELEON_HAVE_GETOPT_H) @@ -937,6 +1068,9 @@ endif() if (CHAMELEON_SCHED_STARPU) list(APPEND CHAMELEON_LIBRARIES chameleon_starpu) endif() +if (CHAMELEON_SCHED_OPENMP) + list(APPEND CHAMELEON_LIBRARIES chameleon_openmp) +endif() list(APPEND CHAMELEON_LIBRARIES hqr) set(CHAMELEON_LIBRARIES_DEP ${CHAMELEON_LIBRARIES} ${CHAMELEON_DEP}) diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index e4ebc3691cf2e2642a2ed87e85dda0d2d2383f5a..ffb631f153b6cdce0c6a40c23ed4a080673bd84c 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -296,6 +296,8 @@ elseif(CHAMELEON_SCHED_PARSEC) target_link_libraries(chameleon chameleon_parsec) elseif(CHAMELEON_SCHED_QUARK) target_link_libraries(chameleon chameleon_quark) 
+elseif(CHAMELEON_SCHED_OPENMP) + target_link_libraries(chameleon chameleon_openmp) endif() if (NOT CHAMELEON_SIMULATION) # Depends on coreblas only for set_coreblas_gemm3m_enabled() (Maybe we should change that) diff --git a/compute/zpotrf.c b/compute/zpotrf.c index bb84853375351a679a23519f2d6624f9fb434139..f9ff8878fb6a8faf90b0a5f38ca8f65eadc49fbf 100644 --- a/compute/zpotrf.c +++ b/compute/zpotrf.c @@ -23,6 +23,7 @@ * */ #include "control/common.h" +#include <stdio.h> /** ******************************************************************************** diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in index 5edc1093ce3b70c6db269090fb99e85741021ae8..88abfe51c42e47788d422b55e7e6b8b62f4968c4 100644 --- a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -27,6 +27,7 @@ #cmakedefine CHAMELEON_SCHED_QUARK #cmakedefine CHAMELEON_SCHED_PARSEC #cmakedefine CHAMELEON_SCHED_STARPU +#cmakedefine CHAMELEON_SCHED_OPENMP /* Communication engine */ #cmakedefine CHAMELEON_USE_MPI diff --git a/include/chameleon/runtime_struct.h b/include/chameleon/runtime_struct.h index 70df9e09d3a2dd3d7e2220715c6bc5cbfa5ba3fa..9d34949736e4f6cf811afed2edf91c1079d84b50 100644 --- a/include/chameleon/runtime_struct.h +++ b/include/chameleon/runtime_struct.h @@ -30,6 +30,7 @@ typedef enum runtime_id_e { RUNTIME_SCHED_QUARK, /**< Quark runtime */ RUNTIME_SCHED_PARSEC, /**< PaRSEC runtime */ RUNTIME_SCHED_STARPU, /**< StarPU runtime */ + RUNTIME_SCHED_OPENMP, /**< OpenMP runtime */ } RUNTIME_id_t; /** diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 73503ee0ce89aff056eb5971f4904bdbe0787315..cabe559c252719de9501e0f95093de147fa9ec18 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -109,6 +109,8 @@ elseif( CHAMELEON_SCHED_PARSEC ) add_subdirectory(parsec) elseif( CHAMELEON_SCHED_STARPU ) add_subdirectory(starpu) +elseif( CHAMELEON_SCHED_OPENMP ) + add_subdirectory(openmp) endif() ### diff --git a/runtime/openmp/CMakeLists.txt 
b/runtime/openmp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d072f05be4c2018e8693a4967951a2fd2afa1ada --- /dev/null +++ b/runtime/openmp/CMakeLists.txt @@ -0,0 +1,153 @@ +### +# +# @file CMakeLists.txt +# +# @copyright 2009-2015 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah Univesity of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. +# +# @version 1.0.0 +# @author Cedric Castagnede +# @author Emmanuel Agullo +# @author Mathieu Faverge +# @author Florent Pruvost +# @date 2012-07-13 +# +### +cmake_minimum_required(VERSION 2.8) + +include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/include ) +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include ) + +# Generate headers for all possible precisions +# -------------------------------------------- +#set(RUNTIME_HDRS_GENERATED "") +#set(ZHDR + #include/runtime_codelet_z.h + #) + +#precisions_rules_py(RUNTIME_HDRS_GENERATED "${ZHDR}" + #PRECISIONS "s;d;c;z;ds;zc" + #TARGETDIR "include") + +# Define the list of headers +# -------------------------- +set(RUNTIME_HDRS + include/chameleon_openmp.h + #include/runtime_codelet_profile.h + #include/runtime_codelets.h + #include/runtime_profiling.h + #include/runtime_workspace.h + ) + +# Add generated headers +# --------------------- +#foreach( hdr_file ${RUNTIME_HDRS_GENERATED} ) + #list(APPEND RUNTIME_HDRS ${CMAKE_CURRENT_BINARY_DIR}/${hdr_file}) +#endforeach() + +# Force generation of headers +# --------------------------- +add_custom_target( + runtime_openmp_include + ALL SOURCES ${RUNTIME_HDRS}) + +# Installation +# ------------ +install( + FILES ${RUNTIME_HDRS} + DESTINATION 
include/runtime/openmp ) + +# Generate the Chameleon common for all possible precisions +# --------------------------------------------------------- +# FIXME: restore this generation once file is done +#set(RUNTIME_COMMON_GENERATED "") +#set(ZSRC + #control/runtime_zprofiling.c + #control/runtime_zlocality.c + #) + +#precisions_rules_py( + #RUNTIME_COMMON_GENERATED "${ZSRC}" + #PRECISIONS "${CHAMELEON_PRECISION}" + #TARGETDIR "control") + +set(RUNTIME_COMMON + control/runtime_async.c + control/runtime_context.c + control/runtime_control.c + control/runtime_descriptor.c + control/runtime_options.c + control/runtime_profiling.c + #control/runtime_workspace.c + ${RUNTIME_COMMON_GENERATED} + ) + +#set(flags_to_add "") +#foreach(_prec ${CHAMELEON_PRECISION}) + #set(flags_to_add "${flags_to_add} -DPRECISION_${_prec}") +#endforeach() +#set_source_files_properties(control/runtime_profiling.c PROPERTIES COMPILE_FLAGS "${flags_to_add}") + +# Generate the Chameleon sources for all possible precisions +# ---------------------------------------------------------- +set(RUNTIME_SRCS_GENERATED "") +set(ZSRC + #codelets/codelet_zcallback.c + ${CODELETS_ZSRC} + ) + +precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}" + PRECISIONS "${CHAMELEON_PRECISION}" + TARGETDIR "codelets") + +set(RUNTIME_SRCS + ${RUNTIME_COMMON} + ${RUNTIME_SRCS_GENERATED} + ) + +# Force generation of sources +# --------------------------- +add_custom_target(openmp_sources ALL SOURCES ${RUNTIME_SRCS}) +set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};runtime_openmp_include;openmp_sources" CACHE INTERNAL "List of targets of sources") + +# Add library +# ----------- +add_library(chameleon_openmp ${RUNTIME_SRCS}) +set_property(TARGET chameleon_openmp PROPERTY LINKER_LANGUAGE Fortran) +set_property(TARGET chameleon_openmp PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") + +target_link_libraries(chameleon_openmp + ${OPENMP_LIBRARIES_DEP}) +target_link_libraries(chameleon_openmp + coreblas) + 
+add_dependencies(chameleon_openmp + chameleon_include + control_include + runtime_openmp_include + openmp_sources + ) + +add_dependencies(chameleon_openmp coreblas_include) + +# installation +# ------------ +install(TARGETS chameleon_openmp + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + +### +### END CMakeLists.txt +### diff --git a/runtime/openmp/codelets/codelet_zasum.c b/runtime/openmp/codelets/codelet_zasum.c new file mode 100644 index 0000000000000000000000000000000000000000..f48d54c79fa9ddd132454533b543d4b33dc7e628 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zasum.c @@ -0,0 +1,38 @@ +/** + * + * @file codelet_zasum.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zasum OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Florent Pruvost + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "coreblas/coreblas_z.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_dzasum(const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ); + double *ptrB = RTBLKADDR( B, double, Bm, Bn ); /* each tile's first element is the dependency token: a section sized by the product of the tile indices is empty when an index is 0 */ +#pragma omp task firstprivate(storev, uplo, M, N, lda, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_dzasum(storev, uplo, M, N, ptrA, lda, ptrB); +} + + diff --git a/runtime/openmp/codelets/codelet_zaxpy.c b/runtime/openmp/codelets/codelet_zaxpy.c new file mode 100644 index 
0000000000000000000000000000000000000000..a14f7c3e1378f1b5e429a3c1a067ec4054764076 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zaxpy.c @@ -0,0 +1,35 @@ +/** + * + * @file codelet_zaxpy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zaxpy OpenMP codelet + * + * @version 1.0.0 + * @author Florent Pruvost + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, + int M, CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int incA, + const CHAM_desc_t *B, int Bm, int Bn, int incB) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); /* each tile's first element is the dependency token: a section sized by the product of the tile indices is empty when an index is 0 */ +#pragma omp task firstprivate(M, alpha, incA, incB, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zaxpy(M, alpha, ptrA, incA, ptrB, incB); +} + diff --git a/runtime/openmp/codelets/codelet_zbuild.c b/runtime/openmp/codelets/codelet_zbuild.c new file mode 100644 index 0000000000000000000000000000000000000000..2ac4aae9d80e6eacf1504b1ab2bfc8e6f8382eff --- /dev/null +++ b/runtime/openmp/codelets/codelet_zbuild.c @@ -0,0 +1,47 @@ +/** + * + * @file codelet_zbuild.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zbuild OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Piotr Luszczek + * @author Pierre Lemarinier + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Guillaume Sylvand + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +void INSERT_TASK_zbuild( const RUNTIME_option_t *options, + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ) +{ + int row_min, row_max, col_min, col_max; + row_min = Am*A->mb ; + row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ; + col_min = An*A->nb ; + col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ; + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); /* the tile's first element is the dependency token: a section sized by Am*An is empty when Am or An is 0 */ + void (*callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; + callback = user_build_callback; + +#pragma omp task firstprivate(row_min, row_max, col_min, col_max, ptrA, lda, user_data, callback) depend(inout:ptrA[0]) + callback(row_min, row_max, col_min, col_max, ptrA, lda, user_data); +} diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c new file mode 100644 index 0000000000000000000000000000000000000000..316170fc7db9de9ad944669594b68ead69261c17 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgeadd.c @@ -0,0 +1,92 @@ +/** + * + * @file codelet_zgeadd.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgeadd OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = ChamNoTrans: op(A) = A + * = ChamTrans: op(A) = A' + * = ChamConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices op(A) and B. + * + * @param[in] N + * Number of columns of the matrices op(A) and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M + * otherwise. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = ChamNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B. 
LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); /* each tile's first element is the dependency token: a section sized by the product of the tile indices is empty when an index is 0 */ +#pragma omp task firstprivate(trans, m, n, alpha, beta, lda, ldb, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zgeadd(trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c new file mode 100644 index 0000000000000000000000000000000000000000..8c69936b7b52e65b6dfb2a54d3e2279c36e29f27 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgelqt.c @@ -0,0 +1,104 @@ +/** + * + * @file codelet_zgelqt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgelqt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgelqt - computes a LQ factorization of a complex M-by-N tile A: A = L * Q. + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + * A(i,i+1:n), and tau in TAU(i). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, the elements on and below the diagonal of the array + * contain the M-by-min(M,N) lower trapezoidal tile L (L is + * lower triangular if M <= N); the elements above the diagonal, + * with the array TAU, represent the unitary tile Q as a + * product of elementary reflectors (see Further Details). + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. 
LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); /* each tile's first element is the dependency token: a section sized by the product of the tile indices is empty when an index is 0 */ + CHAMELEON_Complex64_t *TAU = options->ws_worker; + CHAMELEON_Complex64_t *work = options->ws_host; /* NOTE(review): TAU and work come straight from options; if every task is handed the same buffers, concurrently running tasks race on them - confirm */ +#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); +} diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c new file mode 100644 index 0000000000000000000000000000000000000000..8cbfa36f004f1d580ca59c0afc2a30e2701c7f3a --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgemm.c @@ -0,0 +1,52 @@ +/** + * + * @file codelet_zgemm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgemm OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zgemm(const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); /* each tile's first element is the dependency token: a section sized by the product of the tile indices is empty when an index is 0 */ +#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) + CORE_zgemm(transA, transB, + m, n, k, + alpha, ptrA, lda, + ptrB, ldb, + beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..0337e0de946f5323c665d50bef0529b3d89eeceb --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgeqrt.c @@ -0,0 +1,105 @@ +/** + * + * @file codelet_zgeqrt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgeqrt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgeqrt computes a QR factorization of a complex M-by-N tile A: + * A = Q * R. + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(1) H(2) . . . H(k), where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + * and tau in TAU(i). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, the elements on and above the diagonal of the array + * contain the min(M,N)-by-N upper trapezoidal tile R (R is + * upper triangular if M >= N); the elements below the diagonal, + * with the array TAU, represent the unitary tile Q as a + * product of elementary reflectors (see Further Details). + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. 
+ * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); /* each tile's first element is the dependency token: a section sized by the product of the tile indices is empty when an index is 0 */ + CHAMELEON_Complex64_t *TAU = options->ws_worker; + CHAMELEON_Complex64_t *work = options->ws_host; /* NOTE(review): TAU and work come straight from options; if every task is handed the same buffers, concurrently running tasks race on them - confirm */ +#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); +} diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c new file mode 100644 index 0000000000000000000000000000000000000000..cfe3a6de0ed67a612a2df7566c0cfbc0d6ed7180 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgessm.c @@ -0,0 +1,88 @@ +/** + * + * @file codelet_zgessm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgessm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgessm applies the factors L computed by CORE_zgetrf_incpiv to + * a complex M-by-N tile A. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] K + * The number of columns of the tile L. K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] IPIV + * The pivot indices array of size K as returned by + * CORE_zgetrf_incpiv. + * + * @param[in] L + * The M-by-K lower triangular tile. + * + * @param[in] LDL + * The leading dimension of the array L. LDL >= max(1,M). + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, updated by the application of L. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). 
+ *
+ *******************************************************************************
+ *
+ * @return
+ *         \retval CHAMELEON_SUCCESS successful exit
+ *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ *
+ */
+
+void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
+                       int m, int n, int k, int ib, int nb,
+                       int *IPIV,
+                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                       const CHAM_desc_t *D, int Dm, int Dn, int ldd,
+                       const CHAM_desc_t *A, int Am, int An, int lda)
+{
+    /* Only tiles D(Dm, Dn) and A(Am, An) are handed to the kernel; the
+     * L, Lm, Ln, ldl and nb arguments are not used by this implementation. */
+    CHAMELEON_Complex64_t *ptrD = RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn);
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* Dependencies are keyed on the first element of each tile.  Dm/Dn and
+     * Am/An are tile indices, not extents, so a [0:Am*An] section would be
+     * empty for any tile in row or column 0.  IPIV carries no dependency of
+     * its own. */
+#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0])
+    CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda);
+}
diff --git a/runtime/openmp/codelets/codelet_zgessq.c b/runtime/openmp/codelets/codelet_zgessq.c
new file mode 100644
index 0000000000000000000000000000000000000000..b6c55d8116ea677247900a35ba11a209fb76af94
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgessq.c
@@ -0,0 +1,36 @@
+/**
+ *
+ * @file codelet_zgessq.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgessq OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Mathieu Faverge
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
+                        int m, int n,
+                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* The SCALESUMSQ tile holds the pair passed to the kernel:
+     * element 0 and element 1 are handed over by address. */
+    double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
+    /* A is only read, the pair is updated.  Dependencies are keyed on the
+     * first element of each tile: the m/n-suffixed arguments are tile indices,
+     * so a [0:Am*An] section would be empty for any tile in row or column 0. */
+#pragma omp task firstprivate(m, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0])
+    CORE_zgessq( m, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] );
+}
diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c
new file mode 100644
index 0000000000000000000000000000000000000000..4bb4173ac113566a90c8702cb1d08002142ec0e9
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgetrf.c
@@ -0,0 +1,39 @@
+/**
+ *
+ * @file codelet_zgetrf.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
+                       int m, int n, int nb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       int *IPIV,
+                       cham_bool_t check_info, int iinfo)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* Each task gets its own firstprivate copy of info; the value written by
+     * the kernel is not reported anywhere, and check_info, iinfo and nb are
+     * not used by this implementation. */
+    int info = 0;
+    /* The dependency is keyed on the first element of the tile.  Am/An are
+     * tile indices, not extents, so a [0:Am*An] section would be empty for any
+     * tile in row or column 0.  IPIV carries no dependency of its own. */
+#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0])
+    CORE_zgetrf( m, n, ptrA, lda, IPIV, &info );
+}
diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..4c5e6356c7b5a72652f0a5d9300c3575463d87d8
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * @file codelet_zgetrf_incpiv.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_incpiv OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  CORE_zgetrf_incpiv computes an LU factorization of a general M-by-N tile A
+ *  using partial pivoting with row interchanges.
+ *
+ *  The factorization has the form
+ *
+ *    A = P * L * U
+ *
+ *  where P is a permutation matrix, L is lower triangular with unit
+ *  diagonal elements (lower trapezoidal if m > n), and U is upper
+ *  triangular (upper trapezoidal if m < n).
+ *
+ *  This is the right-looking Level 2.5 BLAS version of the algorithm.
+ *
+ *******************************************************************************
+ *
+ * @param[in] M
+ *         The number of rows of the tile A. M >= 0.
+ *
+ * @param[in] N
+ *         The number of columns of the tile A. N >= 0.
+ *
+ * @param[in] IB
+ *         The inner-blocking size. IB >= 0.
+ *
+ * @param[in,out] A
+ *         On entry, the M-by-N tile to be factored.
+ *         On exit, the factors L and U from the factorization
+ *         A = P*L*U; the unit diagonal elements of L are not stored.
+ *
+ * @param[in] LDA
+ *         The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[out] IPIV
+ *         The pivot indices; for 1 <= i <= min(M,N), row i of the
+ *         tile was interchanged with row IPIV(i).
+ *
+ * @param[out] INFO
+ *         See returned value.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *         \retval CHAMELEON_SUCCESS successful exit
+ *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ *              has been completed, but the factor U is exactly
+ *              singular, and division by zero will occur if it is used
+ *              to solve a system of equations.
+ *
+ */
+
+void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
+                              int m, int n, int ib, int nb,
+                              const CHAM_desc_t *A, int Am, int An, int lda,
+                              const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                              int *IPIV,
+                              cham_bool_t check_info, int iinfo)
+{
+    /* Only tile A(Am, An) is handed to the kernel; L, Lm, Ln, ldl, nb,
+     * check_info and iinfo are not used by this implementation. */
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* Each task gets its own firstprivate copy of info; the value written by
+     * the kernel is not reported anywhere. */
+    int info = 0;
+    /* The dependency is keyed on the first element of the tile.  Am/An are
+     * tile indices, not extents, so a [0:Am*An] section would be empty for any
+     * tile in row or column 0.  IPIV carries no dependency of its own. */
+#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(inout:ptrA[0])
+    CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info);
+}
diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ae2cad43a8b56dd055157089268e9510758dd68
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
@@ -0,0 +1,85 @@
+/**
+ *
+ * @file codelet_zgetrf_nopiv.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrf_nopiv OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Omar Zenati
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  CORE_zgetrf_nopiv computes an LU factorization of a general diagonally
+ *  dominant M-by-N matrix A without pivoting.
+ *
+ *  The factorization has the form
+ *     A = L * U
+ *  where L is lower triangular with unit
+ *  diagonal elements (lower trapezoidal if m > n), and U is upper
+ *  triangular (upper trapezoidal if m < n).
+ *
+ *  This is the right-looking Level 3 BLAS version of the algorithm.
+ *  WARNING: Your matrix needs to be diagonally dominant if you want to call
+ *  this routine safely.
+ *
+ *******************************************************************************
+ *
+ * @param[in] M
+ *         The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *         The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in] IB
+ *         The block size to switch between blocked and unblocked code.
+ *
+ * @param[in,out] A
+ *         On entry, the M-by-N matrix to be factored.
+ *         On exit, the factors L and U from the factorization
+ *         A = L*U; the unit diagonal elements of L are not stored.
+ *
+ * @param[in] LDA
+ *         The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @return
+ *         \retval CHAMELEON_SUCCESS successful exit
+ *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ *              has been completed, but the factor U is exactly
+ *              singular, and division by zero will occur if it is used
+ *              to solve a system of equations.
+ *
+ */
+
+void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
+                             int m, int n, int ib, int nb,
+                             const CHAM_desc_t *A, int Am, int An, int lda,
+                             int iinfo)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* Each task gets its own firstprivate copy of info; the value written by
+     * the kernel is not reported anywhere, and nb and iinfo are not used by
+     * this implementation. */
+    int info = 0;
+    /* The dependency is keyed on the first element of the tile.  Am/An are
+     * tile indices, not extents, so a [0:Am*An] section would be empty for any
+     * tile in row or column 0. */
+#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0])
+    CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info);
+}
diff --git a/runtime/openmp/codelets/codelet_zhe2ge.c b/runtime/openmp/codelets/codelet_zhe2ge.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad223c24cd36ed12577e3c001dc48427ae41368b
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zhe2ge.c
@@ -0,0 +1,37 @@
+/**
+ *
+ * @file codelet_zhe2ge.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zhe2ge OpenMP codelet
+ *
+ * @version 1.0.0
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo,
+                       int m, int n, int mb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An );
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn );
+    /* A is read, B is written.  Dependencies are keyed on the first element
+     * of each tile: Am/An and Bm/Bn are tile indices, not extents, so a
+     * [0:Am*An] section would be empty for any tile in row or column 0.
+     * mb is not used by this implementation. */
+#pragma omp task firstprivate(uplo, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_zhe2ge(uplo, m, n, ptrA, lda, ptrB, ldb);
+}
diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c
new file mode 100644
index 0000000000000000000000000000000000000000..3f1bad6db3a137bbf9e94657ab1f28d1995902af
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zhemm.c
@@ -0,0 +1,52 @@
+/**
+ *
+ * @file codelet_zhemm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zhemm OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    /* A and B are read, C is updated.  Dependencies are keyed on the first
+     * element of each tile: the m/n-suffixed arguments are tile indices, not
+     * extents, so a [0:Am*An] section would be empty for any tile in row or
+     * column 0.  nb is not used by this implementation. */
+#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
+    CORE_zhemm(side, uplo,
+               m, n,
+               alpha, ptrA, lda,
+               ptrB, ldb,
+               beta, ptrC, ldc);
+}
diff --git a/runtime/openmp/codelets/codelet_zher2k.c b/runtime/openmp/codelets/codelet_zher2k.c
new file mode 100644
index 0000000000000000000000000000000000000000..78fd8cad0e54f118736943ba83a8df57e00ee710
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zher2k.c
@@ -0,0 +1,49 @@
+/**
+ *
+ * @file codelet_zher2k.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zher2k OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, cham_trans_t trans,
+                       int n, int k, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    /* A and B are read, C is updated.  Dependencies are keyed on the first
+     * element of each tile: the m/n-suffixed arguments are tile indices, not
+     * extents, so a [0:Am*An] section would be empty for any tile in row or
+     * column 0.  nb is not used by this implementation. */
+#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
+    CORE_zher2k(uplo, trans,
+                n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc);
+}
diff --git a/runtime/openmp/codelets/codelet_zherfb.c b/runtime/openmp/codelets/codelet_zherfb.c
new file mode 100644
index 0000000000000000000000000000000000000000..1531406b22a42ba5beb0927f56f7581864bde586
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zherfb.c
@@ -0,0 +1,41 @@
+/**
+ *
+ * @file codelet_zherfb.c
+ *
+ * @copyright 2009-2014 The
University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zherfb OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Hatem Ltaief
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo,
+                       int n, int k, int ib, int nb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    /* NOTE(review): work points at the workspace stored in options and is
+     * captured by pointer; presumably every task submitted with the same
+     * options shares it -- confirm it cannot be used concurrently. */
+    CHAMELEON_Complex64_t *work = options->ws_host;
+    /* A and T are read, C is updated.  Dependencies are keyed on the first
+     * element of each tile: the m/n-suffixed arguments are tile indices, not
+     * extents, so a [0:Am*An] section would be empty for any tile in row or
+     * column 0.  ptrC and ldc are listed explicitly, like the other operands. */
+#pragma omp task firstprivate(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
+    CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
+}
diff --git a/runtime/openmp/codelets/codelet_zherk.c b/runtime/openmp/codelets/codelet_zherk.c
new file mode 100644
index 0000000000000000000000000000000000000000..7604e60b20d71bcb65d126f7e6480134ab4dbe60
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zherk.c
@@ -0,0 +1,49 @@
+/**
+ *
+ * @file codelet_zherk.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zherk OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zherk(const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, cham_trans_t trans,
+                      int n, int k, int nb,
+                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    /* A is read, C is updated.  Dependencies are keyed on the first element
+     * of each tile: Am/An and Cm/Cn are tile indices, not extents, so a
+     * [0:Am*An] section would be empty for any tile in row or column 0.
+     * nb is not used by this implementation. */
+#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, beta, ptrC, ldc) depend(in:ptrA[0]) depend(inout:ptrC[0])
+    CORE_zherk(uplo, trans,
+               n, k,
+               alpha, ptrA, lda,
+               beta, ptrC, ldc);
+}
diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c
new file mode 100644
index 0000000000000000000000000000000000000000..5d0fc75e21b0a596fd31bedb0c824155d2e2562f
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zhessq.c
@@ -0,0 +1,36 @@
+/**
+ *
+ * @file codelet_zhessq.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zhessq OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Mathieu Faverge
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, int n,
+                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* The SCALESUMSQ tile holds the pair passed to the kernel:
+     * element 0 and element 1 are handed over by address. */
+    double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
+    /* A is the operand that is only read and the pair is the one updated, so
+     * A is declared "in" and the pair "inout" (same access modes as
+     * INSERT_TASK_zgessq); the modes were previously the other way round.
+     * Dependencies are keyed on the first element of each tile because the
+     * m/n-suffixed arguments are tile indices, not extents. */
+#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0])
+    CORE_zhessq( uplo, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] );
+}
diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c
new file mode 100644
index 0000000000000000000000000000000000000000..a6ab833afd2b85e392cf6299e6fdd8e1148d8d14
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlacpy.c
@@ -0,0 +1,55 @@
+/**
+ *
+ * @file codelet_zlacpy.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlacpy OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, int m, int n, int nb,
+                        int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                        int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    /* Resolve the tiles first.  The displacements are offsets inside the
+     * tile data, so they are added to the tile addresses; adding them to the
+     * descriptor pointers (A + displA) would step over CHAM_desc_t objects. */
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    /* A is read, B is written.  Dependencies are keyed on the first element
+     * of each tile (not on the displaced address): Am/An and Bm/Bn are tile
+     * indices, not extents, so a [0:Am*An] section would be empty for any
+     * tile in row or column 0.  nb is not used by this implementation. */
+#pragma omp task firstprivate(uplo, m, n, displA, ptrA, lda, displB, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_zlacpy(uplo, m, n, ptrA + displA, lda, ptrB + displB, ldb);
+}
+
+void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, int m, int n, int nb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    /* Plain tile copy: the displaced variant with both displacements at 0. */
+    INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
+}
diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c
new file mode 100644
index 0000000000000000000000000000000000000000..56b50f613da0219433ae77be8d143eccea2129cb
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlag2c.c
@@ -0,0 +1,43 @@
+/**
+ *
+ * @file codelet_zlag2c.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux.
All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlag2c OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions mixed zc -> ds
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ */
+void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
+                       int m, int n, int nb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    /* Mixed-precision routine (see @precisions): the source tile A is
+     * double-precision complex and the destination tile B is single-precision
+     * complex, so B must not be viewed as CHAMELEON_Complex64_t.
+     * NOTE(review): the prototypes of the zc routines presumably live in the
+     * "zc" task/coreblas headers rather than the "z" ones -- confirm. */
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex32_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn);
+    /* A is read, B is written.  Dependencies are keyed on the first element
+     * of each tile: Am/An and Bm/Bn are tile indices, not extents, so a
+     * [0:Am*An] section would be empty for any tile in row or column 0.
+     * nb is not used by this implementation. */
+#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb);
+}
diff --git a/runtime/openmp/codelets/codelet_zlange.c b/runtime/openmp/codelets/codelet_zlange.c
new file mode 100644
index 0000000000000000000000000000000000000000..1358fc57d2216dc6cf44a9adb27357b23c711499
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlange.c
@@ -0,0 +1,50 @@
+/**
+ *
+ * @file codelet_zlange.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlange OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zlange(const RUNTIME_option_t *options,
+                       cham_normtype_t norm, int M, int N, int NB,
+                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *B, int Bm, int Bn)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    double *ptrB = RTBLKADDR(B, double, Bm, Bn);
+    /* NOTE(review): work points at the workspace stored in options and is
+     * captured by pointer; presumably every task submitted with the same
+     * options shares it -- confirm it cannot be used concurrently. */
+    double *work = options->ws_worker;
+    /* A is read, the result is stored through ptrB.  norm is captured
+     * explicitly; options itself is not needed inside the task.  Dependencies
+     * are keyed on the first element of each tile: Am/An and Bm/Bn are tile
+     * indices, not extents, so a [0:Am*An] section would be empty for any
+     * tile in row or column 0.  NB is not used by this implementation. */
+#pragma omp task firstprivate(norm, M, N, ptrA, LDA, ptrB, work) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB);
+}
+
+void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
+                           const CHAM_desc_t *A, int Am, int An,
+                           const CHAM_desc_t *B, int Bm, int Bn)
+{
+    double *ptrA = RTBLKADDR(A, double, Am, An);
+    double *ptrB = RTBLKADDR(B, double, Bm, Bn);
+
+    /* B <- max(A, B) on the first element of each tile.  The comparison is
+     * submitted as a task with dependencies on both tiles, so that it is
+     * ordered after the tasks that produce them; executing it directly at
+     * submission time would read values that may not have been computed. */
+#pragma omp task firstprivate(ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    {
+        if ( *ptrA > *ptrB ) {
+            *ptrB = *ptrA;
+        }
+    }
+}
diff --git a/runtime/openmp/codelets/codelet_zlanhe.c b/runtime/openmp/codelets/codelet_zlanhe.c
new file mode 100644
index 0000000000000000000000000000000000000000..f77acaad28dffa0abecc17a0f9308d009c0fea36
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlanhe.c
@@ -0,0 +1,39 @@
+/**
+ *
+ * @file codelet_zlanhe.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlanhe OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
+                       cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
+                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *B, int Bm, int Bn)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* NOTE(review): work points at the workspace stored in options and is
+     * captured by pointer; presumably every task submitted with the same
+     * options shares it -- confirm it cannot be used concurrently. */
+    double *work = options->ws_worker;
+    double *normA = RTBLKADDR(B, double, Bm, Bn);
+    /* A is read, the result is stored through normA.  Dependencies are keyed
+     * on the first element of each tile: Am/An and Bm/Bn are tile indices,
+     * not extents, so a [0:Am*An] section would be empty for any tile in row
+     * or column 0.  NB is not used by this implementation. */
+#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, work, normA) depend(in:ptrA[0]) depend(inout:normA[0])
+    CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA);
+}
diff --git a/runtime/openmp/codelets/codelet_zlansy.c b/runtime/openmp/codelets/codelet_zlansy.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3dd736faac2e267f1703ea63e29633d45dd5b02
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlansy.c
@@ -0,0 +1,39 @@
+/**
+ *
+ * @file codelet_zlansy.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlansy OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
+                       cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
+                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *B, int Bm, int Bn)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* NOTE(review): work points at the workspace stored in options and is
+     * captured by pointer; presumably every task submitted with the same
+     * options shares it -- confirm it cannot be used concurrently. */
+    double *work = options->ws_worker;
+    double *normA = RTBLKADDR(B, double, Bm, Bn);
+    /* A is read, the result is stored through normA.  Dependencies are keyed
+     * on the first element of each tile: Am/An and Bm/Bn are tile indices,
+     * not extents, so a [0:Am*An] section would be empty for any tile in row
+     * or column 0.  NB is not used by this implementation. */
+#pragma omp task firstprivate(norm, uplo, N, ptrA, LDA, work, normA) depend(in:ptrA[0]) depend(inout:normA[0])
+    CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA);
+}
diff --git a/runtime/openmp/codelets/codelet_zlantr.c b/runtime/openmp/codelets/codelet_zlantr.c
new file mode 100644
index 0000000000000000000000000000000000000000..08db23b539612f9c4d2c70c9a6b1d1564731a680
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlantr.c
@@ -0,0 +1,38 @@
+/**
+ *
+ * @file codelet_zlantr.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlantr OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Mathieu Faverge
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
+                       cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
+                       int M, int N, int NB,
+                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *B, int Bm, int Bn)
+{
+    /* Scratch buffer taken from the options, then the two tiles involved:
+     * the input tile A(Am, An) and the tile B(Bm, Bn) receiving the result. */
+    double                *scratch = options->ws_host;
+    CHAMELEON_Complex64_t *tileA   = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    double                *result  = RTBLKADDR(B, double, Bm, Bn);
+
+    /* Submit the kernel as a task that reads tileA and updates result;
+     * both dependencies are keyed on the first element of the tile. */
+#pragma omp task firstprivate(norm, uplo, diag, M, N, tileA, LDA, scratch, result) depend(in:tileA[0]) depend(inout:result[0])
+    {
+        CORE_zlantr(norm, uplo, diag, M, N, tileA, LDA, scratch, result);
+    }
+}
diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c
new file mode 100644
index 0000000000000000000000000000000000000000..79677fae8449a7d256004084f5930e58007c34ba
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlascal.c
@@ -0,0 +1,69 @@
+/**
+ *
+ * @file codelet_zlascal.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlascal OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Dalal Sukkari
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  CORE_zlascal scales a matrix by a scalar.
+ *
+ *       A <- alpha * A
+ *
+ *******************************************************************************
+ *
+ * @param[in] M
+ *          Number of rows of the matrix A.
+ *
+ * @param[in] N
+ *          Number of columns of the matrix A.
+ *
+ * @param[in] alpha
+ *          Scalar factor of A.
+ *
+ * @param[in,out] A
+ *          Matrix of size LDA-by-N.
+ *          On exit, A has been multiplied by alpha.
+ *
+ * @param[in] LDA
+ *          Leading dimension of the array A. LDA >= max(1,M)
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval CHAMELEON_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ */
+
+void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo,
+                        int m, int n, int nb,
+                        CHAMELEON_Complex64_t alpha,
+                        const CHAM_desc_t *A, int Am, int An, int lda)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    /* The tile is updated in place.  The dependency is keyed on its first
+     * element: Am/An are tile indices, not extents, so a [0:Am*An] section
+     * would be empty for any tile in row or column 0.  nb is not used by this
+     * implementation. */
+#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0])
+    CORE_zlascal(uplo, m, n, alpha, ptrA, lda);
+}
diff --git a/runtime/openmp/codelets/codelet_zlaset.c b/runtime/openmp/codelets/codelet_zlaset.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dbc2e48f25945190ea31fe5c013ca59052a8802
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlaset.c
@@ -0,0 +1,74 @@
+/**
+ *
+ * @file codelet_zlaset.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlaset StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zlaset - Sets the elements of the matrix A on the diagonal + * to beta and on the off-diagonals to alpha + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies which elements of the matrix are to be set + * = ChamUpper: Upper part of A is set; + * = ChamLower: Lower part of A is set; + * = ChamUpperLower: ALL elements of A are set. + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] alpha + * The constant to which the off-diagonal elements are to be set. + * + * @param[in] beta + * The constant to which the diagonal elements are to be set. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, A has been set accordingly. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). 
+ *
+ */
+void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, int M, int N,
+                        CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
+                        const CHAM_desc_t *A, int Am, int An, int LDA)
+{
+    /* Creates an OpenMP task that calls CORE_zlaset on tile (Am, An) of A,
+     * whose address is resolved by RTBLKADDR. */
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(uplo, M, N, alpha, beta, ptrA, LDA) depend(inout:ptrA[0])
+    CORE_zlaset(uplo, M, N, alpha, beta, ptrA, LDA);
+}
diff --git a/runtime/openmp/codelets/codelet_zlaset2.c b/runtime/openmp/codelets/codelet_zlaset2.c
new file mode 100644
index 0000000000000000000000000000000000000000..cc1a0b5cc2c9479edfd31d1bb1541de5ccfeb7ea
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlaset2.c
@@ -0,0 +1,72 @@
+/**
+ *
+ * @file codelet_zlaset2.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaset2 OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  CORE_zlaset2 - Sets the elements of the matrix A to alpha.
+ *  Not LAPACK compliant! Read below.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Specifies which elements of the matrix are to be set
+ *          = ChamUpper: STRICT Upper part of A is set to alpha;
+ *          = ChamLower: STRICT Lower part of A is set to alpha;
+ *          = ChamUpperLower: ALL elements of A are set to alpha.
+ *          Not LAPACK Compliant.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in] alpha
+ *          The constant to which the elements are to be set.
+ *
+ * @param[in,out] A
+ *          On entry, the M-by-N tile A.
+ *          On exit, A has been set to alpha accordingly.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ */
+void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int M, int N,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int LDA)
+{
+    /* Creates an OpenMP task that calls CORE_zlaset2 on tile (Am, An) of A,
+     * whose address is resolved by RTBLKADDR. */
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(uplo, M, N, alpha, ptrA, LDA) depend(inout:ptrA[0])
+    CORE_zlaset2(uplo, M, N, alpha, ptrA, LDA);
+}
diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c
new file mode 100644
index 0000000000000000000000000000000000000000..35a244477a9eb18d68d91042e16b88264d9019a0
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlatro.c
@@ -0,0 +1,46 @@
+/**
+ *
+ * @file codelet_zlatro.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlatro OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * INSERT_TASK_zlatro - Creates an OpenMP task that calls CORE_zlatro with
+ * tile (Am, An) of A (declared as read) and tile (Bm, Bn) of B (declared as
+ * read/write). mb is unused.
+ *
+ */
+void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int m, int n, int mb,
+                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+
+    /* Tiles are tracked through their first element. The tile coordinates
+     * only locate a tile, so they cannot serve as a section length: Am*An is
+     * 0 on the first tile row/column and zero-length sections are not allowed
+     * in depend. */
+#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb);
+}
diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef80073b7de4d6fd385e045b7757c53f801c2081
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zlauum.c
@@ -0,0 +1,43 @@
+/**
+ *
+ * @file codelet_zlauum.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlauum OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * INSERT_TASK_zlauum - Creates an OpenMP task that calls CORE_zlauum in
+ * place on tile (Am, An) of A. nb is unused.
+ *
+ */
+void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, int n, int nb,
+                        const CHAM_desc_t *A, int Am, int An, int lda)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
+    CORE_zlauum(uplo, n, ptrA, lda);
+}
diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c
new file mode 100644
index 0000000000000000000000000000000000000000..eef7cc35a208595b644c21864e504696c7f94d32
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zplghe.c
@@ -0,0 +1,40 @@
+/**
+ *
+ * @file codelet_zplghe.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zplghe OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Piotr Luszczek
+ * @author Pierre Lemarinier
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/* INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix.
+ * The generation runs as an OpenMP task calling CORE_zplghe on tile (Am, An) of A. */
+
+void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
+                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        int bigM, int m0, int n0, unsigned long long int seed )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
+    CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed );
+}
diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c
new file mode 100644
index 0000000000000000000000000000000000000000..83f9b4419c40fbb0d95adccc502aa7dce88495c4
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zplgsy.c
@@ -0,0 +1,40 @@
+/**
+ *
+ * @file codelet_zplgsy.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zplgsy OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Piotr Luszczek
+ * @author Pierre Lemarinier
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/* INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix.
+ * The generation runs as an OpenMP task calling CORE_zplgsy on tile (Am, An) of A. */
+
+void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
+                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        int bigM, int m0, int n0, unsigned long long int seed )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
+    CORE_zplgsy( bump, m, n, ptrA, lda, bigM, m0, n0, seed );
+}
diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c
new file mode 100644
index 0000000000000000000000000000000000000000..fde7a8d2c7183495175b7fd1a1ed7b55ea087b77
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zplrnt.c
@@ -0,0 +1,40 @@
+/**
+ *
+ * @file codelet_zplrnt.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zplrnt OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Piotr Luszczek
+ * @author Pierre Lemarinier
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/* INSERT_TASK_zplrnt - Generate a tile for random matrix.
+ * The generation runs as an OpenMP task calling CORE_zplrnt on tile (Am, An) of A. */
+
+void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
+                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        int bigM, int m0, int n0, unsigned long long int seed )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
+    CORE_zplrnt( m, n, ptrA, lda, bigM, m0, n0, seed );
+}
diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c
new file mode 100644
index 0000000000000000000000000000000000000000..0a4fd2b42a0428decc6966a2440770365a6b5a05
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zplssq.c
@@ -0,0 +1,80 @@
+/**
+ *
+ * @file codelet_zplssq.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zplssq StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include <math.h> +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * + * with scl and ssq such that + * + * ( scl**2 )*ssq = sum( A( 2*i )**2 * A( 2*i+1 ) ) + * i + * + * The values of A(2*i+1) are assumed to be at least unity. + * The values of A(2*i) are assumed to be non-negative and scl is + * + * scl = max( A( 2*i ) ), + * i + * + * The routine makes only one pass through the matrix A. + * + ******************************************************************************* + * + * @param[in] M + * The number of couple (scale, sumsq) in the matrix A. + * + * @param[in] A + * The 2-by-M matrix. 
+ *
+ * @param[out] result
+ *          On exit, result contains scl * sqrt( ssq )
+ *
+ */
+void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
+{
+    /* Both tiles hold a (scale, sumsq) pair of doubles: [0] = scale, [1] = sumsq. */
+    double *scl = RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn);
+    double *scalesum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
+
+    /* The merge must run as a task ordered after the tasks that produce the
+     * two pairs; executing it at submission time would read the tiles before
+     * those tasks have written them. */
+#pragma omp task firstprivate(scl, scalesum) depend(in:scalesum[0]) depend(inout:scl[0])
+    {
+        if( scl[0] < scalesum[0] ) {
+            /* Rescale the accumulated sum to the larger incoming scale. */
+            scl[1] = scalesum[1] + (scl[1]     * (( scl[0] / scalesum[0] ) * ( scl[0] / scalesum[0] )));
+            scl[0] = scalesum[0];
+        } else if( scl[0] > 0. ) {
+            /* Guarded so that two zero scales do not produce 0/0 = NaN. */
+            scl[1] = scl[1]     + (scalesum[1] * (( scalesum[0] / scl[0] ) * ( scalesum[0] / scl[0] )));
+        }
+    }
+}
+
+/*
+ * INSERT_TASK_zplssq2 - Creates an OpenMP task that replaces the first entry
+ * of tile (RESULTm, RESULTn) of RESULT by res[0] * sqrt( res[1] ).
+ */
+void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+{
+    /* Read the pair as doubles, like INSERT_TASK_zplssq does: a complex
+     * pointer would place res[1] at the wrong offset in complex precisions. */
+    double *res = RTBLKADDR(RESULT, double, RESULTm, RESULTn);
+
+#pragma omp task firstprivate(res) depend(inout:res[0])
+    res[0] = res[0] * sqrt( res[1] );
+}
diff --git a/runtime/openmp/codelets/codelet_zpotrf.c b/runtime/openmp/codelets/codelet_zpotrf.c
new file mode 100644
index 0000000000000000000000000000000000000000..42ff02395c9cc56e97c96ed7c5b230dc0b0f5e5f
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zpotrf.c
@@ -0,0 +1,47 @@
+/**
+ *
+ * @file codelet_zpotrf.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zpotrf OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * INSERT_TASK_zpotrf - Creates an OpenMP task that calls CORE_zpotrf in
+ * place on tile (Am, An) of A. nb and iinfo are unused.
+ *
+ */
+void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, int n, int nb,
+                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        int iinfo)
+{
+    (void)nb;
+    (void)iinfo;
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(uplo, n, lda, ptrA) depend(inout:ptrA[0])
+    {
+        /* Status local to the task.
+         * NOTE(review): the value written by CORE_zpotrf is not reported
+         * anywhere -- confirm whether a failure should be propagated (for
+         * instance together with iinfo) through the runtime options. */
+        int info = 0;
+        CORE_zpotrf(uplo, n, ptrA, lda, &info);
+        (void)info;
+    }
+}
diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c
new file mode 100644
index 0000000000000000000000000000000000000000..26e92081c41d2f804f40fcc265ce55f2ad6113f8
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zssssm.c
@@ -0,0 +1,118 @@
+/**
+ *
+ * @file codelet_zssssm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zssssm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zssssm applies the LU factorization update from a complex + * matrix formed by a lower triangular IB-by-K tile L1 on top of a + * M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1 + * tile A1 on top of a M2-by-N2 tile A2 (N1 == N2). + * + * This is the right-looking Level 2.5 BLAS version of the algorithm. + * + ******************************************************************************* + * + * @param[in] M1 + * The number of rows of the tile A1. M1 >= 0. + * + * @param[in] N1 + * The number of columns of the tile A1. N1 >= 0. + * + * @param[in] M2 + * The number of rows of the tile A2 and of the tile L2. + * M2 >= 0. + * + * @param[in] N2 + * The number of columns of the tile A2. N2 >= 0. + * + * @param[in] K + * The number of columns of the tiles L1 and L2. K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M1-by-N1 tile A1. + * On exit, A1 is updated by the application of L (L1 L2). + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M1). + * + * @param[in,out] A2 + * On entry, the M2-by-N2 tile A2. + * On exit, A2 is updated by the application of L (L1 L2). + * + * @param[in] LDA2 + * The leading dimension of the array A2. LDA2 >= max(1,M2). + * + * @param[in] L1 + * The IB-by-K lower triangular tile as returned by + * CORE_ztstrf. + * + * @param[in] LDL1 + * The leading dimension of the array L1. LDL1 >= max(1,IB). 
+ *
+ * @param[in] L2
+ *          The M2-by-K tile as returned by CORE_ztstrf.
+ *
+ * @param[in] LDL2
+ *          The leading dimension of the array L2. LDL2 >= max(1,M2).
+ *
+ * @param[in] IPIV
+ *          The pivot indices array of size K as returned by
+ *          CORE_ztstrf.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval CHAMELEON_SUCCESS successful exit
+ *          \retval <0 if INFO = -k, the k-th argument had an illegal value
+ *
+ */
+
+void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
+                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                        const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
+                        const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
+                        const int *IPIV)
+{
+    /* Creates an OpenMP task that calls CORE_zssssm; A1/A2 are declared as
+     * read/write, L1/L2 as read-only. nb is unused. */
+    CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
+    CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
+    CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n);
+    CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n);
+
+    /* Tiles are tracked through their first element. The tile coordinates
+     * only locate a tile, so they cannot serve as a section length: their
+     * product is 0 on the first tile row/column and zero-length sections are
+     * not allowed in depend.
+     * NOTE(review): IPIV is captured by pointer without any dependency --
+     * confirm the pivots are ready when the task runs. */
+#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV)\
+    depend(inout:ptrA1[0])\
+    depend(inout:ptrA2[0])\
+    depend(in:ptrL1[0])\
+    depend(in:ptrL2[0])
+    CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV);
+}
diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c
new file mode 100644
index 0000000000000000000000000000000000000000..0223756b164d997ec564031b70ef5224fdd81d02
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zsymm.c
@@ -0,0 +1,50 @@
+/**
+ *
+ * @file codelet_zsymm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsymm OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ */
+
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * INSERT_TASK_zsymm - Creates an OpenMP task that calls CORE_zsymm with the
+ * tiles (Am, An) of A and (Bm, Bn) of B declared as read, and tile (Cm, Cn)
+ * of C declared as read/write. nb is unused.
+ *
+ */
+void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
+                       cham_side_t side, cham_uplo_t uplo,
+                       int m, int n, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+
+    /* Tiles are tracked through their first element. The tile coordinates
+     * only locate a tile, so they cannot serve as a section length: their
+     * product is 0 on the first tile row/column and zero-length sections are
+     * not allowed in depend. */
+#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
+    CORE_zsymm(side, uplo,
+        m, n,
+        alpha, ptrA, lda,
+        ptrB, ldb,
+        beta, ptrC, ldc);
+}
diff --git a/runtime/openmp/codelets/codelet_zsyr2k.c b/runtime/openmp/codelets/codelet_zsyr2k.c
new file mode 100644
index 0000000000000000000000000000000000000000..415517280ec138dacdce5f59d1b4c0db6bd59868
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zsyr2k.c
@@ -0,0 +1,50 @@
+/**
+ *
+ * @file codelet_zsyr2k.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsyr2k OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * INSERT_TASK_zsyr2k - Creates an OpenMP task that calls CORE_zsyr2k with the
+ * tiles (Am, An) of A and (Bm, Bn) of B declared as read, and tile (Cm, Cn)
+ * of C declared as read/write. nb is unused.
+ *
+ */
+void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb;
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    /* The output tile must be looked up in descriptor C (not B). */
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+
+    /* C is passed to CORE_zsyr2k together with beta as the updated operand,
+     * so it is declared inout like in the zsyrk/zsymm codelets.
+     * Tiles are tracked through their first element: the tile coordinates
+     * only locate a tile and cannot serve as a section length (their product
+     * is 0 on the first tile row/column, and zero-length sections are not
+     * allowed in depend). */
+#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
+    CORE_zsyr2k(uplo, trans,
+                n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc);
+}
diff --git a/runtime/openmp/codelets/codelet_zsyrk.c b/runtime/openmp/codelets/codelet_zsyrk.c
new file mode 100644
index 0000000000000000000000000000000000000000..129229e9fd215b501d7aa10f406f22049a96180a
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zsyrk.c
@@ -0,0 +1,50 @@
+/**
+ *
+ * @file codelet_zsyrk.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsyrk OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-20
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * INSERT_TASK_zsyrk - Creates an OpenMP task that calls CORE_zsyrk with tile
+ * (Am, An) of A declared as read and tile (Cm, Cn) of C declared as
+ * read/write. nb is unused.
+ *
+ */
+void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, cham_trans_t trans,
+                       int n, int k, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb;
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+
+    /* Tiles are tracked through their first element. The tile coordinates
+     * only locate a tile, so they cannot serve as a section length: their
+     * product is 0 on the first tile row/column and zero-length sections are
+     * not allowed in depend. */
+#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, beta, ptrC, ldc) depend(in:ptrA[0]) depend(inout:ptrC[0])
+    CORE_zsyrk(uplo, trans,
+        n, k,
+        alpha, ptrA, lda,
+        beta, ptrC, ldc);
+}
diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c
new file mode 100644
index 0000000000000000000000000000000000000000..190a1dfd1e25902c93b6f5b7e7bde184ff7fa5d7
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zsyssq.c
@@ -0,0 +1,34 @@
+/**
+ *
+ * @file codelet_zsyssq.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsyssq OpenMP codelet
+ *
+ * @version 1.0.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.6.0 for CHAMELEON 1.0.0
+ * @author Mathieu Faverge
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+
+/*
+ * INSERT_TASK_zsyssq - Creates an OpenMP task that calls CORE_zsyssq on tile
+ * (Am, An) of A and updates the two doubles stored at the start of tile
+ * (SCALESUMSQm, SCALESUMSQn) of SCALESUMSQ.
+ */
+void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
+
+    /* Tiles are tracked through their first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0])
+    CORE_zsyssq( uplo, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1] );
+}
diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
new file mode 100644
index 0000000000000000000000000000000000000000..52afed56847c9820ec4b9df1e4b95946d49b0d1d
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
@@ -0,0 +1,37 @@
+/**
+ *
+ * @file codelet_zsytrf_nopiv.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsytrf_nopiv OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Florent Pruvost
+ * @author Marc Sergent
+ * @date 2011-10-09
+ * @precisions normal z -> c
+ *
+ */
+
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+/*
+ * INSERT_TASK_zsytrf_nopiv - Creates an OpenMP task that calls
+ * CORE_zsytf2_nopiv in place on tile (Am, An) of A. nb and iinfo are unused.
+ */
+void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
+                              cham_uplo_t uplo, int n, int nb,
+                              const CHAM_desc_t *A, int Am, int An, int lda,
+                              int iinfo)
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The tile is tracked through its first element. Am/An only locate the
+     * tile, so they cannot serve as a section length: Am*An is 0 on the first
+     * tile row/column and zero-length sections are not allowed in depend. */
+#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
+    CORE_zsytf2_nopiv(uplo, n, ptrA, lda);
+}
diff --git a/runtime/openmp/codelets/codelet_ztile_zero.c b/runtime/openmp/codelets/codelet_ztile_zero.c
new file mode 100644
index 0000000000000000000000000000000000000000..fee93ab0b3f870fd565477a0dafb074739db8f39
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_ztile_zero.c
@@ -0,0 +1,38 @@
+/**
+ *
+ * @file codelet_ztile_zero.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon ztile_zero OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Jakub Kurzak
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ */
+
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas.h"
+/**
+ * INSERT_TASK_ztile_zero - Creates an OpenMP task that sets to zero the
+ * entries ptrA[lda*x + y] of tile (Am, An) of A for X1 <= x < X2 and
+ * Y1 <= y < Y2.
+ */
+void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
+                             int X1, int X2, int Y1, int Y2,
+                             const CHAM_desc_t *A, int Am, int An, int lda )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+
+    /* The zeroing has to be a task ordered with the other tasks touching this
+     * tile; performing it at submission time could overwrite data that
+     * previously submitted tasks have not finished using. */
+#pragma omp task firstprivate(X1, X2, Y1, Y2, ptrA, lda) depend(inout:ptrA[0])
+    {
+        int x, y;
+        for (x = X1; x < X2; x++)
+            for (y = Y1; y < Y2; y++)
+                ptrA[lda*x+y] = 0.0;
+    }
+}
diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c
new file mode 100644
index 0000000000000000000000000000000000000000..2f37931e67737f7b7ca766f43830b1825714679b
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_ztplqt.c
@@ -0,0 +1,37 @@
+/**
+ *
+ * @file codelet_ztplqt.c
+ *
+ * @copyright 2009-2016 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon ztplqt OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2016-12-15
+ * @precisions normal z -> s d c
+ *
+ */
+
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+/*
+ * INSERT_TASK_ztplqt - Creates an OpenMP task that calls CORE_ztplqt with the
+ * tiles (Am, An) of A, (Bm, Bn) of B and (Tm, Tn) of T, all declared as
+ * read/write. nb is unused.
+ */
+void
+INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
+                    int M, int N, int L, int ib, int nb,
+                    const CHAM_desc_t *A, int Am, int An, int lda,
+                    const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+{
+    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
+    /* Only the workspace pointer is captured by the task.
+     * NOTE(review): ws_host looks shared by every task submitted with the
+     * same options -- confirm concurrent tasks cannot clobber it. */
+    CHAMELEON_Complex64_t *work = options->ws_host;
+
+    /* Tiles are tracked through their first element. The tile coordinates
+     * only locate a tile, so they cannot serve as a section length: their
+     * product is 0 on the first tile row/column and zero-length sections are
+     * not allowed in depend. */
+#pragma omp task firstprivate(M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt, work) depend(inout:ptrA[0], ptrB[0], ptrT[0])
+    CORE_ztplqt( M, N, L, ib,
+                 ptrA, lda, ptrB, ldb, ptrT, ldt, work );
+}
diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c
new file mode 100644
index 0000000000000000000000000000000000000000..3746c30415d0896390263a373fffe51a1e04c372
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_ztpmlqt.c
@@ -0,0 +1,37 @@
+/**
+ *
+ * @file codelet_ztpmlqt.c
+ *
+ * @copyright 2009-2016 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * + * @brief Chameleon ztpmlqt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = options->ws_host; +#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + CORE_ztpmlqt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); +} diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..a5a42d95c47e13e1edfc0f423df27a74759ecff8 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztpmqrt.c @@ -0,0 +1,37 @@ +/** + * + * @file codelet_ztpmqrt.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + * @brief Chameleon ztpmqrt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = options->ws_host; +#pragma omp task firstprivate(side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + CORE_ztpmqrt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); +} diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..8930bc9797ba22e4f0c5d18dfe4e632e5f4eb2c6 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztpqrt.c @@ -0,0 +1,36 @@ +/** + * + * @file codelet_ztpqrt.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztpqrt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *work = options->ws_host; +#pragma omp task firstprivate(M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb, work) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + CORE_ztpqrt( M, N, L, ib, + ptrA, lda, ptrB, ldb, ptrT, ldt, work ); +} diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..fe1e7dcfc192ebab786b57aebfdd70453adc6c9d --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztradd.c @@ -0,0 +1,94 @@ +/** + * + * @file codelet_ztradd.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztradd StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + ****************************************************************************** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = ChamUpperLower: A and B are general matrices. + * = ChamUpper: op(A) and B are upper trapezoidal matrices. + * = ChamLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = ChamNoTrans: op(A) = A + * = ChamTrans: op(A) = A' + * = ChamConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices op(A) and B. + * + * @param[in] N + * Number of columns of the matrices op(A) and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M + * otherwise. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = ChamNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B. 
LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +void INSERT_TASK_ztradd(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_ztrasm.c b/runtime/openmp/codelets/codelet_ztrasm.c new file mode 100644 index 0000000000000000000000000000000000000000..25cffbd33d6dfbfa370d17edc35973e2335f4ead --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrasm.c @@ -0,0 +1,34 @@ +/** + * + * @file codelet_ztrasm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrasm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void INSERT_TASK_ztrasm(const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrB = RTBLKADDR(B, double, Bm, Bn); +#pragma omp task firstprivate(storev, uplo, diag, M, N, ptrA, lda, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_ztrasm(storev, uplo, diag, M, N, ptrA, lda, ptrB); +} diff --git a/runtime/openmp/codelets/codelet_ztrmm.c b/runtime/openmp/codelets/codelet_ztrmm.c new file mode 100644 index 0000000000000000000000000000000000000000..6e17b1c1c3105e7b3d908e3a8fd5ebd8278f72b2 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrmm.c @@ -0,0 +1,48 @@ +/** + * + * @file codelet_ztrmm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrmm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(side, uplo, transA, diag, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_ztrmm(side, uplo, + transA, diag, + m, n, + alpha, ptrA, lda, + ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_ztrsm.c b/runtime/openmp/codelets/codelet_ztrsm.c new file mode 100644 index 0000000000000000000000000000000000000000..9d62e063d0e547266f96ba17ae6f599b9ea24fd7 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrsm.c @@ -0,0 +1,50 @@ +/** + * + * @file codelet_ztrsm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrsm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(side, uplo, transA, diag, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout: ptrB[0:Bm*Bn]) + CORE_ztrsm(side, uplo, + transA, diag, + m, n, + alpha, ptrA, lda, + ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_ztrssq.c b/runtime/openmp/codelets/codelet_ztrssq.c new file mode 100644 index 0000000000000000000000000000000000000000..bc04bf30c69db3790d7c27e56832ca8fedd450e0 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrssq.c @@ -0,0 +1,35 @@ +/** + * + * @file codelet_ztrssq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrssq StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_diag_t diag, + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); +#pragma omp task firstprivate(uplo, diag, m, n, ptrA, lda, SCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0]) + CORE_ztrssq( uplo, diag, m, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1]); +} diff --git a/runtime/openmp/codelets/codelet_ztrtri.c b/runtime/openmp/codelets/codelet_ztrtri.c new file mode 100644 index 0000000000000000000000000000000000000000..d59f593752004257260c0b0d407199eb488544bc --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrtri.c @@ -0,0 +1,43 @@ +/** + * + * @file codelet_ztrtri.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrtri StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_ztrtri(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_diag_t diag, + int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, diag, n, ptrA, lda, iinfo) depend(inout:ptrA[0]) + CORE_ztrtri(uplo, diag, n, ptrA, lda, &iinfo); +} diff --git a/runtime/openmp/codelets/codelet_ztslqt.c b/runtime/openmp/codelets/codelet_ztslqt.c new file mode 100644 index 0000000000000000000000000000000000000000..9a3b6db7bcc567933b090319837a9ad5d37ad17f --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztslqt.c @@ -0,0 +1,116 @@ +/** + * + * @file codelet_ztslqt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztslqt StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_ztslqt computes a LQ factorization of a rectangular matrix + * formed by coupling side-by-side a complex M-by-M + * lower triangular tile A1 and a complex M-by-N tile A2: + * + * | A1 A2 | = L * Q + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + * A2(i,1:n), and tau in TAU(i). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A1 and A2. M >= 0. + * The number of columns of the tile A1. + * + * @param[in] N + * The number of columns of the tile A2. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M-by-M tile A1. + * On exit, the elements on and below the diagonal of the array + * contain the M-by-M lower trapezoidal tile L; + * the elements above the diagonal are not referenced. + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M). + * + * @param[in,out] A2 + * On entry, the M-by-N tile A2. + * On exit, all the elements with the array TAU, represent + * the unitary tile Q as a product of elementary reflectors + * (see Further Details). + * + * @param[in] LDA2 + * The leading dimension of the tile A2. 
LDA2 >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_ztslqt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *work = options->ws_worker; + CHAMELEON_Complex64_t *tau = options->ws_host; +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) + CORE_ztslqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); +} diff --git a/runtime/openmp/codelets/codelet_ztsmlq.c b/runtime/openmp/codelets/codelet_ztsmlq.c new file mode 100644 index 0000000000000000000000000000000000000000..83611cf1f5795f82a6536fe1b6ce4b950337acd2 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsmlq.c @@ -0,0 +1,148 @@ +/** + * + * @file codelet_ztsmlq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztsmlq StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Azzam Haidar + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_ztsmlq overwrites the general complex M1-by-N1 tile A1 and + * M2-by-N2 tile A2 with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * | A1 | | A1 A2 | * Q + * | A2 | + * + * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H + * | A2 | + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(k)' . . . H(2)' H(1)' + * + * as returned by CORE_ZTSLQT. + * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M1 + * The number of rows of the tile A1. M1 >= 0. + * + * @param[in] N1 + * The number of columns of the tile A1. N1 >= 0. + * + * @param[in] M2 + * The number of rows of the tile A2. M2 >= 0. + * M2 = M1 if side == ChamRight. + * + * @param[in] N2 + * The number of columns of the tile A2. N2 >= 0. + * N2 = N1 if side == ChamLeft. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M1-by-N1 tile A1. + * On exit, A1 is overwritten by the application of Q. + * + * @param[in] LDA1 + * The leading dimension of the array A1. 
LDA1 >= max(1,M1). + * + * @param[in,out] A2 + * On entry, the M2-by-N2 tile A2. + * On exit, A2 is overwritten by the application of Q. + * + * @param[in] LDA2 + * The leading dimension of the tile A2. LDA2 >= max(1,M2). + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_ZTSLQT in the first k rows of its array argument V. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,K). + * + * @param[in] T + * The IB-by-N1 triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] WORK + * Workspace array of size + * LDWORK-by-M1 if side == ChamLeft + * LDWORK-by-IB if side == ChamRight + * + * @param[in] LDWORK + * The leading dimension of the array WORK. + * LDWORK >= max(1,IB) if side == ChamLeft + * LDWORK >= max(1,N1) if side == ChamRight + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_ztsmlq(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = 
options->ws_worker; + int ldwork = side == ChamLeft ? ib : nb; +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib, + ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +} diff --git a/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c new file mode 100644 index 0000000000000000000000000000000000000000..57071465d08e31024798e7fdf2e303ac1b8c7723 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c @@ -0,0 +1,47 @@ +/** + * + * @file codelet_ztsmlq_hetra1.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztsmlq_hetra1 StarPU codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Azzam Haidar + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, 
Vn); + CHAMELEON_Complex64_t *work = options->ws_worker; + int ldwork = side == ChamLeft ? ib : nb; +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, + ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +} diff --git a/runtime/openmp/codelets/codelet_ztsmqr.c b/runtime/openmp/codelets/codelet_ztsmqr.c new file mode 100644 index 0000000000000000000000000000000000000000..e6beea2243ab1445f7641e4897769d5d8e251d01 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsmqr.c @@ -0,0 +1,148 @@ +/** + * + * @file codelet_ztsmqr.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztsmqr StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Azzam Haidar + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_ztsmqr overwrites the general complex M1-by-N1 tile A1 and + * M2-by-N2 tile A2 with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * | A1 | | A1 A2 | * Q + * | A2 | + * + * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H + * | A2 | + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by CORE_ZTSQRT. 
+ * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M1 + * The number of rows of the tile A1. M1 >= 0. + * + * @param[in] N1 + * The number of columns of the tile A1. N1 >= 0. + * + * @param[in] M2 + * The number of rows of the tile A2. M2 >= 0. + * M2 = M1 if side == ChamRight. + * + * @param[in] N2 + * The number of columns of the tile A2. N2 >= 0. + * N2 = N1 if side == ChamLeft. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M1-by-N1 tile A1. + * On exit, A1 is overwritten by the application of Q. + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M1). + * + * @param[in,out] A2 + * On entry, the M2-by-N2 tile A2. + * On exit, A2 is overwritten by the application of Q. + * + * @param[in] LDA2 + * The leading dimension of the tile A2. LDA2 >= max(1,M2). + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_ZTSQRT in the first k columns of its array argument V. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,K). + * + * @param[in] T + * The IB-by-N1 triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. 
+ * + * @param[out] WORK + * Workspace array of size + * LDWORK-by-N1 if side == ChamLeft + * LDWORK-by-IB if side == ChamRight + * + * @param[in] LDWORK + * The leading dimension of the array WORK. + * LDWORK >= max(1,IB) if side == ChamLeft + * LDWORK >= max(1,M1) if side == ChamRight + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_ztsmqr(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = options->ws_worker; + int ldwork = side == ChamLeft ? 
ib : nb; +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib, + ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +} diff --git a/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c new file mode 100644 index 0000000000000000000000000000000000000000..9a1fe799e6b38aae04256a0a569a882b23d73ff0 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c @@ -0,0 +1,47 @@ +/** + * + * @file codelet_ztsmqr_hetra1.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztsmqr_hetra1 OpenMP codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Azzam Haidar + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * Submit CORE_ztsmqr_hetra1 as one OpenMP task on tiles A1(A1m,A1n), A2(A2m,A2n), V(Vm,Vn) and T(Tm,Tn). + * @ingroup CORE_CHAMELEON_Complex64_t + * Task dependences: A1 and A2 are inout, T and V are in. + */ +void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = options->ws_worker; /* workspace pointer read from options */ 
+ int ldwork = side == ChamLeft ? ib : nb; /* passed to the kernel right after work: ib for ChamLeft, nb otherwise */ +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, + ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); +} diff --git a/runtime/openmp/codelets/codelet_ztsqrt.c b/runtime/openmp/codelets/codelet_ztsqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..bc16fb146e1bd534c16489c2621e69958c43d5e0 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsqrt.c @@ -0,0 +1,105 @@ +/** + * + * @file codelet_ztsqrt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztsqrt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_ztsqrt computes a QR factorization of a rectangular matrix + * formed by coupling a complex N-by-N upper triangular tile A1 + * on top of a complex M-by-N tile A2: + * + * | A1 | = Q * R + * | A2 | + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A2. M >= 0. + * + * @param[in] N + * The number of rows of the tile A1. + * The number of columns of the tiles A1 and A2. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. 
+ * + * @param[in,out] A1 + * On entry, the N-by-N tile A1. + * On exit, the elements on and above the diagonal of the array + * contain the N-by-N upper trapezoidal tile R; + * the elements below the diagonal are not referenced. + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,N). + * + * @param[in,out] A2 + * On entry, the M-by-N tile A2. + * On exit, all the elements with the array TAU, represent + * the unitary tile Q as a product of elementary reflectors + * (see Further Details). + * + * @param[in] LDA2 + * The leading dimension of the tile A2. LDA2 >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * @note The OpenMP task declares A1, A2 and T as inout: T is produced here, so tasks that read T must be ordered after this one. + */ + +void INSERT_TASK_ztsqrt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *work = options->ws_worker; + CHAMELEON_Complex64_t *tau = options->ws_host; /* NOTE(review): work and tau point at buffers stored in options; confirm tasks running concurrently may share them */ +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) + CORE_ztsqrt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, 
ldt, tau, work); +} diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c new file mode 100644 index 0000000000000000000000000000000000000000..2748674e5aee0fda795eb34ba3cf1a271fdb01f8 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztstrf.c @@ -0,0 +1,110 @@ +/** + * + * @file codelet_ztstrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztstrf OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_ztstrf computes an LU factorization of a complex matrix formed + * by an upper triangular NB-by-N tile U on top of a M-by-N tile A + * using partial pivoting with row interchanges. + * + * This is the right-looking Level 2.5 BLAS version of the algorithm. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] NB + * + * @param[in,out] U + * On entry, the NB-by-N upper triangular tile. + * On exit, the new factor U from the factorization + * + * @param[in] LDU + * The leading dimension of the array U. LDU >= max(1,NB). + * + * @param[in,out] A + * On entry, the M-by-N tile to be factored. 
+ * On exit, the factor L from the factorization + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[in,out] L + * On entry, the IB-by-N lower triangular tile. + * On exit, the interchanged rows form the tile A in case of pivoting. + * + * @param[in] LDL + * The leading dimension of the array L. LDL >= max(1,IB). + * + * @param[out] IPIV + * The pivot indices; for 1 <= i <= min(M,N), row i of the + * tile U was interchanged with row IPIV(i) of the tile A. + * + * @param[in,out] WORK + * + * @param[in] LDWORK + * The dimension of the array WORK. + * + * @param[out] INFO + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if INFO = -k, the k-th argument had an illegal value + * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * has been completed, but the factor U is exactly + * singular, and division by zero will occur if it is used + * to solve a system of equations. 
+ * + */ + +void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *U, int Um, int Un, int ldu, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo) +{ /* Resolve the addresses of tiles A, U and L, then submit CORE_ztstrf as one OpenMP task. */ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrU = RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un); + CHAMELEON_Complex64_t *ptrL = RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln); + CHAMELEON_Complex64_t *work = options->ws_worker; /* workspace pointer read from options; nb is passed after it in the kernel call */ +#pragma omp task firstprivate(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0]) + CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo); /* iinfo is firstprivate, so whatever the kernel stores through &iinfo stays in the task's private copy; check_info is not used */ +} diff --git a/runtime/openmp/codelets/codelet_zttlqt.c b/runtime/openmp/codelets/codelet_zttlqt.c new file mode 100644 index 0000000000000000000000000000000000000000..e693c6b7a19f57c182e3828b178ae60500579380 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zttlqt.c @@ -0,0 +1,117 @@ +/** + * + * @file codelet_zttlqt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zttlqt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zttlqt computes an LQ factorization of a rectangular matrix + * formed by coupling side-by-side a complex M-by-M lower triangular tile A1 + * and a complex M-by-N lower triangular tile A2: + * + * | A1 A2 | = L * Q + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + * A2(i,1:n), and tau in TAU(i). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tiles A1 and A2. M >= 0. + * The number of columns of the tile A1. + * + * @param[in] N + * The number of columns of the tile A2. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M-by-M tile A1. + * On exit, the elements on and below the diagonal of the array + * contain the M-by-M lower trapezoidal tile L; + * the elements above the diagonal are not referenced. + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M). + * + * @param[in,out] A2 + * On entry, the M-by-N lower triangular tile A2. + * On exit, the elements on and below the diagonal of the array + * with the array TAU, represent + * the unitary tile Q as a product of elementary reflectors + * (see Further Details). 
+ * + * @param[in] LDA2 + * The leading dimension of the array A2. LDA2 >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[in,out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zttlqt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ /* Resolve the addresses of tiles A1, A2 and T, then submit CORE_zttlqt as one OpenMP task; nb is not used below. */ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *work = options->ws_worker; + CHAMELEON_Complex64_t *tau = options->ws_host; /* work and tau are pointers stored in options; the task captures the pointers firstprivate */ +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) + CORE_zttlqt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); /* A1, A2 and T are all declared inout */ +} diff --git a/runtime/openmp/codelets/codelet_zttmlq.c b/runtime/openmp/codelets/codelet_zttmlq.c new file mode 100644 index 0000000000000000000000000000000000000000..e5093489c58e6a1811b388da81de89d180b72103 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zttmlq.c @@ -0,0 +1,140 @@ +/** + * + * @file codelet_zttmlq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. 
+ * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zttmlq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zttmlq overwrites the general complex M1-by-N1 tile A1 and + * M2-by-N2 tile A2 (N1 == N2) with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * | A1 | | A1 | * Q + * | A2 | | A2 | + * + * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H + * | A2 | | A2 | + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by CORE_zttqrt. + * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M1 + * The number of rows of the tile A1. M1 >= 0. + * + * @param[in] N1 + * The number of columns of the tile A1. N1 >= 0. + * + * @param[in] M2 + * The number of rows of the tile A2. M2 >= 0. + * + * @param[in] N2 + * The number of columns of the tile A2. N2 >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M1-by-N1 tile A1. + * On exit, A1 is overwritten by the application of Q. 
+ * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M1). + * + * @param[in,out] A2 + * On entry, the M2-by-N2 tile A2. + * On exit, A2 is overwritten by the application of Q. + * + * @param[in] LDA2 + * The leading dimension of the tile A2. LDA2 >= max(1,M2). + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_ZTTQRT in the first k rows of its array argument V. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,K). + * + * @param[out] T + * The IB-by-N1 triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] WORK + * Workspace array of size LDWORK-by-N1. + * + * @param[in] LDWORK + * The dimension of the array WORK. LDWORK >= max(1,IB). + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zttmlq(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ /* Resolve the addresses of tiles A1, A2, T and V, then submit CORE_zttmlq as one OpenMP task. */ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = options->ws_worker; /* workspace pointer read from options */ + int ldwork = side == ChamLeft ? 
ib : nb; /* passed to the kernel right after work: ib for ChamLeft, nb otherwise */ +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, + ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); /* A1 and A2 are inout, T and V are in */ +} diff --git a/runtime/openmp/codelets/codelet_zttmqr.c b/runtime/openmp/codelets/codelet_zttmqr.c new file mode 100644 index 0000000000000000000000000000000000000000..b28b47eb31543b4dd2aa4323043a5ec814fce3c8 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zttmqr.c @@ -0,0 +1,146 @@ +/** + * + * @file codelet_zttmqr.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zttmqr OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zttmqr overwrites the general complex M1-by-N1 tile A1 and + * M2-by-N2 tile A2 (N1 == N2) with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * | A1 | | A1 | * Q + * | A2 | | A2 | + * + * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H + * | A2 | | A2 | + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by CORE_zttqrt. 
+ * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M1 + * The number of rows of the tile A1. M1 >= 0. + * + * @param[in] N1 + * The number of columns of the tile A1. N1 >= 0. + * + * @param[in] M2 + * The number of rows of the tile A2. M2 >= 0. + * M2 = M1 if side == ChamRight. + * + * @param[in] N2 + * The number of columns of the tile A2. N2 >= 0. + * N2 = N1 if side == ChamLeft. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M1-by-N1 tile A1. + * On exit, A1 is overwritten by the application of Q. + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M1). + * + * @param[in,out] A2 + * On entry, the M2-by-N2 tile A2. + * On exit, A2 is overwritten by the application of Q. + * + * @param[in] LDA2 + * The leading dimension of the tile A2. LDA2 >= max(1,M2). + * + * @param[in] V + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_ZTTQRT in the first k columns of its array argument V. + * + * @param[in] LDV + * The leading dimension of the array V. LDV >= max(1,K). + * + * @param[in] T + * The IB-by-N1 triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. 
+ * + * @param[out] WORK + * Workspace array of size + * LDWORK-by-N1 if side == ChamLeft + * LDWORK-by-IB if side == ChamRight + * + * @param[in] LDWORK + * The leading dimension of the array WORK. + * LDWORK >= max(1,IB) if side == ChamLeft + * LDWORK >= max(1,M1) if side == ChamRight + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zttmqr(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ /* Resolve the addresses of tiles A1, A2, T and V, then submit CORE_zttmqr as one OpenMP task. */ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + CHAMELEON_Complex64_t *work = options->ws_worker; /* workspace pointer read from options */ + int ldwork = side == ChamLeft ? 
ib : nb; /* ib for ChamLeft, nb otherwise. NOTE(review): the header asks for LDWORK >= max(1,M1) on the right side; confirm m1 <= nb for every caller */ +#pragma omp task firstprivate(side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib, + ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); /* A1 and A2 are inout, T and V are in */ +} diff --git a/runtime/openmp/codelets/codelet_zttqrt.c b/runtime/openmp/codelets/codelet_zttqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..5061ef3babfe8d1097b22c25de68f3a2af7e4f7c --- /dev/null +++ b/runtime/openmp/codelets/codelet_zttqrt.c @@ -0,0 +1,117 @@ +/** + * + * @file codelet_zttqrt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zttqrt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zttqrt computes a QR factorization of a rectangular matrix + * formed by coupling a complex N-by-N upper triangular tile A1 + * on top of a complex M-by-N upper trapezoidal tile A2: + * + * | A1 | = Q * R + * | A2 | + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(1) H(2) . . . H(k), where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i), + * and tau in TAU(i). 
+ * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A2. M >= 0. + * + * @param[in] N + * The number of columns of the tile A1 and A2. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the N-by-N tile A1. + * On exit, the elements on and above the diagonal of the array + * contain the N-by-N upper trapezoidal tile R; + * the elements below the diagonal are not referenced. + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,N). + * + * @param[in,out] A2 + * On entry, the M-by-N upper triangular tile A2. + * On exit, the elements on and above the diagonal of the array + * with the array TAU, represent + * the unitary tile Q as a product of elementary reflectors + * (see Further Details). + * + * @param[in] LDA2 + * The leading dimension of the array A2. LDA2 >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). 
+ * + * @param[in,out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zttqrt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ /* Resolve the addresses of tiles A1, A2 and T, then submit CORE_zttqrt as one OpenMP task; nb is not used below. */ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *work = options->ws_worker; + CHAMELEON_Complex64_t *tau = options->ws_host; /* work and tau are pointers stored in options; the task captures the pointers firstprivate */ +#pragma omp task firstprivate(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, work, tau) depend(inout:ptrA1[0], ptrA2[0], ptrT[0]) + CORE_zttqrt(m, n, ib, ptrA1, lda1, ptrA2, lda2, ptrT, ldt, tau, work); /* A1, A2 and T are all declared inout */ +} diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c new file mode 100644 index 0000000000000000000000000000000000000000..fc16b5e13d3ab386940b562c319f28c5ca7e16dc --- /dev/null +++ b/runtime/openmp/codelets/codelet_zunmlq.c @@ -0,0 +1,128 @@ +/** + * + * @file codelet_zunmlq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zunmlq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zunmlq overwrites the general complex M-by-N tile C with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * C C * Q + * TRANS = 'C': Q**H * C C * Q**H + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(k) . . . H(2) H(1) + * + * as returned by CORE_zgelqt. Q is of order M if SIDE = 'L' and of order N + * if SIDE = 'R'. + * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M + * The number of rows of the tile C. M >= 0. + * + * @param[in] N + * The number of columns of the tile C. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * If SIDE = ChamLeft, M >= K >= 0; + * if SIDE = ChamRight, N >= K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] A + * Dimension: (LDA,M) if SIDE = ChamLeft, + * (LDA,N) if SIDE = ChamRight, + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_zgelqt in the first k rows of its array argument A. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,K). 
+ * + * @param[in] T + * The IB-by-K triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[in,out] C + * On entry, the M-by-N tile C. + * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + * @param[in,out] WORK + * On exit, if INFO = 0, WORK(1) returns the optimal LDWORK. + * + * @param[in] LDWORK + * The dimension of the array WORK. + * If SIDE = ChamLeft, LDWORK >= max(1,N); + * if SIDE = ChamRight, LDWORK >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ /* Resolve the addresses of tiles A, T and C, then submit CORE_zunmlq as one OpenMP task. */ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); + CHAMELEON_Complex64_t *work = options->ws_worker; /* workspace pointer read from options */ +#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + CORE_zunmlq(side, trans, m, n, k, ib, + ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); /* A and T are in, C is inout; nb is passed right after work */ +} diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c new file mode 100644 index 0000000000000000000000000000000000000000..207469b5d2b08e157f9cd2d2346d5195465ef90a --- /dev/null +++ 
b/runtime/openmp/codelets/codelet_zunmqr.c @@ -0,0 +1,128 @@ +/** + * + * @file codelet_zunmqr.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zunmqr OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zunmqr overwrites the general complex M-by-N tile C with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * C C * Q + * TRANS = 'C': Q**H * C C * Q**H + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by CORE_zgeqrt. Q is of order M if SIDE = 'L' and of order N + * if SIDE = 'R'. + * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * + * @param[in] M + * The number of rows of the tile C. M >= 0. + * + * @param[in] N + * The number of columns of the tile C. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * If SIDE = ChamLeft, M >= K >= 0; + * if SIDE = ChamRight, N >= K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. 
+ * + * @param[in] A + * Dimension: (LDA,K) + * The i-th column must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_zgeqrt in the first k columns of its array argument A. + * + * @param[in] LDA + * The leading dimension of the array A. + * If SIDE = ChamLeft, LDA >= max(1,M); + * if SIDE = ChamRight, LDA >= max(1,N). + * + * @param[in] T + * The IB-by-K triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[in,out] C + * On entry, the M-by-N tile C. + * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + * @param[in,out] WORK + * On exit, if INFO = 0, WORK(1) returns the optimal LDWORK. + * + * @param[in] LDWORK + * The dimension of the array WORK. + * If SIDE = ChamLeft, LDWORK >= max(1,N); + * if SIDE = ChamRight, LDWORK >= max(1,M). 
+ * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); + CHAMELEON_Complex64_t *work = options->ws_worker; +#pragma omp task firstprivate(side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + CORE_zunmqr(side, trans, m, n, k, ib, + ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); +} diff --git a/runtime/openmp/control/runtime_async.c b/runtime/openmp/control/runtime_async.c new file mode 100644 index 0000000000000000000000000000000000000000..3cf365c3b4e4c5cd845dd0df9528c9dbab8818fd --- /dev/null +++ b/runtime/openmp/control/runtime_async.c @@ -0,0 +1,71 @@ +/** + * + * @file runtime_async.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP asynchronous routines
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ *
+ */
+#include <stdlib.h>
+#include "chameleon_openmp.h"
+
+/**
+ * Create a sequence (no runtime state is needed: always returns CHAMELEON_SUCCESS)
+ */
+int RUNTIME_sequence_create( CHAM_context_t *chamctxt,
+                             RUNTIME_sequence_t *sequence )
+{
+    (void)chamctxt;
+    (void)sequence;
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ * Destroy a sequence (nothing to release: always returns CHAMELEON_SUCCESS)
+ */
+int RUNTIME_sequence_destroy( CHAM_context_t *chamctxt,
+                              RUNTIME_sequence_t *sequence )
+{
+    (void)chamctxt;
+    (void)sequence;
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ * Wait for the completion of a sequence (taskwait on the children of the calling task)
+ */
+int RUNTIME_sequence_wait( CHAM_context_t *chamctxt,
+                           RUNTIME_sequence_t *sequence )
+{
+    (void)chamctxt;
+    (void)sequence;
+
+#pragma omp taskwait
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ * Terminate a sequence: store the request and the status in it, and the status in the request
+ */
+void RUNTIME_sequence_flush( CHAM_context_t *chamctxt,
+                             RUNTIME_sequence_t *sequence,
+                             RUNTIME_request_t *request,
+                             int status )
+{
+    (void)chamctxt;
+    sequence->request = request;
+    sequence->status = status;
+    request->status = status;
+    return;
+}
diff --git a/runtime/openmp/control/runtime_context.c b/runtime/openmp/control/runtime_context.c
new file mode 100644
index 0000000000000000000000000000000000000000..f74adcb278deacf2c6598cce162e36ef34e858dd
--- /dev/null
+++ b/runtime/openmp/control/runtime_context.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * @file runtime_context.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP context routines
+ *
+ * @version 1.0.0
+ * @author Cedric Augonnet
+ * @author Mathieu Faverge
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ *
+ */
+#include <stdlib.h>
+#include "chameleon_openmp.h"
+
+/**
+ * Create new context: mark it as scheduled by the OpenMP runtime
+ */
+void RUNTIME_context_create( CHAM_context_t *chamctxt )
+{
+    chamctxt->scheduler = RUNTIME_SCHED_OPENMP;
+    /* Will require the static initialization if we want to use it in this code */
+    return;
+}
+
+/**
+ * Clean the context (nothing to release)
+ */
+void RUNTIME_context_destroy( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return;
+}
+
+/**
+ * Enable a feature: profiling and bound only print that they are unavailable, others are ignored
+ */
+void RUNTIME_enable( int lever )
+{
+    switch (lever)
+    {
+    case CHAMELEON_PROFILING_MODE:
+        fprintf(stderr, "Profiling is not available with OpenMP\n");
+        break;
+    case CHAMELEON_BOUND:
+        fprintf(stderr, "Bound computation is not available with OpenMP\n");
+        break;
+    default:
+        return;
+    }
+    return;
+}
+
+/**
+ * Disable a feature: same messages as RUNTIME_enable, nothing is actually switched off
+ */
+void RUNTIME_disable( int lever )
+{
+    switch (lever)
+    {
+    case CHAMELEON_PROFILING_MODE:
+        fprintf(stderr, "Profiling is not available with OpenMP\n");
+        break;
+    case CHAMELEON_BOUND:
+        fprintf(stderr, "Bound computation is not available with OpenMP\n");
+        break;
+    default:
+        return;
+    }
+    return;
+}
diff --git a/runtime/openmp/control/runtime_control.c b/runtime/openmp/control/runtime_control.c
new file mode 100644
index 0000000000000000000000000000000000000000..44dafa955f731f93d1ea3a59c9fe008ecd6e444f
--- /dev/null
+++ b/runtime/openmp/control/runtime_control.c
@@ -0,0 +1,125 @@
+/**
+ *
+ * @file runtime_control.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP control routines
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @author Cedric Augonnet
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ *
+ */
+#include <stdlib.h>
+#include "chameleon_openmp.h"
+
+/**
+ * Initialize the runtime: only warns about the unsupported options (GPUs, multi-threaded kernels)
+ */
+int RUNTIME_init( CHAM_context_t *chamctxt,
+                  int ncpus,
+                  int ncudas,
+                  int nthreads_per_worker )
+{
+    int hres = 0;
+    if ( ncudas > 0 ) {
+        chameleon_warning( "RUNTIME_init_scheduler(OpenMP)", "GPUs are not supported for now");
+    }
+
+    if ( nthreads_per_worker > 0 ) {
+        chameleon_warning( "RUNTIME_init_scheduler(OpenMP)", "Multi-threaded kernels are not supported for now");
+    }
+
+    return hres;
+}
+
+/**
+ * Finalize the runtime (nothing to release)
+ */
+void RUNTIME_finalize( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return;
+}
+
+/**
+ * To suspend the processing of new tasks by workers (no-op with OpenMP)
+ */
+void RUNTIME_pause( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return;
+}
+
+/**
+ * This is the symmetrical call to RUNTIME_pause,
+ * used to resume the workers polling for new tasks (no-op with OpenMP).
+ */
+void RUNTIME_resume( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return;
+}
+
+/**
+ * Barrier between the threads of the current OpenMP team
+ */
+void RUNTIME_barrier( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+#pragma omp barrier
+}
+
+/**
+ * Display a progress information when executing the tasks (not implemented with OpenMP)
+ */
+void RUNTIME_progress( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return;
+}
+
+
+/**
+ * Thread rank within the current OpenMP team.
+ */
+int RUNTIME_thread_rank( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return omp_get_thread_num();
+}
+
+/**
+ * Number of threads in the current OpenMP team (1 outside of a parallel region).
+ */
+int RUNTIME_thread_size( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return omp_get_num_threads();
+}
+
+/**
+ * The process rank (always 0 with this runtime)
+ */
+int RUNTIME_comm_rank( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return 0;
+}
+
+/**
+ * This returns the size of the distributed computation (always 1 with this runtime)
+ */
+int RUNTIME_comm_size( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return 1;
+}
diff --git a/runtime/openmp/control/runtime_descriptor.c b/runtime/openmp/control/runtime_descriptor.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c719b7573bc470b5ca6aa42ce769f8a61c365cc
--- /dev/null
+++ b/runtime/openmp/control/runtime_descriptor.c
@@ -0,0 +1,118 @@
+/**
+ *
+ * @file runtime_descriptor.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP descriptor routines
+ *
+ * @version 1.0.0
+ * @author Vijay Joshi
+ * @author Cedric Castagnede
+ * @author Philippe Virouleau
+ * @date 2018-06-21
+ *
+ */
+#include <stdlib.h>
+#include "chameleon_openmp.h"
+
+/* Tag sizes are not used by this runtime: both arguments are ignored */
+void RUNTIME_comm_set_tag_sizes( int user_tag_width,
+                                 int user_tag_sep )
+{
+    (void)user_tag_width;
+    (void)user_tag_sep;
+}
+
+/* Allocate size bytes with malloc(); returns whatever malloc() returns */
+void *RUNTIME_malloc( size_t size )
+{
+    return malloc( size );
+}
+
+/* Release a buffer with free(); size is ignored */
+void RUNTIME_free( void *ptr,
+                   size_t size )
+{
+    (void)size;
+    free( ptr );
+    return;
+}
+
+/* No runtime data is attached to a descriptor: nothing to create */
+void RUNTIME_desc_create( CHAM_desc_t *desc )
+{
+    (void)desc;
+    return;
+}
+
+/* Nothing to release, RUNTIME_desc_create() does not allocate anything */
+void RUNTIME_desc_destroy( CHAM_desc_t *desc )
+{
+    (void)desc;
+    return;
+}
+
+/* No-op: always returns CHAMELEON_SUCCESS */
+int RUNTIME_desc_acquire( const CHAM_desc_t *desc )
+{
+    (void)desc;
+    return CHAMELEON_SUCCESS;
+}
+
+/* No-op: always returns CHAMELEON_SUCCESS */
+int RUNTIME_desc_release( const CHAM_desc_t *desc )
+{
+    (void)desc;
+    return CHAMELEON_SUCCESS;
+}
+
+/* No-op: both arguments are ignored */
+void
+RUNTIME_desc_flush( const CHAM_desc_t *desc,
+                    const RUNTIME_sequence_t *sequence )
+{
+    (void)desc;
+    (void)sequence;
+    return;
+}
+
+
+/* No-op */
+void
+RUNTIME_flush( )
+{
+    return;
+}
+
+/* No-op: the tile A(Am, An) is left where it is */
+void
+RUNTIME_data_flush( const RUNTIME_sequence_t *sequence,
+                    const CHAM_desc_t *A, int Am, int An )
+{
+    (void)sequence;
+    (void)A;
+    (void)Am;
+    (void)An;
+    return;
+}
+
+#if defined(CHAMELEON_USE_MIGRATE)
+/* No-op: all the arguments are ignored */
+void RUNTIME_data_migrate( const RUNTIME_sequence_t *sequence,
+                           const CHAM_desc_t *A, int Am, int An, int new_rank )
+{
+    (void)sequence; (void)A; (void)Am; (void)An; (void)new_rank;
+}
+#endif
+
+/* Address of the tile (m, n), as returned by the get_blkaddr callback of the descriptor */
+void *RUNTIME_data_getaddr( const CHAM_desc_t *desc, int m, int n )
+{
+    return desc->get_blkaddr( desc, m, n );
+}
diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c
new file mode 100644
index 0000000000000000000000000000000000000000..83b8480121efc28575c9b66757711105f577074e
--- /dev/null
+++ b/runtime/openmp/control/runtime_options.c
@@ -0,0 +1,89 @@
+/**
+ *
+ * @file runtime_options.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP options routines
+ *
+ * @version 1.0.0
+ * @author Cedric Augonnet
+ * @author Mathieu Faverge
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ *
+ */
+#include <stdlib.h>
+#include "chameleon_openmp.h"
+
+/**
+ * Reset the options: attach the sequence and the request, evaluate the
+ * profiling and parallel flags, and start without any workspace.
+ */
+void RUNTIME_options_init( RUNTIME_option_t *option, CHAM_context_t *chamctxt,
+                           RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    option->sequence = sequence;
+    option->request = request;
+    option->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE;
+    option->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
+    option->priority = RUNTIME_PRIORITY_MIN;
+    option->ws_wsize = 0;
+    option->ws_hsize = 0;
+    option->ws_worker = NULL;
+    option->ws_host = NULL;
+    return;
+}
+
+/**
+ * Nothing to release here: the workspace is freed by RUNTIME_options_ws_free()
+ */
+void RUNTIME_options_finalize( RUNTIME_option_t *option, CHAM_context_t *chamctxt )
+{
+    (void)option;
+    (void)chamctxt;
+    return;
+}
+
+/**
+ * Allocate the worker scratch buffer (worker_size bytes, skipped when 0) and
+ * record both sizes. No host buffer is allocated, only host_size is stored.
+ *
+ * @retval CHAMELEON_SUCCESS on success
+ * @retval CHAMELEON_ERR_OUT_OF_RESOURCES if the allocation failed; the
+ *         options are left untouched in that case
+ */
+int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, size_t host_size )
+{
+    if ( worker_size > 0 ) {
+        /* TODO used for scratch, maybe we can do better than malloc */
+        void *scratch = malloc( worker_size );
+        if ( scratch == NULL ) {
+            return CHAMELEON_ERR_OUT_OF_RESOURCES;
+        }
+        options->ws_worker = scratch;
+        options->ws_wsize = worker_size;
+    }
+    /* TODO maybe we'll need it at some point */
+    options->ws_hsize = host_size;
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ * Free the worker scratch buffer, if any, and reset the workspace description
+ */
+int RUNTIME_options_ws_free( RUNTIME_option_t *options )
+{
+    if ( options->ws_wsize ) {
+        free( options->ws_worker );
+        options->ws_worker = NULL; /* do not leave a dangling pointer behind */
+        options->ws_wsize = 0;
+    }
+    options->ws_hsize = 0;
+    return CHAMELEON_SUCCESS;
+}
diff --git a/runtime/openmp/control/runtime_profiling.c b/runtime/openmp/control/runtime_profiling.c
new file mode 100644
index 0000000000000000000000000000000000000000..3d3a7070114f6e0d3707a28dc5d059466b698189
--- /dev/null
+++ b/runtime/openmp/control/runtime_profiling.c
@@ -0,0 +1,70 @@
+/**
+ *
+ * @file runtime_profiling.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon OpenMP profiling routines
+ *
+ * @version 1.0.0
+ * @author Cedric Augonnet
+ * @author Mathieu Faverge
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/timer.h"
+
+double RUNTIME_get_time(){
+    return CHAMELEON_timer();
+}
+
+void RUNTIME_start_profiling()
+{
+    chameleon_warning("RUNTIME_start_profiling()", "FxT profiling is not available with OpenMP\n");
+}
+
+void RUNTIME_stop_profiling()
+{
+    chameleon_warning("RUNTIME_stop_profiling()", "FxT profiling is not available with OpenMP\n");
+}
+
+void RUNTIME_start_stats()
+{
+    chameleon_warning("RUNTIME_start_stats()", "pruning stats are not available with OpenMP\n");
+}
+
+void RUNTIME_stop_stats()
+{
+    chameleon_warning("RUNTIME_stop_stats()", "pruning stats are not available with OpenMP\n");
+}
+
+void RUNTIME_schedprofile_display(void)
+{
+    chameleon_warning("RUNTIME_schedprofile_display(openmp)", "Scheduler profiling is not available with OpenMP\n");
+}
+
+void RUNTIME_kernelprofile_display(void)
+{
+    chameleon_warning("RUNTIME_kernelprofile_display(openmp)", "Kernel profiling is not available with OpenMP\n");
+}
+
+/**
+ * Set iteration numbers for traces (push and pop are both no-ops with OpenMP)
+ */
+void RUNTIME_iteration_push( CHAM_context_t *chamctxt, unsigned long iteration )
+{
+    (void)chamctxt; (void)iteration;
+    return;
+}
+void RUNTIME_iteration_pop( CHAM_context_t *chamctxt )
+{
+    (void)chamctxt;
+    return;
+}
diff --git a/runtime/openmp/include/chameleon_openmp.h b/runtime/openmp/include/chameleon_openmp.h
new file mode 100644
index 0000000000000000000000000000000000000000..acd5671b8291efa62396f8e8892584cbde3d02a0
--- /dev/null
+++ b/runtime/openmp/include/chameleon_openmp.h
@@ -0,0 +1,33 @@
+/**
+ *
+ * @file chameleon_openmp.h
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research
Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon OpenMP runtime main header + * + * @version 1.0.0 + * @author Philippe Virouleau + * @date 2018-06-21 + * + */ +#ifndef _CHAMELEON_OPENMP_H_ +#define _CHAMELEON_OPENMP_H_ + +#include "coreblas.h" + +#include "control/common.h" +#include <omp.h> + +/* + * Access to the block pointer of tile (m, n), cast to type* + */ +#define RTBLKADDR( desc, type, m, n ) ( (type*)RUNTIME_data_getaddr( desc, m, n ) ) + + +#endif /* _CHAMELEON_OPENMP_H_ */ diff --git a/timing/time_zgeqrf_tile.c b/timing/time_zgeqrf_tile.c index b35782a698828c262c27b3630885bec1821880ca..e04bfb8a6a3962a908bac2a74c079a4c1edc817a 100644 --- a/timing/time_zgeqrf_tile.c +++ b/timing/time_zgeqrf_tile.c @@ -48,8 +48,14 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) memset(descT->mat, 0, (descT->llm*descT->lln)*sizeof(ChamComplexDouble)); /* CHAMELEON ZGEQRF */ +#if defined (CHAMELEON_SCHED_OPENMP) +#pragma omp parallel +#pragma omp master +#endif + { START_TIMING(); CHAMELEON_zgeqrf_Tile( descA, descT ); + } STOP_TIMING(); /* Check the solution */ diff --git a/timing/time_zpotrf_tile.c b/timing/time_zpotrf_tile.c index 67e95738955c0575092566eeafbe94a21179f18c..9707f44d608a4487a80d04dc1345c6cfb8d124bb 100644 --- a/timing/time_zpotrf_tile.c +++ b/timing/time_zpotrf_tile.c @@ -37,6 +37,7 @@ RunTest( int *iparam, double *dparam, chameleon_time_t *t_ ) PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); + CHAMELEON_zplghe_Tile( (double)N, ChamUpperLower, descA, 51 ); /* Initialize data and save A if check */ if ( check ) { diff --git a/timing/timing.c b/timing/timing.c index 
ad6c1b57c532bb8d6c0f5ded5a9c34e64741163d..274c486a04b06c728253f3977eef3a4582e036c5 100644 --- a/timing/timing.c +++ b/timing/timing.c @@ -58,6 +58,10 @@ #include <starpu.h> #endif /* defined(CHAMELEON_SCHED_STARPU) */ +#if defined(CHAMELEON_SCHED_OPENMP) +#include <omp.h> +#endif /* defined(CHAMELEON_SCHED_OPENMP) */ + #if defined(CHAMELEON_HAVE_GETOPT_H) #include <getopt.h> @@ -146,6 +150,8 @@ Test(int64_t n, int *iparam) { printf( "%7d %7d %7d ", iparam[IPARAM_M], iparam[IPARAM_N], iparam[IPARAM_K] ); fflush( stdout ); + // FIXME: use posix_memalign, or hwloc allocation with first touch, to avoid + // re-using an already allocated block, which prevents first-touch t = (double*)malloc(niter*sizeof(double)); memset(t, 0, niter*sizeof(double)); @@ -645,8 +651,14 @@ main(int argc, char *argv[]) { CHAMELEON_Init( iparam[IPARAM_THRDNBR], iparam[IPARAM_NCUDAS] ); +#if defined (CHAMELEON_SCHED_OPENMP) +#pragma omp parallel +#pragma omp master +#endif + { /* Get the number of threads set by the runtime */ iparam[IPARAM_THRDNBR] = CHAMELEON_GetThreadNbr(); + } /* Stops profiling here to avoid profiling uninteresting routines. It will be reactivated in the time_*.c routines with the macro START_TIMING() */