Commit 835fd102 authored by Philippe Virouleau's avatar Philippe Virouleau

Add OpenMP in codelets

parent 1f74f316
......@@ -108,7 +108,7 @@ option(CHAMELEON_USE_MIGRATE
# -----------------------------
# Create a list of possible runtime
set(CHAMELEON_SCHED_list PARSEC STARPU QUARK
set(CHAMELEON_SCHED_list PARSEC STARPU QUARK OPENMP
CACHE INTERNAL "List of available runtimes" )
set( CHAMELEON_SCHED_PARSEC OFF CACHE INTERNAL
......@@ -120,6 +120,9 @@ set(CHAMELEON_SCHED_STARPU OFF CACHE INTERNAL
# Internal per-runtime switch for the Quark scheduler; declared OFF here.
# NOTE(review): presumably turned ON later according to CHAMELEON_SCHED - confirm.
set(CHAMELEON_SCHED_QUARK OFF CACHE INTERNAL
"Enable Quark scheduler as the default runtime
(Conflict with other CHAMELEON_SCHED_* options)")
# Internal per-runtime switch for the OpenMP scheduler; declared OFF here,
# following the same pattern as the Quark switch above.
set(CHAMELEON_SCHED_OPENMP OFF CACHE INTERNAL
"Enable OpenMP scheduler as the default runtime
(Conflict with other CHAMELEON_SCHED_* options)")
# User-facing selector of the runtime; defaults to STARPU and documents the
# accepted values through CHAMELEON_SCHED_list in its help string.
set( CHAMELEON_SCHED STARPU
CACHE STRING "Choose the chameleon internal runtime from ${CHAMELEON_SCHED_list}")
......@@ -154,6 +157,7 @@ if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
set(CHAMELEON_ENABLE_MPI ON FORCE)
endif()
# FIXME: with OpenMP-target we should enable CUDA too
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set(CHAMELEON_ENABLE_CUDA OFF CACHE INTERNAL "Tells if CUDA might be supported by the runtime")
if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
......@@ -892,6 +896,133 @@ if( CHAMELEON_SCHED_QUARK )
endif()
if( CHAMELEON_SCHED_OPENMP )
    # The OpenMP runtime only needs an OpenMP-capable C compiler. Ask CMake's
    # FindOpenMP module for it: it sets OPENMP_FOUND and OpenMP_C_FLAGS with the
    # flag that matches the compiler in use (not necessarily "-fopenmp").
    find_package(OpenMP)

    # hwloc was the only package the previous lookup could actually reach
    # (it was the first entry of the list handed to find_package); keep the
    # optional lookup so HWLOC_* variables remain available to the rest of
    # the build.
    # TODO: wire CUDA in once OpenMP-target offloading is supported.
    find_package(HWLOC)

    if( OPENMP_FOUND )
        message("-- ${Blue}Add definition CHAMELEON_SCHED_OPENMP"
        " - Activate OpenMP in Chameleon${ColourReset}")
        # Compile and link everything with the compiler-specific OpenMP flag.
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
    else()
        # Without OpenMP support the task pragmas of the codelets would be
        # ignored by the compiler, so stop instead of configuring such a build.
        message(FATAL_ERROR "CHAMELEON_SCHED_OPENMP is enabled but no OpenMP support"
        " has been found for the C compiler")
    endif()
endif()
# getopt
check_include_files(getopt.h CHAMELEON_HAVE_GETOPT_H)
if (CHAMELEON_HAVE_GETOPT_H)
......@@ -937,6 +1068,9 @@ endif()
if (CHAMELEON_SCHED_STARPU)
list(APPEND CHAMELEON_LIBRARIES chameleon_starpu)
endif()
# When the OpenMP runtime is selected, add its library to the list of
# Chameleon libraries.
if( CHAMELEON_SCHED_OPENMP )
    list( APPEND CHAMELEON_LIBRARIES
          chameleon_openmp )
endif()
list(APPEND CHAMELEON_LIBRARIES hqr)
set(CHAMELEON_LIBRARIES_DEP ${CHAMELEON_LIBRARIES} ${CHAMELEON_DEP})
......
......@@ -296,6 +296,8 @@ elseif(CHAMELEON_SCHED_PARSEC)
target_link_libraries(chameleon chameleon_parsec)
elseif(CHAMELEON_SCHED_QUARK)
target_link_libraries(chameleon chameleon_quark)
elseif(CHAMELEON_SCHED_OPENMP)
target_link_libraries(chameleon chameleon_openmp)
endif()
if (NOT CHAMELEON_SIMULATION)
# Depends on coreblas only for set_coreblas_gemm3m_enabled() (Maybe we should change that)
......
......@@ -23,6 +23,7 @@
*
*/
#include "control/common.h"
#include <stdio.h>
/**
********************************************************************************
......
......@@ -27,6 +27,7 @@
#cmakedefine CHAMELEON_SCHED_QUARK
#cmakedefine CHAMELEON_SCHED_PARSEC
#cmakedefine CHAMELEON_SCHED_STARPU
#cmakedefine CHAMELEON_SCHED_OPENMP
/* Communication engine */
#cmakedefine CHAMELEON_USE_MPI
......
......@@ -30,6 +30,7 @@ typedef enum runtime_id_e {
RUNTIME_SCHED_QUARK, /**< Quark runtime */
RUNTIME_SCHED_PARSEC, /**< PaRSEC runtime */
RUNTIME_SCHED_STARPU, /**< StarPU runtime */
RUNTIME_SCHED_OPENMP, /**< OpenMP runtime */
} RUNTIME_id_t;
/**
......
......@@ -109,6 +109,8 @@ elseif( CHAMELEON_SCHED_PARSEC )
add_subdirectory(parsec)
elseif( CHAMELEON_SCHED_STARPU )
add_subdirectory(starpu)
elseif( CHAMELEON_SCHED_OPENMP )
add_subdirectory(openmp)
endif()
###
......
###
#
# @file CMakeLists.txt
#
# @copyright 2009-2015 The University of Tennessee and The University of
# Tennessee Research Foundation. All rights reserved.
# @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# Univ. Bordeaux. All rights reserved.
#
###
#
# @project CHAMELEON
# CHAMELEON is a software package provided by:
# Inria Bordeaux - Sud-Ouest,
# Univ. of Tennessee,
# King Abdullah University of Science and Technology
# Univ. of California Berkeley,
# Univ. of Colorado Denver.
#
# @version 1.0.0
# @author Cedric Castagnede
# @author Emmanuel Agullo
# @author Mathieu Faverge
# @author Florent Pruvost
# @date 2012-07-13
#
###
cmake_minimum_required(VERSION 2.8)
include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/include )
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include )
# Headers of the OpenMP runtime
# -----------------------------
# No precision-generated header is produced for now: the generation of
# include/runtime_codelet_z.h through precisions_rules_py (precisions
# "s;d;c;z;ds;zc", target directory "include") is disabled, as are the
# following hand-written headers:
#   include/runtime_codelet_profile.h
#   include/runtime_codelets.h
#   include/runtime_profiling.h
#   include/runtime_workspace.h
set( RUNTIME_HDRS
     include/chameleon_openmp.h )

# Target that carries the headers, built as part of "all"
add_custom_target( runtime_openmp_include ALL
                   SOURCES ${RUNTIME_HDRS} )

# Install the runtime headers
install( FILES       ${RUNTIME_HDRS}
         DESTINATION include/runtime/openmp )
# Control sources shared by all precisions
# ----------------------------------------
# FIXME: restore the precision generation below once the files are done
#   set(RUNTIME_COMMON_GENERATED "")
#   set(ZSRC
#       control/runtime_zprofiling.c
#       control/runtime_zlocality.c )
#   precisions_rules_py(
#       RUNTIME_COMMON_GENERATED "${ZSRC}"
#       PRECISIONS "${CHAMELEON_PRECISION}"
#       TARGETDIR  "control")
#
# RUNTIME_COMMON_GENERATED is therefore not assigned in this file; it is still
# expanded at the end of the list below.
# control/runtime_workspace.c is left out of the list for now.
set( RUNTIME_COMMON
     control/runtime_async.c
     control/runtime_context.c
     control/runtime_control.c
     control/runtime_descriptor.c
     control/runtime_options.c
     control/runtime_profiling.c
     ${RUNTIME_COMMON_GENERATED} )

# Disabled: per-precision compile flags for control/runtime_profiling.c
#   set(flags_to_add "")
#   foreach(_prec ${CHAMELEON_PRECISION})
#       set(flags_to_add "${flags_to_add} -DPRECISION_${_prec}")
#   endforeach()
#   set_source_files_properties(control/runtime_profiling.c PROPERTIES COMPILE_FLAGS "${flags_to_add}")
# Codelet sources, generated for every requested precision
# --------------------------------------------------------
# The z-precision templates come from CODELETS_ZSRC
# (codelets/codelet_zcallback.c is not part of the list for now).
set( RUNTIME_SRCS_GENERATED "" )
set( ZSRC ${CODELETS_ZSRC} )
precisions_rules_py( RUNTIME_SRCS_GENERATED "${ZSRC}"
                     PRECISIONS "${CHAMELEON_PRECISION}"
                     TARGETDIR  "codelets" )

# Full source list of the runtime: control files first, then the codelets
set( RUNTIME_SRCS ${RUNTIME_COMMON} ${RUNTIME_SRCS_GENERATED} )

# Target that carries the sources, built as part of "all"
add_custom_target( openmp_sources ALL
                   SOURCES ${RUNTIME_SRCS} )

# Register both source-carrying targets in the global list of source targets
set( CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};runtime_openmp_include;openmp_sources" CACHE INTERNAL "List of targets of sources" )
# Library of the OpenMP runtime
# -----------------------------
add_library( chameleon_openmp ${RUNTIME_SRCS} )
set_target_properties( chameleon_openmp PROPERTIES
                       LINKER_LANGUAGE  Fortran
                       INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib" )

# NOTE(review): OPENMP_LIBRARIES_DEP is not assigned anywhere in the visible
# sources, so it may expand to nothing - confirm.
target_link_libraries( chameleon_openmp
                       ${OPENMP_LIBRARIES_DEP}
                       coreblas )

# The header and source carrying targets must be built before the library
add_dependencies( chameleon_openmp
                  chameleon_include
                  control_include
                  coreblas_include
                  runtime_openmp_include
                  openmp_sources )

# Install the library
install( TARGETS chameleon_openmp
         ARCHIVE DESTINATION lib
         LIBRARY DESTINATION lib )
###
### END CMakeLists.txt
###
/**
*
* @file codelet_zasum.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zasum OpenMP codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 1.0.0
* @author Florent Pruvost
* @author Philippe Virouleau
* @date 2018-06-20
* @precisions normal z -> c d s
*
*/
#include "chameleon_openmp.h"
#include "coreblas/coreblas_z.h"
#include "chameleon/tasks_z.h"
/**
 * Creates an OpenMP task that calls CORE_dzasum with tile (Am, An) of A as
 * input and tile (Bm, Bn) of B as the in/out argument.
 *
 * The options argument is not used by this codelet.
 */
void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, int M, int N,
                        const CHAM_desc_t *A, int Am, int An, int lda,
                        const CHAM_desc_t *B, int Bm, int Bn)
{
    CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An );
    double *ptrB = RTBLKADDR( B, double, Bm, Bn );
    /*
     * Dependences are declared on the first element of each tile. Am*An and
     * Bm*Bn are products of tile coordinates, not element counts, and are 0
     * for any tile in the first row or column; a zero-length array section
     * is not allowed in a depend clause.
     */
#pragma omp task firstprivate(storev, uplo, M, N, lda, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
    CORE_dzasum(storev, uplo, M, N, ptrA, lda, ptrB);
}
/**
*
* @file codelet_zaxpy.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zaxpy OpenMP codelet
*
* @version 1.0.0
* @author Florent Pruvost
* @author Philippe Virouleau
* @date 2018-06-20
* @precisions normal z -> c d s
*
*/
#include "chameleon_openmp.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * Creates an OpenMP task that calls CORE_zaxpy with tile (Am, An) of A as
 * input and tile (Bm, Bn) of B as the in/out argument.
 *
 * The options argument is not used by this codelet.
 */
void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
                       int M, CHAMELEON_Complex64_t alpha,
                       const CHAM_desc_t *A, int Am, int An, int incA,
                       const CHAM_desc_t *B, int Bm, int Bn, int incB)
{
    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
    /*
     * Dependences are declared on the first element of each tile. Am*An and
     * Bm*Bn are products of tile coordinates, not element counts, and are 0
     * for any tile in the first row or column; a zero-length array section
     * is not allowed in a depend clause.
     */
#pragma omp task firstprivate(M, alpha, incA, incB, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
    CORE_zaxpy(M, alpha, ptrA, incA, ptrB, incB);
}
/**
*
* @file codelet_zbuild.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zbuild OpenMP codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Piotr Luszczek
* @author Pierre Lemarinier
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @author Philippe Virouleau
* @date 2018-06-20
* @precisions normal z -> c d s
*
*/
#include "chameleon_openmp.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
 * Creates an OpenMP task that invokes the user callback on tile (Am, An) of A.
 *
 * The callback receives the inclusive range of global rows and columns
 * covered by the tile, the tile address, its leading dimension and user_data.
 * The options argument is not used by this codelet.
 */
void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
                         const CHAM_desc_t *A, int Am, int An, int lda,
                         void *user_data, void* user_build_callback )
{
    int row_min, row_max, col_min, col_max;
    /* The last tile of a row/column of tiles is clipped to the matrix size */
    row_min = Am*A->mb ;
    row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
    col_min = An*A->nb ;
    col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
    void (*callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
    callback = user_build_callback;
    /*
     * The dependence is declared on the first element of the tile. Am*An is a
     * product of tile coordinates, not an element count, and is 0 for any
     * tile in the first row or column; a zero-length array section is not
     * allowed in a depend clause.
     * callback is listed explicitly so that its capture does not rely on the
     * implicit data-sharing rules.
     */
#pragma omp task firstprivate(row_min, row_max, col_min, col_max, ptrA, lda, user_data, callback) depend(inout:ptrA[0])
    callback(row_min, row_max, col_min, col_max, ptrA, lda, user_data);
}
/**
*
* @file codelet_zgeadd.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeadd OpenMP codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Philippe Virouleau
* @date 2018-06-20
* @precisions normal z -> c d s
*
*/
#include "chameleon_openmp.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
******************************************************************************
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
*
* B <- alpha * op(A) + beta * B,
*
* where op(X) = X, X', or conj(X')
*
*******************************************************************************
*
* @param[in] trans
* Specifies whether the matrix A is non-transposed, transposed, or
* conjugate transposed
* = ChamNoTrans: op(A) = A
* = ChamTrans: op(A) = A'
* = ChamConjTrans: op(A) = conj(A')
*
* @param[in] M
* Number of rows of the matrices op(A) and B.
*
* @param[in] N
* Number of columns of the matrices op(A) and B.
*
* @param[in] alpha
* Scalar factor of A.
*
* @param[in] A
* Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M
* otherwise.
*
* @param[in] LDA
* Leading dimension of the array A. LDA >= max(1,k), with k=M, if
* trans = ChamNoTrans, and k=N otherwise.
*
* @param[in] beta
* Scalar factor of B.
*
* @param[in,out] B
* Matrix of size LDB-by-N.
* On exit, B = alpha * op(A) + beta * B
*
* @param[in] LDB
* Leading dimension of the array B. LDB >= max(1,M)
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
                        cham_trans_t trans, int m, int n, int nb,
                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
{
    /* options and nb are not used by this codelet */
    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
    /*
     * Tile (Am, An) of A is read and tile (Bm, Bn) of B is updated by the
     * task. Dependences are declared on the first element of each tile.
     * Am*An and Bm*Bn are products of tile coordinates, not element counts,
     * and are 0 for any tile in the first row or column; a zero-length array
     * section is not allowed in a depend clause.
     */
#pragma omp task firstprivate(trans, m, n, alpha, beta, lda, ldb, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
    CORE_zgeadd(trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb);
}
/**
*
* @file codelet_zgelqt.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgelqt OpenMP codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Philippe Virouleau
* @date 2018-06-20
* @precisions normal z -> c d s
*
*/
#include "chameleon_openmp.h"
#include "chameleon/tasks_z.h"
#include "coreblas/coreblas_z.h"
/**
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zgelqt - computes a LQ factorization of a complex M-by-N tile A: A = L * Q.
*
* The tile Q is represented as a product of elementary reflectors
*
* Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v'
*
* where tau is a complex scalar, and v is a complex vector with
* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
* A(i,i+1:n), and tau in TAU(i).
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the tile A. M >= 0.
*
* @param[in] N
* The number of columns of the tile A. N >= 0.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A
* On entry, the M-by-N tile A.
* On exit, the elements on and below the diagonal of the array
* contain the M-by-min(M,N) lower trapezoidal tile L (L is
* lower triangular if M <= N); the elements above the diagonal,
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* @param[out] WORK
*
*******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
                        const CHAM_desc_t *A, int Am, int An, int lda,
                        const CHAM_desc_t *T, int Tm, int Tn, int ldt)
{
    /* nb is not used by this codelet */
    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
    /*
     * NOTE(review): TAU and work point at options->ws_worker and
     * options->ws_host. If the same options object is used to submit several
     * tasks, they all receive the same two buffers, and tasks that run
     * concurrently would overwrite each other's workspace - confirm how these
     * buffers are allocated.
     */
    CHAMELEON_Complex64_t *TAU = options->ws_worker;
    CHAMELEON_Complex64_t *work = options->ws_host;
    /*
     * Tiles (Am, An) of A and (Tm, Tn) of T are both declared inout.
     * Dependences are declared on the first element of each tile. Am*An and
     * Tm*Tn are products of tile coordinates, not element counts, and are 0
     * for any tile in the first row or column; a zero-length array section
     * is not allowed in a depend clause.
     */
#pragma omp task firstprivate(m, n, ib, ptrA, lda, ptrT, ldt, work, TAU) depend(inout:ptrA[0]) depend(inout:ptrT[0])
    CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work);
}
/**
*
* @file codelet_zgemm.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgemm OpenMP codelet
*
* @version 1.0.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Philippe Virouleau
* @date 2018-06-20
* @precisions normal z -> c d s
*
*/