diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 06ae4b92d81d7fbfd0e9f8872b467d65e03d82a5..025e11002037932bbcc7774f777191888b19a265 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -93,6 +93,18 @@ build_parsec: BUILD_OPTIONS: "-DCHAMELEON_SCHED=PARSEC" VERSION: parsec +build_openmp: + <<: *build_script + artifacts: + name: build_openmp + expire_in: 48 hours + paths: + - build + - chameleon_openmp.log + variables: + BUILD_OPTIONS: "-DCHAMELEON_SCHED=OPENMP" + VERSION: openmp + test_starpu_branches: <<: *test_script variables: @@ -239,6 +251,42 @@ test_parsec_master: only: - master@solverstack/chameleon +test_openmp_branches: + <<: *test_script + variables: + TESTS_RESTRICTION: "-R \"test_shm_s|test_mpi_s\"" + VERSION: openmp + dependencies: + - build_openmp + artifacts: + name: test_openmp + expire_in: 48 hours + paths: + - build + - chameleon_openmp.log + - chameleon_openmp.lcov + only: + - branches + except: + - master + +test_openmp_master: + <<: *test_script + variables: + TESTS_RESTRICTION: "-R \"_shm_|_mpi_\"" + VERSION: openmp + dependencies: + - build_openmp + artifacts: + name: test_openmp + expire_in: 48 hours + paths: + - build + - chameleon_openmp.log + - chameleon_openmp.lcov + only: + - master@solverstack/chameleon + coverage: stage: analyse dependencies: @@ -246,11 +294,13 @@ coverage: - test_starpu_simgrid_branches - test_quark_branches - test_parsec_branches + - test_openmp_branches script: - lcov -a chameleon_starpu.lcov -a chameleon_starpu_simgrid.lcov -a chameleon_quark.lcov -a chameleon_parsec.lcov + -a chameleon_openmp.lcov -o chameleon.lcov - lcov --summary chameleon.lcov only: @@ -289,6 +339,7 @@ sonarqube: - test_starpu_simgrid_master - test_quark_master - test_parsec_master + - test_openmp_master artifacts: name: chameleon_analysis expire_in: 1 week diff --git a/CMakeLists.txt b/CMakeLists.txt index ff59bbf70240ad7667b0c7c91a8ef3c6b7f3eae8..b9ddfa195420d1b064be7caeb3ee8a93383f830f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ 
-108,7 +108,7 @@ option(CHAMELEON_USE_MIGRATE # ----------------------------- # Create a list of possible runtime -set(CHAMELEON_SCHED_list PARSEC STARPU QUARK +set(CHAMELEON_SCHED_list PARSEC STARPU QUARK OPENMP CACHE INTERNAL "List of available runtimes" ) set( CHAMELEON_SCHED_PARSEC OFF CACHE INTERNAL @@ -120,6 +120,9 @@ set(CHAMELEON_SCHED_STARPU OFF CACHE INTERNAL set(CHAMELEON_SCHED_QUARK OFF CACHE INTERNAL "Enable Quark scheduler as the default runtime (Conflict with other CHAMELEON_SCHED_* options)") +set(CHAMELEON_SCHED_OPENMP OFF CACHE INTERNAL + "Enable OpenMP scheduler as the default runtime + (Conflict with other CHAMELEON_SCHED_* options)") set( CHAMELEON_SCHED STARPU CACHE STRING "Choose the chameleon internal runtime from ${CHAMELEON_SCHED_list}") @@ -892,6 +895,22 @@ if( CHAMELEON_SCHED_QUARK ) endif() +if( CHAMELEON_SCHED_OPENMP ) + find_package(OpenMP REQUIRED) + + if ( OPENMP_FOUND ) + message("-- ${Blue}Add definition CHAMELEON_SCHED_OPENMP" + " - Activate OpenMP in Chameleon${ColourReset}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OpenMP_Fortran_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}") + # TODO: check for compiler support for "_Pragma" + else ( OPENMP_FOUND ) + message(FATAL_ERROR "Something went wrong when finding OpenMP") + endif ( OPENMP_FOUND ) + +endif( CHAMELEON_SCHED_OPENMP ) + # getopt check_include_files(getopt.h CHAMELEON_HAVE_GETOPT_H) if (CHAMELEON_HAVE_GETOPT_H) @@ -937,6 +956,9 @@ endif() if (CHAMELEON_SCHED_STARPU) list(APPEND CHAMELEON_LIBRARIES chameleon_starpu) endif() +if (CHAMELEON_SCHED_OPENMP) + list(APPEND CHAMELEON_LIBRARIES chameleon_openmp) +endif() list(APPEND CHAMELEON_LIBRARIES hqr) set(CHAMELEON_LIBRARIES_DEP ${CHAMELEON_LIBRARIES} ${CHAMELEON_DEP}) diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index e4ebc3691cf2e2642a2ed87e85dda0d2d2383f5a..11e057fe1d72b2739abb3e2a7e5862bb4bc2c22a 100644 
--- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -29,7 +29,7 @@ if ( CHAMELEON_USE_CUDA ) set( CHAMELEON_COPY_DIAG ON ) else() option(CHAMELEON_COPY_DIAG - "This options enables the duplication of the diagonal tiles in some algorithm to avoid anti-dependencies on lower/upper triangular parts (Might be useful to StarPU)" ON) + "This options enables the duplication of the diagonal tiles in some algorithm to avoid anti-dependencies on lower/upper triangular parts (Might be useful to StarPU and OpenMP)" ON) endif() if ( CHAMELEON_SCHED_QUARK ) @@ -296,6 +296,8 @@ elseif(CHAMELEON_SCHED_PARSEC) target_link_libraries(chameleon chameleon_parsec) elseif(CHAMELEON_SCHED_QUARK) target_link_libraries(chameleon chameleon_quark) +elseif(CHAMELEON_SCHED_OPENMP) + target_link_libraries(chameleon chameleon_openmp) endif() if (NOT CHAMELEON_SIMULATION) # Depends on coreblas only for set_coreblas_gemm3m_enabled() (Maybe we should change that) diff --git a/control/chameleon_f77.c b/control/chameleon_f77.c index 961eb3b29fd579baf98e42d68583d40f56e41017..6ae0feedf53c5b489f4512fdeedd16a3ef283d4f 100644 --- a/control/chameleon_f77.c +++ b/control/chameleon_f77.c @@ -30,11 +30,11 @@ extern "C" { /** * FORTRAN API - auxiliary function prototypes */ - void CHAMELEON_INIT(int *CORES, int *NGPUS, int *INFO) - { *INFO = CHAMELEON_Init(*CORES, *NGPUS); } + void __CHAMELEON_INIT(int *CORES, int *NGPUS, int *INFO) + { *INFO = __chameleon_init(*CORES, *NGPUS); } - void CHAMELEON_FINALIZE(int *INFO) - { *INFO = CHAMELEON_Finalize(); } + void __CHAMELEON_FINALIZE(int *INFO) + { *INFO = __chameleon_finalize(); } void CHAMELEON_ENABLE(int *lever, int *INFO) { *INFO = CHAMELEON_Enable(*lever); } diff --git a/control/chameleon_f77.h b/control/chameleon_f77.h index 2e8b02a572b94ef707319a10b19c0de976f509b2..c70c63270b835c58b4ae85923487ccb697a5e9f2 100644 --- a/control/chameleon_f77.h +++ b/control/chameleon_f77.h @@ -30,8 +30,8 @@ #define CHAMELEON_WS_FNAME(lcname, UCNAME) 
CHAMELEON_GLOBAL(chameleon_alloc_workspace_##lcname, CHAMELEON_ALLOC_WORKSPACE_##UCNAME) #define CHAMELEON_WST_FNAME(lcname, UCNAME) CHAMELEON_GLOBAL(chameleon_alloc_workspace_##lcname##_tile, CHAMELEON_ALLOC_WORKSPACE_##UCNAME##_TILE) -#define CHAMELEON_INIT CHAMELEON_GLOBAL(chameleon_init, CHAMELEON_INIT) -#define CHAMELEON_FINALIZE CHAMELEON_GLOBAL(chameleon_finalize, CHAMELEON_FINALIZE) +#define __CHAMELEON_INIT CHAMELEON_GLOBAL(__chameleon_init, __CHAMELEON_INIT) +#define __CHAMELEON_FINALIZE CHAMELEON_GLOBAL(__chameleon_finalize, __CHAMELEON_FINALIZE) #define CHAMELEON_ENABLE CHAMELEON_GLOBAL(chameleon_enable, CHAMELEON_ENABLE) #define CHAMELEON_DISABLE CHAMELEON_GLOBAL(chameleon_disable, CHAMELEON_DISABLE) #define CHAMELEON_SET CHAMELEON_GLOBAL(chameleon_set, CHAMELEON_SET) @@ -46,4 +46,20 @@ #define CHAMELEON_LAPACK_TO_TILE CHAMELEON_GLOBAL(chameleon_lapack_to_tile, CHAMELEON_LAPACK_TO_TILE) #define CHAMELEON_TILE_TO_LAPACK CHAMELEON_GLOBAL(chameleon_tile_to_lapack, CHAMELEON_TILE_TO_LAPACK) +#if defined(CHAMELEON_SCHED_OPENMP) +#define CHAMELEON_INIT(nworkers, ncudas)\ + CALL __CHAMELEON_INIT(nworkers, ncudas)\ + !$omp parallel\ + !$omp master +#define CHAMELEON_FINALIZE()\ + !$omp end master\ + !$omp end parallel\ + CALL __CHAMELEON_FINALIZE() +#else +#define CHAMELEON_INIT(nworkers, ncudas)\ + CALL __CHAMELEON_INIT(nworkers, ncudas) +#define CHAMELEON_FINALIZE()\ + CALL __CHAMELEON_FINALIZE() +#endif + #endif /* _chameleon_f77_h_ */ diff --git a/control/control.c b/control/control.c index c0657726ed1a9eb3e67bcef408ed68aa4f46838e..8a8de87173c63ad1998baa0ee00e31fcd9df0110 100644 --- a/control/control.c +++ b/control/control.c @@ -34,7 +34,7 @@ * * @ingroup Control * - * CHAMELEON_Init - Initialize CHAMELEON. + * @brief Initialize CHAMELEON. 
* ****************************************************************************** * @@ -46,20 +46,19 @@ * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Init(int cores, int gpus) +int __chameleon_init(int cores, int gpus) { - return CHAMELEON_InitPar(cores, gpus, -1); + return __chameleon_initpar(cores, gpus, -1); } /** * * @ingroup Control * - * CHAMELEON_InitPar - Initialize CHAMELEON. + * @brief Initialize CHAMELEON. * ****************************************************************************** * @@ -74,11 +73,10 @@ int CHAMELEON_Init(int cores, int gpus) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_InitPar(int ncpus, int ncudas, int nthreads_per_worker) +int __chameleon_initpar(int ncpus, int ncudas, int nthreads_per_worker) { CHAM_context_t *chamctxt; @@ -119,15 +117,14 @@ int CHAMELEON_InitPar(int ncpus, int ncudas, int nthreads_per_worker) * * @ingroup Control * - * CHAMELEON_Finalize - Finalize CHAMELEON. + * @brief Finalize CHAMELEON. 
* ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Finalize(void) +int __chameleon_finalize(void) { CHAM_context_t *chamctxt = chameleon_context_self(); if (chamctxt == NULL) { diff --git a/example/lapack_to_chameleon/step1.c b/example/lapack_to_chameleon/step1.c index 96511d058f4a53e121815a306033e090163418cd..69fbc50db34b7934581db0e57ed391dd09bd52dd 100644 --- a/example/lapack_to_chameleon/step1.c +++ b/example/lapack_to_chameleon/step1.c @@ -79,9 +79,9 @@ int main(int argc, char *argv[]) { print_header( argv[0], iparam); /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } /* @@ -170,8 +170,14 @@ int main(int argc, char *argv[]) { free(B); free(X); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); - return EXIT_SUCCESS; + return rc; } diff --git a/example/lapack_to_chameleon/step2.c b/example/lapack_to_chameleon/step2.c index 3f16a7175454b1f92253e5ec8486d4baf2299b71..871bbac8476abbb2a86d0b06d651ce2bddfa8201 100644 --- a/example/lapack_to_chameleon/step2.c +++ b/example/lapack_to_chameleon/step2.c @@ -74,9 +74,9 @@ int main(int argc, char *argv[]) { print_header( argv[0], iparam); /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } /* Question chameleon to get the block (tile) size (number of columns) */ @@ -220,8 +220,14 @@ int main(int argc, char *argv[]) { CHAMELEON_Desc_Destroy( &descX ); CHAMELEON_Desc_Destroy( &descAC ); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); - return EXIT_SUCCESS; + return rc; } diff --git a/example/lapack_to_chameleon/step3.c b/example/lapack_to_chameleon/step3.c index 7005b7c90f7dda53b8e39fabe1a5ba4db4dd95bb..02755d1fc7b644d6ab407b86ebc63cbe7c82f32a 100644 --- a/example/lapack_to_chameleon/step3.c +++ b/example/lapack_to_chameleon/step3.c @@ -75,11 +75,12 @@ int main(int argc, char *argv[]) { print_header( argv[0], iparam); /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } + /* Question chameleon to get the block (tile) size (number of columns) */ CHAMELEON_Get( CHAMELEON_TILE_SIZE, &NB ); @@ -201,8 +202,14 @@ int main(int argc, char *argv[]) { CHAMELEON_Desc_Destroy( &descX ); CHAMELEON_Desc_Destroy( &descAC ); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); - return EXIT_SUCCESS; + return rc; } diff --git a/example/lapack_to_chameleon/step4.c b/example/lapack_to_chameleon/step4.c index 7ed2847a37754a5d074fc5eed83b1564bf642053..635e14c633d596c1bbebff5d11ddf4c2da80c0b9 100644 --- a/example/lapack_to_chameleon/step4.c +++ b/example/lapack_to_chameleon/step4.c @@ -53,7 +53,6 @@ int main(int argc, char *argv[]) { RUNTIME_sequence_t *sequence = NULL; /* CHAMELEON request uniquely identifies each asynchronous function call */ RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; /* initialize some parameters with default values */ int iparam[IPARAM_SIZEOF]; @@ -83,9 +82,9 @@ int main(int argc, char *argv[]) { print_header( argv[0], iparam); /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } /* Question chameleon to get the block (tile) size (number of columns) */ @@ -141,10 +140,10 @@ int main(int argc, char *argv[]) { * have been terminated */ CHAMELEON_Sequence_Wait(sequence); - status = sequence->status; - if ( status != 0 ) { - fprintf(stderr, "Error in computation (%d)\n", status); - return EXIT_FAILURE; + rc = sequence->status; + if ( rc != CHAMELEON_SUCCESS ) { + fprintf(stderr, "Error in computation (%d)\n", rc); + goto finalize; } CHAMELEON_Sequence_Destroy(sequence); @@ -195,8 +194,14 @@ int main(int argc, char *argv[]) { CHAMELEON_Desc_Destroy( &descX ); CHAMELEON_Desc_Destroy( &descAC ); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); - return EXIT_SUCCESS; + return rc; } diff --git a/example/lapack_to_chameleon/step5.c b/example/lapack_to_chameleon/step5.c index c9daf2ebfbbf60b00d7bbf5b11e1b2f35f50f35e..1b154d5387704c915b351e8ad4470273435f88c5 100644 --- a/example/lapack_to_chameleon/step5.c +++ b/example/lapack_to_chameleon/step5.c @@ -53,7 +53,6 @@ int main(int argc, char *argv[]) { RUNTIME_sequence_t *sequence = NULL; /* CHAMELEON request uniquely identifies each asynchronous function call */ RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; /* initialize some parameters with default values */ int iparam[IPARAM_SIZEOF]; @@ -86,9 +85,9 @@ int main(int argc, char *argv[]) { print_header( argv[0], iparam); /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } /* set some specific parameters related to CHAMELEON: blocks size and inner-blocking size */ @@ -145,10 +144,10 @@ int main(int argc, char *argv[]) { * have been terminated */ CHAMELEON_Sequence_Wait(sequence); - status = sequence->status; - if ( status != 0 ) { - fprintf(stderr, "Error in computation (%d)\n", status); - return EXIT_FAILURE; + rc = sequence->status; + if ( rc != CHAMELEON_SUCCESS ) { + fprintf(stderr, "Error in computation (%d)\n", rc); + goto finalize; } CHAMELEON_Sequence_Destroy(sequence); @@ -199,6 +198,12 @@ int main(int argc, char *argv[]) { CHAMELEON_Desc_Destroy( &descX ); CHAMELEON_Desc_Destroy( &descAC ); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); diff --git a/example/lapack_to_chameleon/step6.c b/example/lapack_to_chameleon/step6.c index 03309a4b82dd09ae7a6a2207470fb5a0f641580e..aca3f3feb479fc868f131795203bc0df5693124c 100644 --- a/example/lapack_to_chameleon/step6.c +++ b/example/lapack_to_chameleon/step6.c @@ -54,7 +54,6 @@ int main(int argc, char *argv[]) { RUNTIME_sequence_t *sequence = NULL; /* CHAMELEON request uniquely identifies each asynchronous function call */ RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; /* initialize some parameters with default values */ int iparam[IPARAM_SIZEOF]; @@ -84,9 +83,9 @@ int main(int argc, char *argv[]) { NGPU = iparam[IPARAM_NCUDAS]; /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } /* set some specific parameters related to CHAMELEON: blocks size and inner-blocking size */ @@ -167,10 +166,10 @@ int main(int argc, char *argv[]) { * have been terminated */ CHAMELEON_Sequence_Wait(sequence); - status = sequence->status; - if ( status != 0 ) { - fprintf(stderr, "Error in computation (%d)\n", status); - return EXIT_FAILURE; + rc = sequence->status; + if ( rc != CHAMELEON_SUCCESS ) { + fprintf(stderr, "Error in computation (%d)\n", rc); + goto finalize; } CHAMELEON_Sequence_Destroy(sequence); @@ -225,6 +224,12 @@ int main(int argc, char *argv[]) { CHAMELEON_Desc_Destroy( &descX ); CHAMELEON_Desc_Destroy( &descAC ); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); diff --git a/example/lapack_to_chameleon/step7.c b/example/lapack_to_chameleon/step7.c index e688e3c7f62e86355fa7c40be823d73eb1daea28..7b611bf9ba7ec516fba1558ca613fa80da26deed 100644 --- a/example/lapack_to_chameleon/step7.c +++ b/example/lapack_to_chameleon/step7.c @@ -58,7 +58,6 @@ int main(int argc, char *argv[]) { RUNTIME_sequence_t *sequence = NULL; /* CHAMELEON request uniquely identifies each asynchronous function call */ RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; /* initialize some parameters with default values */ int iparam[IPARAM_SIZEOF]; @@ -88,9 +87,9 @@ int main(int argc, char *argv[]) { NGPU = iparam[IPARAM_NCUDAS]; /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; + int rc = CHAMELEON_Init( NCPU, NGPU ); + if (rc != CHAMELEON_SUCCESS) { + goto finalize; } /* set some specific parameters related to CHAMELEON: blocks size and inner-blocking size */ @@ -175,10 +174,10 @@ int main(int argc, char *argv[]) { * have been terminated */ CHAMELEON_Sequence_Wait(sequence); - status = sequence->status; - if ( status != 0 ) { - fprintf(stderr, "Error in computation (%d)\n", status); - return EXIT_FAILURE; + rc = sequence->status; + if ( rc != CHAMELEON_SUCCESS ) { + fprintf(stderr, "Error in computation (%d)\n", rc); + goto finalize; } CHAMELEON_Sequence_Destroy(sequence); @@ -233,6 +232,12 @@ int main(int argc, char *argv[]) { CHAMELEON_Desc_Destroy( &descX ); CHAMELEON_Desc_Destroy( &descAC ); +finalize: + /* + * Required semicolon to have at least one inst + * before the end of OpenMP block. 
+ */ + ; /* Finalize CHAMELEON */ CHAMELEON_Finalize(); diff --git a/include/chameleon.h b/include/chameleon.h index da34cab2ab698707fd3a240487aefe7115854d4b..7312639602283834e827337f5635cbcbb806979d 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -80,9 +80,9 @@ BEGIN_C_DECLS /* Auxiliary */ int CHAMELEON_Version (int *ver_major, int *ver_minor, int *ver_micro); int CHAMELEON_My_Mpi_Rank (void); -int CHAMELEON_Init (int nworkers, int ncudas); -int CHAMELEON_InitPar (int nworkers, int ncudas, int nthreads_per_worker); -int CHAMELEON_Finalize (void); +int __chameleon_init (int nworkers, int ncudas); +int __chameleon_initpar (int nworkers, int ncudas, int nthreads_per_worker); +int __chameleon_finalize (void); int CHAMELEON_Pause (void); int CHAMELEON_Resume (void); int CHAMELEON_Distributed_start (void); @@ -135,6 +135,48 @@ int CHAMELEON_Sequence_Create (RUNTIME_sequence_t **sequence); int CHAMELEON_Sequence_Destroy (RUNTIME_sequence_t *sequence); int CHAMELEON_Sequence_Wait (RUNTIME_sequence_t *sequence); +/** + * + * @ingroup Control + * + * @brief Initialize CHAMELEON. + * + ****************************************************************************** + * + * @param[in] cores + * Number of cores to use. + * + * @param[in] gpus + * Number of cuda devices to use. 
+ * + ****************************************************************************** + * + * @retval CHAMELEON_SUCCESS successful exit + * + */ +#if defined(CHAMELEON_SCHED_OPENMP) +#define CHAMELEON_Init(nworkers, ncudas)\ + __chameleon_init(nworkers, ncudas);\ + _Pragma("omp parallel")\ + _Pragma("omp master")\ + { +#define CHAMELEON_InitPar(nworkers, ncudas, nthreads_per_worker)\ + __chameleon_initpar(nworkers, ncudas, nthreads_per_worker);\ + _Pragma("omp parallel")\ + _Pragma("omp master")\ + { +#define CHAMELEON_Finalize()\ + }\ + __chameleon_finalize(); +#else +#define CHAMELEON_Init(nworkers, ncudas)\ + __chameleon_init(nworkers, ncudas); +#define CHAMELEON_InitPar(nworkers, ncudas, nthreads_per_worker)\ + __chameleon_initpar(nworkers, ncudas, nthreads_per_worker); +#define CHAMELEON_Finalize()\ + __chameleon_finalize(); +#endif + END_C_DECLS #endif /* _chameleon_h_ */ diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in index 5edc1093ce3b70c6db269090fb99e85741021ae8..88abfe51c42e47788d422b55e7e6b8b62f4968c4 100644 --- a/include/chameleon/config.h.in +++ b/include/chameleon/config.h.in @@ -27,6 +27,7 @@ #cmakedefine CHAMELEON_SCHED_QUARK #cmakedefine CHAMELEON_SCHED_PARSEC #cmakedefine CHAMELEON_SCHED_STARPU +#cmakedefine CHAMELEON_SCHED_OPENMP /* Communication engine */ #cmakedefine CHAMELEON_USE_MPI diff --git a/include/chameleon/runtime_struct.h b/include/chameleon/runtime_struct.h index 70df9e09d3a2dd3d7e2220715c6bc5cbfa5ba3fa..9d34949736e4f6cf811afed2edf91c1079d84b50 100644 --- a/include/chameleon/runtime_struct.h +++ b/include/chameleon/runtime_struct.h @@ -30,6 +30,7 @@ typedef enum runtime_id_e { RUNTIME_SCHED_QUARK, /**< Quark runtime */ RUNTIME_SCHED_PARSEC, /**< PaRSEC runtime */ RUNTIME_SCHED_STARPU, /**< StarPU runtime */ + RUNTIME_SCHED_OPENMP, /**< OpenMP runtime */ } RUNTIME_id_t; /** diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 
73503ee0ce89aff056eb5971f4904bdbe0787315..cabe559c252719de9501e0f95093de147fa9ec18 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -109,6 +109,8 @@ elseif( CHAMELEON_SCHED_PARSEC ) add_subdirectory(parsec) elseif( CHAMELEON_SCHED_STARPU ) add_subdirectory(starpu) +elseif( CHAMELEON_SCHED_OPENMP ) + add_subdirectory(openmp) endif() ### diff --git a/runtime/openmp/CMakeLists.txt b/runtime/openmp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d283d663d9f809f0b95d00c3c476a46a381cf51c --- /dev/null +++ b/runtime/openmp/CMakeLists.txt @@ -0,0 +1,114 @@ +### +# +# @file openmp/CMakeLists.txt +# +# @copyright 2009-2015 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah University of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver. 
+# +# @version 1.0.0 +# @author Cedric Castagnede +# @author Emmanuel Agullo +# @author Mathieu Faverge +# @author Florent Pruvost +# @date 2012-07-13 +# +### +cmake_minimum_required(VERSION 2.8) + +include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/include ) +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include ) + +# Define the list of headers +# -------------------------- +set(RUNTIME_HDRS + include/chameleon_openmp.h + ) + +# Force generation of headers +# --------------------------- +add_custom_target( + runtime_openmp_include + ALL SOURCES ${RUNTIME_HDRS}) + +# Installation +# ------------ +install( + FILES ${RUNTIME_HDRS} + DESTINATION include/runtime/openmp ) + +# Generate the Chameleon common for all possible precisions +# --------------------------------------------------------- + +set(RUNTIME_COMMON + control/runtime_async.c + control/runtime_context.c + control/runtime_control.c + control/runtime_descriptor.c + control/runtime_options.c + control/runtime_profiling.c + ${RUNTIME_COMMON_GENERATED} + ) + +# Generate the Chameleon sources for all possible precisions +# ---------------------------------------------------------- +set(RUNTIME_SRCS_GENERATED "") +set(ZSRC + ${CODELETS_ZSRC} + ) + +precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}" + PRECISIONS "${CHAMELEON_PRECISION}" + TARGETDIR "codelets") + +set(RUNTIME_SRCS + ${RUNTIME_COMMON} + ${RUNTIME_SRCS_GENERATED} + ${CODELETS_SRC} + ) + +# Force generation of sources +# --------------------------- +add_custom_target(openmp_sources ALL SOURCES ${RUNTIME_SRCS}) +set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};runtime_openmp_include;openmp_sources" CACHE INTERNAL "List of targets of sources") + +# Add library +# ----------- +add_library(chameleon_openmp ${RUNTIME_SRCS}) +set_property(TARGET chameleon_openmp PROPERTY LINKER_LANGUAGE Fortran) +set_property(TARGET chameleon_openmp PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") + +target_link_libraries(chameleon_openmp + 
${OPENMP_LIBRARIES_DEP}) +target_link_libraries(chameleon_openmp + coreblas) + +add_dependencies(chameleon_openmp + chameleon_include + control_include + runtime_openmp_include + openmp_sources + ) + +add_dependencies(chameleon_openmp coreblas_include) + +# installation +# ------------ +install(TARGETS chameleon_openmp + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + +### +### END CMakeLists.txt +### diff --git a/runtime/openmp/codelets/codelet_map.c b/runtime/openmp/codelets/codelet_map.c new file mode 100644 index 0000000000000000000000000000000000000000..2b9572873d0ef2b2238c92670321f88224c1b3c8 --- /dev/null +++ b/runtime/openmp/codelets/codelet_map.c @@ -0,0 +1,30 @@ +/** + * + * @file openmp/codelet_map.c + * + * @copyright 2018-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon map OpenMP codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2018-09-24 + * + */ +#include "chameleon_openmp.h" + +void INSERT_TASK_map( const RUNTIME_option_t *options, + cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, + cham_unary_operator_t operator, void *op_args ) +{ + char *ptrA = RTBLKADDR( A, char, Am, An ); + +#pragma omp task depend(inout: ptrA[0]) + { + operator( A, uplo, Am, An, ptrA, op_args ); + } + +} diff --git a/runtime/openmp/codelets/codelet_zasum.c b/runtime/openmp/codelets/codelet_zasum.c new file mode 100644 index 0000000000000000000000000000000000000000..036c0a1007b31398677c5c99de58a966039b1a00 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zasum.c @@ -0,0 +1,38 @@ +/** + * + * @file openmp/codelet_zasum.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zasum OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Florent Pruvost + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "coreblas/coreblas_z.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_dzasum(const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ); + double *ptrB = RTBLKADDR( B, double, Bm, Bn ); +#pragma omp task firstprivate(storev, uplo, M, N, lda, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_dzasum(storev, uplo, M, N, ptrA, lda, ptrB); +} + + diff --git a/runtime/openmp/codelets/codelet_zaxpy.c b/runtime/openmp/codelets/codelet_zaxpy.c new file mode 100644 index 0000000000000000000000000000000000000000..6de39f6dd9357a25df795b5bd1f2dea5fe55921c --- /dev/null +++ b/runtime/openmp/codelets/codelet_zaxpy.c @@ -0,0 +1,35 @@ +/** + * + * @file openmp/codelet_zaxpy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zaxpy OpenMP codelet + * + * @version 1.0.0 + * @author Florent Pruvost + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, + int M, CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int incA, + const CHAM_desc_t *B, int Bm, int Bn, int incB) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(M, alpha, incA, incB, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zaxpy(M, alpha, ptrA, incA, ptrB, incB); +} + diff --git a/runtime/openmp/codelets/codelet_zbuild.c b/runtime/openmp/codelets/codelet_zbuild.c new file mode 100644 index 0000000000000000000000000000000000000000..1952fa385734890393a2b3ff76b42964ebfa9a22 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zbuild.c @@ -0,0 +1,47 @@ +/** + * + * @file openmp/codelet_zbuild.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zbuild OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Piotr Luszczek + * @author Pierre Lemarinier + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Guillaume Sylvand + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +void INSERT_TASK_zbuild( const RUNTIME_option_t *options, + const CHAM_desc_t *A, int Am, int An, int lda, + void *user_data, void* user_build_callback ) +{ + int row_min, row_max, col_min, col_max; + row_min = Am*A->mb ; + row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ; + col_min = An*A->nb ; + col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ; + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + void (*callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; + callback = user_build_callback; + +#pragma omp task firstprivate(row_min, row_max, col_min, col_max, ptrA, lda, user_data) depend(inout:ptrA[0]) + callback(row_min, row_max, col_min, col_max, ptrA, lda, user_data); +} diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c new file mode 100644 index 0000000000000000000000000000000000000000..1d18ff18f83a3d4f7f6343e92ec16246d265ef96 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgeadd.c @@ -0,0 +1,92 @@ +/** + * + * @file openmp/codelet_zgeadd.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgeadd OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + ****************************************************************************** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = ChamNoTrans: op(A) = A + * = ChamTrans: op(A) = A' + * = ChamConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices op(A) and B. + * + * @param[in] N + * Number of columns of the matrices op(A) and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M + * otherwise. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = ChamNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B.
LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +void INSERT_TASK_zgeadd(const RUNTIME_option_t *options, + cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); /* options and nb are not used by this function */ +#pragma omp task firstprivate(trans, m, n, alpha, beta, lda, ldb, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zgeadd(trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb); /* submitted as an OpenMP task; this function itself returns nothing, and the value returned by CORE_zgeadd is not inspected */ +} diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c new file mode 100644 index 0000000000000000000000000000000000000000..71a9bddceef10244b194948d3a7c3623168ea03c --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgelqt.c @@ -0,0 +1,107 @@ +/** + * + * @file openmp/codelet_zgelqt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgelqt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgelqt - computes an LQ factorization of a complex M-by-N tile A: A = L * Q. + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(k)' . . . H(2)' H(1)', where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + * A(i,i+1:n), and tau in TAU(i). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, the elements on and below the diagonal of the array + * contain the M-by-min(M,N) lower trapezoidal tile L (L is + * lower triangular if M <= N); the elements above the diagonal, + * with the array TAU, represent the unitary tile Q as a + * product of elementary reflectors (see Further Details). + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T.
LDT >= IB. + * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + int ws_size = options->ws_wsize; /* read here, at submission time, and copied into the task via firstprivate */ +#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + { + CHAMELEON_Complex64_t TAU[ws_size]; /* task-local variable-length scratch buffer of ws_size elements */ + CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n ); /* WORK is the tail of the same buffer, starting max(m, n) elements in; NOTE(review): assumes ws_size is larger than max(m, n) - confirm against the caller that sets ws_wsize */ + CORE_zgelqt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); + } +} diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c new file mode 100644 index 0000000000000000000000000000000000000000..68aec8de49aa853b5ca3aa8bd9d54ddd9ca2eb30 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgemm.c @@ -0,0 +1,52 @@ +/** + * + * @file openmp/codelet_zgemm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgemm OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zgemm(const RUNTIME_option_t *options, + cham_trans_t transA, cham_trans_t transB, + int m, int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); /* Dependencies are keyed on the first element of each tile, as in the other codelets. Am/An, Bm/Bn and Cm/Cn are tile coordinates, not sizes, so they must not be used as array-section lengths: the product is zero for any tile in row or column 0, and a section would only partially overlap the ptr[0] items other tasks use for the same tile. */ +#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) + CORE_zgemm(transA, transB, + m, n, k, + alpha, ptrA, lda, + ptrB, ldb, + beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..a097637736103f1b1e1e90a4f6e72407c5ce68b9 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgeqrt.c @@ -0,0 +1,108 @@ +/** + * + * @file openmp/codelet_zgeqrt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgeqrt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgeqrt computes a QR factorization of a complex M-by-N tile A: + * A = Q * R. + * + * The tile Q is represented as a product of elementary reflectors + * + * Q = H(1) H(2) . . . H(k), where k = min(M,N). + * + * Each H(i) has the form + * + * H(i) = I - tau * v * v' + * + * where tau is a complex scalar, and v is a complex vector with + * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + * and tau in TAU(i). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, the elements on and above the diagonal of the array + * contain the min(M,N)-by-N upper trapezoidal tile R (R is + * upper triangular if M >= N); the elements below the diagonal, + * with the array TAU, represent the unitary tile Q as a + * product of elementary reflectors (see Further Details). + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] T + * The IB-by-N triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB.
+ * + * @param[out] TAU + * The scalar factors of the elementary reflectors (see Further + * Details). + * + * @param[out] WORK + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ + +void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + int ws_size = options->ws_wsize; /* read here, at submission time, and copied into the task via firstprivate */ +#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0]) + { + CHAMELEON_Complex64_t TAU[ws_size]; /* task-local variable-length scratch buffer of ws_size elements */ + CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n); /* WORK is the tail of the same buffer, starting max(m, n) elements in; NOTE(review): assumes ws_size is larger than max(m, n) - confirm against the caller that sets ws_wsize */ + CORE_zgeqrt(m, n, ib, ptrA, lda, ptrT, ldt, TAU, work); + } +} diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c new file mode 100644 index 0000000000000000000000000000000000000000..cd24a4ac0a6e9e1fd04743d12ac110dc4334f4f8 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgessm.c @@ -0,0 +1,88 @@ +/** + * + * @file openmp/codelet_zgessm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgessm OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgessm applies the factors L computed by CORE_zgetrf_incpiv to + * a complex M-by-N tile A. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] K + * The number of columns of the tile L. K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] IPIV + * The pivot indices array of size K as returned by + * CORE_zgetrf_incpiv. + * + * @param[in] L + * The M-by-K lower triangular tile. + * + * @param[in] LDL + * The leading dimension of the array L. LDL >= max(1,M). + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, updated by the application of L. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M).
+ * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if INFO = -k, the k-th argument had an illegal value + * + */ + +void INSERT_TASK_zgessm(const RUNTIME_option_t *options, + int m, int n, int k, int ib, int nb, + int *IPIV, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + const CHAM_desc_t *D, int Dm, int Dn, int ldd, + const CHAM_desc_t *A, int Am, int An, int lda) +{ + CHAMELEON_Complex64_t *ptrD = RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn); /* the L descriptor and its indices are not used by this function; only D and A are passed to the kernel */ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); /* Dependencies are keyed on the first element of each tile, as in the other codelets. Dm/Dn and Am/An are tile coordinates, not sizes, so they must not be used as array-section lengths (the product is zero in row or column 0). */ +#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0]) + CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda); /* NOTE(review): IPIV is read by the task but has no depend item of its own - confirm ordering with its producer is guaranteed through D */ +} diff --git a/runtime/openmp/codelets/codelet_zgessq.c b/runtime/openmp/codelets/codelet_zgessq.c new file mode 100644 index 0000000000000000000000000000000000000000..f28d03affa325ae49e55bec9f85ab790fec74d1a --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgessq.c @@ -0,0 +1,36 @@ +/** + * + * @file openmp/codelet_zgessq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgessq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +void INSERT_TASK_zgessq( const RUNTIME_option_t *options, + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); /* the SCALESUMSQ tile is accessed as an array of double, unlike the complex tile A */ +#pragma omp task firstprivate(m, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0]) + CORE_zgessq( m, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] ); /* elements 0 and 1 of the tile are passed as the kernel's last two arguments; presumably the (scale, sumsq) pair - confirm against CORE_zgessq */ +} diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c new file mode 100644 index 0000000000000000000000000000000000000000..d7cc9fe75179ed236a3a0a80246ff01fc6962b30 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgetrf.c @@ -0,0 +1,39 @@ +/** + * + * @file openmp/codelet_zgetrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgetrf OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +void INSERT_TASK_zgetrf(const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int *IPIV, + cham_bool_t check_info, int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + int info = 0; /* NOTE(review): info is firstprivate, so the value written by CORE_zgetrf stays in the task's private copy and is never reported; check_info and iinfo are not used */ +#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0]) + CORE_zgetrf( m, n, ptrA, lda, IPIV, &info ); /* IPIV is written through the caller's pointer; only tile A carries a depend item */ +} diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c new file mode 100644 index 0000000000000000000000000000000000000000..20b5e92d3cd22eb6d323769e5f7d65d020858452 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c @@ -0,0 +1,97 @@ +/** + * + * @file openmp/codelet_zgetrf_incpiv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgetrf_incpiv OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgetrf_incpiv computes an LU factorization of a general M-by-N tile A + * using partial pivoting with row interchanges. + * + * The factorization has the form + * + * A = P * L * U + * + * where P is a permutation matrix, L is lower triangular with unit + * diagonal elements (lower trapezoidal if m > n), and U is upper + * triangular (upper trapezoidal if m < n). + * + * This is the right-looking Level 2.5 BLAS version of the algorithm. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. N >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A + * On entry, the M-by-N tile to be factored. + * On exit, the factors L and U from the factorization + * A = P*L*U; the unit diagonal elements of L are not stored. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] IPIV + * The pivot indices; for 1 <= i <= min(M,N), row i of the + * tile was interchanged with row IPIV(i). + * + * @param[out] INFO + * See returned value.
+ * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if INFO = -k, the k-th argument had an illegal value + * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * has been completed, but the factor U is exactly + * singular, and division by zero will occur if it is used + * to solve a system of equations. + * + */ + +void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); /* The dependency is keyed on the first element of the tile, as in the other codelets. Am/An are tile coordinates, not sizes, so they must not be used as an array-section length (the product is zero in row or column 0). */ + int info = 0; /* NOTE(review): info is firstprivate, so the value written by the kernel stays in the task's private copy and is never reported; L, check_info and iinfo are not used */ +#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(inout:ptrA[0]) + CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info); +} diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c new file mode 100644 index 0000000000000000000000000000000000000000..5f26b76e9b5ba139af2d25c21d0302811f5e6baf --- /dev/null +++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c @@ -0,0 +1,85 @@ +/** + * + * @file openmp/codelet_zgetrf_nopiv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zgetrf_nopiv OpenMP codelet + * + * @version 1.0.0 + * @author Omar Zenati + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zgetrf_nopiv computes an LU factorization of a general diagonally + * dominant M-by-N matrix A without pivoting. + * + * The factorization has the form + * A = L * U + * where L is lower triangular with unit + * diagonal elements (lower trapezoidal if m > n), and U is upper + * triangular (upper trapezoidal if m < n). + * + * This is the right-looking Level 3 BLAS version of the algorithm. + * WARNING: Your matrix needs to be diagonally dominant if you want to call this + * routine safely. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] IB + * The block size to switch between blocked and unblocked code. + * + * @param[in,out] A + * On entry, the M-by-N matrix to be factored. + * On exit, the factors L and U from the factorization + * A = L*U; the unit diagonal elements of L are not stored. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if INFO = -k, the k-th argument had an illegal value + * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * has been completed, but the factor U is exactly + * singular, and division by zero will occur if it is used + * to solve a system of equations.
+ * + */ + +void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); /* The dependency is keyed on the first element of the tile, as in the other codelets. Am/An are tile coordinates, not sizes, so they must not be used as an array-section length (the product is zero in row or column 0). */ + int info = 0; /* NOTE(review): info is firstprivate, so the value written by the kernel stays in the task's private copy and is never reported; iinfo is not used */ +#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0]) + CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info); +} diff --git a/runtime/openmp/codelets/codelet_zhe2ge.c b/runtime/openmp/codelets/codelet_zhe2ge.c new file mode 100644 index 0000000000000000000000000000000000000000..06ffbf7249d88c59891fd2cea7a40166b80c59eb --- /dev/null +++ b/runtime/openmp/codelets/codelet_zhe2ge.c @@ -0,0 +1,37 @@ +/** + * + * @file openmp/codelet_zhe2ge.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zhe2ge OpenMP codelet + * + * @version 1.0.0 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ); /* options and mb are not used by this function */ +#pragma omp task firstprivate(uplo, m, n, ptrA, lda, ptrB, ldb) depend(in: ptrA[0]) depend(inout:ptrB[0]) + CORE_zhe2ge(uplo, m, n, ptrA, lda, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c new file mode 100644 index 0000000000000000000000000000000000000000..331459e479f3d6330b793a5833d7f1e1575dcca0 --- /dev/null +++
b/runtime/openmp/codelets/codelet_zhemm.c @@ -0,0 +1,52 @@ +/** + * + * @file openmp/codelet_zhemm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zhemm OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zhemm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); /* Dependencies are keyed on the first element of each tile, as in the other codelets. Am/An, Bm/Bn and Cm/Cn are tile coordinates, not sizes, so they must not be used as array-section lengths: the product is zero for any tile in row or column 0, and a section would only partially overlap the ptr[0] items other tasks use for the same tile. */ +#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) + CORE_zhemm(side, uplo, + m, n, + alpha, ptrA, lda, + ptrB, ldb, + beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zher2k.c b/runtime/openmp/codelets/codelet_zher2k.c new file mode 100644 index 0000000000000000000000000000000000000000..a999ae5829f1758f462ebabd141e332d0db7062d --- /dev/null +++ b/runtime/openmp/codelets/codelet_zher2k.c @@ -0,0 +1,49
@@ +/** + * + * @file openmp/codelet_zher2k.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zher2k OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * Submit CORE_zher2k as an OpenMP task on tiles A(Am, An), B(Bm, Bn) and C(Cm, Cn). + * @ingroup CORE_CHAMELEON_Complex64_t + * A and B are input dependencies and C is inout, each tracked through the first element of the tile. options and nb are not used. + */ +void INSERT_TASK_zher2k(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); +#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) + CORE_zher2k(uplo, trans, + n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zherfb.c b/runtime/openmp/codelets/codelet_zherfb.c new file mode 100644 index 0000000000000000000000000000000000000000..2699b10031267012b77537e7b9f1e2969c4059cf --- /dev/null +++ b/runtime/openmp/codelets/codelet_zherfb.c @@ -0,0 +1,44 @@ +/** + * + * @file openmp/codelet_zherfb.c + * + * @copyright 2009-2014 The
University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zherfb OpenMP codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" + +/** + * Submit CORE_zherfb as an OpenMP task on tiles A(Am, An), T(Tm, Tn) and C(Cm, Cn); A and T are input dependencies, C is inout. + * @ingroup CORE_CHAMELEON_Complex64_t + * The task declares a scratch array of options->ws_wsize elements in its own body and passes it with nb as leading dimension. Every value used in the task is listed in firstprivate, as in the sibling codelets. + */ +void INSERT_TASK_zherfb(const RUNTIME_option_t *options, + cham_uplo_t uplo, + int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); + } +} diff --git a/runtime/openmp/codelets/codelet_zherk.c b/runtime/openmp/codelets/codelet_zherk.c new file mode 100644 index 0000000000000000000000000000000000000000..a177be9b7baa9a9205d01cbbe2880f432b87fa51 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zherk.c @@ -0,0 +1,49 @@ +/** + * + * @file openmp/codelet_zherk.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zherk OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * Submit CORE_zherk as an OpenMP task on tiles A(Am, An) and C(Cm, Cn). + * @ingroup CORE_CHAMELEON_Complex64_t + * A is an input dependency and C is inout, each tracked through the first element of the tile. options and nb are not used. + */ +void INSERT_TASK_zherk(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + double alpha, const CHAM_desc_t *A, int Am, int An, int lda, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); +#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, beta, ptrC, ldc) depend(in:ptrA[0]) depend(inout:ptrC[0]) + CORE_zherk(uplo, trans, + n, k, + alpha, ptrA, lda, + beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c new file mode 100644 index 0000000000000000000000000000000000000000..46cd0f5c7037b64610dd3d2aa7cfc501101114ad --- /dev/null +++ b/runtime/openmp/codelets/codelet_zhessq.c @@ -0,0 +1,36 @@ +/** + * + * @file openmp/codelet_zhessq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zhessq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" +/* Submit CORE_zhessq as an OpenMP task. The kernel receives the addresses of the two entries of the SCALESUMSQ tile, so that tile is the inout dependency and tile A(Am, An) is the input one. Dependencies name the first element of each tile because the tile coordinates are not lengths (their product is 0 for the first tile row/column). */ +void INSERT_TASK_zhessq( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); +#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0]) + CORE_zhessq( uplo, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] ); +} diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c new file mode 100644 index 0000000000000000000000000000000000000000..74e420c31c178c45a96f4fd57ac4d05751ad4abd --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlacpy.c @@ -0,0 +1,55 @@ +/** + * + * @file openmp/codelet_zlacpy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlacpy OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * Submit CORE_zlacpy as an OpenMP task copying from tile A(Am, An) at element offset displA into tile B(Bm, Bn) at element offset displB. + * @ingroup CORE_CHAMELEON_Complex64_t + * The displacements are applied to the tile addresses inside the task, never to the descriptor pointers; dependencies stay on the first element of each tile so they match the other codelets. + */ +void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + int displA, const CHAM_desc_t *A, int Am, int An, int lda, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(uplo, m, n, displA, ptrA, lda, displB, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zlacpy(uplo, m, n, ptrA + displA, lda, ptrB + displB, ldb); +} +/* Plain tile copy: forwards to INSERT_TASK_zlacpyx with both displacements set to 0. */ +void INSERT_TASK_zlacpy(const RUNTIME_option_t *options, + cham_uplo_t uplo, int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + INSERT_TASK_zlacpyx( options, uplo, m, n, nb, + 0, A, Am, An, lda, + 0, B, Bm, Bn, ldb ); +} diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c new file mode 100644 index 0000000000000000000000000000000000000000..b65a938fec6ae4ac078210d7befc9820fc0c7bb9 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlag2c.c @@ -0,0 +1,43 @@ +/** + * + * @file openmp/codelet_zlag2c.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux.
All rights reserved. + * + *** + * + * @brief Chameleon zlag2c OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions mixed zc -> ds + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * Submit CORE_zlag2c as an OpenMP task on tiles A(Am, An) (input dependency) and B(Bm, Bn) (inout dependency), each tracked through the first element of the tile; tile coordinates are not lengths. + * @ingroup CORE_CHAMELEON_Complex64_t + * NOTE(review): B is addressed as CHAMELEON_Complex64_t here; for a double-to-single conversion it is presumably a single-precision tile - confirm against the CORE_zlag2c prototype. options and nb are not used. + */ +void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, + int m, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_zlange.c b/runtime/openmp/codelets/codelet_zlange.c new file mode 100644 index 0000000000000000000000000000000000000000..7c898916e1a394ca41df16acb95f2c9f313fcc98 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlange.c @@ -0,0 +1,56 @@ +/** + * + * @file openmp/codelet_zlange.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlange OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" +/* Submit CORE_zlange as an OpenMP task: tile A(Am, An) is the input dependency, tile B(Bm, Bn) receives the result (inout). The task declares a scratch array of options->ws_wsize doubles in its own body. The firstprivate list names norm, which the task uses, rather than the options pointer, which it does not. NB is unused. */ +void INSERT_TASK_zlange(const RUNTIME_option_t *options, + cham_normtype_t norm, int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrB = RTBLKADDR(B, double, Bm, Bn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, norm, M, N, ptrA, LDA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + { + double work[ws_size]; + CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB); + } +} +/* Submit an OpenMP task that keeps in the first entry of tile B(Bm, Bn) the larger of that entry and the first entry of tile A(Am, An). */ +void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn) +{ + double *ptrA = RTBLKADDR(A, double, Am, An); + double *ptrB = RTBLKADDR(B, double, Bm, Bn); + +#pragma omp task firstprivate(ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + { + if ( *ptrA > *ptrB ) + *ptrB = *ptrA; + } +} diff --git a/runtime/openmp/codelets/codelet_zlanhe.c b/runtime/openmp/codelets/codelet_zlanhe.c new file mode 100644 index 0000000000000000000000000000000000000000..094245ddf4ef68d28c2e2357b73a325318280e5e --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlanhe.c @@ -0,0 +1,42 @@ +/** + * + * @file openmp/codelet_zlanhe.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlanhe OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" +/* Submit CORE_zlanhe as an OpenMP task: tile A(Am, An) is the input dependency, tile B(Bm, Bn) receives the result (inout). The task declares a scratch array of options->ws_wsize doubles in its own body. NB is unused. */ +void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *normA = RTBLKADDR(B, double, Bm, Bn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0]) + { + double work[ws_size]; + CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA); + } +} diff --git a/runtime/openmp/codelets/codelet_zlansy.c b/runtime/openmp/codelets/codelet_zlansy.c new file mode 100644 index 0000000000000000000000000000000000000000..f4f8834c45a5febdf37aa9a94400c5c3cad6eb19 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlansy.c @@ -0,0 +1,42 @@ +/** + * + * @file openmp/codelet_zlansy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlansy OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" +/* Submit CORE_zlansy as an OpenMP task: tile A(Am, An) is the input dependency, tile B(Bm, Bn) receives the result (inout). The task declares a scratch array of options->ws_wsize doubles in its own body. NB is unused. */ +void INSERT_TASK_zlansy(const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *normA = RTBLKADDR(B, double, Bm, Bn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0]) + { + double work[ws_size]; + CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA); + } +} diff --git a/runtime/openmp/codelets/codelet_zlantr.c b/runtime/openmp/codelets/codelet_zlantr.c new file mode 100644 index 0000000000000000000000000000000000000000..517bf27a2375a3cd492b58bac3f35c76c055127f --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlantr.c @@ -0,0 +1,41 @@ +/** + * + * @file openmp/codelet_zlantr.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlantr OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" +/* Submit CORE_zlantr as an OpenMP task: tile A(Am, An) is the input dependency, tile B(Bm, Bn) receives the result (inout). The task declares a scratch array of options->ws_wsize doubles in its own body. NB is unused. */ +void INSERT_TASK_zlantr(const RUNTIME_option_t *options, + cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, + int M, int N, int NB, + const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrB = RTBLKADDR(B, double, Bm, Bn); + int ws_wsize = options->ws_wsize; +#pragma omp task firstprivate(ws_wsize, norm, uplo, diag, M, N, ptrA, LDA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + { + double work[ws_wsize]; + CORE_zlantr(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB); + } +} diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c new file mode 100644 index 0000000000000000000000000000000000000000..d579bb39ae0f47273c14c301b5f7fd05f9d665ec --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlascal.c @@ -0,0 +1,69 @@ +/** + * + * @file openmp/codelet_zlascal.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlascal OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Dalal Sukkari + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zlascal scales a matrix by a scalar. + * + * A <- alpha * A + * + ******************************************************************************* + * + * @param[in] M + * Number of rows of the matrix A. + * + * @param[in] N + * Number of columns of the matrix A. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in,out] A + * Matrix of size LDA-by-N. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +/* Submit CORE_zlascal on tile A(Am, An) as an OpenMP task with an inout dependency on the first element of the tile; the tile coordinates are not lengths, and their product is 0 for the first tile row/column. options and nb are not used. */ +void INSERT_TASK_zlascal(const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int lda) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0]) + CORE_zlascal(uplo, m, n, alpha, ptrA, lda); +} diff --git a/runtime/openmp/codelets/codelet_zlaset.c b/runtime/openmp/codelets/codelet_zlaset.c new file mode 100644 index 0000000000000000000000000000000000000000..6d9eb3768fdf660bd993ed6e1f4978fb97930f3b --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlaset.c @@ -0,0 +1,74 @@ +/** + * + * @file openmp/codelet_zlaset.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlaset OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zlaset - Sets the elements of the matrix A on the diagonal + * to beta and on the off-diagonals to alpha + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies which elements of the matrix are to be set + * = ChamUpper: Upper part of A is set; + * = ChamLower: Lower part of A is set; + * = ChamUpperLower: ALL elements of A are set. + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] alpha + * The constant to which the off-diagonal elements are to be set. + * + * @param[in] beta + * The constant to which the diagonal elements are to be set. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, A has been set accordingly. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M).
+ * The call is submitted as an OpenMP task with an inout dependency on the first element of tile A(Am, An); options is not used. + */ +void INSERT_TASK_zlaset(const RUNTIME_option_t *options, + cham_uplo_t uplo, int M, int N, + CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, + const CHAM_desc_t *A, int Am, int An, int LDA) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, M, N, alpha, beta, ptrA, LDA) depend(inout:ptrA[0]) + CORE_zlaset(uplo, M, N, alpha, beta, ptrA, LDA); +} diff --git a/runtime/openmp/codelets/codelet_zlaset2.c b/runtime/openmp/codelets/codelet_zlaset2.c new file mode 100644 index 0000000000000000000000000000000000000000..08e49e0ba61e438bc17f851007485a6cf00f2b6d --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlaset2.c @@ -0,0 +1,72 @@ +/** + * + * @file openmp/codelet_zlaset2.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlaset2 OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zlaset2 - Sets the elements of the matrix A to alpha. + * Not LAPACK compliant! Read below. + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies which elements of the matrix are to be set + * = ChamUpper: STRICT Upper part of A is set to alpha; + * = ChamLower: STRICT Lower part of A is set to alpha; + * = ChamUpperLower: ALL elements of A are set to alpha.
+ * Not LAPACK Compliant. + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] alpha + * The constant to which the elements are to be set. + * + * @param[in,out] A + * On entry, the M-by-N tile A. + * On exit, A has been set to alpha accordingly. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * The call is submitted as an OpenMP task with an inout dependency on the first element of tile A(Am, An); options is not used. + */ +void INSERT_TASK_zlaset2(const RUNTIME_option_t *options, + cham_uplo_t uplo, int M, int N, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int LDA) +{ + + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, M, N, alpha, ptrA, LDA) depend(inout:ptrA[0]) + CORE_zlaset2(uplo, M, N, alpha, ptrA, LDA); +} diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c new file mode 100644 index 0000000000000000000000000000000000000000..6f7ba5fa5bae73976ee6b81a4cdc609c2cf4962d --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlatro.c @@ -0,0 +1,46 @@ +/** + * + * @file openmp/codelet_zlatro.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlatro OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * Submit CORE_zlatro as an OpenMP task on tiles A(Am, An) (input dependency) and B(Bm, Bn) (inout dependency). + * @ingroup CORE_CHAMELEON_Complex64_t + * Dependencies name the first element of each tile: the tile coordinates are not lengths, and their product is 0 for the first tile row/column. options and mb are not used. + */ +void INSERT_TASK_zlatro(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c new file mode 100644 index 0000000000000000000000000000000000000000..7ab7c8b99de5c9e5646562eb5b3ab3ebc32b0209 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zlauum.c @@ -0,0 +1,43 @@ +/** + * + * @file openmp/codelet_zlauum.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zlauum OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * Submit CORE_zlauum on tile A(Am, An) as an OpenMP task with an inout dependency on the first element of the tile. + * @ingroup CORE_CHAMELEON_Complex64_t + * The tile coordinates are not lengths, and their product is 0 for the first tile row/column, so they are not used as a section size. options and nb are not used. + */ +void INSERT_TASK_zlauum(const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0]) + CORE_zlauum(uplo, n, ptrA, lda); +} diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c new file mode 100644 index 0000000000000000000000000000000000000000..06e890a459444492cf59384f5eaebe65f469a92e --- /dev/null +++ b/runtime/openmp/codelets/codelet_zplghe.c @@ -0,0 +1,40 @@ +/** + * + * @file openmp/codelet_zplghe.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zplghe OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Piotr Luszczek + * @author Pierre Lemarinier + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/* INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. The generation runs as an OpenMP task with an inout dependency on the first element of tile A(Am, An); the tile coordinates are not lengths, and their product is 0 for the first tile row/column. */ + +void INSERT_TASK_zplghe( const RUNTIME_option_t *options, + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) + CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed ); +} diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c new file mode 100644 index 0000000000000000000000000000000000000000..5269d527617a8efac61a2f841401af2b9fcb03aa --- /dev/null +++ b/runtime/openmp/codelets/codelet_zplgsy.c @@ -0,0 +1,40 @@ +/** + * + * @file openmp/codelet_zplgsy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zplgsy OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Piotr Luszczek + * @author Pierre Lemarinier + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/* INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. The generation runs as an OpenMP task with an inout dependency on the first element of tile A(Am, An); options is not used. */ + +void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) + CORE_zplgsy( bump, m, n, ptrA, lda, bigM, m0, n0, seed ); +} diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c new file mode 100644 index 0000000000000000000000000000000000000000..35cb6300c55c4d8fcf3276f952767fd24321981b --- /dev/null +++ b/runtime/openmp/codelets/codelet_zplrnt.c @@ -0,0 +1,40 @@ +/** + * + * @file openmp/codelet_zplrnt.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zplrnt OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Piotr Luszczek + * @author Pierre Lemarinier + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/* INSERT_TASK_zplrnt - Generate a tile for random matrix. The generation runs as an OpenMP task with an inout dependency on the first element of tile A(Am, An); options is not used. */ + +void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, + int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0]) + CORE_zplrnt( m, n, ptrA, lda, bigM, m0, n0, seed ); +} diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c new file mode 100644 index 0000000000000000000000000000000000000000..bf72752ec10eadea297c04e3135ed4eefff4a400 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zplssq.c @@ -0,0 +1,85 @@ +/** + * + * @file openmp/codelet_zplssq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zplssq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include <math.h> +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * INSERT_TASK_zplssq returns: scl * sqrt(ssq) + * + * with scl and ssq such that + * + * ( scl**2 )*ssq = sum( A( 2*i )**2 * A( 2*i+1 ) ) + * i + * + * The values of A(2*i+1) are assumed to be at least unity. + * The values of A(2*i) are assumed to be non-negative and scl is + * + * scl = max( A( 2*i ) ), + * i + * + * The routine makes only one pass through the matrix A. + * + ******************************************************************************* + * + * @param[in] M + * The number of couples (scale, sumsq) in the matrix A. + * + * @param[in] A + * The 2-by-M matrix.
+ * + * @param[out] result + * On exit, result contains scl * sqrt( ssq ) + * + */ +void INSERT_TASK_zplssq( const RUNTIME_option_t *options, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn, + const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn ) +{ + double *scalesum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); + double *scl = RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn); +#pragma omp task depend(in: scalesum[0]) depend(inout: scl[0]) + { + if( scl[0] < scalesum[0] ) { + scl[1] = scalesum[1] + (scl[1] * (( scl[0] / scalesum[0] ) * ( scl[0] / scalesum[0] ))); + scl[0] = scalesum[0]; + } else { + scl[1] = scl[1] + (scalesum[1] * (( scalesum[0] / scl[0] ) * ( scalesum[0] / scl[0] ))); + } + } +} + +void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, + const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) +{ + CHAMELEON_Complex64_t *res = RTBLKADDR(RESULT, CHAMELEON_Complex64_t, RESULTm, RESULTn); + +#pragma omp task depend(inout: res[0]) + { + res[0] = res[0] * sqrt( res[1] ); + } +} diff --git a/runtime/openmp/codelets/codelet_zpotrf.c b/runtime/openmp/codelets/codelet_zpotrf.c new file mode 100644 index 0000000000000000000000000000000000000000..815ac1e430e2d0c4b8b0be8bc6b5cde1862fed85 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zpotrf.c @@ -0,0 +1,47 @@ +/** + * + * @file openmp/codelet_zpotrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zpotrf StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo) +{ + (void)nb; + int info = 0; + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, n, lda, info, ptrA) depend(inout:ptrA[0]) + CORE_zpotrf(uplo, n, ptrA, lda, &info); +} diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c new file mode 100644 index 0000000000000000000000000000000000000000..38d9ad5e3c116a48251199d610ac494efa411f0b --- /dev/null +++ b/runtime/openmp/codelets/codelet_zssssm.c @@ -0,0 +1,118 @@ +/** + * + * @file openmp/codelet_zssssm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zssssm OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zssssm applies the LU factorization update from a complex + * matrix formed by a lower triangular IB-by-K tile L1 on top of a + * M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1 + * tile A1 on top of a M2-by-N2 tile A2 (N1 == N2). + * + * This is the right-looking Level 2.5 BLAS version of the algorithm. + * + ******************************************************************************* + * + * @param[in] M1 + * The number of rows of the tile A1. M1 >= 0. + * + * @param[in] N1 + * The number of columns of the tile A1. N1 >= 0. + * + * @param[in] M2 + * The number of rows of the tile A2 and of the tile L2. + * M2 >= 0. + * + * @param[in] N2 + * The number of columns of the tile A2. N2 >= 0. + * + * @param[in] K + * The number of columns of the tiles L1 and L2. K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in,out] A1 + * On entry, the M1-by-N1 tile A1. + * On exit, A1 is updated by the application of L (L1 L2). + * + * @param[in] LDA1 + * The leading dimension of the array A1. LDA1 >= max(1,M1). + * + * @param[in,out] A2 + * On entry, the M2-by-N2 tile A2. + * On exit, A2 is updated by the application of L (L1 L2). + * + * @param[in] LDA2 + * The leading dimension of the array A2. LDA2 >= max(1,M2). + * + * @param[in] L1 + * The IB-by-K lower triangular tile as returned by + * CORE_ztstrf. + * + * @param[in] LDL1 + * The leading dimension of the array L1. LDL1 >= max(1,IB).
+ * + * @param[in] L2 + * The M2-by-K tile as returned by CORE_ztstrf. + * + * @param[in] LDL2 + * The leading dimension of the array L2. LDL2 >= max(1,M2). + * + * @param[in] IPIV + * The pivot indices array of size K as returned by + * CORE_ztstrf. + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if INFO = -k, the k-th argument had an illegal value + * + */ + +void INSERT_TASK_zssssm(const RUNTIME_option_t *options, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, + const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, + const int *IPIV) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n); + CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n); +#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV)\ + depend(inout:ptrA1[0])\ + depend(inout:ptrA2[0])\ + depend(in:ptrL1[0])\ + depend(in:ptrL2[0]) + CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV); +} diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c new file mode 100644 index 0000000000000000000000000000000000000000..efe71b42578d7b5f71c1b3a3dc770705da213dbe --- /dev/null +++ b/runtime/openmp/codelets/codelet_zsymm.c @@ -0,0 +1,50 @@ +/** + * + * @file openmp/codelet_zsymm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. 
Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zsymm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zsymm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); +#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn]) + CORE_zsymm(side, uplo, + m, n, + alpha, ptrA, lda, + ptrB, ldb, + beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zsyr2k.c b/runtime/openmp/codelets/codelet_zsyr2k.c new file mode 100644 index 0000000000000000000000000000000000000000..a2a231fa4bcd441284146c82399de649fa02d10b --- /dev/null +++ b/runtime/openmp/codelets/codelet_zsyr2k.c @@ -0,0 +1,50 @@ +/** + * + * @file openmp/codelet_zsyr2k.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zsyr2k StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); +#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0]) + CORE_zsyr2k(uplo, trans, + n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zsyrk.c b/runtime/openmp/codelets/codelet_zsyrk.c new file mode 100644 index 0000000000000000000000000000000000000000..16e0e370da0596ee63b4420beb6ae7f67fadb8f9 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zsyrk.c @@ -0,0 +1,50 @@ +/** + * + * @file openmp/codelet_zsyrk.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zsyrk StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, + int n, int k, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + (void)nb; + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); +#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, beta, ptrC, ldc) depend(in:ptrA[0]) depend(inout:ptrC[0]) + CORE_zsyrk(uplo, trans, + n, k, + alpha, ptrA, lda, + beta, ptrC, ldc); +} diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c new file mode 100644 index 0000000000000000000000000000000000000000..c2d69dc57523b0340e89253aec2e985eb78ee6ee --- /dev/null +++ b/runtime/openmp/codelets/codelet_zsyssq.c @@ -0,0 +1,34 @@ +/** + * + * @file openmp/codelet_zsyssq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zsyssq StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" + +void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); +#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0:Am*An]) depend(inout:ptrSCALESUMSQ[0]) + CORE_zsyssq( uplo, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1] ); +} diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c new file mode 100644 index 0000000000000000000000000000000000000000..1ebd1aa08878024b0379a6bdef55f06bdd48a0f0 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c @@ -0,0 +1,37 @@ +/** + * + * @file openmp/codelet_zsytrf_nopiv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zsytrf_nopiv StarPU codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Florent Pruvost + * @author Marc Sergent + * @date 2011-10-09 + * @precisions normal z -> c + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options, + cham_uplo_t uplo, int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An]) + CORE_zsytf2_nopiv(uplo, n, ptrA, lda); +} diff --git a/runtime/openmp/codelets/codelet_ztile_zero.c b/runtime/openmp/codelets/codelet_ztile_zero.c new file mode 100644 index 0000000000000000000000000000000000000000..96ef911bf329829e3f282b448b11f277ce114a27 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztile_zero.c @@ -0,0 +1,38 @@ +/** + * + * @file openmp/codelet_ztile_zero.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztile_zero StarPU codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Jakub Kurzak + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas.h" +/** + * + */ +void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options, + int X1, int X2, int Y1, int Y2, + const CHAM_desc_t *A, int Am, int An, int lda ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + int x, y; + for (x = X1; x < X2; x++) + for (y = Y1; y < Y2; y++) + ptrA[lda*x+y] = 0.0; +} diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c new file mode 100644 index 0000000000000000000000000000000000000000..1acb66066910c0626cb731b76f5b8987c2beaac6 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztplqt.c @@ -0,0 +1,40 @@ +/** + * + * @file openmp/codelet_ztplqt.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztplqt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztplqt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztplqt( M, N, L, ib, + ptrA, lda, ptrB, ldb, ptrT, ldt, work ); + } +} diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c new file mode 100644 index 0000000000000000000000000000000000000000..543704822c5ddecca1c432a875ac357ace7c2a66 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztpmlqt.c @@ -0,0 +1,40 @@ +/** + * + * @file openmp/codelet_ztpmlqt.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + * @brief Chameleon ztpmlqt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztpmlqt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work ); + } +} diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..4f3262221eb3027fb82d1d2f93b9d8cd0ad09aeb --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztpmqrt.c @@ -0,0 +1,40 @@ +/** + * + * @file openmp/codelet_ztpmqrt.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + * @brief Chameleon ztpmqrt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int M, int N, int K, int L, int ib, int nb, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, nb, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + { + CHAMELEON_Complex64_t tmp[ws_size]; + CORE_ztpmqrt( side, trans, M, N, K, L, ib, + ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, tmp ); + } +} diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c new file mode 100644 index 0000000000000000000000000000000000000000..17917cc7b7791955707edfe3f608ea18e1247705 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztpqrt.c @@ -0,0 +1,39 @@ +/** + * + * @file openmp/codelet_ztpqrt.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztpqrt StarPU codelet + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void +INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, + int M, int N, int L, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb, + const CHAM_desc_t *T, int Tm, int Tn, int ldt ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0]) + { + CHAMELEON_Complex64_t tmp[ws_size]; + CORE_ztpqrt( M, N, L, ib, + ptrA, lda, ptrB, ldb, ptrT, ldt, tmp ); + } +} diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c new file mode 100644 index 0000000000000000000000000000000000000000..9a39aaf56d2ac9366c1e7c8c2986d678f9379db1 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztradd.c @@ -0,0 +1,94 @@ +/** + * + * @file openmp/codelet_ztradd.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztradd OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2011-11-03 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + ****************************************************************************** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd. + * + * B <- alpha * op(A) + beta * B, + * + * where op(X) = X, X', or conj(X') + * + ******************************************************************************* + * + * @param[in] uplo + * Specifies the shape of A and B matrices: + * = ChamUpperLower: A and B are general matrices. + * = ChamUpper: op(A) and B are upper trapezoidal matrices. + * = ChamLower: op(A) and B are lower trapezoidal matrices. + * + * @param[in] trans + * Specifies whether the matrix A is non-transposed, transposed, or + * conjugate transposed + * = ChamNoTrans: op(A) = A + * = ChamTrans: op(A) = A' + * = ChamConjTrans: op(A) = conj(A') + * + * @param[in] M + * Number of rows of the matrices op(A) and B. + * + * @param[in] N + * Number of columns of the matrices op(A) and B. + * + * @param[in] alpha + * Scalar factor of A. + * + * @param[in] A + * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M + * otherwise. + * + * @param[in] LDA + * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * trans = ChamNoTrans, and k=N otherwise. + * + * @param[in] beta + * Scalar factor of B. + * + * @param[in,out] B + * Matrix of size LDB-by-N. + * On exit, B = alpha * op(A) + beta * B + * + * @param[in] LDB + * Leading dimension of the array B.
LDB >= max(1,M) + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +void INSERT_TASK_ztradd(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_ztrasm.c b/runtime/openmp/codelets/codelet_ztrasm.c new file mode 100644 index 0000000000000000000000000000000000000000..c2a017cfd0cc97bb6edff1c3836aad38993c4d11 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrasm.c @@ -0,0 +1,34 @@ +/** + * + * @file openmp/codelet_ztrasm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrasm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +void INSERT_TASK_ztrasm(const RUNTIME_option_t *options, + cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrB = RTBLKADDR(B, double, Bm, Bn); +#pragma omp task firstprivate(storev, uplo, diag, M, N, ptrA, lda, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_ztrasm(storev, uplo, diag, M, N, ptrA, lda, ptrB); +} diff --git a/runtime/openmp/codelets/codelet_ztrmm.c b/runtime/openmp/codelets/codelet_ztrmm.c new file mode 100644 index 0000000000000000000000000000000000000000..bd255050d452cbb3296e2b4cfac05045c768ab8e --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrmm.c @@ -0,0 +1,48 @@ +/** + * + * @file openmp/codelet_ztrmm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrmm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(side, uplo, transA, diag, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0]) + CORE_ztrmm(side, uplo, + transA, diag, + m, n, + alpha, ptrA, lda, + ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_ztrsm.c b/runtime/openmp/codelets/codelet_ztrsm.c new file mode 100644 index 0000000000000000000000000000000000000000..9db743cabb7faa272a750faf797e09fe0b9e57ef --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrsm.c @@ -0,0 +1,50 @@ +/** + * + * @file openmp/codelet_ztrsm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrsm StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-20 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +#include "coreblas/coreblas_z.h" + +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + */ +void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, + cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *B, int Bm, int Bn, int ldb) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn); +#pragma omp task firstprivate(side, uplo, transA, diag, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout: ptrB[0]) + CORE_ztrsm(side, uplo, + transA, diag, + m, n, + alpha, ptrA, lda, + ptrB, ldb); +} diff --git a/runtime/openmp/codelets/codelet_ztrssq.c b/runtime/openmp/codelets/codelet_ztrssq.c new file mode 100644 index 0000000000000000000000000000000000000000..b4ce2a66097deefadce9153c1c8d6292cd7291df --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrssq.c @@ -0,0 +1,35 @@ +/** + * + * @file openmp/codelet_ztrssq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon ztrssq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.6.0 for CHAMELEON 1.0.0 + * @author Mathieu Faverge + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/* Submit an OpenMP task that calls CORE_ztrssq on tile (Am, An) of A (in dependence) and passes the addresses of the first two doubles of tile (SCALESUMSQm, SCALESUMSQn) of SCALESUMSQ to the kernel (inout dependence); options is unused here. The firstprivate clause names ptrSCALESUMSQ, the pointer the task body actually dereferences, rather than the descriptor, which the task never touches. */ +void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_diag_t diag, + int m, int n, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn); +#pragma omp task firstprivate(uplo, diag, m, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0]) + CORE_ztrssq( uplo, diag, m, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1]); +} diff --git a/runtime/openmp/codelets/codelet_ztrtri.c b/runtime/openmp/codelets/codelet_ztrtri.c new file mode 100644 index 0000000000000000000000000000000000000000..cef754f89dda4359b0a0ad00c44cc24286a0d69e --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztrtri.c @@ -0,0 +1,43 @@ +/** + * + * @file openmp/codelet_ztrtri.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. 
All rights reserved.
+ * + *** + * + * @brief Chameleon ztrtri StarPU codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Julien Langou + * @author Henricus Bouwmeester + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * Submit an OpenMP task that calls CORE_ztrtri on tile (Am, An) of A, with an inout dependence on the first element of that tile. + * @ingroup CORE_CHAMELEON_Complex64_t + * iinfo is captured firstprivate and its address is what the kernel receives, so the caller never sees a value written through it; options and nb are unused here. + */ +void INSERT_TASK_ztrtri(const RUNTIME_option_t *options, + cham_uplo_t uplo, cham_diag_t diag, + int n, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); +#pragma omp task firstprivate(uplo, diag, n, ptrA, lda, iinfo) depend(inout:ptrA[0]) + CORE_ztrtri(uplo, diag, n, ptrA, lda, &iinfo); +} diff --git a/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c new file mode 100644 index 0000000000000000000000000000000000000000..0d1b2d1ffa0ec36f871211b507b6ddd13ac3c2e1 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c @@ -0,0 +1,50 @@ +/** + * + * @file openmp/codelet_ztsmlq_hetra1.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon ztsmlq_hetra1 StarPU codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Azzam Haidar + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * Submit an OpenMP task that calls CORE_ztsmlq_hetra1 on tiles A1 and A2 (both inout) and V and T (both in), each dependence keyed on the first element of the tile. + * @ingroup CORE_CHAMELEON_Complex64_t + * The scratch buffer is a variable-length array of options->ws_wsize elements declared inside the task body, and ldwork is ib for ChamLeft, nb otherwise. NOTE(review): confirm ws_wsize is non-zero and is an element count, not a byte count. + */ +void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + int ldwork = side == ChamLeft ? 
ib : nb; + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, + ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } +} diff --git a/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c new file mode 100644 index 0000000000000000000000000000000000000000..6ae8085c9998d895743a26b118d5bdd560a99eb4 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c @@ -0,0 +1,50 @@ +/** + * + * @file openmp/codelet_ztsmqr_hetra1.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztsmqr_hetra1 StarPU codelet + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Azzam Haidar + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * Submit an OpenMP task that calls CORE_ztsmqr_hetra1 on tiles A1 and A2 (both inout) and V and T (both in), each dependence keyed on the first element of the tile. + * @ingroup CORE_CHAMELEON_Complex64_t + * The scratch buffer is a variable-length array of options->ws_wsize elements declared inside the task body, and ldwork is ib for ChamLeft, nb otherwise. NOTE(review): confirm ws_wsize is non-zero and is an element count, not a byte count. + */ +void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m1, int n1, int m2, int n2, int k, int ib, int nb, + const CHAM_desc_t *A1, int A1m, int A1n, int lda1, + const CHAM_desc_t *A2, int A2m, int A2n, int lda2, + const CHAM_desc_t *V, int Vm, int Vn, int ldv, + const CHAM_desc_t *T, int Tm, int Tn, int ldt) +{ + CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n); + CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn); + int ldwork = side == ChamLeft ? 
ib : nb; + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, + ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork); + } +} diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c new file mode 100644 index 0000000000000000000000000000000000000000..cb612cb6fd9fa8a0667218b3f2ff70878964c4d1 --- /dev/null +++ b/runtime/openmp/codelets/codelet_ztstrf.c @@ -0,0 +1,113 @@ +/** + * + * @file openmp/codelet_ztstrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon ztstrf OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_ztstrf computes an LU factorization of a complex matrix formed + * by an upper triangular NB-by-N tile U on top of a M-by-N tile A + * using partial pivoting with row interchanges. + * + * This is the right-looking Level 2.5 BLAS version of the algorithm. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the tile A. M >= 0. + * + * @param[in] N + * The number of columns of the tile A. 
N >= 0.
+ * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] NB + * + * @param[in,out] U + * On entry, the NB-by-N upper triangular tile. + * On exit, the new factor U from the factorization + * + * @param[in] LDU + * The leading dimension of the array U. LDU >= max(1,NB). + * + * @param[in,out] A + * On entry, the M-by-N tile to be factored. + * On exit, the factor L from the factorization + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[in,out] L + * On entry, the IB-by-N lower triangular tile. + * On exit, the interchanged rows form the tile A in case of pivoting. + * + * @param[in] LDL + * The leading dimension of the array L. LDL >= max(1,IB). + * + * @param[out] IPIV + * The pivot indices; for 1 <= i <= min(M,N), row i of the + * tile U was interchanged with row IPIV(i) of the tile A. + * + * @param[in,out] WORK + * + * @param[in] LDWORK + * The dimension of the array WORK. + * + * @param[out] INFO + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if INFO = -k, the k-th argument had an illegal value + * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization + * has been completed, but the factor U is exactly + * singular, and division by zero will occur if it is used + * to solve a system of equations. 
+ * + */ +/* Submit an OpenMP task that calls CORE_ztstrf on tiles U, A and L (all inout, each dependence keyed on the tile's first element), with a variable-length scratch array of options->ws_wsize elements and nb passed as LDWORK. check_info is unused; iinfo is captured firstprivate and its address is what the kernel receives, so the caller never sees a value written through it. NOTE(review): IPIV is documented above as an output but is not named in any depend clause - confirm ordering is covered by the tile dependences. */ +void INSERT_TASK_ztstrf(const RUNTIME_option_t *options, + int m, int n, int ib, int nb, + const CHAM_desc_t *U, int Um, int Un, int ldu, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *L, int Lm, int Ln, int ldl, + int *IPIV, + cham_bool_t check_info, int iinfo) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrU = RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un); + CHAMELEON_Complex64_t *ptrL = RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo); + } +} diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c new file mode 100644 index 0000000000000000000000000000000000000000..348b290a1973097fcebf2acac241916a7b5de906 --- /dev/null +++ b/runtime/openmp/codelets/codelet_zunmlq.c @@ -0,0 +1,131 @@ +/** + * + * @file openmp/codelet_zunmlq.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. 
All rights reserved.
+ * + *** + * + * @brief Chameleon zunmlq OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Dulceneia Becker + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zunmlq overwrites the general complex M-by-N tile C with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * C C * Q + * TRANS = 'C': Q**H * C C * Q**H + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(k) . . . H(2) H(1) + * + * as returned by CORE_zgelqt. Q is of order M if SIDE = 'L' and of order N + * if SIDE = 'R'. + * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : Conjugate transpose, apply Q**H. + * + * @param[in] M + * The number of rows of the tile C. M >= 0. + * + * @param[in] N + * The number of columns of the tile C. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * If SIDE = ChamLeft, M >= K >= 0; + * if SIDE = ChamRight, N >= K >= 0. + * + * @param[in] IB + * The inner-blocking size. IB >= 0. + * + * @param[in] A + * Dimension: (LDA,M) if SIDE = ChamLeft, + * (LDA,N) if SIDE = ChamRight, + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_zgelqt in the first k rows of its array argument A. + * + * @param[in] LDA + * The leading dimension of the array A. 
LDA >= max(1,K).
+ * + * @param[in] T + * The IB-by-K triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[in,out] C + * On entry, the M-by-N tile C. + * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + * @param[in,out] WORK + * On exit, if INFO = 0, WORK(1) returns the optimal LDWORK. + * + * @param[in] LDWORK + * The dimension of the array WORK. + * If SIDE = ChamLeft, LDWORK >= max(1,N); + * if SIDE = ChamRight, LDWORK >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +/* Submit an OpenMP task that calls CORE_zunmlq: tiles A and T are read (in) and tile C is updated (inout), each dependence keyed on the tile's first element. The scratch buffer is a variable-length array of options->ws_wsize elements declared inside the task body, and nb is passed as LDWORK. NOTE(review): confirm ws_wsize is non-zero and is an element count, not a byte count. 
*/ +void INSERT_TASK_zunmlq(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + { + CHAMELEON_Complex64_t work[ws_size]; + CORE_zunmlq(side, trans, m, n, k, ib, + ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb); + } +} diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c new file mode 100644 index 0000000000000000000000000000000000000000..42765470397c5f2fa9feb7bb1e27a4b7019f83e4 --- /dev/null +++
b/runtime/openmp/codelets/codelet_zunmqr.c @@ -0,0 +1,131 @@ +/** + * + * @file openmp/codelet_zunmqr.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zunmqr OpenMP codelet + * + * @version 1.0.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + */ + +#include "chameleon_openmp.h" +#include "chameleon/tasks_z.h" +/** + * + * @ingroup CORE_CHAMELEON_Complex64_t + * + * CORE_zunmqr overwrites the general complex M-by-N tile C with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * C C * Q + * TRANS = 'C': Q**H * C C * Q**H + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by CORE_zgeqrt. Q is of order M if SIDE = 'L' and of order N + * if SIDE = 'R'. + * + ******************************************************************************* + * + * @param[in] side + * @arg ChamLeft : apply Q or Q**H from the Left; + * @arg ChamRight : apply Q or Q**H from the Right. + * + * @param[in] trans + * @arg ChamNoTrans : No transpose, apply Q; + * @arg ChamConjTrans : Conjugate transpose, apply Q**H. + * + * @param[in] M + * The number of rows of the tile C. M >= 0. + * + * @param[in] N + * The number of columns of the tile C. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * If SIDE = ChamLeft, M >= K >= 0; + * if SIDE = ChamRight, N >= K >= 0. + * + * @param[in] IB + * The inner-blocking size. 
IB >= 0.
+ * + * @param[in] A + * Dimension: (LDA,K) + * The i-th column must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * CORE_zgeqrt in the first k columns of its array argument A. + * + * @param[in] LDA + * The leading dimension of the array A. + * If SIDE = ChamLeft, LDA >= max(1,M); + * if SIDE = ChamRight, LDA >= max(1,N). + * + * @param[in] T + * The IB-by-K triangular factor T of the block reflector. + * T is upper triangular by block (economic storage); + * The rest of the array is not referenced. + * + * @param[in] LDT + * The leading dimension of the array T. LDT >= IB. + * + * @param[in,out] C + * On entry, the M-by-N tile C. + * On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + * @param[in,out] WORK + * On exit, if INFO = 0, WORK(1) returns the optimal LDWORK. + * + * @param[in] LDWORK + * The dimension of the array WORK. + * If SIDE = ChamLeft, LDWORK >= max(1,N); + * if SIDE = ChamRight, LDWORK >= max(1,M).
+ * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + */ +/* Submit an OpenMP task that calls CORE_zunmqr: tiles A and T are read (in) and tile C is updated (inout), each dependence keyed on the tile's first element. The scratch buffer (tmp) is a variable-length array of options->ws_wsize elements declared inside the task body, and nb is passed as LDWORK. NOTE(review): confirm ws_wsize is non-zero and is an element count, not a byte count. */ +void INSERT_TASK_zunmqr(const RUNTIME_option_t *options, + cham_side_t side, cham_trans_t trans, + int m, int n, int k, int ib, int nb, + const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *T, int Tm, int Tn, int ldt, + const CHAM_desc_t *C, int Cm, int Cn, int ldc) +{ + CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An); + CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn); + CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn); + int ws_size = options->ws_wsize; +#pragma omp task firstprivate(ws_size, side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0]) + { + CHAMELEON_Complex64_t tmp[ws_size]; + CORE_zunmqr(side, trans, m, n, k, ib, + ptrA, lda, ptrT, ldt, ptrC, ldc, tmp, nb); + } +} diff --git a/runtime/openmp/control/runtime_async.c b/runtime/openmp/control/runtime_async.c new file mode 100644 index 0000000000000000000000000000000000000000..cb88c5e00e727b4cae28dd63ff0b54bb86aa9c64 --- /dev/null +++ b/runtime/openmp/control/runtime_async.c @@ -0,0 +1,71 @@ +/** + * + * @file openmp/runtime_async.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. 
All rights reserved.
+ * + *** + * + * @brief Chameleon StarPU asynchronous routines + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Cedric Castagnede + * @date 2010-11-15 + * + */ +#include <stdlib.h> +#include "chameleon_openmp.h" + +/** + * Create a sequence (nothing to allocate for OpenMP: both arguments are ignored and CHAMELEON_SUCCESS is returned) + */ +int RUNTIME_sequence_create( CHAM_context_t *chamctxt, + RUNTIME_sequence_t *sequence ) +{ + (void)chamctxt; + (void)sequence; + return CHAMELEON_SUCCESS; +} + +/** + * Destroy a sequence (nothing to release for OpenMP: both arguments are ignored and CHAMELEON_SUCCESS is returned) + */ +int RUNTIME_sequence_destroy( CHAM_context_t *chamctxt, + RUNTIME_sequence_t *sequence ) +{ + (void)chamctxt; + (void)sequence; + return CHAMELEON_SUCCESS; +} + +/** + * Wait for the completion of a sequence: issues an OpenMP taskwait; both arguments are ignored + */ +int RUNTIME_sequence_wait( CHAM_context_t *chamctxt, + RUNTIME_sequence_t *sequence ) +{ + (void)chamctxt; + (void)sequence; + +#pragma omp taskwait + return CHAMELEON_SUCCESS; +} + +/** + * Terminate a sequence: stores request and status in the sequence and copies status into the request; sequence and request are dereferenced unconditionally, so both must be non-NULL + */ +void RUNTIME_sequence_flush( CHAM_context_t *chamctxt, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request, + int status ) +{ + (void)chamctxt; + sequence->request = request; + sequence->status = status; + request->status = status; + return; +} diff --git a/runtime/openmp/control/runtime_context.c b/runtime/openmp/control/runtime_context.c new file mode 100644 index 0000000000000000000000000000000000000000..f4777c74e2fc4e22cde4fcdb71f361ca7898ecd9 --- /dev/null +++ b/runtime/openmp/control/runtime_context.c @@ -0,0 +1,78 @@ +/** + * + * @file openmp/runtime_context.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. 
All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon StarPU context routines + * + * @version 1.0.0 + * @author Cedric Augonnet + * @author Mathieu Faverge + * @author Cedric Castagnede + * @date 2010-11-15 + * + */ +#include <stdlib.h> +#include "chameleon_openmp.h" + +/** + * Create new context: tags the context with the OpenMP scheduler id (RUNTIME_SCHED_OPENMP) + */ +void RUNTIME_context_create( CHAM_context_t *chamctxt ) +{ + chamctxt->scheduler = RUNTIME_SCHED_OPENMP; + return; +} + +/** + * Clean the context (nothing to release for OpenMP) + */ +void RUNTIME_context_destroy( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return; +} + +/** + * Enable a runtime feature: profiling mode and bound computation only print a not-available message on stderr; any other lever is ignored + */ +void RUNTIME_enable( int lever ) +{ + switch (lever) + { + case CHAMELEON_PROFILING_MODE: + fprintf(stderr, "Profiling is not available with OpenMP\n"); + break; + case CHAMELEON_BOUND: + fprintf(stderr, "Bound computation is not available with OpenMP\n"); + break; + default: + return; + } + return; +} + +/** + * Disable a runtime feature: profiling mode and bound computation only print a not-available message on stderr; any other lever is ignored + */ +void RUNTIME_disable( int lever ) +{ + switch (lever) + { + case CHAMELEON_PROFILING_MODE: + fprintf(stderr, "Profiling is not available with OpenMP\n"); + break; + case CHAMELEON_BOUND: + fprintf(stderr, "Bound computation is not available with OpenMP\n"); + break; + default: + return; + } + return; +} diff --git a/runtime/openmp/control/runtime_control.c b/runtime/openmp/control/runtime_control.c new file mode 100644 index 0000000000000000000000000000000000000000..1a6c0185ce74f5e266ff12cc25b9adf1ac69e589 --- /dev/null +++ b/runtime/openmp/control/runtime_control.c @@ -0,0 +1,125 @@ +/** + * + * @file openmp/runtime_control.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. 
All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon StarPU control routines + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Cedric Augonnet + * @author Cedric Castagnede + * @date 2010-11-15 + * + */ +#include <stdlib.h> +#include "chameleon_openmp.h" + +/** + * Initialize the runtime: only emits a warning when ncudas or nthreads_per_worker is positive, then returns 0. NOTE(review): chamctxt and ncpus are not used here - presumably the thread count is left to the OpenMP environment; confirm. + */ +int RUNTIME_init( CHAM_context_t *chamctxt, + int ncpus, + int ncudas, + int nthreads_per_worker ) +{ + int hres = 0; + if ( ncudas > 0 ) { + chameleon_warning( "RUNTIME_init_scheduler(OpenMP)", "GPUs are not supported for now"); + } + + if ( nthreads_per_worker > 0 ) { + chameleon_warning( "RUNTIME_init_scheduler(OpenMP)", "Multi-threaded kernels are not supported for now"); + } + + return hres; +} + +/** + * Finalize the runtime (nothing to do for OpenMP) + */ +void RUNTIME_finalize( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return; +} + +/** + * To suspend the processing of new tasks by workers (no-op for OpenMP) + */ +void RUNTIME_pause( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return; +} + +/** + * This is the symmetrical call to RUNTIME_pause, + * used to resume the workers polling for new tasks (no-op for OpenMP). 
+ */ +void RUNTIME_resume( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return; +} + +/** + * Barrier: issues an OpenMP barrier directive (whether waiting threads spin or sleep is left to the OpenMP implementation) + */ +void RUNTIME_barrier( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; +#pragma omp barrier +} + +/** + * Display a progress information when executing the tasks (not implemented for OpenMP: no-op) + */ +void RUNTIME_progress( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return; +} + + +/** + * Thread rank, as reported by omp_get_thread_num(). + */ +int RUNTIME_thread_rank( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return omp_get_thread_num(); +} + +/** + * Number of threads.
+ */ +int RUNTIME_thread_size( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return omp_get_num_threads(); +} +/* NOTE(review): omp_get_num_threads() above returns 1 when called outside a parallel region - confirm callers always run inside one. */ +/** + * The process rank (always 0 here) + */ +int RUNTIME_comm_rank( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return 0; +} + +/** + * This returns the size of the distributed computation (always 1 here) + */ +int RUNTIME_comm_size( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return 1; +} diff --git a/runtime/openmp/control/runtime_descriptor.c b/runtime/openmp/control/runtime_descriptor.c new file mode 100644 index 0000000000000000000000000000000000000000..b6144495cd7b9c423df8ee7ba76ed11f576b818f --- /dev/null +++ b/runtime/openmp/control/runtime_descriptor.c @@ -0,0 +1,106 @@ +/** + * + * @file openmp/runtime_descriptor.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon OpenMP descriptor routines + * + * @version 1.0.0 + * @author Vijay Joshi + * @author Cedric Castagnede + * @author Philippe Virouleau + * @date 2018-06-21 + * + */ +#include <stdlib.h> +#include "chameleon_openmp.h" +/* No-op for OpenMP: both tag parameters are ignored. */ +void RUNTIME_comm_set_tag_sizes( int user_tag_width, + int user_tag_sep ) +{ + (void)user_tag_width; + (void)user_tag_sep; +} +/* Thin wrapper around malloc(). */ +void *RUNTIME_malloc( size_t size ) +{ + return malloc( size ); +} +/* Thin wrapper around free(); size is ignored. */ +void RUNTIME_free( void *ptr, + size_t size ) +{ + (void)size; + free( ptr ); + return; +} +/* No-op for OpenMP: the descriptor is left untouched. */ +void RUNTIME_desc_create( CHAM_desc_t *desc ) +{ + (void)desc; + return; +} +/* No-op for OpenMP: the descriptor is left untouched. 
*/ +void RUNTIME_desc_destroy( CHAM_desc_t *desc ) +{ + (void)desc; + return; +} +/* No-op for OpenMP; always returns CHAMELEON_SUCCESS. */ +int RUNTIME_desc_acquire( const CHAM_desc_t *desc ) +{ + (void)desc; + return CHAMELEON_SUCCESS; +} +/* No-op for OpenMP; always returns CHAMELEON_SUCCESS. */ +int RUNTIME_desc_release( const CHAM_desc_t *desc ) +{ + (void)desc; + return CHAMELEON_SUCCESS; +} +/* No-op for OpenMP: both arguments are ignored. */ +void +RUNTIME_desc_flush( const CHAM_desc_t *desc, + const RUNTIME_sequence_t *sequence ) +{ + (void)desc; + 
(void)sequence; + return; +} + +/* No-op for OpenMP. */ +void +RUNTIME_flush( ) +{ + return; +} +/* No-op for OpenMP: all arguments are ignored. */ +void +RUNTIME_data_flush( const RUNTIME_sequence_t *sequence, + const CHAM_desc_t *A, int Am, int An ) +{ + (void)sequence; + (void)A; + (void)Am; + (void)An; + return; +} +/* RUNTIME_data_migrate is a no-op for OpenMP and is only compiled when CHAMELEON_USE_MIGRATE is defined. */ +#if defined(CHAMELEON_USE_MIGRATE) +void RUNTIME_data_migrate( const RUNTIME_sequence_t *sequence, + const CHAM_desc_t *A, int Am, int An, int new_rank ) +{ + (void)sequence; (void)A; (void)Am; (void)An; (void)new_rank; +} +#endif +/* Return the address of tile (m, n) through the descriptor's get_blkaddr callback. */ +void *RUNTIME_data_getaddr( const CHAM_desc_t *desc, int m, int n ) +{ + return desc->get_blkaddr( desc, m, n ); +} diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c new file mode 100644 index 0000000000000000000000000000000000000000..744c31324423bcb0c051f0461ad49f16c90a85eb --- /dev/null +++ b/runtime/openmp/control/runtime_options.c @@ -0,0 +1,65 @@ +/** + * + * @file openmp/runtime_options.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon StarPU options routines + * + * @version 1.0.0 + * @author Cedric Augonnet + * @author Mathieu Faverge + * @author Cedric Castagnede + * @date 2010-11-15 + * + */ +#include <stdlib.h> +#include "chameleon_openmp.h" +/* Fill the options with the given sequence and request plus defaults: minimum priority, zero workspace sizes and NULL workspace pointers; chamctxt is not used here. */ +void RUNTIME_options_init( RUNTIME_option_t *option, CHAM_context_t *chamctxt, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + option->sequence = sequence; + option->request = request; + option->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; + option->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; + option->priority = RUNTIME_PRIORITY_MIN; + option->ws_wsize = 0; + option->ws_hsize = 0; + option->ws_worker = NULL; + option->ws_host = NULL; + return; +} +/* No-op for OpenMP: both arguments are ignored. */ +void RUNTIME_options_finalize( RUNTIME_option_t *option, CHAM_context_t *chamctxt ) +{ + (void)option; + (void)chamctxt; + return; +} +/* Record the requested per-worker workspace size; nothing is allocated and host_size is ignored. Always returns CHAMELEON_SUCCESS. */ +int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, size_t host_size ) +{ + if (worker_size > 0) { + /* + * NOTE: we set the size, but instead of doing a malloc shared by multiple workers, + * we just create a VLA in the relevant codelets, within the task's body. + * This way we ensure the "scratch" is thread local and not shared by multiple threads. + */ + options->ws_wsize = worker_size; + } + return CHAMELEON_SUCCESS; +} +/* Reset the recorded worker workspace size to 0; there is no buffer to free. Always returns CHAMELEON_SUCCESS. 
*/ +int RUNTIME_options_ws_free( RUNTIME_option_t *options ) +{ + if (options->ws_wsize) { + options->ws_wsize = 0; + } + return CHAMELEON_SUCCESS; +} diff --git a/runtime/openmp/control/runtime_profiling.c b/runtime/openmp/control/runtime_profiling.c new file mode 100644 index 0000000000000000000000000000000000000000..4d8f07a95fb0ccb0144291a6ffac043260f25719 --- /dev/null +++ b/runtime/openmp/control/runtime_profiling.c @@ -0,0 +1,70 @@ +/** + * + * @file openmp/runtime_profiling.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon StarPU profiling routines + * + * @version 1.0.0 + * @author Cedric Augonnet + * @author Mathieu Faverge + * @author Cedric Castagnede + * @date 2010-11-15 + * + */ +#include "chameleon_openmp.h" +#include "chameleon/timer.h" +/* Current time, as returned by CHAMELEON_timer(). */ +double RUNTIME_get_time(){ + return CHAMELEON_timer(); +} +/* The profiling and statistics entry points from here down to RUNTIME_kernelprofile_display are not implemented for OpenMP: each one only emits a warning. */ +void RUNTIME_start_profiling() +{ + chameleon_warning("RUNTIME_start_profiling()", "FxT profiling is not available with OpenMP\n"); +} + +void RUNTIME_stop_profiling() +{ + chameleon_warning("RUNTIME_stop_profiling()", "FxT profiling is not available with OpenMP\n"); +} + +void RUNTIME_start_stats() +{ + chameleon_warning("RUNTIME_start_stats()", "pruning stats are not available with OpenMP\n"); +} + +void RUNTIME_stop_stats() +{ + chameleon_warning("RUNTIME_stop_stats()", "pruning stats are not available with OpenMP\n"); +} + +void RUNTIME_schedprofile_display(void) +{ + chameleon_warning("RUNTIME_schedprofile_display(openmp)", "Scheduler profiling is not available with OpenMP\n"); +} + +void RUNTIME_kernelprofile_display(void) +{ + chameleon_warning("RUNTIME_kernelprofile_display(openmp)", "Kernel profiling is not available with OpenMP\n"); +} + +/** + * Set iteration numbers for traces (no-op for OpenMP: push and pop ignore their arguments) + */ +void RUNTIME_iteration_push( CHAM_context_t *chamctxt, unsigned long iteration ) +{ + (void)chamctxt; (void)iteration; + return; +} +void RUNTIME_iteration_pop( CHAM_context_t *chamctxt ) +{ + (void)chamctxt; + return; +} diff --git a/runtime/openmp/include/chameleon_openmp.h b/runtime/openmp/include/chameleon_openmp.h new file mode 100644 index 
0000000000000000000000000000000000000000..cc9a127af0d998d004336c3e2df87dda5811deac --- /dev/null +++ b/runtime/openmp/include/chameleon_openmp.h @@ -0,0 +1,33 @@ +/** + * + * @file openmp/chameleon_openmp.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research
Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon OpenMP runtime main header + * + * @version 1.0.0 + * @author Philippe Virouleau + * @date 2018-06-21 + * + */ +#ifndef _CHAMELEON_OPENMP_H_ +#define _CHAMELEON_OPENMP_H_ + +#include "coreblas.h" + +#include "control/common.h" +#include <omp.h> + +/* + * Access to block pointer: address of tile (m, n) of desc, obtained from RUNTIME_data_getaddr() and cast to type* (no leading-dimension accessor is defined here) + */ +#define RTBLKADDR( desc, type, m, n ) ( (type*)RUNTIME_data_getaddr( desc, m, n ) ) + + +#endif /* _CHAMELEON_OPENMP_H_ */ diff --git a/timing/timing.c b/timing/timing.c index ad6c1b57c532bb8d6c0f5ded5a9c34e64741163d..e5b1b79b5009934f96b33be5beccfaa768d99518 100644 --- a/timing/timing.c +++ b/timing/timing.c @@ -58,7 +58,6 @@ #include <starpu.h> #endif /* defined(CHAMELEON_SCHED_STARPU) */ - #if defined(CHAMELEON_HAVE_GETOPT_H) #include <getopt.h> #endif /* defined(CHAMELEON_HAVE_GETOPT_H) */ @@ -142,8 +141,11 @@ Test(int64_t n, int *iparam) { return 0; } - if ( CHAMELEON_My_Mpi_Rank() == 0) - printf( "%7d %7d %7d ", iparam[IPARAM_M], iparam[IPARAM_N], iparam[IPARAM_K] ); + if ( CHAMELEON_My_Mpi_Rank() == 0 ) { + printf( "%10d %7d %7d %7d %7d ", iparam[IPARAM_THRDNBR], + iparam[IPARAM_M], iparam[IPARAM_N], + iparam[IPARAM_NB], iparam[IPARAM_K] ); + } fflush( stdout ); t = (double*)malloc(niter*sizeof(double)); @@ -424,7 +426,7 @@ print_header(char *prog_name, int * iparam) { iparam[IPARAM_IB], eps ); - printf( "# M N K/NRHS seconds Gflop/s Deviation%s%s\n", + printf( "# Threads M N NB K/NRHS seconds Gflop/s Deviation%s%s\n", bound_header, iparam[IPARAM_INVERSE] ?
inverse_header : check_header); return; } @@ -613,37 +615,22 @@ parse_arguments(int *_argc, char ***_argv, int *iparam, int *start, int *stop, i } while(-1 != c); } +// NOTE: this function is here to cope with the fact that OpenMP parallel +// regions must not have instructions jumping outside the region (eg: returns) + int -main(int argc, char *argv[]) { - int i, m, n, mx, nx; +timing_main(int *iparam, char *prog_name, int start, int stop, int step) { + int status; + int i, m, n, mx, nx; int nbnode = 1; - int start = 500; - int stop = 5000; - int step = 500; - int iparam[IPARAM_SIZEOF]; int success = 0; - set_iparam_default(iparam); - - parse_arguments(&argc, &argv, iparam, &start, &stop, &step); - -#if !defined(CHAMELEON_USE_CUDA) - if (iparam[IPARAM_NCUDAS] != 0){ - fprintf(stderr, "ERROR: CHAMELEON_USE_CUDA is not defined. " - "The number of CUDA devices must be set to 0 (--gpus=0).\n"); - return EXIT_FAILURE; - } -#endif - n = iparam[IPARAM_N]; m = iparam[IPARAM_M]; mx = iparam[IPARAM_MX]; nx = iparam[IPARAM_NX]; - /* Initialize CHAMELEON */ - CHAMELEON_Init( iparam[IPARAM_THRDNBR], - iparam[IPARAM_NCUDAS] ); /* Get the number of threads set by the runtime */ iparam[IPARAM_THRDNBR] = CHAMELEON_GetThreadNbr(); @@ -697,7 +684,7 @@ main(int argc, char *argv[]) { CHAMELEON_Set(CHAMELEON_TRANSLATION_MODE, iparam[IPARAM_INPLACE]); if ( CHAMELEON_My_Mpi_Rank() == 0 ) { - print_header( argv[0], iparam); + print_header( prog_name, iparam); } if (step < 1) step = 1; @@ -737,7 +724,39 @@ main(int argc, char *argv[]) { if (status != CHAMELEON_SUCCESS) return status; success += status; } - CHAMELEON_Finalize(); return success; } +int +main(int argc, char *argv[]) { + int start = 500; + int stop = 5000; + int step = 500; + int iparam[IPARAM_SIZEOF]; + + set_iparam_default(iparam); + + parse_arguments(&argc, &argv, iparam, &start, &stop, &step); + +#if !defined(CHAMELEON_USE_CUDA) + if (iparam[IPARAM_NCUDAS] != 0){ + fprintf(stderr, "ERROR: CHAMELEON_USE_CUDA is not defined. 
" + "The number of CUDA devices must be set to 0 (--gpus=0).\n"); + return EXIT_FAILURE; + } +#endif + int return_code; + + /* Initialize CHAMELEON */ + CHAMELEON_Init( iparam[IPARAM_THRDNBR], + iparam[IPARAM_NCUDAS] ); + /* + * NOTE: OpenMP needs this, as Chameleon's init/finalize add '{'/'}', + * and 'return' is not allowed in parallel regions. + */ + return_code = timing_main(iparam, argv[0], start, stop, step); + + CHAMELEON_Finalize(); + return return_code; +} +