Turn CHAMELEON_Init / CHAMELEON_InitPar / CHAMELEON_Finalize into macros.

The former functions are renamed __chameleon_init, __chameleon_initpar and
__chameleon_finalize, and include/chameleon.h now provides CHAMELEON_Init,
CHAMELEON_InitPar and CHAMELEON_Finalize as macros around them.

When CHAMELEON_SCHED_OPENMP is defined, the Init macros additionally open an
"omp parallel" + "omp master" block and CHAMELEON_Finalize() closes it.  Code
placed between the two calls therefore lives inside that block and must not
leave it with "return"; the examples jump to a "finalize" label instead.
In that configuration the Init macros are statements, not expressions, so
their result has to be captured with "int rc = CHAMELEON_Init(...);".
Without CHAMELEON_SCHED_OPENMP the macros expand to a plain call expression,
so "if (CHAMELEON_Init(...) != CHAMELEON_SUCCESS)" keeps compiling.

Review notes:
 - NOTE(review): identifiers starting with "__" are reserved for the
   implementation in ISO C; consider a non-reserved internal prefix.
 - NOTE(review): the Fortran macros in control/chameleon_f77.h are joined on
   a single line by the preprocessor, so the "!$omp" text presumably ends up
   in a trailing Fortran comment - confirm the OpenMP directives take effect.
 - NOTE(review): the hunks for step5.c, step6.c and step7.c do not touch the
   final return statement - confirm those examples propagate rc on failure.
 - Context whitespace is conventional; use "git apply --ignore-whitespace"
   if the target files are indented differently.

diff --git a/control/chameleon_f77.c b/control/chameleon_f77.c
index 961eb3b29fd579baf98e42d68583d40f56e41017..6ae0feedf53c5b489f4512fdeedd16a3ef283d4f 100644
--- a/control/chameleon_f77.c
+++ b/control/chameleon_f77.c
@@ -30,11 +30,11 @@ extern "C" {
     /**
      * FORTRAN API - auxiliary function prototypes
      */
-    void CHAMELEON_INIT(int *CORES, int *NGPUS, int *INFO)
-    { *INFO = CHAMELEON_Init(*CORES, *NGPUS); }
+    void __CHAMELEON_INIT(int *CORES, int *NGPUS, int *INFO)
+    { *INFO = __chameleon_init(*CORES, *NGPUS); }
 
-    void CHAMELEON_FINALIZE(int *INFO)
-    { *INFO = CHAMELEON_Finalize(); }
+    void __CHAMELEON_FINALIZE(int *INFO)
+    { *INFO = __chameleon_finalize(); }
 
     void CHAMELEON_ENABLE(int *lever, int *INFO)
     { *INFO = CHAMELEON_Enable(*lever); }
diff --git a/control/chameleon_f77.h b/control/chameleon_f77.h
index 2e8b02a572b94ef707319a10b19c0de976f509b2..c70c63270b835c58b4ae85923487ccb697a5e9f2 100644
--- a/control/chameleon_f77.h
+++ b/control/chameleon_f77.h
@@ -30,8 +30,8 @@
 #define CHAMELEON_WS_FNAME(lcname, UCNAME) CHAMELEON_GLOBAL(chameleon_alloc_workspace_##lcname, CHAMELEON_ALLOC_WORKSPACE_##UCNAME)
 #define CHAMELEON_WST_FNAME(lcname, UCNAME) CHAMELEON_GLOBAL(chameleon_alloc_workspace_##lcname##_tile, CHAMELEON_ALLOC_WORKSPACE_##UCNAME##_TILE)
 
-#define CHAMELEON_INIT CHAMELEON_GLOBAL(chameleon_init, CHAMELEON_INIT)
-#define CHAMELEON_FINALIZE CHAMELEON_GLOBAL(chameleon_finalize, CHAMELEON_FINALIZE)
+#define __CHAMELEON_INIT CHAMELEON_GLOBAL(__chameleon_init, __CHAMELEON_INIT)
+#define __CHAMELEON_FINALIZE CHAMELEON_GLOBAL(__chameleon_finalize, __CHAMELEON_FINALIZE)
 #define CHAMELEON_ENABLE CHAMELEON_GLOBAL(chameleon_enable, CHAMELEON_ENABLE)
 #define CHAMELEON_DISABLE CHAMELEON_GLOBAL(chameleon_disable, CHAMELEON_DISABLE)
 #define CHAMELEON_SET CHAMELEON_GLOBAL(chameleon_set, CHAMELEON_SET)
@@ -46,4 +46,20 @@
 #define CHAMELEON_LAPACK_TO_TILE CHAMELEON_GLOBAL(chameleon_lapack_to_tile, CHAMELEON_LAPACK_TO_TILE)
 #define CHAMELEON_TILE_TO_LAPACK CHAMELEON_GLOBAL(chameleon_tile_to_lapack, CHAMELEON_TILE_TO_LAPACK)
 
+#if defined(CHAMELEON_SCHED_OPENMP)
+#define CHAMELEON_INIT(nworkers, ncudas, info)\
+    CALL __CHAMELEON_INIT(nworkers, ncudas, info)\
+    !$omp parallel\
+    !$omp master
+#define CHAMELEON_FINALIZE(info)\
+    !$omp end master\
+    !$omp end parallel\
+    CALL __CHAMELEON_FINALIZE(info)
+#else
+#define CHAMELEON_INIT(nworkers, ncudas, info)\
+    CALL __CHAMELEON_INIT(nworkers, ncudas, info)
+#define CHAMELEON_FINALIZE(info)\
+    CALL __CHAMELEON_FINALIZE(info)
+#endif
+
 #endif /* _chameleon_f77_h_ */
diff --git a/control/control.c b/control/control.c
index b349389630307ac6285c7b6ca73c9c9248c0c6cd..8a8de87173c63ad1998baa0ee00e31fcd9df0110 100644
--- a/control/control.c
+++ b/control/control.c
@@ -34,7 +34,7 @@
  *
  * @ingroup Control
  *
- * CHAMELEON_Init - Initialize CHAMELEON.
+ * @brief Initialize CHAMELEON.
  *
  ******************************************************************************
  *
@@ -46,26 +46,19 @@
  *
  ******************************************************************************
  *
- * @return
- * \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-#ifdef CHAMELEON_Init
-#undef CHAMELEON_Init
-#endif
-#ifdef CHAMELEON_Finalize
-#undef CHAMELEON_Finalize
-#endif
-int CHAMELEON_Init(int cores, int gpus)
+int __chameleon_init(int cores, int gpus)
 {
-    return CHAMELEON_InitPar(cores, gpus, -1);
+    return __chameleon_initpar(cores, gpus, -1);
 }
 
 /**
  *
  * @ingroup Control
  *
- * CHAMELEON_InitPar - Initialize CHAMELEON.
+ * @brief Initialize CHAMELEON.
  *
  ******************************************************************************
  *
@@ -80,11 +73,10 @@ int CHAMELEON_Init(int cores, int gpus)
  *
  ******************************************************************************
  *
- * @return
- * \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int CHAMELEON_InitPar(int ncpus, int ncudas, int nthreads_per_worker)
+int __chameleon_initpar(int ncpus, int ncudas, int nthreads_per_worker)
 {
     CHAM_context_t *chamctxt;
 
@@ -125,15 +117,14 @@ int CHAMELEON_InitPar(int ncpus, int ncudas, int nthreads_per_worker)
  *
  * @ingroup Control
  *
- * CHAMELEON_Finalize - Finalize CHAMELEON.
+ * @brief Finalize CHAMELEON.
  *
  ******************************************************************************
  *
- * @return
- * \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int CHAMELEON_Finalize(void)
+int __chameleon_finalize(void)
 {
     CHAM_context_t *chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
diff --git a/example/lapack_to_chameleon/step1.c b/example/lapack_to_chameleon/step1.c
index 96511d058f4a53e121815a306033e090163418cd..69fbc50db34b7934581db0e57ed391dd09bd52dd 100644
--- a/example/lapack_to_chameleon/step1.c
+++ b/example/lapack_to_chameleon/step1.c
@@ -79,9 +79,10 @@ int main(int argc, char *argv[]) {
     print_header( argv[0], iparam);
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /*
@@ -170,8 +171,14 @@ int main(int argc, char *argv[]) {
     free(B);
     free(X);
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
-    return EXIT_SUCCESS;
+    return rc;
 }
diff --git a/example/lapack_to_chameleon/step2.c b/example/lapack_to_chameleon/step2.c
index 3f16a7175454b1f92253e5ec8486d4baf2299b71..871bbac8476abbb2a86d0b06d651ce2bddfa8201 100644
--- a/example/lapack_to_chameleon/step2.c
+++ b/example/lapack_to_chameleon/step2.c
@@ -74,9 +74,10 @@ int main(int argc, char *argv[]) {
     print_header( argv[0], iparam);
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /* Question chameleon to get the block (tile) size (number of columns) */
@@ -220,8 +221,14 @@ int main(int argc, char *argv[]) {
     CHAMELEON_Desc_Destroy( &descX );
     CHAMELEON_Desc_Destroy( &descAC );
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
-    return EXIT_SUCCESS;
+    return rc;
 }
diff --git a/example/lapack_to_chameleon/step3.c b/example/lapack_to_chameleon/step3.c
index 7005b7c90f7dda53b8e39fabe1a5ba4db4dd95bb..02755d1fc7b644d6ab407b86ebc63cbe7c82f32a 100644
--- a/example/lapack_to_chameleon/step3.c
+++ b/example/lapack_to_chameleon/step3.c
@@ -75,9 +75,10 @@ int main(int argc, char *argv[]) {
     print_header( argv[0], iparam);
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /* Question chameleon to get the block (tile) size (number of columns) */
@@ -201,8 +202,14 @@ int main(int argc, char *argv[]) {
     CHAMELEON_Desc_Destroy( &descX );
     CHAMELEON_Desc_Destroy( &descAC );
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
-    return EXIT_SUCCESS;
+    return rc;
 }
diff --git a/example/lapack_to_chameleon/step4.c b/example/lapack_to_chameleon/step4.c
index 7ed2847a37754a5d074fc5eed83b1564bf642053..635e14c633d596c1bbebff5d11ddf4c2da80c0b9 100644
--- a/example/lapack_to_chameleon/step4.c
+++ b/example/lapack_to_chameleon/step4.c
@@ -53,7 +53,6 @@ int main(int argc, char *argv[]) {
     RUNTIME_sequence_t *sequence = NULL;
     /* CHAMELEON request uniquely identifies each asynchronous function call */
     RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
-    int status;
 
     /* initialize some parameters with default values */
     int iparam[IPARAM_SIZEOF];
@@ -83,9 +82,10 @@ int main(int argc, char *argv[]) {
     print_header( argv[0], iparam);
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /* Question chameleon to get the block (tile) size (number of columns) */
@@ -141,10 +141,10 @@ int main(int argc, char *argv[]) {
      * have been terminated */
     CHAMELEON_Sequence_Wait(sequence);
 
-    status = sequence->status;
-    if ( status != 0 ) {
-        fprintf(stderr, "Error in computation (%d)\n", status);
-        return EXIT_FAILURE;
+    rc = sequence->status;
+    if ( rc != CHAMELEON_SUCCESS ) {
+        fprintf(stderr, "Error in computation (%d)\n", rc);
+        goto finalize;
     }
     CHAMELEON_Sequence_Destroy(sequence);
 
@@ -195,8 +195,14 @@ int main(int argc, char *argv[]) {
     CHAMELEON_Desc_Destroy( &descX );
     CHAMELEON_Desc_Destroy( &descAC );
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
-    return EXIT_SUCCESS;
+    return rc;
 }
diff --git a/example/lapack_to_chameleon/step5.c b/example/lapack_to_chameleon/step5.c
index c9daf2ebfbbf60b00d7bbf5b11e1b2f35f50f35e..1b154d5387704c915b351e8ad4470273435f88c5 100644
--- a/example/lapack_to_chameleon/step5.c
+++ b/example/lapack_to_chameleon/step5.c
@@ -53,7 +53,6 @@ int main(int argc, char *argv[]) {
     RUNTIME_sequence_t *sequence = NULL;
     /* CHAMELEON request uniquely identifies each asynchronous function call */
     RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
-    int status;
 
     /* initialize some parameters with default values */
     int iparam[IPARAM_SIZEOF];
@@ -86,9 +85,10 @@ int main(int argc, char *argv[]) {
     print_header( argv[0], iparam);
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /* set some specific parameters related to CHAMELEON: blocks size and inner-blocking size */
@@ -145,10 +145,10 @@ int main(int argc, char *argv[]) {
      * have been terminated */
     CHAMELEON_Sequence_Wait(sequence);
 
-    status = sequence->status;
-    if ( status != 0 ) {
-        fprintf(stderr, "Error in computation (%d)\n", status);
-        return EXIT_FAILURE;
+    rc = sequence->status;
+    if ( rc != CHAMELEON_SUCCESS ) {
+        fprintf(stderr, "Error in computation (%d)\n", rc);
+        goto finalize;
     }
     CHAMELEON_Sequence_Destroy(sequence);
 
@@ -199,6 +199,12 @@ int main(int argc, char *argv[]) {
     CHAMELEON_Desc_Destroy( &descX );
     CHAMELEON_Desc_Destroy( &descAC );
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
diff --git a/example/lapack_to_chameleon/step6.c b/example/lapack_to_chameleon/step6.c
index 03309a4b82dd09ae7a6a2207470fb5a0f641580e..aca3f3feb479fc868f131795203bc0df5693124c 100644
--- a/example/lapack_to_chameleon/step6.c
+++ b/example/lapack_to_chameleon/step6.c
@@ -54,7 +54,6 @@ int main(int argc, char *argv[]) {
     RUNTIME_sequence_t *sequence = NULL;
     /* CHAMELEON request uniquely identifies each asynchronous function call */
     RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
-    int status;
 
     /* initialize some parameters with default values */
     int iparam[IPARAM_SIZEOF];
@@ -84,9 +83,10 @@ int main(int argc, char *argv[]) {
     NGPU = iparam[IPARAM_NCUDAS];
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /* set some specific parameters related to CHAMELEON: blocks size and inner-blocking size */
@@ -167,10 +167,10 @@ int main(int argc, char *argv[]) {
      * have been terminated */
     CHAMELEON_Sequence_Wait(sequence);
 
-    status = sequence->status;
-    if ( status != 0 ) {
-        fprintf(stderr, "Error in computation (%d)\n", status);
-        return EXIT_FAILURE;
+    rc = sequence->status;
+    if ( rc != CHAMELEON_SUCCESS ) {
+        fprintf(stderr, "Error in computation (%d)\n", rc);
+        goto finalize;
     }
     CHAMELEON_Sequence_Destroy(sequence);
 
@@ -225,6 +225,12 @@ int main(int argc, char *argv[]) {
     CHAMELEON_Desc_Destroy( &descX );
     CHAMELEON_Desc_Destroy( &descAC );
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
diff --git a/example/lapack_to_chameleon/step7.c b/example/lapack_to_chameleon/step7.c
index e688e3c7f62e86355fa7c40be823d73eb1daea28..7b611bf9ba7ec516fba1558ca613fa80da26deed 100644
--- a/example/lapack_to_chameleon/step7.c
+++ b/example/lapack_to_chameleon/step7.c
@@ -58,7 +58,6 @@ int main(int argc, char *argv[]) {
     RUNTIME_sequence_t *sequence = NULL;
     /* CHAMELEON request uniquely identifies each asynchronous function call */
     RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
-    int status;
 
     /* initialize some parameters with default values */
     int iparam[IPARAM_SIZEOF];
@@ -88,9 +87,10 @@ int main(int argc, char *argv[]) {
     NGPU = iparam[IPARAM_NCUDAS];
 
     /* Initialize CHAMELEON with main parameters */
-    if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
+    int rc = CHAMELEON_Init( NCPU, NGPU );
+    if (rc != CHAMELEON_SUCCESS) {
         fprintf(stderr, "Error initializing CHAMELEON library\n");
-        return EXIT_FAILURE;
+        goto finalize;
     }
 
     /* set some specific parameters related to CHAMELEON: blocks size and inner-blocking size */
@@ -175,10 +175,10 @@ int main(int argc, char *argv[]) {
      * have been terminated */
     CHAMELEON_Sequence_Wait(sequence);
 
-    status = sequence->status;
-    if ( status != 0 ) {
-        fprintf(stderr, "Error in computation (%d)\n", status);
-        return EXIT_FAILURE;
+    rc = sequence->status;
+    if ( rc != CHAMELEON_SUCCESS ) {
+        fprintf(stderr, "Error in computation (%d)\n", rc);
+        goto finalize;
     }
     CHAMELEON_Sequence_Destroy(sequence);
 
@@ -233,6 +233,12 @@ int main(int argc, char *argv[]) {
     CHAMELEON_Desc_Destroy( &descX );
     CHAMELEON_Desc_Destroy( &descAC );
 
+finalize:
+    /*
+     * A label must be followed by a statement; this empty one is needed
+     * because CHAMELEON_Finalize() starts with '}' in OpenMP builds.
+     */
+    ;
     /* Finalize CHAMELEON */
     CHAMELEON_Finalize();
 
diff --git a/include/chameleon.h b/include/chameleon.h
index 9166a88e14239fe6b504db0d67af998d98bee946..7312639602283834e827337f5635cbcbb806979d 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -80,9 +80,9 @@ BEGIN_C_DECLS
 /* Auxiliary */
 int CHAMELEON_Version (int *ver_major, int *ver_minor, int *ver_micro);
 int CHAMELEON_My_Mpi_Rank (void);
-int CHAMELEON_Init (int nworkers, int ncudas);
-int CHAMELEON_InitPar (int nworkers, int ncudas, int nthreads_per_worker);
-int CHAMELEON_Finalize (void);
+int __chameleon_init (int nworkers, int ncudas);
+int __chameleon_initpar (int nworkers, int ncudas, int nthreads_per_worker);
+int __chameleon_finalize (void);
 int CHAMELEON_Pause (void);
 int CHAMELEON_Resume (void);
 int CHAMELEON_Distributed_start (void);
@@ -135,20 +135,46 @@ int CHAMELEON_Sequence_Create (RUNTIME_sequence_t **sequence);
 int CHAMELEON_Sequence_Destroy (RUNTIME_sequence_t *sequence);
 int CHAMELEON_Sequence_Wait (RUNTIME_sequence_t *sequence);
 
+/**
+ *
+ * @ingroup Control
+ *
+ * @brief Initialize CHAMELEON.
+ *
+ ******************************************************************************
+ *
+ * @param[in] nworkers
+ *          Number of cores to use.
+ *
+ * @param[in] ncudas
+ *          Number of CUDA devices to use.
+ *
+ ******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ */
 #if defined(CHAMELEON_SCHED_OPENMP)
-#define CHAMELEON_INIT(nworkers, ncudas)\
-    CHAMELEON_Init(nworkers, ncudas);\
+#define CHAMELEON_Init(nworkers, ncudas)\
+    __chameleon_init(nworkers, ncudas);\
+    _Pragma("omp parallel")\
+    _Pragma("omp master")\
+    {
+#define CHAMELEON_InitPar(nworkers, ncudas, nthreads_per_worker)\
+    __chameleon_initpar(nworkers, ncudas, nthreads_per_worker);\
     _Pragma("omp parallel")\
     _Pragma("omp master")\
     {
-#define CHAMELEON_FINALIZE()\
+#define CHAMELEON_Finalize()\
     }\
-    CHAMELEON_Finalize();
+    __chameleon_finalize();
 #else
-#define CHAMELEON_INIT(nworkers, ncudas)\
-    CHAMELEON_Init(nworkers, ncudas);
-#define CHAMELEON_FINALIZE()\
-    CHAMELEON_Finalize();
+#define CHAMELEON_Init(nworkers, ncudas)\
+    __chameleon_init(nworkers, ncudas)
+#define CHAMELEON_InitPar(nworkers, ncudas, nthreads_per_worker)\
+    __chameleon_initpar(nworkers, ncudas, nthreads_per_worker)
+#define CHAMELEON_Finalize()\
+    __chameleon_finalize()
 #endif
 
 END_C_DECLS
diff --git a/timing/timing.c b/timing/timing.c
index 0b6037948f90839a8a9dddbab4ffe0ed9994dfce..fc0eaa10848867557126c13c06808f63d265e16c 100644
--- a/timing/timing.c
+++ b/timing/timing.c
@@ -58,11 +58,6 @@
 #include <starpu.h>
 #endif /* defined(CHAMELEON_SCHED_STARPU) */
 
-#if defined(CHAMELEON_SCHED_OPENMP)
-#include <omp.h>
-#endif /* defined(CHAMELEON_SCHED_OPENMP) */
-
-
 #if defined(CHAMELEON_HAVE_GETOPT_H)
 #include <getopt.h>
 #endif /* defined(CHAMELEON_HAVE_GETOPT_H) */
@@ -623,7 +618,7 @@ parse_arguments(int *_argc, char ***_argv, int *iparam, int *start, int *stop, i
 // regions must not have instructions jumping outside the region (eg: returns)
 
 int
-CHAMELEON_Main(int *iparam, char *prog_name, int start, int stop, int step) {
+timing_main(int *iparam, char *prog_name, int start, int stop, int step) {
     int status;
     int i, m, n, mx, nx;
 
@@ -752,13 +747,13 @@ main(int argc, char *argv[]) {
     int return_code;
 
     /* Initialize CHAMELEON */
-    CHAMELEON_INIT( iparam[IPARAM_THRDNBR],
+    CHAMELEON_Init( iparam[IPARAM_THRDNBR],
                     iparam[IPARAM_NCUDAS] );
 
     // NOTE: OpenMP needs this, as Chameleon's init/finalize add '{'/'}',
     // and 'return' is not allowed in parallel regions.
-    return_code = CHAMELEON_Main(iparam, argv[0], start, stop, step);
+    return_code = timing_main(iparam, argv[0], start, stop, step);
 
-    CHAMELEON_FINALIZE();
+    CHAMELEON_Finalize();
     return return_code;
 }