diff --git a/compute/pzgebrd.c b/compute/pzgebrd.c index cbfa67824f3032c94279739ea2e6b5533734dcb2..d6de8929fe269741e3e378f445d3931df009247b 100644 --- a/compute/pzgebrd.c +++ b/compute/pzgebrd.c @@ -246,6 +246,8 @@ chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, chameleon_desc_destroy( &descAB ); RUNTIME_options_finalize( &options, chamctxt ); + + return CHAMELEON_SUCCESS; } int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt, @@ -260,7 +262,7 @@ int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *subA, *subT, *subUVT, *subD; CHAM_desc_t descUl, descUt; CHAM_desc_t descVTl, descVTt; - int M, N, NB, ib; + int M, N, NB; chamctxt = chameleon_context_self(); if ( sequence->status != CHAMELEON_SUCCESS ) { @@ -358,4 +360,6 @@ int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt, } RUNTIME_options_finalize( &options, chamctxt ); + + return CHAMELEON_SUCCESS; } diff --git a/compute/zgesvd.c b/compute/zgesvd.c index 59ef98d92feee5297509b78d75613083e10eda0f..66f8fbef882749896226f351576c49e7942a4086 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -409,7 +409,7 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t descT; CHAM_desc_t D, *Dptr = NULL; double *E; - int M, N, MINMN, NB; + int M, N, MINMN; CHAM_context_t *chamctxt; chamctxt = chameleon_context_self(); @@ -465,7 +465,6 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, M = descA.m; N = descA.n; MINMN = chameleon_min(M, N); - NB = descA.mb; #if defined(CHAMELEON_COPY_DIAG) { chameleon_zdesc_copy_and_restrict( A, &D, A->m, A->n ); diff --git a/control/auxiliary.h b/control/auxiliary.h index f335bb3620d77f685c6e80e8da35d502fa64c655..6e4d5b2ae0f8d4fe7036550f50b8c16bdec733cf 100644 --- a/control/auxiliary.h +++ b/control/auxiliary.h @@ -24,6 +24,9 @@ #ifndef _chameleon_auxiliary_h_ #define _chameleon_auxiliary_h_ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> 
#include "chameleon/struct.h" #include "chameleon/tasks.h" diff --git a/control/common.h b/control/common.h index 85e696d1a3a4b8a3bd98e2a439cca159fc0fa68d..d68934a6cc77f28680bc84860c1d6ae7f1e8444d 100644 --- a/control/common.h +++ b/control/common.h @@ -83,7 +83,7 @@ #define CHAMELEON_RHBLK chamctxt->rhblock #define CHAMELEON_TRANSLATION chamctxt->translation #define CHAMELEON_PARALLEL chamctxt->parallel_enabled -#define CHAMELEON_PROFILING chamctxt->profiling_enabled +#define CHAMELEON_STATISTICS chamctxt->statistics_enabled /** * IPT internal define diff --git a/control/context.c b/control/context.c index 827ebc811db3a4b0989d093b752c1a89b5b03493..dc444168b455531529e6557f7bf5335c05155f78 100644 --- a/control/context.c +++ b/control/context.c @@ -134,7 +134,7 @@ CHAM_context_t *chameleon_context_create() chamctxt->warnings_enabled = chameleon_env_is_off( "CHAMELEON_WARNINGS" ); chamctxt->autotuning_enabled = chameleon_env_is_on( "CHAMELEON_AUTOTUNING" ); chamctxt->parallel_enabled = chameleon_env_is_on( "CHAMELEON_PARALLEL_KERNEL" ); - chamctxt->profiling_enabled = chameleon_env_is_on( "CHAMELEON_PROFILING_MODE" ); + chamctxt->statistics_enabled = chameleon_env_is_on( "CHAMELEON_GENERATE_STATS" ); chamctxt->progress_enabled = chameleon_env_is_on( "CHAMELEON_PROGRESS" ); chamctxt->generic_enabled = chameleon_env_is_on( "CHAMELEON_GENERIC" ); chamctxt->autominmax_enabled = chameleon_env_is_on( "CHAMELEON_AUTOMINMAX" ); @@ -184,8 +184,9 @@ int chameleon_context_destroy(){ * Feature to be enabled: * @arg CHAMELEON_WARNINGS printing of warning messages, * @arg CHAMELEON_AUTOTUNING autotuning for tile size and inner block size. 
- * @arg CHAMELEON_PROFILING_MODE activate profiling of kernels - * @arg CHAMELEON_PROGRESS activate progress indicator + * @arg CHAMELEON_GENERATE_TRACE enable/start the trace generation + * @arg CHAMELEON_GENERATE_STATS enable/start the kernel statistics + * @arg CHAMELEON_PROGRESS enable the progress indicator * @arg CHAMELEON_GEMM3M Use z/cgemm3m for complexe matrix-matrix products * @arg CHAMELEON_GENERIC enable/disable GEMM3M Use z/cgemm3m for complexe matrix-matrix products * @@ -212,11 +213,11 @@ int CHAMELEON_Enable(int option) case CHAMELEON_AUTOTUNING: chamctxt->autotuning_enabled = CHAMELEON_TRUE; break; - case CHAMELEON_PROFILING_MODE: + case CHAMELEON_GENERATE_TRACE: RUNTIME_start_profiling(); break; - case CHAMELEON_KERNELPROFILE_MODE: - chamctxt->profiling_enabled = CHAMELEON_TRUE; + case CHAMELEON_GENERATE_STATS: + chamctxt->statistics_enabled = CHAMELEON_TRUE; break; case CHAMELEON_PROGRESS: chamctxt->progress_enabled = CHAMELEON_TRUE; @@ -259,8 +260,9 @@ int CHAMELEON_Enable(int option) * Feature to be disabled: * @arg CHAMELEON_WARNINGS printing of warning messages, * @arg CHAMELEON_AUTOTUNING autotuning for tile size and inner block size. 
- * @arg CHAMELEON_PROFILING_MODE deactivate profiling of kernels - * @arg CHAMELEON_PROGRESS deactivate progress indicator + * @arg CHAMELEON_GENERATE_TRACE disable/pause the trace generation + * @arg CHAMELEON_GENERATE_STATS disable/pause the kernel statistics + * @arg CHAMELEON_PROGRESS disable the progress indicator * @arg CHAMELEON_GEMM3M Use z/cgemm3m for complexe matrix-matrix products * ******************************************************************************* @@ -285,11 +287,11 @@ int CHAMELEON_Disable(int option) case CHAMELEON_AUTOTUNING: chamctxt->autotuning_enabled = CHAMELEON_FALSE; break; - case CHAMELEON_PROFILING_MODE: + case CHAMELEON_GENERATE_TRACE: RUNTIME_stop_profiling(); break; - case CHAMELEON_KERNELPROFILE_MODE: - chamctxt->profiling_enabled = CHAMELEON_FALSE; + case CHAMELEON_GENERATE_STATS: + chamctxt->statistics_enabled = CHAMELEON_FALSE; break; case CHAMELEON_PROGRESS: chamctxt->progress_enabled = CHAMELEON_FALSE; diff --git a/doc/user/chapters/using.org b/doc/user/chapters/using.org index 91533bfedb639ab00e229b01ebbf20c6f846f80d..cf0d7cc286de80d8124b49937f905d99bec9b49a 100644 --- a/doc/user/chapters/using.org +++ b/doc/user/chapters/using.org @@ -103,7 +103,7 @@ * *CHAMELEON_WARNINGS* enables/disables the warning output * *CHAMELEON_PARALLEL_KERNEL* enables/disables the use of multi-threaded kernels. Available only for StarPU runtime system. - * *CHAMELEON_PROFILING_MODE* enables the profiling information of + * *CHAMELEON_GENERATE_STATS* enables the profiling information of the kernels (StarPU specific) * *CHAMELEON_PROGRESS* enables the progress function to show the percentage of tasks completed. 
@@ -848,7 +848,8 @@ Features that can be enabled/disabled: * *CHAMELEON_WARNINGS*: printing of warning messages, * *CHAMELEON_AUTOTUNING*: autotuning for tile size and inner block size (inactive), - * *CHAMELEON_PROFILING_MODE*: activate kernels profiling, + * *CHAMELEON_GENERATE_TRACE*: enable/start the trace generation + * *CHAMELEON_GENERATE_STATS*: enable/start the kernel statistics * *CHAMELEON_PROGRESS*: to print a progress status, * *CHAMELEON_GEMM3M*: to enable the use of the /gemm3m/ blas bunction. diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index 71b18d8d41dfb2b87126dc54937b42c5413d4c37..d311bf63703e3de4ee27d2bfa15c9127a80db281 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -204,8 +204,10 @@ typedef enum chameleon_store_e { #define CHAMELEON_ERRORS 2 #define CHAMELEON_AUTOTUNING 3 #define CHAMELEON_DAG 4 -#define CHAMELEON_PROFILING_MODE 5 -#define CHAMELEON_KERNELPROFILE_MODE 6 +#define CHAMELEON_GENERATE_TRACE 5 +#define CHAMELEON_PROFILING_MODE CHAMELEON_GENERATE_TRACE /* _deprecated_ */ +#define CHAMELEON_GENERATE_STATS 6 +#define CHAMELEON_KERNELPROFILE_MODE CHAMELEON_GENERATE_STATS /* _deprecated_ */ #define CHAMELEON_PARALLEL_MODE 7 #define CHAMELEON_BOUND 8 #define CHAMELEON_PROGRESS 9 diff --git a/include/chameleon/fortran.h b/include/chameleon/fortran.h index 679eb3a5a2ab2f1cc8e7cb09e654d1d8b185f37d..eae7ff7c7828fd75cdb5295b7520ac7b12b4f26a 100644 --- a/include/chameleon/fortran.h +++ b/include/chameleon/fortran.h @@ -165,15 +165,20 @@ ! State machine switches ! 
integer CHAMELEON_WARNINGS, CHAMELEON_ERRORS, CHAMELEON_AUTOTUNING - integer CHAMELEON_DAG, CHAMELEON_PROFILING_MODE, CHAMELEON_PARALLEL_MODE - integer CHAMELEON_BOUND + integer CHAMELEON_DAG, CHAMELEON_GENERATE_TRACE, CHAMELEON_GENERATE_STATS + integer CHAMELEON_PARALLEL_MODE, CHAMELEON_BOUND, CHAMELEON_PROGRESS + integer CHAMELEON_GEMM3M, CHAMELEON_GENERIC parameter ( CHAMELEON_WARNINGS = 1 ) parameter ( CHAMELEON_ERRORS = 2 ) parameter ( CHAMELEON_AUTOTUNING = 3 ) parameter ( CHAMELEON_DAG = 4 ) - parameter ( CHAMELEON_PROFILING_MODE = 5 ) - parameter ( CHAMELEON_PARALLEL_MODE = 6 ) - parameter ( CHAMELEON_BOUND = 7 ) + parameter ( CHAMELEON_GENERATE_TRACE = 5 ) + parameter ( CHAMELEON_GENERATE_STATS = 6 ) + parameter ( CHAMELEON_PARALLEL_MODE = 7 ) + parameter ( CHAMELEON_BOUND = 8 ) + parameter ( CHAMELEON_PROGRESS = 9 ) + parameter ( CHAMELEON_GEMM3M = 10 ) + parameter ( CHAMELEON_GENERIC = 11 ) !******************************************************************** ! CHAMELEON constants - configuration parameters diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index 8794b5618391d33fd7da9987cd7d399fe92f6073..68642a0444946ba280a05edc830849d89f4b0418 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -66,7 +66,7 @@ RUNTIME_context_destroy( CHAM_context_t *ctxt ); * Pointer to the runtime data structure * * @param[in] option - * @arg CHAMELEON_PROFILING_MODE: start the profiling mode of the runtime. + * @arg CHAMELEON_GENERATE_TRACE: start the trace generation mode of the runtime. */ void RUNTIME_enable( void *runtime_ctxt, int option ); @@ -79,7 +79,7 @@ RUNTIME_enable( void *runtime_ctxt, int option ); * Pointer to the runtime data structure * * @param[in] option - * @arg CHAMELEON_PROFILING_MODE: stop the profiling mode of the runtime. + * @arg CHAMELEON_GENERATE_TRACE: stop the trace generation mode of the runtime. 
*/ void RUNTIME_disable( void *runtime_ctxt, int option ); diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index bd49119df09a400e09e21a6d9096b0ec5a19c7ea..7e16010124e1ff8782449d8c0d6630abf42c7e5a 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -135,7 +135,7 @@ typedef struct chameleon_context_s { cham_bool_t warnings_enabled; cham_bool_t autotuning_enabled; cham_bool_t parallel_enabled; - cham_bool_t profiling_enabled; + cham_bool_t statistics_enabled; cham_bool_t progress_enabled; cham_bool_t generic_enabled; cham_bool_t autominmax_enabled; diff --git a/runtime/openmp/control/runtime_context.c b/runtime/openmp/control/runtime_context.c index a4332ef9f4b3e0840b76624cdf4c920ba8ea9e95..8faa242f76d8a1592d5561ecdf721f3741bb7713 100644 --- a/runtime/openmp/control/runtime_context.c +++ b/runtime/openmp/control/runtime_context.c @@ -49,8 +49,11 @@ void RUNTIME_enable( void *runtime_ctxt, int lever ) case CHAMELEON_DAG: fprintf(stderr, "DAG is not available with OpenMP\n"); break; - case CHAMELEON_PROFILING_MODE: - fprintf(stderr, "Profiling is not available with OpenMP\n"); + case CHAMELEON_GENERATE_TRACE: + fprintf(stderr, "Trace generation is not available with OpenMP\n"); + break; + case CHAMELEON_GENERATE_STATS: + fprintf(stderr, "Kernel statistics are not available with OpenMP\n"); break; case CHAMELEON_BOUND: fprintf(stderr, "Bound computation is not available with OpenMP\n"); @@ -72,8 +75,11 @@ void RUNTIME_disable( void *runtime_ctxt, int lever ) case CHAMELEON_DAG: fprintf(stderr, "DAG is not available with OpenMP\n"); break; - case CHAMELEON_PROFILING_MODE: - fprintf(stderr, "Profiling is not available with OpenMP\n"); + case CHAMELEON_GENERATE_TRACE: + fprintf(stderr, "Trace generation is not available with OpenMP\n"); + break; + case CHAMELEON_GENERATE_STATS: + fprintf(stderr, "Kernel statistics are not available with OpenMP\n"); break; case CHAMELEON_BOUND: fprintf(stderr, "Bound computation is not available 
with OpenMP\n"); diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c index c146c1f262db15734ec09fe59c941588aa342c00..e626e9fa5d419dd3f79630575e1d4b45fbcf70cb 100644 --- a/runtime/openmp/control/runtime_options.c +++ b/runtime/openmp/control/runtime_options.c @@ -26,7 +26,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, { options->sequence = sequence; options->request = request; - options->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; + options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; options->priority = RUNTIME_PRIORITY_MIN; options->workerid = -1; diff --git a/runtime/parsec/control/runtime_context.c b/runtime/parsec/control/runtime_context.c index a15d52eeede14e037c752b7eb7fde144177d59a9..f2ce0fb3e1c9678717c8a585c4e0b6fc039ab82f 100644 --- a/runtime/parsec/control/runtime_context.c +++ b/runtime/parsec/control/runtime_context.c @@ -48,12 +48,15 @@ void RUNTIME_enable( void *runtime_ctxt, int lever ) case CHAMELEON_DAG: fprintf(stderr, "DAG is not available with PaRSEC\n"); break; - case CHAMELEON_PROFILING_MODE: - fprintf(stderr, "Profiling is not available with PaRSEC\n"); + case CHAMELEON_GENERATE_TRACE: + fprintf(stderr, "Trace generation is not available with PaRSEC\n"); //parsec_profiling_start(); break; + case CHAMELEON_GENERATE_STATS: + fprintf(stderr, "Kernel statistics are not available with PaRSEC\n"); + break; case CHAMELEON_BOUND: - fprintf(stderr, "Bound computation is not available with Quark\n"); + fprintf(stderr, "Bound computation is not available with PaRSEC\n"); break; default: return; @@ -73,9 +76,12 @@ void RUNTIME_disable( void *runtime_ctxt, int lever ) case CHAMELEON_DAG: fprintf(stderr, "DAG is not available with PaRSEC\n"); break; - case CHAMELEON_PROFILING_MODE: - fprintf(stderr, "Profiling is not available with PaRSEC\n"); - //parsec_profiling_stop(); + case 
CHAMELEON_GENERATE_TRACE: + fprintf(stderr, "Trace generation is not available with PaRSEC\n"); + //parsec_profiling_start(); + break; + case CHAMELEON_GENERATE_STATS: + fprintf(stderr, "Kernel statistics are not available with PaRSEC\n"); break; case CHAMELEON_BOUND: fprintf(stderr, "Bound computation is not available with PaRSEC\n"); diff --git a/runtime/parsec/control/runtime_options.c b/runtime/parsec/control/runtime_options.c index dac3176f5e3af7091fcf3d6a07be01a12c3b47f7..0fe1498b236ccfad82d75898992094b0cb1c785a 100644 --- a/runtime/parsec/control/runtime_options.c +++ b/runtime/parsec/control/runtime_options.c @@ -24,7 +24,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, { options->sequence = sequence; options->request = request; - options->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; + options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; options->priority = RUNTIME_PRIORITY_MIN; options->workerid = -1; diff --git a/runtime/quark/control/runtime_context.c b/runtime/quark/control/runtime_context.c index 64b9b183b72af0b0401db0e48f594b3dd9b47192..41a3bab7810018f8ca13bdac8dccfe2798a1350f 100644 --- a/runtime/quark/control/runtime_context.c +++ b/runtime/quark/control/runtime_context.c @@ -51,8 +51,11 @@ void RUNTIME_enable( void *runtime_ctxt, int lever ) QUARK_Barrier( runtime_ctxt ); QUARK_DOT_DAG_Enable( runtime_ctxt, 1 ); break; - case CHAMELEON_PROFILING_MODE: - fprintf(stderr, "Profiling is not available with Quark\n"); + case CHAMELEON_GENERATE_TRACE: + fprintf(stderr, "Trace generation is not available with Quark\n"); + break; + case CHAMELEON_GENERATE_STATS: + fprintf(stderr, "Kernel statistics are not available with Quark\n"); break; case CHAMELEON_BOUND: fprintf(stderr, "Bound computation is not available with Quark\n"); @@ -74,8 +77,11 @@ void RUNTIME_disable( void *runtime_ctxt, int lever ) QUARK_Barrier( runtime_ctxt ); QUARK_DOT_DAG_Enable( 
runtime_ctxt, 0 ); break; - case CHAMELEON_PROFILING_MODE: - fprintf(stderr, "Profiling is not available with Quark\n"); + case CHAMELEON_GENERATE_TRACE: + fprintf(stderr, "Trace generation is not available with Quark\n"); + break; + case CHAMELEON_GENERATE_STATS: + fprintf(stderr, "Kernel statistics are not available with Quark\n"); break; case CHAMELEON_BOUND: fprintf(stderr, "Bound computation is not available with Quark\n"); diff --git a/runtime/quark/control/runtime_options.c b/runtime/quark/control/runtime_options.c index fba2114627ca7872e7717f4d65c15ac4bf958058..4723d28e2810327016c754117ca12a9609a912e9 100644 --- a/runtime/quark/control/runtime_options.c +++ b/runtime/quark/control/runtime_options.c @@ -36,7 +36,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, /* Initialize options */ options->sequence = sequence; options->request = request; - options->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; + options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; options->priority = RUNTIME_PRIORITY_MIN; options->workerid = -1; diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c index d785835ec7b727b789a5fec003d85ad995b8412d..c08f30e55cd6c6161243b2aa199b9f6d717de2bc 100644 --- a/runtime/starpu/control/runtime_context.c +++ b/runtime/starpu/control/runtime_context.c @@ -79,14 +79,14 @@ void RUNTIME_enable( void *runtime_ctxt, int lever ) switch (lever) { case CHAMELEON_DAG: - fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n"); + fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_GENERATE_TRACE)\n"); break; - case CHAMELEON_KERNELPROFILE_MODE: - context_starpu_flags |= (1 << CHAMELEON_KERNELPROFILE_MODE); + case CHAMELEON_GENERATE_STATS: + context_starpu_flags |= (1 << CHAMELEON_GENERATE_STATS); 
starpu_profiling_status_set(STARPU_PROFILING_ENABLE); break; - case CHAMELEON_PROFILING_MODE: - context_starpu_flags |= (1 << CHAMELEON_PROFILING_MODE); + case CHAMELEON_GENERATE_TRACE: + context_starpu_flags |= (1 << CHAMELEON_GENERATE_TRACE); starpu_profiling_status_set(STARPU_PROFILING_ENABLE); break; case CHAMELEON_BOUND: @@ -108,16 +108,16 @@ void RUNTIME_disable( void *runtime_ctxt, int lever ) switch (lever) { case CHAMELEON_DAG: - fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n"); + fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_GENERATE_TRACE)\n"); break; - case CHAMELEON_PROFILING_MODE: - context_starpu_flags |= ~(1 << CHAMELEON_PROFILING_MODE); + case CHAMELEON_GENERATE_TRACE: + context_starpu_flags |= ~(1 << CHAMELEON_GENERATE_TRACE); if ( !context_starpu_flags ) { starpu_profiling_status_set(STARPU_PROFILING_DISABLE); } break; - case CHAMELEON_KERNELPROFILE_MODE: - context_starpu_flags |= ~(1 << CHAMELEON_KERNELPROFILE_MODE); + case CHAMELEON_GENERATE_STATS: + context_starpu_flags |= ~(1 << CHAMELEON_GENERATE_STATS); if ( !context_starpu_flags ) { starpu_profiling_status_set(STARPU_PROFILING_DISABLE); } diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c index bcc79254f0db5ee2e95aab1f703200e9f644d2ae..554d81eff1f0393f2cf033781975c82585a46ba2 100644 --- a/runtime/starpu/control/runtime_control.c +++ b/runtime/starpu/control/runtime_control.c @@ -67,6 +67,12 @@ static int chameleon_starpu_init( starpu_conf_t *conf ) if ( rc == -ENODEV ) { hres = CHAMELEON_ERR_NOT_INITIALIZED; } + + /* Stop profiling as it seems that autostart is not sufficient */ +#if defined(STARPU_USE_FXT) + starpu_fxt_stop_profiling(); +#endif + return hres; } diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c index 98c35eaa05079222a86d19462b793aa3d7e94c37..0fa917acc326e69c03609d750ae15cd0675bf9ec 
100644 --- a/runtime/starpu/control/runtime_options.c +++ b/runtime/starpu/control/runtime_options.c @@ -27,7 +27,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt); options->sequence = sequence; options->request = request; - options->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE; + options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; options->priority = RUNTIME_PRIORITY_MIN; options->workerid = (schedopt == NULL) ? -1 : schedopt->workerid; diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index 498e392c7aa36ba7b73964a118e9dd81511bc265..c0c71d25965e13c2f98bbf8d50624b5488058af9 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -34,6 +34,7 @@ #cmakedefine HAVE_STARPU_DATA_SET_OOC_FLAG #cmakedefine HAVE_STARPU_INTERFACE_COPY2D #cmakedefine HAVE_STARPU_DATA_PEEK +#cmakedefine HAVE_STARPU_SET_LIMIT_SUBMITTED_TASKS #cmakedefine HAVE_STARPU_REUSE_DATA_ON_NODE #cmakedefine HAVE_STARPU_MPI_DATA_MIGRATE #cmakedefine HAVE_STARPU_MPI_DATA_REGISTER diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index e2baddbcd8e31fd6390c7e565616f54e1c5b9488..dfe7776bf89fa8a11ac8c07a4362a66f452eb776 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -25,6 +25,8 @@ */ #include "testings.h" +testing_options_t options; + /** * @brief Defines all the parameters of the testings * @@ -138,11 +140,8 @@ parameter_t parameters[] = { int main (int argc, char **argv) { - int ncores, ngpus, human, generic, check, i, niter; - int trace, nowarmup, profile, forcegpu, api; + int i; int rc, info = 0; - int run_id = 0; - char *func_name; char *input_file; run_list_t *runlist; testing_t * test; @@ -155,20 +154,10 @@ int main (int argc, char **argv) { 
parameters_read_file( input_file ); free(input_file); } - ncores = parameters_getvalue_int( "threads" ); - ngpus = parameters_getvalue_int( "gpus" ); - check = parameters_getvalue_int( "check" ); - human = parameters_getvalue_int( "human" ); - generic = parameters_getvalue_int( "generic" ); - func_name = parameters_getvalue_str( "op" ); - niter = parameters_getvalue_int( "niter" ); - trace = parameters_getvalue_int( "trace" ); - nowarmup = parameters_getvalue_int( "nowarmup" ); - profile = parameters_getvalue_int( "profile" ); - forcegpu = parameters_getvalue_int( "forcegpu" ); - api = parameters_getvalue_int( "api" ); - - rc = CHAMELEON_Init( ncores, ngpus ); + + testing_options_init( &options ); + + rc = CHAMELEON_Init( options.threads, options.gpus ); if ( rc != CHAMELEON_SUCCESS ) { fprintf( stderr, "CHAMELEON_Init failed and returned %d.\n", rc ); info = 1; @@ -176,19 +165,19 @@ int main (int argc, char **argv) { } /* Set ncores to the right value */ - if ( ncores == -1 ) { + if ( options.threads == -1 ) { parameter_t *param; param = parameters_get( 't' ); param->value.ival = CHAMELEON_GetThreadNbr(); + options.threads = param->value.ival; } /* Binds the right function to be called and builds the parameters combinations */ - test = testing_gettest( argv[0], func_name ); - free(func_name); - test_fct_t fptr = (api == 0) ? test->fptr_desc : test->fptr_std; + test = testing_gettest( argv[0], options.op ); + test_fct_t fptr = (options.api == 0) ? test->fptr_desc : test->fptr_std; if ( fptr == NULL ) { fprintf( stderr, "The %s API is not available for function %s\n", - (api == 0) ? "descriptor" : "standard", func_name ); + (options.api == 0) ? 
"descriptor" : "standard", options.op ); info = 1; goto end; } @@ -197,12 +186,12 @@ int main (int argc, char **argv) { runlist = run_list_generate( test->params ); /* Executes the tests */ - run_print_header( test, check, human ); + run_print_header( test, options.check, options.human ); run = runlist->head; /* Force all possible kernels on GPU */ - if ( forcegpu ) { - if ( ngpus == 0 ) { + if ( options.forcegpu ) { + if ( options.gpus == 0 ) { fprintf( stderr, "--forcegpu can't be enable without GPU (-g 0).\n" " Please specify a larger number of GPU or disable this option\n" ); @@ -213,37 +202,31 @@ int main (int argc, char **argv) { } /* Warmup */ - if ( !nowarmup ) { + if ( !options.nowarmup ) { run_arg_list_t copy = run_arg_list_copy( &(run->args) ); - fptr( ©, check ); - run_arg_list_destroy( © ); - } - /* Start kernel statistics */ - if ( profile ) { - CHAMELEON_Enable( CHAMELEON_KERNELPROFILE_MODE ); - } - - /* Start tracing */ - if ( trace ) { - CHAMELEON_Enable( CHAMELEON_PROFILING_MODE ); + /* Run the warmup test as -1 */ + options.run_id = -1; + fptr( ©, options.check ); + run_arg_list_destroy( © ); + options.run_id++; } - if ( generic ) { + if ( options.generic ) { CHAMELEON_Enable( CHAMELEON_GENERIC ); } /* Perform all runs */ while ( run != NULL ) { - for(i=0; i<niter; i++) { + for(i=0; i<options.niter; i++) { run_arg_list_t copy = run_arg_list_copy( &(run->args) ); - rc = fptr( ©, check ); + rc = fptr( ©, options.check ); /* If rc < 0, we skipped the test */ if ( rc >= 0 ) { run_arg_add_int( ©, "RETURN", rc ); - run_print_line( test, ©, check, human, run_id ); - run_id++; + run_print_line( test, ©, options.check, options.human, options.run_id ); + options.run_id++; info += rc; } run_arg_list_destroy( © ); @@ -255,20 +238,15 @@ int main (int argc, char **argv) { run = next; } - /* Stop tracing */ - if ( trace ) { - CHAMELEON_Disable( CHAMELEON_PROFILING_MODE ); - } - - /* Stop kernel statistics and display results */ - if ( profile ) { - 
CHAMELEON_Disable( CHAMELEON_KERNELPROFILE_MODE ); + /* Display kernel statistics if asked */ + if ( options.profile ) { RUNTIME_kernelprofile_display(); } free( runlist ); end: - ;/* OpenMP end */ + /* OpenMP end */ + free( options.op ); CHAMELEON_Finalize(); parameters_destroy(); diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c index e08a076249a9f234f8582331a1e212a31a4338d3..3cc72a49a3384a1cda52b3ad21d60408ade6dc20 100644 --- a/testing/testing_zgemm.c +++ b/testing/testing_zgemm.c @@ -162,7 +162,7 @@ testing_zgemm_std( run_arg_list_t *args, int check ) /* Descriptors */ int Am, An, Bm, Bn; - CHAMELEON_Complex64_t *A, *B, *C, *Cinit; + CHAMELEON_Complex64_t *A, *B, *C; alpha = run_arg_get_complex64( args, "alpha", alpha ); beta = run_arg_get_complex64( args, "beta", beta ); @@ -225,6 +225,7 @@ testing_zgemm_std( run_arg_list_t *args, int check ) /* Check the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplrnt( M, N, Cinit, LDC, seedC ); @@ -238,6 +239,8 @@ testing_zgemm_std( run_arg_list_t *args, int check ) free( B ); free( C ); + (void)api; + (void)check; return hres; } diff --git a/testing/testing_zgesvd.c b/testing/testing_zgesvd.c index a95eeb7338297bcc225a50197cdf487316711c09..413ca1367d0c7650141d31c96343b8bfed9b9384 100644 --- a/testing/testing_zgesvd.c +++ b/testing/testing_zgesvd.c @@ -84,8 +84,8 @@ testing_zgesvd_desc( run_arg_list_t *args, int check ) CHAMELEON_Complex64_t *U, *Vt = NULL; double *S, *D; int LDU = M; - int LDVt = N; - int Un, Vtn; + int LDVt = N; + int Un; CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); @@ -113,7 +113,6 @@ testing_zgesvd_desc( run_arg_list_t *args, int check ) if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { LDVt = ( jobvt == ChamSVec ) ? 
K : N; - Vtn = N; Vt = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) ); } else { @@ -197,8 +196,8 @@ testing_zgesvd_std( run_arg_list_t *args, int check ) CHAMELEON_Complex64_t *A, *A0, *U, *Vt; double *S, *D; int LDU = M; - int LDVt = N; - int Un, Vtn; + int LDVt = N; + int Un; CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); @@ -225,7 +224,6 @@ testing_zgesvd_std( run_arg_list_t *args, int check ) if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { LDVt = ( jobvt == ChamSVec ) ? K : N; - Vtn = N; Vt = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) ); } else { diff --git a/testing/testing_zhemm.c b/testing/testing_zhemm.c index 91adb578be68f97034d5e427dd761b5cfa1c53de..6b1c25699cea21661747ce437c814675422b79f7 100644 --- a/testing/testing_zhemm.c +++ b/testing/testing_zhemm.c @@ -144,7 +144,7 @@ testing_zhemm_std( run_arg_list_t *args, int check ) /* Descriptors */ int An; - CHAMELEON_Complex64_t *A, *B, *C, *Cinit; + CHAMELEON_Complex64_t *A, *B, *C; bump = run_arg_get_double( args, "bump", bump ); alpha = run_arg_get_complex64( args, "alpha", alpha ); @@ -168,7 +168,7 @@ testing_zhemm_std( run_arg_list_t *args, int check ) /* Calculates the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, + cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC ); testing_stop( &test_data, flops_zhemm( side, M, N ) ); #else @@ -179,6 +179,7 @@ testing_zhemm_std( run_arg_list_t *args, int check ) /* Checks the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplrnt( M, N, Cinit, LDC, seedC ); @@ -192,6 +193,7 @@ testing_zhemm_std( run_arg_list_t *args, int check ) free( B ); free( C ); + (void)check; return hres; } diff --git a/testing/testing_zher2k.c b/testing/testing_zher2k.c index 
d3e8f2277c3da1eb1bc1471e5f428caebe69010c..52f9b7e850aa180ddd6a0663d6d093293d0f4953 100644 --- a/testing/testing_zher2k.c +++ b/testing/testing_zher2k.c @@ -146,7 +146,7 @@ testing_zher2k_std( run_arg_list_t *args, int check ) /* Descriptors */ int Am, An, Bm, Bn; - CHAMELEON_Complex64_t *A, *B, *C, *Cinit; + CHAMELEON_Complex64_t *A, *B, *C; bump = run_arg_get_double( args, "bump", bump ); alpha = run_arg_get_complex64( args, "alpha", alpha ); @@ -181,7 +181,7 @@ testing_zher2k_std( run_arg_list_t *args, int check ) /* Calculate the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_zher2k( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, + cblas_zher2k( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, beta, C, LDC ); testing_stop( &test_data, flops_zher2k( K, N ) ); #else @@ -192,6 +192,7 @@ testing_zher2k_std( run_arg_list_t *args, int check ) /* Check the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplghe( bump, uplo, N, Cinit, LDC, seedC ); @@ -205,6 +206,7 @@ testing_zher2k_std( run_arg_list_t *args, int check ) free( B ); free( C ); + (void)check; return hres; } diff --git a/testing/testing_zherk.c b/testing/testing_zherk.c index b68fec2ac8044997c2cdf4e4ac416512f5f12d1b..6dc56c2de270d11b738423131f2c18c046311107 100644 --- a/testing/testing_zherk.c +++ b/testing/testing_zherk.c @@ -138,7 +138,7 @@ testing_zherk_std( run_arg_list_t *args, int check ) /* Descriptors */ int Am, An; - CHAMELEON_Complex64_t *A, *C, *Cinit; + CHAMELEON_Complex64_t *A, *C; alpha = run_arg_get_double( args, "alpha", alpha ); beta = run_arg_get_double( args, "beta", beta ); @@ -167,7 +167,7 @@ testing_zherk_std( run_arg_list_t *args, int check ) /* Calculates the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_zherk( CblasColMajor, (CBLAS_UPLO)uplo, 
(CBLAS_TRANSPOSE)trans, N, K, + cblas_zherk( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, alpha, A, LDA, beta, C, LDC ); testing_stop( &test_data, flops_zherk( N, K ) ); #else @@ -178,6 +178,7 @@ testing_zherk_std( run_arg_list_t *args, int check ) /* Checks the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplghe( bump, uplo, N, Cinit, LDC, seedC ); @@ -190,6 +191,7 @@ testing_zherk_std( run_arg_list_t *args, int check ) free( A ); free( C ); + (void)check; return hres; } diff --git a/testing/testing_zlanhe.c b/testing/testing_zlanhe.c index 23f9a1373e16bd1cd531aad6c0b4554aa511d87e..225f3065640ca5fa6e3f20e31af3917a87b2653c 100644 --- a/testing/testing_zlanhe.c +++ b/testing/testing_zlanhe.c @@ -162,6 +162,8 @@ testing_zlanhe_std( run_arg_list_t *args, int check ) free( A ); + (void)norm; + (void)check; return hres; } diff --git a/testing/testing_zlantr.c b/testing/testing_zlantr.c index e7dc6bdce12fca56e39a5c0011545af76bd265f5..78057457e3cf9f9efe9b377a948e379c03fd9d64 100644 --- a/testing/testing_zlantr.c +++ b/testing/testing_zlantr.c @@ -183,6 +183,8 @@ testing_zlantr_std( run_arg_list_t *args, int check ) free( A ); + (void)norm; + (void)check; return hres; } diff --git a/testing/testing_zpoinv.c b/testing/testing_zpoinv.c index 03f16efe678e065e69bae5148e5f34609aaa9eb7..265b8b64901e155921fb31f150ea43a830b8a2d4 100644 --- a/testing/testing_zpoinv.c +++ b/testing/testing_zpoinv.c @@ -143,6 +143,7 @@ testing_zpoinv_std( run_arg_list_t *args, int check ) free( A ); + (void)check; return hres; } diff --git a/testing/testing_zposv.c b/testing/testing_zposv.c index 0c0fcdac33f694378e8fb0dfd8043d167fe3843d..6cf198eca5ea08b49fd25f700d37d62b7c845c99 100644 --- a/testing/testing_zposv.c +++ b/testing/testing_zposv.c @@ -177,6 +177,7 @@ testing_zposv_std( run_arg_list_t *args, int check ) free( A ); free( X ); + (void)check; return hres; } diff --git a/testing/testing_zpotrf.c 
b/testing/testing_zpotrf.c index 48849af44a8952dbd4cc86ade5d8b9398eb79ecc..505a95f511095ddf165e51606a0d5bd76177bfad 100644 --- a/testing/testing_zpotrf.c +++ b/testing/testing_zpotrf.c @@ -134,6 +134,7 @@ testing_zpotrf_std( run_arg_list_t *args, int check ) free( A ); + (void)check; return hres; } diff --git a/testing/testing_zpotri.c b/testing/testing_zpotri.c index 19c075e600422c5d88a1e5808a5e844cbb163432..012a97cd89e79a1ff47ea51d31dcd552d63b0109 100644 --- a/testing/testing_zpotri.c +++ b/testing/testing_zpotri.c @@ -145,6 +145,7 @@ testing_zpotri_std( run_arg_list_t *args, int check ) free( A ); + (void)check; return hres; } diff --git a/testing/testing_zpotrs.c b/testing/testing_zpotrs.c index 766dd8a525c67a4575490ba4510ada5ebc650a0e..f1a49a483052561d9032c2e053cf30b267873aa4 100644 --- a/testing/testing_zpotrs.c +++ b/testing/testing_zpotrs.c @@ -167,6 +167,7 @@ testing_zpotrs_std( run_arg_list_t *args, int check ) free( A ); free( X ); + (void)check; return hres; } diff --git a/testing/testing_zsymm.c b/testing/testing_zsymm.c index 377848b643ab489775a39795fe79bec00d855201..a4d94002c865edce86c4d7fc1efee3bb2a4bac07 100644 --- a/testing/testing_zsymm.c +++ b/testing/testing_zsymm.c @@ -144,7 +144,7 @@ testing_zsymm_std( run_arg_list_t *args, int check ) /* Descriptors */ int An; - CHAMELEON_Complex64_t *A, *B, *C, *Cinit; + CHAMELEON_Complex64_t *A, *B, *C; bump = run_arg_get_double( args, "bump", bump ); alpha = run_arg_get_complex64( args, "alpha", alpha ); @@ -179,6 +179,7 @@ testing_zsymm_std( run_arg_list_t *args, int check ) /* Checks the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplrnt( M, N, Cinit, LDC, seedC ); @@ -192,6 +193,7 @@ testing_zsymm_std( run_arg_list_t *args, int check ) free( B ); free( C ); + (void)check; return hres; } diff --git a/testing/testing_zsyr2k.c b/testing/testing_zsyr2k.c index 
830cc5ea688fc5f604681398ba3bb673c41b8e9e..44437ec6680937199b54ade9b8a4494a91627074 100644 --- a/testing/testing_zsyr2k.c +++ b/testing/testing_zsyr2k.c @@ -146,7 +146,7 @@ testing_zsyr2k_std( run_arg_list_t *args, int check ) /* Descriptors */ int Am, An, Bm, Bn; - CHAMELEON_Complex64_t *A, *B, *C, *Cinit; + CHAMELEON_Complex64_t *A, *B, *C; bump = run_arg_get_double( args, "bump", bump ); alpha = run_arg_get_complex64( args, "alpha", alpha ); @@ -192,6 +192,7 @@ testing_zsyr2k_std( run_arg_list_t *args, int check ) /* Check the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplgsy( bump, uplo, N, Cinit, LDC, seedC ); @@ -205,6 +206,7 @@ testing_zsyr2k_std( run_arg_list_t *args, int check ) free( B ); free( C ); + (void)check; return hres; } diff --git a/testing/testing_zsyrk.c b/testing/testing_zsyrk.c index 226e5093f210290c9b42cbdab490fe624295d343..9000c452dfa1f5db90499985673d6739258c45a7 100644 --- a/testing/testing_zsyrk.c +++ b/testing/testing_zsyrk.c @@ -137,7 +137,7 @@ testing_zsyrk_std( run_arg_list_t *args, int check ) /* Descriptors */ int Am, An; - CHAMELEON_Complex64_t *A, *C, *Cinit; + CHAMELEON_Complex64_t *A, *C; alpha = run_arg_get_complex64( args, "alpha", alpha ); beta = run_arg_get_complex64( args, "beta", beta ); @@ -177,6 +177,7 @@ testing_zsyrk_std( run_arg_list_t *args, int check ) /* Checks the solution */ if ( check ) { + CHAMELEON_Complex64_t *Cinit; Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplgsy( bump, uplo, N, Cinit, LDC, seedC ); @@ -189,6 +190,7 @@ testing_zsyrk_std( run_arg_list_t *args, int check ) free( A ); free( C ); + (void)check; return hres; } diff --git a/testing/testing_ztrmm.c b/testing/testing_ztrmm.c index 92d110468666614090d1a8668c11b386405c0401..1a38de09bd674e823b7b7c658fa42625e4bd8009 100644 --- a/testing/testing_ztrmm.c +++ b/testing/testing_ztrmm.c @@ -132,7 +132,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check 
) /* Descriptors */ int An; - CHAMELEON_Complex64_t *A, *B, *Binit; + CHAMELEON_Complex64_t *A, *B; alpha = run_arg_get_complex64( args, "alpha", alpha ); @@ -152,7 +152,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check ) /* Calculates the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_ztrmm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + cblas_ztrmm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB ); testing_stop( &test_data, flops_ztrmm( side, N, M ) ); #else @@ -163,6 +163,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check ) /* Checks the solution */ if ( check ) { + CHAMELEON_Complex64_t *Binit; Binit = malloc( LDB*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplrnt( M, N, Binit, LDB, seedB ); @@ -175,6 +176,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check ) free( A ); free( B ); + (void)check; return hres; } diff --git a/testing/testing_ztrsm.c b/testing/testing_ztrsm.c index 6e721c8ab7da99cc2804fcc484b5553861b35e69..3bff1b5a4c8c4f5638f65ba585028e2b376ca26b 100644 --- a/testing/testing_ztrsm.c +++ b/testing/testing_ztrsm.c @@ -126,7 +126,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check ) int seedB = run_arg_get_int( args, "seedB", random() ); /* Descriptors */ - CHAMELEON_Complex64_t *A, *B, *Binit; + CHAMELEON_Complex64_t *A, *B; alpha = run_arg_get_complex64( args, "alpha", alpha ); @@ -144,7 +144,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check ) /* Calculates the product */ #if defined(CHAMELEON_TESTINGS_VENDOR) testing_start( &test_data ); - cblas_ztrsm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, + cblas_ztrsm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB ); testing_stop( &test_data, flops_ztrsm( side, M, N ) ); #else @@ -155,6 +155,7 @@ 
testing_ztrsm_std( run_arg_list_t *args, int check ) /* Checks the solution */ if ( check ) { + CHAMELEON_Complex64_t *Binit; Binit = malloc( LDB*N*sizeof(CHAMELEON_Complex64_t) ); CHAMELEON_zplrnt( M, N, Binit, LDB, seedB ); @@ -167,6 +168,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check ) free( A ); free( B ); + (void)check; return hres; } diff --git a/testing/testings.c b/testing/testings.c index 09e1d91bee09adbffd045816fa46538b94ff4bb0..b44047e6708402da7a822597b238a8113310338e 100644 --- a/testing/testings.c +++ b/testing/testings.c @@ -17,6 +17,8 @@ */ #include "testings.h" +extern testing_options_t options; + /** * @brief List of all the testings available. */ @@ -77,12 +79,47 @@ testing_gettest( const char *prog_name, return test; } +/** + * @brief Initialize the option structure to avoid looking for the parameters at + * each iteration + */ +void +testing_options_init( testing_options_t *options ) +{ + options->human = parameters_getvalue_int( "human" ); + options->niter = parameters_getvalue_int( "niter" ); + options->nowarmup = parameters_getvalue_int( "nowarmup" ); +#if !defined(CHAMELEON_TESTINGS_VENDOR) + options->api = parameters_getvalue_int( "api" ); + options->async = parameters_getvalue_int( "async" ); + options->check = parameters_getvalue_int( "check" ); + options->forcegpu = parameters_getvalue_int( "forcegpu" ); + options->generic = parameters_getvalue_int( "generic" ); + options->gpus = parameters_getvalue_int( "gpus" ); + options->mtxfmt = parameters_getvalue_int( "mtxfmt" ); + options->P = parameters_getvalue_int( "P" ); + options->profile = parameters_getvalue_int( "profile" ); + options->splitsub = parameters_getvalue_int( "splitsub" ); + options->threads = parameters_getvalue_int( "threads" ); + options->trace = parameters_getvalue_int( "trace" ); +#endif + + options->file = parameters_getvalue_str( "file" ); + options->op = parameters_getvalue_str( "op" ); + + options->run_id = 0; +} + /** * @brief Starts the measure for the 
testing. */ void testing_start( testdata_t *tdata ) { + tdata->sequence = NULL; + tdata->request.status = 0; + tdata->request.schedopt = NULL; + #if defined(CHAMELEON_TESTINGS_VENDOR) /* * If we test the vendor functions, we want to use all the threads of the @@ -91,24 +128,31 @@ testing_start( testdata_t *tdata ) */ CHAMELEON_Pause(); #else - int splitsub = parameters_getvalue_int( "splitsub" ); - int async = parameters_getvalue_int( "async" ) || splitsub; -#endif - - tdata->sequence = NULL; - tdata->request.status = 0; - tdata->request.schedopt = NULL; - -#if !defined(CHAMELEON_TESTINGS_VENDOR) #if defined(CHAMELEON_USE_MPI) CHAMELEON_Distributed_start(); #endif - - if ( async ) { + /* + * Create the sequence for the asynchronous calls + */ + if ( options.async ) { CHAMELEON_Sequence_Create( &(tdata->sequence) ); } - if ( splitsub ) { + /* Start kernel statistics */ + if ( options.profile && (options.run_id >= 0) ) { + CHAMELEON_Enable( CHAMELEON_GENERATE_STATS ); + } + + /* Start tracing */ + if ( options.trace && (options.run_id >= 0) ) { + CHAMELEON_Enable( CHAMELEON_GENERATE_TRACE ); + } + + /* + * Pause the task execution if we want to time separately the task + * submission from the task execution + */ + if ( options.splitsub ) { CHAMELEON_Pause(); } #endif @@ -127,13 +171,10 @@ testing_stop( testdata_t *tdata, cham_fixdbl_t flops ) cham_fixdbl_t t0, t1, t2, gflops; #if !defined(CHAMELEON_TESTINGS_VENDOR) - int splitsub = parameters_getvalue_int( "splitsub" ); - int async = parameters_getvalue_int( "async" ) || splitsub; - /* Submission is done, we need to start the computations */ - if ( async ) { + if ( options.async ) { tdata->tsub = RUNTIME_get_time(); - if ( splitsub ) { + if ( options.splitsub ) { CHAMELEON_Resume(); } CHAMELEON_Sequence_Wait( tdata->sequence ); @@ -157,9 +198,19 @@ testing_stop( testdata_t *tdata, cham_fixdbl_t flops ) tdata->texec = t2 - t0; #if !defined(CHAMELEON_TESTINGS_VENDOR) - if ( splitsub ) { + if ( options.splitsub ) { 
tdata->texec = t2 - t1; } + + /* Stop tracing */ + if ( options.trace && (options.run_id >= 0) ) { + CHAMELEON_Disable( CHAMELEON_GENERATE_TRACE ); + } + + /* Stop kernel statistics */ + if ( options.profile && (options.run_id >= 0) ) { + CHAMELEON_Disable( CHAMELEON_GENERATE_STATS ); + } #endif gflops = flops * 1.e-9 / tdata->texec; diff --git a/testing/testings.h b/testing/testings.h index 2832223f4edeac53eba444a13c5e8249fc4592ed..5326b192250ccce54602a854cab040ad21619820 100644 --- a/testing/testings.h +++ b/testing/testings.h @@ -239,8 +239,6 @@ void parameters_destroy( ); run_list_t *run_list_generate( const char **params ); void run_list_destroy( run_list_elt_t *run ); -void testing_register( testing_t *test ); - /** * @brief Define the data associated to a single run of a testing */ @@ -254,12 +252,39 @@ typedef struct testdata_ { cham_fixdbl_t texec; /**< The execution time of test */ cham_fixdbl_t tsub; /**< The task submission tome of the test */ RUNTIME_sequence_t *sequence; /**< The sequence to run the test if splitsub */ - RUNTIME_request_t request; /**< The request to run the test if splitsub */ + RUNTIME_request_t request; /**< The request to run the test if splitsub */ } testdata_t; +/** + * @brief Structure to store the read parameters for a quicker access + */ +typedef struct testing_options_ { + /* Static parameters */ + int api; + int async; + int check; + int forcegpu; + int generic; + int gpus; + int human; + int mtxfmt; + int niter; + int nowarmup; + int P; + int profile; + int splitsub; + int threads; + int trace; + char *file; + char *op; + /* Additionnal information to exchange between the main and the testings */ + int run_id; +} testing_options_t; + void testing_register( testing_t *test ); testing_t *testing_gettest( const char *prog_name, const char *func_name ); void testing_start( testdata_t *tdata ); void testing_stop( testdata_t *tdata, cham_fixdbl_t flops ); +void testing_options_init( testing_options_t *options ); #endif /* 
_testings_h_ */ diff --git a/testing/vendor_ztesting.c b/testing/vendor_ztesting.c index f959ad3ced59fbd0040a3d531a3578c46066cd8f..df72f106c4c9315a0878d6811da589ce6975f44b 100644 --- a/testing/vendor_ztesting.c +++ b/testing/vendor_ztesting.c @@ -19,6 +19,8 @@ */ #include "testings.h" +testing_options_t options; + /** * @brief Defines all the parameters of the testings * @@ -96,11 +98,8 @@ parameter_t parameters[] = { int main (int argc, char **argv) { - int ncores, human, i, niter; - int nowarmup; - int rc, info, check = 0; - int run_id = 0; - char *func_name; + int i; + int rc, info = 0; char *input_file; run_list_t *runlist; testing_t * test; @@ -113,13 +112,10 @@ int main (int argc, char **argv) { parameters_read_file( input_file ); free(input_file); } - ncores = parameters_getvalue_int( "threads" ); - human = parameters_getvalue_int( "human" ); - func_name = parameters_getvalue_str( "op" ); - niter = parameters_getvalue_int( "niter" ); - nowarmup = parameters_getvalue_int( "nowarmup" ); - rc = CHAMELEON_Init( ncores, 0 ); + testing_options_init( &options ); + + rc = CHAMELEON_Init( options.threads, 0 ); if ( rc != CHAMELEON_SUCCESS ) { fprintf( stderr, "CHAMELEON_Init failed and returned %d.\n", rc ); info = 1; @@ -127,18 +123,18 @@ int main (int argc, char **argv) { } /* Set ncores to the right value */ - if ( ncores == -1 ) { + if ( options.threads == -1 ) { parameter_t *param; param = parameters_get( 't' ); param->value.ival = CHAMELEON_GetThreadNbr(); + options.threads = param->value.ival; } /* Binds the right function to be called and builds the parameters combinations */ - test = testing_gettest( argv[0], func_name ); - free(func_name); + test = testing_gettest( argv[0], options.op ); test_fct_t fptr = test->fptr_std; if ( fptr == NULL ) { - fprintf( stderr, "The vendor API is not available for function %s\n", func_name ); + fprintf( stderr, "The vendor API is not available for function %s\n", options.op ); info = 1; goto end; } @@ -147,27 +143,31 @@ int 
main (int argc, char **argv) { runlist = run_list_generate( test->params ); /* Executes the tests */ - run_print_header( test, check, human ); + run_print_header( test, options.check, options.human ); run = runlist->head; /* Warmup */ - if ( !nowarmup ) { + if ( !options.nowarmup ) { run_arg_list_t copy = run_arg_list_copy( &(run->args) ); - fptr( &copy, check ); + + /* Run the warmup test as -1 */ + options.run_id = -1; + fptr( &copy, options.check ); run_arg_list_destroy( &copy ); + options.run_id++; } /* Perform all runs */ while ( run != NULL ) { - for(i=0; i<niter; i++) { + for(i=0; i<options.niter; i++) { run_arg_list_t copy = run_arg_list_copy( &(run->args) ); - rc = fptr( &copy, check ); + rc = fptr( &copy, options.check ); /* If rc < 0, we skipped the test */ if ( rc >= 0 ) { run_arg_add_int( &copy, "RETURN", rc ); - run_print_line( test, &copy, check, human, run_id ); - run_id++; + run_print_line( test, &copy, options.check, options.human, options.run_id ); + options.run_id++; info += rc; } run_arg_list_destroy( &copy ); @@ -182,7 +182,8 @@ int main (int argc, char **argv) { free( runlist ); end: - ;/* OpenMP end */ + /* OpenMP end */ + free( options.op ); CHAMELEON_Finalize(); parameters_destroy();