diff --git a/compute/pzgebrd.c b/compute/pzgebrd.c
index cbfa67824f3032c94279739ea2e6b5533734dcb2..d6de8929fe269741e3e378f445d3931df009247b 100644
--- a/compute/pzgebrd.c
+++ b/compute/pzgebrd.c
@@ -246,6 +246,8 @@ chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A,
     chameleon_desc_destroy( &descAB );
 
     RUNTIME_options_finalize( &options, chamctxt );
+
+    return CHAMELEON_SUCCESS;
 }
 
 int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt,
@@ -260,7 +262,7 @@ int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt,
     CHAM_desc_t *subA, *subT, *subUVT, *subD;
     CHAM_desc_t descUl, descUt;
     CHAM_desc_t descVTl, descVTt;
-    int M, N, NB, ib;
+    int M, N, NB;
 
     chamctxt = chameleon_context_self();
     if ( sequence->status != CHAMELEON_SUCCESS ) {
@@ -358,4 +360,6 @@ int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt,
     }
 
     RUNTIME_options_finalize( &options, chamctxt );
+
+    return CHAMELEON_SUCCESS;
 }
diff --git a/compute/zgesvd.c b/compute/zgesvd.c
index 59ef98d92feee5297509b78d75613083e10eda0f..66f8fbef882749896226f351576c49e7942a4086 100644
--- a/compute/zgesvd.c
+++ b/compute/zgesvd.c
@@ -409,7 +409,7 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt,
     CHAM_desc_t descT;
     CHAM_desc_t D, *Dptr = NULL;
     double *E;
-    int M, N, MINMN, NB;
+    int M, N, MINMN;
 
     CHAM_context_t *chamctxt;
     chamctxt = chameleon_context_self();
@@ -465,7 +465,6 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt,
     M     = descA.m;
     N     = descA.n;
     MINMN = chameleon_min(M, N);
-    NB    = descA.mb;
 #if defined(CHAMELEON_COPY_DIAG)
     {
         chameleon_zdesc_copy_and_restrict( A, &D, A->m, A->n );
diff --git a/control/auxiliary.h b/control/auxiliary.h
index f335bb3620d77f685c6e80e8da35d502fa64c655..6e4d5b2ae0f8d4fe7036550f50b8c16bdec733cf 100644
--- a/control/auxiliary.h
+++ b/control/auxiliary.h
@@ -24,6 +24,9 @@
 #ifndef _chameleon_auxiliary_h_
 #define _chameleon_auxiliary_h_
 
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
 #include "chameleon/struct.h"
 #include "chameleon/tasks.h"
 
diff --git a/control/common.h b/control/common.h
index 85e696d1a3a4b8a3bd98e2a439cca159fc0fa68d..d68934a6cc77f28680bc84860c1d6ae7f1e8444d 100644
--- a/control/common.h
+++ b/control/common.h
@@ -83,7 +83,7 @@
 #define CHAMELEON_RHBLK       chamctxt->rhblock
 #define CHAMELEON_TRANSLATION chamctxt->translation
 #define CHAMELEON_PARALLEL    chamctxt->parallel_enabled
-#define CHAMELEON_PROFILING   chamctxt->profiling_enabled
+#define CHAMELEON_STATISTICS  chamctxt->statistics_enabled
 
 /**
  *  IPT internal define
diff --git a/control/context.c b/control/context.c
index 827ebc811db3a4b0989d093b752c1a89b5b03493..dc444168b455531529e6557f7bf5335c05155f78 100644
--- a/control/context.c
+++ b/control/context.c
@@ -134,7 +134,7 @@ CHAM_context_t *chameleon_context_create()
     chamctxt->warnings_enabled   = chameleon_env_is_off( "CHAMELEON_WARNINGS" );
     chamctxt->autotuning_enabled = chameleon_env_is_on( "CHAMELEON_AUTOTUNING" );
     chamctxt->parallel_enabled   = chameleon_env_is_on( "CHAMELEON_PARALLEL_KERNEL" );
-    chamctxt->profiling_enabled  = chameleon_env_is_on( "CHAMELEON_PROFILING_MODE" );
+    chamctxt->statistics_enabled = chameleon_env_is_on( "CHAMELEON_GENERATE_STATS" );
     chamctxt->progress_enabled   = chameleon_env_is_on( "CHAMELEON_PROGRESS" );
     chamctxt->generic_enabled    = chameleon_env_is_on( "CHAMELEON_GENERIC" );
     chamctxt->autominmax_enabled = chameleon_env_is_on( "CHAMELEON_AUTOMINMAX" );
@@ -184,8 +184,9 @@ int chameleon_context_destroy(){
  *          Feature to be enabled:
  *          @arg CHAMELEON_WARNINGS   printing of warning messages,
  *          @arg CHAMELEON_AUTOTUNING autotuning for tile size and inner block size.
- *          @arg CHAMELEON_PROFILING_MODE  activate profiling of kernels
- *          @arg CHAMELEON_PROGRESS  activate progress indicator
+ *          @arg CHAMELEON_GENERATE_TRACE enable/start the trace generation
+ *          @arg CHAMELEON_GENERATE_STATS enable/start the kernel statistics
+ *          @arg CHAMELEON_PROGRESS enable the progress indicator
  *          @arg CHAMELEON_GEMM3M  Use z/cgemm3m for complexe matrix-matrix products
  *          @arg CHAMELEON_GENERIC  enable/disable GEMM3M  Use z/cgemm3m for complexe matrix-matrix products
  *
@@ -212,11 +213,11 @@ int CHAMELEON_Enable(int option)
         case CHAMELEON_AUTOTUNING:
             chamctxt->autotuning_enabled = CHAMELEON_TRUE;
             break;
-        case CHAMELEON_PROFILING_MODE:
+        case CHAMELEON_GENERATE_TRACE:
             RUNTIME_start_profiling();
             break;
-        case CHAMELEON_KERNELPROFILE_MODE:
-            chamctxt->profiling_enabled = CHAMELEON_TRUE;
+        case CHAMELEON_GENERATE_STATS:
+            chamctxt->statistics_enabled = CHAMELEON_TRUE;
             break;
         case CHAMELEON_PROGRESS:
             chamctxt->progress_enabled = CHAMELEON_TRUE;
@@ -259,8 +260,9 @@ int CHAMELEON_Enable(int option)
  *          Feature to be disabled:
  *          @arg CHAMELEON_WARNINGS   printing of warning messages,
  *          @arg CHAMELEON_AUTOTUNING autotuning for tile size and inner block size.
- *          @arg CHAMELEON_PROFILING_MODE  deactivate profiling of kernels
- *          @arg CHAMELEON_PROGRESS  deactivate progress indicator
+ *          @arg CHAMELEON_GENERATE_TRACE disable/pause the trace generation
+ *          @arg CHAMELEON_GENERATE_STATS disable/pause the kernel statistics
+ *          @arg CHAMELEON_PROGRESS disable the progress indicator
  *          @arg CHAMELEON_GEMM3M  Use z/cgemm3m for complexe matrix-matrix products
  *
  *******************************************************************************
@@ -285,11 +287,11 @@ int CHAMELEON_Disable(int option)
         case CHAMELEON_AUTOTUNING:
             chamctxt->autotuning_enabled = CHAMELEON_FALSE;
             break;
-        case CHAMELEON_PROFILING_MODE:
+        case CHAMELEON_GENERATE_TRACE:
             RUNTIME_stop_profiling();
             break;
-        case CHAMELEON_KERNELPROFILE_MODE:
-            chamctxt->profiling_enabled = CHAMELEON_FALSE;
+        case CHAMELEON_GENERATE_STATS:
+            chamctxt->statistics_enabled = CHAMELEON_FALSE;
             break;
         case CHAMELEON_PROGRESS:
             chamctxt->progress_enabled = CHAMELEON_FALSE;
diff --git a/doc/user/chapters/using.org b/doc/user/chapters/using.org
index 91533bfedb639ab00e229b01ebbf20c6f846f80d..cf0d7cc286de80d8124b49937f905d99bec9b49a 100644
--- a/doc/user/chapters/using.org
+++ b/doc/user/chapters/using.org
@@ -103,7 +103,7 @@
      * *CHAMELEON_WARNINGS* enables/disables the warning output
      * *CHAMELEON_PARALLEL_KERNEL* enables/disables the use of
        multi-threaded kernels. Available only for StarPU runtime system.
-     * *CHAMELEON_PROFILING_MODE* enables the profiling information of
+     * *CHAMELEON_GENERATE_STATS* enables the collection of statistics on
        the kernels (StarPU specific)
      * *CHAMELEON_PROGRESS* enables the progress function to show the
        percentage of tasks completed.
@@ -848,7 +848,8 @@
      Features that can be enabled/disabled:
      * *CHAMELEON_WARNINGS*:   printing of warning messages,
      * *CHAMELEON_AUTOTUNING*: autotuning for tile size and inner block size (inactive),
-     * *CHAMELEON_PROFILING_MODE*:  activate kernels profiling,
+     * *CHAMELEON_GENERATE_TRACE*: enable/start the trace generation,
+     * *CHAMELEON_GENERATE_STATS*: enable/start the kernel statistics,
      * *CHAMELEON_PROGRESS*:  to print a progress status,
      * *CHAMELEON_GEMM3M*: to enable the use of the /gemm3m/ blas bunction.
 
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index 71b18d8d41dfb2b87126dc54937b42c5413d4c37..d311bf63703e3de4ee27d2bfa15c9127a80db281 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -204,8 +204,10 @@ typedef enum chameleon_store_e {
 #define CHAMELEON_ERRORS              2
 #define CHAMELEON_AUTOTUNING          3
 #define CHAMELEON_DAG                 4
-#define CHAMELEON_PROFILING_MODE      5
-#define CHAMELEON_KERNELPROFILE_MODE  6
+#define CHAMELEON_GENERATE_TRACE      5
+#define CHAMELEON_PROFILING_MODE      CHAMELEON_GENERATE_TRACE  /* _deprecated_ */
+#define CHAMELEON_GENERATE_STATS      6
+#define CHAMELEON_KERNELPROFILE_MODE  CHAMELEON_GENERATE_STATS  /* _deprecated_ */
 #define CHAMELEON_PARALLEL_MODE       7
 #define CHAMELEON_BOUND               8
 #define CHAMELEON_PROGRESS            9
diff --git a/include/chameleon/fortran.h b/include/chameleon/fortran.h
index 679eb3a5a2ab2f1cc8e7cb09e654d1d8b185f37d..eae7ff7c7828fd75cdb5295b7520ac7b12b4f26a 100644
--- a/include/chameleon/fortran.h
+++ b/include/chameleon/fortran.h
@@ -165,15 +165,20 @@
 !   State machine switches
 !
       integer CHAMELEON_WARNINGS, CHAMELEON_ERRORS, CHAMELEON_AUTOTUNING
-      integer CHAMELEON_DAG, CHAMELEON_PROFILING_MODE, CHAMELEON_PARALLEL_MODE
-      integer CHAMELEON_BOUND
+      integer CHAMELEON_DAG, CHAMELEON_GENERATE_TRACE, CHAMELEON_GENERATE_STATS
+      integer CHAMELEON_PARALLEL_MODE, CHAMELEON_BOUND, CHAMELEON_PROGRESS
+      integer CHAMELEON_GEMM3M, CHAMELEON_GENERIC
       parameter ( CHAMELEON_WARNINGS       = 1 )
       parameter ( CHAMELEON_ERRORS         = 2 )
       parameter ( CHAMELEON_AUTOTUNING     = 3 )
       parameter ( CHAMELEON_DAG            = 4 )
-      parameter ( CHAMELEON_PROFILING_MODE = 5 )
-      parameter ( CHAMELEON_PARALLEL_MODE  = 6 )
-      parameter ( CHAMELEON_BOUND          = 7 )
+      parameter ( CHAMELEON_GENERATE_TRACE = 5 )
+      parameter ( CHAMELEON_GENERATE_STATS = 6 )
+      parameter ( CHAMELEON_PARALLEL_MODE  = 7 )
+      parameter ( CHAMELEON_BOUND          = 8 )
+      parameter ( CHAMELEON_PROGRESS       = 9 )
+      parameter ( CHAMELEON_GEMM3M         = 10 )
+      parameter ( CHAMELEON_GENERIC        = 11 )
 
 !********************************************************************
 !   CHAMELEON constants - configuration  parameters
diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h
index 8794b5618391d33fd7da9987cd7d399fe92f6073..68642a0444946ba280a05edc830849d89f4b0418 100644
--- a/include/chameleon/runtime.h
+++ b/include/chameleon/runtime.h
@@ -66,7 +66,7 @@ RUNTIME_context_destroy( CHAM_context_t *ctxt );
  *            Pointer to the runtime data structure
  *
  * @param[in] option
- *            @arg CHAMELEON_PROFILING_MODE: start the profiling mode of the runtime.
+ *            @arg CHAMELEON_GENERATE_TRACE: start the trace generation mode of the runtime.
  */
 void
 RUNTIME_enable( void *runtime_ctxt, int option );
@@ -79,7 +79,7 @@ RUNTIME_enable( void *runtime_ctxt, int option );
  *            Pointer to the runtime data structure
  *
  * @param[in] option
- *            @arg CHAMELEON_PROFILING_MODE: stop the profiling mode of the runtime.
+ *            @arg CHAMELEON_GENERATE_TRACE: stop the trace generation mode of the runtime.
  */
 void
 RUNTIME_disable( void *runtime_ctxt, int option );
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index bd49119df09a400e09e21a6d9096b0ec5a19c7ea..7e16010124e1ff8782449d8c0d6630abf42c7e5a 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -135,7 +135,7 @@ typedef struct chameleon_context_s {
     cham_bool_t        warnings_enabled;
     cham_bool_t        autotuning_enabled;
     cham_bool_t        parallel_enabled;
-    cham_bool_t        profiling_enabled;
+    cham_bool_t        statistics_enabled;
     cham_bool_t        progress_enabled;
     cham_bool_t        generic_enabled;
     cham_bool_t        autominmax_enabled;
diff --git a/runtime/openmp/control/runtime_context.c b/runtime/openmp/control/runtime_context.c
index a4332ef9f4b3e0840b76624cdf4c920ba8ea9e95..8faa242f76d8a1592d5561ecdf721f3741bb7713 100644
--- a/runtime/openmp/control/runtime_context.c
+++ b/runtime/openmp/control/runtime_context.c
@@ -49,8 +49,11 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "DAG is not available with OpenMP\n");
         break;
-    case CHAMELEON_PROFILING_MODE:
-        fprintf(stderr, "Profiling is not available with OpenMP\n");
+    case CHAMELEON_GENERATE_TRACE:
+        fprintf(stderr, "Trace generation is not available with OpenMP\n");
+        break;
+    case CHAMELEON_GENERATE_STATS:
+        fprintf(stderr, "Kernel statistics are not available with OpenMP\n");
         break;
     case CHAMELEON_BOUND:
         fprintf(stderr, "Bound computation is not available with OpenMP\n");
@@ -72,8 +75,11 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "DAG is not available with OpenMP\n");
         break;
-    case CHAMELEON_PROFILING_MODE:
-        fprintf(stderr, "Profiling is not available with OpenMP\n");
+    case CHAMELEON_GENERATE_TRACE:
+        fprintf(stderr, "Trace generation is not available with OpenMP\n");
+        break;
+    case CHAMELEON_GENERATE_STATS:
+        fprintf(stderr, "Kernel statistics are not available with OpenMP\n");
         break;
     case CHAMELEON_BOUND:
         fprintf(stderr, "Bound computation is not available with OpenMP\n");
diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c
index c146c1f262db15734ec09fe59c941588aa342c00..e626e9fa5d419dd3f79630575e1d4b45fbcf70cb 100644
--- a/runtime/openmp/control/runtime_options.c
+++ b/runtime/openmp/control/runtime_options.c
@@ -26,7 +26,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
 {
     options->sequence  = sequence;
     options->request   = request;
-    options->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE;
+    options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
     options->parallel  = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
     options->priority  = RUNTIME_PRIORITY_MIN;
     options->workerid  = -1;
diff --git a/runtime/parsec/control/runtime_context.c b/runtime/parsec/control/runtime_context.c
index a15d52eeede14e037c752b7eb7fde144177d59a9..f2ce0fb3e1c9678717c8a585c4e0b6fc039ab82f 100644
--- a/runtime/parsec/control/runtime_context.c
+++ b/runtime/parsec/control/runtime_context.c
@@ -48,12 +48,15 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "DAG is not available with PaRSEC\n");
         break;
-    case CHAMELEON_PROFILING_MODE:
-        fprintf(stderr, "Profiling is not available with PaRSEC\n");
+    case CHAMELEON_GENERATE_TRACE:
+        fprintf(stderr, "Trace generation is not available with PaRSEC\n");
         //parsec_profiling_start();
         break;
+    case CHAMELEON_GENERATE_STATS:
+        fprintf(stderr, "Kernel statistics are not available with PaRSEC\n");
+        break;
     case CHAMELEON_BOUND:
-        fprintf(stderr, "Bound computation is not available with Quark\n");
+        fprintf(stderr, "Bound computation is not available with PaRSEC\n");
         break;
     default:
         return;
@@ -73,9 +76,12 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "DAG is not available with PaRSEC\n");
         break;
-    case CHAMELEON_PROFILING_MODE:
-        fprintf(stderr, "Profiling is not available with PaRSEC\n");
-        //parsec_profiling_stop();
+    case CHAMELEON_GENERATE_TRACE:
+        fprintf(stderr, "Trace generation is not available with PaRSEC\n");
+        //parsec_profiling_stop();
+        break;
+    case CHAMELEON_GENERATE_STATS:
+        fprintf(stderr, "Kernel statistics are not available with PaRSEC\n");
         break;
     case CHAMELEON_BOUND:
         fprintf(stderr, "Bound computation is not available with PaRSEC\n");
diff --git a/runtime/parsec/control/runtime_options.c b/runtime/parsec/control/runtime_options.c
index dac3176f5e3af7091fcf3d6a07be01a12c3b47f7..0fe1498b236ccfad82d75898992094b0cb1c785a 100644
--- a/runtime/parsec/control/runtime_options.c
+++ b/runtime/parsec/control/runtime_options.c
@@ -24,7 +24,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
 {
     options->sequence   = sequence;
     options->request    = request;
-    options->profiling  = CHAMELEON_PROFILING == CHAMELEON_TRUE;
+    options->profiling  = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
     options->parallel   = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
     options->priority   = RUNTIME_PRIORITY_MIN;
     options->workerid  = -1;
diff --git a/runtime/quark/control/runtime_context.c b/runtime/quark/control/runtime_context.c
index 64b9b183b72af0b0401db0e48f594b3dd9b47192..41a3bab7810018f8ca13bdac8dccfe2798a1350f 100644
--- a/runtime/quark/control/runtime_context.c
+++ b/runtime/quark/control/runtime_context.c
@@ -51,8 +51,11 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
         QUARK_Barrier( runtime_ctxt );
         QUARK_DOT_DAG_Enable( runtime_ctxt, 1 );
         break;
-    case CHAMELEON_PROFILING_MODE:
-        fprintf(stderr, "Profiling is not available with Quark\n");
+    case CHAMELEON_GENERATE_TRACE:
+        fprintf(stderr, "Trace generation is not available with Quark\n");
+        break;
+    case CHAMELEON_GENERATE_STATS:
+        fprintf(stderr, "Kernel statistics are not available with Quark\n");
         break;
     case CHAMELEON_BOUND:
         fprintf(stderr, "Bound computation is not available with Quark\n");
@@ -74,8 +77,11 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
         QUARK_Barrier( runtime_ctxt );
         QUARK_DOT_DAG_Enable( runtime_ctxt, 0 );
         break;
-    case CHAMELEON_PROFILING_MODE:
-        fprintf(stderr, "Profiling is not available with Quark\n");
+    case CHAMELEON_GENERATE_TRACE:
+        fprintf(stderr, "Trace generation is not available with Quark\n");
+        break;
+    case CHAMELEON_GENERATE_STATS:
+        fprintf(stderr, "Kernel statistics are not available with Quark\n");
         break;
     case CHAMELEON_BOUND:
         fprintf(stderr, "Bound computation is not available with Quark\n");
diff --git a/runtime/quark/control/runtime_options.c b/runtime/quark/control/runtime_options.c
index fba2114627ca7872e7717f4d65c15ac4bf958058..4723d28e2810327016c754117ca12a9609a912e9 100644
--- a/runtime/quark/control/runtime_options.c
+++ b/runtime/quark/control/runtime_options.c
@@ -36,7 +36,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     /* Initialize options */
     options->sequence   = sequence;
     options->request    = request;
-    options->profiling  = CHAMELEON_PROFILING == CHAMELEON_TRUE;
+    options->profiling  = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
     options->parallel   = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
     options->priority   = RUNTIME_PRIORITY_MIN;
     options->workerid   = -1;
diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c
index d785835ec7b727b789a5fec003d85ad995b8412d..c08f30e55cd6c6161243b2aa199b9f6d717de2bc 100644
--- a/runtime/starpu/control/runtime_context.c
+++ b/runtime/starpu/control/runtime_context.c
@@ -79,14 +79,14 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
     switch (lever)
     {
     case CHAMELEON_DAG:
-        fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
+        fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_GENERATE_TRACE)\n");
         break;
-    case CHAMELEON_KERNELPROFILE_MODE:
-        context_starpu_flags |= (1 << CHAMELEON_KERNELPROFILE_MODE);
+    case CHAMELEON_GENERATE_STATS:
+        context_starpu_flags |= (1 << CHAMELEON_GENERATE_STATS);
         starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
         break;
-    case CHAMELEON_PROFILING_MODE:
-        context_starpu_flags |= (1 << CHAMELEON_PROFILING_MODE);
+    case CHAMELEON_GENERATE_TRACE:
+        context_starpu_flags |= (1 << CHAMELEON_GENERATE_TRACE);
         starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
         break;
     case CHAMELEON_BOUND:
@@ -108,16 +108,18 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
     switch (lever)
     {
     case CHAMELEON_DAG:
-        fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
+        fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_GENERATE_TRACE)\n");
         break;
-    case CHAMELEON_PROFILING_MODE:
-        context_starpu_flags |= ~(1 << CHAMELEON_PROFILING_MODE);
+    case CHAMELEON_GENERATE_TRACE:
+        /* Clear only this feature's bit: StarPU profiling is stopped once no flag remains set */
+        context_starpu_flags &= ~(1 << CHAMELEON_GENERATE_TRACE);
         if ( !context_starpu_flags ) {
             starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
         }
         break;
-    case CHAMELEON_KERNELPROFILE_MODE:
-        context_starpu_flags |= ~(1 << CHAMELEON_KERNELPROFILE_MODE);
+    case CHAMELEON_GENERATE_STATS:
+        /* Clear only this feature's bit: StarPU profiling is stopped once no flag remains set */
+        context_starpu_flags &= ~(1 << CHAMELEON_GENERATE_STATS);
         if ( !context_starpu_flags ) {
             starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
         }
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index bcc79254f0db5ee2e95aab1f703200e9f644d2ae..554d81eff1f0393f2cf033781975c82585a46ba2 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -67,6 +67,12 @@ static int chameleon_starpu_init( starpu_conf_t *conf )
     if ( rc == -ENODEV ) {
         hres = CHAMELEON_ERR_NOT_INITIALIZED;
     }
+
+    /* Stop profiling as it seems that autostart is not sufficient */
+#if defined(STARPU_USE_FXT)
+    starpu_fxt_stop_profiling();
+#endif
+
     return hres;
 }
 
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index 98c35eaa05079222a86d19462b793aa3d7e94c37..0fa917acc326e69c03609d750ae15cd0675bf9ec 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -27,7 +27,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt);
     options->sequence  = sequence;
     options->request   = request;
-    options->profiling = CHAMELEON_PROFILING == CHAMELEON_TRUE;
+    options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
     options->parallel  = CHAMELEON_PARALLEL == CHAMELEON_TRUE;
     options->priority  = RUNTIME_PRIORITY_MIN;
     options->workerid  = (schedopt == NULL) ? -1 : schedopt->workerid;
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index 498e392c7aa36ba7b73964a118e9dd81511bc265..c0c71d25965e13c2f98bbf8d50624b5488058af9 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -34,6 +34,7 @@
 #cmakedefine HAVE_STARPU_DATA_SET_OOC_FLAG
 #cmakedefine HAVE_STARPU_INTERFACE_COPY2D
 #cmakedefine HAVE_STARPU_DATA_PEEK
+#cmakedefine HAVE_STARPU_SET_LIMIT_SUBMITTED_TASKS
 #cmakedefine HAVE_STARPU_REUSE_DATA_ON_NODE
 #cmakedefine HAVE_STARPU_MPI_DATA_MIGRATE
 #cmakedefine HAVE_STARPU_MPI_DATA_REGISTER
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index e2baddbcd8e31fd6390c7e565616f54e1c5b9488..dfe7776bf89fa8a11ac8c07a4362a66f452eb776 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -25,6 +25,8 @@
  */
 #include "testings.h"
 
+testing_options_t options;
+
 /**
  * @brief Defines all the parameters of the testings
  *
@@ -138,11 +140,8 @@ parameter_t parameters[] = {
 
 int main (int argc, char **argv) {
 
-    int ncores, ngpus, human, generic, check, i, niter;
-    int trace, nowarmup, profile, forcegpu, api;
+    int i;
     int rc, info = 0;
-    int run_id = 0;
-    char *func_name;
     char *input_file;
     run_list_t *runlist;
     testing_t * test;
@@ -155,20 +154,10 @@ int main (int argc, char **argv) {
         parameters_read_file( input_file );
         free(input_file);
     }
-    ncores    = parameters_getvalue_int( "threads"  );
-    ngpus     = parameters_getvalue_int( "gpus"     );
-    check     = parameters_getvalue_int( "check"    );
-    human     = parameters_getvalue_int( "human"    );
-    generic   = parameters_getvalue_int( "generic"  );
-    func_name = parameters_getvalue_str( "op"       );
-    niter     = parameters_getvalue_int( "niter"    );
-    trace     = parameters_getvalue_int( "trace"    );
-    nowarmup  = parameters_getvalue_int( "nowarmup" );
-    profile   = parameters_getvalue_int( "profile"  );
-    forcegpu  = parameters_getvalue_int( "forcegpu" );
-    api       = parameters_getvalue_int( "api"      );
-
-    rc = CHAMELEON_Init( ncores, ngpus );
+
+    testing_options_init( &options );
+
+    rc = CHAMELEON_Init( options.threads, options.gpus );
     if ( rc != CHAMELEON_SUCCESS ) {
         fprintf( stderr, "CHAMELEON_Init failed and returned %d.\n", rc );
         info = 1;
@@ -176,19 +165,19 @@ int main (int argc, char **argv) {
     }
 
     /* Set ncores to the right value */
-    if ( ncores == -1 ) {
+    if ( options.threads == -1 ) {
         parameter_t *param;
         param = parameters_get( 't' );
         param->value.ival = CHAMELEON_GetThreadNbr();
+        options.threads = param->value.ival;
     }
 
     /* Binds the right function to be called and builds the parameters combinations */
-    test = testing_gettest( argv[0], func_name );
-    free(func_name);
-    test_fct_t fptr = (api == 0) ? test->fptr_desc : test->fptr_std;
+    test = testing_gettest( argv[0], options.op );
+    test_fct_t fptr = (options.api == 0) ? test->fptr_desc : test->fptr_std;
     if ( fptr == NULL ) {
         fprintf( stderr, "The %s API is not available for function %s\n",
-                 (api == 0) ? "descriptor" : "standard", func_name );
+                 (options.api == 0) ? "descriptor" : "standard", options.op );
         info = 1;
         goto end;
     }
@@ -197,12 +186,12 @@ int main (int argc, char **argv) {
     runlist = run_list_generate( test->params );
 
     /* Executes the tests */
-    run_print_header( test, check, human );
+    run_print_header( test, options.check, options.human );
     run = runlist->head;
 
     /* Force all possible kernels on GPU */
-    if ( forcegpu ) {
-        if ( ngpus == 0 ) {
+    if ( options.forcegpu ) {
+        if ( options.gpus == 0 ) {
             fprintf( stderr,
                      "--forcegpu can't be enable without GPU (-g 0).\n"
                      "  Please specify a larger number of GPU or disable this option\n" );
@@ -213,37 +202,31 @@ int main (int argc, char **argv) {
     }
 
     /* Warmup */
-    if ( !nowarmup ) {
+    if ( !options.nowarmup ) {
         run_arg_list_t copy = run_arg_list_copy( &(run->args) );
-        fptr( &copy, check );
-        run_arg_list_destroy( &copy );
-    }
 
-    /* Start kernel statistics */
-    if ( profile ) {
-        CHAMELEON_Enable( CHAMELEON_KERNELPROFILE_MODE );
-    }
-
-    /* Start tracing */
-    if ( trace ) {
-        CHAMELEON_Enable( CHAMELEON_PROFILING_MODE );
+        /* Run the warmup with run_id set to -1; the increment below brings it back to 0 */
+        options.run_id = -1;
+        fptr( &copy, options.check );
+        run_arg_list_destroy( &copy );
+        options.run_id++;
     }
 
-    if ( generic ) {
+    if ( options.generic ) {
         CHAMELEON_Enable( CHAMELEON_GENERIC );
     }
 
     /* Perform all runs */
     while ( run != NULL ) {
-        for(i=0; i<niter; i++) {
+        for(i=0; i<options.niter; i++) {
             run_arg_list_t copy = run_arg_list_copy( &(run->args) );
-            rc = fptr( &copy, check );
+            rc = fptr( &copy, options.check );
 
             /* If rc < 0, we skipped the test */
             if ( rc >= 0 ) {
                 run_arg_add_int( &copy, "RETURN", rc );
-                run_print_line( test, &copy, check, human, run_id );
-                run_id++;
+                run_print_line( test, &copy, options.check, options.human, options.run_id );
+                options.run_id++;
                 info += rc;
             }
             run_arg_list_destroy( &copy );
@@ -255,20 +238,15 @@ int main (int argc, char **argv) {
         run = next;
     }
 
-    /* Stop tracing */
-    if ( trace ) {
-        CHAMELEON_Disable( CHAMELEON_PROFILING_MODE );
-    }
-
-    /* Stop kernel statistics and display results */
-    if ( profile ) {
-        CHAMELEON_Disable( CHAMELEON_KERNELPROFILE_MODE );
+    /* Display kernel statistics if asked */
+    if ( options.profile ) {
         RUNTIME_kernelprofile_display();
     }
     free( runlist );
 
   end:
-    ;/* OpenMP end */
+    /* OpenMP end */
+    free( options.op );
     CHAMELEON_Finalize();
     parameters_destroy();
 
diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c
index e08a076249a9f234f8582331a1e212a31a4338d3..3cc72a49a3384a1cda52b3ad21d60408ade6dc20 100644
--- a/testing/testing_zgemm.c
+++ b/testing/testing_zgemm.c
@@ -162,7 +162,7 @@ testing_zgemm_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    Am, An, Bm, Bn;
-    CHAMELEON_Complex64_t *A, *B, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *B, *C;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
@@ -225,6 +225,7 @@ testing_zgemm_std( run_arg_list_t *args, int check )
 
     /* Check the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplrnt( M, N, Cinit, LDC, seedC );
 
@@ -238,6 +239,8 @@ testing_zgemm_std( run_arg_list_t *args, int check )
     free( B );
     free( C );
 
+    (void)api;
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zgesvd.c b/testing/testing_zgesvd.c
index a95eeb7338297bcc225a50197cdf487316711c09..413ca1367d0c7650141d31c96343b8bfed9b9384 100644
--- a/testing/testing_zgesvd.c
+++ b/testing/testing_zgesvd.c
@@ -84,8 +84,8 @@ testing_zgesvd_desc( run_arg_list_t *args, int check )
     CHAMELEON_Complex64_t *U, *Vt = NULL;
     double                *S, *D;
     int                    LDU   = M;
-    int                    LDVt  = N; 
-    int                    Un, Vtn;
+    int                    LDVt  = N;
+    int                    Un;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -113,7 +113,6 @@ testing_zgesvd_desc( run_arg_list_t *args, int check )
 
     if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) {
         LDVt = ( jobvt == ChamSVec ) ? K : N;
-        Vtn  = N;
         Vt   = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) );
     }
     else {
@@ -197,8 +196,8 @@ testing_zgesvd_std( run_arg_list_t *args, int check )
     CHAMELEON_Complex64_t *A, *A0, *U, *Vt;
     double                *S, *D;
     int                    LDU   = M;
-    int                    LDVt  = N; 
-    int                    Un, Vtn;
+    int                    LDVt  = N;
+    int                    Un;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -225,7 +224,6 @@ testing_zgesvd_std( run_arg_list_t *args, int check )
 
     if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) {
         LDVt = ( jobvt == ChamSVec ) ? K : N;
-        Vtn  = N;
         Vt   = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) );
     }
     else {
diff --git a/testing/testing_zhemm.c b/testing/testing_zhemm.c
index 91adb578be68f97034d5e427dd761b5cfa1c53de..6b1c25699cea21661747ce437c814675422b79f7 100644
--- a/testing/testing_zhemm.c
+++ b/testing/testing_zhemm.c
@@ -144,7 +144,7 @@ testing_zhemm_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    An;
-    CHAMELEON_Complex64_t *A, *B, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *B, *C;
 
     bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
@@ -168,7 +168,7 @@ testing_zhemm_std( run_arg_list_t *args, int check )
     /* Calculates the product */
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     testing_start( &test_data );
-    cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, 
+    cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N,
                         CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C, LDC );
     testing_stop( &test_data, flops_zhemm( side, M, N ) );
 #else
@@ -179,6 +179,7 @@ testing_zhemm_std( run_arg_list_t *args, int check )
 
     /* Checks the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplrnt( M, N, Cinit, LDC, seedC );
 
@@ -192,6 +193,7 @@ testing_zhemm_std( run_arg_list_t *args, int check )
     free( B );
     free( C );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zher2k.c b/testing/testing_zher2k.c
index d3e8f2277c3da1eb1bc1471e5f428caebe69010c..52f9b7e850aa180ddd6a0663d6d093293d0f4953 100644
--- a/testing/testing_zher2k.c
+++ b/testing/testing_zher2k.c
@@ -146,7 +146,7 @@ testing_zher2k_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    Am, An, Bm, Bn;
-    CHAMELEON_Complex64_t *A, *B, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *B, *C;
 
     bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
@@ -181,7 +181,7 @@ testing_zher2k_std( run_arg_list_t *args, int check )
     /* Calculate the product */
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     testing_start( &test_data );
-    cblas_zher2k( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, 
+    cblas_zher2k( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K,
                   CBLAS_SADDR(alpha), A, LDA, B, LDB, beta, C, LDC );
     testing_stop( &test_data, flops_zher2k( K, N ) );
 #else
@@ -192,6 +192,7 @@ testing_zher2k_std( run_arg_list_t *args, int check )
 
     /* Check the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplghe( bump, uplo, N, Cinit, LDC, seedC );
 
@@ -205,6 +206,7 @@ testing_zher2k_std( run_arg_list_t *args, int check )
     free( B );
     free( C );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zherk.c b/testing/testing_zherk.c
index b68fec2ac8044997c2cdf4e4ac416512f5f12d1b..6dc56c2de270d11b738423131f2c18c046311107 100644
--- a/testing/testing_zherk.c
+++ b/testing/testing_zherk.c
@@ -138,7 +138,7 @@ testing_zherk_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    Am, An;
-    CHAMELEON_Complex64_t *A, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *C;
 
     alpha = run_arg_get_double( args, "alpha", alpha );
     beta  = run_arg_get_double( args, "beta", beta );
@@ -167,7 +167,7 @@ testing_zherk_std( run_arg_list_t *args, int check )
     /* Calculates the product */
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     testing_start( &test_data );
-    cblas_zherk( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, 
+    cblas_zherk( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K,
                  alpha, A, LDA, beta, C, LDC );
     testing_stop( &test_data, flops_zherk( N, K ) );
 #else
@@ -178,6 +178,7 @@ testing_zherk_std( run_arg_list_t *args, int check )
 
     /* Checks the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplghe( bump, uplo, N, Cinit, LDC, seedC );
 
@@ -190,6 +191,7 @@ testing_zherk_std( run_arg_list_t *args, int check )
     free( A );
     free( C );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zlanhe.c b/testing/testing_zlanhe.c
index 23f9a1373e16bd1cd531aad6c0b4554aa511d87e..225f3065640ca5fa6e3f20e31af3917a87b2653c 100644
--- a/testing/testing_zlanhe.c
+++ b/testing/testing_zlanhe.c
@@ -162,6 +162,8 @@ testing_zlanhe_std( run_arg_list_t *args, int check )
 
     free( A );
 
+    (void)norm;
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zlantr.c b/testing/testing_zlantr.c
index e7dc6bdce12fca56e39a5c0011545af76bd265f5..78057457e3cf9f9efe9b377a948e379c03fd9d64 100644
--- a/testing/testing_zlantr.c
+++ b/testing/testing_zlantr.c
@@ -183,6 +183,8 @@ testing_zlantr_std( run_arg_list_t *args, int check )
 
     free( A );
 
+    (void)norm;
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zpoinv.c b/testing/testing_zpoinv.c
index 03f16efe678e065e69bae5148e5f34609aaa9eb7..265b8b64901e155921fb31f150ea43a830b8a2d4 100644
--- a/testing/testing_zpoinv.c
+++ b/testing/testing_zpoinv.c
@@ -143,6 +143,7 @@ testing_zpoinv_std( run_arg_list_t *args, int check )
 
     free( A );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zposv.c b/testing/testing_zposv.c
index 0c0fcdac33f694378e8fb0dfd8043d167fe3843d..6cf198eca5ea08b49fd25f700d37d62b7c845c99 100644
--- a/testing/testing_zposv.c
+++ b/testing/testing_zposv.c
@@ -177,6 +177,7 @@ testing_zposv_std( run_arg_list_t *args, int check )
     free( A );
     free( X );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zpotrf.c b/testing/testing_zpotrf.c
index 48849af44a8952dbd4cc86ade5d8b9398eb79ecc..505a95f511095ddf165e51606a0d5bd76177bfad 100644
--- a/testing/testing_zpotrf.c
+++ b/testing/testing_zpotrf.c
@@ -134,6 +134,7 @@ testing_zpotrf_std( run_arg_list_t *args, int check )
 
     free( A );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zpotri.c b/testing/testing_zpotri.c
index 19c075e600422c5d88a1e5808a5e844cbb163432..012a97cd89e79a1ff47ea51d31dcd552d63b0109 100644
--- a/testing/testing_zpotri.c
+++ b/testing/testing_zpotri.c
@@ -145,6 +145,7 @@ testing_zpotri_std( run_arg_list_t *args, int check )
 
     free( A );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zpotrs.c b/testing/testing_zpotrs.c
index 766dd8a525c67a4575490ba4510ada5ebc650a0e..f1a49a483052561d9032c2e053cf30b267873aa4 100644
--- a/testing/testing_zpotrs.c
+++ b/testing/testing_zpotrs.c
@@ -167,6 +167,7 @@ testing_zpotrs_std( run_arg_list_t *args, int check )
     free( A );
     free( X );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zsymm.c b/testing/testing_zsymm.c
index 377848b643ab489775a39795fe79bec00d855201..a4d94002c865edce86c4d7fc1efee3bb2a4bac07 100644
--- a/testing/testing_zsymm.c
+++ b/testing/testing_zsymm.c
@@ -144,7 +144,7 @@ testing_zsymm_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    An;
-    CHAMELEON_Complex64_t *A, *B, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *B, *C;
 
     bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
@@ -179,6 +179,7 @@ testing_zsymm_std( run_arg_list_t *args, int check )
 
     /* Checks the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplrnt( M, N, Cinit, LDC, seedC );
 
@@ -192,6 +193,7 @@ testing_zsymm_std( run_arg_list_t *args, int check )
     free( B );
     free( C );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zsyr2k.c b/testing/testing_zsyr2k.c
index 830cc5ea688fc5f604681398ba3bb673c41b8e9e..44437ec6680937199b54ade9b8a4494a91627074 100644
--- a/testing/testing_zsyr2k.c
+++ b/testing/testing_zsyr2k.c
@@ -146,7 +146,7 @@ testing_zsyr2k_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    Am, An, Bm, Bn;
-    CHAMELEON_Complex64_t *A, *B, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *B, *C;
 
     bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
@@ -192,6 +192,7 @@ testing_zsyr2k_std( run_arg_list_t *args, int check )
 
     /* Check the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplgsy( bump, uplo, N, Cinit, LDC, seedC );
 
@@ -205,6 +206,7 @@ testing_zsyr2k_std( run_arg_list_t *args, int check )
     free( B );
     free( C );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_zsyrk.c b/testing/testing_zsyrk.c
index 226e5093f210290c9b42cbdab490fe624295d343..9000c452dfa1f5db90499985673d6739258c45a7 100644
--- a/testing/testing_zsyrk.c
+++ b/testing/testing_zsyrk.c
@@ -137,7 +137,7 @@ testing_zsyrk_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    Am, An;
-    CHAMELEON_Complex64_t *A, *C, *Cinit;
+    CHAMELEON_Complex64_t *A, *C;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
@@ -177,6 +177,7 @@ testing_zsyrk_std( run_arg_list_t *args, int check )
 
     /* Checks the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Cinit;
         Cinit = malloc( LDC*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplgsy( bump, uplo, N, Cinit, LDC, seedC );
 
@@ -189,6 +190,7 @@ testing_zsyrk_std( run_arg_list_t *args, int check )
     free( A );
     free( C );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_ztrmm.c b/testing/testing_ztrmm.c
index 92d110468666614090d1a8668c11b386405c0401..1a38de09bd674e823b7b7c658fa42625e4bd8009 100644
--- a/testing/testing_ztrmm.c
+++ b/testing/testing_ztrmm.c
@@ -132,7 +132,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check )
 
     /* Descriptors */
     int                    An;
-    CHAMELEON_Complex64_t *A, *B, *Binit;
+    CHAMELEON_Complex64_t *A, *B;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
 
@@ -152,7 +152,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check )
     /* Calculates the product */
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     testing_start( &test_data );
-    cblas_ztrmm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, 
+    cblas_ztrmm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
                  (CBLAS_DIAG)diag, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB );
     testing_stop( &test_data, flops_ztrmm( side, N, M ) );
 #else
@@ -163,6 +163,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check )
 
     /* Checks the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Binit;
         Binit = malloc( LDB*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplrnt( M, N, Binit, LDB, seedB );
 
@@ -175,6 +176,7 @@ testing_ztrmm_std( run_arg_list_t *args, int check )
     free( A );
     free( B );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testing_ztrsm.c b/testing/testing_ztrsm.c
index 6e721c8ab7da99cc2804fcc484b5553861b35e69..3bff1b5a4c8c4f5638f65ba585028e2b376ca26b 100644
--- a/testing/testing_ztrsm.c
+++ b/testing/testing_ztrsm.c
@@ -126,7 +126,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check )
     int                   seedB = run_arg_get_int( args, "seedB", random() );
 
     /* Descriptors */
-    CHAMELEON_Complex64_t *A, *B, *Binit;
+    CHAMELEON_Complex64_t *A, *B;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
 
@@ -144,7 +144,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check )
     /* Calculates the product */
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     testing_start( &test_data );
-    cblas_ztrsm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, 
+    cblas_ztrsm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
                  (CBLAS_DIAG)diag, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB );
     testing_stop( &test_data, flops_ztrsm( side, M, N ) );
 #else
@@ -155,6 +155,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check )
 
     /* Checks the solution */
     if ( check ) {
+        CHAMELEON_Complex64_t *Binit;
         Binit = malloc( LDB*N*sizeof(CHAMELEON_Complex64_t) );
         CHAMELEON_zplrnt( M, N, Binit, LDB, seedB );
 
@@ -167,6 +168,7 @@ testing_ztrsm_std( run_arg_list_t *args, int check )
     free( A );
     free( B );
 
+    (void)check;
     return hres;
 }
 
diff --git a/testing/testings.c b/testing/testings.c
index 09e1d91bee09adbffd045816fa46538b94ff4bb0..b44047e6708402da7a822597b238a8113310338e 100644
--- a/testing/testings.c
+++ b/testing/testings.c
@@ -17,6 +17,8 @@
  */
 #include "testings.h"
 
+extern testing_options_t options;
+
 /**
  * @brief List of all the testings available.
  */
@@ -77,12 +79,47 @@ testing_gettest( const char *prog_name,
     return test;
 }
 
+/**
+ * @brief Cache the parameter values in @p options so they are not looked up at each
+ * iteration. "threads" is read in every build: the vendor main gives it to CHAMELEON_Init().
+ */
+void
+testing_options_init( testing_options_t *options )
+{
+    options->human     = parameters_getvalue_int( "human" );
+    options->niter     = parameters_getvalue_int( "niter" );
+    options->nowarmup  = parameters_getvalue_int( "nowarmup" );
+    options->threads   = parameters_getvalue_int( "threads" );
+#if !defined(CHAMELEON_TESTINGS_VENDOR)
+    options->api       = parameters_getvalue_int( "api" );
+    options->async     = parameters_getvalue_int( "async" );
+    options->check     = parameters_getvalue_int( "check" );
+    options->forcegpu  = parameters_getvalue_int( "forcegpu" );
+    options->generic   = parameters_getvalue_int( "generic" );
+    options->gpus      = parameters_getvalue_int( "gpus" );
+    options->mtxfmt    = parameters_getvalue_int( "mtxfmt" );
+    options->P         = parameters_getvalue_int( "P" );
+    options->profile   = parameters_getvalue_int( "profile" );
+    options->splitsub  = parameters_getvalue_int( "splitsub" );
+    options->trace     = parameters_getvalue_int( "trace" );
+#endif
+
+    options->file = parameters_getvalue_str( "file" );
+    options->op   = parameters_getvalue_str( "op" );
+
+    options->run_id = 0;
+}
+
 /**
  * @brief Starts the measure for the testing.
  */
 void
 testing_start( testdata_t *tdata )
 {
+    tdata->sequence         = NULL;
+    tdata->request.status   = 0;
+    tdata->request.schedopt = NULL;
+
 #if defined(CHAMELEON_TESTINGS_VENDOR)
     /*
      * If we test the vendor functions, we want to use all the threads of the
@@ -91,24 +128,31 @@ testing_start( testdata_t *tdata )
      */
     CHAMELEON_Pause();
 #else
-    int splitsub = parameters_getvalue_int( "splitsub" );
-    int async    = parameters_getvalue_int( "async" ) || splitsub;
-#endif
-
-    tdata->sequence         = NULL;
-    tdata->request.status   = 0;
-    tdata->request.schedopt = NULL;
-
-#if !defined(CHAMELEON_TESTINGS_VENDOR)
 #if defined(CHAMELEON_USE_MPI)
     CHAMELEON_Distributed_start();
 #endif
-
-    if ( async ) {
+    /*
+     * Create the sequence for the asynchronous calls
+     */
+    if ( options.async ) {
         CHAMELEON_Sequence_Create( &(tdata->sequence) );
     }
 
-    if ( splitsub ) {
+    /* Start kernel statistics */
+    if ( options.profile && (options.run_id >= 0) ) {
+        CHAMELEON_Enable( CHAMELEON_GENERATE_STATS );
+    }
+
+    /* Start tracing */
+    if ( options.trace && (options.run_id >= 0) ) {
+        CHAMELEON_Enable( CHAMELEON_GENERATE_TRACE );
+    }
+
+    /*
+     * Pause the task execution if we want to time separately the task
+     * submission from the task execution
+     */
+    if ( options.splitsub ) {
         CHAMELEON_Pause();
     }
 #endif
@@ -127,13 +171,10 @@ testing_stop( testdata_t *tdata, cham_fixdbl_t flops )
     cham_fixdbl_t t0, t1, t2, gflops;
 
 #if !defined(CHAMELEON_TESTINGS_VENDOR)
-    int splitsub = parameters_getvalue_int( "splitsub" );
-    int async    = parameters_getvalue_int( "async" ) || splitsub;
-
     /* Submission is done, we need to start the computations */
-    if ( async ) {
+    if ( options.async ) {
         tdata->tsub = RUNTIME_get_time();
-        if ( splitsub ) {
+        if ( options.splitsub ) {
             CHAMELEON_Resume();
         }
         CHAMELEON_Sequence_Wait( tdata->sequence );
@@ -157,9 +198,19 @@ testing_stop( testdata_t *tdata, cham_fixdbl_t flops )
     tdata->texec = t2 - t0;
 
 #if !defined(CHAMELEON_TESTINGS_VENDOR)
-    if ( splitsub ) {
+    if ( options.splitsub ) {
         tdata->texec = t2 - t1;
     }
+
+    /* Stop tracing */
+    if ( options.trace && (options.run_id >= 0) ) {
+        CHAMELEON_Disable( CHAMELEON_GENERATE_TRACE );
+    }
+
+    /* Stop kernel statistics */
+    if ( options.profile && (options.run_id >= 0) ) {
+        CHAMELEON_Disable( CHAMELEON_GENERATE_STATS );
+    }
 #endif
 
     gflops = flops * 1.e-9 / tdata->texec;
diff --git a/testing/testings.h b/testing/testings.h
index 2832223f4edeac53eba444a13c5e8249fc4592ed..5326b192250ccce54602a854cab040ad21619820 100644
--- a/testing/testings.h
+++ b/testing/testings.h
@@ -239,8 +239,6 @@ void         parameters_destroy( );
 run_list_t *run_list_generate( const char **params );
 void        run_list_destroy( run_list_elt_t *run );
 
-void testing_register( testing_t *test );
-
 /**
  * @brief Define the data associated to a single run of a testing
  */
@@ -254,12 +252,39 @@ typedef struct testdata_ {
     cham_fixdbl_t       texec;    /**< The execution time of test           */
     cham_fixdbl_t       tsub;     /**< The task submission tome of the test */
     RUNTIME_sequence_t *sequence; /**< The sequence to run the test if splitsub */
-    RUNTIME_request_t  request;   /**< The request to run the test if splitsub  */
+    RUNTIME_request_t   request;  /**< The request to run the test if splitsub  */
 } testdata_t;
 
+/**
+ * @brief Structure to store the read parameters for a quicker access
+ */
+typedef struct testing_options_ {
+    /* Static parameters */
+    int api;
+    int async;    /**< If set, testing_start() creates a sequence and testing_stop() waits on it */
+    int check;    /**< Forwarded to the test function and to the run_print_* helpers */
+    int forcegpu;
+    int generic;
+    int gpus;
+    int human;    /**< Forwarded to run_print_header() and run_print_line() */
+    int mtxfmt;
+    int niter;    /**< Number of iterations performed for each run */
+    int nowarmup; /**< If set, the warmup run is skipped */
+    int P;
+    int profile;  /**< If set, CHAMELEON_GENERATE_STATS is enabled around non-warmup runs */
+    int splitsub; /**< If set, task execution is paused during submission and resumed in testing_stop() */
+    int threads;  /**< Value given to CHAMELEON_Init() */
+    int trace;    /**< If set, CHAMELEON_GENERATE_TRACE is enabled around non-warmup runs */
+    char *file;
+    char *op;     /**< Name of the tested function, given to testing_gettest() */
+    /* Additional information to exchange between the main and the testings */
+    int run_id;   /**< -1 during the warmup run, then incremented after each printed run */
+} testing_options_t;
+
 void       testing_register( testing_t *test );
 testing_t *testing_gettest( const char *prog_name, const char *func_name );
 void       testing_start( testdata_t *tdata );
 void       testing_stop( testdata_t *tdata, cham_fixdbl_t flops );
+void       testing_options_init( testing_options_t *options );
 
 #endif /* _testings_h_ */
diff --git a/testing/vendor_ztesting.c b/testing/vendor_ztesting.c
index f959ad3ced59fbd0040a3d531a3578c46066cd8f..df72f106c4c9315a0878d6811da589ce6975f44b 100644
--- a/testing/vendor_ztesting.c
+++ b/testing/vendor_ztesting.c
@@ -19,6 +19,8 @@
  */
 #include "testings.h"
 
+testing_options_t options;
+
 /**
  * @brief Defines all the parameters of the testings
  *
@@ -96,11 +98,8 @@ parameter_t parameters[] = {
 
 int main (int argc, char **argv) {
 
-    int ncores, human, i, niter;
-    int nowarmup;
-    int rc, info, check = 0;
-    int run_id = 0;
-    char *func_name;
+    int i;
+    int rc, info = 0;
     char *input_file;
     run_list_t *runlist;
     testing_t * test;
@@ -113,13 +112,10 @@ int main (int argc, char **argv) {
         parameters_read_file( input_file );
         free(input_file);
     }
-    ncores    = parameters_getvalue_int( "threads"  );
-    human     = parameters_getvalue_int( "human"    );
-    func_name = parameters_getvalue_str( "op"       );
-    niter     = parameters_getvalue_int( "niter"    );
-    nowarmup  = parameters_getvalue_int( "nowarmup" );
 
-    rc = CHAMELEON_Init( ncores, 0 );
+    testing_options_init( &options );
+
+    rc = CHAMELEON_Init( options.threads, 0 );
     if ( rc != CHAMELEON_SUCCESS ) {
         fprintf( stderr, "CHAMELEON_Init failed and returned %d.\n", rc );
         info = 1;
@@ -127,18 +123,18 @@ int main (int argc, char **argv) {
     }
 
     /* Set ncores to the right value */
-    if ( ncores == -1 ) {
+    if ( options.threads == -1 ) {
         parameter_t *param;
         param = parameters_get( 't' );
         param->value.ival = CHAMELEON_GetThreadNbr();
+        options.threads = param->value.ival;
     }
 
     /* Binds the right function to be called and builds the parameters combinations */
-    test = testing_gettest( argv[0], func_name );
-    free(func_name);
+    test = testing_gettest( argv[0], options.op );
     test_fct_t fptr = test->fptr_std;
     if ( fptr == NULL ) {
-        fprintf( stderr, "The vendor API is not available for function %s\n", func_name );
+        fprintf( stderr, "The vendor API is not available for function %s\n", options.op );
         info = 1;
         goto end;
     }
@@ -147,27 +143,31 @@ int main (int argc, char **argv) {
     runlist = run_list_generate( test->params );
 
     /* Executes the tests */
-    run_print_header( test, check, human );
+    run_print_header( test, options.check, options.human );
     run = runlist->head;
 
     /* Warmup */
-    if ( !nowarmup ) {
+    if ( !options.nowarmup ) {
         run_arg_list_t copy = run_arg_list_copy( &(run->args) );
-        fptr( &copy, check );
+
+        /* Run the warmup test as -1 */
+        options.run_id = -1;
+        fptr( &copy, options.check );
         run_arg_list_destroy( &copy );
+        options.run_id++;
     }
 
     /* Perform all runs */
     while ( run != NULL ) {
-        for(i=0; i<niter; i++) {
+        for(i=0; i<options.niter; i++) {
             run_arg_list_t copy = run_arg_list_copy( &(run->args) );
-            rc = fptr( &copy, check );
+            rc = fptr( &copy, options.check );
 
             /* If rc < 0, we skipped the test */
             if ( rc >= 0 ) {
                 run_arg_add_int( &copy, "RETURN", rc );
-                run_print_line( test, &copy, check, human, run_id );
-                run_id++;
+                run_print_line( test, &copy, options.check, options.human, options.run_id );
+                options.run_id++;
                 info += rc;
             }
             run_arg_list_destroy( &copy );
@@ -182,7 +182,8 @@ int main (int argc, char **argv) {
     free( runlist );
 
   end:
-    ;/* OpenMP end */
+    /* OpenMP end */
+    free( options.op );
     CHAMELEON_Finalize();
     parameters_destroy();