diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 5ed871c02685be1d5b6d1fc2b068c460dfded964..128478f3bfae0bbf58a7e70ed6164adf92a4ec76 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -285,7 +285,6 @@ int CHAMELEON_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHA
 int CHAMELEON_zgeqrs_param_Tile(const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B);
 int CHAMELEON_zunglq_param_Tile(const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B);
 int CHAMELEON_zungqr_param_Tile(const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B);
-int CHAMELEON_zungqr_param_Tile(const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B);
 int CHAMELEON_zunmlq_param_Tile(const libhqr_tree_t *qrtree, cham_side_t side, cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B);
 int CHAMELEON_zunmqr_param_Tile(const libhqr_tree_t *qrtree, cham_side_t side, cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B);
 /**
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index 4cce0886869ad16597741401c095bbdba27f36fa..7561f2e4fca742f1d35a9b43f5c0099b09bf51fb 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -76,7 +76,6 @@ int RUNTIME_init( CHAM_context_t *chamctxt,
 {
     starpu_conf_t *conf = (starpu_conf_t*)(chamctxt->schedopt);
     int hres = CHAMELEON_ERR_NOT_INITIALIZED;
-    int rc = 0;
 
     /* StarPU was already initialized by an external library */
     if (conf == NULL) {
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index ca48506224bd04bb66cc7b5839e5a72566f25fcf..6673656658160fd962fd4aff02295bb5fbbb4620 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -44,6 +44,8 @@ static parameter_t parameters[] = {
     { "mtxfmt",   "Change the way the matrix is stored (0: global, 1: tiles, 2: OOC)", -32, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 1, 6, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "profile",  "Display the kernel profiling",             -33, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "forcegpu", "Force kernels on GPU",                     -34, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
+    { "async",    "Switch to the Async interface",            's', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
+    { "splitsub", "Split the task submission and execution stages", 'S', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
 
     { NULL, "Machine parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "threads", "Number of CPU workers per node",      't', PARAM_OPTION | PARAM_OUTPUT, 1, 7, TestValInt, {-1}, NULL, pread_int, sprint_int },
@@ -101,6 +103,7 @@ static parameter_t parameters[] = {
     { "hlvl",   "Tree used for high level reduction between nodes",        -23, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "domino", "Enable/Disable the domino between upper and lower trees", -24, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 6, TestValInt, {0}, NULL, pread_int, sprint_int },
 
+    { "tsub",          "Graph submission time in s",             999, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl },
     { "time",          "Time in s",                             1000, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl },
     { "gflops",        "GFlop/s",                               1001, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl },
     { "RETURN",        "Result of the testing: SUCCESS/FAILED", 1002, PARAM_OUTPUT, 2,  7, TestValInt,    {0}, NULL, pread_int,    sprint_check  },
@@ -177,6 +180,16 @@ void print_usage( const char* prog_name )
                 "  will run one gemm with three matrices of size 2000x2000 each and a tile size of 200.\n"
                 "  The output will be in the human readable format\n"
                 "\n", prog_name );
+        printf( "Remarks about timing:\n"
+                "  Timings are reported respectively as 'tsub' for the graph submission time, and 'time'\n"
+                "  for the execution time.\n"
+                "  By default the synchronous tile interface is used to perform the timings. 'tsub' is null.\n"
+                "  If the --async option is enabled, then the asynchronous interface is called. 'tsub' reports\n"
+                "  the task submission time, and 'time' the execution time that includes 'tsub'.\n"
+                "  If the --splitsub option is enabled, then the asynchronous interface is called and task\n"
+                "  submission is fully performed before starting the computation. 'tsub' reports the\n"
+                "  task submission time, and 'time' the execution time excluding 'tsub'.\n"
+                "  Note that the 'gflops' field is always computed with 'time'\n" );
     }
 }
 
@@ -467,6 +480,25 @@ void parameters_parser( int argc, char **argv )
     if ( longopts != NULL ) {
         free( longopts );
     }
+
+    /* Force Async if splitsub is enabled */
+    {
+        int splitsub = parameters_getvalue_int( "splitsub" );
+
+        if ( splitsub ) {
+            param = parameters_get( 's' );
+            if ( param == NULL ) {
+                print_usage(argv[0]);
+                exit(1);
+            }
+            parameters_addvalues( param, NULL );
+
+#if defined(CHAMELEON_RUNTIME_SYNC)
+            fprintf( stderr, "Spliting the submission and the execution stages is not possible when the option CHAMELEON_RUNTIME_SYNC is enabled\n" );
+            exit(0);
+#endif
+        }
+    }
 }
 
 void
@@ -493,6 +525,76 @@ parameters_destroy()
     return;
 }
 
+void
+testing_start( testdata_t *tdata )
+{ /* Prepares a timed run: resets the async handles in tdata and records the start time. */
+    int splitsub = parameters_getvalue_int( "splitsub" );
+    int async    = parameters_getvalue_int( "async" ) || splitsub; /* splitsub implies async */
+
+    tdata->sequence         = NULL;
+    tdata->request.status   = 0;
+    tdata->request.schedopt = NULL;
+
+#if defined(CHAMELEON_USE_MPI)
+    CHAMELEON_Distributed_start();
+#endif
+
+    if ( async ) {
+        CHAMELEON_Sequence_Create( &(tdata->sequence) ); /* Destroyed in testing_stop() */
+    }
+
+    if ( splitsub ) {
+        CHAMELEON_Pause(); /* NOTE(review): presumably holds task execution until CHAMELEON_Resume() in testing_stop() - confirm */
+    }
+
+    /* Both fields hold the start timestamp for now; testing_stop() turns them into durations */
+    tdata->tsub  = RUNTIME_get_time();
+    tdata->texec = tdata->tsub;
+}
+
+void
+testing_stop( testdata_t *tdata, cham_fixdbl_t flops )
+{ /* Ends a timed run: waits for completion and stores "time", "tsub" and "gflops" in tdata->args. */
+    cham_fixdbl_t t0, t1, t2, gflops;
+
+    int splitsub = parameters_getvalue_int( "splitsub" );
+    int async    = parameters_getvalue_int( "async" ) || splitsub;
+
+    /* Submission is done: stamp its end, then wait for the computations to complete */
+    if ( async ) {
+        tdata->tsub = RUNTIME_get_time();
+        if ( splitsub ) {
+            CHAMELEON_Resume();
+        }
+        CHAMELEON_Sequence_Wait( tdata->sequence );
+        CHAMELEON_Sequence_Destroy( tdata->sequence );
+    }
+#if defined(CHAMELEON_USE_MPI)
+    CHAMELEON_Distributed_stop();
+#endif
+    t2 = RUNTIME_get_time(); /* End of the measured region */
+
+    t0 = tdata->texec; /* Start timestamp stored by testing_start() */
+    t1 = tdata->tsub; /* End of submission; still equal to t0 when not async */
+    /*
+     * t0 / Submission / t1 / Execution / t2
+     *
+     * => tsub  = t1 - t0 (0 when not async, since t1 == t0)
+     * => texec = t2 - t1 with splitsub, t2 - t0 otherwise (tsub included)
+     */
+    tdata->tsub  = t1 - t0;
+    if ( splitsub ) {
+        tdata->texec = t2 - t1;
+    }
+    else {
+        tdata->texec = t2 - t0;
+    }
+    gflops = flops * 1.e-9 / tdata->texec; /* Always based on the reported "time" */
+    run_arg_add_fixdbl( tdata->args, "time", tdata->texec );
+    run_arg_add_fixdbl( tdata->args, "tsub", tdata->tsub );
+    run_arg_add_fixdbl( tdata->args, "gflops", ( tdata->hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+}
+
 int main (int argc, char **argv) {
 
     int ncores, ngpus, human, check, i, niter;
diff --git a/testing/run_list.c b/testing/run_list.c
index 258e231b70ba567d7ad4957be23e87adf7a478a0..6b55f24410e9c545699c75e36ecf4fbe4c97ab38 100644
--- a/testing/run_list.c
+++ b/testing/run_list.c
@@ -695,7 +695,7 @@ const char *common_input[]  = { "threads", "gpus", "P", "Q", NULL };
 /**
  * @brief The common output parameters to all tests
  */
-const char *common_output[] = { "time", "gflops", NULL };
+const char *common_output[] = { "tsub", "time", "gflops", NULL };
 
 /**
  ********************************************************************************
diff --git a/testing/testing_zcesca.c b/testing/testing_zcesca.c
index edc42ce8d15603dd7bbfe8bdb103f61895a2326d..8fdcd839490fd5265ca32c940600a5d26ffc7546 100644
--- a/testing/testing_zcesca.c
+++ b/testing/testing_zcesca.c
@@ -37,20 +37,23 @@ flops_zcesca( int M, int N )
 int
 testing_zcesca( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
     /* Read arguments */
-    intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
-    int          nb     = run_arg_get_int( args, "nb", 320 );
-    int          P      = parameters_getvalue_int( "P" );
-    int          N      = run_arg_get_int( args, "N", 1000 );
-    int          M      = run_arg_get_int( args, "M", N );
-    int          LDA    = run_arg_get_int( args, "LDA", M );
-    int          seedA  = run_arg_get_int( args, "seedA", random() );
-    int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zcesca( M, N );
+    int      async  = parameters_getvalue_int( "async" );
+    intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int      nb     = run_arg_get_int( args, "nb", 320 );
+    int      P      = parameters_getvalue_int( "P" );
+    int      N      = run_arg_get_int( args, "N", 1000 );
+    int      M      = run_arg_get_int( args, "M", N );
+    int      LDA    = run_arg_get_int( args, "LDA", M );
+    int      seedA  = run_arg_get_int( args, "seedA", random() );
+    int      Q      = parameters_compute_q( P );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
+    void        *ws = NULL;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -61,13 +64,26 @@ testing_zcesca( run_arg_list_t *args, int check )
     /* Fill the matrix with random values */
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
+    if ( async ) {
+        ws = CHAMELEON_zcesca_WS_Alloc( descA );
+    }
+
     /* Compute the centered-scaled matrix transformation */
-    START_TIMING( t );
-    hres = CHAMELEON_zcesca_Tile( 1, 1, ChamColumnwise, descA, NULL, NULL );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zcesca_Tile_Async( 1, 1, ChamColumnwise, descA, ws,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zcesca_Tile( 1, 1, ChamColumnwise, descA, NULL, NULL );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zcesca( M, N ) );
+
+    if ( ws != NULL ) {
+        CHAMELEON_zcesca_WS_Free( ws ); /* Must pair with CHAMELEON_zcesca_WS_Alloc() used above, not the zgemm variant */
+    }
 
     CHAMELEON_Desc_Destroy( &descA );
 
@@ -86,13 +102,13 @@ void testing_zcesca_init( void ) __attribute__( ( constructor ) );
 void
 testing_zcesca_init( void )
 {
-    test_zcesca.name        = "zcesca";
-    test_zcesca.helper      = "General centered-scaled matrix transformation";
-    test_zcesca.params      = zcesca_params;
-    test_zcesca.output      = zcesca_output;
-    test_zcesca.outchk      = zcesca_outchk;
-    test_zcesca.fptr        = testing_zcesca;
-    test_zcesca.next        = NULL;
+    test_zcesca.name   = "zcesca";
+    test_zcesca.helper = "General centered-scaled matrix transformation";
+    test_zcesca.params = zcesca_params;
+    test_zcesca.output = zcesca_output;
+    test_zcesca.outchk = zcesca_outchk;
+    test_zcesca.fptr   = testing_zcesca;
+    test_zcesca.next   = NULL;
 
     testing_register( &test_zcesca );
 }
diff --git a/testing/testing_zgeadd.c b/testing/testing_zgeadd.c
index 22ec5907756d8b81b12bbbffb61a88252f493d17..9198e1414d6924af319260c186b25bcb7cd74759 100644
--- a/testing/testing_zgeadd.c
+++ b/testing/testing_zgeadd.c
@@ -38,11 +38,11 @@ flops_zgeadd( int M, int N )
 int
 testing_zgeadd( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    int          Am, An;
-    CHAM_desc_t *descA, *descB;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
     /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -56,11 +56,13 @@ testing_zgeadd( run_arg_list_t *args, int check )
     int          Q      = parameters_compute_q( P );
     CHAMELEON_Complex64_t alpha = testing_zalea();
     CHAMELEON_Complex64_t beta  = testing_zalea();
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgeadd( M, N );
+
+    /* Descriptors */
+    int          Am, An;
+    CHAM_desc_t *descA, *descB;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
-    beta  = run_arg_get_complex64( args, "beta",  beta  );
+    beta  = run_arg_get_complex64( args, "beta", beta );
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -84,12 +86,18 @@ testing_zgeadd( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descB, seedB );
 
     /* Compute the sum */
-    START_TIMING( t );
-    hres = CHAMELEON_zgeadd_Tile( trans, alpha, descA, beta, descB );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgeadd_Tile_Async( trans, alpha, descA, beta, descB,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgeadd_Tile( trans, alpha, descA, beta, descB );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgeadd( M, N ) );
 
     /* Check the solution */
     if ( check ) {
@@ -108,8 +116,8 @@ testing_zgeadd( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgeadd;
-const char *zgeadd_params[] = { "mtxfmt", "nb",   "trans", "m",     "n",     "lda", "ldb",
-                                "alpha", "beta",  "seedA", "seedB", NULL };
+const char *zgeadd_params[] = { "mtxfmt", "nb",    "trans", "m",     "n",     "lda",
+                                "ldb",    "alpha", "beta",  "seedA", "seedB", NULL };
 const char *zgeadd_output[] = { NULL };
 const char *zgeadd_outchk[] = { "RETURN", NULL };
 
@@ -120,13 +128,13 @@ void testing_zgeadd_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgeadd_init( void )
 {
-    test_zgeadd.name        = "zgeadd";
-    test_zgeadd.helper      = "General matrix-matrix addition";
-    test_zgeadd.params      = zgeadd_params;
-    test_zgeadd.output      = zgeadd_output;
-    test_zgeadd.outchk      = zgeadd_outchk;
-    test_zgeadd.fptr        = testing_zgeadd;
-    test_zgeadd.next        = NULL;
+    test_zgeadd.name   = "zgeadd";
+    test_zgeadd.helper = "General matrix-matrix addition";
+    test_zgeadd.params = zgeadd_params;
+    test_zgeadd.output = zgeadd_output;
+    test_zgeadd.outchk = zgeadd_outchk;
+    test_zgeadd.fptr   = testing_zgeadd;
+    test_zgeadd.next   = NULL;
 
     testing_register( &test_zgeadd );
 }
diff --git a/testing/testing_zgelqf.c b/testing/testing_zgelqf.c
index 18b5a67f85f6cc1b8bac9e2901fa50b2cb6fbde9..a53283dc1d2d5b15b35939799ac783a3c2e181b6 100644
--- a/testing/testing_zgelqf.c
+++ b/testing/testing_zgelqf.c
@@ -24,10 +24,11 @@
 int
 testing_zgelqf( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -38,8 +39,9 @@ testing_zgelqf( run_arg_list_t *args, int check )
     int      RH     = run_arg_get_int( args, "qra", 4 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgelqf( M, N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descT;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -61,12 +63,17 @@ testing_zgelqf( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgelqf_Tile( descA, descT );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgelqf_Tile_Async( descA, descT, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgelqf_Tile( descA, descT );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgelqf( M, N ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -93,7 +100,7 @@ testing_zgelqf( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgelqf;
-const char *zgelqf_params[] = { "mtxfmt", "nb","ib", "m", "n", "lda", "qra", "seedA", NULL };
+const char *zgelqf_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "qra", "seedA", NULL };
 const char *zgelqf_output[] = { NULL };
 const char *zgelqf_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -104,13 +111,13 @@ void testing_zgelqf_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgelqf_init( void )
 {
-    test_zgelqf.name        = "zgelqf";
-    test_zgelqf.helper      = "General LQ factorization";
-    test_zgelqf.params      = zgelqf_params;
-    test_zgelqf.output      = zgelqf_output;
-    test_zgelqf.outchk      = zgelqf_outchk;
-    test_zgelqf.fptr        = testing_zgelqf;
-    test_zgelqf.next        = NULL;
+    test_zgelqf.name   = "zgelqf";
+    test_zgelqf.helper = "General LQ factorization";
+    test_zgelqf.params = zgelqf_params;
+    test_zgelqf.output = zgelqf_output;
+    test_zgelqf.outchk = zgelqf_outchk;
+    test_zgelqf.fptr   = testing_zgelqf;
+    test_zgelqf.next   = NULL;
 
     testing_register( &test_zgelqf );
 }
diff --git a/testing/testing_zgelqf_hqr.c b/testing/testing_zgelqf_hqr.c
index a5c5a5cf0d6304a61ae89e2b7dc0b18c2252e033..32f880db00df57f57971b1e01214d0c39e427c6e 100644
--- a/testing/testing_zgelqf_hqr.c
+++ b/testing/testing_zgelqf_hqr.c
@@ -24,10 +24,11 @@
 int
 testing_zgelqf_hqr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descTS, *descTT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -42,9 +43,9 @@ testing_zgelqf_hqr( run_arg_list_t *args, int check )
     int      domino = run_arg_get_int( args, "domino", -1 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgelqf( M, N );
 
+    /* Descriptors */
+    CHAM_desc_t    *descA, *descTS, *descTT;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -69,12 +70,19 @@ testing_zgelqf_hqr( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgelqf_param_Tile( &qrtree, descA, descTS, descTT );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgelqf_param_Tile_Async( &qrtree, descA, descTS, descTT,
+                                                  test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgelqf_param_Tile( &qrtree, descA, descTS, descTT );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgelqf( M, N ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -103,8 +111,8 @@ testing_zgelqf_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgelqf_hqr;
-const char *zgelqf_hqr_params[] = { "mtxfmt", "nb", "ib",   "m",    "n",      "lda",   "qra",
-                                    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
+const char *zgelqf_hqr_params[] = { "mtxfmt", "nb",   "ib",   "m",      "n",     "lda", "qra",
+                                    "qrp",    "llvl", "hlvl", "domino", "seedA", NULL };
 const char *zgelqf_hqr_output[] = { NULL };
 const char *zgelqf_hqr_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -115,13 +123,13 @@ void testing_zgelqf_hqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgelqf_hqr_init( void )
 {
-    test_zgelqf_hqr.name        = "zgelqf_hqr";
-    test_zgelqf_hqr.helper      = "General LQ factorization with hierachical reduction trees";
-    test_zgelqf_hqr.params      = zgelqf_hqr_params;
-    test_zgelqf_hqr.output      = zgelqf_hqr_output;
-    test_zgelqf_hqr.outchk      = zgelqf_hqr_outchk;
-    test_zgelqf_hqr.fptr        = testing_zgelqf_hqr;
-    test_zgelqf_hqr.next        = NULL;
+    test_zgelqf_hqr.name   = "zgelqf_hqr";
+    test_zgelqf_hqr.helper = "General LQ factorization with hierachical reduction trees";
+    test_zgelqf_hqr.params = zgelqf_hqr_params;
+    test_zgelqf_hqr.output = zgelqf_hqr_output;
+    test_zgelqf_hqr.outchk = zgelqf_hqr_outchk;
+    test_zgelqf_hqr.fptr   = testing_zgelqf_hqr;
+    test_zgelqf_hqr.next   = NULL;
 
     testing_register( &test_zgelqf_hqr );
 }
diff --git a/testing/testing_zgelqs.c b/testing/testing_zgelqs.c
index ea61cca131dbcf154daed639268fc659e2131950..75c2825c9f26e89dcc01549587c0a0713aa406bc 100644
--- a/testing/testing_zgelqs.c
+++ b/testing/testing_zgelqs.c
@@ -32,10 +32,11 @@ flops_zgelqs()
 int
 testing_zgelqs( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA1, *descA2, *descB1, *descB2, *descT, *descQ, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -49,8 +50,9 @@ testing_zgelqs( run_arg_list_t *args, int check )
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      seedB  = run_arg_get_int( args, "seedB", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgelqs();
+
+    /* Descriptors */
+    CHAM_desc_t *descA1, *descA2, *descB1, *descB2, *descT, *descQ, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -110,8 +112,8 @@ testing_zgelqs( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgelqs;
-const char *zgelqs_params[] = { "mtxfmt", "nb", "ib", "m",     "n",     "k", "lda",
-                                "ldb", "qra", "seedA", "seedB", NULL };
+const char *zgelqs_params[] = { "mtxfmt", "nb",  "ib",  "m",     "n",     "k",
+                                "lda",    "ldb", "qra", "seedA", "seedB", NULL };
 const char *zgelqs_output[] = { NULL };
 const char *zgelqs_outchk[] = { "RETURN", NULL };
 
@@ -122,13 +124,13 @@ void testing_zgelqs_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgelqs_init( void )
 {
-    test_zgelqs.name        = "zgelqs";
-    test_zgelqs.helper      = "General LQ solve";
-    test_zgelqs.params      = zgelqs_params;
-    test_zgelqs.output      = zgelqs_output;
-    test_zgelqs.outchk      = zgelqs_outchk;
-    test_zgelqs.fptr        = testing_zgelqs;
-    test_zgelqs.next        = NULL;
+    test_zgelqs.name   = "zgelqs";
+    test_zgelqs.helper = "General LQ solve";
+    test_zgelqs.params = zgelqs_params;
+    test_zgelqs.output = zgelqs_output;
+    test_zgelqs.outchk = zgelqs_outchk;
+    test_zgelqs.fptr   = testing_zgelqs;
+    test_zgelqs.next   = NULL;
 
     testing_register( &test_zgelqs );
 }
diff --git a/testing/testing_zgels.c b/testing/testing_zgels.c
index e3ef17c9fdeb0e366d4d66b5ea7bfc8ae87e127b..df61a7ad3b4217afd11eccd2f772b20187e6d91f 100644
--- a/testing/testing_zgels.c
+++ b/testing/testing_zgels.c
@@ -37,10 +37,11 @@ flops_zgels( cham_trans_t trans, int M, int N, int NRHS )
 int
 testing_zgels( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descX, *descT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          ib     = run_arg_get_int( args, "ib", 48 );
@@ -56,8 +57,9 @@ testing_zgels( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedB  = run_arg_get_int( args, "seedB", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgels( trans, M, N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX, *descT;
 
     /* Make sure trans is only Notrans or ConjTrans */
     trans = ( trans == ChamNoTrans ) ? trans : ChamConjTrans;
@@ -85,12 +87,19 @@ testing_zgels( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descX, seedB );
 
     /* Computes the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgels_Tile( trans, descA, descT, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgels_Tile_Async( trans, descA, descT, descX,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgels_Tile( trans, descA, descT, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgels( trans, M, N, NRHS ) );
 
     if ( check ) {
         CHAM_desc_t *descA0, *descB;
@@ -131,8 +140,8 @@ testing_zgels( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgels;
-const char *zgels_params[] = { "mtxfmt", "nb", "ib",  "trans", "m",     "n",     "k",
-                               "lda", "ldb", "qra",    "seedA", "seedB", NULL };
+const char *zgels_params[] = { "mtxfmt", "nb",  "ib",  "trans", "m",     "n", "k",
+                               "lda",    "ldb", "qra", "seedA", "seedB", NULL };
 const char *zgels_output[] = { NULL };
 const char *zgels_outchk[] = { "RETURN", NULL };
 
@@ -143,13 +152,13 @@ void testing_zgels_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgels_init( void )
 {
-    test_zgels.name        = "zgels";
-    test_zgels.helper      = "Linear least squares with general matrix";
-    test_zgels.params      = zgels_params;
-    test_zgels.output      = zgels_output;
-    test_zgels.outchk      = zgels_outchk;
-    test_zgels.fptr        = testing_zgels;
-    test_zgels.next        = NULL;
+    test_zgels.name   = "zgels";
+    test_zgels.helper = "Linear least squares with general matrix";
+    test_zgels.params = zgels_params;
+    test_zgels.output = zgels_output;
+    test_zgels.outchk = zgels_outchk;
+    test_zgels.fptr   = testing_zgels;
+    test_zgels.next   = NULL;
 
     testing_register( &test_zgels );
 }
diff --git a/testing/testing_zgels_hqr.c b/testing/testing_zgels_hqr.c
index ee3742c1f99c6c54a5b66126cb0c85b2330978e6..e897ac7ae5002a7b815e7a32e264729e0a723304 100644
--- a/testing/testing_zgels_hqr.c
+++ b/testing/testing_zgels_hqr.c
@@ -37,10 +37,11 @@ flops_zgels_hqr( cham_trans_t trans, int M, int N, int NRHS )
 int
 testing_zgels_hqr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descX, *descTS, *descTT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          ib     = run_arg_get_int( args, "ib", 48 );
@@ -60,9 +61,9 @@ testing_zgels_hqr( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedB  = run_arg_get_int( args, "seedB", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgels_hqr( trans, M, N, NRHS );
 
+    /* Descriptors */
+    CHAM_desc_t    *descA, *descX, *descTS, *descTT;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -83,20 +84,28 @@ testing_zgels_hqr( run_arg_list_t *args, int check )
     matrix.nodes = P * Q;
     matrix.p     = P;
 
-    libhqr_init_hqr(
-        &qrtree, ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+    libhqr_init_hqr( &qrtree, ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, &matrix,
+                     llvl, hlvl, qr_a, qr_p, domino, 0 );
 
     /* Fills the matrix with random values */
     CHAMELEON_zplrnt_Tile( descA, seedA );
     CHAMELEON_zplrnt_Tile( descX, seedB );
 
     /* Computes the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgels_param_Tile( &qrtree, trans, descA, descTS, descTT, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgels_param_Tile_Async( &qrtree, trans, descA, descTS, descTT, descX,
+                                                 test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgels_param_Tile( &qrtree, trans, descA, descTS, descTT, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgels_hqr( trans, M, N, NRHS ) );
 
     if ( check ) {
         CHAM_desc_t *descA0, *descB;
@@ -139,9 +148,9 @@ testing_zgels_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgels_hqr;
-const char *zgels_hqr_params[] = { "mtxfmt", "nb",  "ib",     "trans", "m",     "n",   "k",
-                                   "lda",  "ldb",    "qra",    "qra",   "qrp", "llvl",
-                                   "hlvl", "domino", "seedA", "seedB", NULL };
+const char *zgels_hqr_params[] = { "mtxfmt", "nb",   "ib",     "trans", "m",     "n",
+                                   "k",      "lda",  "ldb",    "qra",   "qra",   "qrp",
+                                   "llvl",   "hlvl", "domino", "seedA", "seedB", NULL };
 const char *zgels_hqr_output[] = { NULL };
 const char *zgels_hqr_outchk[] = { "RETURN", NULL };
 
@@ -152,13 +161,14 @@ void testing_zgels_hqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgels_hqr_init( void )
 {
-    test_zgels_hqr.name   = "zgels_hqr";
-    test_zgels_hqr.helper = "Linear least squares with general matrix using hierarchical reduction trees";
+    test_zgels_hqr.name = "zgels_hqr";
+    test_zgels_hqr.helper =
+        "Linear least squares with general matrix using hierarchical reduction trees";
     test_zgels_hqr.params = zgels_hqr_params;
     test_zgels_hqr.output = zgels_hqr_output;
     test_zgels_hqr.outchk = zgels_hqr_outchk;
-    test_zgels_hqr.fptr = testing_zgels_hqr;
-    test_zgels_hqr.next = NULL;
+    test_zgels_hqr.fptr   = testing_zgels_hqr;
+    test_zgels_hqr.next   = NULL;
 
     testing_register( &test_zgels_hqr );
 }
diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c
index 3679971b1649ab5ef99d4d79c666f9c177386d11..d1700a38971d5db624e6b571f2d6513779151230 100644
--- a/testing/testing_zgemm.c
+++ b/testing/testing_zgemm.c
@@ -25,11 +25,11 @@
 int
 testing_zgemm( run_arg_list_t *args, int check )
 {
-    int          Am, An, Bm, Bn;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
     /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -47,8 +47,11 @@ testing_zgemm( run_arg_list_t *args, int check )
     int                   seedB = run_arg_get_int( args, "seedB", random() );
     int                   seedC = run_arg_get_int( args, "seedC", random() );
     int                   Q     = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgemm( M, N, K );
+
+    /* Descriptors */
+    int          Am, An, Bm, Bn;
+    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+    void        *ws = NULL;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
@@ -86,13 +89,28 @@ testing_zgemm( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descB, seedB );
     CHAMELEON_zplrnt_Tile( descC, seedC );
 
+    if ( async ) {
+        ws = CHAMELEON_zgemm_WS_Alloc( transA, transB, descA, descB, descC );
+    }
+
     /* Calculate the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zgemm_Tile( transA, transB, alpha, descA, descB, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgemm_Tile_Async( transA, transB, alpha, descA, descB, beta, descC, ws,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgemm_Tile( transA, transB, alpha, descA, descB, beta, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgemm( M, N, K ) );
+
+    if ( ws != NULL ) {
+        CHAMELEON_zgemm_WS_Free( ws );
+    }
 
     /* Check the solution */
     if ( check ) {
@@ -113,8 +131,9 @@ testing_zgemm( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgemm;
-const char *zgemm_params[] = { "mtxfmt", "nb", "transA", "transB", "m",     "n",     "k",     "lda", "ldb",
-                               "ldc", "alpha",  "beta",   "seedA", "seedB", "seedC", NULL };
+const char *zgemm_params[] = { "mtxfmt", "nb",    "transA", "transB", "m",     "n",
+                               "k",      "lda",   "ldb",    "ldc",    "alpha", "beta",
+                               "seedA",  "seedB", "seedC",  NULL };
 const char *zgemm_output[] = { NULL };
 const char *zgemm_outchk[] = { "||A||", "||B||", "||C||", "||R||", "RETURN", NULL };
 
@@ -125,13 +144,13 @@ void testing_zgemm_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgemm_init( void )
 {
-    test_zgemm.name        = "zgemm";
-    test_zgemm.helper      = "General matrix-matrix multiply";
-    test_zgemm.params      = zgemm_params;
-    test_zgemm.output      = zgemm_output;
-    test_zgemm.outchk      = zgemm_outchk;
-    test_zgemm.fptr        = testing_zgemm;
-    test_zgemm.next        = NULL;
+    test_zgemm.name   = "zgemm";
+    test_zgemm.helper = "General matrix-matrix multiply";
+    test_zgemm.params = zgemm_params;
+    test_zgemm.output = zgemm_output;
+    test_zgemm.outchk = zgemm_outchk;
+    test_zgemm.fptr   = testing_zgemm;
+    test_zgemm.next   = NULL;
 
     testing_register( &test_zgemm );
 }
diff --git a/testing/testing_zgenm2.c b/testing/testing_zgenm2.c
index c18ef34f724aacec21702ff4ffc9ee7a95899dbd..ea1a33154d969636d10743901adf3bf8f611b64e 100644
--- a/testing/testing_zgenm2.c
+++ b/testing/testing_zgenm2.c
@@ -40,27 +40,29 @@ flops_zgenm2( int M, int N )
 int
 testing_zgenm2( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
+    intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int      nb     = run_arg_get_int( args, "nb", 320 );
+    int      P      = parameters_getvalue_int( "P" );
+    int      N      = run_arg_get_int( args, "N", 1000 );
+    int      M      = run_arg_get_int( args, "M", N );
+    int      LDA    = run_arg_get_int( args, "LDA", M );
+    int      seedA  = run_arg_get_int( args, "seedA", random() );
+    int      Q      = parameters_compute_q( P );
+    int      minMN  = chameleon_min( M, N );
+    double   cond   = run_arg_get_double( args, "cond", 1.e16 );
+    int      mode   = run_arg_get_int( args, "mode", 4 );
+    double   tol    = 1.e-1;
+
+    /* Descriptors */
     double       norm;
     CHAM_desc_t *descA;
     double      *D, dmax = 1.;
 
-    /* Reads arguments */
-    intptr_t        mtxfmt = parameters_getvalue_int( "mtxfmt" );
-    int             nb     = run_arg_get_int( args, "nb", 320 );
-    int             P      = parameters_getvalue_int( "P" );
-    int             N      = run_arg_get_int( args, "N", 1000 );
-    int             M      = run_arg_get_int( args, "M", N );
-    int             LDA    = run_arg_get_int( args, "LDA", M );
-    int             seedA  = run_arg_get_int( args, "seedA", random() );
-    int             Q      = parameters_compute_q( P );
-    int             minMN  = chameleon_min( M, N );
-    double          cond   = run_arg_get_double( args, "cond", 1.e16 );
-    int             mode   = run_arg_get_int( args, "mode", 4 );
-    double          tol    = 1.e-1;
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgenm2( M, N );
-
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
     /* Generate the diagonal of eigen/singular values */
@@ -92,15 +94,19 @@ testing_zgenm2( run_arg_list_t *args, int check )
     }
 
     /* Calculates the norm */
-    START_TIMING( t );
-    norm = CHAMELEON_zgenm2_Tile( tol, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgenm2_Tile_Async( tol, descA, &norm,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        norm = CHAMELEON_zgenm2_Tile( tol, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgenm2( M, N ) );
 
     /* Checks the solution */
-    hres = 0;
     if ( check ) {
         double res = fabs(dmax - norm) / (dmax * tol);
 
@@ -130,13 +136,13 @@ void testing_zgenm2_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgenm2_init( void )
 {
-    test_zgenm2.name        = "zgenm2";
-    test_zgenm2.helper      = "General matrix two-norm estimator";
-    test_zgenm2.params      = zgenm2_params;
-    test_zgenm2.output      = zgenm2_output;
-    test_zgenm2.outchk      = zgenm2_outchk;
-    test_zgenm2.fptr        = testing_zgenm2;
-    test_zgenm2.next        = NULL;
+    test_zgenm2.name   = "zgenm2";
+    test_zgenm2.helper = "General matrix two-norm estimator";
+    test_zgenm2.params = zgenm2_params;
+    test_zgenm2.output = zgenm2_output;
+    test_zgenm2.outchk = zgenm2_outchk;
+    test_zgenm2.fptr   = testing_zgenm2;
+    test_zgenm2.next   = NULL;
 
     testing_register( &test_zgenm2 );
 }
diff --git a/testing/testing_zgepdf_qdwh.c b/testing/testing_zgepdf_qdwh.c
index f4aef06e4b9322db6cf53365ca1542456e4099c0..56042f19f2223a2707d1f6c1f1255e1919ab81fa 100644
--- a/testing/testing_zgepdf_qdwh.c
+++ b/testing/testing_zgepdf_qdwh.c
@@ -30,40 +30,45 @@
 int
 testing_zgepdf_qdwh( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
+    intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int      nb     = run_arg_get_int( args, "nb", 320 );
+    int      ib     = run_arg_get_int( args, "ib", 48 );
+    int      P      = parameters_getvalue_int( "P" );
+    int      N      = run_arg_get_int( args, "N", 1000 );
+    int      M      = run_arg_get_int( args, "M", N );
+    int      LDA    = run_arg_get_int( args, "LDA", M );
+    int      LDB    = run_arg_get_int( args, "LDB", N );
+    int      seedA  = run_arg_get_int( args, "seedA", random() );
+    int      Q      = parameters_compute_q( P );
+    double   cond   = run_arg_get_double( args, "cond", 1.e16 );
+    int      mode   = run_arg_get_int( args, "mode", 4 );
+    int      runtime;
+
+    /* Descriptors */
     CHAM_desc_t *descA, *descA0, *descH;
     gepdf_info_t info;
 
-    /* Reads arguments */
-    intptr_t        mtxfmt = parameters_getvalue_int( "mtxfmt" );
-    int             nb     = run_arg_get_int( args, "nb", 320 );
-    int             ib     = run_arg_get_int( args, "ib", 48 );
-    int             P      = parameters_getvalue_int( "P" );
-    int             N      = run_arg_get_int( args, "N", 1000 );
-    int             M      = run_arg_get_int( args, "M", N );
-    int             LDA    = run_arg_get_int( args, "LDA", M );
-    int             LDB    = run_arg_get_int( args, "LDB", N );
-    int             seedA  = run_arg_get_int( args, "seedA", random() );
-    int             Q      = parameters_compute_q( P );
-    double          cond   = run_arg_get_double( args, "cond", 1.e16 );
-    int             mode   = run_arg_get_int( args, "mode", 4 );
-    int             runtime;
-    cham_fixdbl_t t, gflops;
-
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
 
     CHAMELEON_Get( CHAMELEON_RUNTIME, &runtime );
     if ( runtime == RUNTIME_SCHED_PARSEC ) {
         if ( CHAMELEON_Comm_rank() == 0 ) {
-            fprintf( stderr, "SKIPPED: The QDWH polar decomposition is not supported with PaRSEC\n" );
+            fprintf( stderr,
+                     "SKIPPED: The QDWH polar decomposition is not supported with PaRSEC\n" );
         }
         return -1;
     }
 
     if ( N > M ) {
         if ( CHAMELEON_Comm_rank() == 0 ) {
-            fprintf( stderr, "SKIPPED: The QDWH polar decomposition is performed only when M >= N\n" );
+            fprintf( stderr,
+                     "SKIPPED: The QDWH polar decomposition is performed only when M >= N\n" );
         }
         return -1;
     }
@@ -87,15 +92,20 @@ testing_zgepdf_qdwh( run_arg_list_t *args, int check )
     }
 
     /* Calculates the norm */
-    START_TIMING( t );
-    hres = CHAMELEON_zgepdf_qdwh_Tile( descA, descH, &info );
-    STOP_TIMING( t );
-    gflops = info.flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgepdf_qdwh_Tile_Async( descA, descH, &info,
+                                                 test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descH, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgepdf_qdwh_Tile( descA, descH, &info );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, info.flops );
 
     /* Checks the solution */
-    hres = 0;
     if ( check ) {
         hres += check_zxxpd( args, descA0, descA, descH );
         hres += check_zortho( args, descA );
@@ -110,7 +120,8 @@ testing_zgepdf_qdwh( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgepdf_qdwh;
-const char *zgepdf_qdwh_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "ldb", "seedA", "cond", "mode", NULL };
+const char *zgepdf_qdwh_params[] = { "mtxfmt", "nb",    "ib",   "m",    "n", "lda",
+                                     "ldb",    "seedA", "cond", "mode", NULL };
 const char *zgepdf_qdwh_output[] = { NULL };
 const char *zgepdf_qdwh_outchk[] = { "||A||", "||A-fact(A)||", "||I-QQ'||", "RETURN", NULL };
 
diff --git a/testing/testing_zgepdf_qr.c b/testing/testing_zgepdf_qr.c
index a1b933382bb5f86ed6f34e2473d12ce275af92e5..401d9b6d0816b61a72ba072e08b7251247dc6d00 100644
--- a/testing/testing_zgepdf_qr.c
+++ b/testing/testing_zgepdf_qr.c
@@ -31,32 +31,34 @@
 static cham_fixdbl_t
 flops_zgepdf_qr( int M, int N )
 {
-    double flops = flops_zgeqrf( M+N, N ) + flops_zungqr( M+N, N, N );
+    double flops = flops_zgeqrf( M + N, N ) + flops_zungqr( M + N, N, N );
     return flops;
 }
 
 int
 testing_zgepdf_qr( run_arg_list_t *args, int check )
 {
-    int           hres = 0;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int                   async  = parameters_getvalue_int( "async" );
+    intptr_t              mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int                   nb     = run_arg_get_int( args, "nb", 320 );
+    int                   ib     = run_arg_get_int( args, "ib", 48 );
+    int                   P      = parameters_getvalue_int( "P" );
+    int                   N      = run_arg_get_int( args, "N", 1000 );
+    int                   M      = run_arg_get_int( args, "M", N );
+    int                   LDA    = run_arg_get_int( args, "LDA", M );
+    int                   seedA  = run_arg_get_int( args, "seedA", random() );
+    int                   Q      = parameters_compute_q( P );
+    CHAMELEON_Complex64_t alpha  = testing_zalea();
+
+    /* Descriptors */
     CHAM_desc_t  *descA1, *descA2, *descQ1, *descQ2;
     CHAM_desc_t  *TS1, *TT1, *TS2, *TT2;
     libhqr_tree_t qrtreeT, qrtreeB;
-
-    /* Reads arguments */
-    intptr_t        mtxfmt = parameters_getvalue_int( "mtxfmt" );
-    int             nb     = run_arg_get_int( args, "nb", 320 );
-    int             ib     = run_arg_get_int( args, "ib", 48 );
-    int             P      = parameters_getvalue_int( "P" );
-    int             N      = run_arg_get_int( args, "N", 1000 );
-    int             M      = run_arg_get_int( args, "M", N );
-    int             LDA    = run_arg_get_int( args, "LDA", M );
-    int             seedA  = run_arg_get_int( args, "seedA", random() );
-    int             Q      = parameters_compute_q( P );
-    CHAMELEON_Complex64_t alpha = testing_zalea();
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgepdf_qr( M, N );
-    int zqdwh_opt_id = 1;
+    int           zqdwh_opt_id = 1;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
 
@@ -65,14 +67,18 @@ testing_zgepdf_qr( run_arg_list_t *args, int check )
 
     if ( N > M ) {
         if ( CHAMELEON_Comm_rank() == 0 ) {
-            fprintf( stderr, "SKIPPED: The QR factorization for Polar Decomposition is performed only when M >= N\n" );
+            fprintf( stderr,
+                     "SKIPPED: The QR factorization for Polar Decomposition is performed only when "
+                     "M >= N\n" );
         }
         return -1;
     }
 
-    if ( (N % nb) != 0 ) {
+    if ( ( N % nb ) != 0 ) {
         if ( CHAMELEON_Comm_rank() == 0 ) {
-            fprintf( stderr, "SKIPPED: The QR factorization for Polar Decomposition supports only multiple of nb\n" );
+            fprintf( stderr,
+                     "SKIPPED: The QR factorization for Polar Decomposition supports only multiple "
+                     "of nb\n" );
         }
         return -1;
     }
@@ -102,39 +108,42 @@ testing_zgepdf_qr( run_arg_list_t *args, int check )
         libhqr_matrix_t mat = {
             .mt    = descA1->mt,
             .nt    = descA1->nt,
-            .nodes = descA1->p * descA1-> q,
+            .nodes = descA1->p * descA1->q,
             .p     = descA1->p,
         };
 
         /* Tree for the top matrix */
         libhqr_init_hqr( &qrtreeT, LIBHQR_QR, &mat,
-                         -1,    /*low level tree   */
-                         -1,    /* high level tree */
-                         -1,    /* TS tree size    */
+                         -1,        /* low level tree  */
+                         -1,        /* high level tree */
+                         -1,        /* TS tree size    */
                          descA1->p, /* High level size */
-                         -1,    /* Domino */
-                         0      /* TSRR (unstable) */ );
+                         -1,        /* Domino */
+                         0          /* TSRR (unstable) */ );
 
         /* Tree for the bottom matrix */
         mat.mt = descA2->mt;
         mat.nt = descA2->nt;
-        libhqr_init_tphqr( &qrtreeB, LIBHQR_TSQR,
-                           mat.mt, zqdwh_opt_id ? (mat.nt-1) : 0, &mat,
-                           /* high level tree (Could be greedy, but flat should reduce the volume of comm) */
-                           LIBHQR_FLAT_TREE,
-                           -1,   /* TS tree size    */
-                           descA2->p /* High level size */ );
+        libhqr_init_tphqr(
+            &qrtreeB, LIBHQR_TSQR, mat.mt, zqdwh_opt_id ? (mat.nt - 1) : 0, &mat,
+            /* high level tree (Could be greedy, but flat should reduce the volume of comm) */
+            LIBHQR_FLAT_TREE,
+            -1,       /* TS tree size    */
+            descA2->p /* High level size */ );
     }
 
-    /* Calculates the norm */
-    START_TIMING( t );
-    hres = CHAMELEON_zgepdf_qr_Tile( 1, 1, &qrtreeT, &qrtreeB,
-                                     descA1, TS1, TT1, descQ1,
-                                     descA2, TS2, TT2, descQ2 );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    /* Calculates the solution */
+    testing_start( &test_data );
+    if ( async ) {
+        assert( 0 );
+    }
+    else {
+        hres = CHAMELEON_zgepdf_qr_Tile( 1, 1, &qrtreeT, &qrtreeB,
+                                         descA1, TS1, TT1, descQ1,
+                                         descA2, TS2, TT2, descQ2 );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgepdf_qr( M, N ) );
 
     CHAMELEON_Dealloc_Workspace( &TS1 );
     CHAMELEON_Dealloc_Workspace( &TS2 );
@@ -145,7 +154,6 @@ testing_zgepdf_qr( run_arg_list_t *args, int check )
     libhqr_finalize( &qrtreeB );
 
     /* Checks the solution */
-    hres = 0;
     if ( check ) {
         CHAM_desc_t *descA01, *descA02;
         descA01 = CHAMELEON_Desc_Copy( descA1, NULL );
diff --git a/testing/testing_zgeqrf.c b/testing/testing_zgeqrf.c
index 51322baa2a843ea4b7944537443b9da20745aa6c..331eb06a9e1e41ce7b86c5efaa999266c5d87151 100644
--- a/testing/testing_zgeqrf.c
+++ b/testing/testing_zgeqrf.c
@@ -24,10 +24,11 @@
 int
 testing_zgeqrf( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -38,8 +39,9 @@ testing_zgeqrf( run_arg_list_t *args, int check )
     int      RH     = run_arg_get_int( args, "qra", 4 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgeqrf( M, N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descT;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -53,20 +55,25 @@ testing_zgeqrf( run_arg_list_t *args, int check )
     }
 
     /* Creates the matrices */
-    CHAMELEON_Desc_Create(
-        &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Desc_Create( &descA, (void *)( -mtxfmt ), ChamComplexDouble, nb, nb, nb * nb, LDA, N,
+                           0, 0, M, N, P, Q );
     CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q );
 
     /* Fills the matrix with random values */
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgeqrf_Tile( descA, descT );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgeqrf_Tile_Async( descA, descT, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgeqrf_Tile( descA, descT );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgeqrf( M, N ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -93,7 +100,7 @@ testing_zgeqrf( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgeqrf;
-const char *zgeqrf_params[] = { "mtxfmt", "nb","ib", "m", "n", "lda", "qra", "seedA", NULL };
+const char *zgeqrf_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "qra", "seedA", NULL };
 const char *zgeqrf_output[] = { NULL };
 const char *zgeqrf_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -104,13 +111,13 @@ void testing_zgeqrf_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgeqrf_init( void )
 {
-    test_zgeqrf.name        = "zgeqrf";
-    test_zgeqrf.helper      = "General QR factorization";
-    test_zgeqrf.params      = zgeqrf_params;
-    test_zgeqrf.output      = zgeqrf_output;
-    test_zgeqrf.outchk      = zgeqrf_outchk;
-    test_zgeqrf.fptr        = testing_zgeqrf;
-    test_zgeqrf.next        = NULL;
+    test_zgeqrf.name   = "zgeqrf";
+    test_zgeqrf.helper = "General QR factorization";
+    test_zgeqrf.params = zgeqrf_params;
+    test_zgeqrf.output = zgeqrf_output;
+    test_zgeqrf.outchk = zgeqrf_outchk;
+    test_zgeqrf.fptr   = testing_zgeqrf;
+    test_zgeqrf.next   = NULL;
 
     testing_register( &test_zgeqrf );
 }
diff --git a/testing/testing_zgeqrf_hqr.c b/testing/testing_zgeqrf_hqr.c
index 47139abc66eecbb8d156b4a40100986361f4c94a..8d4250ced4ba833115e4824b82010b4b2ff1ea48 100644
--- a/testing/testing_zgeqrf_hqr.c
+++ b/testing/testing_zgeqrf_hqr.c
@@ -24,10 +24,11 @@
 int
 testing_zgeqrf_hqr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descTS, *descTT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -42,9 +43,9 @@ testing_zgeqrf_hqr( run_arg_list_t *args, int check )
     int      domino = run_arg_get_int( args, "domino", -1 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgeqrf( M, N );
 
+    /* Descriptors */
+    CHAM_desc_t    *descA, *descTS, *descTT;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -69,12 +70,19 @@ testing_zgeqrf_hqr( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgeqrf_param_Tile( &qrtree, descA, descTS, descTT );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgeqrf_param_Tile_Async( &qrtree, descA, descTS, descTT,
+                                                  test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgeqrf_param_Tile( &qrtree, descA, descTS, descTT );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgeqrf( M, N ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -103,8 +111,8 @@ testing_zgeqrf_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgeqrf_hqr;
-const char *zgeqrf_hqr_params[] = { "mtxfmt", "nb", "ib",   "m",    "n",      "lda",   "qra",
-                                    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
+const char *zgeqrf_hqr_params[] = { "mtxfmt", "nb",   "ib",   "m",      "n",     "lda", "qra",
+                                    "qrp",    "llvl", "hlvl", "domino", "seedA", NULL };
 const char *zgeqrf_hqr_output[] = { NULL };
 const char *zgeqrf_hqr_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -115,13 +123,13 @@ void testing_zgeqrf_hqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgeqrf_hqr_init( void )
 {
-    test_zgeqrf_hqr.name        = "zgeqrf_hqr";
-    test_zgeqrf_hqr.helper      = "General QR factorization with hierachical reduction trees";
-    test_zgeqrf_hqr.params      = zgeqrf_hqr_params;
-    test_zgeqrf_hqr.output      = zgeqrf_hqr_output;
-    test_zgeqrf_hqr.outchk      = zgeqrf_hqr_outchk;
-    test_zgeqrf_hqr.fptr        = testing_zgeqrf_hqr;
-    test_zgeqrf_hqr.next        = NULL;
+    test_zgeqrf_hqr.name   = "zgeqrf_hqr";
+    test_zgeqrf_hqr.helper = "General QR factorization with hierachical reduction trees";
+    test_zgeqrf_hqr.params = zgeqrf_hqr_params;
+    test_zgeqrf_hqr.output = zgeqrf_hqr_output;
+    test_zgeqrf_hqr.outchk = zgeqrf_hqr_outchk;
+    test_zgeqrf_hqr.fptr   = testing_zgeqrf_hqr;
+    test_zgeqrf_hqr.next   = NULL;
 
     testing_register( &test_zgeqrf_hqr );
 }
diff --git a/testing/testing_zgeqrs.c b/testing/testing_zgeqrs.c
index 381e2d21548eb40a6c1dfd1d8812997f7e661fdd..4fd5f29ca6934caf639544bf50d5a743a1bf1c2f 100644
--- a/testing/testing_zgeqrs.c
+++ b/testing/testing_zgeqrs.c
@@ -25,10 +25,11 @@
 int
 testing_zgeqrs( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descX, *descT;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -42,8 +43,9 @@ testing_zgeqrs( run_arg_list_t *args, int check )
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      seedB  = run_arg_get_int( args, "seedB", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgeqrs( M, N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX, *descT;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -77,12 +79,19 @@ testing_zgeqrs( run_arg_list_t *args, int check )
     /* Calculates the solution */
     hres = CHAMELEON_zgeqrf_Tile( descA, descT );
 
-    START_TIMING( t );
-    hres = CHAMELEON_zgeqrs_Tile( descA, descT, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zgeqrs_Tile_Async( descA, descT, descX,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgeqrs_Tile( descA, descT, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgeqrs( M, N, NRHS ) );
 
     /* Checks the factorisation, orthogonality and residue */
     if ( check ) {
@@ -110,8 +119,8 @@ testing_zgeqrs( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgeqrs;
-const char *zgeqrs_params[] = { "mtxfmt", "nb", "ib", "m",     "n",     "k", "lda",
-                                "ldb", "qra", "seedA", "seedB", NULL };
+const char *zgeqrs_params[] = { "mtxfmt", "nb",  "ib",  "m",     "n",     "k",
+                                "lda",    "ldb", "qra", "seedA", "seedB", NULL };
 const char *zgeqrs_output[] = { NULL };
 const char *zgeqrs_outchk[] = { "RETURN", NULL };
 
@@ -122,13 +131,13 @@ void testing_zgeqrs_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgeqrs_init( void )
 {
-    test_zgeqrs.name        = "zgeqrs";
-    test_zgeqrs.helper      = "General QR solve";
-    test_zgeqrs.params      = zgeqrs_params;
-    test_zgeqrs.output      = zgeqrs_output;
-    test_zgeqrs.outchk      = zgeqrs_outchk;
-    test_zgeqrs.fptr        = testing_zgeqrs;
-    test_zgeqrs.next        = NULL;
+    test_zgeqrs.name   = "zgeqrs";
+    test_zgeqrs.helper = "General QR solve";
+    test_zgeqrs.params = zgeqrs_params;
+    test_zgeqrs.output = zgeqrs_output;
+    test_zgeqrs.outchk = zgeqrs_outchk;
+    test_zgeqrs.fptr   = testing_zgeqrs;
+    test_zgeqrs.next   = NULL;
 
     testing_register( &test_zgeqrs );
 }
diff --git a/testing/testing_zgesv.c b/testing/testing_zgesv.c
index 779b49ec08432b8ac3fa92c7942bfd92e93f5842..ab8b9daa8800b162ba9313fb52d6017d1de1e894 100644
--- a/testing/testing_zgesv.c
+++ b/testing/testing_zgesv.c
@@ -31,10 +31,11 @@ flops_zgesv( int N, int NRHS )
 int
 testing_zgesv( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      P      = parameters_getvalue_int( "P" );
@@ -45,8 +46,9 @@ testing_zgesv( run_arg_list_t *args, int check )
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      seedB  = run_arg_get_int( args, "seedB", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgesv( N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -61,12 +63,18 @@ testing_zgesv( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descX, seedB );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgesv_nopiv_Tile( descA, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX,
+                                                 test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* flushes are issued on the same sequence as the solve */
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zgesv_nopiv_Tile( descA, descX );
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zgesv( N, NRHS ) ); /* flop count is computed at the call site */
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -83,7 +91,8 @@ testing_zgesv( run_arg_list_t *args, int check )
         CHAMELEON_zplrnt_Tile( descA0, seedA );
         CHAMELEON_zplrnt_Tile( descB, seedB );
 
-        hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB );
+        hres +=
+            check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB );
 
         CHAMELEON_Desc_Destroy( &descA0 );
         CHAMELEON_Desc_Destroy( &descB );
@@ -96,7 +105,7 @@ testing_zgesv( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgesv;
-const char *zgesv_params[] = { "mtxfmt", "nb","n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zgesv_params[] = { "mtxfmt", "nb", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
 const char *zgesv_output[] = { NULL };
 const char *zgesv_outchk[] = { "RETURN", NULL };
 
@@ -107,13 +116,13 @@ void testing_zgesv_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgesv_init( void )
 {
-    test_zgesv.name        = "zgesv";
-    test_zgesv.helper      = "General linear system solve (LU without pivoting)";
-    test_zgesv.params      = zgesv_params;
-    test_zgesv.output      = zgesv_output;
-    test_zgesv.outchk      = zgesv_outchk;
-    test_zgesv.fptr        = testing_zgesv;
-    test_zgesv.next        = NULL;
+    test_zgesv.name   = "zgesv";
+    test_zgesv.helper = "General linear system solve (LU without pivoting)";
+    test_zgesv.params = zgesv_params;
+    test_zgesv.output = zgesv_output;
+    test_zgesv.outchk = zgesv_outchk;
+    test_zgesv.fptr   = testing_zgesv;
+    test_zgesv.next   = NULL;
 
     testing_register( &test_zgesv );
 }
diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c
index f2e051bdaf32882edbd80960b61a2a59d2b34c50..bcc2b4ef42a367a124ee0a0bcf204ab01c43af3d 100644
--- a/testing/testing_zgetrf.c
+++ b/testing/testing_zgetrf.c
@@ -24,10 +24,11 @@
 int
 testing_zgetrf( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      P      = parameters_getvalue_int( "P" );
@@ -36,8 +37,9 @@ testing_zgetrf( run_arg_list_t *args, int check )
     int      LDA    = run_arg_get_int( args, "LDA", M );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgetrf( M, N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -49,12 +51,16 @@ testing_zgetrf( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zgetrf_nopiv_Tile( descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* flush is issued on the same sequence as the factorization */
+    }
+    else {
+        hres = CHAMELEON_zgetrf_nopiv_Tile( descA );
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zgetrf( M, N ) ); /* flop count is computed at the call site */
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -72,7 +78,7 @@ testing_zgetrf( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgetrf;
-const char *zgetrf_params[] = { "mtxfmt", "nb","m", "n", "lda", "seedA", NULL };
+const char *zgetrf_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", NULL };
 const char *zgetrf_output[] = { NULL };
 const char *zgetrf_outchk[] = { "||A||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -83,13 +89,13 @@ void testing_zgetrf_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgetrf_init( void )
 {
-    test_zgetrf.name        = "zgetrf";
-    test_zgetrf.helper      = "General factorization (LU without pivoting)";
-    test_zgetrf.params      = zgetrf_params;
-    test_zgetrf.output      = zgetrf_output;
-    test_zgetrf.outchk      = zgetrf_outchk;
-    test_zgetrf.fptr        = testing_zgetrf;
-    test_zgetrf.next        = NULL;
+    test_zgetrf.name   = "zgetrf";
+    test_zgetrf.helper = "General factorization (LU without pivoting)";
+    test_zgetrf.params = zgetrf_params;
+    test_zgetrf.output = zgetrf_output;
+    test_zgetrf.outchk = zgetrf_outchk;
+    test_zgetrf.fptr   = testing_zgetrf;
+    test_zgetrf.next   = NULL;
 
     testing_register( &test_zgetrf );
 }
diff --git a/testing/testing_zgetrs.c b/testing/testing_zgetrs.c
index e4d0e9a4cbc074caf75761176975274a14a866ac..7515edd2dc55efbac0c81cb746e72c37eebb2725 100644
--- a/testing/testing_zgetrs.c
+++ b/testing/testing_zgetrs.c
@@ -25,10 +25,11 @@
 int
 testing_zgetrs( run_arg_list_t *args, int check )
 {
-    int          hres;
-    CHAM_desc_t *descA, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      P      = parameters_getvalue_int( "P" );
@@ -39,8 +40,9 @@ testing_zgetrs( run_arg_list_t *args, int check )
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      seedB  = run_arg_get_int( args, "seedB", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgetrs( N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -58,12 +60,18 @@ testing_zgetrs( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres += CHAMELEON_zgetrs_nopiv_Tile( descA, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres += CHAMELEON_zgetrs_nopiv_Tile_Async( descA, descX, /* += accumulates onto the hres asserted to be 0 above */
+                                                   test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* flushes are issued on the same sequence as the solve */
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zgetrs_nopiv_Tile( descA, descX ); /* same accumulation as the async branch */
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zgetrs( N, NRHS ) ); /* flop count is computed at the call site */
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -73,7 +81,8 @@ testing_zgetrs( run_arg_list_t *args, int check )
         CHAMELEON_zplrnt_Tile( descA0, seedA );
         CHAMELEON_zplrnt_Tile( descB, seedB );
 
-        hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB );
+        hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower,
+                              descA0, descX, descB );
 
         CHAMELEON_Desc_Destroy( &descA0 );
         CHAMELEON_Desc_Destroy( &descB );
@@ -86,7 +95,7 @@ testing_zgetrs( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zgetrs;
-const char *zgetrs_params[] = { "mtxfmt", "nb","n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zgetrs_params[] = { "mtxfmt", "nb", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
 const char *zgetrs_output[] = { NULL };
 const char *zgetrs_outchk[] = { "RETURN", NULL };
 
@@ -97,13 +106,13 @@ void testing_zgetrs_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgetrs_init( void )
 {
-    test_zgetrs.name        = "zgetrs";
-    test_zgetrs.helper      = "General triangular solve (LU without pivoting)";
-    test_zgetrs.params      = zgetrs_params;
-    test_zgetrs.output      = zgetrs_output;
-    test_zgetrs.outchk      = zgetrs_outchk;
-    test_zgetrs.fptr        = testing_zgetrs;
-    test_zgetrs.next        = NULL;
+    test_zgetrs.name   = "zgetrs";
+    test_zgetrs.helper = "General triangular solve (LU without pivoting)";
+    test_zgetrs.params = zgetrs_params;
+    test_zgetrs.output = zgetrs_output;
+    test_zgetrs.outchk = zgetrs_outchk;
+    test_zgetrs.fptr   = testing_zgetrs;
+    test_zgetrs.next   = NULL;
 
     testing_register( &test_zgetrs );
 }
diff --git a/testing/testing_zgram.c b/testing/testing_zgram.c
index abd27f011e5b5c659c16b9563fb6fb4bb4be10ac..edacb306a6de1f0f29dc3b2d0848c2418c443428 100644
--- a/testing/testing_zgram.c
+++ b/testing/testing_zgram.c
@@ -37,20 +37,23 @@ flops_zgram( int N )
 int
 testing_zgram( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
     /* Read arguments */
-    intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
-    int          nb     = run_arg_get_int( args, "nb", 320 );
-    cham_uplo_t uplo    = run_arg_get_uplo( args, "uplo", ChamUpper );
-    int          P      = parameters_getvalue_int( "P" );
-    int          N      = run_arg_get_int( args, "N", 1000 );
-    int          LDA    = run_arg_get_int( args, "LDA", N );
-    int          seedA  = run_arg_get_int( args, "seedA", random() );
-    int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zgram( N );
+    int         async  = parameters_getvalue_int( "async" );
+    intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int         nb     = run_arg_get_int( args, "nb", 320 );
+    cham_uplo_t uplo   = run_arg_get_uplo( args, "uplo", ChamUpper );
+    int         P      = parameters_getvalue_int( "P" );
+    int         N      = run_arg_get_int( args, "N", 1000 );
+    int         LDA    = run_arg_get_int( args, "LDA", N );
+    int         seedA  = run_arg_get_int( args, "seedA", random() );
+    int         Q      = parameters_compute_q( P );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
+    void        *ws = NULL;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -61,13 +64,26 @@ testing_zgram( run_arg_list_t *args, int check )
     /* Fill the matrix with random values */
     CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA );
 
+    if ( async ) { /* ws is only consumed by CHAMELEON_zgram_Tile_Async() below */
+        ws = CHAMELEON_zgram_WS_Alloc( descA ); /* allocated before testing_start(), i.e. outside the timed section */
+    }
+
     /* Compute the gram matrix transformation */
-    START_TIMING( t );
-    hres = CHAMELEON_zgram_Tile( uplo, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zgram_Tile_Async( uplo, descA, ws, /* ws was allocated above for this path only */
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* flush is issued on the same sequence as the transformation */
+    }
+    else {
+        hres = CHAMELEON_zgram_Tile( uplo, descA ); /* this variant takes no workspace argument */
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zgram( N ) ); /* flop count is computed at the call site */
+
+    if ( ws != NULL ) { /* ws stays NULL unless the async path allocated it */
+        CHAMELEON_zgram_WS_Free( ws ); /* must pair with CHAMELEON_zgram_WS_Alloc(), not the zgemm workspace API */
+    }
 
     CHAMELEON_Desc_Destroy( &descA );
 
@@ -86,13 +102,13 @@ void testing_zgram_init( void ) __attribute__( ( constructor ) );
 void
 testing_zgram_init( void )
 {
-    test_zgram.name        = "zgram";
-    test_zgram.helper      = "General Gram matrix transformation";
-    test_zgram.params      = zgram_params;
-    test_zgram.output      = zgram_output;
-    test_zgram.outchk      = zgram_outchk;
-    test_zgram.fptr        = testing_zgram;
-    test_zgram.next        = NULL;
+    test_zgram.name   = "zgram";
+    test_zgram.helper = "General Gram matrix transformation";
+    test_zgram.params = zgram_params;
+    test_zgram.output = zgram_output;
+    test_zgram.outchk = zgram_outchk;
+    test_zgram.fptr   = testing_zgram;
+    test_zgram.next   = NULL;
 
     testing_register( &test_zgram );
 }
diff --git a/testing/testing_zhemm.c b/testing/testing_zhemm.c
index f5d195cc9c1b0dcf7b646b96480c456aa70ebfe3..ae396c1aac7d8c7ac77fe4e10d6d93942cb225be 100644
--- a/testing/testing_zhemm.c
+++ b/testing/testing_zhemm.c
@@ -25,11 +25,11 @@
 int
 testing_zhemm( run_arg_list_t *args, int check )
 {
-    int          Am;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int                   async  = parameters_getvalue_int( "async" );
     intptr_t              mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int                   nb     = run_arg_get_int( args, "nb", 320 );
     int                   P      = parameters_getvalue_int( "P" );
@@ -46,11 +46,13 @@ testing_zhemm( run_arg_list_t *args, int check )
     int                   seedB  = run_arg_get_int( args, "seedB", random() );
     int                   seedC  = run_arg_get_int( args, "seedC", random() );
     double                bump   = testing_dalea();
-    bump                         = run_arg_get_double( args, "bump", bump );
     int                   Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zhemm( side, M, N );
 
+    /* Descriptors */
+    int          Am;
+    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+
+    bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
 
@@ -78,12 +80,19 @@ testing_zhemm( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descC, seedC );
 
     /* Calculates the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zhemm_Tile( side, uplo, alpha, descA, descB, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zhemm_Tile_Async( side, uplo, alpha, descA, descB, beta, descC,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* all three operands are flushed on the same sequence */
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zhemm_Tile( side, uplo, alpha, descA, descB, beta, descC );
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zhemm( side, M, N ) ); /* flop count is computed at the call site */
 
     /* Checks the solution */
     if ( check ) {
@@ -105,8 +114,8 @@ testing_zhemm( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zhemm;
-const char *zhemm_params[] = { "mtxfmt", "nb",   "side", "uplo",  "m",     "n",     "lda",  "ldb", "ldc",
-                               "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL };
+const char *zhemm_params[] = { "mtxfmt", "nb",    "side", "uplo",  "m",     "n",     "lda",  "ldb",
+                               "ldc",    "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL };
 const char *zhemm_output[] = { NULL };
 const char *zhemm_outchk[] = { "RETURN", NULL };
 
@@ -117,13 +126,13 @@ void testing_zhemm_init( void ) __attribute__( ( constructor ) );
 void
 testing_zhemm_init( void )
 {
-    test_zhemm.name        = "zhemm";
-    test_zhemm.helper      = "Hermitian matrix-matrix multiply";
-    test_zhemm.params      = zhemm_params;
-    test_zhemm.output      = zhemm_output;
-    test_zhemm.outchk      = zhemm_outchk;
-    test_zhemm.fptr        = testing_zhemm;
-    test_zhemm.next        = NULL;
+    test_zhemm.name   = "zhemm";
+    test_zhemm.helper = "Hermitian matrix-matrix multiply";
+    test_zhemm.params = zhemm_params;
+    test_zhemm.output = zhemm_output;
+    test_zhemm.outchk = zhemm_outchk;
+    test_zhemm.fptr   = testing_zhemm;
+    test_zhemm.next   = NULL;
 
     testing_register( &test_zhemm );
 }
diff --git a/testing/testing_zher2k.c b/testing/testing_zher2k.c
index a27b2387cb04bfd527b5104c73de4190b97854bd..c242fc2bfa57fddc6877cb66a1e505bd5987f7ad 100644
--- a/testing/testing_zher2k.c
+++ b/testing/testing_zher2k.c
@@ -25,11 +25,11 @@
 int
 testing_zher2k( run_arg_list_t *args, int check )
 {
-    int          Am, An;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
     /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -46,11 +46,13 @@ testing_zher2k( run_arg_list_t *args, int check )
     int                   seedB = run_arg_get_int( args, "seedB", random() );
     int                   seedC = run_arg_get_int( args, "seedC", random() );
     double                bump  = testing_dalea();
-    bump                        = run_arg_get_double( args, "bump", bump );
-    int    Q                    = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zher2k( K, N );
+    int                   Q     = parameters_compute_q( P );
+
+    /* Descriptors */
+    int          Am, An;
+    CHAM_desc_t *descA, *descB, *descC, *descCinit;
 
+    bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_double( args, "beta", beta );
 
@@ -80,12 +82,19 @@ testing_zher2k( run_arg_list_t *args, int check )
     CHAMELEON_zplghe_Tile( bump, uplo, descC, seedC );
 
     /* Calculate the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zher2k_Tile( uplo, trans, alpha, descA, descB, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zher2k_Tile_Async( uplo, trans, alpha, descA, descB, beta, descC,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* all three operands are flushed on the same sequence */
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zher2k_Tile( uplo, trans, alpha, descA, descB, beta, descC );
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zher2k( K, N ) ); /* flop count is computed at the call site */
 
     /* Check the solution */
     if ( check ) {
@@ -107,8 +116,9 @@ testing_zher2k( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zher2k;
-const char *zher2k_params[] = { "mtxfmt", "nb",   "trans", "uplo",  "n",     "k",     "lda",  "ldb", "ldc",
-                                "alpha", "beta",  "seedA", "seedB", "seedC", "bump", NULL };
+const char *zher2k_params[] = { "mtxfmt", "nb",    "trans", "uplo",  "n",    "k",
+                                "lda",    "ldb",   "ldc",   "alpha", "beta", "seedA",
+                                "seedB",  "seedC", "bump",  NULL };
 const char *zher2k_output[] = { NULL };
 const char *zher2k_outchk[] = { "RETURN", NULL };
 
@@ -119,13 +129,13 @@ void testing_zher2k_init( void ) __attribute__( ( constructor ) );
 void
 testing_zher2k_init( void )
 {
-    test_zher2k.name        = "zher2k";
-    test_zher2k.helper      = "Hermitian matrix-matrix rank 2k update";
-    test_zher2k.params      = zher2k_params;
-    test_zher2k.output      = zher2k_output;
-    test_zher2k.outchk      = zher2k_outchk;
-    test_zher2k.fptr        = testing_zher2k;
-    test_zher2k.next        = NULL;
+    test_zher2k.name   = "zher2k";
+    test_zher2k.helper = "Hermitian matrix-matrix rank 2k update";
+    test_zher2k.params = zher2k_params;
+    test_zher2k.output = zher2k_output;
+    test_zher2k.outchk = zher2k_outchk;
+    test_zher2k.fptr   = testing_zher2k;
+    test_zher2k.next   = NULL;
 
     testing_register( &test_zher2k );
 }
diff --git a/testing/testing_zherk.c b/testing/testing_zherk.c
index 5a23c354b864f1d0138e58ac2862ec3d1f403ee1..5162a37ce06b411348d22f414b6b46d457198ed8 100644
--- a/testing/testing_zherk.c
+++ b/testing/testing_zherk.c
@@ -26,11 +26,11 @@
 int
 testing_zherk( run_arg_list_t *args, int check )
 {
-    int          Am, An;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -46,8 +46,10 @@ testing_zherk( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedC  = run_arg_get_int( args, "seedC", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zherk( K, N );
+
+    /* Descriptors */
+    int          Am, An;
+    CHAM_desc_t *descA, *descC, *descCinit;
 
     alpha = run_arg_get_double( args, "alpha", alpha );
     beta  = run_arg_get_double( args, "beta", beta );
@@ -76,12 +78,18 @@ testing_zherk( run_arg_list_t *args, int check )
     CHAMELEON_zplghe_Tile( bump, uplo, descC, seedC );
 
     /* Calculates the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zherk_Tile( uplo, trans, alpha, descA, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zherk_Tile_Async( uplo, trans, alpha, descA, beta, descC,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* both operands are flushed on the same sequence */
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zherk_Tile( uplo, trans, alpha, descA, beta, descC );
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zherk( K, N ) ); /* flop count is computed at the call site */
 
     /* Checks the solution */
     if ( check ) {
@@ -89,8 +97,8 @@ testing_zherk( run_arg_list_t *args, int check )
             &descCinit, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q );
         CHAMELEON_zplghe_Tile( bump, uplo, descCinit, seedC );
 
-        hres +=
-            check_zsyrk( args, ChamHermitian, uplo, trans, alpha, descA, NULL, beta, descCinit, descC );
+        hres += check_zsyrk( args, ChamHermitian, uplo, trans, alpha, descA, NULL,
+                             beta, descCinit, descC );
 
         CHAMELEON_Desc_Destroy( &descCinit );
     }
@@ -102,8 +110,8 @@ testing_zherk( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zherk;
-const char *zherk_params[] = { "mtxfmt", "nb",   "trans", "uplo",  "n",     "k",    "lda", "ldc",
-                               "alpha", "beta",  "seedA", "seedC", "bump", NULL };
+const char *zherk_params[] = { "mtxfmt", "nb",    "trans", "uplo",  "n",     "k",    "lda",
+                               "ldc",    "alpha", "beta",  "seedA", "seedC", "bump", NULL };
 const char *zherk_output[] = { NULL };
 const char *zherk_outchk[] = { "RETURN", NULL };
 
@@ -114,13 +122,13 @@ void testing_zherk_init( void ) __attribute__( ( constructor ) );
 void
 testing_zherk_init( void )
 {
-    test_zherk.name        = "zherk";
-    test_zherk.helper      = "Hermitian matrix-matrix rank k update";
-    test_zherk.params      = zherk_params;
-    test_zherk.output      = zherk_output;
-    test_zherk.outchk      = zherk_outchk;
-    test_zherk.fptr        = testing_zherk;
-    test_zherk.next        = NULL;
+    test_zherk.name   = "zherk";
+    test_zherk.helper = "Hermitian matrix-matrix rank k update";
+    test_zherk.params = zherk_params;
+    test_zherk.output = zherk_output;
+    test_zherk.outchk = zherk_outchk;
+    test_zherk.fptr   = testing_zherk;
+    test_zherk.next   = NULL;
 
     testing_register( &test_zherk );
 }
diff --git a/testing/testing_zlacpy.c b/testing/testing_zlacpy.c
index dc536e2af8173be6147c420293f8934c7c0b6682..708dc3f0800e6d4afa587642734d7970313812d5 100644
--- a/testing/testing_zlacpy.c
+++ b/testing/testing_zlacpy.c
@@ -55,10 +55,11 @@ flops_zlacpy( cham_uplo_t uplo, int M, int N )
 int
 testing_zlacpy( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descB;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -69,8 +70,9 @@ testing_zlacpy( run_arg_list_t *args, int check )
     int         LDB    = run_arg_get_int( args, "LDB", M );
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlacpy( uplo, M, N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descB;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -86,12 +88,18 @@ testing_zlacpy( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descB, seedA + 1 );
 
     /* Makes a copy of descA to descB */
-    START_TIMING( t );
-    hres = CHAMELEON_zlacpy_Tile( uplo, descA, descB );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data ); /* NOTE(review): presumably starts the timer and sets up test_data.sequence/request - confirm */
+    if ( async ) { /* "async" parameter selects the asynchronous tile interface */
+        hres = CHAMELEON_zlacpy_Tile_Async( uplo, descA, descB,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence ); /* source and destination are flushed on the same sequence */
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zlacpy_Tile( uplo, descA, descB );
+    }
+    test_data.hres = hres; /* status is handed to testing_stop() through test_data */
+    testing_stop( &test_data, flops_zlacpy( uplo, M, N ) ); /* flop count is computed at the call site */
 
     /* Checks their differences */
     if ( check ) {
@@ -105,7 +113,7 @@ testing_zlacpy( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlacpy;
-const char *zlacpy_params[] = { "mtxfmt", "nb","uplo", "m", "n", "lda", "ldb", "seedA", NULL };
+const char *zlacpy_params[] = { "mtxfmt", "nb", "uplo", "m", "n", "lda", "ldb", "seedA", NULL };
 const char *zlacpy_output[] = { NULL };
 const char *zlacpy_outchk[] = { "RETURN", NULL };
 
@@ -116,13 +124,13 @@ void testing_zlacpy_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlacpy_init( void )
 {
-    test_zlacpy.name        = "zlacpy";
-    test_zlacpy.helper      = "General matrix copy";
-    test_zlacpy.params      = zlacpy_params;
-    test_zlacpy.output      = zlacpy_output;
-    test_zlacpy.outchk      = zlacpy_outchk;
-    test_zlacpy.fptr        = testing_zlacpy;
-    test_zlacpy.next        = NULL;
+    test_zlacpy.name   = "zlacpy";
+    test_zlacpy.helper = "General matrix copy";
+    test_zlacpy.params = zlacpy_params;
+    test_zlacpy.output = zlacpy_output;
+    test_zlacpy.outchk = zlacpy_outchk;
+    test_zlacpy.fptr   = testing_zlacpy;
+    test_zlacpy.next   = NULL;
 
     testing_register( &test_zlacpy );
 }
diff --git a/testing/testing_zlange.c b/testing/testing_zlange.c
index 8c513e78d7aacdc63519d8bd25ff8f74d88bc941..1421cacc0b63b0721b921fecfd8982102d414842 100644
--- a/testing/testing_zlange.c
+++ b/testing/testing_zlange.c
@@ -26,7 +26,7 @@ static cham_fixdbl_t
 flops_zlange( cham_normtype_t ntype, int M, int N )
 {
     cham_fixdbl_t flops   = 0.;
-    double coefabs = 1.;
+    double        coefabs = 1.;
 #if defined( PRECISION_z ) || defined( PRECISION_c )
     coefabs = 3.;
 #endif
@@ -52,11 +52,11 @@ flops_zlange( cham_normtype_t ntype, int M, int N )
 int
 testing_zlange( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    double       norm;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int             async     = parameters_getvalue_int( "async" );
     intptr_t        mtxfmt    = parameters_getvalue_int( "mtxfmt" );
     int             nb        = run_arg_get_int( args, "nb", 320 );
     int             P         = parameters_getvalue_int( "P" );
@@ -66,8 +66,10 @@ testing_zlange( run_arg_list_t *args, int check )
     int             LDA       = run_arg_get_int( args, "LDA", M );
     int             seedA     = run_arg_get_int( args, "seedA", random() );
     int             Q         = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlange( norm_type, M, N );
+
+    /* Descriptors */
+    double       norm;
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -79,16 +81,22 @@ testing_zlange( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the norm */
-    START_TIMING( t );
-    norm = CHAMELEON_zlange_Tile( norm_type, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlange_Tile_Async( norm_type, descA, &norm,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        norm = CHAMELEON_zlange_Tile( norm_type, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zlange( norm_type, M, N ) );
 
     /* Checks the solution */
     if ( check ) {
-        hres = check_znorm( args, ChamGeneral, norm_type, ChamUpperLower, ChamNonUnit, norm, descA );
+        hres = check_znorm( args, ChamGeneral, norm_type, ChamUpperLower,
+                            ChamNonUnit, norm, descA );
     }
 
     CHAMELEON_Desc_Destroy( &descA );
@@ -97,7 +105,7 @@ testing_zlange( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlange;
-const char *zlange_params[] = { "mtxfmt", "nb","norm", "m", "n", "lda", "seedA", NULL };
+const char *zlange_params[] = { "mtxfmt", "nb", "norm", "m", "n", "lda", "seedA", NULL };
 const char *zlange_output[] = { NULL };
 const char *zlange_outchk[] = { "RETURN", NULL };
 
@@ -108,13 +116,13 @@ void testing_zlange_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlange_init( void )
 {
-    test_zlange.name        = "zlange";
-    test_zlange.helper      = "General matrix norm";
-    test_zlange.params      = zlange_params;
-    test_zlange.output      = zlange_output;
-    test_zlange.outchk      = zlange_outchk;
-    test_zlange.fptr        = testing_zlange;
-    test_zlange.next        = NULL;
+    test_zlange.name   = "zlange";
+    test_zlange.helper = "General matrix norm";
+    test_zlange.params = zlange_params;
+    test_zlange.output = zlange_output;
+    test_zlange.outchk = zlange_outchk;
+    test_zlange.fptr   = testing_zlange;
+    test_zlange.next   = NULL;
 
     testing_register( &test_zlange );
 }
diff --git a/testing/testing_zlanhe.c b/testing/testing_zlanhe.c
index 3c04a83f68146e3214c38f7798e542556451efde..7a3cb2bbcac798896d5298dd4f36b611cdc0bf4c 100644
--- a/testing/testing_zlanhe.c
+++ b/testing/testing_zlanhe.c
@@ -25,7 +25,7 @@ static cham_fixdbl_t
 flops_zlanhe( cham_normtype_t ntype, int N )
 {
     cham_fixdbl_t flops   = 0.;
-    double coefabs = 1.;
+    double        coefabs = 1.;
 #if defined( PRECISION_z ) || defined( PRECISION_c )
     coefabs = 3.;
 #endif
@@ -49,11 +49,11 @@ flops_zlanhe( cham_normtype_t ntype, int N )
 int
 testing_zlanhe( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    double       norm;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int             async     = parameters_getvalue_int( "async" );
     intptr_t        mtxfmt    = parameters_getvalue_int( "mtxfmt" );
     int             nb        = run_arg_get_int( args, "nb", 320 );
     int             P         = parameters_getvalue_int( "P" );
@@ -64,8 +64,10 @@ testing_zlanhe( run_arg_list_t *args, int check )
     int             seedA     = run_arg_get_int( args, "seedA", random() );
     double          bump      = testing_dalea();
     int             Q         = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlanhe( norm_type, N );
+
+    /* Norm result and matrix descriptor */
+    double       norm;
+    CHAM_desc_t *descA;
 
     bump = run_arg_get_double( args, "bump", bump );
 
@@ -79,12 +81,17 @@ testing_zlanhe( run_arg_list_t *args, int check )
     CHAMELEON_zplghe_Tile( bump, uplo, descA, seedA );
 
     /* Calculates the norm */
-    START_TIMING( t );
-    norm = CHAMELEON_zlanhe_Tile( norm_type, uplo, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlanhe_Tile_Async( norm_type, uplo, descA, &norm,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        norm = CHAMELEON_zlanhe_Tile( norm_type, uplo, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zlanhe( norm_type, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -97,7 +104,7 @@ testing_zlanhe( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlanhe;
-const char *zlanhe_params[] = { "mtxfmt", "nb","norm", "uplo", "n", "lda", "seedA", "bump", NULL };
+const char *zlanhe_params[] = { "mtxfmt", "nb", "norm", "uplo", "n", "lda", "seedA", "bump", NULL };
 const char *zlanhe_output[] = { NULL };
 const char *zlanhe_outchk[] = { "RETURN", NULL };
 
@@ -108,13 +115,13 @@ void testing_zlanhe_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlanhe_init( void )
 {
-    test_zlanhe.name        = "zlanhe";
-    test_zlanhe.helper      = "Hermitian matrix norm";
-    test_zlanhe.params      = zlanhe_params;
-    test_zlanhe.output      = zlanhe_output;
-    test_zlanhe.outchk      = zlanhe_outchk;
-    test_zlanhe.fptr        = testing_zlanhe;
-    test_zlanhe.next        = NULL;
+    test_zlanhe.name   = "zlanhe";
+    test_zlanhe.helper = "Hermitian matrix norm";
+    test_zlanhe.params = zlanhe_params;
+    test_zlanhe.output = zlanhe_output;
+    test_zlanhe.outchk = zlanhe_outchk;
+    test_zlanhe.fptr   = testing_zlanhe;
+    test_zlanhe.next   = NULL;
 
     testing_register( &test_zlanhe );
 }
diff --git a/testing/testing_zlansy.c b/testing/testing_zlansy.c
index 637aa22e9fe4d4ecb322b92276281059a4756482..651092e54bdb3e6aa7806a17693460db207ad7f9 100644
--- a/testing/testing_zlansy.c
+++ b/testing/testing_zlansy.c
@@ -25,7 +25,7 @@ static cham_fixdbl_t
 flops_zlansy( cham_normtype_t ntype, int N )
 {
     cham_fixdbl_t flops   = 0.;
-    double coefabs = 1.;
+    double        coefabs = 1.;
 #if defined( PRECISION_z ) || defined( PRECISION_c )
     coefabs = 3.;
 #endif
@@ -49,11 +49,11 @@ flops_zlansy( cham_normtype_t ntype, int N )
 int
 testing_zlansy( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    double       norm;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int                   async     = parameters_getvalue_int( "async" );
     intptr_t              mtxfmt    = parameters_getvalue_int( "mtxfmt" );
     int                   nb        = run_arg_get_int( args, "nb", 320 );
     int                   P         = parameters_getvalue_int( "P" );
@@ -64,8 +64,10 @@ testing_zlansy( run_arg_list_t *args, int check )
     int                   seedA     = run_arg_get_int( args, "seedA", random() );
     CHAMELEON_Complex64_t bump      = testing_zalea();
     int                   Q         = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlansy( norm_type, N );
+
+    /* Norm result and matrix descriptor */
+    double       norm;
+    CHAM_desc_t *descA;
 
     bump = run_arg_get_complex64( args, "bump", bump );
 
@@ -79,12 +81,17 @@ testing_zlansy( run_arg_list_t *args, int check )
     CHAMELEON_zplgsy_Tile( bump, uplo, descA, seedA );
 
     /* Calculates the norm */
-    START_TIMING( t );
-    norm = CHAMELEON_zlansy_Tile( norm_type, uplo, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlansy_Tile_Async( norm_type, uplo, descA, &norm,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        norm = CHAMELEON_zlansy_Tile( norm_type, uplo, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zlansy( norm_type, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -97,7 +104,7 @@ testing_zlansy( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlansy;
-const char *zlansy_params[] = { "mtxfmt", "nb","norm", "uplo", "n", "lda", "seedA", "bump", NULL };
+const char *zlansy_params[] = { "mtxfmt", "nb", "norm", "uplo", "n", "lda", "seedA", "bump", NULL };
 const char *zlansy_output[] = { NULL };
 const char *zlansy_outchk[] = { "RETURN", NULL };
 
@@ -108,13 +115,13 @@ void testing_zlansy_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlansy_init( void )
 {
-    test_zlansy.name        = "zlansy";
-    test_zlansy.helper      = "Symmetric matrix norm";
-    test_zlansy.params      = zlansy_params;
-    test_zlansy.output      = zlansy_output;
-    test_zlansy.outchk      = zlansy_outchk;
-    test_zlansy.fptr        = testing_zlansy;
-    test_zlansy.next        = NULL;
+    test_zlansy.name   = "zlansy";
+    test_zlansy.helper = "Symmetric matrix norm";
+    test_zlansy.params = zlansy_params;
+    test_zlansy.output = zlansy_output;
+    test_zlansy.outchk = zlansy_outchk;
+    test_zlansy.fptr   = testing_zlansy;
+    test_zlansy.next   = NULL;
 
     testing_register( &test_zlansy );
 }
diff --git a/testing/testing_zlantr.c b/testing/testing_zlantr.c
index 7f23b987436c62c9a6f611ea2917012b39cfb484..a7690f9e0df550c86b79469bdb91bb2e7fe43d53 100644
--- a/testing/testing_zlantr.c
+++ b/testing/testing_zlantr.c
@@ -24,8 +24,8 @@
 static cham_fixdbl_t
 flops_zlantr( cham_normtype_t ntype, cham_uplo_t uplo, int M, int N )
 {
-    cham_fixdbl_t flops = 0.;
-    double coefabs = 1.;
+    cham_fixdbl_t flops   = 0.;
+    double        coefabs = 1.;
 #if defined( PRECISION_z ) || defined( PRECISION_c )
     coefabs = 3.;
 #endif
@@ -70,11 +70,11 @@ flops_zlantr( cham_normtype_t ntype, cham_uplo_t uplo, int M, int N )
 int
 testing_zlantr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    double       norm;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int             async     = parameters_getvalue_int( "async" );
     intptr_t        mtxfmt    = parameters_getvalue_int( "mtxfmt" );
     int             nb        = run_arg_get_int( args, "nb", 320 );
     int             P         = parameters_getvalue_int( "P" );
@@ -86,8 +86,10 @@ testing_zlantr( run_arg_list_t *args, int check )
     int             LDA       = run_arg_get_int( args, "LDA", M );
     int             seedA     = run_arg_get_int( args, "seedA", random() );
     int             Q         = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlantr( norm_type, uplo, M, N );
+
+    /* Norm result and matrix descriptor */
+    double       norm;
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -99,12 +101,17 @@ testing_zlantr( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Calculates the norm */
-    START_TIMING( t );
-    norm = CHAMELEON_zlantr_Tile( norm_type, uplo, diag, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlantr_Tile_Async( norm_type, uplo, diag, descA, &norm,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        norm = CHAMELEON_zlantr_Tile( norm_type, uplo, diag, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zlantr( norm_type, uplo, M, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -117,7 +124,8 @@ testing_zlantr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlantr;
-const char *zlantr_params[] = { "mtxfmt", "nb","norm", "uplo", "diag", "m", "n", "lda", "seedA", NULL };
+const char *zlantr_params[] = { "mtxfmt", "nb", "norm", "uplo",  "diag",
+                                "m",      "n",  "lda",  "seedA", NULL };
 const char *zlantr_output[] = { NULL };
 const char *zlantr_outchk[] = { "RETURN", NULL };
 
@@ -128,13 +136,13 @@ void testing_zlantr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlantr_init( void )
 {
-    test_zlantr.name        = "zlantr";
-    test_zlantr.helper      = "Triangular matrix norm";
-    test_zlantr.params      = zlantr_params;
-    test_zlantr.output      = zlantr_output;
-    test_zlantr.outchk      = zlantr_outchk;
-    test_zlantr.fptr        = testing_zlantr;
-    test_zlantr.next        = NULL;
+    test_zlantr.name   = "zlantr";
+    test_zlantr.helper = "Triangular matrix norm";
+    test_zlantr.params = zlantr_params;
+    test_zlantr.output = zlantr_output;
+    test_zlantr.outchk = zlantr_outchk;
+    test_zlantr.fptr   = testing_zlantr;
+    test_zlantr.next   = NULL;
 
     testing_register( &test_zlantr );
 }
diff --git a/testing/testing_zlascal.c b/testing/testing_zlascal.c
index 14a85d957305b4c0bac641bd3bea6972e2d6324a..0c1f6248990fe39ad065dcf4cbdfc77ea4ee492a 100644
--- a/testing/testing_zlascal.c
+++ b/testing/testing_zlascal.c
@@ -25,7 +25,7 @@ static cham_fixdbl_t
 flops_zlascal( cham_uplo_t uplo, int M, int N )
 {
     cham_fixdbl_t flops = 0.;
-    int    minMN = chameleon_min( M, N );
+    int           minMN = chameleon_min( M, N );
     switch ( uplo ) {
         case ChamUpper:
             flops = ( minMN * ( minMN + 1 ) / 2 ) + M * chameleon_max( 0, N - M );
@@ -49,10 +49,11 @@ flops_zlascal( cham_uplo_t uplo, int M, int N )
 int
 testing_zlascal( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descAinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int                   async  = parameters_getvalue_int( "async" );
     intptr_t              mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int                   nb     = run_arg_get_int( args, "nb", 320 );
     int                   P      = parameters_getvalue_int( "P" );
@@ -63,8 +64,9 @@ testing_zlascal( run_arg_list_t *args, int check )
     CHAMELEON_Complex64_t alpha  = run_arg_get_complex64( args, "alpha", 1. );
     int                   seedA  = run_arg_get_int( args, "seedA", random() );
     int                   Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlascal( uplo, M, N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descAinit;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -76,12 +78,17 @@ testing_zlascal( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descA, seedA );
 
     /* Scales the matrix */
-    START_TIMING( t );
-    hres = CHAMELEON_zlascal_Tile( uplo, alpha, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlascal_Tile_Async( uplo, alpha, descA,
+                                             test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zlascal_Tile( uplo, alpha, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zlascal( uplo, M, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -100,7 +107,7 @@ testing_zlascal( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlascal;
-const char *zlascal_params[] = { "mtxfmt", "nb","uplo", "m", "n", "lda", "alpha", "seedA", NULL };
+const char *zlascal_params[] = { "mtxfmt", "nb", "uplo", "m", "n", "lda", "alpha", "seedA", NULL };
 const char *zlascal_output[] = { NULL };
 const char *zlascal_outchk[] = { "RETURN", NULL };
 
@@ -111,13 +118,13 @@ void testing_zlascal_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlascal_init( void )
 {
-    test_zlascal.name        = "zlascal";
-    test_zlascal.helper      = "General matrix scaling";
-    test_zlascal.params      = zlascal_params;
-    test_zlascal.output      = zlascal_output;
-    test_zlascal.outchk      = zlascal_outchk;
-    test_zlascal.fptr        = testing_zlascal;
-    test_zlascal.next        = NULL;
+    test_zlascal.name   = "zlascal";
+    test_zlascal.helper = "General matrix scaling";
+    test_zlascal.params = zlascal_params;
+    test_zlascal.output = zlascal_output;
+    test_zlascal.outchk = zlascal_outchk;
+    test_zlascal.fptr   = testing_zlascal;
+    test_zlascal.next   = NULL;
 
     testing_register( &test_zlascal );
 }
diff --git a/testing/testing_zlauum.c b/testing/testing_zlauum.c
index 6594e488746facd3dd92daa3cf467eb499f0b823..10eb91965510fa299bdfcc0ae4a72492b309a3c9 100644
--- a/testing/testing_zlauum.c
+++ b/testing/testing_zlauum.c
@@ -31,10 +31,11 @@ flops_zlauum( int N )
 int
 testing_zlauum( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -43,8 +44,9 @@ testing_zlauum( run_arg_list_t *args, int check )
     int         LDA    = run_arg_get_int( args, "LDA", N );
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zlauum( N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -56,12 +58,16 @@ testing_zlauum( run_arg_list_t *args, int check )
     CHAMELEON_zplghe_Tile( 0., uplo, descA, seedA );
 
     /* Calculates the matrix product */
-    START_TIMING( t );
-    hres = CHAMELEON_zlauum_Tile( uplo, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlauum_Tile_Async( uplo, descA, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zlauum_Tile( uplo, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zlauum( N ) );
 
     if ( check ) {
         CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL );
@@ -78,7 +84,7 @@ testing_zlauum( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zlauum;
-const char *zlauum_params[] = { "mtxfmt", "nb","uplo", "n", "lda", "seedA", NULL };
+const char *zlauum_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL };
 const char *zlauum_output[] = { NULL };
 const char *zlauum_outchk[] = { "RETURN", NULL };
 
@@ -89,13 +95,13 @@ void testing_zlauum_init( void ) __attribute__( ( constructor ) );
 void
 testing_zlauum_init( void )
 {
-    test_zlauum.name        = "zlauum";
-    test_zlauum.helper      = "Trianguilar in-place matrix-matrix computation for Cholesky inversion";
-    test_zlauum.params      = zlauum_params;
-    test_zlauum.output      = zlauum_output;
-    test_zlauum.outchk      = zlauum_outchk;
-    test_zlauum.fptr        = testing_zlauum;
-    test_zlauum.next        = NULL;
+    test_zlauum.name   = "zlauum";
+    test_zlauum.helper = "Triangular in-place matrix-matrix computation for Cholesky inversion";
+    test_zlauum.params = zlauum_params;
+    test_zlauum.output = zlauum_output;
+    test_zlauum.outchk = zlauum_outchk;
+    test_zlauum.fptr   = testing_zlauum;
+    test_zlauum.next   = NULL;
 
     testing_register( &test_zlauum );
 }
diff --git a/testing/testing_zplrnk.c b/testing/testing_zplrnk.c
index 0923f9d7c3aee83b457f21da813186be87d38680..6563fc90a9a033a689d073c91cdba22f528f8caf 100644
--- a/testing/testing_zplrnk.c
+++ b/testing/testing_zplrnk.c
@@ -25,23 +25,23 @@
 int
 testing_zplrnk( run_arg_list_t *args, int check )
 {
-    static int   run_id = 0;
-    int          hres   = 0;
-    CHAM_desc_t *descC;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int async = parameters_getvalue_int( "async" );
+    int nb    = run_arg_get_int( args, "nb", 320 );
+    int P     = parameters_getvalue_int( "P" );
+    int N     = run_arg_get_int( args, "N", 1000 );
+    int M     = run_arg_get_int( args, "M", N );
+    int K     = run_arg_get_int( args, "K", N );
+    int LDC   = run_arg_get_int( args, "LDC", M );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int seedB = run_arg_get_int( args, "seedB", random() );
+    int Q     = parameters_compute_q( P );
 
-    /* Reads arguments */
-    int             nb    = run_arg_get_int( args, "nb", 320 );
-    int             P     = parameters_getvalue_int( "P" );
-    int             N     = run_arg_get_int( args, "N", 1000 );
-    int             M     = run_arg_get_int( args, "M", N );
-    int             K     = run_arg_get_int( args, "K", N );
-    int             LDC   = run_arg_get_int( args, "LDC", M );
-    int             seedA = run_arg_get_int( args, "seedA", random() );
-    int             seedB = run_arg_get_int( args, "seedB", random() );
-    int             Q     = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    /* We consider the gemm cost used in this operation as the cost */
-    cham_fixdbl_t flops = flops_zgemm( M, N, K );
+    /* Descriptors */
+    CHAM_desc_t *descC;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -50,12 +50,17 @@ testing_zplrnk( run_arg_list_t *args, int check )
         &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q );
 
     /* Calculates the random rank-k matrix */
-    START_TIMING( t );
-    hres = CHAMELEON_zplrnk_Tile( K, descC, seedA, seedB );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zplrnk_Tile_Async( K, descC, seedA, seedB,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zplrnk_Tile( K, descC, seedA, seedB );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zgemm( M, N, K ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -64,7 +69,6 @@ testing_zplrnk( run_arg_list_t *args, int check )
 
     CHAMELEON_Desc_Destroy( &descC );
 
-    run_id++;
     return hres;
 }
 
@@ -80,13 +84,13 @@ void testing_zplrnk_init( void ) __attribute__( ( constructor ) );
 void
 testing_zplrnk_init( void )
 {
-    test_zplrnk.name        = "zplrnk";
-    test_zplrnk.helper      = "General rank-k matrix generation";
-    test_zplrnk.params      = zplrnk_params;
-    test_zplrnk.output      = zplrnk_output;
-    test_zplrnk.outchk      = zplrnk_outchk;
-    test_zplrnk.fptr        = testing_zplrnk;
-    test_zplrnk.next        = NULL;
+    test_zplrnk.name   = "zplrnk";
+    test_zplrnk.helper = "General rank-k matrix generation";
+    test_zplrnk.params = zplrnk_params;
+    test_zplrnk.output = zplrnk_output;
+    test_zplrnk.outchk = zplrnk_outchk;
+    test_zplrnk.fptr   = testing_zplrnk;
+    test_zplrnk.next   = NULL;
 
     testing_register( &test_zplrnk );
 }
diff --git a/testing/testing_zposv.c b/testing/testing_zposv.c
index a4b30901d737c927e7aa70cbb8d4cc213960f6c7..581eecc5380ac3f6bcf6d07d71031b8b3079c6a1 100644
--- a/testing/testing_zposv.c
+++ b/testing/testing_zposv.c
@@ -32,10 +32,11 @@ flops_zposv( int N, int NRHS )
 int
 testing_zposv( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -47,8 +48,9 @@ testing_zposv( run_arg_list_t *args, int check )
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         seedB  = run_arg_get_int( args, "seedB", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zposv( N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -63,12 +65,18 @@ testing_zposv( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descX, seedB );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zposv_Tile( uplo, descA, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zposv_Tile_Async( uplo, descA, descX,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zposv_Tile( uplo, descA, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zposv( N, NRHS ) );
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -98,7 +106,8 @@ testing_zposv( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zposv;
-const char *zposv_params[] = { "mtxfmt", "nb","uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zposv_params[] = { "mtxfmt", "nb",  "uplo",  "n",     "nrhs",
+                               "lda",    "ldb", "seedA", "seedB", NULL };
 const char *zposv_output[] = { NULL };
 const char *zposv_outchk[] = { "RETURN", NULL };
 
@@ -109,13 +118,13 @@ void testing_zposv_init( void ) __attribute__( ( constructor ) );
 void
 testing_zposv_init( void )
 {
-    test_zposv.name        = "zposv";
-    test_zposv.helper      = "Hermitian positive definite linear system solve (Cholesky)";
-    test_zposv.params      = zposv_params;
-    test_zposv.output      = zposv_output;
-    test_zposv.outchk      = zposv_outchk;
-    test_zposv.fptr        = testing_zposv;
-    test_zposv.next        = NULL;
+    test_zposv.name   = "zposv";
+    test_zposv.helper = "Hermitian positive definite linear system solve (Cholesky)";
+    test_zposv.params = zposv_params;
+    test_zposv.output = zposv_output;
+    test_zposv.outchk = zposv_outchk;
+    test_zposv.fptr   = testing_zposv;
+    test_zposv.next   = NULL;
 
     testing_register( &test_zposv );
 }
diff --git a/testing/testing_zpotrf.c b/testing/testing_zpotrf.c
index 6546d2a9443e87b30bc15fc199341bfd65440fea..92caa245db6cf82f21dbca12a54ec99989ccbcc8 100644
--- a/testing/testing_zpotrf.c
+++ b/testing/testing_zpotrf.c
@@ -24,10 +24,11 @@
 int
 testing_zpotrf( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -36,8 +37,9 @@ testing_zpotrf( run_arg_list_t *args, int check )
     int         LDA    = run_arg_get_int( args, "LDA", N );
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zpotrf( N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -49,12 +51,17 @@ testing_zpotrf( run_arg_list_t *args, int check )
     CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zpotrf_Tile( uplo, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zpotrf_Tile_Async( uplo, descA,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zpotrf_Tile( uplo, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zpotrf( N ) );
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -72,7 +79,7 @@ testing_zpotrf( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zpotrf;
-const char *zpotrf_params[] = { "mtxfmt", "nb","uplo", "n", "lda", "seedA", NULL };
+const char *zpotrf_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL };
 const char *zpotrf_output[] = { NULL };
 const char *zpotrf_outchk[] = { "RETURN", NULL };
 
@@ -83,13 +90,13 @@ void testing_zpotrf_init( void ) __attribute__( ( constructor ) );
 void
 testing_zpotrf_init( void )
 {
-    test_zpotrf.name        = "zpotrf";
-    test_zpotrf.helper      = "Hermitian positive definite factorization (Cholesky)";
-    test_zpotrf.params      = zpotrf_params;
-    test_zpotrf.output      = zpotrf_output;
-    test_zpotrf.outchk      = zpotrf_outchk;
-    test_zpotrf.fptr        = testing_zpotrf;
-    test_zpotrf.next        = NULL;
+    test_zpotrf.name   = "zpotrf";
+    test_zpotrf.helper = "Hermitian positive definite factorization (Cholesky)";
+    test_zpotrf.params = zpotrf_params;
+    test_zpotrf.output = zpotrf_output;
+    test_zpotrf.outchk = zpotrf_outchk;
+    test_zpotrf.fptr   = testing_zpotrf;
+    test_zpotrf.next   = NULL;
 
     testing_register( &test_zpotrf );
 }
diff --git a/testing/testing_zpotri.c b/testing/testing_zpotri.c
index 481612e9395e29e982aa5dea18a6cd3b799c21a0..d2d735bb2a6945e8d4f11c46933c7967f979d09f 100644
--- a/testing/testing_zpotri.c
+++ b/testing/testing_zpotri.c
@@ -26,10 +26,11 @@
 int
 testing_zpotri( run_arg_list_t *args, int check )
 {
-    int          hres;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -38,8 +39,9 @@ testing_zpotri( run_arg_list_t *args, int check )
     int         LDA    = run_arg_get_int( args, "LDA", N );
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zpotri( N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -54,13 +56,16 @@ testing_zpotri( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Calculates the inversed matrix */
-    START_TIMING( t );
-    hres += CHAMELEON_zpotri_Tile( uplo, descA );
-    STOP_TIMING( t );
-
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zpotri_Tile_Async( uplo, descA, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zpotri_Tile( uplo, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zpotri( N ) );
 
     /* Check the inverse */
     if ( check ) {
@@ -78,7 +83,7 @@ testing_zpotri( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zpotri;
-const char *zpotri_params[] = { "mtxfmt", "nb","uplo", "n", "lda", "seedA", NULL };
+const char *zpotri_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL };
 const char *zpotri_output[] = { NULL };
 const char *zpotri_outchk[] = { "RETURN", NULL };
 
@@ -89,13 +94,13 @@ void testing_zpotri_init( void ) __attribute__( ( constructor ) );
 void
 testing_zpotri_init( void )
 {
-    test_zpotri.name        = "zpotri";
-    test_zpotri.helper      = "Hermitian positive definite matrix inversion";
-    test_zpotri.params      = zpotri_params;
-    test_zpotri.output      = zpotri_output;
-    test_zpotri.outchk      = zpotri_outchk;
-    test_zpotri.fptr        = testing_zpotri;
-    test_zpotri.next        = NULL;
+    test_zpotri.name   = "zpotri";
+    test_zpotri.helper = "Hermitian positive definite matrix inversion";
+    test_zpotri.params = zpotri_params;
+    test_zpotri.output = zpotri_output;
+    test_zpotri.outchk = zpotri_outchk;
+    test_zpotri.fptr   = testing_zpotri;
+    test_zpotri.next   = NULL;
 
     testing_register( &test_zpotri );
 }
diff --git a/testing/testing_zpotrs.c b/testing/testing_zpotrs.c
index 4dc97b670bf9e8425409a3002710746708a71b67..de80992a7137fcb8e4037148b95d43d8066f9899 100644
--- a/testing/testing_zpotrs.c
+++ b/testing/testing_zpotrs.c
@@ -25,10 +25,11 @@
 int
 testing_zpotrs( run_arg_list_t *args, int check )
 {
-    int          hres;
-    CHAM_desc_t *descA, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -40,8 +41,9 @@ testing_zpotrs( run_arg_list_t *args, int check )
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         seedB  = run_arg_get_int( args, "seedB", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zpotrs( N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -59,12 +61,18 @@ testing_zpotrs( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres += CHAMELEON_zpotrs_Tile( uplo, descA, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zpotrs_Tile_Async( uplo, descA, descX,
+                                             test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zpotrs_Tile( uplo, descA, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zpotrs( N, NRHS ) );
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -87,7 +95,8 @@ testing_zpotrs( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zpotrs;
-const char *zpotrs_params[] = { "mtxfmt", "nb","uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zpotrs_params[] = { "mtxfmt", "nb",  "uplo",  "n",     "nrhs",
+                                "lda",    "ldb", "seedA", "seedB", NULL };
 const char *zpotrs_output[] = { NULL };
 const char *zpotrs_outchk[] = { "RETURN", NULL };
 
@@ -98,13 +107,13 @@ void testing_zpotrs_init( void ) __attribute__( ( constructor ) );
 void
 testing_zpotrs_init( void )
 {
-    test_zpotrs.name        = "zpotrs";
-    test_zpotrs.helper      = "Hermitian positive definite solve (Cholesky)";
-    test_zpotrs.params      = zpotrs_params;
-    test_zpotrs.output      = zpotrs_output;
-    test_zpotrs.outchk      = zpotrs_outchk;
-    test_zpotrs.fptr        = testing_zpotrs;
-    test_zpotrs.next        = NULL;
+    test_zpotrs.name   = "zpotrs";
+    test_zpotrs.helper = "Hermitian positive definite solve (Cholesky)";
+    test_zpotrs.params = zpotrs_params;
+    test_zpotrs.output = zpotrs_output;
+    test_zpotrs.outchk = zpotrs_outchk;
+    test_zpotrs.fptr   = testing_zpotrs;
+    test_zpotrs.next   = NULL;
 
     testing_register( &test_zpotrs );
 }
diff --git a/testing/testing_zprint.c b/testing/testing_zprint.c
index 1db0422641d87723b0bd15dc75619e79f3cf1b53..ae489df40c54470faa418ace53693d108dccc8b8 100644
--- a/testing/testing_zprint.c
+++ b/testing/testing_zprint.c
@@ -24,24 +24,27 @@
 /**
  *  Internal function to return address of block (m,n) with m,n = block indices
  */
-inline static void *chameleon_getaddr_cm(const CHAM_desc_t *A, int m, int n)
+inline static void *
+chameleon_getaddr_cm( const CHAM_desc_t *A, int m, int n )
 {
-    size_t mm = m + A->i / A->mb;
-    size_t nn = n + A->j / A->nb;
-    size_t eltsize = CHAMELEON_Element_Size(A->dtyp);
-    size_t offset = 0;
+    size_t mm      = m + A->i / A->mb;
+    size_t nn      = n + A->j / A->nb;
+    size_t eltsize = CHAMELEON_Element_Size( A->dtyp );
+    size_t offset  = 0;
 
 #if defined(CHAMELEON_USE_MPI)
-    assert( A->myrank == A->get_rankof( A, mm, nn) );
+    assert( A->myrank == A->get_rankof( A, mm, nn ) );
     mm = mm / A->p;
     nn = nn / A->q;
 #endif
 
-    offset = (size_t)(A->llm * A->nb) * nn + (size_t)(A->mb) * mm;
-    return (void*)((intptr_t)A->mat + (offset*eltsize) );
+    offset = (size_t)( A->llm * A->nb ) * nn + (size_t)( A->mb ) * mm;
+    return (void *)( (intptr_t)A->mat + ( offset * eltsize ) );
 }
 
-inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) {
+inline static int
+chameleon_getblkldd_cm( const CHAM_desc_t *A, int m )
+{
     (void)m;
     return A->llm;
 }
@@ -49,23 +52,24 @@ inline static int chameleon_getblkldd_cm(const CHAM_desc_t *A, int m) {
 int
 testing_zprint( run_arg_list_t *args, int check )
 {
-    int          hres = 0;
+    int hres = 0;
+
+    /* Read arguments */
+    intptr_t mtxfmt    = parameters_getvalue_int( "mtxfmt" );
+    int      nb        = run_arg_get_int( args, "nb", 320 );
+    int      P         = parameters_getvalue_int( "P" );
+    int      N         = run_arg_get_int( args, "N", 1000 );
+    int      M         = run_arg_get_int( args, "M", N );
+    int      LDA       = run_arg_get_int( args, "LDA", M );
+    int      l1        = run_arg_get_int( args, "l1", nb / 2 );
+    int      l2        = run_arg_get_int( args, "l2", l1 / 3 );
+    int      l3        = run_arg_get_int( args, "l3", l2 / 2 );
+    int      Q         = parameters_compute_q( P );
+    int      list_nb[] = { nb, l1, l2, l3, 0 };
+
+    /* Descriptors */
     CHAM_desc_t *descA;
 
-    /* Reads arguments */
-    intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
-    int         nb     = run_arg_get_int( args, "nb", 320 );
-    int         P      = parameters_getvalue_int( "P" );
-    int         N      = run_arg_get_int( args, "N", 1000 );
-    int         M      = run_arg_get_int( args, "M", N );
-    int         LDA    = run_arg_get_int( args, "LDA", M );
-    int         l1    = run_arg_get_int( args, "l1", nb / 2 );
-    int         l2    = run_arg_get_int( args, "l2", l1 / 3 );
-    int         l3    = run_arg_get_int( args, "l3", l2 / 2 );
-    int         Q      = parameters_compute_q( P );
-
-    int list_nb[] = { nb, l1, l2, l3, 0 };
-
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
     fprintf( stdout, "--- Tile layout ---\n" );
@@ -120,13 +124,13 @@ void testing_zprint_init( void ) __attribute__( ( constructor ) );
 void
 testing_zprint_init( void )
 {
-    test_zprint.name        = "zprint";
-    test_zprint.helper      = "Print descriptors";
-    test_zprint.params      = zprint_params;
-    test_zprint.output      = zprint_output;
-    test_zprint.outchk      = zprint_outchk;
-    test_zprint.fptr        = testing_zprint;
-    test_zprint.next        = NULL;
+    test_zprint.name   = "zprint";
+    test_zprint.helper = "Print descriptors";
+    test_zprint.params = zprint_params;
+    test_zprint.output = zprint_output;
+    test_zprint.outchk = zprint_outchk;
+    test_zprint.fptr   = testing_zprint;
+    test_zprint.next   = NULL;
 
     testing_register( &test_zprint );
 }
diff --git a/testing/testing_zsymm.c b/testing/testing_zsymm.c
index 10100aabe503a6c17e738c1e171e2aeed31806c2..c37aec490c3a041d7b440f25dfab4b7486bb99fe 100644
--- a/testing/testing_zsymm.c
+++ b/testing/testing_zsymm.c
@@ -25,11 +25,11 @@
 int
 testing_zsymm( run_arg_list_t *args, int check )
 {
-    int          Am;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int                   async  = parameters_getvalue_int( "async" );
     intptr_t              mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int                   nb     = run_arg_get_int( args, "nb", 320 );
     int                   P      = parameters_getvalue_int( "P" );
@@ -46,11 +46,13 @@ testing_zsymm( run_arg_list_t *args, int check )
     int                   seedB  = run_arg_get_int( args, "seedB", random() );
     int                   seedC  = run_arg_get_int( args, "seedC", random() );
     double                bump   = testing_dalea();
-    bump                         = run_arg_get_double( args, "bump", bump );
     int                   Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zsymm( side, M, N );
 
+    /* Descriptors */
+    int          Am;
+    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+
+    bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
 
@@ -78,12 +80,19 @@ testing_zsymm( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descC, seedC );
 
     /* Calculates the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zsymm_Tile( side, uplo, alpha, descA, descB, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zsymm_Tile_Async( side, uplo, alpha, descA, descB, beta, descC,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zsymm_Tile( side, uplo, alpha, descA, descB, beta, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zsymm( side, M, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -105,8 +114,8 @@ testing_zsymm( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zsymm;
-const char *zsymm_params[] = { "mtxfmt", "nb",   "side", "uplo",  "m",     "n",     "lda",  "ldb", "ldc",
-                               "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL };
+const char *zsymm_params[] = { "mtxfmt", "nb",    "side", "uplo",  "m",     "n",     "lda",  "ldb",
+                               "ldc",    "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL };
 const char *zsymm_output[] = { NULL };
 const char *zsymm_outchk[] = { "RETURN", NULL };
 
@@ -117,13 +126,13 @@ void testing_zsymm_init( void ) __attribute__( ( constructor ) );
 void
 testing_zsymm_init( void )
 {
-    test_zsymm.name        = "zsymm";
-    test_zsymm.helper      = "Symmetric matrix-matrix multiply";
-    test_zsymm.params      = zsymm_params;
-    test_zsymm.output      = zsymm_output;
-    test_zsymm.outchk      = zsymm_outchk;
-    test_zsymm.fptr        = testing_zsymm;
-    test_zsymm.next        = NULL;
+    test_zsymm.name   = "zsymm";
+    test_zsymm.helper = "Symmetric matrix-matrix multiply";
+    test_zsymm.params = zsymm_params;
+    test_zsymm.output = zsymm_output;
+    test_zsymm.outchk = zsymm_outchk;
+    test_zsymm.fptr   = testing_zsymm;
+    test_zsymm.next   = NULL;
 
     testing_register( &test_zsymm );
 }
diff --git a/testing/testing_zsyr2k.c b/testing/testing_zsyr2k.c
index cbfeb3fc8467afc5b3cba3985f4e9c60b7705848..5ad295e9d1e22740c4df40f6fd3fc2b5499914d3 100644
--- a/testing/testing_zsyr2k.c
+++ b/testing/testing_zsyr2k.c
@@ -25,11 +25,11 @@
 int
 testing_zsyr2k( run_arg_list_t *args, int check )
 {
-    int          Am, An;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
     /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -46,11 +46,13 @@ testing_zsyr2k( run_arg_list_t *args, int check )
     int                   seedB = run_arg_get_int( args, "seedB", random() );
     int                   seedC = run_arg_get_int( args, "seedC", random() );
     double                bump  = testing_dalea();
-    bump                        = run_arg_get_double( args, "bump", bump );
-    int    Q                    = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zher2k( K, N );
+    int                   Q     = parameters_compute_q( P );
+
+    /* Descriptors */
+    int          Am, An;
+    CHAM_desc_t *descA, *descB, *descC, *descCinit;
 
+    bump  = run_arg_get_double( args, "bump", bump );
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
 
@@ -80,12 +82,19 @@ testing_zsyr2k( run_arg_list_t *args, int check )
     CHAMELEON_zplgsy_Tile( bump, uplo, descC, seedC );
 
     /* Calculate the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zsyr2k_Tile( uplo, trans, alpha, descA, descB, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zsyr2k_Tile_Async( uplo, trans, alpha, descA, descB, beta, descC,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zsyr2k_Tile( uplo, trans, alpha, descA, descB, beta, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zher2k( K, N ) );
 
     /* Check the solution */
     if ( check ) {
@@ -93,8 +102,8 @@ testing_zsyr2k( run_arg_list_t *args, int check )
             &descCinit, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q );
         CHAMELEON_zplgsy_Tile( bump, uplo, descCinit, seedC );
 
-        hres +=
-            check_zsyrk( args, ChamSymmetric, uplo, trans, alpha, descA, descB, beta, descCinit, descC );
+        hres += check_zsyrk( args, ChamSymmetric, uplo, trans, alpha, descA, descB,
+                             beta, descCinit, descC );
 
         CHAMELEON_Desc_Destroy( &descCinit );
     }
@@ -107,8 +116,9 @@ testing_zsyr2k( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zsyr2k;
-const char *zsyr2k_params[] = { "mtxfmt", "nb",   "trans", "uplo",  "n",     "k",     "lda",  "ldb", "ldc",
-                                "alpha", "beta",  "seedA", "seedB", "seedC", "bump", NULL };
+const char *zsyr2k_params[] = { "mtxfmt", "nb",    "trans", "uplo",  "n",    "k",
+                                "lda",    "ldb",   "ldc",   "alpha", "beta", "seedA",
+                                "seedB",  "seedC", "bump",  NULL };
 const char *zsyr2k_output[] = { NULL };
 const char *zsyr2k_outchk[] = { "RETURN", NULL };
 
@@ -119,13 +129,13 @@ void testing_zsyr2k_init( void ) __attribute__( ( constructor ) );
 void
 testing_zsyr2k_init( void )
 {
-    test_zsyr2k.name        = "zsyr2k";
-    test_zsyr2k.helper      = "Symmetrix matrix-matrix rank 2k update";
-    test_zsyr2k.params      = zsyr2k_params;
-    test_zsyr2k.output      = zsyr2k_output;
-    test_zsyr2k.outchk      = zsyr2k_outchk;
-    test_zsyr2k.fptr        = testing_zsyr2k;
-    test_zsyr2k.next        = NULL;
+    test_zsyr2k.name   = "zsyr2k";
+    test_zsyr2k.helper = "Symmetric matrix-matrix rank 2k update";
+    test_zsyr2k.params = zsyr2k_params;
+    test_zsyr2k.output = zsyr2k_output;
+    test_zsyr2k.outchk = zsyr2k_outchk;
+    test_zsyr2k.fptr   = testing_zsyr2k;
+    test_zsyr2k.next   = NULL;
 
     testing_register( &test_zsyr2k );
 }
diff --git a/testing/testing_zsyrk.c b/testing/testing_zsyrk.c
index a3d17dd7cdce49273ad94667f533ff9919a32724..2c8bcf400da78aad665682eeabb82efd38732a7b 100644
--- a/testing/testing_zsyrk.c
+++ b/testing/testing_zsyrk.c
@@ -25,11 +25,11 @@
 int
 testing_zsyrk( run_arg_list_t *args, int check )
 {
-    int          Am, An;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descC, *descCinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -45,8 +45,10 @@ testing_zsyrk( run_arg_list_t *args, int check )
     int                   seedA = run_arg_get_int( args, "seedA", random() );
     int                   seedC = run_arg_get_int( args, "seedC", random() );
     int                   Q     = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zsyrk( K, N );
+
+    /* Descriptors */
+    int          Am, An;
+    CHAM_desc_t *descA, *descC, *descCinit;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
@@ -75,12 +77,18 @@ testing_zsyrk( run_arg_list_t *args, int check )
     CHAMELEON_zplgsy_Tile( bump, uplo, descC, seedC );
 
     /* Calculates the product */
-    START_TIMING( t );
-    hres = CHAMELEON_zsyrk_Tile( uplo, trans, alpha, descA, beta, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zsyrk_Tile_Async( uplo, trans, alpha, descA, beta, descC,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zsyrk_Tile( uplo, trans, alpha, descA, beta, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zsyrk( K, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -88,8 +96,8 @@ testing_zsyrk( run_arg_list_t *args, int check )
             &descCinit, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q );
         CHAMELEON_zplgsy_Tile( bump, uplo, descCinit, seedC );
 
-        hres +=
-            check_zsyrk( args, ChamSymmetric, uplo, trans, alpha, descA, NULL, beta, descCinit, descC );
+        hres += check_zsyrk( args, ChamSymmetric, uplo, trans, alpha, descA, NULL,
+                             beta, descCinit, descC );
 
         CHAMELEON_Desc_Destroy( &descCinit );
     }
@@ -101,8 +109,8 @@ testing_zsyrk( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zsyrk;
-const char *zsyrk_params[] = { "mtxfmt", "nb",   "trans", "uplo",  "n",     "k",    "lda", "ldc",
-                               "alpha", "beta",  "seedA", "seedC", "bump", NULL };
+const char *zsyrk_params[] = { "mtxfmt", "nb",    "trans", "uplo",  "n",     "k",    "lda",
+                               "ldc",    "alpha", "beta",  "seedA", "seedC", "bump", NULL };
 const char *zsyrk_output[] = { NULL };
 const char *zsyrk_outchk[] = { "RETURN", NULL };
 
@@ -113,13 +121,13 @@ void testing_zsyrk_init( void ) __attribute__( ( constructor ) );
 void
 testing_zsyrk_init( void )
 {
-    test_zsyrk.name        = "zsyrk";
-    test_zsyrk.helper      = "Symmetrix matrix-matrix rank k update";
-    test_zsyrk.params      = zsyrk_params;
-    test_zsyrk.output      = zsyrk_output;
-    test_zsyrk.outchk      = zsyrk_outchk;
-    test_zsyrk.fptr        = testing_zsyrk;
-    test_zsyrk.next        = NULL;
+    test_zsyrk.name   = "zsyrk";
+    test_zsyrk.helper = "Symmetric matrix-matrix rank k update";
+    test_zsyrk.params = zsyrk_params;
+    test_zsyrk.output = zsyrk_output;
+    test_zsyrk.outchk = zsyrk_outchk;
+    test_zsyrk.fptr   = testing_zsyrk;
+    test_zsyrk.next   = NULL;
 
     testing_register( &test_zsyrk );
 }
diff --git a/testing/testing_zsysv.c b/testing/testing_zsysv.c
index 22e0536367dbda0533e89abae08a10094c3b4bc4..302eb4726ce2ddda4c9c4f6e6b75c19033375e55 100644
--- a/testing/testing_zsysv.c
+++ b/testing/testing_zsysv.c
@@ -31,10 +31,11 @@ flops_zsysv( int N, int NRHS )
 int
 testing_zsysv( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -46,8 +47,9 @@ testing_zsysv( run_arg_list_t *args, int check )
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         seedB  = run_arg_get_int( args, "seedB", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zsysv( N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -62,12 +64,18 @@ testing_zsysv( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descX, seedB );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zsysv_Tile( uplo, descA, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zsysv_Tile_Async( uplo, descA, descX,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zsysv_Tile( uplo, descA, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zsysv( N, NRHS ) );
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -97,7 +105,8 @@ testing_zsysv( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zsysv;
-const char *zsysv_params[] = { "mtxfmt", "nb","uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zsysv_params[] = { "mtxfmt", "nb",  "uplo",  "n",     "nrhs",
+                               "lda",    "ldb", "seedA", "seedB", NULL };
 const char *zsysv_output[] = { NULL };
 const char *zsysv_outchk[] = { "RETURN", NULL };
 
@@ -108,13 +117,13 @@ void testing_zsysv_init( void ) __attribute__( ( constructor ) );
 void
 testing_zsysv_init( void )
 {
-    test_zsysv.name        = "zsysv";
-    test_zsysv.helper      = "Symmetrix linear system solve";
-    test_zsysv.params      = zsysv_params;
-    test_zsysv.output      = zsysv_output;
-    test_zsysv.outchk      = zsysv_outchk;
-    test_zsysv.fptr        = testing_zsysv;
-    test_zsysv.next        = NULL;
+    test_zsysv.name   = "zsysv";
+    test_zsysv.helper = "Symmetric linear system solve";
+    test_zsysv.params = zsysv_params;
+    test_zsysv.output = zsysv_output;
+    test_zsysv.outchk = zsysv_outchk;
+    test_zsysv.fptr   = testing_zsysv;
+    test_zsysv.next   = NULL;
 
     testing_register( &test_zsysv );
 }
diff --git a/testing/testing_zsytrf.c b/testing/testing_zsytrf.c
index 3712af1b42bf1784bad81f604f23d980c302dc26..72667bc976db03e6767e144adb93cabd2bf5da7f 100644
--- a/testing/testing_zsytrf.c
+++ b/testing/testing_zsytrf.c
@@ -24,10 +24,11 @@
 int
 testing_zsytrf( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -36,8 +37,9 @@ testing_zsytrf( run_arg_list_t *args, int check )
     int         LDA    = run_arg_get_int( args, "LDA", N );
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zpotrf( N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -49,12 +51,16 @@ testing_zsytrf( run_arg_list_t *args, int check )
     CHAMELEON_zplgsy_Tile( (double)N, uplo, descA, seedA );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres = CHAMELEON_zsytrf_Tile( uplo, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zsytrf_Tile_Async( uplo, descA, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zsytrf_Tile( uplo, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zpotrf( N ) );
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -72,7 +78,7 @@ testing_zsytrf( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zsytrf;
-const char *zsytrf_params[] = { "mtxfmt", "nb","uplo", "n", "lda", "seedA", NULL };
+const char *zsytrf_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL };
 const char *zsytrf_output[] = { NULL };
 const char *zsytrf_outchk[] = { "RETURN", NULL };
 
@@ -83,13 +89,13 @@ void testing_zsytrf_init( void ) __attribute__( ( constructor ) );
 void
 testing_zsytrf_init( void )
 {
-    test_zsytrf.name        = "zsytrf";
-    test_zsytrf.helper      = "Symmetric trinagular factorization";
-    test_zsytrf.params      = zsytrf_params;
-    test_zsytrf.output      = zsytrf_output;
-    test_zsytrf.outchk      = zsytrf_outchk;
-    test_zsytrf.fptr        = testing_zsytrf;
-    test_zsytrf.next        = NULL;
+    test_zsytrf.name   = "zsytrf";
+    test_zsytrf.helper = "Symmetric triangular factorization";
+    test_zsytrf.params = zsytrf_params;
+    test_zsytrf.output = zsytrf_output;
+    test_zsytrf.outchk = zsytrf_outchk;
+    test_zsytrf.fptr   = testing_zsytrf;
+    test_zsytrf.next   = NULL;
 
     testing_register( &test_zsytrf );
 }
diff --git a/testing/testing_zsytrs.c b/testing/testing_zsytrs.c
index 0ced94ba172e9d2bdf714342ca382c289f460c5f..80e44e41c8b17c8eddd5b71a43fdc44517b2db11 100644
--- a/testing/testing_zsytrs.c
+++ b/testing/testing_zsytrs.c
@@ -25,10 +25,11 @@
 int
 testing_zsytrs( run_arg_list_t *args, int check )
 {
-    int          hres;
-    CHAM_desc_t *descA, *descX;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -40,8 +41,9 @@ testing_zsytrs( run_arg_list_t *args, int check )
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         seedB  = run_arg_get_int( args, "seedB", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = 0;  // flops_zsytrs( N, NRHS );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descX;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -59,12 +61,18 @@ testing_zsytrs( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    hres += CHAMELEON_zsytrs_Tile( uplo, descA, descX );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zsytrs_Tile_Async( uplo, descA, descX,
+                                             test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descX, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zsytrs_Tile( uplo, descA, descX );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, 0 /*flops_zsytrs( N, NRHS )*/ );
 
     /* Checks the factorisation and residue */
     if ( check ) {
@@ -87,7 +95,8 @@ testing_zsytrs( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zsytrs;
-const char *zsytrs_params[] = { "mtxfmt", "nb","uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zsytrs_params[] = { "mtxfmt", "nb",  "uplo",  "n",     "nrhs",
+                                "lda",    "ldb", "seedA", "seedB", NULL };
 const char *zsytrs_output[] = { NULL };
 const char *zsytrs_outchk[] = { "RETURN", NULL };
 
@@ -98,13 +107,13 @@ void testing_zsytrs_init( void ) __attribute__( ( constructor ) );
 void
 testing_zsytrs_init( void )
 {
-    test_zsytrs.name        = "zsytrs";
-    test_zsytrs.helper      = "Symmetric triangular solve";
-    test_zsytrs.params      = zsytrs_params;
-    test_zsytrs.output      = zsytrs_output;
-    test_zsytrs.outchk      = zsytrs_outchk;
-    test_zsytrs.fptr        = testing_zsytrs;
-    test_zsytrs.next        = NULL;
+    test_zsytrs.name   = "zsytrs";
+    test_zsytrs.helper = "Symmetric triangular solve";
+    test_zsytrs.params = zsytrs_params;
+    test_zsytrs.output = zsytrs_output;
+    test_zsytrs.outchk = zsytrs_outchk;
+    test_zsytrs.fptr   = testing_zsytrs;
+    test_zsytrs.next   = NULL;
 
     testing_register( &test_zsytrs );
 }
diff --git a/testing/testing_ztradd.c b/testing/testing_ztradd.c
index 6967178567ebe885bb70a3bace9c3c783d5dc74a..a754ce6c9bde8811abef65649d738d4d34a01b7b 100644
--- a/testing/testing_ztradd.c
+++ b/testing/testing_ztradd.c
@@ -25,7 +25,7 @@ static cham_fixdbl_t
 flops_ztradd( cham_uplo_t uplo, int M, int N )
 {
     cham_fixdbl_t flops = 0.;
-    int    minMN = chameleon_min( M, N );
+    int           minMN = chameleon_min( M, N );
     switch ( uplo ) {
         case ChamUpper:
             flops = ( minMN * ( minMN + 1 ) / 2 ) + M * chameleon_max( 0, N - M );
@@ -51,11 +51,11 @@ flops_ztradd( cham_uplo_t uplo, int M, int N )
 int
 testing_ztradd( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    int          Am, An;
-    CHAM_desc_t *descA, *descB;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          P      = parameters_getvalue_int( "P" );
@@ -70,8 +70,10 @@ testing_ztradd( run_arg_list_t *args, int check )
     int                   seedA = run_arg_get_int( args, "seedA", random() );
     int                   seedB = run_arg_get_int( args, "seedB", random() );
     int                   Q     = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_ztradd( uplo, M, N );
+
+    /* Descriptors */
+    int          Am, An;
+    CHAM_desc_t *descA, *descB;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
     beta  = run_arg_get_complex64( args, "beta", beta );
@@ -114,12 +116,18 @@ testing_ztradd( run_arg_list_t *args, int check )
     }
 
     /* Calculates the sum */
-    START_TIMING( t );
-    hres = CHAMELEON_ztradd_Tile( uplo, trans, alpha, descA, beta, descB );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_ztradd_Tile_Async( uplo, trans, alpha, descA, beta, descB,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_ztradd_Tile( uplo, trans, alpha, descA, beta, descB );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_ztradd( uplo, M, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -143,8 +151,8 @@ testing_ztradd( run_arg_list_t *args, int check )
 }
 
 testing_t   test_ztradd;
-const char *ztradd_params[] = { "mtxfmt", "nb", "trans", "uplo", "m",     "n",     "lda",
-                                "ldb", "alpha", "beta", "seedA", "seedB", NULL };
+const char *ztradd_params[] = { "mtxfmt", "nb",    "trans", "uplo",  "m",     "n", "lda",
+                                "ldb",    "alpha", "beta",  "seedA", "seedB", NULL };
 const char *ztradd_output[] = { NULL };
 const char *ztradd_outchk[] = { "RETURN", NULL };
 
@@ -155,13 +163,13 @@ void testing_ztradd_init( void ) __attribute__( ( constructor ) );
 void
 testing_ztradd_init( void )
 {
-    test_ztradd.name        = "ztradd";
-    test_ztradd.helper      = "Triangular matrix-matrix addition";
-    test_ztradd.params      = ztradd_params;
-    test_ztradd.output      = ztradd_output;
-    test_ztradd.outchk      = ztradd_outchk;
-    test_ztradd.fptr        = testing_ztradd;
-    test_ztradd.next        = NULL;
+    test_ztradd.name   = "ztradd";
+    test_ztradd.helper = "Triangular matrix-matrix addition";
+    test_ztradd.params = ztradd_params;
+    test_ztradd.output = ztradd_output;
+    test_ztradd.outchk = ztradd_outchk;
+    test_ztradd.fptr   = testing_ztradd;
+    test_ztradd.next   = NULL;
 
     testing_register( &test_ztradd );
 }
diff --git a/testing/testing_ztrmm.c b/testing/testing_ztrmm.c
index a73507e86ef7b7e0db884fc9ae0948aaa0f50175..24a342fd768942686244056d6f09847ddf791055 100644
--- a/testing/testing_ztrmm.c
+++ b/testing/testing_ztrmm.c
@@ -25,11 +25,11 @@
 int
 testing_ztrmm( run_arg_list_t *args, int check )
 {
-    int          Bm, Bn;
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descBinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int                   async  = parameters_getvalue_int( "async" );
     intptr_t              mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int                   nb     = run_arg_get_int( args, "nb", 320 );
     int                   P      = parameters_getvalue_int( "P" );
@@ -45,8 +45,10 @@ testing_ztrmm( run_arg_list_t *args, int check )
     int                   seedA  = run_arg_get_int( args, "seedA", random() );
     int                   seedB  = run_arg_get_int( args, "seedB", random() );
     int                   Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_ztrmm( side, N, K );
+
+    /* Descriptors */
+    int          Bm, Bn;
+    CHAM_desc_t *descA, *descB, *descBinit;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
 
@@ -73,12 +75,18 @@ testing_ztrmm( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descB, seedB );
 
     /* Calculates the product */
-    START_TIMING( t );
-    hres = CHAMELEON_ztrmm_Tile( side, uplo, trans, diag, alpha, descA, descB );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_ztrmm_Tile_Async( side, uplo, trans, diag, alpha, descA, descB,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_ztrmm_Tile( side, uplo, trans, diag, alpha, descA, descB );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_ztrmm( side, N, K ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -86,7 +94,8 @@ testing_ztrmm( run_arg_list_t *args, int check )
             &descBinit, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDB, Bn, 0, 0, Bm, Bn, P, Q );
         CHAMELEON_zplrnt_Tile( descBinit, seedB );
 
-        hres += check_ztrmm( args, CHECK_TRMM, side, uplo, trans, diag, alpha, descA, descB, descBinit );
+        hres += check_ztrmm( args, CHECK_TRMM, side, uplo, trans, diag,
+                             alpha, descA, descB, descBinit );
 
         CHAMELEON_Desc_Destroy( &descBinit );
     }
@@ -98,8 +107,8 @@ testing_ztrmm( run_arg_list_t *args, int check )
 }
 
 testing_t   test_ztrmm;
-const char *ztrmm_params[] = { "mtxfmt", "nb", "trans", "side",  "uplo",  "diag",  "n", "k",
-                               "lda", "ldb",   "alpha", "seedA", "seedB", NULL };
+const char *ztrmm_params[] = { "mtxfmt", "nb",  "trans", "side",  "uplo",  "diag",  "n",
+                               "k",      "lda", "ldb",   "alpha", "seedA", "seedB", NULL };
 const char *ztrmm_output[] = { NULL };
 const char *ztrmm_outchk[] = { "RETURN", NULL };
 
@@ -110,13 +119,13 @@ void testing_ztrmm_init( void ) __attribute__( ( constructor ) );
 void
 testing_ztrmm_init( void )
 {
-    test_ztrmm.name        = "ztrmm";
-    test_ztrmm.helper      = "Triangular matrix-matrix multiply";
-    test_ztrmm.params      = ztrmm_params;
-    test_ztrmm.output      = ztrmm_output;
-    test_ztrmm.outchk      = ztrmm_outchk;
-    test_ztrmm.fptr        = testing_ztrmm;
-    test_ztrmm.next        = NULL;
+    test_ztrmm.name   = "ztrmm";
+    test_ztrmm.helper = "Triangular matrix-matrix multiply";
+    test_ztrmm.params = ztrmm_params;
+    test_ztrmm.output = ztrmm_output;
+    test_ztrmm.outchk = ztrmm_outchk;
+    test_ztrmm.fptr   = testing_ztrmm;
+    test_ztrmm.next   = NULL;
 
     testing_register( &test_ztrmm );
 }
diff --git a/testing/testing_ztrsm.c b/testing/testing_ztrsm.c
index 5e4006d79d645288ce92fa6bc11640cf709d9e16..043debc55915541b76ef2635dd3f4f24470fdd65 100644
--- a/testing/testing_ztrsm.c
+++ b/testing/testing_ztrsm.c
@@ -25,10 +25,11 @@
 int
 testing_ztrsm( run_arg_list_t *args, int check )
 {
-    int          hres = 0;
-    CHAM_desc_t *descA, *descB, *descBinit;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int                   async  = parameters_getvalue_int( "async" );
     intptr_t              mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int                   nb     = run_arg_get_int( args, "nb", 320 );
     int                   P      = parameters_getvalue_int( "P" );
@@ -45,8 +46,9 @@ testing_ztrsm( run_arg_list_t *args, int check )
     int                   seedA  = run_arg_get_int( args, "seedA", random() );
     int                   seedB  = run_arg_get_int( args, "seedB", random() );
     int                   Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_ztrsm( side, M, N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descB, *descBinit;
 
     alpha = run_arg_get_complex64( args, "alpha", alpha );
 
@@ -64,12 +66,18 @@ testing_ztrsm( run_arg_list_t *args, int check )
     CHAMELEON_zplrnt_Tile( descB, seedB );
 
     /* Calculates the product */
-    START_TIMING( t );
-    hres = CHAMELEON_ztrsm_Tile( side, uplo, trans, diag, alpha, descA, descB );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_ztrsm_Tile_Async( side, uplo, trans, diag, alpha, descA, descB,
+                                           test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descB, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_ztrsm_Tile( side, uplo, trans, diag, alpha, descA, descB );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_ztrsm( side, M, N ) );
 
     /* Checks the solution */
     if ( check ) {
@@ -77,7 +85,8 @@ testing_ztrsm( run_arg_list_t *args, int check )
             &descBinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, N, 0, 0, M, N, P, Q );
         CHAMELEON_zplrnt_Tile( descBinit, seedB );
 
-        hres += check_ztrmm( args, CHECK_TRSM, side, uplo, trans, diag, alpha, descA, descB, descBinit );
+        hres += check_ztrmm( args, CHECK_TRSM, side, uplo, trans, diag,
+                             alpha, descA, descB, descBinit );
 
         CHAMELEON_Desc_Destroy( &descBinit );
     }
@@ -89,8 +98,8 @@ testing_ztrsm( run_arg_list_t *args, int check )
 }
 
 testing_t   test_ztrsm;
-const char *ztrsm_params[] = { "mtxfmt", "nb",  "side",  "uplo",  "trans", "diag",  "m", "n",
-                               "lda", "ldb",   "alpha", "seedA", "seedB", NULL };
+const char *ztrsm_params[] = { "mtxfmt", "nb",  "side", "uplo",  "trans", "diag",  "m",
+                               "n",      "lda", "ldb",  "alpha", "seedA", "seedB", NULL };
 const char *ztrsm_output[] = { NULL };
 const char *ztrsm_outchk[] = { "RETURN", NULL };
 
@@ -101,13 +110,13 @@ void testing_ztrsm_init( void ) __attribute__( ( constructor ) );
 void
 testing_ztrsm_init( void )
 {
-    test_ztrsm.name        = "ztrsm";
-    test_ztrsm.helper      = "Triangular matrix solve";
-    test_ztrsm.params      = ztrsm_params;
-    test_ztrsm.output      = ztrsm_output;
-    test_ztrsm.outchk      = ztrsm_outchk;
-    test_ztrsm.fptr        = testing_ztrsm;
-    test_ztrsm.next        = NULL;
+    test_ztrsm.name   = "ztrsm";
+    test_ztrsm.helper = "Triangular matrix solve";
+    test_ztrsm.params = ztrsm_params;
+    test_ztrsm.output = ztrsm_output;
+    test_ztrsm.outchk = ztrsm_outchk;
+    test_ztrsm.fptr   = testing_ztrsm;
+    test_ztrsm.next   = NULL;
 
     testing_register( &test_ztrsm );
 }
diff --git a/testing/testing_ztrtri.c b/testing/testing_ztrtri.c
index 36ead817e708d00510862e839c7133fc33fdb3a4..c3a3ad3f4e9d7dc2e23a098ddfd43156941efee2 100644
--- a/testing/testing_ztrtri.c
+++ b/testing/testing_ztrtri.c
@@ -24,10 +24,11 @@
 int
 testing_ztrtri( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int         async  = parameters_getvalue_int( "async" );
     intptr_t    mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int         nb     = run_arg_get_int( args, "nb", 320 );
     int         P      = parameters_getvalue_int( "P" );
@@ -37,8 +38,9 @@ testing_ztrtri( run_arg_list_t *args, int check )
     int         LDA    = run_arg_get_int( args, "LDA", N );
     int         seedA  = run_arg_get_int( args, "seedA", random() );
     int         Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_ztrtri( N );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
 
@@ -50,12 +52,17 @@ testing_ztrtri( run_arg_list_t *args, int check )
     CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA );
 
     /* Calculates the inversed matrices */
-    START_TIMING( t );
-    hres = CHAMELEON_ztrtri_Tile( uplo, diag, descA );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_ztrtri_Tile_Async( uplo, diag, descA,
+                                            test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_ztrtri_Tile( uplo, diag, descA );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_ztrtri( N ) );
 
     /* Checks the inverse */
     if ( check ) {
@@ -73,7 +80,7 @@ testing_ztrtri( run_arg_list_t *args, int check )
 }
 
 testing_t   test_ztrtri;
-const char *ztrtri_params[] = { "mtxfmt", "nb","uplo", "diag", "n", "lda", "seedA", NULL };
+const char *ztrtri_params[] = { "mtxfmt", "nb", "uplo", "diag", "n", "lda", "seedA", NULL };
 const char *ztrtri_output[] = { NULL };
 const char *ztrtri_outchk[] = { "RETURN", NULL };
 
@@ -84,13 +91,13 @@ void testing_ztrtri_init( void ) __attribute__( ( constructor ) );
 void
 testing_ztrtri_init( void )
 {
-    test_ztrtri.name        = "ztrtri";
-    test_ztrtri.helper      = "Triangular matrix inversion";
-    test_ztrtri.params      = ztrtri_params;
-    test_ztrtri.output      = ztrtri_output;
-    test_ztrtri.outchk      = ztrtri_outchk;
-    test_ztrtri.fptr        = testing_ztrtri;
-    test_ztrtri.next        = NULL;
+    test_ztrtri.name   = "ztrtri";
+    test_ztrtri.helper = "Triangular matrix inversion";
+    test_ztrtri.params = ztrtri_params;
+    test_ztrtri.output = ztrtri_output;
+    test_ztrtri.outchk = ztrtri_outchk;
+    test_ztrtri.fptr   = testing_ztrtri;
+    test_ztrtri.next   = NULL;
 
     testing_register( &test_ztrtri );
 }
diff --git a/testing/testing_zunglq.c b/testing/testing_zunglq.c
index 662c0e39467e10c80626885e1114e2c703c65841..536554ecd1289e21cda2573b4703173191d18fab 100644
--- a/testing/testing_zunglq.c
+++ b/testing/testing_zunglq.c
@@ -24,10 +24,11 @@
 int
 testing_zunglq( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descT, *descQ;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -39,8 +40,9 @@ testing_zunglq( run_arg_list_t *args, int check )
     int      RH     = run_arg_get_int( args, "qra", 0 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zunglq( M, N, K );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descT, *descQ;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -79,12 +81,19 @@ testing_zunglq( run_arg_list_t *args, int check )
     hres = CHAMELEON_zgelqf_Tile( descA, descT );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    CHAMELEON_zunglq_Tile( descA, descT, descQ );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zunglq_Tile_Async( descA, descT, descQ,
+                                             test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descQ, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zunglq_Tile( descA, descT, descQ );
+    }
+    test_data.hres = hres; /* Accumulated status: zgelqf factorization + zunglq */
+    testing_stop( &test_data, flops_zunglq( M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -105,7 +114,7 @@ testing_zunglq( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zunglq;
-const char *zunglq_params[] = { "mtxfmt", "nb","ib", "m", "n", "k", "lda", "qra", "seedA", NULL };
+const char *zunglq_params[] = { "mtxfmt", "nb", "ib", "m", "n", "k", "lda", "qra", "seedA", NULL };
 const char *zunglq_output[] = { NULL };
 const char *zunglq_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -116,13 +125,13 @@ void testing_zunglq_init( void ) __attribute__( ( constructor ) );
 void
 testing_zunglq_init( void )
 {
-    test_zunglq.name        = "zunglq";
-    test_zunglq.helper      = "Q generation (LQ)";
-    test_zunglq.params      = zunglq_params;
-    test_zunglq.output      = zunglq_output;
-    test_zunglq.outchk      = zunglq_outchk;
-    test_zunglq.fptr        = testing_zunglq;
-    test_zunglq.next        = NULL;
+    test_zunglq.name   = "zunglq";
+    test_zunglq.helper = "Q generation (LQ)";
+    test_zunglq.params = zunglq_params;
+    test_zunglq.output = zunglq_output;
+    test_zunglq.outchk = zunglq_outchk;
+    test_zunglq.fptr   = testing_zunglq;
+    test_zunglq.next   = NULL;
 
     testing_register( &test_zunglq );
 }
diff --git a/testing/testing_zunglq_hqr.c b/testing/testing_zunglq_hqr.c
index 5e0cfef8e6fac8b7a41190504d04642444757344..652efabddceb03658546782c954d6fd39250de0e 100644
--- a/testing/testing_zunglq_hqr.c
+++ b/testing/testing_zunglq_hqr.c
@@ -24,10 +24,11 @@
 int
 testing_zunglq_hqr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descTS, *descTT, *descQ;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -43,9 +44,9 @@ testing_zunglq_hqr( run_arg_list_t *args, int check )
     int      domino = run_arg_get_int( args, "domino", -1 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zunglq( M, N, K );
 
+    /* Descriptors */
+    CHAM_desc_t    *descA, *descTS, *descTT, *descQ;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -87,12 +88,20 @@ testing_zunglq_hqr( run_arg_list_t *args, int check )
     hres = CHAMELEON_zgelqf_param_Tile( &qrtree, descA, descTS, descTT );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    CHAMELEON_zunglq_param_Tile( &qrtree, descA, descTS, descTT, descQ );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zunglq_param_Tile_Async( &qrtree, descA, descTS, descTT, descQ,
+                                                   test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descQ, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zunglq_param_Tile( &qrtree, descA, descTS, descTT, descQ );
+    }
+    test_data.hres = hres; /* Accumulated status: zgelqf_param factorization + zunglq_param */
+    testing_stop( &test_data, flops_zunglq( M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -115,8 +124,8 @@ testing_zunglq_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zunglq_hqr;
-const char *zunglq_hqr_params[] = { "mtxfmt", "nb", "ib",   "m",    "n",      "k",     "lda", "qra",
-                                    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
+const char *zunglq_hqr_params[] = { "mtxfmt", "nb",  "ib",   "m",    "n",      "k",     "lda",
+                                    "qra",    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
 const char *zunglq_hqr_output[] = { NULL };
 const char *zunglq_hqr_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -127,13 +136,13 @@ void testing_zunglq_hqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zunglq_hqr_init( void )
 {
-    test_zunglq_hqr.name        = "zunglq_hqr";
-    test_zunglq_hqr.helper      = "Q generation with hierarchical reduction trees (LQ)";
-    test_zunglq_hqr.params      = zunglq_hqr_params;
-    test_zunglq_hqr.output      = zunglq_hqr_output;
-    test_zunglq_hqr.outchk      = zunglq_hqr_outchk;
-    test_zunglq_hqr.fptr        = testing_zunglq_hqr;
-    test_zunglq_hqr.next        = NULL;
+    test_zunglq_hqr.name   = "zunglq_hqr";
+    test_zunglq_hqr.helper = "Q generation with hierarchical reduction trees (LQ)";
+    test_zunglq_hqr.params = zunglq_hqr_params;
+    test_zunglq_hqr.output = zunglq_hqr_output;
+    test_zunglq_hqr.outchk = zunglq_hqr_outchk;
+    test_zunglq_hqr.fptr   = testing_zunglq_hqr;
+    test_zunglq_hqr.next   = NULL;
 
     testing_register( &test_zunglq_hqr );
 }
diff --git a/testing/testing_zungqr.c b/testing/testing_zungqr.c
index 5a3f66eb9f6e28bb414f0439b40b273b4e65341f..5916755907cfc122d994f209b18c02ffd8106db4 100644
--- a/testing/testing_zungqr.c
+++ b/testing/testing_zungqr.c
@@ -24,10 +24,11 @@
 int
 testing_zungqr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descT, *descQ;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -39,8 +40,9 @@ testing_zungqr( run_arg_list_t *args, int check )
     int      RH     = run_arg_get_int( args, "qra", 0 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zungqr( M, N, K );
+
+    /* Descriptors */
+    CHAM_desc_t *descA, *descT, *descQ;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -79,12 +81,19 @@ testing_zungqr( run_arg_list_t *args, int check )
     hres = CHAMELEON_zgeqrf_Tile( descA, descT );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    CHAMELEON_zungqr_Tile( descA, descT, descQ );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zungqr_Tile_Async( descA, descT, descQ, test_data.sequence,
+                                             &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descQ, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zungqr_Tile( descA, descT, descQ );
+    }
+    test_data.hres = hres; /* Accumulated status: zgeqrf factorization + zungqr */
+    testing_stop( &test_data, flops_zungqr( M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -105,7 +114,7 @@ testing_zungqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zungqr;
-const char *zungqr_params[] = { "mtxfmt", "nb","ib", "m", "n", "k", "lda", "qra", "seedA", NULL };
+const char *zungqr_params[] = { "mtxfmt", "nb", "ib", "m", "n", "k", "lda", "qra", "seedA", NULL };
 const char *zungqr_output[] = { NULL };
 const char *zungqr_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -116,13 +125,13 @@ void testing_zungqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zungqr_init( void )
 {
-    test_zungqr.name        = "zungqr";
-    test_zungqr.helper      = "Q generation (QR)";
-    test_zungqr.params      = zungqr_params;
-    test_zungqr.output      = zungqr_output;
-    test_zungqr.outchk      = zungqr_outchk;
-    test_zungqr.fptr        = testing_zungqr;
-    test_zungqr.next        = NULL;
+    test_zungqr.name   = "zungqr";
+    test_zungqr.helper = "Q generation (QR)";
+    test_zungqr.params = zungqr_params;
+    test_zungqr.output = zungqr_output;
+    test_zungqr.outchk = zungqr_outchk;
+    test_zungqr.fptr   = testing_zungqr;
+    test_zungqr.next   = NULL;
 
     testing_register( &test_zungqr );
 }
diff --git a/testing/testing_zungqr_hqr.c b/testing/testing_zungqr_hqr.c
index 1ed5a1e0b480668c835349399f1b0532fe642af9..773c1dbcdb5e5cc3f51e60d538d30cf181815260 100644
--- a/testing/testing_zungqr_hqr.c
+++ b/testing/testing_zungqr_hqr.c
@@ -24,10 +24,11 @@
 int
 testing_zungqr_hqr( run_arg_list_t *args, int check )
 {
-    int          hres   = 0;
-    CHAM_desc_t *descA, *descTS, *descTT, *descQ;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int      async  = parameters_getvalue_int( "async" );
     intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int      nb     = run_arg_get_int( args, "nb", 320 );
     int      ib     = run_arg_get_int( args, "ib", 48 );
@@ -43,9 +44,9 @@ testing_zungqr_hqr( run_arg_list_t *args, int check )
     int      domino = run_arg_get_int( args, "domino", -1 );
     int      seedA  = run_arg_get_int( args, "seedA", random() );
     int      Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zungqr( M, N, K );
 
+    /* Descriptors */
+    CHAM_desc_t    *descA, *descTS, *descTT, *descQ;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -87,12 +88,20 @@ testing_zungqr_hqr( run_arg_list_t *args, int check )
     hres = CHAMELEON_zgeqrf_param_Tile( &qrtree, descA, descTS, descTT );
 
     /* Calculates the solution */
-    START_TIMING( t );
-    CHAMELEON_zungqr_param_Tile( &qrtree, descA, descTS, descTT, descQ );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zungqr_param_Tile_Async( &qrtree, descA, descTS, descTT, descQ,
+                                                   test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descQ, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zungqr_param_Tile( &qrtree, descA, descTS, descTT, descQ );
+    }
+    test_data.hres = hres; /* Accumulated status: zgeqrf_param factorization + zungqr_param */
+    testing_stop( &test_data, flops_zungqr( M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -115,8 +124,8 @@ testing_zungqr_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zungqr_hqr;
-const char *zungqr_hqr_params[] = { "mtxfmt", "nb", "ib",   "m",    "n",      "k",     "lda", "qra",
-                                    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
+const char *zungqr_hqr_params[] = { "mtxfmt", "nb",  "ib",   "m",    "n",      "k",     "lda",
+                                    "qra",    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
 const char *zungqr_hqr_output[] = { NULL };
 const char *zungqr_hqr_outchk[] = { "||A||", "||I-QQ'||", "||A-fact(A)||", "RETURN", NULL };
 
@@ -127,13 +136,13 @@ void testing_zungqr_hqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zungqr_hqr_init( void )
 {
-    test_zungqr_hqr.name        = "zungqr_hqr";
-    test_zungqr_hqr.helper      = "Q generation with hierarchical reduction trees (QR)";
-    test_zungqr_hqr.params      = zungqr_hqr_params;
-    test_zungqr_hqr.output      = zungqr_hqr_output;
-    test_zungqr_hqr.outchk      = zungqr_hqr_outchk;
-    test_zungqr_hqr.fptr        = testing_zungqr_hqr;
-    test_zungqr_hqr.next        = NULL;
+    test_zungqr_hqr.name   = "zungqr_hqr";
+    test_zungqr_hqr.helper = "Q generation with hierarchical reduction trees (QR)";
+    test_zungqr_hqr.params = zungqr_hqr_params;
+    test_zungqr_hqr.output = zungqr_hqr_output;
+    test_zungqr_hqr.outchk = zungqr_hqr_outchk;
+    test_zungqr_hqr.fptr   = testing_zungqr_hqr;
+    test_zungqr_hqr.next   = NULL;
 
     testing_register( &test_zungqr_hqr );
 }
diff --git a/testing/testing_zunmlq.c b/testing/testing_zunmlq.c
index 39d9311756ce2296e03a43092d760dee3773d393..8e811f900648c0179878e1132c661f90228dc1f1 100644
--- a/testing/testing_zunmlq.c
+++ b/testing/testing_zunmlq.c
@@ -25,11 +25,11 @@
 int
 testing_zunmlq( run_arg_list_t *args, int check )
 {
-    int          An;
-    int          hres;
-    CHAM_desc_t *descA, *descT, *descC;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          ib     = run_arg_get_int( args, "ib", 48 );
@@ -45,8 +45,10 @@ testing_zunmlq( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedC  = run_arg_get_int( args, "seedC", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zunmlq( side, M, N, K );
+
+    /* Descriptors */
+    int          An;
+    CHAM_desc_t *descA, *descT, *descC;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -78,12 +80,19 @@ testing_zunmlq( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Computes unmlq */
-    START_TIMING( t );
-    hres += CHAMELEON_zunmlq_Tile( side, trans, descA, descT, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zunmlq_Tile_Async( side, trans, descA, descT, descC,
+                                             test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zunmlq_Tile( side, trans, descA, descT, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmlq( side, M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -110,8 +119,8 @@ testing_zunmlq( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zunmlq;
-const char *zunmlq_params[] = { "mtxfmt", "nb", "ib",  "side", "trans", "m",     "n", "k",
-                                "lda", "ldc", "qra",   "seedA", "seedC", NULL };
+const char *zunmlq_params[] = { "mtxfmt", "nb",  "ib",  "side", "trans", "m",     "n",
+                                "k",      "lda", "ldc", "qra",  "seedA", "seedC", NULL };
 const char *zunmlq_output[] = { NULL };
 const char *zunmlq_outchk[] = { "RETURN", NULL };
 
@@ -122,13 +131,13 @@ void testing_zunmlq_init( void ) __attribute__( ( constructor ) );
 void
 testing_zunmlq_init( void )
 {
-    test_zunmlq.name        = "zunmlq";
-    test_zunmlq.helper      = "Q application (LQ)";
-    test_zunmlq.params      = zunmlq_params;
-    test_zunmlq.output      = zunmlq_output;
-    test_zunmlq.outchk      = zunmlq_outchk;
-    test_zunmlq.fptr        = testing_zunmlq;
-    test_zunmlq.next        = NULL;
+    test_zunmlq.name   = "zunmlq";
+    test_zunmlq.helper = "Q application (LQ)";
+    test_zunmlq.params = zunmlq_params;
+    test_zunmlq.output = zunmlq_output;
+    test_zunmlq.outchk = zunmlq_outchk;
+    test_zunmlq.fptr   = testing_zunmlq;
+    test_zunmlq.next   = NULL;
 
     testing_register( &test_zunmlq );
 }
diff --git a/testing/testing_zunmlq_hqr.c b/testing/testing_zunmlq_hqr.c
index 5edc90522a3e98d575b4e2a7f8fb84c632f76730..156bb51404d1da96c9a039389096eff74e6c879f 100644
--- a/testing/testing_zunmlq_hqr.c
+++ b/testing/testing_zunmlq_hqr.c
@@ -25,11 +25,11 @@
 int
 testing_zunmlq_hqr( run_arg_list_t *args, int check )
 {
-    int          An;
-    int          hres;
-    CHAM_desc_t *descA, *descTS, *descTT, *descC;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          ib     = run_arg_get_int( args, "ib", 48 );
@@ -49,9 +49,10 @@ testing_zunmlq_hqr( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedC  = run_arg_get_int( args, "seedC", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zunmlq( side, M, N, K );
 
+    /* Descriptors */
+    int             An;
+    CHAM_desc_t    *descA, *descTS, *descTT, *descC;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -86,12 +87,20 @@ testing_zunmlq_hqr( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Computes unmlq_hqr */
-    START_TIMING( t );
-    hres += CHAMELEON_zunmlq_param_Tile( &qrtree, side, trans, descA, descTS, descTT, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zunmlq_param_Tile_Async( &qrtree, side, trans, descA, descTS, descTT, descC,
+                                                   test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zunmlq_param_Tile( &qrtree, side, trans, descA, descTS, descTT, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmlq( side, M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -120,9 +129,9 @@ testing_zunmlq_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zunmlq_hqr;
-const char *zunmlq_hqr_params[] = { "mtxfmt", "nb",  "ib",     "side",  "trans", "m",   "n",
-                                    "k",    "lda",    "ldc",   "qra",   "qrp", "llvl",
-                                    "hlvl", "domino", "seedA", "seedC", NULL };
+const char *zunmlq_hqr_params[] = { "mtxfmt", "nb",   "ib",     "side",  "trans", "m",
+                                    "n",      "k",    "lda",    "ldc",   "qra",   "qrp",
+                                    "llvl",   "hlvl", "domino", "seedA", "seedC", NULL };
 const char *zunmlq_hqr_output[] = { NULL };
 const char *zunmlq_hqr_outchk[] = { "RETURN", NULL };
 
@@ -138,8 +147,8 @@ testing_zunmlq_hqr_init( void )
     test_zunmlq_hqr.params = zunmlq_hqr_params;
     test_zunmlq_hqr.output = zunmlq_hqr_output;
     test_zunmlq_hqr.outchk = zunmlq_hqr_outchk;
-    test_zunmlq_hqr.fptr = testing_zunmlq_hqr;
-    test_zunmlq_hqr.next = NULL;
+    test_zunmlq_hqr.fptr   = testing_zunmlq_hqr;
+    test_zunmlq_hqr.next   = NULL;
 
     testing_register( &test_zunmlq_hqr );
 }
diff --git a/testing/testing_zunmqr.c b/testing/testing_zunmqr.c
index 3225523eb56e35bc17ecef04afd8f474c5a1ff21..f954246803bd09333e71f65065c5bfa978745e16 100644
--- a/testing/testing_zunmqr.c
+++ b/testing/testing_zunmqr.c
@@ -25,11 +25,11 @@
 int
 testing_zunmqr( run_arg_list_t *args, int check )
 {
-    int          Am;
-    int          hres;
-    CHAM_desc_t *descA, *descT, *descC;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          ib     = run_arg_get_int( args, "ib", 48 );
@@ -45,8 +45,10 @@ testing_zunmqr( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedC  = run_arg_get_int( args, "seedC", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zunmqr( side, M, N, K );
+
+    /* Descriptors */
+    int          Am;
+    CHAM_desc_t *descA, *descT, *descC;
 
     CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
     CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
@@ -78,12 +80,19 @@ testing_zunmqr( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Computes unmqr */
-    START_TIMING( t );
-    hres += CHAMELEON_zunmqr_Tile( side, trans, descA, descT, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zunmqr_Tile_Async( side, trans, descA, descT, descC,
+                                             test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zunmqr_Tile( side, trans, descA, descT, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmqr( side, M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -110,8 +119,8 @@ testing_zunmqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zunmqr;
-const char *zunmqr_params[] = { "mtxfmt", "nb", "ib",  "side", "trans", "m",     "n", "k",
-                                "lda", "ldc", "qra",   "seedA", "seedC", NULL };
+const char *zunmqr_params[] = { "mtxfmt", "nb",  "ib",  "side", "trans", "m",     "n",
+                                "k",      "lda", "ldc", "qra",  "seedA", "seedC", NULL };
 const char *zunmqr_output[] = { NULL };
 const char *zunmqr_outchk[] = { "RETURN", NULL };
 
@@ -122,13 +131,13 @@ void testing_zunmqr_init( void ) __attribute__( ( constructor ) );
 void
 testing_zunmqr_init( void )
 {
-    test_zunmqr.name        = "zunmqr";
-    test_zunmqr.helper      = "Q application (QR)";
-    test_zunmqr.params      = zunmqr_params;
-    test_zunmqr.output      = zunmqr_output;
-    test_zunmqr.outchk      = zunmqr_outchk;
-    test_zunmqr.fptr        = testing_zunmqr;
-    test_zunmqr.next        = NULL;
+    test_zunmqr.name   = "zunmqr";
+    test_zunmqr.helper = "Q application (QR)";
+    test_zunmqr.params = zunmqr_params;
+    test_zunmqr.output = zunmqr_output;
+    test_zunmqr.outchk = zunmqr_outchk;
+    test_zunmqr.fptr   = testing_zunmqr;
+    test_zunmqr.next   = NULL;
 
     testing_register( &test_zunmqr );
 }
diff --git a/testing/testing_zunmqr_hqr.c b/testing/testing_zunmqr_hqr.c
index 42047ee5346c8ca3180ff20b580aaea3f13d18ff..0c8748290b6ee26504477673d199384eb05058c7 100644
--- a/testing/testing_zunmqr_hqr.c
+++ b/testing/testing_zunmqr_hqr.c
@@ -25,11 +25,11 @@
 int
 testing_zunmqr_hqr( run_arg_list_t *args, int check )
 {
-    int          Am;
-    int          hres;
-    CHAM_desc_t *descA, *descTS, *descTT, *descC;
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
 
-    /* Reads arguments */
+    /* Read arguments */
+    int          async  = parameters_getvalue_int( "async" );
     intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
     int          nb     = run_arg_get_int( args, "nb", 320 );
     int          ib     = run_arg_get_int( args, "ib", 48 );
@@ -49,9 +49,10 @@ testing_zunmqr_hqr( run_arg_list_t *args, int check )
     int          seedA  = run_arg_get_int( args, "seedA", random() );
     int          seedC  = run_arg_get_int( args, "seedC", random() );
     int          Q      = parameters_compute_q( P );
-    cham_fixdbl_t t, gflops;
-    cham_fixdbl_t flops = flops_zunmqr( side, M, N, K );
 
+    /* Descriptors */
+    int             Am;
+    CHAM_desc_t    *descA, *descTS, *descTT, *descC;
     libhqr_tree_t   qrtree;
     libhqr_matrix_t matrix;
 
@@ -86,12 +87,20 @@ testing_zunmqr_hqr( run_arg_list_t *args, int check )
     assert( hres == 0 );
 
     /* Computes unmqr_hqr */
-    START_TIMING( t );
-    hres += CHAMELEON_zunmqr_param_Tile( &qrtree, side, trans, descA, descTS, descTT, descC );
-    STOP_TIMING( t );
-    gflops = flops * 1.e-9 / t;
-    run_arg_add_fixdbl( args, "time", t );
-    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+    testing_start( &test_data );
+    if ( async ) {
+        hres += CHAMELEON_zunmqr_param_Tile_Async( &qrtree, side, trans, descA, descTS, descTT, descC,
+                                                   test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTS, test_data.sequence );
+        CHAMELEON_Desc_Flush( descTT, test_data.sequence );
+        CHAMELEON_Desc_Flush( descC, test_data.sequence );
+    }
+    else {
+        hres += CHAMELEON_zunmqr_param_Tile( &qrtree, side, trans, descA, descTS, descTT, descC );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmqr( side, M, N, K ) );
 
     /* Checks the factorisation and orthogonality */
     if ( check ) {
@@ -120,9 +129,9 @@ testing_zunmqr_hqr( run_arg_list_t *args, int check )
 }
 
 testing_t   test_zunmqr_hqr;
-const char *zunmqr_hqr_params[] = { "mtxfmt", "nb",  "ib",     "side",  "trans", "m",   "n",
-                                    "k",    "lda",    "ldc",   "qra",   "qrp", "llvl",
-                                    "hlvl", "domino", "seedA", "seedC", NULL };
+const char *zunmqr_hqr_params[] = { "mtxfmt", "nb",   "ib",     "side",  "trans", "m",
+                                    "n",      "k",    "lda",    "ldc",   "qra",   "qrp",
+                                    "llvl",   "hlvl", "domino", "seedA", "seedC", NULL };
 const char *zunmqr_hqr_output[] = { NULL };
 const char *zunmqr_hqr_outchk[] = { "RETURN", NULL };
 
@@ -138,8 +147,8 @@ testing_zunmqr_hqr_init( void )
     test_zunmqr_hqr.params = zunmqr_hqr_params;
     test_zunmqr_hqr.output = zunmqr_hqr_output;
     test_zunmqr_hqr.outchk = zunmqr_hqr_outchk;
-    test_zunmqr_hqr.fptr = testing_zunmqr_hqr;
-    test_zunmqr_hqr.next = NULL;
+    test_zunmqr_hqr.fptr   = testing_zunmqr_hqr;
+    test_zunmqr_hqr.next   = NULL;
 
     testing_register( &test_zunmqr_hqr );
 }
diff --git a/testing/testings.h b/testing/testings.h
index 4e6b0efbfda0f654442793e1655a5c4738a3889a..cf80e572dd19d99359741b1a4f154a25e5ef21ea 100644
--- a/testing/testings.h
+++ b/testing/testings.h
@@ -217,26 +217,24 @@ void        run_list_destroy( run_list_elt_t *run );
 
 void testing_register( testing_t *test );
 
-/**
- * @brief Macros to enable distributed synchronization if necessary
- */
-#if defined(CHAMELEON_USE_MPI)
-#define START_DISTRIBUTED()  CHAMELEON_Distributed_start();
-#define STOP_DISTRIBUTED()   CHAMELEON_Distributed_stop();
-#else
-#define START_DISTRIBUTED()  do {} while(0);
-#define STOP_DISTRIBUTED()   do {} while(0);
-#endif
 
 /**
- * @brief Macros to start/stop timers with necessary synchronizations
+ * @brief Define the data associated to a single run of a testing
  */
-#define START_TIMING( _t_ )                     \
-    START_DISTRIBUTED();                        \
-    (_t_) = RUNTIME_get_time();
+struct testing_;
+typedef struct testing_ testing_t;
+typedef int (*test_fct_t)( run_arg_list_t *, int );
 
-#define STOP_TIMING( _t_ )                      \
-    STOP_DISTRIBUTED();                         \
-    (_t_) = RUNTIME_get_time() - (_t_);         \
+typedef struct testdata_ {
+    run_arg_list_t     *args;     /**< The parameters of the test           */
+    int                 hres;     /**< The returned value of the test       */
+    cham_fixdbl_t       texec;    /**< The execution time of the test       */
+    cham_fixdbl_t       tsub;     /**< The task submission time of the test */
+    RUNTIME_sequence_t *sequence; /**< The sequence to run the test if splitsub */
+    RUNTIME_request_t  request;   /**< The request to run the test if splitsub  */
+} testdata_t;
+
+void testing_start( testdata_t *tdata );
+void testing_stop( testdata_t *tdata, cham_fixdbl_t flops );
 
 #endif /* _testings_h_ */