From d1dc8c2531eb3a8c45be24de0515251432d7dfac Mon Sep 17 00:00:00 2001 From: Ana Hourcau <ahourcau@sirocco15.plafrim.cluster> Date: Mon, 19 Aug 2024 10:52:17 +0200 Subject: [PATCH] Adapting gered and gerst codelets cuda parts --- runtime/starpu/codelets/codelet_zgered.c | 81 ++++++++++++------------ runtime/starpu/codelets/codelet_zgerst.c | 44 +++++++------ 2 files changed, 62 insertions(+), 63 deletions(-) diff --git a/runtime/starpu/codelets/codelet_zgered.c b/runtime/starpu/codelets/codelet_zgered.c index d7a132200..430db00ff 100644 --- a/runtime/starpu/codelets/codelet_zgered.c +++ b/runtime/starpu/codelets/codelet_zgered.c @@ -71,8 +71,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, #if defined(CHAMELEON_USE_MPI) /* Backup the MPI tag */ - if (A->myrank == tileA->rank) - { + if ( A->myrank == tileA->rank ) { tag = starpu_mpi_data_get_tag( *handleAin ); } #endif /* defined(CHAMELEON_USE_MPI) */ @@ -89,39 +88,41 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] Convert the tile ( %d, %d ) to half precision\n", - A->myrank, Am, An); + A->myrank, Am, An); #endif - starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf ); + if ( A->myrank == tileA->rank ) + { + starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf ); - rt_shm_starpu_insert_task( - &cl_dlag2h, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, - STARPU_EXECUTE_ON_WORKER, options->workerid, + rt_shm_starpu_insert_task( + &cl_dlag2h, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, + STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "dlag2h", + STARPU_NAME, "dlag2h", #endif - 0); + 0); - starpu_data_unregister_no_coherency( *handleAin ); - *handleAin = handleAout; - tileA->flttype = ChamComplexHalf; - starpu_mpi_data_register( handleAout, tag, tileA->rank ); - } - else - { - tileA->flttype = ChamComplexHalf; - if (*handleAin != NULL) + starpu_data_unregister_no_coherency( *handleAin ); + *handleAin = handleAout; + tileA->flttype = ChamComplexHalf; + starpu_mpi_data_register( handleAout, tag, tileA->rank ); + } + else { - starpu_data_unregister_no_coherency(*handleAin); - *handleAin = NULL; + tileA->flttype = ChamComplexHalf; + if ( *handleAin != NULL ) + { + starpu_data_unregister_no_coherency( *handleAin ); + *handleAin = NULL; + } } + return; } - return; - } #endif #endif @@ -129,11 +130,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, /* * Check for single precision */ -#if !defined(CHAMELEON_SIMULATION) - u_low = LAPACKE_slamch_work('e'); -#else - u_low = 1e-8; -#endif + u_low = CHAMELEON_slamch(); if ( lnorm < (threshold / u_low) ) { #if defined(CHAMELEON_DEBUG_GERED) @@ -141,34 +138,34 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, "[%2d] Convert the tile ( %d, %d ) to single precision\n", A->myrank, Am, An ); #endif - if (A->myrank == tileA->rank) + if ( A->myrank == tileA->rank ) { starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexFloat ); rt_shm_starpu_insert_task( &cl_zlag2c, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlag2c", + STARPU_NAME, "zlag2c", #endif 0); starpu_data_unregister_no_coherency( *handleAin ); - *handleAin = handleAout; + *handleAin = handleAout; tileA->flttype = ChamComplexFloat; starpu_mpi_data_register( *handleAin, tag, tileA->rank ); } else { tileA->flttype = ChamComplexFloat; - if (*handleAin != NULL) + if ( *handleAin != NULL ) { - starpu_data_unregister_no_coherency(*handleAin); + starpu_data_unregister_no_coherency( *handleAin ); *handleAin = NULL; } } diff --git a/runtime/starpu/codelets/codelet_zgerst.c b/runtime/starpu/codelets/codelet_zgerst.c index f0fbdc1a4..c2faaf939 100644 --- a/runtime/starpu/codelets/codelet_zgerst.c +++ b/runtime/starpu/codelets/codelet_zgerst.c @@ -39,7 +39,8 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, handleAin = A->schedopt; handleAin += ((int64_t)A->lmt) * nn + mm; - if ( tileA->flttype == ChamComplexDouble ) { + if ( tileA->flttype == ChamComplexDouble ) + { starpu_data_handle_t *copy = handleAin; /* Remove first copy */ @@ -59,12 +60,12 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, return; } - if (A->myrank != tileA->rank) + if ( A->myrank != tileA->rank ) { tileA->flttype = ChamComplexDouble; - if (*handleAin != NULL) + if ( *handleAin != NULL ) { - starpu_data_unregister_no_coherency(*handleAin); + starpu_data_unregister_no_coherency( *handleAin ); *handleAin = NULL; } return; @@ -79,9 +80,9 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, switch( tileA->flttype ) { #if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500) #if defined(PRECISION_d) - /* - * Restore from half precision - */ + /* + * Restore from half precision + */ case ChamComplexHalf: assert( options->withcuda ); #if defined(CHAMELEON_DEBUG_GERED) @@ -91,14 +92,14 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, #endif rt_shm_starpu_insert_task( &cl_hlag2d, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "hlag2d", + STARPU_NAME, "hlag2d", #endif 0); break; @@ -108,19 +109,20 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, case ChamComplexFloat: #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, - "[%2d] Convert back the tile ( %d, %d ) from half precision\n", + "[%2d] Convert back the tile ( %d, %d ) from single precision\n", A->myrank, Am, An ); #endif + rt_shm_starpu_insert_task( &cl_clag2z, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "clag2z", + STARPU_NAME, "clag2z", #endif 0); break; @@ -130,7 +132,7 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, } starpu_data_unregister_no_coherency( *handleAin ); - *handleAin = handleAout; + *handleAin = handleAout; tileA->flttype = ChamComplexDouble; starpu_mpi_data_register( handleAout, tag, tileA->rank ); } -- GitLab