Mentions légales du service

Skip to content
Snippets Groups Projects
Commit d1dc8c25 authored by Ana Hourcau, committed by Mathieu Faverge
Browse files

Adapting gered and gerst codelets cuda parts

parent 7fc32871
No related branches found
No related tags found
1 merge request: !488 "Mixed precision"
......@@ -71,8 +71,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
#if defined(CHAMELEON_USE_MPI)
/* Backup the MPI tag */
if (A->myrank == tileA->rank)
{
if ( A->myrank == tileA->rank ) {
tag = starpu_mpi_data_get_tag( *handleAin );
}
#endif /* defined(CHAMELEON_USE_MPI) */
......@@ -89,39 +88,41 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
#if defined(CHAMELEON_DEBUG_GERED)
fprintf( stderr,
"[%2d] Convert the tile ( %d, %d ) to half precision\n",
A->myrank, Am, An);
A->myrank, Am, An);
#endif
starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf );
if ( A->myrank == tileA->rank )
{
starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf );
rt_shm_starpu_insert_task(
&cl_dlag2h,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_EXECUTE_ON_WORKER, options->workerid,
rt_shm_starpu_insert_task(
&cl_dlag2h,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "dlag2h",
STARPU_NAME, "dlag2h",
#endif
0);
0);
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = handleAout;
tileA->flttype = ChamComplexHalf;
starpu_mpi_data_register( handleAout, tag, tileA->rank );
}
else
{
tileA->flttype = ChamComplexHalf;
if (*handleAin != NULL)
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = handleAout;
tileA->flttype = ChamComplexHalf;
starpu_mpi_data_register( handleAout, tag, tileA->rank );
}
else
{
starpu_data_unregister_no_coherency(*handleAin);
*handleAin = NULL;
tileA->flttype = ChamComplexHalf;
if ( *handleAin != NULL )
{
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = NULL;
}
}
return;
}
return;
}
#endif
#endif
......@@ -129,11 +130,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
/*
* Check for single precision
*/
#if !defined(CHAMELEON_SIMULATION)
u_low = LAPACKE_slamch_work('e');
#else
u_low = 1e-8;
#endif
u_low = CHAMELEON_slamch();
if ( lnorm < (threshold / u_low) )
{
#if defined(CHAMELEON_DEBUG_GERED)
......@@ -141,34 +138,34 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
"[%2d] Convert the tile ( %d, %d ) to single precision\n",
A->myrank, Am, An );
#endif
if (A->myrank == tileA->rank)
if ( A->myrank == tileA->rank )
{
starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexFloat );
rt_shm_starpu_insert_task(
&cl_zlag2c,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zlag2c",
STARPU_NAME, "zlag2c",
#endif
0);
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = handleAout;
*handleAin = handleAout;
tileA->flttype = ChamComplexFloat;
starpu_mpi_data_register( *handleAin, tag, tileA->rank );
}
else
{
tileA->flttype = ChamComplexFloat;
if (*handleAin != NULL)
if ( *handleAin != NULL )
{
starpu_data_unregister_no_coherency(*handleAin);
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = NULL;
}
}
......
......@@ -39,7 +39,8 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
handleAin = A->schedopt;
handleAin += ((int64_t)A->lmt) * nn + mm;
if ( tileA->flttype == ChamComplexDouble ) {
if ( tileA->flttype == ChamComplexDouble )
{
starpu_data_handle_t *copy = handleAin;
/* Remove first copy */
......@@ -59,12 +60,12 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
return;
}
if (A->myrank != tileA->rank)
if ( A->myrank != tileA->rank )
{
tileA->flttype = ChamComplexDouble;
if (*handleAin != NULL)
if ( *handleAin != NULL )
{
starpu_data_unregister_no_coherency(*handleAin);
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = NULL;
}
return;
......@@ -79,9 +80,9 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
switch( tileA->flttype ) {
#if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500)
#if defined(PRECISION_d)
/*
* Restore from half precision
*/
/*
* Restore from half precision
*/
case ChamComplexHalf:
assert( options->withcuda );
#if defined(CHAMELEON_DEBUG_GERED)
......@@ -91,14 +92,14 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
#endif
rt_shm_starpu_insert_task(
&cl_hlag2d,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "hlag2d",
STARPU_NAME, "hlag2d",
#endif
0);
break;
......@@ -108,19 +109,20 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
case ChamComplexFloat:
#if defined(CHAMELEON_DEBUG_GERED)
fprintf( stderr,
"[%2d] Convert back the tile ( %d, %d ) from half precision\n",
"[%2d] Convert back the tile ( %d, %d ) from single precision\n",
A->myrank, Am, An );
#endif
rt_shm_starpu_insert_task(
&cl_clag2z,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, *handleAin,
STARPU_W, handleAout,
STARPU_PRIORITY, options->priority,
STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "clag2z",
STARPU_NAME, "clag2z",
#endif
0);
break;
......@@ -130,7 +132,7 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
}
starpu_data_unregister_no_coherency( *handleAin );
*handleAin = handleAout;
*handleAin = handleAout;
tileA->flttype = ChamComplexDouble;
starpu_mpi_data_register( handleAout, tag, tileA->rank );
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment