/**
 *
 * @file starpu/codelet_zgerst.c
 *
 * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zgerst StarPU codelet
 *
 * @version 1.3.0
 * @author Mathieu Faverge
 * @author Ana Hourcau
 * @date 2024-07-17
 * @precisions normal z -> d
 *
 */
#include "chameleon_starpu_internal.h"
#include <coreblas/lapacke.h>
#include "runtime_codelet_zc.h"
#include "runtime_codelet_z.h"

/**
 * @brief Insert the task that converts the tile A( Am, An ) back to
 *        ChamComplexDouble storage.
 *
 * Three situations are handled:
 *   - the tile is already flagged ChamComplexDouble: the two extra handles
 *     stored for this tile in A->schedopt are unregistered (when set) and
 *     nothing else is done;
 *   - the tile is owned by another rank: the tile is flagged
 *     ChamComplexDouble and its local handle is unregistered (when set);
 *   - otherwise: a new ChamComplexDouble handle is registered, a conversion
 *     task (cl_clag2z, or cl_hlag2d when available) is submitted from the
 *     current handle to the new one, and the new handle replaces the
 *     current one in A->schedopt.
 *
 * @param[in] options
 *          Runtime options; priority and workerid are forwarded to the
 *          inserted task, withcuda is asserted for the half precision case.
 *
 * @param[in] m
 *          Forwarded by value to the conversion codelet (presumably the
 *          number of rows of the tile - to be confirmed against the codelet).
 *
 * @param[in] n
 *          Forwarded by value to the conversion codelet (presumably the
 *          number of columns of the tile - to be confirmed against the
 *          codelet).
 *
 * @param[in] A
 *          Descriptor owning the tile. Its schedopt array of handles and the
 *          flttype field of the tile may be updated.
 *
 * @param[in] Am
 *          Row index of the tile in A.
 *
 * @param[in] An
 *          Column index of the tile in A.
 */
void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
                         int m, int n,
                         const CHAM_desc_t *A, int Am, int An )
{
    CHAM_tile_t          *tileA;
    int64_t               mm, nn;
#if defined(CHAMELEON_USE_MPI)
    int                   tag = -1;
#endif /* defined(CHAMELEON_USE_MPI) */
    starpu_data_handle_t *handleAin;
    starpu_data_handle_t  handleAout;

    tileA = A->get_blktile( A, Am, An );

    /* Get the Input handle */
    mm = Am + (A->i / A->mb);
    nn = An + (A->j / A->nb);
    handleAin  = A->schedopt;
    handleAin += ((int64_t)A->lmt) * nn + mm;

    /*
     * The tile is already stored in full precision: only the additional
     * handles kept at strides of lmt * lnt in schedopt have to be released.
     */
    if ( tileA->flttype == ChamComplexDouble ) {
        starpu_data_handle_t *copy = handleAin;

        /* Remove first copy */
        copy += ((int64_t)A->lmt * (int64_t)A->lnt);
        if ( *copy ) {
            starpu_data_unregister_no_coherency( *copy );
            *copy = NULL;
        }

        /* Remove second copy */
        copy += ((int64_t)A->lmt * (int64_t)A->lnt);
        if ( *copy ) {
            starpu_data_unregister_no_coherency( *copy );
            *copy = NULL;
        }

        return;
    }

    /*
     * The tile belongs to another rank: no conversion task is submitted
     * here, the tile is only flagged and its local handle dropped.
     */
    if ( A->myrank != tileA->rank ) {
        tileA->flttype = ChamComplexDouble;
        if ( *handleAin != NULL ) {
            starpu_data_unregister_no_coherency( *handleAin );
            *handleAin = NULL;
        }
        return;
    }

#if defined(CHAMELEON_USE_MPI)
    /* Save the tag of the handle about to be replaced to reuse it below */
    tag = starpu_mpi_data_get_tag( *handleAin );
#endif /* defined(CHAMELEON_USE_MPI) */

    starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexDouble );

    switch( tileA->flttype ) {
#if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500)
#if defined(PRECISION_d)
    /*
     * Restore from half precision
     */
    case ChamComplexHalf:
        assert( options->withcuda );
#if defined(CHAMELEON_DEBUG_GERED)
        fprintf( stderr,
                 "[%2d] Convert back the tile ( %d, %d ) from half precision\n",
                 A->myrank, Am, An );
#endif
        rt_shm_starpu_insert_task(
            &cl_hlag2d,
            STARPU_VALUE,             &m, sizeof(int),
            STARPU_VALUE,             &n, sizeof(int),
            STARPU_R,                 *handleAin,
            STARPU_W,                 handleAout,
            STARPU_PRIORITY,          options->priority,
            STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
            STARPU_NAME,              "hlag2d",
#endif
            0);
        break;
#endif
#endif

    /*
     * Restore from single precision
     */
    case ChamComplexFloat:
#if defined(CHAMELEON_DEBUG_GERED)
        fprintf( stderr,
                 "[%2d] Convert back the tile ( %d, %d ) from single precision\n",
                 A->myrank, Am, An );
#endif
        rt_shm_starpu_insert_task(
            &cl_clag2z,
            STARPU_VALUE,             &m, sizeof(int),
            STARPU_VALUE,             &n, sizeof(int),
            STARPU_R,                 *handleAin,
            STARPU_W,                 handleAout,
            STARPU_PRIORITY,          options->priority,
            STARPU_EXECUTE_ON_WORKER, options->workerid,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
            STARPU_NAME,              "clag2z",
#endif
            0);
        break;

    default:
        fprintf( stderr, "ERROR: Unknown input datatype\n" );
        /*
         * No task writes handleAout in this case: swapping the handles would
         * discard the only valid data of the tile. Release the unused output
         * handle and leave the tile as it is.
         */
        starpu_data_unregister_no_coherency( handleAout );
        return;
    }

    /* The converted handle takes the place of the reduced precision one */
    starpu_data_unregister_no_coherency( *handleAin );
    *handleAin     = handleAout;
    tileA->flttype = ChamComplexDouble;

#if defined(CHAMELEON_USE_MPI)
    /* Give the new handle the tag and the owner of the one it replaces */
    starpu_mpi_data_register( handleAout, tag, tileA->rank );
#endif /* defined(CHAMELEON_USE_MPI) */
}