diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index fcd8177a0a0cc56a6e21080463956c98918d1a46..37d352bfdd0f3f7cabdac2f2b614aea98c126b0b 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -24,7 +24,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Ana Hourcau
- * @date 2024-10-17
+ * @date 2025-03-24
  * @precisions normal z -> c d s
  *
  */
@@ -371,6 +371,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t
 int CHAMELEON_zTile_to_Lapack( CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA ) __attribute__((deprecated("Please refer to CHAMELEON_zDesc2Lap() instead")));
 int CHAMELEON_zLap2Desc( cham_uplo_t uplo, CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t *A );
 int CHAMELEON_zDesc2Lap( cham_uplo_t uplo, CHAM_desc_t *A, CHAMELEON_Complex64_t *Af77, int LDA );
+void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, CHAM_ipiv_t *descIPIV );
 
 /**
  * User Builder function prototypes
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index 7ba9ee093056b21f3d3c8aa975dc304043a8d3a7..b9cd9fcb4be875946d42537f3c75a1997b9a8826 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -168,8 +168,10 @@ void INSERT_TASK_hgemm( const RUNTIME_option_t *options,
                         const CHAM_desc_t *B, int Bm, int Bn,
                         CHAMELEON_Real16_t beta,
                         const CHAM_desc_t *C, int Cm, int Cn );
-void INSERT_TASK_ipiv_init ( const RUNTIME_option_t *options,
-                             CHAM_ipiv_t *ipiv );
+void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
+                            CHAM_ipiv_t *ipiv );
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t *ipiv );
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ws, int k, int h, int rank );
 void INSERT_TASK_ipiv_to_perm( const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c
--- a/runtime/starpu/codelets/codelet_ipiv.c
+++ b/runtime/starpu/codelets/codelet_ipiv.c
@@ -18,17 +18,30 @@
  *
  */
 #include "chameleon_starpu_internal.h"
-#include "runtime_codelets.h"
+#include <stdio.h>
+#include <stdlib.h>
 
-static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg)
+/**
+ * Arguments handed to cl_ipiv_init_data_cpu_func() through STARPU_CL_ARGS.
+ * The kernel only reads n and data; m0 and m are filled in by
+ * INSERT_TASK_ipiv_init_data() but are not read on the worker side.
+ */
+struct cl_laswp_args_s {
+    int  m0;   /* Offset of the tile's first entry in ipiv->data (m * mb) */
+    int  n;    /* Number of entries copied into the tile                  */
+    int  m;    /* Copy of ipiv->desc->m                                   */
+    int *data; /* ipiv->data + m0                                         */
+};
+
+static void cl_ipiv_init_cpu_func( void *descr[], void *cl_arg )
 {
 #if !defined(CHAMELEON_SIMULATION)
-    int *ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
+    int *ipiv = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
     int i, m0, n;
 
     starpu_codelet_unpack_args( cl_arg, &m0, &n );
 
-    for( i=0; i<n; i++ ) {
+    for( i = 0; i < n; i++ ) {
         ipiv[i] = m0 + i + 1;
     }
 #endif
@@ -47,10 +60,10 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     int64_t mb = ipiv->mb;
     int m;
 
-    for (m = 0; m < mt; m++) {
+    for ( m = 0; m < mt; m++ ) {
         starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m );
         int m0 = m * mb;
-        int n = (m == (mt-1)) ? ipiv->m - m0 : mb;
+        int n = ( m == ( mt - 1 ) ) ? ipiv->m - m0 : mb;
 
         rt_starpu_insert_task(
             &cl_ipiv_init,
@@ -61,6 +74,78 @@ void INSERT_TASK_ipiv_init( const RUNTIME_option_t *options,
     }
 }
 
+/**
+ * CPU kernel of cl_ipiv_init_data: copies clargs->n integers from
+ * clargs->data into the vector registered as descr[0].
+ */
+static void cl_ipiv_init_data_cpu_func( void *descr[], void *cl_arg )
+{
+#if !defined(CHAMELEON_SIMULATION)
+    struct cl_laswp_args_s *clargs = (struct cl_laswp_args_s *)cl_arg;
+
+    int *ipiv = (int *)STARPU_VECTOR_GET_PTR( descr[0] );
+    int  n    = clargs->n;
+    int  i;
+
+    for( i = 0; i < n; i++ ) {
+        ipiv[i] = clargs->data[i];
+    }
+#endif
+}
+
+struct starpu_codelet cl_ipiv_init_data = {
+    .where    = STARPU_CPU,
+    .cpu_func = cl_ipiv_init_data_cpu_func,
+    .nbuffers = 1,
+};
+
+/**
+ * Inserts one cl_ipiv_init_data task per tile of ipiv, each copying the
+ * matching slice of ipiv->data into the tile. Returns without inserting
+ * anything when ipiv->data is NULL.
+ *
+ * NOTE(review): the kernel dereferences ipiv->data when the task runs, so the
+ * array presumably has to stay valid until the tasks complete - confirm.
+ */
+void INSERT_TASK_ipiv_init_data( const RUNTIME_option_t *options,
+                                 CHAM_ipiv_t *ipiv )
+{
+    int64_t mt = ipiv->mt;
+    int64_t mb = ipiv->mb;
+    int m;
+
+    if ( ipiv->data == NULL ) {
+        return;
+    }
+
+    for ( m = 0; m < mt; m++ ) {
+        starpu_data_handle_t ipiv_src = RUNTIME_ipiv_getaddr( ipiv, m );
+        struct cl_laswp_args_s *cl_args;
+        int m0, n;
+
+        m0 = m * mb;
+        n = ( m == ( mt - 1 ) ) ? ipiv->m - m0 : mb;
+
+        cl_args = malloc( sizeof( *cl_args ) );
+        if ( cl_args == NULL ) {
+            /* Fail loudly rather than write through a NULL pointer below */
+            fprintf( stderr, "INSERT_TASK_ipiv_init_data: out of memory\n" );
+            abort();
+        }
+        cl_args->m0 = m0;
+        cl_args->n = n;
+        cl_args->m = ipiv->desc->m;
+        cl_args->data = ipiv->data + m0;
+
+        /* cl_args is not freed here: it is handed over through STARPU_CL_ARGS */
+        rt_starpu_insert_task(
+            &cl_ipiv_init_data,
+            STARPU_CL_ARGS, cl_args, sizeof(struct cl_laswp_args_s),
+            STARPU_W, ipiv_src,
+            0 );
+    }
+}
+
 void INSERT_TASK_ipiv_reducek( const RUNTIME_option_t *options,
                                CHAM_ipiv_t *ipiv, int k, int h, int rank )
 {