Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b003a628, authored by LISITO Alycia and committed by Mathieu Faverge
Browse files

zgetrf: add new reduction in codelet_zipiv_allreduce.c

parent 510f4e73
No related branches found
No related tags found
1 merge request: !478 "Reduction"
...@@ -562,4 +562,12 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options, ...@@ -562,4 +562,12 @@ void INSERT_TASK_zgetrf_blocked_trsm( const RUNTIME_option_t *options,
CHAM_desc_t *U, int Um, int Un, CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ws ); CHAM_ipiv_t *ws );
void INSERT_TASK_zipiv_allreduce( CHAM_desc_t *A,
const RUNTIME_option_t *options,
CHAM_ipiv_t *ipiv,
int *proc_involved,
int k,
int h,
int n );
#endif /* _chameleon_tasks_z_h_ */ #endif /* _chameleon_tasks_z_h_ */
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
# @author Florent Pruvost # @author Florent Pruvost
# @author Philippe Virouleau # @author Philippe Virouleau
# @author Matthieu Kuhn # @author Matthieu Kuhn
# @author Alycia Lisito
# @date 2024-03-16 # @date 2024-03-16
# #
### ###
...@@ -73,6 +74,7 @@ set(CODELETS_ZSRC ...@@ -73,6 +74,7 @@ set(CODELETS_ZSRC
codelets/codelet_zhe2ge.c codelets/codelet_zhe2ge.c
codelets/codelet_zherfb.c codelets/codelet_zherfb.c
codelets/codelet_zhessq.c codelets/codelet_zhessq.c
codelets/codelet_zipiv_allreduce.c
codelets/codelet_zlacpy.c codelets/codelet_zlacpy.c
codelets/codelet_zlange.c codelets/codelet_zlange.c
codelets/codelet_zlanhe.c codelets/codelet_zlanhe.c
......
/**
*
* @file starpu/codelet_zipiv_allreduce.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon StarPU codelets to do the all-reduce of the pivot candidates
*
* @version 1.3.0
* @author Alycia Lisito
* @date 2024-06-11
* @precisions normal z -> c d s
*
*/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
#include <coreblas/cblas_wrapper.h>
#include <stdio.h>
#include <stdlib.h>
#if defined ( CHAMELEON_USE_MPI )
/* Arguments handed to the zipiv_allreduce codelet through its cl_arg pointer. */
struct cl_redux_args_t {
int h; /* Index into the pivot rows at which the two candidates are compared */
int n; /* Number of elements copied from the pivot and diagonal rows */
int k; /* Filled in by the submitter; not read by the CPU kernel in this file */
};
/**
 * @brief CPU kernel merging a received pivot candidate into the local one.
 *
 * descr[0] is the local pivot interface (submitted in read/write mode) and
 * descr[1] the source one (submitted read-only). The candidate whose entry at
 * index h has the largest modulus is kept: when the source wins, its block
 * coordinates and its pivot row overwrite the local ones. Independently, the
 * diagonal row is imported when the source holds it (has_diag == 1) while the
 * local interface does not (has_diag == -1).
 *
 * @param[in,out] descr
 *          StarPU buffers: [0] local cppi interface, [1] source cppi interface.
 *
 * @param[in] cl_arg
 *          Pointer to a struct cl_redux_args_t; only h and n are read here.
 */
static void cl_zipiv_allreduce_cpu_func( void *descr[], void *cl_arg )
{
    struct cl_redux_args_t *clargs      = (struct cl_redux_args_t *) cl_arg;
    cppi_interface_t       *cppi_me     = ((cppi_interface_t *) descr[0]);
    cppi_interface_t       *cppi_src    = ((cppi_interface_t *) descr[1]);
    CHAM_pivot_t           *nextpiv_me  = &(cppi_me->pivot);
    CHAM_pivot_t           *nextpiv_src = &(cppi_src->pivot);
    CHAMELEON_Complex64_t  *pivrow_me   = (CHAMELEON_Complex64_t *)(nextpiv_me->pivrow);
    CHAMELEON_Complex64_t  *pivrow_src  = (CHAMELEON_Complex64_t *)(nextpiv_src->pivrow);

    cppi_display_dbg( cppi_me,  stderr, "Global redux Inout: ");
    cppi_display_dbg( cppi_src, stderr, "Global redux Input: ");

    /* Both interfaces must describe the same panel step and storage */
    assert( cppi_me->n         == cppi_src->n         );
    assert( cppi_me->h         == cppi_src->h         );
    assert( cppi_me->flttype   == cppi_src->flttype   );
    assert( cppi_me->arraysize == cppi_src->arraysize );

    /* Keep the candidate with the largest modulus at index h */
    if ( cabs( pivrow_src[ clargs->h ] ) > cabs( pivrow_me[ clargs->h ] ) ) {
        nextpiv_me->blkm0  = nextpiv_src->blkm0;
        nextpiv_me->blkidx = nextpiv_src->blkidx;
        cblas_zcopy( clargs->n, pivrow_src, 1, pivrow_me, 1 );
    }

    /* Let's copy the diagonal row if needed */
    if ( ( cppi_src->has_diag == 1 ) &&
         ( cppi_me->has_diag == -1 ) )
    {
        /* Check the row length before the copy, not after it has happened */
        assert( cppi_src->arraysize == clargs->n * sizeof(CHAMELEON_Complex64_t) );
        cblas_zcopy( clargs->n, nextpiv_src->diagrow, 1, nextpiv_me->diagrow, 1 );
        cppi_me->has_diag = 1;
    }

    cppi_display_dbg( cppi_me, stderr, "Global redux Inout(After): ");
}
/*
 * Codelet declaration for cl_zipiv_allreduce_cpu_func.
 * NOTE(review): presumably this expands to the definition of cl_zipiv_allreduce,
 * whose address is passed to rt_starpu_insert_task in this file -- confirm
 * against the CODELETS_CPU macro definition.
 */
CODELETS_CPU( zipiv_allreduce, cl_zipiv_allreduce_cpu_func )
/**
 * @brief Submit the sending half of one all-reduce exchange.
 *
 * Inserts a task with no codelet (NULL) that accesses, in read mode, the pivot
 * handle returned by RUNTIME_pivot_getaddr( ipiv, me, k, h ) and that is
 * flagged to execute on node dst.
 * NOTE(review): presumably the runtime turns this into a transfer of the local
 * pivot data to dst -- confirm against the rt_starpu_insert_task definition.
 *
 * @param[in] ipiv     Pivot descriptor forwarded to RUNTIME_pivot_getaddr.
 * @param[in] me       Rank whose pivot handle is read.
 * @param[in] dst      Node on which the task is requested to execute.
 * @param[in] k        Index forwarded to RUNTIME_pivot_getaddr.
 * @param[in] h        Index forwarded to RUNTIME_pivot_getaddr.
 * @param[in] options  Runtime options; options->priority sets the task priority.
 */
void
INSERT_TASK_zipiv_allreduce_send( CHAM_ipiv_t *ipiv,
int me,
int dst,
int k,
int h,
const RUNTIME_option_t *options )
{
rt_starpu_insert_task(
NULL, /* no codelet attached to this task */
STARPU_EXECUTE_ON_NODE, dst,
STARPU_R, RUNTIME_pivot_getaddr( ipiv, me, k, h ),
STARPU_PRIORITY, options->priority,
0 );
}
/**
 * @brief Submit the receiving half of one all-reduce exchange.
 *
 * Inserts the cl_zipiv_allreduce codelet on node me: the pivot handle of rank
 * me is accessed in read/write mode and the one of rank src in read mode. Once
 * the task is submitted, starpu_mpi_cache_flush() is called on the src handle.
 *
 * The codelet arguments are heap-allocated and handed to the runtime through
 * STARPU_CL_ARGS, so they are not freed here.
 * NOTE(review): StarPU is expected to release the buffer when the task is
 * destroyed -- confirm against the STARPU_CL_ARGS documentation.
 *
 * The function returns void, so an allocation failure cannot be reported to
 * the caller; silently skipping the task would leave the reduction
 * inconsistent between ranks, hence the explicit abort().
 *
 * @param[in] ipiv     Pivot descriptor forwarded to RUNTIME_pivot_getaddr.
 * @param[in] me       Rank owning the handle updated by the codelet.
 * @param[in] src      Rank owning the handle read by the codelet.
 * @param[in] k        Index forwarded to RUNTIME_pivot_getaddr; stored in the
 *                     codelet arguments.
 * @param[in] h        Index forwarded to RUNTIME_pivot_getaddr; stored in the
 *                     codelet arguments.
 * @param[in] n        Number of row elements; stored in the codelet arguments.
 * @param[in] options  Runtime options (priority, workerid, sequence->comm).
 */
void
INSERT_TASK_zipiv_allreduce_recv( CHAM_ipiv_t            *ipiv,
                                  int                     me,
                                  int                     src,
                                  int                     k,
                                  int                     h,
                                  int                     n,
                                  const RUNTIME_option_t *options )
{
    struct cl_redux_args_t *clargs = malloc( sizeof( *clargs ) );

    if ( clargs == NULL ) {
        fprintf( stderr,
                 "INSERT_TASK_zipiv_allreduce_recv: failed to allocate the codelet arguments\n" );
        abort();
    }

    clargs->h = h;
    clargs->n = n;
    clargs->k = k;

    rt_starpu_insert_task(
        &cl_zipiv_allreduce,
        STARPU_CL_ARGS,           clargs, sizeof(struct cl_redux_args_t),
        STARPU_RW,                RUNTIME_pivot_getaddr( ipiv, me,  k, h ),
        STARPU_R,                 RUNTIME_pivot_getaddr( ipiv, src, k, h ),
        STARPU_EXECUTE_ON_NODE,   me,
        STARPU_EXECUTE_ON_WORKER, options->workerid,
        STARPU_PRIORITY,          options->priority,
        0 );

    /* Flush the communication cache entry of the remote pivot once it has been consumed */
    starpu_mpi_cache_flush( options->sequence->comm, RUNTIME_pivot_getaddr( ipiv, src, k, h ) );
}
/**
 * @brief Submit the exchanges of the pivot all-reduce for step h of panel k.
 *
 * The number of participating processes is min( A->p, A->mt - k ). When h > 0,
 * the local pivot handle of step h-1 is first invalidated. Nothing more is
 * done when h >= ipiv->n, or when a single process participates. Otherwise the
 * position of A->myrank in proc_involved is located, and rounds of paired
 * send/recv submissions are issued with a partner distance that doubles at
 * every round, while the round counter is halved (rounded up) until it
 * reaches 1.
 *
 * @param[in] A              Descriptor providing p, mt and myrank.
 * @param[in] options        Runtime options forwarded to the send/recv helpers.
 * @param[in] ipiv           Pivot descriptor.
 * @param[in] proc_involved  Ranks taking part in the reduction; must contain
 *                           A->myrank (checked by assert).
 * @param[in] k              Panel index.
 * @param[in] h              Step index within the panel.
 * @param[in] n              Row length forwarded to the recv helper.
 */
void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
                                  const RUNTIME_option_t *options,
                                  CHAM_ipiv_t            *ipiv,
                                  int                    *proc_involved,
                                  int                     k,
                                  int                     h,
                                  int                     n )
{
    const int nprocs = chameleon_min( A->p, A->mt - k );
    int       my_idx, rounds_left, dist;

    /* The pivot of the previous step is no longer needed */
    if ( h > 0 ) {
        starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) );
    }

    if ( h >= ipiv->n ) {
        return;
    }

    /* Alone in the reduction: nothing to exchange */
    if ( nprocs == 1 ) {
        assert( proc_involved[0] == A->myrank );
        return;
    }

    /* Locate the local rank within the list of participants */
    my_idx = 0;
    while ( ( my_idx < nprocs ) && ( proc_involved[my_idx] != A->myrank ) ) {
        my_idx++;
    }
    assert( my_idx < nprocs );

    /* One send/recv pair per round, doubling the partner distance each time */
    for ( rounds_left = nprocs, dist = 1;
          rounds_left > 1;
          rounds_left = chameleon_ceil( rounds_left, 2 ), dist <<= 1 )
    {
        const int dst = proc_involved[ ( my_idx + dist ) % nprocs ];
        const int src = proc_involved[ ( my_idx - dist + nprocs ) % nprocs ];

        INSERT_TASK_zipiv_allreduce_send( ipiv, A->myrank, dst, k, h, options );
        INSERT_TASK_zipiv_allreduce_recv( ipiv, A->myrank, src, k, h, n, options );
    }
}
#else
/**
 * @brief Variant of the pivot all-reduce submission built without MPI.
 *
 * No exchange is submitted: the only action is to invalidate the local pivot
 * handle of step h-1 when h > 0.
 *
 * @param[in] A              Descriptor providing myrank.
 * @param[in] options        Unused in this variant.
 * @param[in] ipiv           Pivot descriptor.
 * @param[in] proc_involved  Unused in this variant.
 * @param[in] k              Panel index.
 * @param[in] h              Step index within the panel.
 * @param[in] n              Unused in this variant.
 */
void INSERT_TASK_zipiv_allreduce( CHAM_desc_t            *A,
                                  const RUNTIME_option_t *options,
                                  CHAM_ipiv_t            *ipiv,
                                  int                    *proc_involved,
                                  int                     k,
                                  int                     h,
                                  int                     n )
{
    /* Silence unused-parameter warnings for the arguments not read here */
    (void)n;
    (void)proc_involved;
    (void)options;

    if ( h <= 0 ) {
        return;
    }

    starpu_data_invalidate_submit( RUNTIME_pivot_getaddr( ipiv, A->myrank, k, h-1 ) );
}
#endif
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
* @version 1.3.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Matthieu Kuhn * @author Matthieu Kuhn
* @author Alycia Lisito
* @date 2023-08-22 * @date 2023-08-22
* *
*/ */
...@@ -44,6 +45,7 @@ struct cppi_interface_s ...@@ -44,6 +45,7 @@ struct cppi_interface_s
void cppi_interface_init(); void cppi_interface_init();
void cppi_interface_fini(); void cppi_interface_fini();
CHAM_pivot_t *cppi_handle_get( starpu_data_handle_t handle );
void cppi_register( starpu_data_handle_t *handleptr, void cppi_register( starpu_data_handle_t *handleptr,
cham_flttype_t flttype, cham_flttype_t flttype,
int n, int n,
...@@ -61,13 +63,14 @@ cppi_display_dbg( cppi_interface_t *cppi_interface, FILE *f, const char *title ) ...@@ -61,13 +63,14 @@ cppi_display_dbg( cppi_interface_t *cppi_interface, FILE *f, const char *title )
diagrow = cppi_interface->pivot.diagrow; diagrow = cppi_interface->pivot.diagrow;
pivrow = cppi_interface->pivot.pivrow; pivrow = cppi_interface->pivot.pivrow;
fprintf( f, "%sn=%2d, h=%2d, has_diag=%2d, m0=%2d, idx=%2d\n", fprintf( f, "%sn=%2d, h=%2d, has_diag=%2d, m0=%2d, idx=%2d, interf = %p\n",
title, title,
cppi_interface->n, cppi_interface->n,
cppi_interface->h, cppi_interface->h,
cppi_interface->has_diag, cppi_interface->has_diag,
cppi_interface->pivot.blkm0, cppi_interface->pivot.blkm0,
cppi_interface->pivot.blkidx ); cppi_interface->pivot.blkidx,
cppi_interface );
fprintf(stderr, "Diagonal row: " ); fprintf(stderr, "Diagonal row: " );
for( i=0; i<cppi_interface->n; i++) { for( i=0; i<cppi_interface->n; i++) {
......
...@@ -12,13 +12,14 @@ ...@@ -12,13 +12,14 @@
* @version 1.3.0 * @version 1.3.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Matthieu Kuhn * @author Matthieu Kuhn
* @author Alycia Lisito
* @date 2023-08-22 * @date 2023-08-22
* *
*/ */
#include "chameleon_starpu.h" #include "chameleon_starpu.h"
#undef HAVE_STARPU_REUSE_DATA_ON_NODE #undef HAVE_STARPU_REUSE_DATA_ON_NODE
static inline CHAM_pivot_t * CHAM_pivot_t *
cppi_handle_get( starpu_data_handle_t handle ) cppi_handle_get( starpu_data_handle_t handle )
{ {
cppi_interface_t *cppi_interface = (cppi_interface_t *) cppi_interface_t *cppi_interface = (cppi_interface_t *)
...@@ -38,7 +39,7 @@ cppi_init( void *data_interface ) ...@@ -38,7 +39,7 @@ cppi_init( void *data_interface )
cppi_interface_t *cppi_interface = (cppi_interface_t *)data_interface; cppi_interface_t *cppi_interface = (cppi_interface_t *)data_interface;
cppi_interface->id = CPPI_INTERFACE_ID; cppi_interface->id = CPPI_INTERFACE_ID;
cppi_interface->h = -1; cppi_interface->h = -1;
cppi_interface->has_diag = 0; cppi_interface->has_diag = -1;
} }
static void static void
...@@ -83,7 +84,7 @@ cppi_allocate_data_on_node( void *data_interface, unsigned node ) ...@@ -83,7 +84,7 @@ cppi_allocate_data_on_node( void *data_interface, unsigned node )
/* update the data properly in consequence */ /* update the data properly in consequence */
cppi_interface->h = -1; cppi_interface->h = -1;
cppi_interface->has_diag = 0; cppi_interface->has_diag = -1;
cppi_interface->pivot.pivrow = dataptr; cppi_interface->pivot.pivrow = dataptr;
cppi_interface->pivot.diagrow = ((char*)dataptr) + cppi_interface->arraysize; cppi_interface->pivot.diagrow = ((char*)dataptr) + cppi_interface->arraysize;
...@@ -279,8 +280,10 @@ cppi_describe( void *data_interface, char *buf, size_t size ) ...@@ -279,8 +280,10 @@ cppi_describe( void *data_interface, char *buf, size_t size )
{ {
cppi_interface_t *cppi_interface = (cppi_interface_t *) data_interface; cppi_interface_t *cppi_interface = (cppi_interface_t *) data_interface;
return snprintf( buf, size, "Pivot structure, n %d, blkm0 %d, blkidx %d", return snprintf( buf, size, "Pivot structure, n %d, h %d, has_diag = %d, blkm0 %d, blkidx %d",
cppi_interface->n, cppi_interface->n,
cppi_interface->h,
cppi_interface->has_diag,
cppi_interface->pivot.blkm0, cppi_interface->pivot.blkm0,
cppi_interface->pivot.blkidx ); cppi_interface->pivot.blkidx );
} }
...@@ -298,6 +301,7 @@ cppi_copy_any_to_any( void *src_interface, unsigned src_node, ...@@ -298,6 +301,7 @@ cppi_copy_any_to_any( void *src_interface, unsigned src_node,
STARPU_ASSERT( cppi_interface_src->flttype == cppi_interface_dst->flttype ); STARPU_ASSERT( cppi_interface_src->flttype == cppi_interface_dst->flttype );
cppi_interface_dst->h = cppi_interface_src->h; cppi_interface_dst->h = cppi_interface_src->h;
cppi_interface_dst->has_diag = cppi_interface_src->has_diag;
cppi_interface_dst->pivot.blkm0 = cppi_interface_src->pivot.blkm0; cppi_interface_dst->pivot.blkm0 = cppi_interface_src->pivot.blkm0;
cppi_interface_dst->pivot.blkidx = cppi_interface_src->pivot.blkidx; cppi_interface_dst->pivot.blkidx = cppi_interface_src->pivot.blkidx;
...@@ -402,8 +406,8 @@ cl_cppi_redux_cpu_func(void *descr[], void *cl_arg) ...@@ -402,8 +406,8 @@ cl_cppi_redux_cpu_func(void *descr[], void *cl_arg)
assert( cppi_redux->h == cppi_input->h ); assert( cppi_redux->h == cppi_input->h );
/* Let's copy the diagonal row if needed */ /* Let's copy the diagonal row if needed */
if ( cppi_input->has_diag ) { if ( cppi_input->has_diag == 1 ) {
assert( cppi_redux->has_diag == 0 ); assert( cppi_redux->has_diag == -1 );
memcpy( cppi_redux->pivot.diagrow, memcpy( cppi_redux->pivot.diagrow,
cppi_input->pivot.diagrow, cppi_input->pivot.diagrow,
...@@ -449,7 +453,7 @@ cl_cppi_init_redux_cpu_func( void *descr[], void *cl_arg ) ...@@ -449,7 +453,7 @@ cl_cppi_init_redux_cpu_func( void *descr[], void *cl_arg )
cppi_interface_t *cppi_redux = ((cppi_interface_t *) descr[0]); cppi_interface_t *cppi_redux = ((cppi_interface_t *) descr[0]);
/* Redux pivot never has diagonal at initialization */ /* Redux pivot never has diagonal at initialization */
cppi_redux->has_diag = 0; cppi_redux->has_diag = -1;
cppi_redux->h = -1; cppi_redux->h = -1;
size_t size = cppi_redux->arraysize; size_t size = cppi_redux->arraysize;
...@@ -497,7 +501,7 @@ cppi_register( starpu_data_handle_t *handleptr, ...@@ -497,7 +501,7 @@ cppi_register( starpu_data_handle_t *handleptr,
.id = CPPI_INTERFACE_ID, .id = CPPI_INTERFACE_ID,
.arraysize = n * CHAMELEON_Element_Size( flttype ), .arraysize = n * CHAMELEON_Element_Size( flttype ),
.flttype = flttype, .flttype = flttype,
.has_diag = 0, .has_diag = -1,
.h = -1, .h = -1,
.n = n, .n = n,
}; };
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment