Commit 16b7c4ef, authored by PRUVOST Florent, committed by Mathieu Faverge

Add an example + utest for using MPI subcommunicators

Parent: bd7d2bf0
Merge request !446: Example comm split
@@ -27,12 +27,16 @@ if (CHAMELEON_SIMULATION)
 endif()
 if (CHAMELEON_PREC_D)
   add_subdirectory(lapack_to_chameleon)
 else()
   message(WARNING "CHAMELEON_PREC_D is set to OFF so that lapack_to_chameleon "
                   "and out_core tutorials cannot be built (use only double arithmetic "
                   "precision).\n Please set CHAMELEON_PREC_D to ON if you want to build "
                   "executables of this tutorial.")
+endif()
+if(CHAMELEON_USE_MPI AND MPI_C_FOUND AND CHAMELEON_SCHED_STARPU)
+  add_subdirectory(mpi)
 endif()
 ###
###
#
# @file CMakeLists.txt
#
# @copyright 2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# Univ. Bordeaux. All rights reserved.
#
###
#
# CHAMELEON example routines
# CHAMELEON is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI,
# University of Bordeaux, Bordeaux INP
#
# @version 1.3.0
# @author Florent Pruvost
# @author Mathieu Faverge
# @date 2024-03-19
#
###
# Launcher used for the MPI tests: 4 processes, no core binding
set(MPICMD mpiexec --bind-to none -n 4)

# One executable is built per source file listed here
set(SOURCES
  comm_split.c
  )

foreach(_src ${SOURCES})
  get_filename_component(_name_exe ${_src} NAME_WE)
  add_executable(${_name_exe} ${_src})
  target_link_libraries(${_name_exe} PRIVATE chameleon coreblas MORSE::LAPACKE)
  install(TARGETS ${_name_exe} DESTINATION bin/chameleon/mpi)
  # Register the example as a CTest test launched through ${MPICMD}
  add_test(example_mpi_${_name_exe} ${MPICMD} ./${_name_exe})
endforeach()
###
### END CMakeLists.txt
###
/**
*
* @file comm_split.c
*
* @copyright 2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon comm_split example
*
* @version 1.3.0
* @author Florent Pruvost
* @author Mathieu Faverge
* @date 2024-03-19
*
*/
#include "comm_split.h"
/**
 * @brief comm_split introduces how to use CHAMELEON with MPI
 * subcommunicators.
 * @details This example shows that Chameleon can be used with custom MPI
 * communicators (different from MPI_COMM_WORLD). Here two different algorithms
 * (potrf and getrf_nopiv) are called at the same time on two different
 * communicators: A (ranks 0, 2) and B (ranks 1, 3). To run this program,
 * CHAMELEON must be built with the StarPU runtime system and with MPI enabled
 * at configure time. The program is meant to be run with 4 MPI processes, and
 * the matrices are distributed in 2D block-cyclic fashion with P=2, Q=1.
 */
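/*
 * Typical invocation (the values below are illustrative, not prescribed by
 * the example):
 *
 *     mpiexec --bind-to none -n 4 ./comm_split --n=2000 --nb=250 --cpus=4
 *
 * Ranks 0 and 2 then run the Cholesky solve on subcommunicator A while ranks
 * 1 and 3 run the LU (no pivoting) solve on subcommunicator B.
 */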
int main(int argc, char *argv[]) {
/* Check that MPI has threads support */
int thread_support;
if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
{
fprintf(stderr,"MPI_Init_thread failed\n");
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
if (thread_support == MPI_THREAD_FUNNELED)
fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
if (thread_support < MPI_THREAD_FUNNELED)
fprintf(stderr,"Warning: MPI does not have thread support!\n");
/* Check that 4 MPI processes are used */
int comm_size;
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
if(comm_size != 4)
{
printf("This application is meant to be run with 4 MPI processes, not %d.\n", comm_size);
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
/* Get my rank in the global communicator */
int my_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
/* Determine the colour and key based on whether my rank is even. */
char subcommunicator;
int colour;
int key = my_rank/2;
if(my_rank % 2 == 0)
{
subcommunicator = 'A';
colour = 0;
}
else
{
subcommunicator = 'B';
colour = 1;
}
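/*
 * Resulting mapping for 4 processes (colour = rank % 2, key = rank / 2):
 *   world rank 0 -> colour 0, key 0 -> rank 0 in subcommunicator A
 *   world rank 1 -> colour 1, key 0 -> rank 0 in subcommunicator B
 *   world rank 2 -> colour 0, key 1 -> rank 1 in subcommunicator A
 *   world rank 3 -> colour 1, key 1 -> rank 1 in subcommunicator B
 */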
/* Split the global communicator */
MPI_Comm new_comm;
MPI_Comm_split(MPI_COMM_WORLD, colour, key, &new_comm);
/* Get my rank in the new communicator */
int my_new_comm_rank;
MPI_Comm_rank(new_comm, &my_new_comm_rank);
/* Print my new rank and new communicator */
printf("[MPI process %d] MPI process %d in subcommunicator %c.\n", my_rank, my_new_comm_rank, subcommunicator);
/* initialize some parameters with default values */
int iparam[IPARAM_SIZEOF];
memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));
init_iparam(iparam);
/* read arguments */
read_args(argc, argv, iparam);
int N = iparam[IPARAM_N]; // matrix order
int NB = iparam[IPARAM_NB]; // number of rows and columns in tiles
int NRHS = iparam[IPARAM_NRHS]; // number of RHS vectors
int NCPU = iparam[IPARAM_NCPU]; // number of CPU cores to use
int P = 2;
int Q = 1;
/* Initialize CHAMELEON with custom communicator */
CHAMELEON_InitParComm( NCPU, 0, 1, new_comm );
CHAMELEON_Set(CHAMELEON_TILE_SIZE, iparam[IPARAM_NB] );
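/*
 * From here on, each pair of processes runs its own Chameleon/StarPU instance
 * on new_comm: the 2D block-cyclic distribution below (P=2, Q=1) spreads each
 * matrix over the two ranks of that subcommunicator only.
 */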
/* declarations to time the program and evaluate performance */
double flops, gflops, cpu_time;
if(my_rank % 2 == 0)
{
/* Cholesky on subcommunicator A i.e. 0,2 */
/* Initialize matrices */
CHAM_desc_t *descA, *descAC, *descB, *descX;
CHAMELEON_Desc_Create( &descA, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, P, Q );
CHAMELEON_Desc_Create( &descAC, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, P, Q );
CHAMELEON_Desc_Create( &descB, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, P, Q );
CHAMELEON_Desc_Create( &descX, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, P, Q );
CHAMELEON_dplgsy_Tile( (double)N, ChamUpperLower, descA, 20 );
CHAMELEON_dplrnt_Tile( descB, 21 );
CHAMELEON_dlacpy_Tile( ChamUpperLower, descA, descAC);
CHAMELEON_dlacpy_Tile( ChamUpperLower, descB, descX);
cpu_time = -CHAMELEON_timer();
/* Solve AX = B by Cholesky factorization */
CHAMELEON_dposv_Tile( ChamLower, descA, descX );
cpu_time += CHAMELEON_timer();
flops = flops_dpotrf( N ) + flops_dpotrs( N, NRHS );
gflops = flops * 1.e-9 / cpu_time;
if ( CHAMELEON_Comm_rank() == 0 ) {
printf( "\nCholesky performance on subcommunicator %c:\n", subcommunicator);
print_header( argv[0], iparam);
printf( "%9.3f %9.2f\n", cpu_time, gflops);
}
fflush( stdout );
/* compute norms to check the result */
check( descAC, descX, descB );
CHAMELEON_Desc_Destroy( &descX );
CHAMELEON_Desc_Destroy( &descAC );
CHAMELEON_Desc_Destroy( &descB );
CHAMELEON_Desc_Destroy( &descA );
}
else
{
/* LU nopiv on subcommunicator B i.e. 1,3 */
/* Initialize matrices */
CHAM_desc_t *descA, *descAC, *descB, *descX;
CHAMELEON_Desc_Create( &descA, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, P, Q );
CHAMELEON_Desc_Create( &descAC, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, P, Q );
CHAMELEON_Desc_Create( &descB, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, P, Q );
CHAMELEON_Desc_Create( &descX, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, P, Q );
CHAMELEON_dplgtr_Tile( 0 , ChamUpper, descA, 10 );
CHAMELEON_dplgtr_Tile( (double)N, ChamLower, descA, 11 );
CHAMELEON_dplrnt_Tile( descB, 12 );
CHAMELEON_dlacpy_Tile( ChamUpperLower, descA, descAC);
CHAMELEON_dlacpy_Tile( ChamUpperLower, descB, descX);
cpu_time = -CHAMELEON_timer();
/* Solve AX = B by LU factorization */
CHAMELEON_dgesv_nopiv_Tile( descAC, descX );
cpu_time += CHAMELEON_timer();
flops = flops_dgetrf( N, N ) + flops_dgetrs( N, NRHS );
gflops = flops * 1.e-9 / cpu_time;
if ( CHAMELEON_Comm_rank() == 0 ) {
printf( "\nLU nopiv performance on subcommunicator %c:\n", subcommunicator);
print_header( argv[0], iparam);
printf( "%9.3f %9.2f\n", cpu_time, gflops);
}
fflush( stdout );
/* compute norms to check the result */
check( descA, descX, descB );
CHAMELEON_Desc_Destroy( &descX );
CHAMELEON_Desc_Destroy( &descAC );
CHAMELEON_Desc_Destroy( &descB );
CHAMELEON_Desc_Destroy( &descA );
}
CHAMELEON_Finalize();
MPI_Finalize();
return EXIT_SUCCESS;
}
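Condensed to its core, the pattern demonstrated by comm_split.c is: split the world communicator, hand the subcommunicator to CHAMELEON_InitParComm, then run any tile algorithm as usual. A minimal sketch, assuming a Chameleon build with StarPU and MPI and a 4-process run as in the example; the matrix size, tile size, worker count, and random seed below are illustrative only:

#include <mpi.h>
#include <chameleon.h>

int main( int argc, char *argv[] ) {
    int provided, rank;
    MPI_Init_thread( &argc, &argv, MPI_THREAD_SERIALIZED, &provided );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Split MPI_COMM_WORLD: even ranks in one group, odd ranks in the other */
    MPI_Comm sub_comm;
    MPI_Comm_split( MPI_COMM_WORLD, rank % 2, rank / 2, &sub_comm );

    /* Start Chameleon on the subcommunicator (4 CPU workers, no GPU) */
    CHAMELEON_InitParComm( 4, 0, 1, sub_comm );

    /* Each group of two processes factorizes its own SPD matrix,
     * independently of the other group (2x1 process grid) */
    int N = 1000, NB = 250;
    CHAM_desc_t *descA;
    CHAMELEON_Desc_Create( &descA, CHAMELEON_MAT_ALLOC_TILE, ChamRealDouble,
                           NB, NB, NB*NB, N, N, 0, 0, N, N, 2, 1 );
    CHAMELEON_dplgsy_Tile( (double)N, ChamUpperLower, descA, 51 );
    CHAMELEON_dpotrf_Tile( ChamLower, descA );

    CHAMELEON_Desc_Destroy( &descA );
    CHAMELEON_Finalize();
    MPI_Comm_free( &sub_comm );
    MPI_Finalize();
    return 0;
}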
/**
*
* @file comm_split.h
*
* @copyright 2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon comm_split example header
*
* @version 1.3.0
* @author Florent Pruvost
* @author Mathieu Faverge
* @date 2024-03-19
*
*/
#ifndef _comm_split_h_
#define _comm_split_h_
#include <coreblas/lapacke.h>
#include <chameleon.h>
#include <chameleon/timer.h>
#include <chameleon/flops.h>
#include <mpi.h>
#include <string.h>
/* Integer parameters for comm_split */
enum iparam_comm_split {
IPARAM_NCPU, /* Number of CPUs */
IPARAM_N, /* Number of columns/rows of the matrix */
IPARAM_NB, /* Number of columns/rows in a tile */
IPARAM_NRHS, /* Number of RHS */
/* End */
IPARAM_SIZEOF
};
/* Specific routines used in comm_split.c main program */
/**
* Initialize integer parameters
*/
static void init_iparam(int iparam[IPARAM_SIZEOF]){
iparam[IPARAM_NCPU ] = -1;
iparam[IPARAM_N ] = 1000;
iparam[IPARAM_NB ] = 500;
iparam[IPARAM_NRHS ] = 1;
}
/**
* Print how to use the program
*/
static void show_help(char *prog_name) {
printf( "Usage:\n%s [options]\n\n", prog_name );
printf( "Options are:\n"
" --help Show this help\n"
"\n"
" --n=X dimension (N). (default: 1000)\n"
" --nb=X NB size. (default: 500)\n"
" --nrhs=X number of RHS. (default: 1)\n"
"\n"
" --cpus=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n"
"\n");
}
static int startswith(const char *s, const char *prefix) {
size_t n = strlen( prefix );
if (strncmp( s, prefix, n ))
return 0;
return 1;
}
/**
* Read arguments following comm_split program call
*/
static void read_args(int argc, char *argv[], int *iparam){
int i;
for (i = 1; i < argc && argv[i]; ++i) {
if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") ||
startswith( argv[i], "--h") || startswith( argv[i], "-h") ) {
show_help( argv[0] );
exit(0);
} else if (startswith( argv[i], "--n=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) );
} else if (startswith( argv[i], "--nb=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
} else if (startswith( argv[i], "--nrhs=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) );
} else if (startswith( argv[i], "--cpus=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NCPU]) );
} else {
fprintf( stderr, "Unknown option: %s\n", argv[i] );
}
}
}
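/*
 * Example (hypothetical command line): "./comm_split --n=2000 --nb=250 --cpus=8"
 * sets iparam[IPARAM_N] = 2000, iparam[IPARAM_NB] = 250 and
 * iparam[IPARAM_NCPU] = 8, while iparam[IPARAM_NRHS] keeps its default of 1.
 */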
/**
* Print a header message to summarize main parameters
*/
static void print_header(char *prog_name, int * iparam) {
double eps = LAPACKE_dlamch_work( 'e' );
printf( "#\n"
"# CHAMELEON %d.%d.%d, %s\n"
"# Nb cpu: %d\n"
"# N: %d\n"
"# NB: %d\n"
"# eps: %e\n"
"#\n",
CHAMELEON_VERSION_MAJOR,
CHAMELEON_VERSION_MINOR,
CHAMELEON_VERSION_MICRO,
prog_name,
iparam[IPARAM_NCPU],
iparam[IPARAM_N],
iparam[IPARAM_NB],
eps );
printf( "# M N K/NRHS seconds Gflop/s\n");
printf( "#%7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS]);
fflush( stdout );
return;
}
/**
 * Check that AX = B is solved accurately: the solution is accepted when the
 * scaled residual ||AX-B|| / (N * eps * (||A|| * ||X|| + ||B||)) stays below 100
 */
static void check(CHAM_desc_t *A, CHAM_desc_t *X, CHAM_desc_t *B) {
double anorm = CHAMELEON_dlange_Tile( ChamInfNorm, A);
double xnorm = CHAMELEON_dlange_Tile( ChamInfNorm, X);
double bnorm = CHAMELEON_dlange_Tile( ChamInfNorm, B);
CHAMELEON_dgemm_Tile( ChamNoTrans, ChamNoTrans,
1.0, A, X, -1.0, B );
double res = CHAMELEON_dlange_Tile( ChamInfNorm, B );
double eps = LAPACKE_dlamch_work( 'e' );
int N = X->lm;
int hres = ( res / N / eps / (anorm * xnorm + bnorm ) > 100.0 );
if ( CHAMELEON_Comm_rank() == 0 ){
printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n");
if (hres) {
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
}
else {
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
}
}
return;
}
#endif /* _comm_split_h_ */