diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a0d7de78590ef8ca16602b60bac1e0b30cf94f9..3f897fcdd6718df2df7fb1caef63200e90c20644 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,7 +196,7 @@ set(SOURCES src/z_spm_dof_extend.c src/z_spm_norm.c src/z_spm_scal.c - + src/z_spm_reduce_rhs.c src/z_spm_convert_to_csc.c src/z_spm_convert_to_csr.c src/z_spm_convert_to_ijv.c diff --git a/include/spm.h b/include/spm.h index 4cdf1caddbb99bf5fa6060d566ddff7997c06dde..0f46e18fe5ad5b21c348a7b901521eacc578eb53 100644 --- a/include/spm.h +++ b/include/spm.h @@ -11,7 +11,8 @@ * @author Xavier Lacoste * @author Pierre Ramet * @author Mathieu Faverge - * @date 2013-06-24 + * @author Tony Delarue + * @date 2020-06-09 * * @addtogroup spm * @{ @@ -206,6 +207,7 @@ int spmParseLaplacianInfo( const char * filename, */ void * spm2Dense( const spmatrix_t *spm ); void spmPrint( const spmatrix_t *spm, FILE *f ); +void spmPrintRHS( const spmatrix_t *spm, int n, const void *x, spm_int_t ldx, FILE *stream ); void spmPrintInfo( const spmatrix_t *spm, FILE *f ); void spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ); spmatrix_t *spmDofExtend( const spmatrix_t *spm, const int type, const int dof ); diff --git a/src/spm.c b/src/spm.c index 7db59d5915745d4a8c42c0b81aaa3ca582710cf2..b492e714e77512bd4855c37cc02ef549288872b8 100644 --- a/src/spm.c +++ b/src/spm.c @@ -993,6 +993,61 @@ spmPrint( const spmatrix_t *spm, } } +/** + ******************************************************************************* + * + * @brief Print a set of vector associated to an spm matrix. + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix. + * + * @param[in] n + * The number of columns of x. + * + * @param[in] x + * The set of vectors associated to the spm of size n-by-ldx. + * + * @param[in] ldx + * The local leading dimension of the set of vectors (ldx >= spm->n). + * + * @param[in] stream + * File to print the spm matrix. stdout, if stream == NULL. + * + *******************************************************************************/ +void +spmPrintRHS( const spmatrix_t *spm, + int n, + const void *x, + spm_int_t ldx, + FILE *stream ) +{ + if (stream == NULL) { + stream = stdout; + } + + switch(spm->flttype) + { + case SpmPattern: + /* Not handled for now */ + break; + case SpmFloat: + s_spmPrintRHS( stream, spm, n, x, ldx ); + break; + case SpmComplex32: + c_spmPrintRHS( stream, spm, n, x, ldx ); + break; + case SpmComplex64: + z_spmPrintRHS( stream, spm, n, x, ldx ); + break; + case SpmDouble: + d_spmPrintRHS( stream, spm, n, x, ldx ); + } + + return; +} + /** ******************************************************************************* * diff --git a/src/spm_gather.c b/src/spm_gather.c index a918ca69c5ec05239aa123252ac7277e75dbd683..82a1714346cefe020dc64d5c6622c7a8b9f89f69 100644 --- a/src/spm_gather.c +++ b/src/spm_gather.c @@ -24,8 +24,7 @@ * @param[in] spm * The spm to gather. * - * @return The array if triplets { n, nnz, nnzexp } for all nodes on root - * node(s), NULL otherwise. + * @return The array of triplets { n, nnz, nnzexp } for all nodes. */ static inline int * spm_gather_init( const spmatrix_t *spm ) @@ -48,8 +47,8 @@ spm_gather_init( const spmatrix_t *spm ) * @param[in] spm * The spm to gather. * - * @param[in] spm - * The spm to gather. + * @param[in] allcounts + * The array of triplets { n, nnz, nnzexp } for all nodes. 
*/ static inline int spm_gather_check( const spmatrix_t *spm, diff --git a/src/z_spm.h b/src/z_spm.h index f7eb42fe30ec1267c144b4c596fd5f00ce00f58f..4fbe1b06122721419c0bff646b3df1ccce6f075b 100644 --- a/src/z_spm.h +++ b/src/z_spm.h @@ -73,14 +73,16 @@ void z_spmSort( spmatrix_t *spm ); spm_int_t z_spmMergeDuplicate( spmatrix_t *spm ); spm_int_t z_spmSymmetrize( spmatrix_t *spm ); -int z_spmGenRHS(spm_rhstype_t type, int nrhs, const spmatrix_t *spm, void *x, int ldx, void *b, int ldb ); -int z_spmCheckAxb( spm_fixdbl_t eps, int nrhs, const spmatrix_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx ); +int z_spmGenRHS(spm_rhstype_t type, int nrhs, const spmatrix_t *spm, void *x, int ldx, void *b, int ldb ); +int z_spmCheckAxb( spm_fixdbl_t eps, int nrhs, const spmatrix_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx ); +spm_complex64_t *z_spmReduceRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root ); /** * Output routines */ void z_spmDensePrint( FILE *f, spm_int_t m, spm_int_t n, const spm_complex64_t *A, spm_int_t lda ); void z_spmPrint( FILE *f, const spmatrix_t *spm ); +void z_spmPrintRHS( FILE *f, const spmatrix_t *spm, int n, const void *x, spm_int_t ldx ); void z_spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ); void z_spmDofExtend( spmatrix_t *spm ); diff --git a/src/z_spm_2dense.c b/src/z_spm_2dense.c index 4f7715a95215ce1e71188eb63b81634b0f380e23..0910feb8d76e5949a27ee00ed49ec9000ee1c587 100644 --- a/src/z_spm_2dense.c +++ b/src/z_spm_2dense.c @@ -465,7 +465,7 @@ z_spmIJV2dense( const spmatrix_t *spm ) spm_complex64_t * z_spm2dense( const spmatrix_t *spm ) { - spm_complex64_t *A; + spm_complex64_t *A = NULL; if ( spm->loc2glob != NULL ) { fprintf( stderr, "spm2dense: Conversion to dense matrix with distributed spm is not available\n"); diff --git a/src/z_spm_genrhs.c b/src/z_spm_genrhs.c index be9fd922bdb8da14bd6bfc6355003cd542e46808..f466b8bef72ff76945146d70a3d89cb370954f68 100644 --- a/src/z_spm_genrhs.c +++ b/src/z_spm_genrhs.c @@ -79,63 +79,294 @@ Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { * * @ingroup spm_dev_rhs * - * @brief Generate a vector of random values. + * @brief Generate a value of the RHS * ******************************************************************************* * * @param[in] scale * Scaling factor for each randomized values. * - * @param[in] m - * The number of rows of the tile A. m >= 0. + * @param[inout] val + * The value that will be updated + * + * @param[inout] ran + * The current random value. + * + ******************************************************************************/ +static inline void +z_updateRndVal( double scale, + spm_complex64_t *val, + unsigned long long int *ran ) +{ + *val = (0.5f - (*ran) * RndF_Mul) * scale; + *ran = Rnd64_A * (*ran) + Rnd64_C; +#if defined(PRECISION_z) || defined(PRECISION_c) + *val += (I*(0.5f - (*ran) * RndF_Mul)) * scale; + *ran = Rnd64_A * (*ran) + Rnd64_C; +#endif +} + +/** + ******************************************************************************* + * + * @ingroup spm_dev_rhs + * + * @brief Generate a set of vectors of constant values. + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix associated to the right hand side. + * + * @param[in] baseval + * The basevalue of the spm. + * + * @param[in] alpha + * Initialize each value to alpha [ + I * alpha ] * * @param[in] n - * The number of columns of the tile A. 
n >= 0. + * The number of columns in A. n >= 0. * * @param[inout] A - * On entry, the m-by-n tile to be initialized. - * On exit, the tile initialized in the mtxtype format. + * On entry, the lda-by-n matrix to be initialized. + * On exit, the matrix initialized. * * @param[in] lda - * The leading dimension of the tile A. lda >= max(1,m). + * The leading dimension of the matrix A. lda >= max(1,spm->nexp). * - * @param[in] gM - * The global number of rows of the full matrix, A is belonging to. gM >= (m0+M). + ******************************************************************************/ +static inline void +z_spmRhsGenOne( const spmatrix_t *spm, spm_int_t baseval, + spm_complex64_t alpha, + spm_int_t n, spm_complex64_t *A, spm_int_t lda ) +{ + spm_complex64_t *tmp = A; + int64_t i, j, m = spm->nexp; + + /* + * Global version : the RHS is contiguous. The jump can follow the vector. + */ + for (j=0; j<n; ++j) { + for( i=0; i<m; i++, tmp++ ) + { +#if defined(PRECISION_z) || defined(PRECISION_c) + *tmp = (spm_complex64_t)(alpha + alpha * I); +#else + *tmp = (spm_complex64_t)alpha; +#endif + } + tmp += lda - i; + } + + (void)baseval; +} + +/** + ******************************************************************************* * - * @param[in] m0 - * The index of the first row of tile A in the full matrix. m0 >= 0. + * @ingroup spm_dev_rhs + * + * @brief Generate a set of vectors x[i] = scale * ( i [+ i* I ] ). + * + ******************************************************************************* * - * @param[in] n0 - * The index of the first column of tile A in the full matrix. n0 >= 0. + * @param[in] spm + * The sparse matrix associated to the right hand side. + * + * @param[in] baseval + * The basevalue of the spm. + * + * @param[in] scale + * Scaling factor for each value of the vector. + * + * @param[in] n + * The number of columns in A. n >= 0. + * + * @param[inout] A + * On entry, the lda-by-n matrix to be initialized. + * On exit, the matrix initialized. + * + * @param[in] lda + * The leading dimension of the matrix A. lda >= max(1,spm->nexp). + * + ******************************************************************************/ +static inline void +z_spmRhsGenI( const spmatrix_t *spm, spm_int_t baseval, + spm_complex64_t scale, + spm_int_t n, spm_complex64_t *A, spm_int_t lda ) +{ + spm_complex64_t *tmp = A; + spm_int_t i, j, k, ig, dofi, row; + const spm_int_t *dofs = spm->dofs; + + /* + * Global version : the RHS is contiguous. The jump can follow the vector. + */ + for (j=0; j<n; ++j) { + for( i=0; i<spm->n; i++ ) + { + ig = (spm->loc2glob != NULL) ? spm->loc2glob[i] - baseval: i; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + row++; /* To avoid 0 */ + for( k=0; k<dofi; k++, row++, tmp++ ) + { +#if defined(PRECISION_z) || defined(PRECISION_c) + *tmp = (spm_complex64_t)(row + row * I) * scale; +#else + *tmp = (spm_complex64_t)row * scale; +#endif + } + } + tmp += lda - spm->nexp; + } +} + +/** + ******************************************************************************* + * + * @ingroup spm_dev_rhs + * + * @brief Generate a set of vectors of random values in shared memory. + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix associated to the right hand side. + * + * @param[in] baseval + * The basevalue of the spm. + * + * @param[in] scale + * Scaling factor for each value of the vector. 
+ * + * @param[in] n + * The number of columns in A. n >= 0. + * + * @param[inout] A + * On entry, the lda-by-n matrix to be initialized. + * On exit, the matrix initialized. + * + * @param[in] lda + * The leading dimension of the matrix A. lda >= max(1,spm->nexp). + * + * @param[in] shift + * The initial shift in the random sequence. * * @param[in] seed * The seed used for random generation. Must be the same for * all tiles initialized with this routine. * ******************************************************************************/ -void -z_spmRndVect( double scale, int m, int n, spm_complex64_t *A, int lda, - int gM, int m0, int n0, unsigned long long int seed ) +static inline void +z_spmRhsGenRndShm( const spmatrix_t *spm, spm_int_t baseval, + spm_complex64_t scale, + spm_int_t n, spm_complex64_t *A, spm_int_t lda, + int shift, unsigned long long int seed ) { spm_complex64_t *tmp = A; - int64_t i, j; - unsigned long long int ran, jump; - - jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)gM; + int64_t i, j, m = spm->nexp; + unsigned long long int ran, jump = shift; - for (j=0; j<n; ++j ) { + /* + * Global version : the RHS is contiguous. The jump can follow the vector. + */ + for (j=0; j<n; ++j) { ran = Rnd64_jump( NBELEM*jump, seed ); for (i = 0; i < m; ++i) { - *tmp = (0.5f - ran * RndF_Mul) * scale; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += (I*(0.5f - ran * RndF_Mul)) * scale; - ran = Rnd64_A * ran + Rnd64_C; -#endif + z_updateRndVal( scale, tmp, &ran ); tmp++; } - tmp += lda-i; - jump += gM; + jump += spm->gNexp; + tmp += lda - spm->nexp; + } + + (void)baseval; +} + +/** + ******************************************************************************* + * + * @ingroup spm_dev_rhs + * + * @brief Generate a set of vectors of random values in distributed memory. + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix associated to the right hand side. + * + * @param[in] baseval + * The basevalue of the spm. + * + * @param[in] scale + * Scaling factor for each value of the vector. + * + * @param[in] n + * The number of columns in A. n >= 0. + * + * @param[inout] A + * On entry, the lda-by-n matrix to be initialized. + * On exit, the matrix initialized. + * + * @param[in] lda + * The leading dimension of the matrix A. lda >= max(1,spm->nexp). + * + * @param[in] shift + * The initial shift in the random sequence. + * + * @param[in] seed + * The seed used for random generation. Must be the same for + * all tiles initialized with this routine. + * + ******************************************************************************/ +void +z_spmRhsGenRndDist( const spmatrix_t *spm, spm_int_t baseval, + double scale, + spm_int_t n, spm_complex64_t *A, spm_int_t lda, + int shift, unsigned long long int seed ) +{ + spm_complex64_t *tmp = A; + spm_int_t i, j, k, ig, dofi; + unsigned long long int ran, jump; + unsigned long long int row, col; + const spm_int_t *l2g; + const spm_int_t *dofs = spm->dofs; + + assert( NULL != spm->loc2glob ); + assert( lda == spm->nexp ); + + /* + * Distributed version : the RHS might be distributed in a non-contiguous + * way, so the jump have to be recomputed with global index for each element. 
+ */ + for (j=0, col=0; j<n; j++, col++) { + l2g = spm->loc2glob; + for (i=0; i<spm->n; i++, l2g++ ) { + ig = *l2g - baseval; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + jump = row + col * (unsigned long long int)(spm->gNexp) + shift; + ran = Rnd64_jump( NBELEM*jump, seed ); + + for( k=0; k<dofi; k++, tmp++ ) { + z_updateRndVal( scale, tmp, &ran ); + } + } } } @@ -194,7 +425,7 @@ z_spmGenRHS( spm_rhstype_t type, int nrhs, { spm_complex64_t *xptr = (spm_complex64_t*)x; spm_complex64_t *bptr = (spm_complex64_t*)b; - spm_int_t i, j; + spm_int_t baseval; int rc; if (( spm == NULL ) || @@ -223,25 +454,30 @@ z_spmGenRHS( spm_rhstype_t type, int nrhs, return SPM_ERR_BADPARAMETER; } - if( spm->dof != 1 ) { - return SPM_ERR_BADPARAMETER; - } + /* if( spm->dof != 1 ) { */ + /* return SPM_ERR_BADPARAMETER; */ + /* } */ if (nrhs == 1) { - ldb = spm->n; - ldx = spm->n; + ldb = spm->nexp; + ldx = spm->nexp; } - /* We don't handle distributed spm for now */ - assert( spm->n == spm->gN ); + baseval = spmFindBase( spm ); /* If random b, we do it and exit */ if ( type == SpmRhsRndB ) { /* Compute the spm norm to scale the b vector */ double norm = z_spmNorm( SpmFrobeniusNorm, spm ); - z_spmRndVect( norm, spm->n, nrhs, bptr, ldb, - spm->gN, 0, 0, 24356 ); + if ( spm->loc2glob ) { + z_spmRhsGenRndDist( spm, baseval, norm, nrhs, bptr, ldb, + 1, 24356 ); + } + else { + z_spmRhsGenRndShm( spm, baseval, norm, nrhs, bptr, ldb, + 1, 24356 ); + } return SPM_SUCCESS; } @@ -255,41 +491,23 @@ z_spmGenRHS( spm_rhstype_t type, int nrhs, switch( type ) { case SpmRhsOne: - for( j=0; j<nrhs; j++ ) - { - for( i=0; i<spm->n; i++, xptr++ ) - { -#if defined(PRECISION_z) || defined(PRECISION_c) - *xptr = (spm_complex64_t)(1.+1.*I); -#else - *xptr = (spm_complex64_t)1.; -#endif - } - xptr += ldx-i; - } - xptr -= nrhs * ldx; + z_spmRhsGenOne( spm, baseval, 1., nrhs, xptr, ldx ); break; case SpmRhsI: - for( j=0; j<nrhs; j++ ) - { - for( i=0; i<spm->n; i++, xptr++ ) - { -#if defined(PRECISION_z) || defined(PRECISION_c) - *xptr = (spm_complex64_t)(i + i * I); -#else - *xptr = (spm_complex64_t)i; -#endif - } - xptr += ldx-i; - } - xptr -= nrhs * ldx; + z_spmRhsGenI( spm, baseval, 1., nrhs, xptr, ldx ); break; case SpmRhsRndX: default: - z_spmRndVect( 1., spm->n, nrhs, xptr, ldx, - spm->gN, 0, 0, 24356 ); + if ( spm->loc2glob ) { + z_spmRhsGenRndDist( spm, baseval, 1., nrhs, xptr, ldx, + 1, 24356 ); + } + else { + z_spmRhsGenRndShm( spm, baseval, 1., nrhs, xptr, ldx, + 1, 24356 ); + } } /* Compute B */ diff --git a/src/z_spm_print.c b/src/z_spm_print.c index 12c647cbd3100e5604bdc48bb8706ce098a75302..7ba3c73e5059470899c4840bfc6df03207278ffb 100644 --- a/src/z_spm_print.c +++ b/src/z_spm_print.c @@ -432,3 +432,47 @@ z_spmPrint( FILE *f, const spmatrix_t *spm ) } return; } + +/** + ******************************************************************************* + * + * @ingroup spm_dev_print + * + * @brief Write into a file the vectors associated to a spm. + * + ******************************************************************************* + * + * @param[inout] f + * Output file + * + * @param[in] spm + * The spm structure describing the matrix. + * + * @param[in] n + * The number of columns of x. + * + * @param[in] x + * The set of vectors associated to the spm of size n-by-ldx. + * + * @param[in] ldx + * The local leading dimension of the set of vectors (ldx >= spm->n). 
+ * + *******************************************************************************/ +void +z_spmPrintRHS( FILE *f, const spmatrix_t *spm, + int n, const void *x, spm_int_t ldx ) +{ + const spm_complex64_t *xptr = (const spm_complex64_t *)x; + spm_int_t i, j, ig, baseval; + + baseval = spmFindBase( spm ); + + for( j=0; j<n; j++) { + for( i=0; i<spm->n; i++, xptr++ ) { + ig = (spm->loc2glob == NULL) ? i : spm->loc2glob[i] - baseval; + + z_spmPrintElt( f, ig, j, *xptr ); + } + xptr += ldx - i; + } +} diff --git a/src/z_spm_reduce_rhs.c b/src/z_spm_reduce_rhs.c new file mode 100644 index 0000000000000000000000000000000000000000..69e96fb3dfd0df5e1cec86b81c2a51500659c993 --- /dev/null +++ b/src/z_spm_reduce_rhs.c @@ -0,0 +1,155 @@ +/** + * + * @file z_spm_reduce_rhs.c + * + * SParse Matrix package right hand side reduction routine. + * + * @copyright 2020-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Delarue Tony + * @author Faverge Mathieu + * @date 2020-06-11 + * + * @precisions normal z -> c d s + * + **/ +#include "common.h" +#include "z_spm.h" + +/** + ******************************************************************************* + * + * @brief Reduce all the global C coefficients and store the good ones in local + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix spm + * + * @param[inout] glob + * The global vector to reduce + * + * @param[inout] loc + * Local vector + * + *******************************************************************************/ +spm_complex64_t * +z_spmReduceRHS( const spmatrix_t *spm, + int nrhs, + const spm_complex64_t *x, + spm_int_t ldx, + int root ) +{ + spm_complex64_t *outptr, *out = NULL; + int c; + int n, nmax = 0; + int nexp, nexpmax = 0; + spm_complex64_t *tmp_out, *current_out; + spm_int_t *tmp_l2g, *current_l2g; + spm_int_t current_n, current_nexp; + spm_int_t i, j, k, ig, dofi, row; + int local = ( ( root == -1 ) || (root == spm->clustnum) ); + spm_int_t baseval = spmFindBase( spm ); + + /* We do not handle cases where ldx is different from spm->n */ + assert( ldx == spm->nexp ); + + if ( spm->loc2glob == NULL ) { + if ( local ) { + out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + } + return out; + } + +#if !defined(SPM_WITH_MPI) + assert( 0 ); +#else + n = spm->n; + nexp = spm->nexp; + if ( root == -1 ) { + MPI_Allreduce( &n, &nmax, 1, MPI_INT, MPI_MAX, spm->comm ); + MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm ); + } + else { + MPI_Reduce( &n, &nmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); + MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); + } + + if ( local ) { + out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + tmp_out = malloc( nexpmax * nrhs * sizeof( spm_complex64_t ) ); + tmp_l2g = malloc( nmax * sizeof( spm_int_t ) ); + } + + for( c=0; c<spm->clustnbr; c++ ) { + MPI_Status status; + + if ( c == spm->clustnum ) { + current_n = spm->n; + current_nexp = spm->nexp; + current_l2g = spm->loc2glob; + current_out = (spm_complex64_t*)x; + } + else { + current_n = 0; + current_nexp = 0; + current_l2g = tmp_l2g; + current_out = tmp_out; + } + + if ( root == -1 ) { + MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); + if ( current_n > 0 ) { + MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); + 
MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); + } + } + else if ( root != c ) { /* No communication if c == root */ + if ( c == spm->clustnum ) { + MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); + MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); + if ( current_n > 0 ) { + MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); + MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); + } + } + if ( root == spm->clustnum ) { + MPI_Recv( ¤t_n, 1, SPM_MPI_INT, c, 0, spm->comm, &status ); + MPI_Recv( ¤t_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status ); + if ( current_n > 0 ) { + MPI_Recv( current_l2g, current_n, SPM_MPI_INT, c, 2, spm->comm, &status ); + MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status ); + } + } + } + + if ( !local ) { + continue; + } + + outptr = out; + for( i=0; i<current_n; i++, current_l2g++ ) { + ig = *current_l2g - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; + for( j=0; j<nrhs; j++ ) { + for( k=0; k<dofi; k++ ) { + outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; + } + } + current_out += dofi; + } + } + + if ( local ) { + free( tmp_out ); + free( tmp_l2g ); + } + +#endif + return out; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4b6323f9d4c22506c9438b2eca3875934c0b69e5..311bff7fd1adae3dd94ea4dede2e4db8d473189d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -52,6 +52,7 @@ if ( SPM_WITH_MPI ) list( APPEND TESTS spm_scatter_gather_tests.c spm_dist_norm_tests.c + spm_dist_genrhs_tests.c ) endif() @@ -68,7 +69,8 @@ set( SPM_DOF_TESTS spm_dof_expand_tests spm_dof_norm_tests spm_dof_matvec_tests) set( SPM_MPI_TESTS spm_scatter_gather_tests - spm_dist_norm_tests ) + spm_dist_norm_tests + spm_dist_genrhs_tests ) # List of run types set( RUNTYPE shm ) diff --git a/tests/spm_dist_genrhs_tests.c b/tests/spm_dist_genrhs_tests.c new file mode 100644 index 0000000000000000000000000000000000000000..a0fb79f6b2319868748f97640024983d74a94058 --- /dev/null +++ b/tests/spm_dist_genrhs_tests.c @@ -0,0 +1,186 @@ +/** + * + * @file spm_dist_norm_tests.c + * + * Tests and validate the spm_genrhs routines in the case of random distributed vectors. + * + * @copyright 2015-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + * @version 6.0.0 + * @author Mathieu Faverge + * @author Theophile Terraz + * @author Tony Delarue + * @date 2020-05-19 + * + **/ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <assert.h> +#include <time.h> +#include <spm_tests.h> + +#if !defined(SPM_WITH_MPI) +#error "This test should not be compiled in non distributed version" +#endif + +int main (int argc, char **argv) +{ + char *filename; + spmatrix_t original, *spm, *spmdist; + spm_driver_t driver; + int clustnbr = 1; + int clustnum = 0; + int baseval, root; + int rc = SPM_SUCCESS; + int err = 0; + int dof, dofmax = 4; + int to_free = 0; + int nrhs = 3; + size_t sizeloc, sizedst; + void *bloc, *bdst; + + MPI_Init( &argc, &argv ); + + /** + * Get options from command line + */ + spmGetOptions( argc, argv, + &driver, &filename ); + + rc = spmReadDriver( driver, filename, &original ); + free(filename); + + if ( rc != SPM_SUCCESS ) { + fprintf(stderr, "ERROR: Could not read the file, stop the test !!!\n"); + return EXIT_FAILURE; + } + + MPI_Comm_size( MPI_COMM_WORLD, &clustnbr ); + MPI_Comm_rank( MPI_COMM_WORLD, &clustnum ); + + if ( original.flttype == SpmPattern ) { + spmGenFakeValues( &original ); + } + + spmPrintInfo( &original, stdout ); + + printf(" -- SPM GenRHS Test --\n"); + + for( dof=-1; dof<2; dof++ ) + { + if ( dof >= 0 ) { + spm = spmDofExtend( &original, dof, dofmax ); + to_free = 1; + } + else { + spm = malloc(sizeof(spmatrix_t)); + memcpy( spm, &original, sizeof(spmatrix_t) ); + to_free = 0; + } + + if ( spm == NULL ) { + fprintf( stderr, "Issue to extend the matrix\n" ); + continue; + } + + sizeloc = spm_size_of( spm->flttype ) * spm->nexp * nrhs; + bloc = malloc( sizeloc ); + + memset( bloc, 0xbeefdead, sizeloc ); + if ( spmGenRHS( SpmRhsRndB, nrhs, spm, + NULL, spm->nexp, bloc, spm->nexp ) != SPM_SUCCESS ) { + fprintf( stderr, "Issue to generate the local rhs\n" ); + continue; + } + + for( root=-1; root<clustnbr; root++ ) + { + spmdist = spmScatter( spm, -1, NULL, 1, -1, MPI_COMM_WORLD ); + if ( spmdist == NULL ) { + fprintf( stderr, "Failed to scatter the spm\n" ); + err++; + continue; + } + + sizedst = spm_size_of( spmdist->flttype ) * spmdist->nexp * nrhs; + bdst = malloc( sizedst ); + + for( baseval=0; baseval<2; baseval++ ) + { + spmBase( spmdist, baseval ); + + if(clustnum == 0) { + printf( " Case: %s - base(%d) - dof(%s) - root(%d): ", + fltnames[spmdist->flttype], + baseval, dofname[dof+1], root ); + } + + memset( bdst, 0xdeadbeef, sizedst ); + if ( spmGenRHS( SpmRhsRndB, nrhs, spmdist, + NULL, spmdist->nexp, bdst, spmdist->nexp ) != SPM_SUCCESS ) { + err++; + continue; + } + + switch( spmdist->flttype ){ + case SpmComplex64: + rc = z_spm_dist_genrhs_check( spmdist, nrhs, bloc, bdst, root ); + break; + + case SpmComplex32: + rc = c_spm_dist_genrhs_check( spmdist, nrhs, bloc, bdst, root ); + break; + + case SpmFloat: + rc = s_spm_dist_genrhs_check( spmdist, nrhs, bloc, bdst, root ); + break; + + case SpmDouble: + default: + rc = d_spm_dist_genrhs_check( spmdist, nrhs, bloc, bdst, root ); + } + + if ( clustnum == 0 ) { + if ( rc == 0 ) { + printf( "SUCCESS\n" ); + } + else { + printf( "FAILURE\n" ); + } + } + err = (rc != 0) ? 
err+1 : err; + } + + free( bdst ); + spmExit( spmdist ); + free( spmdist ); + } + + if ( spm != &original ) { + if( to_free ){ + spmExit( spm ); + } + free( spm ); + } + free( bloc ); + } + + spmExit( &original ); + MPI_Finalize(); + + if( err == 0 ) { + if(clustnum == 0) { + printf(" -- All tests PASSED --\n"); + } + return EXIT_SUCCESS; + } + else + { + printf(" -- %d tests FAILED --\n", err); + return EXIT_FAILURE; + } +} diff --git a/tests/spm_tests.h b/tests/spm_tests.h index 876c240b8a77f039562f17d7f66ecb66c99b37df..7c6a2e496c4585bfb04eb89b51c7c8c5f7fbe006 100644 --- a/tests/spm_tests.h +++ b/tests/spm_tests.h @@ -83,21 +83,29 @@ void z_spm_print_check( char *filename, const spmatrix_t *spm ); int z_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int z_spm_norm_check( const spmatrix_t *spm ); int z_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); +int z_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, + const spm_complex64_t *bloc, const spm_complex64_t *bdst, int root ); void c_spm_print_check( char *filename, const spmatrix_t *spm ); int c_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int c_spm_norm_check( const spmatrix_t *spm ); int c_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); +int c_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, + const spm_complex32_t *bloc, const spm_complex32_t *bdst, int root ); void d_spm_print_check( char *filename, const spmatrix_t *spm ); int d_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int d_spm_norm_check( const spmatrix_t *spm ); int d_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); +int d_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, + const double *bloc, const double *bdst, int root ); void s_spm_print_check( char *filename, const spmatrix_t *spm ); int s_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int s_spm_norm_check( const spmatrix_t *spm ); int s_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); +int s_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, + const float *bloc, const float *bdst, int root ); void p_spm_print_check( char *filename, const spmatrix_t *spm ); diff --git a/tests/z_spm_tests.c b/tests/z_spm_tests.c index 7ef502f5b31c2a8232a087f39e40baa37dedcac9..dcbfebc3f47ebacb3497a118d3ebe0871eb0c9d1 100644 --- a/tests/z_spm_tests.c +++ b/tests/z_spm_tests.c @@ -334,4 +334,55 @@ z_spm_dist_norm_check( const spmatrix_t *spm, return ret; } + +int +z_spm_dist_genrhs_check( const spmatrix_t *spm, + spm_int_t nrhs, + const spm_complex64_t *bloc, + const spm_complex64_t *bdst, + int root ) +{ + static spm_complex64_t mzone = (spm_complex64_t)-1.; + spm_complex64_t *tmp; + double norm, normr, eps, result; + int rc, ret = 0; + spm_int_t M; + + eps = LAPACKE_dlamch_work('e'); + M = spm->gNexp; + norm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', M, nrhs, bloc, M ); + + /* Let's gather the distributed RHS */ + tmp = z_spmReduceRHS( spm, nrhs, bdst, spm->nexp, root ); + + rc = 0; + if ( tmp != NULL ) { + cblas_zaxpy( M * nrhs, CBLAS_SADDR(mzone), bloc, 1, tmp, 1 ); + normr = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', M, nrhs, tmp, M ); + + result = fabs(normr) / (norm * eps); + /** + * By default the rhs is scaled by the frobenius norm of A, thus we need + * to take into account the accumulation error un distributed on the + * norm to validate the test here. + */ + result /= spm->gnnzexp; + if ( result > 1. 
) { + fprintf( stderr, "[%2d] || X_global || = %e, || X_global - X_dist || = %e, error=%e\n", + (int)(spm->clustnum), norm, normr, result ); + rc = 1; + } + + free( tmp ); + } + else { + if ( (spm->clustnum == root) || (root == -1) ) { + rc = 1; + } + } + + MPI_Allreduce( &rc, &ret, 1, MPI_INT, MPI_MAX, spm->comm ); + + return ret; +} #endif
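
Usage note (illustrative, not part of the patch): the new right-hand-side entry points added by this change can be chained directly on a scattered spm. The sketch below mirrors spm_dist_genrhs_tests.c, reusing the same spmScatter() arguments; the function name example_dist_rhs() and its error handling are made up for the example, and the error codes used on failure are only assumed to match the spm_error_t values of the library.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <spm.h>

/* Generate a random RHS on the local rows of a scattered spm and dump it. */
int
example_dist_rhs( spmatrix_t *original, int nrhs )
{
    spmatrix_t *spmdist;
    void       *b;
    size_t      size;

    /* Distribute the matrix with the default continuous distribution */
    spmdist = spmScatter( original, -1, NULL, 1, -1, MPI_COMM_WORLD );
    if ( spmdist == NULL ) {
        return SPM_ERR_UNKNOWN;
    }

    /* Local part of the right-hand sides: nexp expanded rows per node */
    size = spm_size_of( spmdist->flttype ) * spmdist->nexp * nrhs;
    b    = malloc( size );

    /* b is scaled by ||A||_F and generated directly on the local rows */
    if ( spmGenRHS( SpmRhsRndB, nrhs, spmdist,
                    NULL, spmdist->nexp, b, spmdist->nexp ) != SPM_SUCCESS ) {
        free( b );
        return SPM_ERR_UNKNOWN;
    }

    /* Dump each local entry as (global row, column, value) */
    spmPrintRHS( spmdist, nrhs, b, spmdist->nexp, stdout );

    free( b );
    spmExit( spmdist );
    free( spmdist );
    return SPM_SUCCESS;
}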
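
Design note on reproducibility: the distributed generator recomputes the random-stream position from the expanded global row and the column, so a given global entry receives the same value whether the RHS is built by z_spmRhsGenRndShm() on a replicated spm or by z_spmRhsGenRndDist() on any distribution; this is exactly the property checked by spm_dist_genrhs_tests.c. The hypothetical helper below (not in the patch, name and existence are illustrative) spells out the mapping both generators implement.

/*
 * Stream position at which the generator (re)starts for the expanded
 * global row `growexp` of right-hand side `col`. Both generators feed
 * this value (times NBELEM) to Rnd64_jump() and then draw consecutive
 * degrees of freedom sequentially, which makes the result independent
 * of the data distribution.
 */
static inline unsigned long long int
rhs_random_jump( unsigned long long int growexp, /* expanded global row index   */
                 unsigned long long int col,     /* right-hand side index       */
                 unsigned long long int gNexp,   /* expanded global matrix size */
                 unsigned long long int shift )  /* initial shift (here, 1)     */
{
    return growexp + col * gNexp + shift;
}

The shared-memory path obtains the same mapping incrementally (jump starts at shift, the stream is consumed row after row, and jump is advanced by gNexp after each column), while the distributed path evaluates it per row block, since consecutive local rows are not necessarily consecutive globally.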