From 89ef88bb082c943dc4702b419f8e05fcd7aaf6a6 Mon Sep 17 00:00:00 2001 From: tdelarue <tony.delarue@inria.fr> Date: Wed, 25 Mar 2020 17:42:08 +0100 Subject: [PATCH] spmm and spmv are now distributed. Dofless --- CMakeLists.txt | 1 + include/spm.h | 3 +- src/spm.c | 78 ++++++++++- src/z_spm.h | 15 ++- src/z_spm_gather_rhs.c | 166 +++++++++++++++++++++++ src/z_spm_genrhs.c | 7 +- src/z_spm_matrixvector.c | 241 ++++++++++++++++++++++++++++------ src/z_spm_print.c | 6 +- src/z_spm_reduce_rhs.c | 157 ++++++---------------- tests/CMakeLists.txt | 5 +- tests/spm_compare.c | 4 +- tests/spm_dist_genrhs_tests.c | 2 +- tests/spm_dist_matvec_tests.c | 204 ++++++++++++++++++++++++++++ tests/spm_dist_norm_tests.c | 6 +- tests/spm_tests.h | 4 + tests/z_spm_tests.c | 114 +++++++++++++++- 16 files changed, 837 insertions(+), 176 deletions(-) create mode 100644 src/z_spm_gather_rhs.c create mode 100644 tests/spm_dist_matvec_tests.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f897fcd..fd7753a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,6 +196,7 @@ set(SOURCES src/z_spm_dof_extend.c src/z_spm_norm.c src/z_spm_scal.c + src/z_spm_gather_rhs.c src/z_spm_reduce_rhs.c src/z_spm_convert_to_csc.c src/z_spm_convert_to_csr.c diff --git a/include/spm.h b/include/spm.h index 0f46e18f..5d52866c 100644 --- a/include/spm.h +++ b/include/spm.h @@ -116,6 +116,7 @@ spmatrix_t *spmScatter( const spmatrix_t *spm, SPM_Comm comm ); spmatrix_t *spmGather ( const spmatrix_t *spm, int root ); +int spmGetDistribution( const spmatrix_t *spm ); /** * @} @@ -207,7 +208,7 @@ int spmParseLaplacianInfo( const char * filename, */ void * spm2Dense( const spmatrix_t *spm ); void spmPrint( const spmatrix_t *spm, FILE *f ); -void spmPrintRHS( const spmatrix_t *spm, int n, const void *x, spm_int_t ldx, FILE *stream ); +void spmPrintRHS( const spmatrix_t *spm, int nrhs, const void *x, spm_int_t ldx, FILE *stream ); void spmPrintInfo( const spmatrix_t *spm, FILE *f ); void spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ); spmatrix_t *spmDofExtend( const spmatrix_t *spm, const int type, const int dof ); diff --git a/src/spm.c b/src/spm.c index b492e714..31ab2b36 100644 --- a/src/spm.c +++ b/src/spm.c @@ -128,6 +128,70 @@ spmInitDist( spmatrix_t *spm, SPM_Comm comm ) #endif /* defined(SPM_WITH_MPI) */ } +/** + ******************************************************************************* + * + * @brief Search the distribution pattern used in the spm structure. + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix structure. + * + ******************************************************************************** + * + * @return 1 if the distribution is column based. + * 0 otherwise. + * + *******************************************************************************/ +int +spmGetDistribution( const spmatrix_t *spm ) +{ + int distribution = 1; + + if( spm->fmttype == SpmCSC ){ + distribution = 1; + } + else if ( spm->fmttype == SpmCSR ) { + distribution = 0; + } + else { + spm_int_t i, baseval; + spm_int_t *colptr = spm->colptr; + spm_int_t *glob2loc = spm->glob2loc; + + baseval = spmFindBase( spm ); + assert( glob2loc != NULL ); + for ( i = 0; i < spm->nnz; i++, colptr++ ) + { + /* + * If the global index is not in the local colptr + * -> row distribution + */ + if( glob2loc[ *colptr - baseval ] < 0 ) { + distribution = 0; + break; + } + } + +#if defined(SPM_WITH_MPI) + { + int check = 0; + MPI_Allreduce( &distribution, &check, 1, MPI_INT, + MPI_SUM, spm->comm ); + if( distribution == 0) { + assert( check == 0 ); + } + else { + assert( check == spm->clustnbr ); + } + } +#endif + } + + return distribution; +} + /** ******************************************************************************* * @@ -848,6 +912,10 @@ spmCopy( const spmatrix_t *spm ) newspm->loc2glob = (spm_int_t*)malloc( spm->n * sizeof(spm_int_t) ); memcpy( newspm->loc2glob, spm->loc2glob, spm->n * sizeof(spm_int_t) ); } + if(spm->glob2loc != NULL) { + newspm->glob2loc = (spm_int_t*)malloc( spm->gN * sizeof(spm_int_t) ); + memcpy( newspm->glob2loc, spm->glob2loc, spm->gN * sizeof(spm_int_t) ); + } if(spm->dofs != NULL) { newspm->dofs = (spm_int_t*)malloc( dofsize * sizeof(spm_int_t) ); memcpy( newspm->dofs, spm->dofs, dofsize * sizeof(spm_int_t) ); @@ -1018,7 +1086,7 @@ spmPrint( const spmatrix_t *spm, *******************************************************************************/ void spmPrintRHS( const spmatrix_t *spm, - int n, + int nrhs, const void *x, spm_int_t ldx, FILE *stream ) @@ -1033,16 +1101,16 @@ spmPrintRHS( const spmatrix_t *spm, /* Not handled for now */ break; case SpmFloat: - s_spmPrintRHS( stream, spm, n, x, ldx ); + s_spmPrintRHS( stream, spm, nrhs, x, ldx ); break; case SpmComplex32: - c_spmPrintRHS( stream, spm, n, x, ldx ); + c_spmPrintRHS( stream, spm, nrhs, x, ldx ); break; case SpmComplex64: - z_spmPrintRHS( stream, spm, n, x, ldx ); + z_spmPrintRHS( stream, spm, nrhs, x, ldx ); break; case SpmDouble: - d_spmPrintRHS( stream, spm, n, x, ldx ); + d_spmPrintRHS( stream, spm, nrhs, x, ldx ); } return; diff --git a/src/z_spm.h b/src/z_spm.h index 4fbe1b06..5e0f6860 100644 --- a/src/z_spm.h +++ b/src/z_spm.h @@ -75,17 +75,28 @@ spm_int_t z_spmSymmetrize( spmatrix_t *spm ); int z_spmGenRHS(spm_rhstype_t type, int nrhs, const spmatrix_t *spm, void *x, int ldx, void *b, int ldb ); int z_spmCheckAxb( spm_fixdbl_t eps, int nrhs, const spmatrix_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx ); -spm_complex64_t *z_spmReduceRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root ); +spm_complex64_t *z_spmGatherRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root ); +void z_spmReduceRhs( const spmatrix_t *spm, int nrhs, spm_complex64_t *bglob, spm_complex64_t *b, spm_int_t ldb ); /** * Output routines */ void z_spmDensePrint( FILE *f, spm_int_t m, spm_int_t n, const spm_complex64_t *A, spm_int_t lda ); void z_spmPrint( FILE *f, const spmatrix_t *spm ); -void z_spmPrintRHS( FILE *f, const spmatrix_t *spm, int n, const void *x, spm_int_t ldx ); +void z_spmPrintRHS( FILE *f, const spmatrix_t *spm, int nrhs, const void *x, spm_int_t ldx ); void z_spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ); void z_spmDofExtend( spmatrix_t *spm ); void z_spmScal( const double alpha, spmatrix_t *spm ); +/** + * Subroutines for random vector generation to be used in testings + */ +void z_spmRhsGenRndShm( const spmatrix_t *spm, spm_int_t baseval, spm_complex64_t scale, + spm_int_t n, spm_complex64_t *A, spm_int_t lda, + int shift, unsigned long long int seed ); +void z_spmRhsGenRndDist( const spmatrix_t *spm, spm_int_t baseval, spm_complex64_t scale, + spm_int_t n, spm_complex64_t *A, spm_int_t lda, + int shift, unsigned long long int seed ); + #endif /* _z_spm_h_ */ diff --git a/src/z_spm_gather_rhs.c b/src/z_spm_gather_rhs.c new file mode 100644 index 00000000..ca4b44b1 --- /dev/null +++ b/src/z_spm_gather_rhs.c @@ -0,0 +1,166 @@ +/** + * + * @file z_spm_gather_rhs.c + * + * SParse Matrix package right hand side gather routine. + * + * @copyright 2020-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Delarue Tony + * @author Faverge Mathieu + * @date 2020-06-11 + * + * @precisions normal z -> c d s + * + **/ +#include "common.h" +#include "z_spm.h" + +/** + ******************************************************************************* + * + * @brief Gather all the global C coefficients and store the good ones in local + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix spm + * + * @param[in] nrhs + * Number of rhs vectors. + * + * @param[in] x + * Local vector + * + * @param[in] ldx + * Leading dimension of this vector + * + * @param[in] root + * Clustnum whre the complete vector will be gathered. + * -1 if you want to gather the data on all nodes. + * + ******************************************************************************** + * + * @return The gathered right hand side vector. + * + *******************************************************************************/ +spm_complex64_t * +z_spmGatherRHS( const spmatrix_t *spm, + int nrhs, + const spm_complex64_t *x, + spm_int_t ldx, + int root ) +{ + int local = ( ( root == -1 ) || (root == spm->clustnum) ); + spm_complex64_t *out = NULL; + + /* We do not handle cases where ldx is different from spm->n */ + assert( ldx == spm->nexp ); + + if ( spm->loc2glob == NULL ) { + if ( local ) { + out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + } + (void)ldx; + return out; + } + +#if defined(SPM_WITH_MPI) + int c; + int n, nmax = 0; + int nexp, nexpmax = 0; + spm_complex64_t *tmp_out, *current_out, *outptr; + spm_int_t *tmp_l2g, *current_l2g; + spm_int_t current_n, current_nexp; + spm_int_t i, j, k, ig, dofi, row; + spm_int_t baseval = spmFindBase( spm ); + + n = spm->n; + nexp = spm->nexp; + if ( root == -1 ) { + MPI_Allreduce( &n, &nmax, 1, MPI_INT, MPI_MAX, spm->comm ); + MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm ); + } + else { + MPI_Reduce( &n, &nmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); + MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); + } + + if ( local ) { + out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + tmp_out = malloc( nexpmax * nrhs * sizeof( spm_complex64_t ) ); + tmp_l2g = malloc( nmax * sizeof( spm_int_t ) ); + } + + for( c=0; c<spm->clustnbr; c++ ) { + MPI_Status status; + + if ( c == spm->clustnum ) { + current_n = spm->n; + current_nexp = spm->nexp; + current_l2g = spm->loc2glob; + current_out = (spm_complex64_t*)x; + } + else { + current_n = 0; + current_nexp = 0; + current_l2g = tmp_l2g; + current_out = tmp_out; + } + + if ( root == -1 ) { + MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); + if ( current_n > 0 ) { + MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); + } + } + else if ( root != c ) { /* No communication if c == root */ + if ( c == spm->clustnum ) { + MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); + MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); + if ( current_n > 0 ) { + MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); + MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); + } + } + if ( root == spm->clustnum ) { + MPI_Recv( ¤t_n, 1, SPM_MPI_INT, c, 0, spm->comm, &status ); + MPI_Recv( ¤t_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status ); + if ( current_n > 0 ) { + MPI_Recv( current_l2g, current_n, SPM_MPI_INT, c, 2, spm->comm, &status ); + MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status ); + } + } + } + + if ( !local ) { + continue; + } + + outptr = out; + for( i=0; i<current_n; i++, current_l2g++ ) { + ig = *current_l2g - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; + for( j=0; j<nrhs; j++ ) { + for( k=0; k<dofi; k++ ) { + outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; + } + } + current_out += dofi; + } + } + + if ( local ) { + free( tmp_out ); + free( tmp_l2g ); + } + +#endif + return out; +} diff --git a/src/z_spm_genrhs.c b/src/z_spm_genrhs.c index f466b8be..54f213cb 100644 --- a/src/z_spm_genrhs.c +++ b/src/z_spm_genrhs.c @@ -94,7 +94,7 @@ Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { * ******************************************************************************/ static inline void -z_updateRndVal( double scale, +z_updateRndVal( spm_complex64_t scale, spm_complex64_t *val, unsigned long long int *ran ) { @@ -265,7 +265,7 @@ z_spmRhsGenI( const spmatrix_t *spm, spm_int_t baseval, * all tiles initialized with this routine. * ******************************************************************************/ -static inline void +void z_spmRhsGenRndShm( const spmatrix_t *spm, spm_int_t baseval, spm_complex64_t scale, spm_int_t n, spm_complex64_t *A, spm_int_t lda, @@ -329,7 +329,7 @@ z_spmRhsGenRndShm( const spmatrix_t *spm, spm_int_t baseval, ******************************************************************************/ void z_spmRhsGenRndDist( const spmatrix_t *spm, spm_int_t baseval, - double scale, + spm_complex64_t scale, spm_int_t n, spm_complex64_t *A, spm_int_t lda, int shift, unsigned long long int seed ) { @@ -368,6 +368,7 @@ z_spmRhsGenRndDist( const spmatrix_t *spm, spm_int_t baseval, } } } + (void)lda; } /** diff --git a/src/z_spm_matrixvector.c b/src/z_spm_matrixvector.c index cf6078b0..d8a05c28 100644 --- a/src/z_spm_matrixvector.c +++ b/src/z_spm_matrixvector.c @@ -46,6 +46,10 @@ struct __spm_zmatvec_s { const spm_int_t *rowptr; const spm_int_t *colptr; const spm_complex64_t *values; + const spm_int_t *loc2glob; + + spm_int_t dof; + const spm_int_t *dofs; const spm_complex64_t *x; spm_int_t incx; @@ -67,26 +71,27 @@ __spm_zmatvec_sy_csr( const __spm_zmatvec_t *args ) const spm_int_t *rowptr = args->rowptr; const spm_int_t *colptr = args->colptr; const spm_complex64_t *values = args->values; + const spm_int_t *loc2glob = args->loc2glob; const spm_complex64_t *x = args->x; spm_int_t incx = args->incx; spm_complex64_t *y = args->y; spm_int_t incy = args->incy; const __conj_fct_t conjA_fct = args->conjA_fct; const __conj_fct_t conjAt_fct = args->conjAt_fct; - spm_int_t col, row, i; + spm_int_t col, gcol, grow, i; for( col=0; col<n; col++, colptr++ ) { + gcol = (loc2glob == NULL) ? col : loc2glob[col] - baseval ; for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ ) { - row = *rowptr - baseval; - - if ( row != col ) { - y[ row * incy ] += alpha * conjAt_fct( *values ) * x[ col * incx ]; - y[ col * incy ] += alpha * conjA_fct( *values ) * x[ row * incx ]; + grow = *rowptr - baseval; + if ( grow != gcol ) { + y[ grow * incy ] += alpha * conjAt_fct( *values ) * x[ gcol * incx ]; + y[ gcol * incy ] += alpha * conjA_fct( *values ) * x[ grow * incx ]; } else { - y[ col * incy ] += alpha * conjA_fct( *values ) * x[ row * incx ]; + y[ gcol * incy ] += alpha * conjA_fct( *values ) * x[ grow * incx ]; } } } @@ -102,26 +107,28 @@ __spm_zmatvec_sy_csc( const __spm_zmatvec_t *args ) const spm_int_t *rowptr = args->rowptr; const spm_int_t *colptr = args->colptr; const spm_complex64_t *values = args->values; + const spm_int_t *loc2glob = args->loc2glob; const spm_complex64_t *x = args->x; spm_int_t incx = args->incx; spm_complex64_t *y = args->y; spm_int_t incy = args->incy; const __conj_fct_t conjA_fct = args->conjA_fct; const __conj_fct_t conjAt_fct = args->conjAt_fct; - spm_int_t col, row, i; + spm_int_t col, gcol, grow, i; + for( col=0; col<n; col++, colptr++ ) { + gcol = (loc2glob == NULL) ? col : loc2glob[col] - baseval ; for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ ) { - row = *rowptr - baseval; - - if ( row != col ) { - y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ]; - y[ col * incy ] += alpha * conjAt_fct( *values ) * x[ row * incx ]; + grow = *rowptr - baseval; + if ( grow != gcol ) { + y[ grow * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ]; + y[ gcol * incy ] += alpha * conjAt_fct( *values ) * x[ grow * incx ]; } else { - y[ col * incy ] += alpha * conjA_fct( *values ) * x[ row * incx ]; + y[ gcol * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ]; } } } @@ -137,31 +144,59 @@ __spm_zmatvec_ge_csc( const __spm_zmatvec_t *args ) const spm_int_t *rowptr = args->rowptr; const spm_int_t *colptr = args->colptr; const spm_complex64_t *values = args->values; + const spm_int_t *loc2glob = args->loc2glob; + const spm_int_t *dofs = args->dofs; + spm_int_t dof = args->dof; const spm_complex64_t *x = args->x; spm_int_t incx = args->incx; spm_complex64_t *y = args->y; spm_int_t incy = args->incy; const __conj_fct_t conjA_fct = args->conjA_fct; - spm_int_t col, row, i; + spm_int_t row, dofj, dofi; + spm_int_t i, ii, ig, j, jj, jg,; if ( args->follow_x ) { - for( col=0; col<n; col++, colptr++, x+=incx ) + for( j = 0; j < n; j++, colptr++ ) { - for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ ) + jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval; + dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg]; + for( i=colptr[0]; i<colptr[1]; i++, rowptr++ ) { - row = *rowptr - baseval; - y[ row * incy ] += alpha * conjA_fct( *values ) * (*x); + ig = *rowptr - baseval; + dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig]; + row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval; + + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, values++) + { + y[ row + (ii * incy) ] += alpha * conjA_fct( *values ) * x[ jj ]; + } + } } + x += (dofj * incx); } } else { - for( col=0; col<n; col++, colptr++, y+=incy ) + for( j=0; j<n; j++, colptr++ ) { - for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ ) + jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval; + dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg]; + for( i=colptr[0]; i<colptr[1]; i++, rowptr++ ) { - row = *rowptr - baseval; - *y += alpha * conjA_fct( *values ) * x[ row * incx ]; + ig = *rowptr - baseval; + dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig]; + row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval; + + for ( jj = 0; jj < dofj; jj++) + { + for ( ii = 0; ii < dofi; ii++, values++ ) + { + y[jj] += alpha * conjA_fct( *values ) * x[ (row + ii) * incx ]; + } + } } + y += (dofj * incy); } } return SPM_SUCCESS; @@ -209,6 +244,7 @@ __spm_zmatvec_ge_ijv( const __spm_zmatvec_t *args ) const spm_int_t *rowptr = args->rowptr; const spm_int_t *colptr = args->colptr; const spm_complex64_t *values = args->values; + const spm_int_t *glob2loc = args->loc2glob; const spm_complex64_t *x = args->x; spm_int_t incx = args->incx; spm_complex64_t *y = args->y; @@ -216,13 +252,25 @@ __spm_zmatvec_ge_ijv( const __spm_zmatvec_t *args ) const __conj_fct_t conjA_fct = args->conjA_fct; spm_int_t col, row, i; - for( i=0; i<nnz; i++, colptr++, rowptr++, values++ ) - { - row = *rowptr - baseval; - col = *colptr - baseval; + if( args->follow_x ) { + for( i=0; i<nnz; i++, colptr++, rowptr++, values++ ) + { + row = *rowptr - baseval; + col = (glob2loc == NULL) ? *colptr - baseval : glob2loc[ *colptr - baseval ]; - y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ]; + y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ]; + } } + else { + for( i=0; i<nnz; i++, colptr++, rowptr++, values++ ) + { + row = (glob2loc == NULL) ? *rowptr - baseval : glob2loc[ *rowptr - baseval ]; + col = *colptr - baseval; + + y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ]; + } + } + return SPM_SUCCESS; } @@ -280,6 +328,9 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args, args->rowptr = A->rowptr; args->colptr = A->colptr; args->values = A->values; + args->loc2glob = A->loc2glob; + args->dof = A->dof; + args->dofs = A->dofs; args->x = B; args->incx = incx; args->y = C; @@ -349,7 +400,11 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args, args->conjAt_fct = tmp_fct; args->colptr = A->rowptr; args->rowptr = A->colptr; + args->follow_x = 0; + } else { + args->follow_x = 1; } + args->loc2glob = A->glob2loc; args->loop_fct = (A->mtxtype == SpmGeneral) ? __spm_zmatvec_ge_ijv : __spm_zmatvec_sy_ijv; } break; @@ -360,6 +415,49 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args, return SPM_SUCCESS; } +static inline spm_complex64_t * +z_spmm_build_Ctmp( const spmatrix_t *spm, + const spm_complex64_t *Cloc, + spm_int_t *ldc, + int nrhs ) +{ + spm_complex64_t *Ctmp; + spm_int_t i, j, idx; + spm_int_t ig, dof, baseval, *loc2glob; + + Ctmp = calloc(spm->gNexp * nrhs, sizeof(spm_complex64_t)); + *ldc = spm->gNexp; + + baseval = spmFindBase(spm); + for ( j = 0; j < nrhs; j++) + { + loc2glob = spm->loc2glob; + for ( i = 0; i < spm->n; i++, loc2glob++) + { + ig = *loc2glob - baseval; + dof = (spm->dof > 0) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + idx = (spm->dof > 0) ? spm->dof * ig : spm->dofs[ig] - baseval; + memcpy( (Ctmp + j * spm->gNexp + idx), + (Cloc + j * spm->nexp + i), + dof * sizeof(spm_complex64_t) ); + } + } + return Ctmp; +} + +static inline spm_complex64_t * +z_spmm_build_Btmp( const spmatrix_t *spm, + const spm_complex64_t *Bloc, + spm_int_t *ldb, + int nrhs ) +{ + spm_complex64_t *Btmp; + + Btmp = z_spmGatherRHS( spm, nrhs, Bloc, *ldb, -1 ); + *ldb = spm->gNexp; + return Btmp; +} + /** ******************************************************************************* * @@ -459,6 +557,7 @@ spm_zspmm( spm_side_t side, int rc = SPM_SUCCESS; spm_int_t M, N, ldx, ldy, r; __spm_zmatvec_t args; + spm_complex64_t *Ctmp, *Btmp; if ( transB != SpmNoTrans ) { fprintf(stderr, "transB != SpmNoTrans not supported yet in spmv computations\n"); @@ -467,18 +566,18 @@ spm_zspmm( spm_side_t side, } if ( side == SpmLeft ) { - M = A->n; + M = A->nexp; N = K; - ldx = ldb; - ldy = ldc; + ldx = ldb; + ldy = ldc; } else { M = K; - N = A->n; + N = A->nexp; - ldx = 1; - ldy = 1; + ldx = 1; + ldy = 1; } if ( beta == 0. ) { @@ -492,15 +591,45 @@ spm_zspmm( spm_side_t side, return SPM_SUCCESS; } + Btmp = (spm_complex64_t*)B; + Ctmp = C; + if ( A->loc2glob != NULL ) { + int distByCol = spmGetDistribution(A); + + if ( A->mtxtype != SpmGeneral ) { + Btmp = z_spmm_build_Btmp( A, B, &ldb, N ); + Ctmp = z_spmm_build_Ctmp( A, C, &ldc, N ); + } + else { + if( ( (transA != SpmNoTrans) && (distByCol == 1) ) || + ( (transA == SpmNoTrans) && (distByCol == 0) ) ) { + Btmp = z_spmm_build_Btmp( A, B, &ldb, N ); + } + if( ( (transA == SpmNoTrans) && (distByCol == 1) ) || + ( (transA != SpmNoTrans) && (distByCol == 0) ) ) { + Ctmp = z_spmm_build_Ctmp( A, C, &ldc, N ); + } + } + } + __spm_zmatvec_args_init( &args, side, transA, - alpha, A, B, ldb, C, ldc ); + alpha, A, Btmp, ldb, Ctmp, ldc ); for( r=0; (r < N) && (rc == SPM_SUCCESS); r++ ) { - args.x = B + r * ldx; - args.y = C + r * ldy; + args.x = Btmp + r * ldx; + args.y = Ctmp + r * ldy; rc = args.loop_fct( &args ); } + if ( Ctmp != C ) { + z_spmReduceRhs( A, N, Ctmp, C, ldc ); + free( Ctmp ); + } + + if ( Btmp != B ) { + free( Btmp ); + } + return rc; } @@ -559,22 +688,52 @@ spm_zspmv( spm_trans_t trans, { int rc = SPM_SUCCESS; __spm_zmatvec_t args; + spm_complex64_t *ytmp, *xtmp; if ( beta == 0. ) { - memset( y, 0, A->n * sizeof(spm_complex64_t) ); + memset( y, 0, A->nexp * sizeof(spm_complex64_t) ); } else { - cblas_zscal( A->n, CBLAS_SADDR(beta), y, incy ); + cblas_zscal( A->nexp, CBLAS_SADDR(beta), y, incy ); } if ( alpha == 0. ) { return SPM_SUCCESS; } - __spm_zmatvec_args_init( &args, SpmLeft, trans, - alpha, A, x, incx, y, incy ); + xtmp = (spm_complex64_t*)x; + ytmp = y; + if ( A->loc2glob != NULL ){ + int distByCol = spmGetDistribution(A); + if ( A->mtxtype != SpmGeneral ) { + xtmp = z_spmm_build_Btmp( A, x, &incx, 1 ); + ytmp = z_spmm_build_Ctmp( A, y, &incy, 1 ); + } + else { + if( ( (trans != SpmNoTrans) && (distByCol == 1) ) || + ( (trans == SpmNoTrans) && (distByCol == 0) ) ) { + xtmp = z_spmm_build_Btmp( A, x, &incx, 1 ); + } + if( ( (trans == SpmNoTrans) && (distByCol == 1) ) || + ( (trans != SpmNoTrans) && (distByCol == 0) ) ) { + ytmp = z_spmm_build_Ctmp( A, y, &incy, 1 ); + } + } + } + + __spm_zmatvec_args_init( &args, SpmLeft, trans, + alpha, A, xtmp, incx, ytmp, incy ); rc = args.loop_fct( &args ); + if ( ytmp != y ) { + z_spmReduceRhs( A, 1, ytmp, y, incy ); + free(ytmp); + } + + if ( xtmp != x ) { + free( xtmp ); + } + return rc; } diff --git a/src/z_spm_print.c b/src/z_spm_print.c index 7ba3c73e..a462f0ac 100644 --- a/src/z_spm_print.c +++ b/src/z_spm_print.c @@ -460,15 +460,15 @@ z_spmPrint( FILE *f, const spmatrix_t *spm ) *******************************************************************************/ void z_spmPrintRHS( FILE *f, const spmatrix_t *spm, - int n, const void *x, spm_int_t ldx ) + int nrhs, const void *x, spm_int_t ldx ) { const spm_complex64_t *xptr = (const spm_complex64_t *)x; spm_int_t i, j, ig, baseval; baseval = spmFindBase( spm ); - for( j=0; j<n; j++) { - for( i=0; i<spm->n; i++, xptr++ ) { + for( j=0; j<nrhs; j++) { + for( i=0; i < spm->nexp; i++, xptr++ ) { ig = (spm->loc2glob == NULL) ? i : spm->loc2glob[i] - baseval; z_spmPrintElt( f, ig, j, *xptr ); diff --git a/src/z_spm_reduce_rhs.c b/src/z_spm_reduce_rhs.c index 69e96fb3..f2a7f964 100644 --- a/src/z_spm_reduce_rhs.c +++ b/src/z_spm_reduce_rhs.c @@ -9,8 +9,7 @@ * * @version 1.0.0 * @author Delarue Tony - * @author Faverge Mathieu - * @date 2020-06-11 + * @date 2020-03-08 * * @precisions normal z -> c d s * @@ -21,135 +20,63 @@ /** ******************************************************************************* * - * @brief Reduce all the global C coefficients and store the good ones in local + * @brief Reduce all the global coefficients of a rhs and store the local ones * ******************************************************************************* * * @param[in] spm * The sparse matrix spm * - * @param[inout] glob - * The global vector to reduce + * @param[in] nrhs + * Number of rhs vectors. * - * @param[inout] loc - * Local vector + * @param[inout] bglob + * The global rhs vector to reduce. + * + * @param[inout] bloc + * Local rhs vector. + * + * @param[inout] ldb + * Leading dimension of the global b vector. * *******************************************************************************/ -spm_complex64_t * -z_spmReduceRHS( const spmatrix_t *spm, - int nrhs, - const spm_complex64_t *x, - spm_int_t ldx, - int root ) +void +z_spmReduceRhs( const spmatrix_t *spm, + int nrhs, + spm_complex64_t *bglob, + spm_complex64_t *b, + spm_int_t ldb ) { - spm_complex64_t *outptr, *out = NULL; - int c; - int n, nmax = 0; - int nexp, nexpmax = 0; - spm_complex64_t *tmp_out, *current_out; - spm_int_t *tmp_l2g, *current_l2g; - spm_int_t current_n, current_nexp; - spm_int_t i, j, k, ig, dofi, row; - int local = ( ( root == -1 ) || (root == spm->clustnum) ); - spm_int_t baseval = spmFindBase( spm ); - - /* We do not handle cases where ldx is different from spm->n */ - assert( ldx == spm->nexp ); +#if defined(SPM_WITH_MPI) + spm_int_t i, j, k; + spm_int_t ig, dofi, row, baseval; + spm_int_t *loc2glob; + spm_complex64_t *rhs = b; if ( spm->loc2glob == NULL ) { - if ( local ) { - out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); - memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); - } - return out; + return; } -#if !defined(SPM_WITH_MPI) - assert( 0 ); -#else - n = spm->n; - nexp = spm->nexp; - if ( root == -1 ) { - MPI_Allreduce( &n, &nmax, 1, MPI_INT, MPI_MAX, spm->comm ); - MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm ); - } - else { - MPI_Reduce( &n, &nmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); - MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); - } - - if ( local ) { - out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); - tmp_out = malloc( nexpmax * nrhs * sizeof( spm_complex64_t ) ); - tmp_l2g = malloc( nmax * sizeof( spm_int_t ) ); - } - - for( c=0; c<spm->clustnbr; c++ ) { - MPI_Status status; - - if ( c == spm->clustnum ) { - current_n = spm->n; - current_nexp = spm->nexp; - current_l2g = spm->loc2glob; - current_out = (spm_complex64_t*)x; - } - else { - current_n = 0; - current_nexp = 0; - current_l2g = tmp_l2g; - current_out = tmp_out; - } + MPI_Allreduce( MPI_IN_PLACE, bglob, ldb * nrhs, SPM_MPI_COMPLEX64, MPI_SUM, spm->comm ); - if ( root == -1 ) { - MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); - MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); - if ( current_n > 0 ) { - MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); - MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); + baseval = spmFindBase( spm ); + loc2glob = spm->loc2glob; + for( i=0; i<spm->n; i++, loc2glob++ ) { + ig = *loc2glob - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; + for( j=0; j<nrhs; j++ ) { + for( k=0; k<dofi; k++ ) { + rhs[ j * spm->nexp + k ] = bglob[ row + j * ldb + k ]; } } - else if ( root != c ) { /* No communication if c == root */ - if ( c == spm->clustnum ) { - MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); - MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); - if ( current_n > 0 ) { - MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); - MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); - } - } - if ( root == spm->clustnum ) { - MPI_Recv( ¤t_n, 1, SPM_MPI_INT, c, 0, spm->comm, &status ); - MPI_Recv( ¤t_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status ); - if ( current_n > 0 ) { - MPI_Recv( current_l2g, current_n, SPM_MPI_INT, c, 2, spm->comm, &status ); - MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status ); - } - } - } - - if ( !local ) { - continue; - } - - outptr = out; - for( i=0; i<current_n; i++, current_l2g++ ) { - ig = *current_l2g - baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; - row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; - for( j=0; j<nrhs; j++ ) { - for( k=0; k<dofi; k++ ) { - outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; - } - } - current_out += dofi; - } - } - - if ( local ) { - free( tmp_out ); - free( tmp_l2g ); + rhs += dofi; } - +#else + (void)spm; + (void)nrhs; + (void)bglob; + (void)b; + (void)ldb; #endif - return out; -} +} \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 311bff7f..815e11c4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -53,6 +53,7 @@ if ( SPM_WITH_MPI ) spm_scatter_gather_tests.c spm_dist_norm_tests.c spm_dist_genrhs_tests.c + spm_dist_matvec_tests.c ) endif() @@ -70,7 +71,9 @@ set( SPM_DOF_TESTS set( SPM_MPI_TESTS spm_scatter_gather_tests spm_dist_norm_tests - spm_dist_genrhs_tests ) + spm_dist_genrhs_tests + spm_dist_matvec_tests + ) # List of run types set( RUNTYPE shm ) diff --git a/tests/spm_compare.c b/tests/spm_compare.c index d16adad3..123c6b07 100644 --- a/tests/spm_compare.c +++ b/tests/spm_compare.c @@ -138,7 +138,9 @@ int spmCompare( spmatrix_t *spm1, spmatrix_t *spm2 ) { - spm_int_t i, base1, base2; + spm_int_t i; + spm_int_t base1 = -1; + spm_int_t base2 = -2; int rc = 0; /* I don't have one of the matrices, I jump to the findbase computations */ diff --git a/tests/spm_dist_genrhs_tests.c b/tests/spm_dist_genrhs_tests.c index a0fb79f6..26d0c46e 100644 --- a/tests/spm_dist_genrhs_tests.c +++ b/tests/spm_dist_genrhs_tests.c @@ -149,7 +149,7 @@ int main (int argc, char **argv) printf( "SUCCESS\n" ); } else { - printf( "FAILURE\n" ); + printf( "FAILED\n" ); } } err = (rc != 0) ? err+1 : err; diff --git a/tests/spm_dist_matvec_tests.c b/tests/spm_dist_matvec_tests.c new file mode 100644 index 00000000..7d2e270b --- /dev/null +++ b/tests/spm_dist_matvec_tests.c @@ -0,0 +1,204 @@ +/** + * + * @file spm_dist_matvec_tests.c + * + * Tests and validate the spm_matvec routines. + * + * @copyright 2015-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 6.0.0 + * @author Mathieu Faverge + * @author Theophile Terraz + * @author Tony Delarue + * @date 2020-05-19 + * + **/ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <assert.h> +#include <time.h> +#include "spm_tests.h" + +#if !defined(SPM_WITH_MPI) +#error "This test should not be compiled in non distributed version" +#endif + +#define PRINT_RES(_ret_) \ + if(_ret_) { \ + printf("FAILED(%d)\n", _ret_); \ + err++; \ + } \ + else { \ + printf("SUCCESS\n"); \ + } + +int main (int argc, char **argv) +{ + char *filename; + spmatrix_t original, *origdist, *spm; + spm_driver_t driver; + int clustnbr = 1; + int clustnum = 0; + spm_mtxtype_t mtxtype; + spm_fmttype_t fmttype; + spm_trans_t trans; + int baseval, distbycol; + int rc = SPM_SUCCESS; + int err = 0; + int dof, dofmax = 4; + int to_free = 0; + + MPI_Init( &argc, &argv ); + + /** + * Get options from command line + */ + spmGetOptions( argc, argv, + &driver, &filename ); + + rc = spmReadDriver( driver, filename, &original ); + free(filename); + + if ( rc != SPM_SUCCESS ) { + fprintf(stderr, "ERROR: Could not read the file, stop the test !!!\n"); + return EXIT_FAILURE; + } + + MPI_Comm_size( MPI_COMM_WORLD, &clustnbr ); + MPI_Comm_rank( MPI_COMM_WORLD, &clustnum ); + + if ( original.flttype == SpmPattern ) { + spmGenFakeValues( &original ); + } + + spmPrintInfo( &original, stdout ); + + if ( clustnum == 0 ) { + printf(" -- SPM Matrix-Vector Test --\n"); + } + + for( fmttype=SpmCSC; fmttype<=SpmIJV; fmttype++ ) { + + distbycol = (fmttype == SpmCSR) ? 0 : 1; + if ( spmConvert( fmttype, &original ) != SPM_SUCCESS ) { + fprintf( stderr, "Issue to convert to %s format\n", fmtnames[fmttype] ); + continue; + } + + origdist = spmScatter( &original, -1, NULL, distbycol, -1, MPI_COMM_WORLD ); + if ( origdist == NULL ) { + fprintf( stderr, "Failed to scatter the spm\n" ); + continue; + } + + for( dof=-1; dof<2; dof++ ) + { + if ( dof >= 0 ) { + spm = spmDofExtend( origdist, dof, dofmax ); + to_free = 1; + } + else { + spm = malloc(sizeof(spmatrix_t)); + memcpy( spm, origdist, sizeof(spmatrix_t) ); + to_free = 0; + } + + if ( spm == NULL ) { + fprintf( stderr, "Issue to extend the matrix\n" ); + continue; + } + + for( baseval=0; baseval<2; baseval++ ) + { + spmBase( spm, baseval ); + + for( mtxtype=SpmGeneral; mtxtype<=SpmHermitian; mtxtype++ ) + { + if ( (mtxtype == SpmHermitian) && + ( ((original.flttype != SpmComplex64) && + (original.flttype != SpmComplex32)) || + (original.mtxtype != SpmHermitian) ) ) + { + continue; + } + + if ( (mtxtype != SpmGeneral) && + (original.mtxtype == SpmGeneral) ) + { + continue; + } + + spm->mtxtype = mtxtype; + + for( trans=SpmNoTrans; trans<=SpmConjTrans; trans++ ) + { + if ( (trans == SpmConjTrans) && + ((spm->flttype != SpmComplex64) && (spm->flttype != SpmComplex32))) + { + continue; + } + + if(clustnum == 0) { + printf( " Case: %s / %s / dof(%8s) / base(%d) / %10s / %9s : ", + fltnames[spm->flttype], + fmtnames[spm->fmttype], + dofname[dof+1], + baseval, + mtxnames[mtxtype - SpmGeneral], + transnames[trans - SpmNoTrans] ); + } + + switch( spm->flttype ){ + case SpmComplex64: + rc = z_spm_dist_matvec_check( baseval, trans, spm ); + break; + + case SpmComplex32: + rc = c_spm_dist_matvec_check( baseval, trans, spm ); + break; + + case SpmFloat: + rc = s_spm_dist_matvec_check( baseval, trans, spm ); + break; + + case SpmDouble: + default: + rc = d_spm_dist_matvec_check( baseval, trans, spm ); + } + err = (rc != 0) ? err+1 : err; + } + } + } + + if ( spm != origdist ) { + if( to_free ){ + spmExit( spm ); + } + free( spm ); + } + } + + spmExit( origdist ); + free( origdist ); + } + + spmExit(&original); + + MPI_Finalize(); + + if( err == 0 ) { + if(clustnum == 0) { + printf(" -- All tests PASSED --\n"); + } + return EXIT_SUCCESS; + } + else + { + printf(" -- %d tests FAILED --\n", err); + return EXIT_FAILURE; + } +} diff --git a/tests/spm_dist_norm_tests.c b/tests/spm_dist_norm_tests.c index 8c7c7dd3..1369f92e 100644 --- a/tests/spm_dist_norm_tests.c +++ b/tests/spm_dist_norm_tests.c @@ -67,7 +67,9 @@ int main (int argc, char **argv) spmPrintInfo( &original, stdout ); - printf(" -- SPM Norms Test --\n"); + if ( clustnum == 0 ) { + printf(" -- SPM Norms Test --\n"); + } for( fmttype=SpmCSC; fmttype<=SpmIJV; fmttype++ ) { @@ -128,7 +130,7 @@ int main (int argc, char **argv) } spmdist->mtxtype = mtxtype; - if(clustnum == 0) { + if ( clustnum == 0 ) { printf( " Case: %s / %s / %d / %s / %d\n", fltnames[spmdist->flttype], fmtnames[spmdist->fmttype], baseval, diff --git a/tests/spm_tests.h b/tests/spm_tests.h index 7c6a2e49..85dd17ab 100644 --- a/tests/spm_tests.h +++ b/tests/spm_tests.h @@ -85,6 +85,7 @@ int z_spm_norm_check( const spmatrix_t *spm ); int z_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); int z_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, const spm_complex64_t *bloc, const spm_complex64_t *bdst, int root ); +int z_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t *spm ); void c_spm_print_check( char *filename, const spmatrix_t *spm ); int c_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); @@ -92,6 +93,7 @@ int c_spm_norm_check( const spmatrix_t *spm ); int c_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); int c_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, const spm_complex32_t *bloc, const spm_complex32_t *bdst, int root ); +int c_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t *spm ); void d_spm_print_check( char *filename, const spmatrix_t *spm ); int d_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); @@ -99,6 +101,7 @@ int d_spm_norm_check( const spmatrix_t *spm ); int d_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); int d_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, const double *bloc, const double *bdst, int root ); +int d_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t *spm ); void s_spm_print_check( char *filename, const spmatrix_t *spm ); int s_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); @@ -106,6 +109,7 @@ int s_spm_norm_check( const spmatrix_t *spm ); int s_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); int s_spm_dist_genrhs_check( const spmatrix_t *spm, spm_int_t nrhs, const float *bloc, const float *bdst, int root ); +int s_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t *spm ); void p_spm_print_check( char *filename, const spmatrix_t *spm ); diff --git a/tests/z_spm_tests.c b/tests/z_spm_tests.c index dcbfebc3..c53a0900 100644 --- a/tests/z_spm_tests.c +++ b/tests/z_spm_tests.c @@ -353,7 +353,7 @@ z_spm_dist_genrhs_check( const spmatrix_t *spm, norm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', M, nrhs, bloc, M ); /* Let's gather the distributed RHS */ - tmp = z_spmReduceRHS( spm, nrhs, bdst, spm->nexp, root ); + tmp = z_spmGatherRHS( spm, nrhs, bdst, spm->nexp, root ); rc = 0; if ( tmp != NULL ) { @@ -385,4 +385,116 @@ z_spm_dist_genrhs_check( const spmatrix_t *spm, return ret; } + +/*------------------------------------------------------------------------ + * Check the accuracy of the solution + */ +int +z_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t *spm ) +{ + spmatrix_t *spmloc; + unsigned long long int seed = 35469; + unsigned long long int seedx = 24632; + unsigned long long int seedy = 73246; + spm_complex64_t *x, *y, *yd; + /* + * Alpha and beta are complex for cblas, but only the real part is used for + * matvec/matmat subroutines + */ + double dalpha = 0.; + double dbeta = 0.; + + double Anorm, Xnorm, Ynorm, Ylnorm, Ydnorm, Rnorm; + double eps, result; + int rc, info_solution, start = 1; + spm_int_t ldl, ldd, nrhs = 1; + + eps = LAPACKE_dlamch_work('e'); + + core_dplrnt( 1, 1, &dalpha, 1, 1, start, 0, seed ); start++; + core_dplrnt( 1, 1, &dbeta, 1, 1, start, 0, seed ); start++; + + ldd = spm->nexp; + ldl = spm->gNexp; + + /* Generate random x and y in distributed */ + x = (spm_complex64_t*)malloc( ldd * nrhs * sizeof(spm_complex64_t) ); + z_spmRhsGenRndDist( spm, baseval, 1., nrhs, x, spm->nexp, 1, seedx ); + + y = (spm_complex64_t*)malloc( ldd * nrhs * sizeof(spm_complex64_t) ); + z_spmRhsGenRndDist( spm, baseval, 1., nrhs, y, spm->nexp, 1, seedy ); + + /* Compute the distributed sparse matrix-vector product */ + rc = spmMatMat( trans, nrhs, dalpha, spm, x, ldd, dbeta, y, ldd ); + if ( rc != SPM_SUCCESS ) { + info_solution = 1; + goto end; + } + + /* Compute matrix norm and gather info */ + Anorm = spmNorm( SpmInfNorm, spm ); + spmloc = spmGather( spm, 0 ); + yd = z_spmGatherRHS( spm, nrhs, y, ldd, 0 ); + + /* Compute the local sparse matrix-vector product */ + if ( spm->clustnum == 0 ) { + spm_complex64_t *xl, *yl; + + /* Generate xl and yl as x and y locally on 0 */ + xl = (spm_complex64_t*)malloc( ldl * nrhs * sizeof(spm_complex64_t) ); + z_spmRhsGenRndShm( spmloc, baseval, 1., nrhs, xl, spm->nexp, 1, seedx ); + + yl = (spm_complex64_t*)malloc( ldl * nrhs * sizeof(spm_complex64_t) ); + z_spmRhsGenRndShm( spmloc, baseval, 1., nrhs, yl, spm->nexp, 1, seedy ); + + /* Compute the original norms */ + Xnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', ldl, nrhs, xl, ldl ); + Ynorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', ldl, nrhs, yl, ldl ); + + rc = spmMatMat( trans, nrhs, dalpha, spmloc, xl, ldl, dbeta, yl, ldl ); + if ( rc != SPM_SUCCESS ) { + info_solution = 1; + goto end; + } + + /* Compute the final norm in shared memory */ + Ylnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', ldl, nrhs, yl, ldl ); + Ydnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', ldl, nrhs, yd, ldl ); + + core_zgeadd( SpmNoTrans, ldl, nrhs, + -1., yl, ldl, + 1., yd, ldl ); + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', ldl, nrhs, yd, ldl ); + + + result = Rnorm / ( (Anorm + Xnorm + Ynorm) * spm->gNexp * eps ); + if ( isinf(Ydnorm) || isinf(Ylnorm) || + isnan(result) || isinf(result) || (result > 10.0) ) + { + info_solution = 1; + printf( "FAILED !\n" ); + /* " ||A||_inf = %e, ||x||_inf = %e, ||y||_inf = %e\n" */ + /* " ||shm(a*A*x+b*y)||_inf = %e, ||dist(a*A*x+b*y)||_inf = %e, ||R||_m = %e\n", */ + /* Anorm, Xnorm, Ynorm, Ylnorm, Ydnorm, Rnorm ); */ + } + else { + info_solution = 0; + printf("SUCCESS !\n"); + } + + free( xl ); + free( yl ); + free( yd ); + spmExit( spmloc ); + free( spmloc ); + } + + MPI_Bcast( &info_solution, 1, MPI_INT, 0, spm->comm ); + + end: + free( x ); + free( y ); + + return info_solution; +} #endif -- GitLab