diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f897fcdd6718df2df7fb1caef63200e90c20644..fd7753a578be36e5de3e01ee56c77e9759605d27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,6 +196,7 @@ set(SOURCES src/z_spm_dof_extend.c src/z_spm_norm.c src/z_spm_scal.c + src/z_spm_gather_rhs.c src/z_spm_reduce_rhs.c src/z_spm_convert_to_csc.c src/z_spm_convert_to_csr.c diff --git a/src/z_spm.h b/src/z_spm.h index d48f742fc235d3923b111bba316adfed0249ef4f..a2fe6adfb786607750da05d6e791a8e37ae77fe9 100644 --- a/src/z_spm.h +++ b/src/z_spm.h @@ -75,7 +75,8 @@ spm_int_t z_spmSymmetrize( spmatrix_t *spm ); int z_spmGenRHS(spm_rhstype_t type, int nrhs, const spmatrix_t *spm, void *x, int ldx, void *b, int ldb ); int z_spmCheckAxb( spm_fixdbl_t eps, int nrhs, const spmatrix_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx ); -spm_complex64_t *z_spmReduceRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root ); +spm_complex64_t *z_spmGatherRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root ); +void z_spmReduceRhs( const spmatrix_t *spm, int nrhs, spm_complex64_t *bglob, spm_complex64_t *bloc ); /** * Output routines diff --git a/src/z_spm_gather_rhs.c b/src/z_spm_gather_rhs.c new file mode 100644 index 0000000000000000000000000000000000000000..e2407d9969ac6050c7dd7e20bd6b5b991ed0f447 --- /dev/null +++ b/src/z_spm_gather_rhs.c @@ -0,0 +1,155 @@ +/** + * + * @file z_spm_gather_rhs.c + * + * SParse Matrix package right hand side reduction routine. + * + * @copyright 2020-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Delarue Tony + * @author Faverge Mathieu + * @date 2020-06-11 + * + * @precisions normal z -> c d s + * + **/ +#include "common.h" +#include "z_spm.h" + +/** + ******************************************************************************* + * + * @brief Reduce all the global C coefficients and store the good ones in local + * + ******************************************************************************* + * + * @param[in] spm + * The sparse matrix spm + * + * @param[inout] glob + * The global vector to reduce + * + * @param[inout] loc + * Local vector + * + *******************************************************************************/ +spm_complex64_t * +z_spmGatherRHS( const spmatrix_t *spm, + int nrhs, + const spm_complex64_t *x, + spm_int_t ldx, + int root ) +{ + spm_complex64_t *outptr, *out = NULL; + int c; + int n, nmax = 0; + int nexp, nexpmax = 0; + spm_complex64_t *tmp_out, *current_out; + spm_int_t *tmp_l2g, *current_l2g; + spm_int_t current_n, current_nexp; + spm_int_t i, j, k, ig, dofi, row; + int local = ( ( root == -1 ) || (root == spm->clustnum) ); + spm_int_t baseval = spmFindBase( spm ); + + /* We do not handle cases where ldx is different from spm->n */ + assert( ldx == spm->nexp ); + + if ( spm->loc2glob == NULL ) { + if ( local ) { + out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + } + return out; + } + +#if !defined(SPM_WITH_MPI) + assert( 0 ); +#else + n = spm->n; + nexp = spm->nexp; + if ( root == -1 ) { + MPI_Allreduce( &n, &nmax, 1, MPI_INT, MPI_MAX, spm->comm ); + MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm ); + } + else { + MPI_Reduce( &n, &nmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); + MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); + } + + if ( local ) { + out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); + tmp_out = malloc( nexpmax * nrhs * sizeof( spm_complex64_t ) ); + tmp_l2g = malloc( nmax * sizeof( spm_int_t ) ); + } + + for( c=0; c<spm->clustnbr; c++ ) { + MPI_Status status; + + if ( c == spm->clustnum ) { + current_n = spm->n; + current_nexp = spm->nexp; + current_l2g = spm->loc2glob; + current_out = (spm_complex64_t*)x; + } + else { + current_n = 0; + current_nexp = 0; + current_l2g = tmp_l2g; + current_out = tmp_out; + } + + if ( root == -1 ) { + MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); + if ( current_n > 0 ) { + MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); + } + } + else if ( root != c ) { /* No communication if c == root */ + if ( c == spm->clustnum ) { + MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); + MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); + if ( current_n > 0 ) { + MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); + MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); + } + } + if ( root == spm->clustnum ) { + MPI_Recv( ¤t_n, 1, SPM_MPI_INT, c, 0, spm->comm, &status ); + MPI_Recv( ¤t_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status ); + if ( current_n > 0 ) { + MPI_Recv( current_l2g, current_n, SPM_MPI_INT, c, 2, spm->comm, &status ); + MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status ); + } + } + } + + if ( !local ) { + continue; + } + + outptr = out; + for( i=0; i<current_n; i++, current_l2g++ ) { + ig = *current_l2g - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; + for( j=0; j<nrhs; j++ ) { + for( k=0; k<dofi; k++ ) { + outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; + } + } + current_out += dofi; + } + } + + if ( local ) { + free( tmp_out ); + free( tmp_l2g ); + } + +#endif + return out; +} diff --git a/src/z_spm_matrixvector.c b/src/z_spm_matrixvector.c index e089cd9795746fdb422ee1ccfd47b545ab1decba..e58c620db6307c39ce8f42f515ac54c240bbdf85 100644 --- a/src/z_spm_matrixvector.c +++ b/src/z_spm_matrixvector.c @@ -110,18 +110,18 @@ __spm_zmatvec_sy_csc( const __spm_zmatvec_t *args ) spm_int_t incy = args->incy; const __conj_fct_t conjA_fct = args->conjA_fct; const __conj_fct_t conjAt_fct = args->conjAt_fct; - spm_int_t col, gcol, row, i; + spm_int_t col, gcol, grow, i; for( col=0; col<n; col++, colptr++ ) { - gcol = (loc2glob == NULL) ? col : loc2glob[col]; + gcol = (loc2glob == NULL) ? col : loc2glob[col] - baseval ; for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ ) { - row = *rowptr - baseval; - if ( row != gcol ) { - y[ row * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ]; - y[ gcol * incy ] += alpha * conjAt_fct( *values ) * x[ row * incx ]; + grow = *rowptr - baseval; + if ( grow != gcol ) { + y[ grow * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ]; + y[ gcol * incy ] += alpha * conjAt_fct( *values ) * x[ grow * incx ]; } else { y[ gcol * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ]; @@ -140,6 +140,7 @@ __spm_zmatvec_ge_csc( const __spm_zmatvec_t *args ) const spm_int_t *rowptr = args->rowptr; const spm_int_t *colptr = args->colptr; const spm_complex64_t *values = args->values; + const spm_int_t *loc2glob = args->loc2glob; const spm_complex64_t *x = args->x; spm_int_t incx = args->incx; spm_complex64_t *y = args->y; @@ -158,12 +159,14 @@ __spm_zmatvec_ge_csc( const __spm_zmatvec_t *args ) } } else { - for( col=0; col<n; col++, colptr++, y+=incy ) + spm_complex64_t *yptr; + for( col=0; col<n; col++, colptr++ ) { + yptr = (loc2glob == NULL) ? y + col * incy : y + (loc2glob[col] - baseval) * incy ; for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ ) { row = *rowptr - baseval; - *y += alpha * conjA_fct( *values ) * x[ row * incx ]; + *yptr += alpha * conjA_fct( *values ) * x[ row * incx ]; } } } @@ -385,39 +388,6 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args, return SPM_SUCCESS; } -static inline void -z_spmCopyC( const spmatrix_t *spm, - const spm_complex64_t *xloc, - spm_complex64_t *xglob ) -{ - spm_int_t i; - spm_int_t *loc2glob; - if(spm->loc2glob == NULL) { - return; - } - - loc2glob = spm->loc2glob; - memset( xglob, 0, spm->gN * sizeof(spm_complex64_t) ); - for( i=0; i<spm->n; i++, loc2glob++ ) { - xglob[ (*loc2glob) ] = xloc[i]; - } -} - -static inline void -z_spmCopyB( const spmatrix_t *spm, - const spm_complex64_t *xloc, - spm_complex64_t *xglob) -{ - if( (spm->mtxtype == SpmGeneral) || (spm->loc2glob == NULL) ) { - return; - } - z_spmCopyC( spm, xloc, xglob ); -#if defined(SPM_WITH_MPI) - MPI_Allreduce( MPI_IN_PLACE, xglob, spm->gN, SPM_MPI_COMPLEX64, MPI_SUM, spm->comm ); -#endif -} - - /** ******************************************************************************* * @@ -529,15 +499,15 @@ spm_zspmm( spm_side_t side, M = A->n; N = K; - ldx = ldb; - ldy = ldc; + ldx = ldb; + ldy = ldc; } else { M = K; N = A->n; - ldx = 1; - ldy = 1; + ldx = 1; + ldy = 1; } if ( beta == 0. ) { @@ -551,28 +521,57 @@ spm_zspmm( spm_side_t side, return SPM_SUCCESS; } - ldc = (A->loc2glob != NULL) ? A->gN : ldc; - Ctmp = (A->loc2glob != NULL) ? malloc(A->gN * sizeof(spm_complex64_t)) : NULL; - ldb = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? A->gN : ldb; - Btmp = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? malloc(A->gN * sizeof(spm_complex64_t)) : NULL; + /* Need a BIG cleanup */ + if(A->loc2glob != NULL){ + spm_int_t i, j, idx; + spm_int_t ig, dof, baseval, *loc2glob; + + if( A->mtxtype != SpmGeneral || transA != SpmNoTrans ) { + Btmp = z_spmGatherRHS( A, N, B, ldb, -1 ); + ldb = A->gNexp; + } + else { + Btmp = (spm_complex64_t*)B; + } + Ctmp = malloc(A->gNexp * N * sizeof(spm_complex64_t) ); + ldc = A->gNexp; + memset( Ctmp, 0, A->gNexp * N * sizeof(spm_complex64_t) ); + + baseval = spmFindBase(A); + for ( j = 0; j < N; j++) + { + loc2glob = A->loc2glob; + for ( i = 0; i < A->n; i++, loc2glob++) + { + ig = *loc2glob - baseval; + dof = (A->dof > 0) ? A->dof : A->dofs[ig+1] - A->dofs[ig]; + idx = (A->dof > 0) ? A->dof * ig : A->dofs[ig] - baseval; + memcpy( (Ctmp + j * A->gNexp + idx), + (C + j * A->nexp + i), + dof * sizeof(spm_complex64_t) ); + } + } + } + else { + Btmp = (spm_complex64_t*)B; + Ctmp = C; + } __spm_zmatvec_args_init( &args, side, transA, alpha, A, Btmp, ldb, Ctmp, ldc ); for( r=0; (r < N) && (rc == SPM_SUCCESS); r++ ) { - z_spmCopyB( A, B + r * ldx, Btmp ); - z_spmCopyC( A, C + r * ldy, Ctmp ); - - args.x = (Btmp == NULL) ? B + r * ldx : Btmp; - args.y = (Ctmp == NULL) ? C + r * ldy : Ctmp; + args.x = Btmp + r * ldx; + args.y = Ctmp + r * ldy; rc = args.loop_fct( &args ); - - z_spmReduce( A, Ctmp, C + r * ldy ); } + /* Reduce only when necessary */ + z_spmReduceRhs( A, K, Ctmp, C ); + if(A->loc2glob != NULL) { free(Ctmp); - if(A->mtxtype != SpmGeneral){ + if( A->mtxtype != SpmGeneral ) { free(Btmp); } } @@ -648,25 +647,42 @@ spm_zspmv( spm_trans_t trans, return SPM_SUCCESS; } - ytmp = (A->loc2glob != NULL) ? malloc(A->gN * sizeof(spm_complex64_t)) : y; - incy = (A->loc2glob != NULL) ? A->gN : incy; - xtmp = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? malloc(A->gN * sizeof(spm_complex64_t)) : (spm_complex64_t *)x; - incx = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? A->gN : incx; + if(A->loc2glob != NULL){ + spm_int_t i, idx; + spm_int_t ig, dof, baseval, *loc2glob; - z_spmCopyB( A, x, xtmp ); - z_spmCopyC( A, y, ytmp ); + xtmp = z_spmGatherRHS( A, 1, x, incx, -1 ); + incx = A->gNexp; + ytmp = malloc(A->gNexp * sizeof(spm_complex64_t) ); + incy = A->gNexp; + memset( ytmp, 0, A->gNexp * sizeof(spm_complex64_t) ); + + baseval = spmFindBase(A); + loc2glob = A->loc2glob; + for ( i = 0; i < A->n; i++, loc2glob++) + { + ig = *loc2glob - baseval; + dof = (A->dof > 0) ? A->dof : A->dofs[ig+1] - A->dofs[ig]; + idx = (A->dof > 0) ? A->dof * ig : A->dofs[ig] - baseval; + memcpy( (ytmp + idx), + (y + i), + dof * sizeof(spm_complex64_t) ); + } + } + else { + xtmp = (spm_complex64_t*)x; + ytmp = y; + } __spm_zmatvec_args_init( &args, SpmLeft, trans, alpha, A, xtmp, incx, ytmp, incy ); rc = args.loop_fct( &args ); - z_spmReduce( A, ytmp, y ); + z_spmReduceRhs( A, 1, ytmp, y ); if(A->loc2glob != NULL) { free(ytmp); - if(A->mtxtype != SpmGeneral){ - free(xtmp); - } + free(xtmp); } diff --git a/src/z_spm_reduce_rhs.c b/src/z_spm_reduce_rhs.c index 69e96fb3dfd0df5e1cec86b81c2a51500659c993..da536297f4906e08af3f31bfa4fb820d29a8ab5e 100644 --- a/src/z_spm_reduce_rhs.c +++ b/src/z_spm_reduce_rhs.c @@ -2,18 +2,16 @@ * * @file z_spm_reduce_rhs.c * - * SParse Matrix package right hand side reduction routine. + * SParse Matrix package matrix-vector multiplication routines. * - * @copyright 2020-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * @copyright 2016-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. * * @version 1.0.0 * @author Delarue Tony - * @author Faverge Mathieu - * @date 2020-06-11 + * @date 2020-03-08 * * @precisions normal z -> c d s - * **/ #include "common.h" #include "z_spm.h" @@ -28,128 +26,51 @@ * @param[in] spm * The sparse matrix spm * - * @param[inout] glob - * The global vector to reduce + * @param[in] nrhs + * Number of Rhs vectors. + * + * @param[inout] bglob + * The global rhs vector to reduce. * - * @param[inout] loc - * Local vector + * @param[inout] bloc + * Local rhs vector. * *******************************************************************************/ -spm_complex64_t * -z_spmReduceRHS( const spmatrix_t *spm, - int nrhs, - const spm_complex64_t *x, - spm_int_t ldx, - int root ) +void +z_spmReduceRhs( const spmatrix_t *spm, + int nrhs, + spm_complex64_t *bglob, + spm_complex64_t *bloc ) { - spm_complex64_t *outptr, *out = NULL; - int c; - int n, nmax = 0; - int nexp, nexpmax = 0; - spm_complex64_t *tmp_out, *current_out; - spm_int_t *tmp_l2g, *current_l2g; - spm_int_t current_n, current_nexp; - spm_int_t i, j, k, ig, dofi, row; - int local = ( ( root == -1 ) || (root == spm->clustnum) ); - spm_int_t baseval = spmFindBase( spm ); - - /* We do not handle cases where ldx is different from spm->n */ - assert( ldx == spm->nexp ); +#if defined(SPM_WITH_MPI) + spm_int_t i, j, k; + spm_int_t ig, dofi, row, baseval; + spm_int_t *loc2glob; + spm_complex64_t *rhs = bloc; if ( spm->loc2glob == NULL ) { - if ( local ) { - out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); - memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); - } - return out; + return; } -#if !defined(SPM_WITH_MPI) - assert( 0 ); -#else - n = spm->n; - nexp = spm->nexp; - if ( root == -1 ) { - MPI_Allreduce( &n, &nmax, 1, MPI_INT, MPI_MAX, spm->comm ); - MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm ); - } - else { - MPI_Reduce( &n, &nmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); - MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); - } - - if ( local ) { - out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); - tmp_out = malloc( nexpmax * nrhs * sizeof( spm_complex64_t ) ); - tmp_l2g = malloc( nmax * sizeof( spm_int_t ) ); - } - - for( c=0; c<spm->clustnbr; c++ ) { - MPI_Status status; - - if ( c == spm->clustnum ) { - current_n = spm->n; - current_nexp = spm->nexp; - current_l2g = spm->loc2glob; - current_out = (spm_complex64_t*)x; - } - else { - current_n = 0; - current_nexp = 0; - current_l2g = tmp_l2g; - current_out = tmp_out; - } + MPI_Allreduce( MPI_IN_PLACE, bglob, spm->gNexp * nrhs, SPM_MPI_COMPLEX64, MPI_SUM, spm->comm ); - if ( root == -1 ) { - MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); - MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); - if ( current_n > 0 ) { - MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); - MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); + baseval = spmFindBase( spm ); + loc2glob = spm->loc2glob; + for( i=0; i<spm->n; i++, loc2glob++ ) { + ig = *loc2glob - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; + for( j=0; j<nrhs; j++ ) { + for( k=0; k<dofi; k++ ) { + rhs[ j * spm->nexp + k ] = bglob[ row + j * spm->gNexp + k ]; } } - else if ( root != c ) { /* No communication if c == root */ - if ( c == spm->clustnum ) { - MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); - MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); - if ( current_n > 0 ) { - MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); - MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); - } - } - if ( root == spm->clustnum ) { - MPI_Recv( ¤t_n, 1, SPM_MPI_INT, c, 0, spm->comm, &status ); - MPI_Recv( ¤t_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status ); - if ( current_n > 0 ) { - MPI_Recv( current_l2g, current_n, SPM_MPI_INT, c, 2, spm->comm, &status ); - MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status ); - } - } - } - - if ( !local ) { - continue; - } - - outptr = out; - for( i=0; i<current_n; i++, current_l2g++ ) { - ig = *current_l2g - baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; - row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; - for( j=0; j<nrhs; j++ ) { - for( k=0; k<dofi; k++ ) { - outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; - } - } - current_out += dofi; - } - } - - if ( local ) { - free( tmp_out ); - free( tmp_l2g ); + rhs += dofi; } - +#else + (void)spm; + (void)bglob; + (void)bloc; + (void)nrhs; #endif - return out; -} +} \ No newline at end of file diff --git a/tests/z_spm_tests.c b/tests/z_spm_tests.c index ca96dc060bddeb6116315a10abece30028095a13..c53a0900c9c845312b8f437c1456d24b152c6b0f 100644 --- a/tests/z_spm_tests.c +++ b/tests/z_spm_tests.c @@ -353,7 +353,7 @@ z_spm_dist_genrhs_check( const spmatrix_t *spm, norm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', M, nrhs, bloc, M ); /* Let's gather the distributed RHS */ - tmp = z_spmReduceRHS( spm, nrhs, bdst, spm->nexp, root ); + tmp = z_spmGatherRHS( spm, nrhs, bdst, spm->nexp, root ); rc = 0; if ( tmp != NULL ) { @@ -434,7 +434,7 @@ z_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t /* Compute matrix norm and gather info */ Anorm = spmNorm( SpmInfNorm, spm ); spmloc = spmGather( spm, 0 ); - yd = z_spmReduceRHS( spm, nrhs, y, ldd, 0 ); + yd = z_spmGatherRHS( spm, nrhs, y, ldd, 0 ); /* Compute the local sparse matrix-vector product */ if ( spm->clustnum == 0 ) {