diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3f897fcdd6718df2df7fb1caef63200e90c20644..fd7753a578be36e5de3e01ee56c77e9759605d27 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -196,6 +196,7 @@ set(SOURCES
   src/z_spm_dof_extend.c
   src/z_spm_norm.c
   src/z_spm_scal.c
+  src/z_spm_gather_rhs.c
   src/z_spm_reduce_rhs.c
   src/z_spm_convert_to_csc.c
   src/z_spm_convert_to_csr.c
diff --git a/src/z_spm.h b/src/z_spm.h
index d48f742fc235d3923b111bba316adfed0249ef4f..a2fe6adfb786607750da05d6e791a8e37ae77fe9 100644
--- a/src/z_spm.h
+++ b/src/z_spm.h
@@ -75,7 +75,8 @@ spm_int_t z_spmSymmetrize( spmatrix_t *spm );
 
 int              z_spmGenRHS(spm_rhstype_t type, int nrhs, const spmatrix_t *spm, void *x, int ldx, void *b, int ldb );
 int              z_spmCheckAxb( spm_fixdbl_t eps, int nrhs, const spmatrix_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx );
-spm_complex64_t *z_spmReduceRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root );
+spm_complex64_t *z_spmGatherRHS( const spmatrix_t *spm, int nrhs, const spm_complex64_t *x, spm_int_t ldx, int root );
+void             z_spmReduceRhs( const spmatrix_t *spm, int nrhs, spm_complex64_t *bglob, spm_complex64_t *bloc );
 
 /**
  * Output routines
diff --git a/src/z_spm_gather_rhs.c b/src/z_spm_gather_rhs.c
new file mode 100644
index 0000000000000000000000000000000000000000..e2407d9969ac6050c7dd7e20bd6b5b991ed0f447
--- /dev/null
+++ b/src/z_spm_gather_rhs.c
@@ -0,0 +1,155 @@
+/**
+ *
+ * @file z_spm_gather_rhs.c
+ *
+ * SParse Matrix package right hand side reduction routine.
+ *
+ * @copyright 2020-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @version 1.0.0
+ * @author Delarue Tony
+ * @author Faverge Mathieu
+ * @date 2020-06-11
+ *
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "common.h"
+#include "z_spm.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief Reduce all the global C coefficients and store the good ones in local
+ *
+ *******************************************************************************
+ *
+ * @param[in] spm
+ *          The sparse matrix spm
+ *
+ * @param[inout] glob
+ *          The global vector to reduce
+ *
+ * @param[inout] loc
+ *          Local vector
+ *
+ *******************************************************************************/
+spm_complex64_t *
+z_spmGatherRHS( const spmatrix_t      *spm,
+                int                    nrhs,
+                const spm_complex64_t *x,
+                spm_int_t              ldx,
+                int                    root )
+{
+    spm_complex64_t *outptr, *out = NULL;
+    int              c;
+    int              n, nmax = 0;
+    int              nexp, nexpmax = 0;
+    spm_complex64_t *tmp_out, *current_out;
+    spm_int_t       *tmp_l2g, *current_l2g;
+    spm_int_t        current_n, current_nexp;
+    spm_int_t        i, j, k, ig, dofi, row;
+    int              local =  ( ( root == -1 ) || (root == spm->clustnum) );
+    spm_int_t        baseval = spmFindBase( spm );
+
+    /* We do not handle cases where ldx is different from spm->n */
+    assert( ldx == spm->nexp );
+
+    if ( spm->loc2glob == NULL ) {
+        if ( local ) {
+            out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) );
+            memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) );
+        }
+        return out;
+    }
+
+#if !defined(SPM_WITH_MPI)
+    assert( 0 );
+#else
+    n    = spm->n;
+    nexp = spm->nexp;
+    if ( root == -1 ) {
+        MPI_Allreduce( &n,    &nmax,    1, MPI_INT, MPI_MAX, spm->comm );
+        MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm );
+    }
+    else {
+        MPI_Reduce( &n,    &nmax,    1, MPI_INT, MPI_MAX, root, spm->comm );
+        MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm );
+    }
+
+    if ( local ) {
+        out     = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) );
+        tmp_out = malloc( nexpmax    * nrhs * sizeof( spm_complex64_t ) );
+        tmp_l2g = malloc( nmax              * sizeof( spm_int_t )       );
+    }
+
+    for( c=0; c<spm->clustnbr; c++ ) {
+        MPI_Status status;
+
+        if ( c == spm->clustnum ) {
+            current_n    = spm->n;
+            current_nexp = spm->nexp;
+            current_l2g  = spm->loc2glob;
+            current_out  = (spm_complex64_t*)x;
+        }
+        else {
+            current_n    = 0;
+            current_nexp = 0;
+            current_l2g  = tmp_l2g;
+            current_out  = tmp_out;
+        }
+
+        if ( root == -1 ) {
+            MPI_Bcast( &current_n,    1, SPM_MPI_INT, c, spm->comm );
+            MPI_Bcast( &current_nexp, 1, SPM_MPI_INT, c, spm->comm );
+            if ( current_n > 0 ) {
+                MPI_Bcast( current_l2g, current_n,           SPM_MPI_INT,       c, spm->comm );
+                MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm );
+            }
+        }
+        else if ( root != c ) { /* No communication if c == root */
+            if ( c == spm->clustnum ) {
+                MPI_Send( &current_n,    1, SPM_MPI_INT, root, 0, spm->comm );
+                MPI_Send( &current_nexp, 1, SPM_MPI_INT, root, 1, spm->comm );
+                if ( current_n > 0 ) {
+                    MPI_Send( current_l2g, current_n,           SPM_MPI_INT,       root, 2, spm->comm );
+                    MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm );
+                }
+            }
+            if ( root == spm->clustnum ) {
+                MPI_Recv( &current_n,    1, SPM_MPI_INT, c, 0, spm->comm, &status );
+                MPI_Recv( &current_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status );
+                if ( current_n > 0 ) {
+                    MPI_Recv( current_l2g, current_n,           SPM_MPI_INT,       c, 2, spm->comm, &status );
+                    MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status );
+                }
+            }
+        }
+
+        if ( !local ) {
+            continue;
+        }
+
+        outptr = out;
+        for( i=0; i<current_n; i++, current_l2g++ ) {
+            ig   = *current_l2g - baseval;
+            dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig];
+            row  = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval;
+            for( j=0; j<nrhs; j++ ) {
+                for( k=0; k<dofi; k++ ) {
+                    outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ];
+                }
+            }
+            current_out += dofi;
+        }
+    }
+
+    if ( local ) {
+        free( tmp_out );
+        free( tmp_l2g );
+    }
+
+#endif
+    return out;
+}
diff --git a/src/z_spm_matrixvector.c b/src/z_spm_matrixvector.c
index e089cd9795746fdb422ee1ccfd47b545ab1decba..e58c620db6307c39ce8f42f515ac54c240bbdf85 100644
--- a/src/z_spm_matrixvector.c
+++ b/src/z_spm_matrixvector.c
@@ -110,18 +110,18 @@ __spm_zmatvec_sy_csc( const __spm_zmatvec_t *args )
     spm_int_t              incy       = args->incy;
     const __conj_fct_t     conjA_fct  = args->conjA_fct;
     const __conj_fct_t     conjAt_fct = args->conjAt_fct;
-    spm_int_t              col, gcol, row, i;
+    spm_int_t              col, gcol, grow, i;
 
 
     for( col=0; col<n; col++, colptr++ )
     {
-        gcol = (loc2glob == NULL) ? col : loc2glob[col];
+        gcol = (loc2glob == NULL) ? col : loc2glob[col] - baseval ;
         for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ )
         {
-            row = *rowptr - baseval;
-            if ( row != gcol ) {
-                y[  row * incy ] += alpha *  conjA_fct( *values ) * x[ gcol * incx ];
-                y[ gcol * incy ] += alpha * conjAt_fct( *values ) * x[  row * incx ];
+            grow = *rowptr - baseval;
+            if ( grow != gcol ) {
+                y[ grow * incy ] += alpha *  conjA_fct( *values ) * x[ gcol * incx ];
+                y[ gcol * incy ] += alpha * conjAt_fct( *values ) * x[ grow * incx ];
             }
             else {
                 y[ gcol * incy ] += alpha *  conjA_fct( *values ) * x[ gcol * incx ];
@@ -140,6 +140,7 @@ __spm_zmatvec_ge_csc( const __spm_zmatvec_t *args )
     const spm_int_t       *rowptr    = args->rowptr;
     const spm_int_t       *colptr    = args->colptr;
     const spm_complex64_t *values    = args->values;
+    const spm_int_t       *loc2glob   = args->loc2glob;
     const spm_complex64_t *x         = args->x;
     spm_int_t              incx      = args->incx;
     spm_complex64_t       *y         = args->y;
@@ -158,12 +159,14 @@ __spm_zmatvec_ge_csc( const __spm_zmatvec_t *args )
         }
     }
     else {
-        for( col=0; col<n; col++, colptr++, y+=incy )
+        spm_complex64_t *yptr;
+        for( col=0; col<n; col++, colptr++ )
         {
+            yptr = (loc2glob == NULL) ? y + col * incy : y + (loc2glob[col] - baseval) * incy ;
             for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ )
             {
                 row = *rowptr - baseval;
-                *y += alpha * conjA_fct( *values ) * x[ row * incx ];
+                *yptr += alpha * conjA_fct( *values ) * x[ row * incx ];
             }
         }
     }
@@ -385,39 +388,6 @@ __spm_zmatvec_args_init( __spm_zmatvec_t       *args,
     return SPM_SUCCESS;
 }
 
-static inline void
-z_spmCopyC( const spmatrix_t      *spm,
-            const spm_complex64_t *xloc,
-                  spm_complex64_t *xglob )
-{
-    spm_int_t        i;
-    spm_int_t       *loc2glob;
-    if(spm->loc2glob == NULL) {
-        return;
-    }
-
-    loc2glob = spm->loc2glob;
-    memset( xglob, 0, spm->gN * sizeof(spm_complex64_t) );
-    for( i=0; i<spm->n; i++, loc2glob++ ) {
-        xglob[ (*loc2glob) ] = xloc[i];
-    }
-}
-
-static inline void
-z_spmCopyB( const spmatrix_t      *spm,
-            const spm_complex64_t *xloc,
-                  spm_complex64_t *xglob)
-{
-    if( (spm->mtxtype == SpmGeneral) || (spm->loc2glob == NULL) ) {
-        return;
-    }
-    z_spmCopyC( spm, xloc, xglob );
-#if defined(SPM_WITH_MPI)
-    MPI_Allreduce( MPI_IN_PLACE, xglob, spm->gN, SPM_MPI_COMPLEX64, MPI_SUM, spm->comm );
-#endif
-}
-
-
 /**
  *******************************************************************************
  *
@@ -529,15 +499,15 @@ spm_zspmm( spm_side_t             side,
         M = A->n;
         N = K;
 
-        ldx  = ldb;
-        ldy  = ldc;
+        ldx = ldb;
+        ldy = ldc;
     }
     else {
         M = K;
         N = A->n;
 
-        ldx  = 1;
-        ldy  = 1;
+        ldx = 1;
+        ldy = 1;
     }
 
     if ( beta == 0. ) {
@@ -551,28 +521,57 @@ spm_zspmm( spm_side_t             side,
         return SPM_SUCCESS;
     }
 
-    ldc  = (A->loc2glob != NULL) ? A->gN : ldc;
-    Ctmp = (A->loc2glob != NULL) ? malloc(A->gN * sizeof(spm_complex64_t)) : NULL;
-    ldb  = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? A->gN : ldb;
-    Btmp = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? malloc(A->gN * sizeof(spm_complex64_t)) : NULL;
+    /* Need a BIG cleanup */
+    if(A->loc2glob != NULL){
+        spm_int_t i, j, idx;
+        spm_int_t ig, dof, baseval, *loc2glob;
+
+        if( A->mtxtype != SpmGeneral || transA != SpmNoTrans ) {
+            Btmp = z_spmGatherRHS( A, N, B, ldb, -1 );
+            ldb  = A->gNexp;
+        }
+        else {
+            Btmp = (spm_complex64_t*)B;
+        }
+        Ctmp = malloc(A->gNexp * N * sizeof(spm_complex64_t) );
+        ldc  = A->gNexp;
+        memset( Ctmp, 0, A->gNexp * N * sizeof(spm_complex64_t) );
+
+        baseval = spmFindBase(A);
+        for ( j = 0; j < N; j++)
+        {
+            loc2glob = A->loc2glob;
+            for ( i = 0; i < A->n; i++, loc2glob++)
+            {
+                ig  = *loc2glob - baseval;
+                dof = (A->dof > 0) ? A->dof : A->dofs[ig+1] - A->dofs[ig];
+                idx = (A->dof > 0) ? A->dof * ig : A->dofs[ig] - baseval;
+                memcpy( (Ctmp + j * A->gNexp + idx),
+                        (C    + j * A->nexp  +   i),
+                        dof * sizeof(spm_complex64_t) );
+            }
+        }
+    }
+    else {
+        Btmp = (spm_complex64_t*)B;
+        Ctmp = C;
+    }
 
     __spm_zmatvec_args_init( &args, side, transA,
                              alpha, A, Btmp, ldb, Ctmp, ldc );
 
     for( r=0; (r < N) && (rc == SPM_SUCCESS); r++ ) {
-        z_spmCopyB( A, B + r * ldx, Btmp );
-        z_spmCopyC( A, C + r * ldy, Ctmp );
-
-        args.x = (Btmp == NULL) ? B + r * ldx : Btmp;
-        args.y = (Ctmp == NULL) ? C + r * ldy : Ctmp;
+        args.x = Btmp + r * ldx;
+        args.y = Ctmp + r * ldy;
         rc = args.loop_fct( &args );
-
-        z_spmReduce( A, Ctmp, C +  r * ldy );
     }
 
+    /* Reduce only when necessary */
+    z_spmReduceRhs( A, K, Ctmp, C );
+
     if(A->loc2glob != NULL) {
         free(Ctmp);
-        if(A->mtxtype != SpmGeneral){
+        if( A->mtxtype != SpmGeneral ) {
             free(Btmp);
         }
     }
@@ -648,25 +647,42 @@ spm_zspmv( spm_trans_t            trans,
         return SPM_SUCCESS;
     }
 
-    ytmp =  (A->loc2glob != NULL) ? malloc(A->gN * sizeof(spm_complex64_t)) : y;
-    incy =  (A->loc2glob != NULL) ? A->gN : incy;
-    xtmp = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? malloc(A->gN * sizeof(spm_complex64_t)) : (spm_complex64_t *)x;
-    incx = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? A->gN : incx;
+    if(A->loc2glob != NULL){
+        spm_int_t i, idx;
+        spm_int_t ig, dof, baseval, *loc2glob;
 
-    z_spmCopyB( A, x, xtmp );
-    z_spmCopyC( A, y, ytmp );
+        xtmp = z_spmGatherRHS( A, 1, x, incx, -1 );
+        incx  = A->gNexp;
+        ytmp = malloc(A->gNexp * sizeof(spm_complex64_t) );
+        incy  = A->gNexp;
+        memset( ytmp, 0, A->gNexp * sizeof(spm_complex64_t) );
+
+        baseval = spmFindBase(A);
+        loc2glob = A->loc2glob;
+        for ( i = 0; i < A->n; i++, loc2glob++)
+        {
+            ig  = *loc2glob - baseval;
+            dof = (A->dof > 0) ? A->dof : A->dofs[ig+1] - A->dofs[ig];
+            idx = (A->dof > 0) ? A->dof * ig : A->dofs[ig] - baseval;
+            memcpy( (ytmp + idx),
+                    (y    +   i),
+                    dof * sizeof(spm_complex64_t) );
+        }
+    }
+    else {
+        xtmp = (spm_complex64_t*)x;
+        ytmp = y;
+    }
 
     __spm_zmatvec_args_init( &args, SpmLeft, trans,
                              alpha, A, xtmp, incx, ytmp, incy );
     rc = args.loop_fct( &args );
 
-    z_spmReduce( A, ytmp, y );
+    z_spmReduceRhs( A, 1, ytmp, y );
 
     if(A->loc2glob != NULL) {
         free(ytmp);
-        if(A->mtxtype != SpmGeneral){
-            free(xtmp);
-        }
+        free(xtmp);
     }
 
 
diff --git a/src/z_spm_reduce_rhs.c b/src/z_spm_reduce_rhs.c
index 69e96fb3dfd0df5e1cec86b81c2a51500659c993..da536297f4906e08af3f31bfa4fb820d29a8ab5e 100644
--- a/src/z_spm_reduce_rhs.c
+++ b/src/z_spm_reduce_rhs.c
@@ -2,18 +2,16 @@
  *
  * @file z_spm_reduce_rhs.c
  *
- * SParse Matrix package right hand side reduction routine.
+ * SParse Matrix package matrix-vector multiplication routines.
  *
- * @copyright 2020-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * @copyright 2016-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  * @version 1.0.0
  * @author Delarue Tony
- * @author Faverge Mathieu
- * @date 2020-06-11
+ * @date 2020-03-08
  *
  * @precisions normal z -> c d s
- *
  **/
 #include "common.h"
 #include "z_spm.h"
@@ -28,128 +26,51 @@
  * @param[in] spm
  *          The sparse matrix spm
  *
- * @param[inout] glob
- *          The global vector to reduce
+ * @param[in] nrhs
+ *          Number of Rhs vectors.
+ *
+ * @param[inout] bglob
+ *          The global rhs vector to reduce.
  *
- * @param[inout] loc
- *          Local vector
+ * @param[inout] bloc
+ *          Local rhs vector.
  *
  *******************************************************************************/
-spm_complex64_t *
-z_spmReduceRHS( const spmatrix_t      *spm,
-                int                    nrhs,
-                const spm_complex64_t *x,
-                spm_int_t              ldx,
-                int                    root )
+void
+z_spmReduceRhs( const spmatrix_t      *spm,
+                      int              nrhs,
+                      spm_complex64_t *bglob,
+                      spm_complex64_t *bloc  )
 {
-    spm_complex64_t *outptr, *out = NULL;
-    int              c;
-    int              n, nmax = 0;
-    int              nexp, nexpmax = 0;
-    spm_complex64_t *tmp_out, *current_out;
-    spm_int_t       *tmp_l2g, *current_l2g;
-    spm_int_t        current_n, current_nexp;
-    spm_int_t        i, j, k, ig, dofi, row;
-    int              local =  ( ( root == -1 ) || (root == spm->clustnum) );
-    spm_int_t        baseval = spmFindBase( spm );
-
-    /* We do not handle cases where ldx is different from spm->n */
-    assert( ldx == spm->nexp );
+#if defined(SPM_WITH_MPI)
+    spm_int_t        i, j, k;
+    spm_int_t        ig, dofi, row, baseval;
+    spm_int_t       *loc2glob;
+    spm_complex64_t *rhs = bloc;
 
     if ( spm->loc2glob == NULL ) {
-        if ( local ) {
-            out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) );
-            memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) );
-        }
-        return out;
+        return;
     }
 
-#if !defined(SPM_WITH_MPI)
-    assert( 0 );
-#else
-    n    = spm->n;
-    nexp = spm->nexp;
-    if ( root == -1 ) {
-        MPI_Allreduce( &n,    &nmax,    1, MPI_INT, MPI_MAX, spm->comm );
-        MPI_Allreduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, spm->comm );
-    }
-    else {
-        MPI_Reduce( &n,    &nmax,    1, MPI_INT, MPI_MAX, root, spm->comm );
-        MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm );
-    }
-
-    if ( local ) {
-        out     = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) );
-        tmp_out = malloc( nexpmax    * nrhs * sizeof( spm_complex64_t ) );
-        tmp_l2g = malloc( nmax              * sizeof( spm_int_t )       );
-    }
-
-    for( c=0; c<spm->clustnbr; c++ ) {
-        MPI_Status status;
-
-        if ( c == spm->clustnum ) {
-            current_n    = spm->n;
-            current_nexp = spm->nexp;
-            current_l2g  = spm->loc2glob;
-            current_out  = (spm_complex64_t*)x;
-        }
-        else {
-            current_n    = 0;
-            current_nexp = 0;
-            current_l2g  = tmp_l2g;
-            current_out  = tmp_out;
-        }
+    MPI_Allreduce( MPI_IN_PLACE, bglob, spm->gNexp * nrhs, SPM_MPI_COMPLEX64, MPI_SUM, spm->comm );
 
-        if ( root == -1 ) {
-            MPI_Bcast( &current_n,    1, SPM_MPI_INT, c, spm->comm );
-            MPI_Bcast( &current_nexp, 1, SPM_MPI_INT, c, spm->comm );
-            if ( current_n > 0 ) {
-                MPI_Bcast( current_l2g, current_n,           SPM_MPI_INT,       c, spm->comm );
-                MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm );
+    baseval  = spmFindBase( spm );
+    loc2glob = spm->loc2glob;
+    for( i=0; i<spm->n; i++, loc2glob++ ) {
+        ig   = *loc2glob - baseval;
+        dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig];
+        row  = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval;
+        for( j=0; j<nrhs; j++ ) {
+            for( k=0; k<dofi; k++ ) {
+                rhs[ j * spm->nexp + k ] = bglob[ row + j * spm->gNexp + k ];
             }
         }
-        else if ( root != c ) { /* No communication if c == root */
-            if ( c == spm->clustnum ) {
-                MPI_Send( &current_n,    1, SPM_MPI_INT, root, 0, spm->comm );
-                MPI_Send( &current_nexp, 1, SPM_MPI_INT, root, 1, spm->comm );
-                if ( current_n > 0 ) {
-                    MPI_Send( current_l2g, current_n,           SPM_MPI_INT,       root, 2, spm->comm );
-                    MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm );
-                }
-            }
-            if ( root == spm->clustnum ) {
-                MPI_Recv( &current_n,    1, SPM_MPI_INT, c, 0, spm->comm, &status );
-                MPI_Recv( &current_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status );
-                if ( current_n > 0 ) {
-                    MPI_Recv( current_l2g, current_n,           SPM_MPI_INT,       c, 2, spm->comm, &status );
-                    MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status );
-                }
-            }
-        }
-
-        if ( !local ) {
-            continue;
-        }
-
-        outptr = out;
-        for( i=0; i<current_n; i++, current_l2g++ ) {
-            ig   = *current_l2g - baseval;
-            dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig];
-            row  = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval;
-            for( j=0; j<nrhs; j++ ) {
-                for( k=0; k<dofi; k++ ) {
-                    outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ];
-                }
-            }
-            current_out += dofi;
-        }
-    }
-
-    if ( local ) {
-        free( tmp_out );
-        free( tmp_l2g );
+        rhs += dofi;
     }
-
+#else
+    (void)spm;
+    (void)bglob;
+    (void)bloc;
+    (void)nrhs;
 #endif
-    return out;
-}
+}
\ No newline at end of file
diff --git a/tests/z_spm_tests.c b/tests/z_spm_tests.c
index ca96dc060bddeb6116315a10abece30028095a13..c53a0900c9c845312b8f437c1456d24b152c6b0f 100644
--- a/tests/z_spm_tests.c
+++ b/tests/z_spm_tests.c
@@ -353,7 +353,7 @@ z_spm_dist_genrhs_check( const spmatrix_t      *spm,
     norm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', M, nrhs, bloc, M );
 
     /* Let's gather the distributed RHS */
-    tmp = z_spmReduceRHS( spm, nrhs, bdst, spm->nexp, root );
+    tmp = z_spmGatherRHS( spm, nrhs, bdst, spm->nexp, root );
 
     rc = 0;
     if ( tmp != NULL ) {
@@ -434,7 +434,7 @@ z_spm_dist_matvec_check( spm_int_t baseval, spm_trans_t trans, const spmatrix_t
     /* Compute matrix norm and gather info */
     Anorm  = spmNorm( SpmInfNorm, spm );
     spmloc = spmGather( spm, 0 );
-    yd     = z_spmReduceRHS( spm, nrhs, y, ldd, 0 );
+    yd     = z_spmGatherRHS( spm, nrhs, y, ldd, 0 );
 
     /* Compute the local sparse matrix-vector product */
     if ( spm->clustnum == 0 ) {