diff --git a/src/spm.c b/src/spm.c index 82a7fff45721e5b5dddfdf1e778f209874bd7337..5028953c13b5423e55aa857b42a861bbeceb3ea5 100644 --- a/src/spm.c +++ b/src/spm.c @@ -165,36 +165,19 @@ spmInit( spmatrix_t *spm ) void spmAlloc( spmatrix_t *spm ) { - spm_int_t colsize, rowsize, valsize, dofsize; - - switch(spm->fmttype){ - case SpmCSC: - colsize = spm->n + 1; - rowsize = spm->nnz; - valsize = spm->nnzexp; - dofsize = spm->gN + 1; - break; - case SpmCSR: - colsize = spm->nnz; - rowsize = spm->n + 1; - valsize = spm->nnzexp; - dofsize = spm->gN + 1; - break; - case SpmIJV: - default: - colsize = spm->nnz; - rowsize = spm->nnz; - valsize = spm->nnzexp; - dofsize = spm->gN + 1; - } + spm_int_t colsize = (spm->fmttype == SpmCSC) ? spm->n + 1 : spm->nnz; + spm_int_t rowsize = (spm->fmttype == SpmCSR) ? spm->n + 1 : spm->nnz; spm->colptr = (spm_int_t*)malloc( colsize * sizeof(spm_int_t) ); spm->rowptr = (spm_int_t*)malloc( rowsize * sizeof(spm_int_t) ); + if ( spm->dof < 1 ) { + spm_int_t dofsize = spm->gN + 1; spm->dofs = (spm_int_t*)malloc( dofsize * sizeof(spm_int_t) ); } + if(spm->flttype != SpmPattern) { - valsize = valsize * spm_size_of( spm->flttype ); + spm_int_t valsize = spm->nnzexp * spm_size_of( spm->flttype ); spm->values = malloc(valsize); } } @@ -260,7 +243,7 @@ spmBase( spmatrix_t *spm, int baseval ) { spm_int_t baseadj; - spm_int_t i, n, nnz; + spm_int_t i, n, nnz, colsize, rowsize; /* Parameter checks */ if ( spm == NULL ) { @@ -281,39 +264,20 @@ spmBase( spmatrix_t *spm, } baseadj = baseval - spmFindBase( spm ); - if (baseadj == 0) + if (baseadj == 0) { return; + } - n = spm->n; - nnz = spm->nnz; - - switch(spm->fmttype) - { - case SpmCSC: - assert( nnz == (spm->colptr[n] - spm->colptr[0]) ); - - for (i = 0; i <= n; i++) { - spm->colptr[i] += baseadj; - } - for (i = 0; i < nnz; i++) { - spm->rowptr[i] += baseadj; - } - break; + n = spm->n; + nnz = spm->nnz; + colsize = (spm->fmttype == SpmCSC) ? n + 1 : nnz; + rowsize = (spm->fmttype == SpmCSR) ? n + 1 : nnz; - case SpmCSR: - assert( nnz == (spm->rowptr[n] - spm->rowptr[0]) ); - for (i = 0; i <= n; i++) { - spm->rowptr[i] += baseadj; - } - for (i = 0; i < nnz; i++) { - spm->colptr[i] += baseadj; - } - break; - case SpmIJV: - for (i = 0; i < nnz; i++) { - spm->rowptr[i] += baseadj; - spm->colptr[i] += baseadj; - } + for (i = 0; i < colsize; i++) { + spm->colptr[i] += baseadj; + } + for (i = 0; i < rowsize; i++) { + spm->rowptr[i] += baseadj; } if (spm->loc2glob != NULL) { @@ -508,10 +472,6 @@ spmNorm( spm_normtype_t ntype, { double norm = -1.; - if ( spm->flttype == SpmPattern ) { - return norm; - } - switch (spm->flttype) { case SpmFloat: norm = (double)s_spmNorm( ntype, spm ); @@ -531,7 +491,7 @@ spmNorm( spm_normtype_t ntype, case SpmPattern: default: - ; + return norm; } return norm; @@ -807,26 +767,10 @@ spmCopy( const spmatrix_t *spm ) memcpy( newspm, spm, sizeof(spmatrix_t)); - switch(spm->fmttype){ - case SpmCSC: - colsize = spm->n + 1; - rowsize = spm->nnz; - valsize = spm->nnzexp; - dofsize = spm->gN + 1; - break; - case SpmCSR: - colsize = spm->nnz; - rowsize = spm->n + 1; - valsize = spm->nnzexp; - dofsize = spm->gN + 1; - break; - case SpmIJV: - default: - colsize = spm->nnz; - rowsize = spm->nnz; - valsize = spm->nnzexp; - dofsize = spm->gN + 1; - } + colsize = (spm->fmttype == SpmCSC) ? spm->n + 1 : spm->nnz; + rowsize = (spm->fmttype == SpmCSR) ? spm->n + 1 : spm->nnz; + valsize = spm->nnzexp; + dofsize = spm->gN + 1; if(spm->colptr != NULL) { newspm->colptr = (spm_int_t*)malloc( colsize * sizeof(spm_int_t) ); @@ -1038,6 +982,7 @@ spmPrintRHS( const spmatrix_t *spm, z_spmPrintRHS( stream, spm, nrhs, x, ldx ); break; case SpmDouble: + default: d_spmPrintRHS( stream, spm, nrhs, x, ldx ); } @@ -1145,10 +1090,6 @@ spmMatVec( spm_trans_t trans, return SPM_ERR_BADPARAMETER; } - if ( spm->dof != 1 ) { - espm = malloc( sizeof(spmatrix_t) ); - spmExpand( spm, espm ); - } switch (spm->flttype) { case SpmFloat: rc = spm_sspmv( trans, alpha, espm, x, 1, beta, y, 1 ); @@ -1631,50 +1572,50 @@ spm_get_distribution( const spmatrix_t *spm ) { int distribution = 0; + /* The matrix is not distributed */ if( (spm->loc2glob == NULL) || (spm->n == spm->gN) ) { distribution = ( SpmDistByColumn | SpmDistByRow ); + return distribution; + } + if( spm->fmttype == SpmCSC ){ + distribution = SpmDistByColumn; + } + else if ( spm->fmttype == SpmCSR ) { + distribution = SpmDistByRow; } else { - if( spm->fmttype == SpmCSC ){ - distribution = SpmDistByColumn; - } - else if ( spm->fmttype == SpmCSR ) { - distribution = SpmDistByRow; - } - else { - spm_int_t i, baseval; - spm_int_t *colptr = spm->colptr; - spm_int_t *glob2loc = spm->glob2loc; - - baseval = spmFindBase( spm ); - distribution = 1; - assert( glob2loc != NULL ); - for ( i = 0; i < spm->nnz; i++, colptr++ ) - { - /* - * If the global index is not in the local colptr - * -> row distribution - */ - if( glob2loc[ *colptr - baseval ] < 0 ) { - distribution = SpmDistByRow; - break; - } + spm_int_t i, baseval; + spm_int_t *colptr = spm->colptr; + spm_int_t *glob2loc = spm->glob2loc; + + baseval = spmFindBase( spm ); + distribution = 1; + assert( glob2loc != NULL ); + for ( i = 0; i < spm->nnz; i++, colptr++ ) + { + /* + * If the global index is not in the local colptr + * -> row distribution + */ + if( glob2loc[ *colptr - baseval ] < 0 ) { + distribution = SpmDistByRow; + break; } + } - #if defined(SPM_WITH_MPI) - { - int check = 0; - MPI_Allreduce( &distribution, &check, 1, MPI_INT, - MPI_BOR, spm->comm ); - /* - * If a matrix is distributed - * it cannot be distributed by row AND column - */ - assert( check != ( SpmDistByColumn | SpmDistByRow ) ); - assert( distribution == check ); - } - #endif +#if defined(SPM_WITH_MPI) + { + int check = 0; + MPI_Allreduce( &distribution, &check, 1, MPI_INT, + MPI_BOR, spm->comm ); + /* + * If a matrix is distributed + * it cannot be distributed by row AND column + */ + assert( check != ( SpmDistByColumn | SpmDistByRow ) ); + assert( distribution == check ); } +#endif } assert(distribution > 0); return distribution; diff --git a/src/spm_scatter.c b/src/spm_scatter.c index 1d59b0162baa9540778e902734d9708cc9d27de7..7077e46db05f597424845fbb5f7d70c43770994c 100644 --- a/src/spm_scatter.c +++ b/src/spm_scatter.c @@ -231,7 +231,7 @@ spm_scatter_csx_get_locals( const spmatrix_t *oldspm, root, newspm->comm ); } - /* Syore the local information */ + /* Store the local information */ newspm->nnz = allcounts[ newspm->clustnum * 3 + 1 ]; newspm->nnzexp = allcounts[ newspm->clustnum * 3 + 2 ]; @@ -339,7 +339,7 @@ spm_scatter_ijv_get_locals( const spmatrix_t *oldspm, root, newspm->comm ); } - /* Syore the local information */ + /* Store the local information */ newspm->nnz = allcounts[ newspm->clustnum * 3 + 1 ]; newspm->nnzexp = allcounts[ newspm->clustnum * 3 + 2 ]; diff --git a/src/z_spm_dof_extend.c b/src/z_spm_dof_extend.c index 83fe013a55b7212bafd444a6da75a951d5ca7e9b..23d75bcc007aedabe725cfcfc93ba65b7abc1656 100644 --- a/src/z_spm_dof_extend.c +++ b/src/z_spm_dof_extend.c @@ -22,7 +22,63 @@ * * @ingroup spm_dev_dof * - * @brief Extend a multi-dof sparse matrix to a single dof sparse matrix. + * @brief Update the newval array thanks to the old value and the degrees of + * freedom. + * + ******************************************************************************* + * + * @param[inout] newval + * The extended value array. + * + * @param[in] value + * The old value that will be extended. + * + * @param[in] dofi + * A degree of freedom. + * + * @param[in] dofj + * A degree of freedom. + * + * @param[in] diag + * 1 if row == col, 0 otherwise. + * + *******************************************************************************/ +static inline void +z_spm_dof_extend_update_values( spm_complex64_t *newval, + spm_complex64_t value, + spm_int_t dofi, + spm_int_t dofj, + int diag ) +{ + spm_int_t ii, jj; + spm_complex64_t *valptr = newval; + + if ( !diag ) { + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, valptr++) + { + *valptr = value; + } + } + } + else { + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, valptr++) + { + *valptr = value / (labs((long)(ii - jj)) + 1.); + } + } + } +} + +/** + ******************************************************************************* + * + * @ingroup spm_dev_dof + * + * @brief Extend a single dof CSX sparse matrix to a multi-dof sparse matrix. * ******************************************************************************* * @@ -30,89 +86,106 @@ * The sparse matrix to extend. * *******************************************************************************/ -void -z_spmDofExtend( spmatrix_t *spm ) +static inline void +z_spm_dof_extend_csx( spmatrix_t *spm ) { - spm_int_t j, ig, jg, k, ii, jj, dofi, dofj, baseval; - spm_int_t *colptr, *rowptr, *dofs; + spm_int_t i, j, baseval; + spm_int_t ig, jg, dofi, dofj; + spm_int_t *colptr, *rowptr, *dofs, *loc2glob; spm_complex64_t *newval, *oldval, *oldvalptr; - oldval = oldvalptr = (spm_complex64_t*)(spm->values); + oldval = oldvalptr = (spm_complex64_t*)(spm->values); newval = spm->values = malloc( spm->nnzexp * sizeof(spm_complex64_t) ); - baseval = spmFindBase( spm ); - colptr = spm->colptr; - rowptr = spm->rowptr; - dofs = spm->dofs; + baseval = spmFindBase( spm ); + colptr = (spm->fmttype == SpmCSC) ? spm->colptr : spm->rowptr; + rowptr = (spm->fmttype == SpmCSC) ? spm->rowptr : spm->colptr; + dofs = spm->dofs; + loc2glob = spm->loc2glob; - switch(spm->fmttype) + for(j=0; j<spm->n; j++, colptr++, loc2glob++) { - case SpmCSR: - /* Swap pointers to call CSC */ - colptr = spm->rowptr; - rowptr = spm->colptr; - - spm_attr_fallthrough; - - case SpmCSC: - /** - * Loop on col - */ - for(j=0; j<spm->n; j++, colptr++) - { - jg = ( spm->loc2glob == NULL ) ? j : spm->loc2glob[j] - baseval; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[jg+1] - dofs[jg]; + jg = (spm->loc2glob == NULL) ? j : *loc2glob - baseval; + dofj = (spm->dof > 0) ? spm->dof : dofs[jg+1] - dofs[jg]; - /** - * Loop on rows - */ - for(k=colptr[0]; k<colptr[1]; k++, rowptr++, oldval++) - { - ig = *rowptr - baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[ig+1] - dofs[ig]; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, newval++) - { - if ( ig == jg ) { - *newval = *oldval / (labs((long)(ii - jj)) + 1.); - } - else { - *newval = *oldval; - } - } - } - } - } - break; - case SpmIJV: - /** - * Loop on coordinates - */ - for(k=0; k<spm->nnz; k++, rowptr++, colptr++, oldval++) + for(i=colptr[0]; i<colptr[1]; i++, rowptr++, oldval++) { - ig = *rowptr - baseval; - jg = *colptr - baseval; + ig = *rowptr - baseval; dofi = ( spm->dof > 0 ) ? spm->dof : dofs[ig+1] - dofs[ig]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[jg+1] - dofs[jg]; - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, newval++) - { - if ( ig == jg ) { - *newval = *oldval / (labs((long)(ii - jj)) + 1.); - } - else { - *newval = *oldval; - } - } - } + z_spm_dof_extend_update_values( newval, *oldval, dofi, dofj, (ig == jg) ); + newval += (dofi*dofj); } - break; } + free( oldvalptr ); + + assert((newval - (spm_complex64_t*)spm->values) == spm->nnzexp); +} - free(oldvalptr); - return; +/** + ******************************************************************************* + * + * @ingroup spm_dev_dof + * + * @brief Extend a single dof IJV sparse matrix to a multi-dof sparse matrix. + * + ******************************************************************************* + * + * @param[inout] spm + * The sparse matrix to extend. + * + *******************************************************************************/ +static inline void +z_spm_dof_extend_ijv( spmatrix_t *spm ) +{ + spm_int_t k, baseval; + spm_int_t ig, jg, dofi, dofj; + spm_int_t *colptr, *rowptr, *dofs; + spm_complex64_t *newval, *oldval, *oldvalptr; + + oldval = oldvalptr = (spm_complex64_t*)(spm->values); + newval = spm->values = malloc( spm->nnzexp * sizeof(spm_complex64_t) ); + + baseval = spmFindBase( spm ); + colptr = spm->colptr; + rowptr = spm->rowptr; + dofs = spm->dofs; + + for(k=0; k<spm->nnz; k++, rowptr++, colptr++, oldval++) + { + ig = *rowptr - baseval; + jg = *colptr - baseval; + dofi = (spm->dof > 0) ? spm->dof : dofs[ig+1] - dofs[ig]; + dofj = (spm->dof > 0) ? spm->dof : dofs[jg+1] - dofs[jg]; + + z_spm_dof_extend_update_values( newval, *oldval, dofi, dofj, (ig == jg) ); + newval += (dofi*dofj); + } + free( oldvalptr ); + + assert((newval - (spm_complex64_t*)spm->values) == spm->nnzexp); +} + +/** + ******************************************************************************* + * + * @ingroup spm_dev_dof + * + * @brief Extend a single dof sparse matrix to a multi-dof sparse matrix. + * + ******************************************************************************* + * + * @param[inout] spm + * The sparse matrix to extend. + * + *******************************************************************************/ +void +z_spmDofExtend( spmatrix_t *spm ) +{ + if (spm->fmttype != SpmIJV) { + z_spm_dof_extend_csx( spm ); + } + else { + z_spm_dof_extend_ijv( spm ); + } } diff --git a/src/z_spm_expand.c b/src/z_spm_expand.c index 07a38e8ce4def2c48d8a7e2ee8f69f365f4cf27e..48fd59055d8239b0bd6b1555c795d5e3262267b3 100644 --- a/src/z_spm_expand.c +++ b/src/z_spm_expand.c @@ -30,12 +30,10 @@ spm_expand_loc2glob( const spmatrix_t *spm_in, spmatrix_t *spm_out ) /* Constant dof */ if ( spm_in->dof > 0 ) { ndof = spm_in->dof; - for(i=0; i<spm_in->n; i++, l2g_in++) - { + for ( i=0; i<spm_in->n; i++, l2g_in++ ) { ig = *l2g_in - baseval; jg = ig * ndof + baseval; - for(j=0; j<ndof; j++, l2g_out++) - { + for ( j=0; j<ndof; j++, l2g_out++ ) { *l2g_out = jg + j; } } @@ -43,14 +41,12 @@ spm_expand_loc2glob( const spmatrix_t *spm_in, spmatrix_t *spm_out ) /* Variadic dof */ else { spm_int_t *dofs = spm_in->dofs - baseval; - for(i=0; i<spm_in->n; i++, l2g_in++) - { + for ( i=0; i<spm_in->n; i++, l2g_in++ ) { ig = *l2g_in; ndof = dofs[ig+1] - dofs[ig]; jg = dofs[ig]; - for(j=0; j<ndof; j++, l2g_out++) - { + for (j=0; j<ndof; j++, l2g_out++) { *l2g_out = jg + j; } } @@ -96,43 +92,40 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) assert( spm_in->fmttype == SpmCSC ); baseval = spmFindBase( spm_in ); - oldcol = spm_in->colptr; - oldrow = spm_in->rowptr; - dofs = spm_in->dofs; + oldcol = spm_in->colptr; + oldrow = spm_in->rowptr; + dofs = spm_in->dofs; #if !defined(PRECISION_p) - oldval = oldval2 = (spm_complex64_t*)(spm_in->values); + oldval = oldval2 = (spm_complex64_t *)( spm_in->values ); #endif - spm_out->colptr = newcol = malloc(sizeof(spm_int_t)*(spm_out->n+1)); + spm_out->colptr = newcol = malloc( sizeof( spm_int_t ) * ( spm_out->n + 1 ) ); /** * First loop to compute the new colptr */ *newcol = baseval; - for(j=0; j<spm_in->n; j++, oldcol++) - { + for ( j=0; j<spm_in->n; j++, oldcol++ ) { diag = 0; - jg = (spm_in->loc2glob == NULL) ? j : spm_in->loc2glob[j] - baseval; - dofj = (spm_in->dof > 0 ) ? spm_in->dof : dofs[jg+1] - dofs[jg]; + jg = ( spm_in->loc2glob == NULL ) ? j : spm_in->loc2glob[j] - baseval; + dofj = ( spm_in->dof > 0 ) ? spm_in->dof : dofs[jg+1] - dofs[jg]; /* Sum the heights of the elements in the column */ newcol[1] = newcol[0]; - for(k=oldcol[0]; k<oldcol[1]; k++, oldrow++ ) - { + for ( k=oldcol[0]; k<oldcol[1]; k++, oldrow++ ) { ig = *oldrow - baseval; - dofi = (spm_in->dof > 0 ) ? spm_in->dof : dofs[ig+1] - dofs[ig]; + dofi = ( spm_in->dof > 0 ) ? spm_in->dof : dofs[ig+1] - dofs[ig]; newcol[1] += dofi; - diag = (diag || (ig == jg)); + diag = ( diag || ( ig == jg ) ); } - diag = (diag & (spm_in->mtxtype != SpmGeneral)); + diag = ( diag & ( spm_in->mtxtype != SpmGeneral ) ); height = newcol[1] - newcol[0]; newcol++; /* Add extra columns */ - for(jj=1; jj<dofj; jj++, newcol++) - { + for ( jj=1; jj<dofj; jj++, newcol++ ) { newcol[1] = newcol[0] + height; if ( diag ) { @@ -143,10 +136,10 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) assert( ((spm_in->mtxtype == SpmGeneral) && ((newcol[0]-baseval) == spm_in->nnzexp)) || ((spm_in->mtxtype != SpmGeneral) && ((newcol[0]-baseval) <= spm_in->nnzexp)) ); - spm_out->nnz = newcol[0] - baseval; - spm_out->rowptr = newrow = malloc(sizeof(spm_int_t)*spm_out->nnz); -#if !defined(PRECISION_p) - spm_out->values = newval = malloc(sizeof(spm_complex64_t)*spm_out->nnz); + spm_out->nnz = newcol[0] - baseval; + spm_out->rowptr = newrow = malloc( sizeof(spm_int_t) * spm_out->nnz ); +#if !defined( PRECISION_p ) + spm_out->values = newval = malloc( sizeof(spm_complex64_t) * spm_out->nnz ); #endif /** @@ -155,15 +148,14 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) oldcol = spm_in->colptr; oldrow = spm_in->rowptr; newcol = spm_out->colptr; - for(j=0, col=0; j<spm_in->n; j++, oldcol++) - { + for ( j=0, col=0; j<spm_in->n; j++, oldcol++ ) { /** * Backup current position in oldval because we will pick * interleaved data inside the buffer */ lda = newcol[1] - newcol[0]; oldval2 = oldval; - jg = (spm_in->loc2glob == NULL) ? j : spm_in->loc2glob[j] - baseval; + jg = ( spm_in->loc2glob == NULL ) ? j : spm_in->loc2glob[j] - baseval; if ( spm_in->dof > 0 ) { dofj = spm_in->dof; @@ -173,16 +165,14 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) dofj = dofs[jg+1] - dofs[jg]; } - for(jj=0; jj<dofj; jj++, col++, newcol++) - { + for ( jj=0; jj<dofj; jj++, col++, newcol++ ) { assert( ((spm_in->mtxtype == SpmGeneral) && (lda == (newcol[1] - newcol[0]))) || ((spm_in->mtxtype != SpmGeneral) && (lda >= (newcol[1] - newcol[0]))) ); /* Move to the top of the column jj in coming element (i,j) */ oldval = oldval2; - for(k=oldcol[0]; k<oldcol[1]; k++) - { + for ( k=oldcol[0]; k<oldcol[1]; k++ ) { ig = oldrow[k-baseval] - baseval; if ( spm_in->dof > 0 ) { @@ -197,8 +187,7 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) /* Move to the top of the jj column in the current element */ oldval += dofi * jj; - for(ii=0; ii<dofi; ii++, row++) - { + for ( ii=0; ii<dofi; ii++, row++ ) { if ( (spm_in->mtxtype == SpmGeneral) || (ig != jg) || ((ig == jg) && (row >= col)) ) @@ -259,43 +248,40 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) assert( spm_in->fmttype == SpmCSR ); baseval = spmFindBase( spm_in ); - oldcol = spm_in->colptr; - oldrow = spm_in->rowptr; - dofs = spm_in->dofs; + oldcol = spm_in->colptr; + oldrow = spm_in->rowptr; + dofs = spm_in->dofs; #if !defined(PRECISION_p) - oldval = oldval2 = (spm_complex64_t*)(spm_in->values); + oldval = oldval2 = (spm_complex64_t*)(spm_in->values); #endif - spm_out->rowptr = newrow = malloc(sizeof(spm_int_t)*(spm_out->n+1)); + spm_out->rowptr = newrow = malloc( sizeof(spm_int_t) * (spm_out->n+1) ); /** * First loop to compute the new rowptr */ *newrow = baseval; - for(i=0; i<spm_in->n; i++, oldrow++) - { + for ( i=0; i<spm_in->n; i++, oldrow++ ) { diag = 0; - ig = (spm_in->loc2glob == NULL) ? i : spm_in->loc2glob[i] - baseval; - dofi = (spm_in->dof > 0 ) ? spm_in->dof : dofs[ig+1] - dofs[ig]; + ig = ( spm_in->loc2glob == NULL ) ? i : spm_in->loc2glob[i] - baseval; + dofi = ( spm_in->dof > 0 ) ? spm_in->dof : dofs[ig+1] - dofs[ig]; /* Sum the width of the elements in the row */ newrow[1] = newrow[0]; - for(k=oldrow[0]; k<oldrow[1]; k++, oldcol++) - { + for ( k=oldrow[0]; k<oldrow[1]; k++, oldcol++ ) { jg = *oldcol - baseval; dofj = (spm_in->dof > 0 ) ? spm_in->dof : dofs[jg+1] - dofs[jg]; newrow[1] += dofj; - diag = (diag || (ig == jg)); + diag = ( diag || ( ig == jg ) ); } - diag = (diag & (spm_in->mtxtype != SpmGeneral)); + diag = ( diag & ( spm_in->mtxtype != SpmGeneral ) ); height = newrow[1] - newrow[0]; newrow++; /* Add extra rows */ - for(ii=1; ii<dofi; ii++, newrow++) - { + for ( ii=1; ii<dofi; ii++, newrow++ ) { newrow[1] = newrow[0] + height; if ( diag ) { @@ -306,10 +292,10 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) assert( ((spm_in->mtxtype == SpmGeneral) && ((newrow[0]-baseval) == spm_in->nnzexp)) || ((spm_in->mtxtype != SpmGeneral) && ((newrow[0]-baseval) <= spm_in->nnzexp)) ); - spm_out->nnz = newrow[0] - baseval; - spm_out->colptr = newcol = malloc(sizeof(spm_int_t)*spm_out->nnz); + spm_out->nnz = newrow[0] - baseval; + spm_out->colptr = newcol = malloc( sizeof(spm_int_t) * spm_out->nnz ); #if !defined(PRECISION_p) - spm_out->values = newval = malloc(sizeof(spm_complex64_t)*spm_out->nnz); + spm_out->values = newval = malloc( sizeof(spm_complex64_t) * spm_out->nnz ); #endif /** @@ -318,15 +304,14 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) oldcol = spm_in->colptr; oldrow = spm_in->rowptr; newrow = spm_out->rowptr; - for(i=0, row=0; i<spm_in->n; i++, oldrow++) - { + for ( i=0, row=0; i<spm_in->n; i++, oldrow++ ) { /** * Backup current position in oldval because we will pick * interleaved data inside the buffer */ lda = newrow[1] - newrow[0]; oldval2 = oldval; - ig = (spm_in->loc2glob == NULL) ? i : spm_in->loc2glob[i] - baseval; + ig = ( spm_in->loc2glob == NULL ) ? i : spm_in->loc2glob[i] - baseval; if ( spm_in->dof > 0 ) { dofi = spm_in->dof; @@ -336,16 +321,14 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) dofi = dofs[ig+1] - dofs[ig]; } - for(ii=0; ii<dofi; ii++, row++, newrow++) - { - assert( ((spm_in->mtxtype == SpmGeneral) && (lda == (newrow[1] - newrow[0]))) || - ((spm_in->mtxtype != SpmGeneral) && (lda >= (newrow[1] - newrow[0]))) ); + for ( ii=0; ii<dofi; ii++, row++, newrow++ ) { + assert( ( ( spm_in->mtxtype == SpmGeneral ) && ( lda == ( newrow[1] - newrow[0] ) ) ) || + ( ( spm_in->mtxtype != SpmGeneral ) && ( lda >= ( newrow[1] - newrow[0] ) ) ) ); /* Move to the beginning of the row ii in coming element (i,j) */ oldval = oldval2 + ii; - for(k=oldrow[0]; k<oldrow[1]; k++) - { + for ( k=oldrow[0]; k<oldrow[1]; k++ ) { jg = oldcol[k-baseval] - baseval; if ( spm_in->dof > 0 ) { @@ -357,11 +340,9 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) col = dofs[jg] - baseval; } - for(jj=0; jj<dofj; jj++, col++) - { - if ( (spm_in->mtxtype == SpmGeneral) || - (ig != jg) || - ((ig == jg) && (row <= col)) ) + for ( jj=0; jj<dofj; jj++, col++ ) { + if ( ( spm_in->mtxtype == SpmGeneral ) || ( ig != jg ) || + ( ( ig == jg ) && ( row <= col ) ) ) { (*newcol) = col + baseval; newcol++; @@ -418,23 +399,22 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) assert( spm_in->fmttype == SpmIJV ); baseval = spmFindBase( spm_in ); - oldcol = spm_in->colptr; - oldrow = spm_in->rowptr; - dofs = spm_in->dofs; -#if !defined(PRECISION_p) - oldval = (spm_complex64_t*)(spm_in->values); + oldcol = spm_in->colptr; + oldrow = spm_in->rowptr; + dofs = spm_in->dofs; +#if !defined( PRECISION_p ) + oldval = (spm_complex64_t *)(spm_in->values); #endif /** * First loop to compute the size of the vectors */ - if (spm_in->mtxtype == SpmGeneral) { + if ( spm_in->mtxtype == SpmGeneral ) { spm_out->nnz = spm_in->nnzexp; } else { spm_out->nnz = 0; - for(k=0; k<spm_in->nnz; k++, oldrow++, oldcol++) - { + for ( k=0; k<spm_in->nnz; k++, oldrow++, oldcol++ ) { i = *oldrow - baseval; j = *oldcol - baseval; @@ -458,10 +438,10 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) assert( spm_out->nnz <= spm_in->nnzexp ); } - spm_out->rowptr = newrow = malloc(sizeof(spm_int_t)*spm_out->nnz); - spm_out->colptr = newcol = malloc(sizeof(spm_int_t)*spm_out->nnz); + spm_out->rowptr = newrow = malloc( sizeof(spm_int_t) * spm_out->nnz ); + spm_out->colptr = newcol = malloc( sizeof(spm_int_t) * spm_out->nnz ); #if !defined(PRECISION_p) - spm_out->values = newval = malloc(sizeof(spm_complex64_t)*spm_out->nnz); + spm_out->values = newval = malloc( sizeof(spm_complex64_t) * spm_out->nnz ); #endif /** @@ -469,8 +449,7 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) */ oldrow = spm_in->rowptr; oldcol = spm_in->colptr; - for(k=0; k<spm_in->nnz; k++, oldrow++, oldcol++) - { + for ( k=0; k<spm_in->nnz; k++, oldrow++, oldcol++ ) { i = *oldrow - baseval; j = *oldcol - baseval; @@ -488,13 +467,10 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) } if ( spm_in->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++,oldval+=1) - { - if ( (spm_in->mtxtype == SpmGeneral) || - (i != j) || - ((i == j) && (row+ii >= col+jj)) ) + for ( jj=0; jj<dofj; jj++ ) { + for ( ii=0; ii<dofi; ii++, oldval+=1 ) { + if ( ( spm_in->mtxtype == SpmGeneral ) || ( i != j ) || + ( ( i == j ) && ( row + ii >= col + jj ) ) ) { assert( row + ii < spm_out->gNexp ); assert( col + jj < spm_out->gNexp ); @@ -511,13 +487,10 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) } } else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++,oldval+=1) - { - if ( (spm_in->mtxtype == SpmGeneral) || - (i != j) || - ((i == j) && (row+ii >= col+jj)) ) + for ( ii=0; ii<dofi; ii++ ) { + for ( jj=0; jj<dofj; jj++, oldval+=1 ) { + if ( ( spm_in->mtxtype == SpmGeneral ) || ( i != j ) || + ( ( i == j ) && ( row + ii >= col + jj ) ) ) { assert( row + ii < spm_out->gNexp ); assert( col + jj < spm_out->gNexp ); @@ -593,16 +566,16 @@ z_spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) spm_expand_loc2glob( spm_in, spm_out ); } - switch (spm_in->fmttype) { - case SpmCSC: - z_spmCSCExpand( spm_in, spm_out ); - break; - case SpmCSR: - z_spmCSRExpand( spm_in, spm_out ); - break; - case SpmIJV: - z_spmIJVExpand( spm_in, spm_out ); - break; + switch ( spm_in->fmttype ) { + case SpmCSC: + z_spmCSCExpand( spm_in, spm_out ); + break; + case SpmCSR: + z_spmCSRExpand( spm_in, spm_out ); + break; + case SpmIJV: + z_spmIJVExpand( spm_in, spm_out ); + break; } spm_out->dof = 1; diff --git a/src/z_spm_gather_rhs.c b/src/z_spm_gather_rhs.c index 498c22246b37cb51040e1fbc11767f1a7f83f71a..b8a5a6dba77d5e33abfce93352a16ee14359c0d2 100644 --- a/src/z_spm_gather_rhs.c +++ b/src/z_spm_gather_rhs.c @@ -53,14 +53,13 @@ z_spmGatherRHS( const spmatrix_t *spm, spm_int_t ldx, int root ) { - int local = ( ( root == -1 ) || (root == spm->clustnum) ); spm_complex64_t *out = NULL; /* We do not handle cases where ldx is different from spm->n */ assert( ldx == spm->nexp ); if ( spm->loc2glob == NULL ) { - if ( local ) { + if ( ( root == -1 ) || ( root == spm->clustnum ) ) { out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); memcpy( out, x, spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); } @@ -70,7 +69,7 @@ z_spmGatherRHS( const spmatrix_t *spm, #if defined(SPM_WITH_MPI) int c; - int n, nmax = 0; + int n, nmax = 0; int nexp, nexpmax = 0; spm_complex64_t *tmp_out, *current_out, *outptr; spm_int_t *tmp_l2g, *current_l2g; @@ -89,46 +88,37 @@ z_spmGatherRHS( const spmatrix_t *spm, MPI_Reduce( &nexp, &nexpmax, 1, MPI_INT, MPI_MAX, root, spm->comm ); } - if ( local ) { + if ( ( root == -1 ) || ( root == spm->clustnum ) ) { out = malloc( spm->gNexp * nrhs * sizeof( spm_complex64_t ) ); tmp_out = malloc( nexpmax * nrhs * sizeof( spm_complex64_t ) ); tmp_l2g = malloc( nmax * sizeof( spm_int_t ) ); - } - - for( c=0; c<spm->clustnbr; c++ ) { - MPI_Status status; - if ( c == spm->clustnum ) { - current_n = spm->n; - current_nexp = spm->nexp; - current_l2g = spm->loc2glob; - current_out = (spm_complex64_t*)x; - } - else { - current_n = 0; - current_nexp = 0; - current_l2g = tmp_l2g; - current_out = tmp_out; - } + for ( c=0; c<spm->clustnbr; c++ ) { + MPI_Status status; - if ( root == -1 ) { - MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); - MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); - if ( current_n > 0 ) { - MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); - MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); - } - } - else if ( root != c ) { /* No communication if c == root */ if ( c == spm->clustnum ) { - MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); - MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); + current_n = spm->n; + current_nexp = spm->nexp; + current_l2g = spm->loc2glob; + current_out = (spm_complex64_t *)x; + } + else { + current_n = 0; + current_nexp = 0; + current_l2g = tmp_l2g; + current_out = tmp_out; + } + + if ( root == -1 ) { + MPI_Bcast( ¤t_n, 1, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( ¤t_nexp, 1, SPM_MPI_INT, c, spm->comm ); if ( current_n > 0 ) { - MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); - MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); + MPI_Bcast( current_l2g, current_n, SPM_MPI_INT, c, spm->comm ); + MPI_Bcast( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, spm->comm ); } } - if ( root == spm->clustnum ) { + else if ( root != c ) { /* No communication if c == root */ + assert( root == spm->clustnum ); MPI_Recv( ¤t_n, 1, SPM_MPI_INT, c, 0, spm->comm, &status ); MPI_Recv( ¤t_nexp, 1, SPM_MPI_INT, c, 1, spm->comm, &status ); if ( current_n > 0 ) { @@ -136,31 +126,37 @@ z_spmGatherRHS( const spmatrix_t *spm, MPI_Recv( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, c, 3, spm->comm, &status ); } } - } - - if ( !local ) { - continue; - } - outptr = out; - for( i=0; i<current_n; i++, current_l2g++ ) { - ig = *current_l2g - baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; - row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; - for( j=0; j<nrhs; j++ ) { - for( k=0; k<dofi; k++ ) { - outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; + outptr = out; + for ( i=0; i<current_n; i++, current_l2g++ ) { + ig = *current_l2g - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; + row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; + for ( j=0; j<nrhs; j++ ) { + for ( k=0; k<dofi; k++ ) { + outptr[ row + j * spm->gNexp + k ] = current_out[ j * current_nexp + k ]; + } } + current_out += dofi; } - current_out += dofi; } - } - if ( local ) { free( tmp_out ); free( tmp_l2g ); } + else { + current_n = spm->n; + current_nexp = spm->nexp; + current_l2g = spm->loc2glob; + current_out = (spm_complex64_t*)x; + MPI_Send( ¤t_n, 1, SPM_MPI_INT, root, 0, spm->comm ); + MPI_Send( ¤t_nexp, 1, SPM_MPI_INT, root, 1, spm->comm ); + if ( current_n > 0 ) { + MPI_Send( current_l2g, current_n, SPM_MPI_INT, root, 2, spm->comm ); + MPI_Send( current_out, current_nexp * nrhs, SPM_MPI_COMPLEX64, root, 3, spm->comm ); + } + } #endif return out; } diff --git a/src/z_spm_genrhs.c b/src/z_spm_genrhs.c index c09fc2f4224ea8c8615848c8bb696e15e5acb468..c9692ad066d1ddfcd639f55f4e8041a1d03d8e6c 100644 --- a/src/z_spm_genrhs.c +++ b/src/z_spm_genrhs.c @@ -57,8 +57,9 @@ Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { ran = seed; for (i = 0; n; n >>= 1, ++i) { - if (n & 1) + if (n & 1) { ran = a_k * ran + c_k; + } c_k *= (a_k + 1); a_k *= a_k; } @@ -336,7 +337,7 @@ z_spmRhsGenRndDist( const spmatrix_t *spm, spm_int_t baseval, spm_complex64_t *tmp = A; spm_int_t i, j, k, ig, dofi; unsigned long long int ran, jump; - unsigned long long int row, col; + spm_int_t row, col; const spm_int_t *l2g; const spm_int_t *dofs = spm->dofs; @@ -455,10 +456,6 @@ z_spmGenRHS( spm_rhstype_t type, int nrhs, return SPM_ERR_BADPARAMETER; } - /* if( spm->dof != 1 ) { */ - /* return SPM_ERR_BADPARAMETER; */ - /* } */ - if (nrhs == 1) { ldb = spm->nexp; ldx = spm->nexp; diff --git a/src/z_spm_matrixvector.c b/src/z_spm_matrixvector.c index 7530689dea1fd188cdad811d45a393042d90b5cf..977919c34dd98e0505d3c67f78bd19d2bcff5498 100644 --- a/src/z_spm_matrixvector.c +++ b/src/z_spm_matrixvector.c @@ -177,7 +177,7 @@ __spm_zmatvec_sy_csx( const __spm_zmatvec_t *args ) dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, conjAt_fct, alpha ); } else { - __spm_zmatvec_dof_loop( col, dofj, row, dofi, y, incy, x, incx, values, conjA_fct, alpha ); + __spm_zmatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha ); } values += dofi*dofj; } @@ -341,8 +341,12 @@ __spm_zmatvec_ge_ijv( const __spm_zmatvec_t *args ) spm_int_t *dof_local = NULL; + assert( ((dof > 0) && (dofs == NULL)) || + ((dof <= 0) && (dofs != NULL)) ); + if( (dofs != NULL) && (glob2loc != NULL) ) { dof_local = __spm_zmatvec_dofs_local( dofs, glob2loc, args->gN ); + assert( dof_local != NULL ); } if( args->follow_x ) { diff --git a/src/z_spm_norm.c b/src/z_spm_norm.c index c94daf14ad2cf1f6a55063d4691fd342982387c2..72b1997ae857f55d88812f0085690c43e89563f3 100644 --- a/src/z_spm_norm.c +++ b/src/z_spm_norm.c @@ -355,6 +355,7 @@ z_spmFrobeniusNorm( const spmatrix_t *spm ) break; case SpmIJV: + default: z_spmFrobeniusNorm_ijv( spm, data ); } } @@ -621,6 +622,155 @@ z_spm_oneinf_elt( const spm_mtxtype_t mtxtype, } } +/** + * @brief Compute the one/inf norm of an spm CSC structure. + */ +static inline void +z_spmOneInfNorm_csc( spm_normtype_t ntype, + const spmatrix_t *spm, + double *sumtab, + spm_int_t baseval ) +{ + spm_int_t i, j, ig, jg, col, row; + spm_int_t dofi, dofj, dof; + spm_int_t *colptr, *rowptr, *loc2glob, *dofs; + spm_complex64_t *valptr; + + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + loc2glob = spm->loc2glob; + dofs = spm->dofs; + dof = spm->dof; + for(j=0; j<spm->n; j++, colptr++, loc2glob++) + { + jg = (spm->loc2glob == NULL) ? j : (*loc2glob) - baseval; + if ( dof > 0 ) { + dofj = dof; + col = dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + for(i=colptr[0]; i<colptr[1]; i++, rowptr++) + { + ig = (*rowptr - baseval); + if ( dof > 0 ) { + dofi = dof; + row = dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + z_spm_oneinf_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + ntype, sumtab ); + valptr += dofi * dofj; + } + } +} + +/** + * @brief Compute the one/inf norm of an spm CSR structure. + */ +static inline void +z_spmOneInfNorm_csr( spm_normtype_t ntype, + const spmatrix_t *spm, + double *sumtab, + spm_int_t baseval ) +{ + spm_int_t i, j, ig, jg, col, row; + spm_int_t dofi, dofj, dof; + spm_int_t *colptr, *rowptr, *loc2glob, *dofs; + spm_complex64_t *valptr; + + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + loc2glob = spm->loc2glob; + dofs = spm->dofs; + dof = spm->dof; + for(i=0; i<spm->n; i++, rowptr++, loc2glob++) + { + ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; + if ( dof > 0 ) { + dofi = dof; + row = dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + for(j=rowptr[0]; j<rowptr[1]; j++, colptr++) + { + jg = (*colptr - baseval); + if ( dof > 0 ) { + dofj = dof; + col = dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + z_spm_oneinf_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + ntype, sumtab ); + valptr += dofi * dofj; + } + } +} + +/** + * @brief Compute the one/inf norm of an spm IJV structure. + */ +static inline void +z_spmOneInfNorm_ijv( spm_normtype_t ntype, + const spmatrix_t *spm, + double *sumtab, + spm_int_t baseval ) +{ + spm_int_t k, ig, jg, col, row; + spm_int_t dofi, dofj, dof; + spm_int_t *colptr, *rowptr, *dofs; + spm_complex64_t *valptr; + + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + dof = spm->dof; + + for(k=0; k<spm->nnz; k++, rowptr++, colptr++) + { + ig = *rowptr - baseval; + jg = *colptr - baseval; + + if ( dof > 0 ) { + dofi = dof; + row = dof * ig; + dofj = dof; + col = dof * jg; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + z_spm_oneinf_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + ntype, sumtab ); + valptr += dofi * dofj; + } +} + /** ******************************************************************************* * @@ -647,120 +797,25 @@ static inline double z_spmOneInfNorm( spm_normtype_t ntype, const spmatrix_t *spm ) { - spm_int_t i, j, k, baseval; - spm_int_t ig, dofi, row; - spm_int_t jg, dofj, col; - const spm_int_t *colptr; - const spm_int_t *rowptr; - const spm_int_t *dofs; - const spm_int_t *loc2glob; - const spm_complex64_t *valptr; + spm_int_t k, baseval; double *sumtab = calloc( spm->gNexp, sizeof(double) ); double norm = 0.; baseval = spmFindBase( spm ); - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); - dofs = spm->dofs; - loc2glob = spm->loc2glob; - - switch( spm->fmttype ){ + switch (spm->fmttype) + { case SpmCSC: - for(j=0; j<spm->n; j++, colptr++, loc2glob++) - { - jg = (spm->loc2glob == NULL) ? j : (*loc2glob) - baseval; - if ( spm->dof > 0 ) { - dofj = spm->dof; - col = spm->dof * jg; - } - else { - dofj = dofs[jg+1] - dofs[jg]; - col = dofs[jg] - baseval; - } - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - ig = (*rowptr - baseval); - if ( spm->dof > 0 ) { - dofi = spm->dof; - row = spm->dof * ig; - } - else { - dofi = dofs[ig+1] - dofs[ig]; - row = dofs[ig] - baseval; - } - - z_spm_oneinf_elt( spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr, - ntype, sumtab ); - valptr += dofi * dofj; - } - } + z_spmOneInfNorm_csc( ntype, spm, sumtab, baseval ); break; case SpmCSR: - for(i=0; i<spm->n; i++, rowptr++, loc2glob++) - { - ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; - if ( spm->dof > 0 ) { - dofi = spm->dof; - row = spm->dof * ig; - } - else { - dofi = dofs[ig+1] - dofs[ig]; - row = dofs[ig] - baseval; - } - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - jg = (*colptr - baseval); - if ( spm->dof > 0 ) { - dofj = spm->dof; - col = spm->dof * jg; - } - else { - dofj = dofs[jg+1] - dofs[jg]; - col = dofs[jg] - baseval; - } - - z_spm_oneinf_elt( spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr, - ntype, sumtab ); - valptr += dofi * dofj; - } - } + z_spmOneInfNorm_csr( ntype, spm, sumtab, baseval ); break; case SpmIJV: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - ig = *rowptr - baseval; - jg = *colptr - baseval; - - if ( spm->dof > 0 ) { - dofi = spm->dof; - row = spm->dof * ig; - dofj = spm->dof; - col = spm->dof * jg; - } - else { - dofi = dofs[ig+1] - dofs[ig]; - row = dofs[ig] - baseval; - dofj = dofs[jg+1] - dofs[jg]; - col = dofs[jg] - baseval; - } - - z_spm_oneinf_elt( spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr, - ntype, sumtab ); - valptr += dofi * dofj; - } - break; - default: - return SPM_ERR_BADPARAMETER; + z_spmOneInfNorm_ijv( ntype, spm, sumtab, baseval ); } #if defined(SPM_WITH_MPI) @@ -772,7 +827,7 @@ z_spmOneInfNorm( spm_normtype_t ntype, /* Look for the maximum */ { const double *sumtmp = sumtab; - for( i=0; i<spm->gNexp; i++, sumtmp++ ) + for( k=0; k<spm->gNexp; k++, sumtmp++ ) { if( norm < *sumtmp ) { norm = *sumtmp; diff --git a/src/z_spm_reduce_rhs.c b/src/z_spm_reduce_rhs.c index 30135468d800197a1807961461380454109cb28d..fd54e61888f68dc737db4a035c0bd103aac782f7 100644 --- a/src/z_spm_reduce_rhs.c +++ b/src/z_spm_reduce_rhs.c @@ -62,12 +62,15 @@ z_spmReduceRHS( const spmatrix_t *spm, baseval = spmFindBase( spm ); loc2glob = spm->loc2glob; - for( i=0; i<spm->n; i++, loc2glob++ ) { + for( i=0; i<spm->n; i++, loc2glob++ ) + { ig = *loc2glob - baseval; dofi = ( spm->dof > 0 ) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig]; row = ( spm->dof > 0 ) ? spm->dof * ig : spm->dofs[ig] - baseval; - for( j=0; j<nrhs; j++ ) { - for( k=0; k<dofi; k++ ) { + for( j=0; j<nrhs; j++ ) + { + for( k=0; k<dofi; k++ ) + { rhs[ j * ldb + k ] = bglob[ row + j * ldbglob + k ]; } } @@ -81,4 +84,4 @@ z_spmReduceRHS( const spmatrix_t *spm, (void)b; (void)ldb; #endif -} \ No newline at end of file +} diff --git a/tests/spm_dist_genrhs_tests.c b/tests/spm_dist_genrhs_tests.c index 6c9568b08c96a74f4b84ad120d0ce5dcc3b78032..27872e2c84e2fc9b6521d54650a5df21bad507b9 100644 --- a/tests/spm_dist_genrhs_tests.c +++ b/tests/spm_dist_genrhs_tests.c @@ -90,7 +90,7 @@ int main (int argc, char **argv) sizeloc = spm_size_of( spm->flttype ) * spm->nexp * nrhs; bloc = malloc( sizeloc ); - memset( bloc, 0xbeefdead, sizeloc ); + memset( bloc, 0xbeef, sizeloc ); if ( spmGenRHS( SpmRhsRndB, nrhs, spm, NULL, spm->nexp, bloc, spm->nexp ) != SPM_SUCCESS ) { fprintf( stderr, "Issue to generate the local rhs\n" ); @@ -119,7 +119,7 @@ int main (int argc, char **argv) baseval, dofname[dof+1], root ); } - memset( bdst, 0xdeadbeef, sizedst ); + memset( bdst, 0xdead, sizedst ); if ( spmGenRHS( SpmRhsRndB, nrhs, spmdist, NULL, spmdist->nexp, bdst, spmdist->nexp ) != SPM_SUCCESS ) { err++; diff --git a/tests/spm_dof_sort_tests.c b/tests/spm_dof_sort_tests.c index 146be200d0be9cb612421e0cfe95d156e5520427..147a9dc935e0c5777f1ce0550bd2a54371c053ed 100644 --- a/tests/spm_dof_sort_tests.c +++ b/tests/spm_dof_sort_tests.c @@ -53,10 +53,6 @@ spm_unsort( spmatrix_t *spm ) spm_int_t *colptr = spm->colptr; spm_int_t *rowptr = spm->rowptr; - if( spm->dof < 0 ) { - return; - } - baseval = spmFindBase(spm); switch (spm->fmttype) { @@ -69,10 +65,10 @@ spm_unsort( spmatrix_t *spm ) case SpmCSC: size = spm->n; - for ( j = 0; j < size; j++, colptr++ ) + for ( j=0; j<size; j++, colptr++ ) { count = colptr[1] - colptr[0]; - for ( i = 0; i < count; i++ ) + for ( i=0; i < count; i++ ) { index1 = ( rand() % count ) + colptr[0] - baseval; index2 = ( rand() % count ) + colptr[0] - baseval; @@ -80,14 +76,13 @@ spm_unsort( spmatrix_t *spm ) rowtmp = rowptr[index1]; rowptr[index1] = rowptr[index2]; rowptr[index2] = rowtmp; - break; } } break; case SpmIJV: size = spm->nnz; - for ( i = 0; i < size; i++ ) + for ( i=0; i<size; i++ ) { index1 = ( rand() % size ); index2 = ( rand() % size ); diff --git a/tests/spm_scatter_gather_tests.c b/tests/spm_scatter_gather_tests.c index 0678923a495345abfe8d4f249bcd5b309f90bc28..3e2bcbb7b4b620e5766c61be9027c917303e0374 100644 --- a/tests/spm_scatter_gather_tests.c +++ b/tests/spm_scatter_gather_tests.c @@ -136,8 +136,6 @@ spmdist_check_scatter_gather( spmatrix_t *original, if ( spmdist_check( clustnum, spms == NULL, "Failed to generate an spm on each node" ) ) { - spmExit( spms ); - free( spms ); return 1; } @@ -155,10 +153,8 @@ spmdist_check_scatter_gather( spmatrix_t *original, * Check spmGather */ spmg = spmGather( spms, root ); - if ( spms ) { - spmExit( spms ); - free( spms ); - } + spmExit( spms ); + free( spms ); /* Check non supported cases by Gather */ {