From 53289a3ea68bdadd6f58383a6c55c3fa3221bf0d Mon Sep 17 00:00:00 2001 From: Alban Bellot <alban.bellot@inria.fr> Date: Tue, 19 Jul 2016 11:09:40 +0200 Subject: [PATCH] matvec test with dofs --- CMakeLists.txt | 2 + z_spm.h | 4 ++ z_spm_convert_to_csc.c | 1 + z_spm_matrixvector.c | 116 ++++++++++++++++++++++++++++++++--------- z_spm_norm.c | 73 +++++++++++++++++--------- 5 files changed, 145 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dbd384a..21caac4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,8 @@ set(SOURCES z_spm_genrhs.c z_spm_matrixvector.c z_spm_norm.c + z_spm_2dense.c + z_spm_print.c ) set(spm_headers diff --git a/z_spm.h b/z_spm.h index 877da184..73b31693 100644 --- a/z_spm.h +++ b/z_spm.h @@ -37,4 +37,8 @@ pastix_int_t z_spmSymmetrize( pastix_spm_t *csc ); int z_spmGenRHS(int type, int nrhs, const pastix_spm_t *spm, void *x, int ldx, void *b, int ldb ); int z_spmCheckAxb( int nrhs, const pastix_spm_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx ); +pastix_complex64_t *z_spm2dense( const pastix_spm_t *spm ); +void z_spmDensePrint( pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda ); +void z_spmPrint( const pastix_spm_t *spm ); + #endif /* _z_spm_H_ */ diff --git a/z_spm_convert_to_csc.c b/z_spm_convert_to_csc.c index 72b96a23..d07996f4 100644 --- a/z_spm_convert_to_csc.c +++ b/z_spm_convert_to_csc.c @@ -204,6 +204,7 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm ) assert( val_csc ); #endif + /* Count the number of elements per column */ for (j=0; j<nnz; j++) { col = spm->colptr[j] - baseval; diff --git a/z_spm_matrixvector.c b/z_spm_matrixvector.c index 7db1de70..4b0cf889 100644 --- a/z_spm_matrixvector.c +++ b/z_spm_matrixvector.c @@ -73,7 +73,9 @@ z_spmGeCSCv( int trans, const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values; const pastix_complex64_t *xptr = x; pastix_complex64_t *yptr = y; - pastix_int_t col, row, i, baseval; + pastix_int_t col, row, i, j, baseval; + pastix_int_t ii, jj, k, dofi, dofj; + pastix_int_t *dofs=csc->dofs; if ( (csc == NULL) || (x == NULL) || (y == NULL ) ) { @@ -89,10 +91,10 @@ z_spmGeCSCv( int trans, /* first, y = beta*y */ if( beta == 0. ) { - memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) ); + memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) ); } else { - for( i=0; i<csc->gN; i++, yptr++ ) { + for( i=0; i<csc->gNexp; i++, yptr++ ) { (*yptr) *= beta; } yptr = y; @@ -104,12 +106,22 @@ z_spmGeCSCv( int trans, */ if( trans == PastixNoTrans ) { - for( col=0; col < csc->gN; col++ ) + for( i=0; i < csc->gN; i++ ) { - for( i=csc->colptr[col]; i<csc->colptr[col+1]; i++ ) + dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; + col=dofs[i]; + for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) { - row = csc->rowptr[i-baseval]-baseval; - yptr[row] += alpha * valptr[i-baseval] * xptr[col]; + j = csc->rowptr[k-baseval]-baseval; + dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; + row=dofs[j]; + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<dofj; jj++, valptr++) + { + yptr[row+jj] += alpha * (*valptr) * xptr[col+ii]; + } + } } } } @@ -118,15 +130,26 @@ z_spmGeCSCv( int trans, */ else if( trans == PastixTrans ) { - for( col=0; col < csc->gN; col++ ) + for( i=0; i < csc->gN; i++ ) { - for( i=csc->colptr[col]; i<csc->colptr[col+1]; i++ ) + dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; + col=dofs[i]; + for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) { - row = csc->rowptr[i-baseval]-baseval; - yptr[col] += alpha * valptr[i-baseval] * xptr[row]; + j = csc->rowptr[k-baseval]-baseval; + dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; + row=dofs[j]; + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<dofj; jj++, valptr++) + { + yptr[col+ii] += alpha * (*valptr) * xptr[row+jj]; + } + } } } } + #if defined(PRECISION_c) || defined(PRECISION_z) else if( trans == PastixConjTrans ) { @@ -194,7 +217,10 @@ z_spmSyCSCv( pastix_complex64_t alpha, const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values; const pastix_complex64_t *xptr = x; pastix_complex64_t *yptr = y; - pastix_int_t col, row, i, baseval; + pastix_int_t col, row, i, j, baseval; + pastix_int_t ii, jj, k, dofi, dofj; + pastix_int_t *dofs=csc->dofs; + if ( (csc == NULL) || (x == NULL) || (y == NULL ) ) { @@ -210,30 +236,40 @@ z_spmSyCSCv( pastix_complex64_t alpha, /* First, y = beta*y */ if( beta == 0. ) { - memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) ); + memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) ); } else { - for( i=0; i<csc->gN; i++, yptr++ ) { + for( i=0; i<csc->gNexp; i++, yptr++ ) { (*yptr) *= beta; } yptr = y; } - if( alpha != 0. ) { - for( col=0; col < csc->gN; col++ ) + if(alpha != 0.) + { + for( i=0; i < csc->gN; i++ ) { - for( i=csc->colptr[col]; i < csc->colptr[col+1]; i++ ) + dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; + col = dofs[i]; + for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) { - row = csc->rowptr[i-baseval]-baseval; - yptr[row] += alpha * valptr[i-baseval] * xptr[col]; - if( col != row ) + j = csc->rowptr[k-baseval]-baseval; + dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; + row = dofs[j]; + for(ii=0; ii<dofi; ii++) { - yptr[col] += alpha * valptr[i-baseval] * xptr[row]; + for(jj=0; jj<dofj; jj++, valptr++) + { + yptr[row+jj] += alpha * (*valptr) * xptr[col+ii]; + if( i != j ) + { + yptr[col+ii] += alpha * (*valptr) * xptr[row+jj]; + } + } } } } } - return PASTIX_SUCCESS; } @@ -283,7 +319,10 @@ z_spmHeCSCv( pastix_complex64_t alpha, const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values; const pastix_complex64_t *xptr = x; pastix_complex64_t *yptr = y; - pastix_int_t col, row, i, baseval; + pastix_int_t col, row, i, j, baseval; + pastix_int_t ii, jj, k, dofi, dofj; + pastix_int_t *dofs=csc->dofs; + if ( (csc == NULL) || (x == NULL) || (y == NULL ) ) { @@ -297,17 +336,43 @@ z_spmHeCSCv( pastix_complex64_t alpha, /* First, y = beta*y */ if( beta == 0. ) { - memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) ); + memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) ); } else { - for( i=0; i<csc->gN; i++, yptr++ ) { + for( i=0; i<csc->gNexp; i++, yptr++ ) { (*yptr) *= beta; } yptr = y; } baseval = spmFindBase( csc ); + if( alpha != 0.) + { + for( i=0; i < csc->gN; i++ ) + { + dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; + col = dofs[i]; + for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) + { + j = csc->rowptr[k-baseval]-baseval; + dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; + row = dofs[j]; + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<dofj; jj++, valptr++) + { + yptr[row+jj] += alpha * (*valptr) * xptr[col+ii]; + if( i != j ) + { + yptr[col+ii] += alpha * conj( *valptr ) * xptr[row+jj]; + } + } + } + } + } + } + /* if( alpha != 0. ) { for( col=0; col < csc->gN; col++ ) { @@ -320,6 +385,7 @@ z_spmHeCSCv( pastix_complex64_t alpha, } } } + */ return PASTIX_SUCCESS; } diff --git a/z_spm_norm.c b/z_spm_norm.c index 4e4a594d..9d45ade4 100644 --- a/z_spm_norm.c +++ b/z_spm_norm.c @@ -50,7 +50,7 @@ z_spmFrobeniusNorm( const pastix_spm_t *spm ) double sumsq = 0.; if (spm->mtxtype == PastixGeneral) { - for(i=0; i <spm->nnz; i++, valptr++) { + for(i=0; i <spm->nnzexp; i++, valptr++) { frobenius_update( 1, &scale, &sumsq, valptr ); #if defined(PRECISION_z) || defined(PRECISION_c) @@ -62,20 +62,31 @@ z_spmFrobeniusNorm( const pastix_spm_t *spm ) else { pastix_int_t *colptr = spm->colptr; pastix_int_t *rowptr = spm->rowptr; - int nb; + int nb, dofi, dofj, ii, jj; + pastix_int_t *dofs=spm->dofs; baseval = spmFindBase( spm ); switch( spm->fmttype ){ case PastixCSC: - for(i=0; i<spm->n; i++, colptr++) { - for(j=colptr[0]; j<colptr[1]; j++, rowptr++, valptr++) { - nb = ( i == (*rowptr-baseval) ) ? 1 : 2; - frobenius_update( nb, &scale, &sumsq, valptr ); + for(i=0; i<spm->n; i++, colptr++) + { + dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; + for(j=colptr[0]; j<colptr[1]; j++, rowptr++) + { + dofj = ( spm->dof > 0 ) ? spm->dof : dofs[(*rowptr-baseval)+1] - dofs[(*rowptr-baseval)]; + for(ii=0 ; ii < dofi; ii++) + { + for(jj=0; jj < dofj ;jj++,valptr++) + { + nb = ( i == (*rowptr-baseval) ) ? 1 : 2; + frobenius_update( nb, &scale, &sumsq, valptr ); #if defined(PRECISION_z) || defined(PRECISION_c) - valptr++; - frobenius_update( nb, &scale, &sumsq, valptr ); + valptr++; + frobenius_update( nb, &scale, &sumsq, valptr ); #endif + } + } } } break; @@ -211,7 +222,7 @@ z_spmInfNorm( const pastix_spm_t *spm ) */ /* Dofs */ - for( i=0; i < spm->n; i++ ) + for( i=0; i < spm->n; i++) { dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) @@ -230,7 +241,6 @@ z_spmInfNorm( const pastix_spm_t *spm ) } } } - /* Add the symmetric/hermitian part */ if ( (spm->mtxtype == PastixHermitian) || (spm->mtxtype == PastixSymmetric) ) @@ -244,16 +254,20 @@ z_spmInfNorm( const pastix_spm_t *spm ) { j = spm->rowptr[k - baseval] - baseval; dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - for(ii=0; ii < dofi; ii++) - for(jj=0; jj < dofj; jj++,valptr++) + if(i != j) + { + for(ii=0; ii < dofi; ii++) { + for(jj=0; jj < dofj; jj++, valptr++) { - if(i != j) - { - sumcol[col + ii] += cabs( *valptr ); - } + sumcol[col + ii] += cabs( *valptr ); } } + } + else + { + valptr += dofi * dofj; + } } } } @@ -368,18 +382,20 @@ z_spmOneNorm( const pastix_spm_t *spm ) for(i=0; i < spm->n; i++) { col = dofs[i]; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - spm->dofs[i]; + dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) { j = spm->rowptr[k - baseval] - baseval; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - spm->dofs[j]; + dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; for(ii=0; ii < dofi; ii++) + { for(jj=0; jj < dofj; jj++, valptr++) { { sumrow[col + ii] += cabs( *valptr ); } } + } } } /* Add the symmetric/hermitian part */ @@ -389,22 +405,27 @@ z_spmOneNorm( const pastix_spm_t *spm ) valptr = (pastix_complex64_t*)spm->values; for(i=0; i < spm->n; i++) { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - spm->dofs[i]; + dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) { j = spm->rowptr[k - baseval] - baseval; row = dofs[j]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - spm->dofs[j]; - for(ii=0; ii < dofi; ii++) - for(jj=0; jj < dofj; jj++, valptr++) + dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; + if(i != j) + { + for(ii=0; ii < dofi; ii++) { + for(jj=0; jj < dofj; jj++, valptr++) { - if(i != j) - { - sumrow[row + jj] += cabs( *valptr ); - } + + sumrow[row + jj] += cabs( *valptr ); } } + } + else + { + valptr += dofi * dofj; + } } } } -- GitLab