diff --git a/z_spm_convert_to_csc.c b/z_spm_convert_to_csc.c index 5ec2702ca00c13cb679e5f63ec68d1d6940e60d5..bd76019cc490bee69ae093d8693344a48c6ab963 100644 --- a/z_spm_convert_to_csc.c +++ b/z_spm_convert_to_csc.c @@ -9,7 +9,6 @@ * @version 5.1.0 * @author Mathieu Faverge * @author Theophile Terraz - * @author Alban Bellot * @date 2015-01-01 * * @precisions normal z -> c d s p @@ -81,45 +80,11 @@ z_spmConvertIJV2CSC( pastix_spm_t *spm ) /* Sort the rows and avals arrays by column */ spm->rowptr = malloc(spm->nnz * sizeof(pastix_int_t)); - pastix_int_t *node = calloc(spm->nnz+1,sizeof(pastix_int_t)); - pastix_int_t *old_node = calloc(spm->nnz+1,sizeof(pastix_int_t)); - pastix_int_t *dofs = spm->dofs; - pastix_int_t row, col, dofi, dofj; - - for(i=0; i<spm->nnz; i++) - { - row = oldspm.rowptr[i]-baseval; - col = oldspm.colptr[i]-baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[col+1] - dofs[col]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[row+1] - dofs[row]; - old_node[i+1] = dofi * dofj; - node[spm->colptr[col]+1] += dofi * dofj; - spm->colptr[col]++; - } - - for(i=0;i<spm->nnz;i++) - { - node[i+1]+=node[i]; - old_node[i+1]+=old_node[i]; - } - - /* Restore the colptr indexes */ - { - pastix_int_t tmp, tmp2; - tmp = spm->colptr[0]; - spm->colptr[0] = 0; - for (j=0; j<spm->n; j++) { - tmp2 = spm->colptr[j+1]; - spm->colptr[j+1] = tmp; - tmp = tmp2; - } - } #if defined(PRECISION_p) spm->values = NULL; #else - pastix_int_t ii, jj; - spm->values = malloc(spm->nnzexp * sizeof(pastix_complex64_t)); + spm->values = malloc(spm->nnz * sizeof(pastix_complex64_t)); navals = (pastix_complex64_t*)(spm->values); oavals = (pastix_complex64_t*)(oldspm.values); #endif @@ -127,20 +92,11 @@ z_spmConvertIJV2CSC( pastix_spm_t *spm ) for (j=0; j<spm->nnz; j++) { i = oldspm.colptr[j] - baseval; + spm->rowptr[ spm->colptr[i] ] = oldspm.rowptr[j]; #if !defined(PRECISION_p) - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[oldspm.rowptr[j]-baseval+1] - dofs[oldspm.rowptr[j]-baseval]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++) - { - navals[node[spm->colptr[i]]] = oavals[ old_node[j]]; - old_node[j]++; - node[spm->colptr[i]]++; - } - } + navals[ spm->colptr[i] ] = oavals[j]; #endif (spm->colptr[i])++; @@ -150,6 +106,7 @@ z_spmConvertIJV2CSC( pastix_spm_t *spm ) /* Rebuild the colptr with the correct baseval */ tmp = spm->colptr[0]; spm->colptr[0] = baseval; + spmptx = spm->colptr + 1; for (i=1; i<(spm->n+1); i++, spmptx++) { @@ -162,7 +119,7 @@ z_spmConvertIJV2CSC( pastix_spm_t *spm ) spmExit( &oldspm ); spm->fmttype = PastixCSC; - spm->colmajor = 1; + return PASTIX_SUCCESS; } @@ -195,10 +152,6 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm ) spm->fmttype = PastixCSC; - pastix_int_t type = spm->mtxtype; - if(spm->dof != 1) - spm->mtxtype = PastixGeneral; - switch( spm->mtxtype ) { #if defined(PRECISION_z) || defined(PRECISION_c) case PastixHermitian: @@ -231,36 +184,26 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm ) { pastix_int_t *row_csc; pastix_int_t *col_csc; - pastix_int_t *node; - pastix_int_t *dofs; - #if !defined(PRECISION_p) pastix_complex64_t *val_csc; pastix_complex64_t *valptr = (pastix_complex64_t*)(spm->values); - pastix_int_t ii, jj; - int cpt = 0; #endif - pastix_int_t i, j, k, col, row, nnz, baseval; - pastix_int_t dofi, dofj; + pastix_int_t j, k, col, row, nnz, baseval; baseval = spmFindBase( spm ); nnz = spm->nnz; row_csc = malloc(nnz * sizeof(pastix_int_t)); col_csc = calloc(spm->n+1,sizeof(pastix_int_t)); - node = calloc(spm->nnz+1,sizeof(pastix_int_t)); - dofs = spm->dofs; assert( row_csc ); assert( col_csc ); - assert( node ); - assert( ( (dofs) && !(spm->dof > 0) ) || - ( !(dofs) && (spm->dof > 0) ) ); // (dofs) xor (spm->dof > 0) #if !defined(PRECISION_p) - val_csc = malloc(spm->nnzexp*sizeof(pastix_complex64_t)); + val_csc = malloc(nnz*sizeof(pastix_complex64_t)); assert( val_csc ); #endif + /* Count the number of elements per column */ for (j=0; j<nnz; j++) { col = spm->colptr[j] - baseval; @@ -274,35 +217,6 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm ) col_csc[j+1] += col_csc[j]; } - for(i=0; i<spm->n; i++) - { - for(k=spm->rowptr[i]; k<spm->rowptr[i+1]; k++) - { - j = spm->colptr[k-baseval] - baseval; - row = ( spm->dof > 0 ) ? j : dofs[j]; - col = ( spm->dof > 0) ? i : dofs[i]; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - node[col_csc[j]+1] += dofi * dofj; - col_csc[j]++; - } - } - - for(i=0;i <spm->nnz; i++) - node[i+1] += node[i]; - - /* Restore the colptr indexes */ - { - pastix_int_t tmp, tmp2; - tmp = col_csc[0]; - col_csc[0] = 0; - for (j=0; j<spm->n; j++) { - tmp2 = col_csc[j+1]; - col_csc[j+1] = tmp; - tmp = tmp2; - } - } - assert( (col_csc[spm->gN]) == nnz ); for (row=0; row<spm->n; row++) { @@ -313,34 +227,27 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm ) col = spm->colptr[k] - baseval; j = col_csc[col]; row_csc[j] = row + baseval; + #if !defined(PRECISION_p) - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[col+1] - dofs[col]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[row+1] - dofs[row]; - for(jj=0; jj < dofj ; jj++) - { - for(ii=0; ii < dofi ; ii++) - { - val_csc[node[j] + ii * dofj + jj] = valptr[cpt]; - cpt++; - } - } + val_csc[j] = valptr[k]; #endif - col_csc[col]++; + col_csc[col] ++; } } /* Restore the colptr indexes */ { pastix_int_t tmp, tmp2; + tmp = col_csc[0]; col_csc[0] = baseval; - for (j=0; j<spm->n; j++) - { + for (j=0; j<spm->n; j++) { tmp2 = col_csc[j+1]; col_csc[j+1] = tmp + baseval; tmp = tmp2; } } + spmExit( spm ); spm->colptr = col_csc; spm->rowptr = row_csc; @@ -352,8 +259,5 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm ) } } - if(spm-> dof != 1) - spm->mtxtype = type; - spm->colmajor = 1; return PASTIX_SUCCESS; } diff --git a/z_spm_convert_to_csr.c b/z_spm_convert_to_csr.c index 73822a86722ff523d42c41e67fcbef29b10ddf28..c1d0d4582f0d2edd50c74a231cb6a1419dbfcd50 100644 --- a/z_spm_convert_to_csr.c +++ b/z_spm_convert_to_csr.c @@ -9,7 +9,6 @@ * @version 5.1.0 * @author Mathieu Faverge * @author Theophile Terraz - * @authot Alban Bellot * @date 2015-01-01 * * @precisions normal z -> c d s p @@ -44,10 +43,6 @@ z_spmConvertCSC2CSR( pastix_spm_t *spm ) { pastix_int_t *tmp; pastix_int_t result; - pastix_int_t type = spm->mtxtype; - - if(spm->dof != 1) - spm->mtxtype = PastixGeneral; switch( spm->mtxtype ) { #if defined(PRECISION_z) || defined(PRECISION_c) @@ -71,6 +66,7 @@ z_spmConvertCSC2CSR( pastix_spm_t *spm ) spm->rowptr = spm->colptr; spm->colptr = tmp; spm->fmttype = PastixCSR; + return PASTIX_SUCCESS; } break; @@ -95,10 +91,6 @@ z_spmConvertCSC2CSR( pastix_spm_t *spm ) } } - if(spm-> dof != 1) - spm->mtxtype = type; - spm->colmajor = -1; - return result; } @@ -131,53 +123,16 @@ z_spmConvertIJV2CSR( pastix_spm_t *spm ) #endif pastix_int_t *spmptx, *otmp; pastix_int_t i, j, tmp, baseval, total; - pastix_int_t row, col, dofi, dofj; - - pastix_int_t *node = calloc(spm->nnz+1,sizeof(pastix_int_t)); - pastix_int_t *old_node = calloc(spm->nnz+1,sizeof(pastix_int_t)); - pastix_int_t *dofs = spm->dofs; - pastix_spm_t oldspm; + /* Backup the input */ + memcpy( &oldspm, spm, sizeof(pastix_spm_t) ); + /* * Check the baseval, we consider that arrays are sorted by columns or rows */ baseval = spmFindBase( spm ); -#if !defined(PRECISION_p) - pastix_int_t ii, jj, k; - /* Transpose values in row major format */ - if( spm->colmajor ) - { - pastix_int_t cpt=0; - pastix_int_t* dofs = spm->dofs; - - oavals = (pastix_complex64_t*)spm->values; - navals = malloc(sizeof(pastix_complex64_t)*spm->nnzexp); - - for(k=0; k<spm->nnz; k++) - { - j = spm->rowptr[k]-baseval; - i = spm->colptr[k]-baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++) - { - navals[cpt + jj * dofi + ii] = *oavals; - oavals++; - } - } - cpt += dofi * dofj; - } - spm->values = navals; - } -#endif - - /* Backup the input */ - memcpy( &oldspm, spm, sizeof(pastix_spm_t) ); - /* Compute the new rowptr */ spm->rowptr = (pastix_int_t *) calloc(spm->n+1,sizeof(pastix_int_t)); @@ -203,39 +158,10 @@ z_spmConvertIJV2CSR( pastix_spm_t *spm ) /* Sort the colptr and avals arrays by rows */ spm->colptr = malloc(spm->nnz * sizeof(pastix_int_t)); - for(i=0; i<spm->nnz; i++) - { - row = oldspm.rowptr[i]-baseval; - col = oldspm.colptr[i]-baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[col+1] - dofs[col]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[row+1] - dofs[row]; - old_node[i+1] = dofi * dofj; - node[spm->rowptr[row]+1] += dofi * dofj; - spm->rowptr[row]++; - } - - for(i=0;i<spm->nnz;i++) - { - node[i+1]+=node[i]; - old_node[i+1]+=old_node[i]; - } - - /* Restore the rowptr indexes */ - { - pastix_int_t tmp, tmp2; - tmp = spm->rowptr[0]; - spm->rowptr[0] = 0; - for (j=0; j<spm->n; j++) { - tmp2 = spm->rowptr[j+1]; - spm->rowptr[j+1] = tmp; - tmp = tmp2; - } - } - #if defined(PRECISION_p) spm->values = NULL; #else - spm->values = malloc(spm->nnzexp * sizeof(pastix_complex64_t)); + spm->values = malloc(spm->nnz * sizeof(pastix_complex64_t)); navals = (pastix_complex64_t*)(spm->values); oavals = (pastix_complex64_t*)(oldspm.values); #endif @@ -247,17 +173,7 @@ z_spmConvertIJV2CSR( pastix_spm_t *spm ) spm->colptr[ spm->rowptr[i] ] = oldspm.colptr[j]; #if !defined(PRECISION_p) - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[oldspm.colptr[j]-baseval+1] - dofs[oldspm.colptr[j]-baseval]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++) - { - navals[node[spm->rowptr[i]]] = oavals[ old_node[j]]; - old_node[j]++; - node[spm->rowptr[i]]++; - } - } + navals[ spm->rowptr[i] ] = oavals[j]; #endif (spm->rowptr[i])++; @@ -279,9 +195,7 @@ z_spmConvertIJV2CSR( pastix_spm_t *spm ) spmExit( &oldspm ); - spm->colmajor = -1; spm->fmttype = PastixCSR; return PASTIX_SUCCESS; } - diff --git a/z_spm_convert_to_ijv.c b/z_spm_convert_to_ijv.c index 457f353169a6c87eb36c90e28883bc378c3185a5..6ab25d095da59c6dc59bcf576e9afcc1c5544472 100644 --- a/z_spm_convert_to_ijv.c +++ b/z_spm_convert_to_ijv.c @@ -9,7 +9,6 @@ * @version 5.1.0 * @author Mathieu Faverge * @author Theophile Terraz - * @author Alban Bellot * @date 2015-01-01 * * @precisions normal z -> c d s p @@ -63,37 +62,6 @@ z_spmConvertCSC2IJV( pastix_spm_t *spm ) } } - /* Transpose values in row major format */ - /* - if( spm->colmajor < 1 ) //A test - { - int k, ii, jj, dofi, dofj; - int cpt = 0; - pastix_complex64_t* oavals = (pastix_complex64_t*)spm->values; - pastix_complex64_t* navals = malloc(sizeof(pastix_complex64_t)*spm->nnzexp); - pastix_int_t* dofs=spm->dofs; - - for(k=0; k<spm->nnz; k++) - { - j = spm->rowptr[k]-baseval; - i = col_ijv[k]-baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++) - { - navals[cpt + jj * dofi + ii] = *oavals; - oavals++; - } - } - cpt += dofi * dofj; - } - spm->values = navals; - } - - */ - memFree_null(spm->colptr); spm->colptr = col_ijv; @@ -145,35 +113,6 @@ z_spmConvertCSR2IJV( pastix_spm_t *spm ) } } - /* Transpose values in column major format */ - /* - if( spm->colmajor > 1 ) - { - pastix_int_t k, ii, jj, dofi, dofj; - pastix_int_t cpt=0; - pastix_complex64_t* oavals = (pastix_complex64_t*)spm->values; - pastix_complex64_t* navals = malloc(sizeof(pastix_complex64_t)*spm->nnzexp); - pastix_int_t* dofs=spm->dofs; - for(k=0; k<spm->nnz; k++) - { - i = spm->colptr[k]-baseval; - j = row_ijv[k]-baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++) - { - navals[cpt + ii * dofj + jj] = *oavals; - oavals++; - } - } - cpt += dofi * dofj; - } - spm->values = navals; - } - */ - memFree_null(spm->rowptr); spm->rowptr = row_ijv; diff --git a/z_spm_matrixvector.c b/z_spm_matrixvector.c index 93734bbdf80399195ff8f1eabfc1c4c2fe893b6f..e5346ebf9c67c735626e2a43394b9f99133dcc5d 100644 --- a/z_spm_matrixvector.c +++ b/z_spm_matrixvector.c @@ -73,9 +73,7 @@ z_spmGeCSCv( int trans, const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values; const pastix_complex64_t *xptr = x; pastix_complex64_t *yptr = y; - pastix_int_t col, row, i, j, baseval; - pastix_int_t ii, jj, k, dofi, dofj; - pastix_int_t *dofs = csc->dofs; + pastix_int_t col, row, i, baseval; if ( (csc == NULL) || (x == NULL) || (y == NULL ) ) { @@ -91,10 +89,10 @@ z_spmGeCSCv( int trans, /* first, y = beta*y */ if( beta == 0. ) { - memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) ); + memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) ); } else { - for( i=0; i<csc->gNexp; i++, yptr++ ) { + for( i=0; i<csc->gN; i++, yptr++ ) { (*yptr) *= beta; } yptr = y; @@ -106,22 +104,12 @@ z_spmGeCSCv( int trans, */ if( trans == PastixNoTrans ) { - for( i=0; i < csc->gN; i++ ) + for( col=0; col < csc->gN; col++ ) { - dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; - col = ( csc->dof > 0 ) ? i : dofs[i]; - for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) + for( i=csc->colptr[col]; i<csc->colptr[col+1]; i++ ) { - j = csc->rowptr[k-baseval]-baseval; - dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; - row = ( csc->dof > 0 ) ? j : dofs[j]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - yptr[row+jj] += alpha * (*valptr) * xptr[col+ii]; - } - } + row = csc->rowptr[i-baseval]-baseval; + yptr[row] += alpha * valptr[i-baseval] * xptr[col]; } } } @@ -130,26 +118,15 @@ z_spmGeCSCv( int trans, */ else if( trans == PastixTrans ) { - for( i=0; i < csc->gN; i++ ) + for( col=0; col < csc->gN; col++ ) { - dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; - col = ( csc->dof > 0 ) ? i : dofs[i]; - for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) + for( i=csc->colptr[col]; i<csc->colptr[col+1]; i++ ) { - j = csc->rowptr[k-baseval]-baseval; - dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; - row = ( csc->dof > 0 ) ? j : dofs[j]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - yptr[col+ii] += alpha * (*valptr) * xptr[row+jj]; - } - } + row = csc->rowptr[i-baseval]-baseval; + yptr[col] += alpha * valptr[i-baseval] * xptr[row]; } } } - #if defined(PRECISION_c) || defined(PRECISION_z) else if( trans == PastixConjTrans ) { @@ -168,6 +145,7 @@ z_spmGeCSCv( int trans, return PASTIX_ERR_BADPARAMETER; } } + return PASTIX_SUCCESS; } @@ -216,9 +194,7 @@ z_spmSyCSCv( pastix_complex64_t alpha, const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values; const pastix_complex64_t *xptr = x; pastix_complex64_t *yptr = y; - pastix_int_t col, row, i, j, baseval; - pastix_int_t ii, jj, k, dofi, dofj; - pastix_int_t *dofs = csc->dofs; + pastix_int_t col, row, i, baseval; if ( (csc == NULL) || (x == NULL) || (y == NULL ) ) { @@ -234,40 +210,30 @@ z_spmSyCSCv( pastix_complex64_t alpha, /* First, y = beta*y */ if( beta == 0. ) { - memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) ); + memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) ); } else { - for( i=0; i<csc->gNexp; i++, yptr++ ) { + for( i=0; i<csc->gN; i++, yptr++ ) { (*yptr) *= beta; } yptr = y; } - if(alpha != 0.) - { - for( i=0; i < csc->gN; i++ ) + if( alpha != 0. ) { + for( col=0; col < csc->gN; col++ ) { - dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; - col = ( csc->dof > 0 ) ? i : dofs[i]; - for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) + for( i=csc->colptr[col]; i < csc->colptr[col+1]; i++ ) { - j = csc->rowptr[k-baseval]-baseval; - dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; - row = ( csc->dof > 0 ) ? j : dofs[j]; - for(ii=0; ii<dofi; ii++) + row = csc->rowptr[i-baseval]-baseval; + yptr[row] += alpha * valptr[i-baseval] * xptr[col]; + if( col != row ) { - for(jj=0; jj<dofj; jj++, valptr++) - { - yptr[row+jj] += alpha * (*valptr) * xptr[col+ii]; - if( i != j ) - { - yptr[col+ii] += alpha * (*valptr) * xptr[row+jj]; - } - } + yptr[col] += alpha * valptr[i-baseval] * xptr[row]; } } } } + return PASTIX_SUCCESS; } @@ -317,9 +283,7 @@ z_spmHeCSCv( pastix_complex64_t alpha, const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values; const pastix_complex64_t *xptr = x; pastix_complex64_t *yptr = y; - pastix_int_t col, row, i, j, baseval; - pastix_int_t ii, jj, k, dofi, dofj; - pastix_int_t *dofs = csc->dofs; + pastix_int_t col, row, i, baseval; if ( (csc == NULL) || (x == NULL) || (y == NULL ) ) { @@ -333,41 +297,30 @@ z_spmHeCSCv( pastix_complex64_t alpha, /* First, y = beta*y */ if( beta == 0. ) { - memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) ); + memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) ); } else { - for( i=0; i<csc->gNexp; i++, yptr++ ) { + for( i=0; i<csc->gN; i++, yptr++ ) { (*yptr) *= beta; } yptr = y; } baseval = spmFindBase( csc ); - if( alpha != 0.) - { - for( i=0; i < csc->gN; i++ ) + + if( alpha != 0. ) { + for( col=0; col < csc->gN; col++ ) { - dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i]; - col = ( csc->dof > 0 ) ? i : dofs[i]; - for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ ) + for( i=csc->colptr[col]; i < csc->colptr[col+1]; i++ ) { - j = csc->rowptr[k-baseval]-baseval; - dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j]; - row = ( csc->dof > 0 ) ? j : dofs[j]; - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - yptr[row+jj] += alpha * (*valptr) * xptr[col+ii]; - if( i != j ) - { - yptr[col+ii] += alpha * conj( *valptr ) * xptr[row+jj]; - } - } - } + row=csc->rowptr[i-baseval]-baseval; + yptr[row] += alpha * valptr[i-baseval] * xptr[col]; + if( col != row ) + yptr[col] += alpha * conj( valptr[i-baseval] ) * xptr[row]; } } } + return PASTIX_SUCCESS; } #endif diff --git a/z_spm_norm.c b/z_spm_norm.c index 60880281fb44b686916caeab192301c3da756ae4..422ace0b4cb31e7eeb46a987f731ebdb3f340622 100644 --- a/z_spm_norm.c +++ b/z_spm_norm.c @@ -50,7 +50,7 @@ z_spmFrobeniusNorm( const pastix_spm_t *spm ) double sumsq = 0.; if (spm->mtxtype == PastixGeneral) { - for(i=0; i <spm->nnzexp; i++, valptr++) { + for(i=0; i <spm->nnz; i++, valptr++) { frobenius_update( 1, &scale, &sumsq, valptr ); #if defined(PRECISION_z) || defined(PRECISION_c) @@ -62,32 +62,20 @@ z_spmFrobeniusNorm( const pastix_spm_t *spm ) else { pastix_int_t *colptr = spm->colptr; pastix_int_t *rowptr = spm->rowptr; - pastix_int_t *dofs = spm->dofs; - - int nb, dofi, dofj, ii, jj; + int nb; baseval = spmFindBase( spm ); switch( spm->fmttype ){ case PastixCSC: - for(i=0; i<spm->n; i++, colptr++) - { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - for(j=colptr[0]; j<colptr[1]; j++, rowptr++) - { - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[(*rowptr-baseval)+1] - dofs[(*rowptr-baseval)]; - for(ii=0 ; ii < dofi; ii++) - { - for(jj=0; jj < dofj ;jj++,valptr++) - { - nb = ( i == (*rowptr-baseval) ) ? 1 : 2; - frobenius_update( nb, &scale, &sumsq, valptr ); + for(i=0; i<spm->n; i++, colptr++) { + for(j=colptr[0]; j<colptr[1]; j++, rowptr++, valptr++) { + nb = ( i == (*rowptr-baseval) ) ? 1 : 2; + frobenius_update( nb, &scale, &sumsq, valptr ); #if defined(PRECISION_z) || defined(PRECISION_c) - valptr++; - frobenius_update( nb, &scale, &sumsq, valptr ); + valptr++; + frobenius_update( nb, &scale, &sumsq, valptr ); #endif - } - } } } break; @@ -151,10 +139,11 @@ z_spmMaxNorm( const pastix_spm_t *spm ) pastix_complex64_t *valptr = (pastix_complex64_t*)spm->values; double tmp, norm = 0.; - for(i=0; i < spm->nnzexp; i++, valptr++) { + for(i=0; i <spm->nnz; i++, valptr++) { tmp = cabs( *valptr ); norm = norm > tmp ? norm : tmp; } + return norm; } @@ -182,37 +171,22 @@ z_spmMaxNorm( const pastix_spm_t *spm ) double z_spmInfNorm( const pastix_spm_t *spm ) { - pastix_int_t col, row, i, j, k, ii, jj, baseval; - pastix_int_t dofi, dofj; - pastix_int_t *dofs = spm->dofs; - + pastix_int_t col, row, i, baseval; pastix_complex64_t *valptr = (pastix_complex64_t*)spm->values; double norm = 0.; double *sumcol; - MALLOC_INTERN( sumcol, spm->gNexp, double ); - memset( sumcol, 0, spm->gNexp * sizeof(double) ); + MALLOC_INTERN( sumcol, spm->gN, double ); + memset( sumcol, 0, spm->gN * sizeof(double) ); baseval = spmFindBase( spm ); switch( spm->fmttype ){ case PastixCSC: - for( i=0; i < spm->n; i++) + for( col=0; col < spm->gN; col++ ) { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) + for( i=spm->colptr[col]-baseval; i<spm->colptr[col+1]-baseval; i++ ) { - j = spm->rowptr[k - baseval] - baseval; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - row = ( spm->dof > 0 ) ? j : dofs[j]; - for(ii=0; ii < dofi; ii++) - { - for(jj=0; jj < dofj; jj++, valptr++) - { - { - sumcol[row + jj] += cabs( *valptr ); - } - } - } + sumcol[spm->rowptr[i]-baseval] += cabs( valptr[i] ); } } @@ -220,29 +194,11 @@ z_spmInfNorm( const pastix_spm_t *spm ) if ( (spm->mtxtype == PastixHermitian) || (spm->mtxtype == PastixSymmetric) ) { - valptr = (pastix_complex64_t*)spm->values; - for(i=0; i < spm->n; i++) + for( col=0; col < spm->gN; col++ ) { - col = ( spm->dof > 0 ) ? i : dofs[i]; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) + for( i=spm->colptr[col]-baseval+1; i<spm->colptr[col+1]-baseval; i++ ) { - j = spm->rowptr[k - baseval] - baseval; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - if(i != j) - { - for(ii=0; ii < dofi; ii++) - { - for(jj=0; jj < dofj; jj++, valptr++) - { - sumcol[col + ii] += cabs( *valptr ); - } - } - } - else - { - valptr += dofi * dofj; - } + sumcol[col] += cabs( valptr[i] ); } } } @@ -294,7 +250,7 @@ z_spmInfNorm( const pastix_spm_t *spm ) return PASTIX_ERR_BADPARAMETER; } - for( i=0; i<spm->gNexp; i++) + for( i=0; i<spm->gN; i++) { if(norm < sumcol[i]) { @@ -330,37 +286,22 @@ z_spmInfNorm( const pastix_spm_t *spm ) double z_spmOneNorm( const pastix_spm_t *spm ) { - pastix_int_t col, row, i, j, k, ii, jj, baseval; - pastix_int_t dofi, dofj; - pastix_int_t* dofs = spm->dofs; - + pastix_int_t col, row, i, baseval; pastix_complex64_t *valptr = (pastix_complex64_t*)spm->values; double norm = 0.; double *sumrow; - MALLOC_INTERN( sumrow, spm->gNexp, double ); - memset( sumrow, 0, spm->gNexp * sizeof(double) ); + MALLOC_INTERN( sumrow, spm->gN, double ); + memset( sumrow, 0, spm->gN * sizeof(double) ); baseval = spmFindBase( spm ); switch( spm->fmttype ){ case PastixCSC: - for(i=0; i < spm->n; i++) + for( col=0; col < spm->gN; col++ ) { - col = ( spm->dof > 0 ) ? i : dofs[i]; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) + for( i=spm->colptr[col]-baseval; i<spm->colptr[col+1]-baseval; i++ ) { - j = spm->rowptr[k - baseval] - baseval; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - for(ii=0; ii < dofi; ii++) - { - for(jj=0; jj < dofj; jj++, valptr++) - { - { - sumrow[col + ii] += cabs( *valptr ); - } - } - } + sumrow[col] += cabs( valptr[i] ); } } @@ -368,29 +309,11 @@ z_spmOneNorm( const pastix_spm_t *spm ) if ( (spm->mtxtype == PastixHermitian) || (spm->mtxtype == PastixSymmetric) ) { - valptr = (pastix_complex64_t*)spm->values; - for(i=0; i < spm->n; i++) + for( col=0; col < spm->gN; col++ ) { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++) + for( i=spm->colptr[col]-baseval+1; i<spm->colptr[col+1]-baseval; i++ ) { - j = spm->rowptr[k - baseval] - baseval; - row = ( spm->dof > 0 ) ? j : dofs[j]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - if(i != j) - { - for(ii=0; ii < dofi; ii++) - { - for(jj=0; jj < dofj; jj++, valptr++) - { - sumrow[row + jj] += cabs( *valptr ); - } - } - } - else - { - valptr += dofi * dofj; - } + sumrow[spm->rowptr[i]-baseval] += cabs( valptr[i] ); } } } @@ -442,7 +365,7 @@ z_spmOneNorm( const pastix_spm_t *spm ) return PASTIX_ERR_BADPARAMETER; } - for( i=0; i<spm->gNexp; i++) + for( i=0; i<spm->gN; i++) { if(norm < sumrow[i]) {