diff --git a/src/spm.c b/src/spm.c index b418edfaf93975a7f8897a0776afa6e5d571d5eb..2fc61e1cd6346a6d44a3ccebe66470f6a97251ec 100644 --- a/src/spm.c +++ b/src/spm.c @@ -417,10 +417,6 @@ int spmConvert( int ofmttype, spmatrix_t *spm ) { if ( conversionTable[spm->fmttype][ofmttype][spm->flttype] ) { - if ( (spm->dof != 1) && (spm->flttype != SpmPattern) ) { - //fprintf( stderr, "spmConvert: Conversion of non unique dof not yet implemented\n"); - return SPM_ERR_NOTIMPLEMENTED; - } return conversionTable[spm->fmttype][ofmttype][spm->flttype]( spm ); } else { @@ -748,7 +744,12 @@ spmCheckAndCorrect( const spmatrix_t *spm_in, } /* PaStiX works on CSC matrices */ - spmConvert( SpmCSC, newspm ); + if ( spmConvert( SpmCSC, newspm ) != SPM_SUCCESS ) { + spm_print_error( "spmCheckAndCorrect: error during the conversion to CSC format\n" ); + spmExit( newspm ); + free( newspm ); + return 0; + } /* Sort the rowptr for each column */ spmSort( newspm ); diff --git a/src/spm_dof_extend.c b/src/spm_dof_extend.c index 876ee7879123c2839cdcd834b52b349b77aff28b..18eed840759c2e2d8d8998c7f8c88d6687184725 100644 --- a/src/spm_dof_extend.c +++ b/src/spm_dof_extend.c @@ -79,7 +79,7 @@ spmDofExtend( const spmatrix_t *spm, baseval = spmFindBase( spm ); newspm->dof = -1; - newspm->dofs = malloc( (spm->n+1) * sizeof(spm_int_t) ); + newspm->dofs = malloc( (spm->gN+1) * sizeof(spm_int_t) ); dofptr = newspm->dofs; /* @@ -87,10 +87,15 @@ spmDofExtend( const spmatrix_t *spm, * dof[i+1] - dof[i] */ *dofptr = baseval; - for(i=0; i<spm->n; i++, dofptr++) { - dofi = 1 + ( rand() % dof ); - dofptr[1] = dofptr[0] + dofi; + if( spm->clustnum == 0 ) { + for(i=0; i<spm->gN; i++, dofptr++) { + dofi = 1 + ( rand() % dof ); + dofptr[1] = dofptr[0] + dofi; + } } +#if defined(SPM_WITH_MPI) + MPI_Bcast( newspm->dofs, spm->gN+1, SPM_MPI_INT, 0, spm->comm ); +#endif } spmUpdateComputedFields( newspm ); diff --git a/src/spm_gen_fake_values.c b/src/spm_gen_fake_values.c index 825c5e361523666b62fdd96d2a4a2d68bfcba93d..2081c6c90bbe1c7cb1cb91202b105cac89147970 100644 --- a/src/spm_gen_fake_values.c +++ b/src/spm_gen_fake_values.c @@ -12,61 +12,56 @@ * @author Theophile Terraz * @date 2015-01-01 * + * @addtogroup spm_dev_driver + * @{ **/ #include "common.h" /** - ******************************************************************************* - * - * @ingroup spm_dev_driver - * - * @brief Compute the degree of each vertex. - * - ******************************************************************************* + * @brief Compute the local degree of each vertex of an SPM in CSR/CSC format. * * @param[in] spm - * The spm to study. + * The spm to study in CSC/CSR format. * - * @param[inout] degrees - * Array of size spm->n allocated on entry. On exit, contains the - * degree of each vertex in the spm matrix. + * @param[in] baseval + * The baseval of the spm. * - ******************************************************************************* + * @param[inout] degrees + * Array of size spm->gN allocated and set to 0 on entry. On exit, + * contains the degree of each vertex in the spm matrix for the local + * node. * * @return the number of diagonal elements found during the computation. - * - *******************************************************************************/ + */ static inline spm_int_t -spm_compute_degrees( const spmatrix_t *spm, - spm_int_t *degrees ) +spm_compute_degrees_csx( const spmatrix_t *spm, + spm_int_t baseval, + spm_int_t *degrees ) { - spm_int_t i, j, k; - spm_int_t *colptr = spm->colptr; - spm_int_t *rowptr = spm->rowptr; - spm_int_t baseval; + const spm_int_t *colptr = spm->colptr; + const spm_int_t *rowptr = spm->rowptr; + const spm_int_t *loc2glob = spm->loc2glob; + spm_int_t j, k, ig, jg; spm_int_t diagval = 0; - baseval = spmFindBase( spm ); - memset( degrees, 0, spm->n * sizeof(spm_int_t) ); - - switch(spm->fmttype) + /* Swap pointers to call CSC */ + if ( spm->fmttype == SpmCSR ) { - case SpmCSR: - /* Swap pointers to call CSC */ colptr = spm->rowptr; rowptr = spm->colptr; + } - spm_attr_fallthrough; + if ( loc2glob ) { + for(j=0; j<spm->n; j++, colptr++, loc2glob++) { + jg = *loc2glob - baseval; - case SpmCSC: - for(j=0; j<spm->n; j++, colptr++) { for(k=colptr[0]; k<colptr[1]; k++, rowptr++) { - i = *rowptr - baseval; + ig = *rowptr - baseval; - if ( i != j ) { - degrees[j] += 1; + if ( ig != jg ) { + degrees[jg] += 1; if ( spm->mtxtype != SpmGeneral ) { - degrees[i] += 1; + degrees[ig] += 1; } } else { @@ -74,24 +69,70 @@ spm_compute_degrees( const spmatrix_t *spm, } } } - break; - case SpmIJV: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( i != j ) { - degrees[j] += 1; + } + else { + for(jg=0; jg<spm->n; jg++, colptr++) { + for(k=colptr[0]; k<colptr[1]; k++, rowptr++) { + ig = *rowptr - baseval; - if ( spm->mtxtype != SpmGeneral ) { - degrees[i] += 1; + if ( ig != jg ) { + degrees[jg] += 1; + if ( spm->mtxtype != SpmGeneral ) { + degrees[ig] += 1; + } + } + else { + diagval++; } } - else { - diagval++; + } + } + + return diagval; +} + +/** + * @brief Compute the degree of each vertex of an IJV matrix. + * + * @param[in] spm + * The spm to study in IJV format. + * + * @param[in] baseval + * The baseval of the spm. + * + * @param[inout] degrees + * Array of size spm->gN allocated and set to 0 on entry. On exit, + * contains the degree of each vertex in the spm matrix for the local + * node. + * + * @return the number of diagonal elements found during the computation. + * + **/ +static inline spm_int_t +spm_compute_degrees_ijv( const spmatrix_t *spm, + spm_int_t baseval, + spm_int_t *degrees ) +{ + const spm_int_t *colptr = spm->colptr; + const spm_int_t *rowptr = spm->rowptr; + spm_int_t k, ig, jg; + spm_int_t diagval = 0; + + for(k=0; k<spm->nnz; k++, rowptr++, colptr++) + { + ig = *rowptr - baseval; + jg = *colptr - baseval; + + if ( ig != jg ) { + degrees[jg] += 1; + + if ( spm->mtxtype != SpmGeneral ) { + degrees[ig] += 1; } } + else { + diagval++; + } } return diagval; @@ -102,112 +143,240 @@ spm_compute_degrees( const spmatrix_t *spm, * * @ingroup spm_dev_driver * - * @brief Insert diagonal elements to the graph to have a full Laplacian - * generated + * @brief Compute the degree of each vertex. * ******************************************************************************* * * @param[in] spm + * The spm to study. + * + * @param[in] baseval + * The baseval of the spm. + * + * @param[inout] degrees + * Array of size spm->n allocated on entry. On exit, contains the + * degree of each vertex in the spm matrix. + * + ******************************************************************************* + * + * @return the number of diagonal elements found during the computation. + * + *******************************************************************************/ +static inline spm_int_t +spm_compute_degrees( const spmatrix_t *spm, + spm_int_t baseval, + spm_int_t *degrees ) +{ + spm_int_t diagval; + + memset( degrees, 0, spm->gN * sizeof(spm_int_t) ); + + if ( spm->fmttype == SpmIJV ) { + diagval = spm_compute_degrees_ijv( spm, baseval, degrees ); + } + else { + diagval = spm_compute_degrees_csx( spm, baseval, degrees ); + } + + /* + * Use the simplest solution here, despite its cost. + * In fact, this function is used only for testing and should not be used + * with very large graph, so it should not be a problem. + */ +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob ) { + MPI_Allreduce( MPI_IN_PLACE, degrees, spm->gN, SPM_MPI_INT, + MPI_SUM, spm->comm ); + } +#else + assert( spm->loc2glob == NULL ); +#endif + + return diagval; +} + +/** + * @brief Insert diagonal elements to the graph of a CSC/CSR matrix to have a + * full Laplacian generated + * + * @param[inout] spm * At start, the initial spm structure with missing diagonal elements. * At exit, contains the same sparse matrix with diagonal elements added. * + * @param[in] baseval + * The baseval of the spm. + * * @param[in] diagval * The number of diagonal elements already present in the matrix. * - *******************************************************************************/ + */ static inline void -spm_add_diag( spmatrix_t *spm, - spm_int_t diagval ) +spm_add_diag_csx( spmatrix_t *spm, + spm_int_t baseval, + spm_int_t diagval ) { - spmatrix_t oldspm; - spm_int_t i, j, k; - spm_int_t *oldcol = spm->colptr; - spm_int_t *oldrow = spm->rowptr; - spm_int_t *newrow, *newcol; - spm_int_t baseval; - - baseval = spmFindBase( spm ); + spmatrix_t oldspm; + spm_int_t ig, j, jg, k, d; + spm_int_t *oldcol; + const spm_int_t *oldrow; + spm_int_t *newrow; + spm_int_t *newcol; + const spm_int_t *loc2glob; - memcpy( &oldspm, spm, sizeof(spmatrix_t)); + memcpy( &oldspm, spm, sizeof(spmatrix_t) ); spm->nnz = oldspm.nnz + (spm->n - diagval); newrow = malloc( spm->nnz * sizeof(spm_int_t) ); - switch(spm->fmttype) + /* Swap pointers to call CSC */ + if ( spm->fmttype == SpmCSC ) + { + oldcol = spm->colptr; + oldrow = spm->rowptr; + newcol = oldcol; + spm->rowptr = newrow; + } + else { - case SpmCSR: - /* Swap pointers to call CSC */ oldcol = spm->rowptr; oldrow = spm->colptr; + newcol = oldcol; spm->colptr = newrow; + } + loc2glob = spm->loc2glob; - spm_attr_fallthrough; + d = 0; /* Number of diagonal element added */ + for(j=0; j<spm->n; j++, newcol++) { + spm_int_t nbelt = newcol[1] - newcol[0]; + int hasdiag = 0; - case SpmCSC: - newcol = oldcol; - if ( spm->fmttype == SpmCSC ) { - spm->rowptr = newrow; - } - diagval = 0; - for(j=0; j<spm->n; j++, newcol++) { - spm_int_t nbelt = newcol[1] - newcol[0]; - int diag = 0; - - memcpy( newrow, oldrow, nbelt * sizeof(spm_int_t) ); - newrow += nbelt; + memcpy( newrow, oldrow, nbelt * sizeof(spm_int_t) ); + newrow += nbelt; - for(k=0; k<nbelt; k++, oldrow++) { - i = *oldrow - baseval; + /* Check if the diagonal element is present or not */ + jg = (loc2glob == NULL) ? j + baseval : loc2glob[j]; + for(k=0; k<nbelt; k++, oldrow++) { + ig = *oldrow; - if ( i == j ) { - diag = 1; - } - } - newcol[0] += diagval; - if ( !diag ) { - *newrow = j + baseval; - newrow++; - diagval++; + if ( ig == jg ) { + hasdiag = 1; } } - newcol[0] += diagval; - if ( spm->fmttype == SpmCSC ) { - free( oldspm.rowptr ); - } - else { - free( oldspm.colptr ); + newcol[0] += d; + if ( !hasdiag ) { + *newrow = jg; + newrow++; + d++; } - assert( diagval == spm->n ); - break; + } + newcol[0] += d; - case SpmIJV: - newcol = malloc( spm->nnz * sizeof(spm_int_t) ); - spm->colptr = newcol; - spm->rowptr = newrow; + if ( spm->fmttype == SpmCSC ) { + free( oldspm.rowptr ); + } + else { + free( oldspm.colptr ); + } + assert( d == spm->n ); +} +/** + * @brief Insert diagonal elements to the graph of an IJV matrix to have a + * full Laplacian generated + * + * @param[inout] spm + * At start, the initial spm structure with missing diagonal elements. + * At exit, contains the same sparse matrix with diagonal elements added. + * + * @param[in] baseval + * The baseval of the spm. + * + * @param[in] diagval + * The number of diagonal elements already present in the matrix. + * + */ +static inline void +spm_add_diag_ijv( spmatrix_t *spm, + spm_int_t baseval, + spm_int_t diagval ) +{ + spmatrix_t oldspm; + spm_int_t k; + const spm_int_t *oldcol = spm->colptr; + const spm_int_t *oldrow = spm->rowptr; + spm_int_t *newrow; + spm_int_t *newcol; + const spm_int_t *loc2glob; + + memcpy( &oldspm, spm, sizeof(spmatrix_t)); + + spm->nnz = oldspm.nnz + (spm->n - diagval); + newrow = malloc( spm->nnz * sizeof(spm_int_t) ); + newcol = malloc( spm->nnz * sizeof(spm_int_t) ); + spm->colptr = newcol; + spm->rowptr = newrow; + loc2glob = spm->loc2glob; + + /* Let's insert all diagonal elements first */ + if ( loc2glob ) { + for(k=0; k<spm->n; k++, newrow++, newcol++, loc2glob++) + { + *newrow = *loc2glob; + *newcol = *loc2glob; + } + } + else { for(k=0; k<spm->n; k++, newrow++, newcol++) { *newrow = k + baseval; *newcol = k + baseval; } + } - for(k=0; k<spm->nnz; k++, oldrow++, oldcol++) - { - i = *oldrow - baseval; - j = *oldcol - baseval; + /* Now let's copy everything else but the diagonal elements */ + for(k=0; k<spm->nnz; k++, oldrow++, oldcol++) + { + if ( *oldrow == *oldcol ) { + continue; + } - if ( i == j ) { - continue; - } + *newrow = *oldrow; + *newcol = *oldcol; + newrow++; + newcol++; + } - *newrow = i + baseval; - *newcol = j + baseval; - newrow++; - newcol++; - } - free( oldspm.colptr ); - free( oldspm.rowptr ); + free( oldspm.colptr ); + free( oldspm.rowptr ); +} + +/** + * @brief Insert diagonal elements to the graph of a matrix to have a full + * Laplacian generated + * + * @param[inout] spm + * At start, the initial spm structure with missing diagonal elements. + * At exit, contains the same sparse matrix with diagonal elements added. + * + * @param[in] baseval + * The baseval of the spm. + * + * @param[in] diagval + * The number of diagonal elements already present in the matrix. + * + */ +static inline void +spm_add_diag( spmatrix_t *spm, + spm_int_t baseval, + spm_int_t diagval ) +{ + assert( diagval < spm->n ); + if ( spm->fmttype == SpmIJV ) { + spm_add_diag_ijv( spm, baseval, diagval ); + } + else { + spm_add_diag_csx( spm, baseval, diagval ); } } @@ -226,22 +395,20 @@ spm_add_diag( spmatrix_t *spm, * The spm structure for which the values array must be generated. * * @param[in] degrees - * Array of size spm->n that containes the degree of each vertex in the + * Array of size spm->n that contains the degree of each vertex in the * spm structure. * *******************************************************************************/ static inline void -spm_generate_fake_values( spmatrix_t *spm, +spm_generate_fake_values( spmatrix_t *spm, + spm_int_t baseval, const spm_int_t *degrees, double alpha, double beta ) { - double *values; - spm_int_t i, j, k; - spm_int_t *colptr = spm->colptr; - spm_int_t *rowptr = spm->rowptr; - spm_int_t baseval; - - baseval = spmFindBase( spm ); + double *values; + spm_int_t ig, j, jg, k; + const spm_int_t *colptr = spm->colptr; + const spm_int_t *rowptr = spm->rowptr; spm->values = malloc( spm->nnzexp * sizeof(double) ); values = spm->values; @@ -257,11 +424,13 @@ spm_generate_fake_values( spmatrix_t *spm, case SpmCSC: for(j=0; j<spm->n; j++, colptr++) { + jg = (spm->loc2glob == NULL) ? j : (spm->loc2glob[j] - baseval); for(k=colptr[0]; k<colptr[1]; k++, rowptr++, values++) { - i = *rowptr - baseval; + ig = *rowptr - baseval; - if ( i == j ) { - *values = alpha * degrees[j]; + /* Diagonal element */ + if ( ig == jg ) { + *values = alpha * degrees[jg]; } else { *values = - beta; @@ -272,11 +441,11 @@ spm_generate_fake_values( spmatrix_t *spm, case SpmIJV: for(k=0; k<spm->nnz; k++, rowptr++, colptr++, values++) { - i = *rowptr - baseval; - j = *colptr - baseval; + ig = *rowptr - baseval; + jg = *colptr - baseval; - if ( i == j ) { - *values = alpha * degrees[j]; + if ( ig == jg ) { + *values = alpha * degrees[jg]; } else { *values = - beta; @@ -309,9 +478,10 @@ spm_generate_fake_values( spmatrix_t *spm, void spmGenFakeValues( spmatrix_t *spm ) { - spm_int_t *degrees, diagval; - double alpha = 10.; - double beta = 1.; + spm_int_t *degrees, diagval, gdiagval; + double alpha = 10.; + double beta = 1.; + spm_int_t baseval; assert( spm->flttype == SpmPattern ); assert( spm->values == NULL ); @@ -346,14 +516,30 @@ spmGenFakeValues( spmatrix_t *spm ) } } - degrees = malloc( spm->n * sizeof(spm_int_t)); - diagval = spm_compute_degrees( spm, degrees ); - if ( diagval != spm->n ) { + baseval = spmFindBase( spm ); + + degrees = malloc( spm->gN * sizeof(spm_int_t)); + diagval = spm_compute_degrees( spm, baseval, degrees ); + +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob ) { + MPI_Allreduce( &diagval, &gdiagval, 1, SPM_MPI_INT, + MPI_SUM, spm->comm ); + } + else +#endif + { + gdiagval = diagval; + } + + if ( gdiagval != spm->gN ) { /* Diagonal elements must be added to the sparse matrix */ - spm_add_diag( spm, diagval ); + if ( spm->n != diagval ) { + spm_add_diag( spm, baseval, diagval ); + } spmUpdateComputedFields( spm ); } - spm_generate_fake_values( spm, degrees, alpha, beta ); + spm_generate_fake_values( spm, baseval, degrees, alpha, beta ); free( degrees ); return; diff --git a/src/z_spm_2dense.c b/src/z_spm_2dense.c index 22c9c2fbec5a457bffbee6d7ff99d73b3692c4af..c4a562f1e4541dd95f7ac935ff74ee4b9d8a319d 100644 --- a/src/z_spm_2dense.c +++ b/src/z_spm_2dense.c @@ -19,6 +19,229 @@ #include "common.h" #include "z_spm.h" +typedef spm_complex64_t (*__conj_fct_t)( spm_complex64_t ); + +static inline spm_complex64_t +__fct_id( spm_complex64_t val ) { + return val; +} + +#if defined(PRECISION_c) || defined(PRECISION_z) +static inline spm_complex64_t +__fct_conj( spm_complex64_t val ) { + return conj( val ); +} +#endif + +static inline void +z_spm_2dense_elt_sym_diag_col( spm_complex64_t *A, + const spm_int_t lda, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(jj=0; jj<dofj; jj++) + { + /* Skip unused upper triangular part */ + for(ii=0; ii<jj; ii++) { + valptr++; + } + + /* Diagonal element */ + A[ lda * (col + jj) + (row + ii) ] = *valptr; + valptr++; + + for(ii=jj+1; ii<dofi; ii++, valptr++) + { + /* Lower part */ + A[ lda * (col + jj) + (row + ii) ] = *valptr; + /* Upper part */ + A[ lda * (row + ii) + (col + jj) ] = conjfct(*valptr); + } + } + (void)conjfct; +} + +static inline void +z_spm_2dense_elt_sym_diag_row( spm_complex64_t *A, + const spm_int_t lda, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<ii; jj++, valptr++) + { + /* Lower part */ + A[ lda * (col + jj) + (row + ii) ] = *valptr; + + /* Upper part */ + A[ lda * (row + ii) + (col + jj) ] = conjfct(*valptr); + } + + /* Diagonal element */ + A[ lda * (col + jj) + (row + ii) ] = *valptr; + valptr++; + + /* Skip unused upper triangular part */ + for(jj=ii+1; jj<dofj; jj++) { + valptr++; + } + } + (void)conjfct; +} + +static inline void +z_spm_2dense_elt_sym_diag( spm_complex64_t *A, + const spm_int_t lda, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + if ( layout == SpmColMajor ) { + z_spm_2dense_elt_sym_diag_col( A, lda, row, dofi, col, dofj, conjfct, valptr ); + } + else { + z_spm_2dense_elt_sym_diag_row( A, lda, row, dofi, col, dofj, conjfct, valptr ); + } + (void)conjfct; +} + +static inline void +z_spm_2dense_elt_gen_col( spm_complex64_t *A, + const spm_int_t lda, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, valptr++) + { + A[ lda * (col + jj) + (row + ii) ] = conjfct(*valptr); + } + } + (void)conjfct; +} + +static inline void +z_spm_2dense_elt_gen_row( spm_complex64_t *A, + const spm_int_t lda, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<dofj; jj++, valptr++) + { + A[ lda * (col + jj) + (row + ii) ] = conjfct(*valptr); + } + } + (void)conjfct; +} + +static inline void +z_spm_2dense_elt_gen( spm_complex64_t *A, + const spm_int_t lda, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + if ( layout == SpmColMajor ) { + z_spm_2dense_elt_gen_col( A, lda, row, dofi, col, dofj, conjfct, valptr ); + } + else { + z_spm_2dense_elt_gen_row( A, lda, row, dofi, col, dofj, conjfct, valptr ); + } +} + +static inline void +z_spm_2dense_elt_sym_offd( spm_complex64_t *A, + const spm_int_t lda, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + if ( layout == SpmColMajor ) { + z_spm_2dense_elt_gen_col( A, lda, row, dofi, col, dofj, __fct_id, valptr ); + z_spm_2dense_elt_gen_row( A, lda, col, dofj, row, dofi, conjfct, valptr ); + } + else { + z_spm_2dense_elt_gen_row( A, lda, row, dofi, col, dofj, __fct_id, valptr ); + z_spm_2dense_elt_gen_col( A, lda, col, dofj, row, dofi, conjfct, valptr ); + } +} + +static inline void +z_spm_2dense_elt( spm_complex64_t *A, + const spm_int_t lda, + const spm_mtxtype_t mtxtype, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr ) +{ + if ( mtxtype == SpmGeneral ) { + z_spm_2dense_elt_gen( A, lda, layout, row, dofi, col, dofj, __fct_id, valptr ); + } + else { + __conj_fct_t conjfct; + +#if defined(PRECISION_c) || defined(PRECISION_z) + if ( mtxtype == SpmHermitian ) { + conjfct = __fct_conj; + } + else +#endif + { + conjfct = __fct_id; + } + + if ( row == col ) { + z_spm_2dense_elt_sym_diag( A, lda, layout, row, dofi, col, dofj, conjfct, valptr ); + } + else { + z_spm_2dense_elt_sym_offd( A, lda, layout, row, dofi, col, dofj, conjfct, valptr ); + } + } +} + /** ******************************************************************************* * @@ -40,12 +263,18 @@ * @return A dense matrix in Lapack layout format * *******************************************************************************/ -spm_complex64_t * +static inline spm_complex64_t * z_spmCSC2dense( const spmatrix_t *spm ) { - spm_int_t i, j, k, lda, baseval; - spm_complex64_t *A, *valptr; - spm_int_t *colptr, *rowptr; + spm_int_t j, k, lda, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_int_t *loc2glob; + const spm_complex64_t *valptr; + spm_complex64_t *A; assert( spm->fmttype == SpmCSC ); assert( spm->flttype == SpmComplex64 ); @@ -55,146 +284,43 @@ z_spmCSC2dense( const spmatrix_t *spm ) memset( A, 0, lda * lda * sizeof(spm_complex64_t)); baseval = spmFindBase( spm ); - i = 0; - j = 0; - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; - /** - * Constant degree of fredom of 1 - */ - if ( spm->dof == 1 ) { - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(j=0; j<spm->n; j++, colptr++) - { - for(k=colptr[0]; k<colptr[1]; k++, rowptr++, valptr++) - { - i = (*rowptr-baseval); - if( i == j ) { - /* Make sure the matrix is hermitian */ - A[ j * lda + i ] = creal(*valptr) + I * 0.; - } - else { - A[ j * lda + i ] = *valptr; - A[ i * lda + j ] = conj(*valptr); - } - } - } - break; -#endif - case SpmSymmetric: - for(j=0; j<spm->n; j++, colptr++) - { - for(k=colptr[0]; k<colptr[1]; k++, rowptr++, valptr++) - { - i = (*rowptr-baseval); - A[ j * lda + i ] = *valptr; - A[ i * lda + j ] = *valptr; - } - } - break; - case SpmGeneral: - default: - for(j=0; j<spm->n; j++, colptr++) - { - for(k=colptr[0]; k<colptr[1]; k++, rowptr++, valptr++) - { - i = (*rowptr-baseval); - A[ j * lda + i ] = *valptr; - } - } + for(j=0; j<spm->n; j++, colptr++, loc2glob++) + { + jg = (spm->loc2glob == NULL) ? j : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; } - } - /** - * General degree of freedom (constant or variable) - */ - else { - spm_int_t k, ii, jj, dofi, dofj, col, row; - spm_int_t *dofs = spm->dofs; - - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(j=0; j<spm->n; j++, colptr++) - { - dofj = ( spm->dof > 1 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 1 ) ? (spm->dof * j) : dofs[j] - baseval; - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - i = (*rowptr - baseval); - dofi = ( spm->dof > 1 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 1 ) ? (spm->dof * i) : dofs[i] - baseval; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if( col+jj == row+ii ) { - /* Make sure the matrix is hermitian */ - A[ (col + jj) * lda + (row + ii) ] = creal(*valptr) + I * 0.; - } - else { - A[ (col + jj) * lda + (row + ii) ] = *valptr; - A[ (row + ii) * lda + (col + jj) ] = conj(*valptr); - } - } - } - } - } - break; -#endif - case SpmSymmetric: - for(j=0; j<spm->n; j++, colptr++) - { - dofj = ( spm->dof > 1 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 1 ) ? spm->dof * j : dofs[j] - baseval; - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - i = (*rowptr - baseval); - dofi = ( spm->dof > 1 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 1 ) ? spm->dof * i : dofs[i] - baseval; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - A[ (col + jj) * lda + (row + ii) ] = *valptr; - A[ (row + ii) * lda + (col + jj) ] = *valptr; - } - } - } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + for(k=colptr[0]; k<colptr[1]; k++, rowptr++) + { + ig = (*rowptr - baseval); + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; } - break; - case SpmGeneral: - default: - for(j=0; j<spm->n; j++, colptr++) - { - dofj = ( spm->dof > 1 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 1 ) ? spm->dof * j : dofs[j] - baseval; - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - i = (*rowptr - baseval); - dofi = ( spm->dof > 1 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 1 ) ? spm->dof * i : dofs[i] - baseval; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - A[ (col + jj) * lda + (row + ii) ] = *valptr; - } - } - } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; } + + z_spm_2dense_elt( A, lda, spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; } } + return A; } @@ -219,12 +345,18 @@ z_spmCSC2dense( const spmatrix_t *spm ) * @return A dense matrix in Lapack layout format * *******************************************************************************/ -spm_complex64_t * +static inline spm_complex64_t * z_spmCSR2dense( const spmatrix_t *spm ) { - spm_int_t i, j, k, lda, baseval; - spm_complex64_t *A, *valptr; - spm_int_t *colptr, *rowptr; + spm_int_t i, k, lda, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_int_t *loc2glob; + const spm_complex64_t *valptr; + spm_complex64_t *A; assert( spm->fmttype == SpmCSR ); assert( spm->flttype == SpmComplex64 ); @@ -234,146 +366,43 @@ z_spmCSR2dense( const spmatrix_t *spm ) memset( A, 0, lda * lda * sizeof(spm_complex64_t)); baseval = spmFindBase( spm ); - i = 0; - j = 0; - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; - /** - * Constant degree of fredom of 1 - */ - if ( spm->dof == 1 ) { - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(i=0; i<spm->n; i++, rowptr++) - { - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++, valptr++) - { - j = (*colptr-baseval); - if( i == j ) { - /* Make sure the matrix is hermitian */ - A[ j * lda + i ] = creal(*valptr) + I * 0.; - } - else { - A[ j * lda + i ] = *valptr; - A[ i * lda + j ] = conj(*valptr); - } - } - } - break; -#endif - case SpmSymmetric: - for(i=0; i<spm->n; i++, rowptr++) - { - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++, valptr++) - { - j = (*colptr-baseval); - A[ j * lda + i ] = *valptr; - A[ i * lda + j ] = *valptr; - } - } - break; - case SpmGeneral: - default: - for(i=0; i<spm->n; i++, rowptr++) - { - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++, valptr++) - { - j = (*colptr-baseval); - A[ j * lda + i ] = *valptr; - } - } + for(i=0; i<spm->n; i++, rowptr++, loc2glob++) + { + ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; } - } - /** - * General degree of freedom (constant or variable) - */ - else { - spm_int_t k, ii, jj, dofi, dofj, col, row; - spm_int_t *dofs = spm->dofs; - - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(i=0; i<spm->n; i++, rowptr++) - { - dofi = ( spm->dof > 1 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 1 ) ? spm->dof * i : dofs[i] - baseval; - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - j = (*colptr - baseval); - dofj = ( spm->dof > 1 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 1 ) ? spm->dof * j : dofs[j] - baseval; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if( col+jj == row+ii ) { - /* Make sure the matrix is hermitian */ - A[ (col + jj) * lda + (row + ii) ] = creal(*valptr) + I * 0.; - } - else { - A[ (col + jj) * lda + (row + ii) ] = *valptr; - A[ (row + ii) * lda + (col + jj) ] = conj(*valptr); - } - } - } - } - } - break; -#endif - case SpmSymmetric: - for(i=0; i<spm->n; i++, rowptr++) - { - dofi = ( spm->dof > 1 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 1 ) ? spm->dof * i : dofs[i] - baseval; - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - j = (*colptr - baseval); - dofj = ( spm->dof > 1 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 1 ) ? spm->dof * j : dofs[j] - baseval; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - A[ (col + jj) * lda + (row + ii) ] = *valptr; - A[ (row + ii) * lda + (col + jj) ] = *valptr; - } - } - } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) + { + jg = (*colptr - baseval); + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; } - break; - case SpmGeneral: - default: - for(i=0; i<spm->n; i++, rowptr++) - { - dofi = ( spm->dof > 1 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 1 ) ? spm->dof * i : dofs[i] - baseval; - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - j = (*colptr - baseval); - dofj = ( spm->dof > 1 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 1 ) ? spm->dof * j : dofs[j] - baseval; - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - A[ (col + jj) * lda + (row + ii) ] = *valptr; - } - } - } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; } + + z_spm_2dense_elt( A, lda, spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; } } + return A; } @@ -398,12 +427,17 @@ z_spmCSR2dense( const spmatrix_t *spm ) * @return A dense matrix in Lapack layout format * *******************************************************************************/ -spm_complex64_t * +static inline spm_complex64_t * z_spmIJV2dense( const spmatrix_t *spm ) { - spm_int_t i, j, k, lda, baseval; - spm_complex64_t *A, *valptr; - spm_int_t *colptr, *rowptr; + spm_int_t k, lda, baseval; + spm_int_t i, dofi, row; + spm_int_t j, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_complex64_t *valptr; + spm_complex64_t *A; assert( spm->fmttype == SpmIJV ); assert( spm->flttype == SpmComplex64 ); @@ -413,162 +447,35 @@ z_spmIJV2dense( const spmatrix_t *spm ) memset( A, 0, lda * lda * sizeof(spm_complex64_t)); baseval = spmFindBase( spm ); - i = 0; - j = 0; colptr = spm->colptr; rowptr = spm->rowptr; valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; - /** - * Constant degree of fredom of 1 - */ - if ( spm->dof == 1 ) { - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++, valptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if( i == j ) { - /* Make sure the matrix is hermitian */ - A[ i * lda + i ] = creal(*valptr) + I * 0.; - } - else { - A[ j * lda + i ] = *valptr; - A[ i * lda + j ] = conj(*valptr); - } - } - break; -#endif - case SpmSymmetric: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++, valptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - A[ j * lda + i ] = *valptr; - A[ i * lda + j ] = *valptr; - } - break; - case SpmGeneral: - default: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++, valptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - A[ j * lda + i ] = *valptr; - } + for(k=0; k<spm->nnz; k++, rowptr++, colptr++) + { + i = *rowptr - baseval; + j = *colptr - baseval; + + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * i; + dofj = spm->dof; + col = spm->dof * j; } - } - /** - * General degree of freedom (constant or variable) - */ - else { - spm_int_t k, ii, jj, dofi, dofj, col, row; - spm_int_t *dofs = spm->dofs; - - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( spm->dof > 1 ) { - dofi = spm->dof; - row = spm->dof * i; - dofj = spm->dof; - col = spm->dof * j; - } - else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; - } - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if( col+jj == row+ii ) { - /* Make sure the matrix is hermitian */ - A[ (col+jj) * lda + (row+ii) ] = creal(*valptr) + I * 0.; - } - else { - A[ (col+jj) * lda + (row+ii) ] = *valptr; - A[ (row+ii) * lda + (col+jj) ] = conj(*valptr); - } - } - } - } - break; -#endif - case SpmSymmetric: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( spm->dof > 1 ) { - dofi = spm->dof; - row = spm->dof * i; - dofj = spm->dof; - col = spm->dof * j; - } - else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; - } - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - A[ (col+jj) * lda + (row+ii) ] = *valptr; - A[ (row+ii) * lda + (col+jj) ] = *valptr; - } - } - } - break; - - case SpmGeneral: - default: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( spm->dof > 1 ) { - dofi = spm->dof; - row = spm->dof * i; - dofj = spm->dof; - col = spm->dof * j; - } - else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; - } - - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - A[ (col+jj) * lda + (row+ii) ] = *valptr; - } - } - } + else { + dofi = dofs[i+1] - dofs[i]; + row = dofs[i] - baseval; + dofj = dofs[j+1] - dofs[j]; + col = dofs[j] - baseval; } + + z_spm_2dense_elt( A, lda, spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; } + return A; } @@ -596,15 +503,26 @@ z_spmIJV2dense( const spmatrix_t *spm ) spm_complex64_t * z_spm2dense( const spmatrix_t *spm ) { + spm_complex64_t *A; + + if ( spm->loc2glob != NULL ) { + fprintf( stderr, "spm2dense: Conversion to dense matrix with distributed spm is not available\n"); + return NULL; + } + switch (spm->fmttype) { case SpmCSC: - return z_spmCSC2dense( spm ); + A = z_spmCSC2dense( spm ); + break; case SpmCSR: - return z_spmCSR2dense( spm ); + A = z_spmCSR2dense( spm ); + break; case SpmIJV: - return z_spmIJV2dense( spm ); + A = z_spmIJV2dense( spm ); + break; } - return NULL; + + return A; } /** diff --git a/src/z_spm_convert_to_csc.c b/src/z_spm_convert_to_csc.c index 0d7946890a1a5a84506490355356fbcd2571b693..9ea0d28eeb401afd4798df3d5af822c289949daf 100644 --- a/src/z_spm_convert_to_csc.c +++ b/src/z_spm_convert_to_csc.c @@ -34,16 +34,22 @@ * ******************************************************************************* * - * @retval SPM_SUCCESS + * @retval SPM_SUCCESS on success + * @retval SPM_ERR_NOTIMPLEMENTED on non supported cases * *******************************************************************************/ int z_spmConvertIJV2CSC( spmatrix_t *spm ) { spm_int_t *newcol, *oldcol; - spm_int_t i, tmp, baseval, total; + spm_int_t k, j, tmp, baseval, total; spmatrix_t oldspm; + if ( (spm->dof != 1) && (spm->flttype != SpmPattern) ) { + //fprintf( stderr, "spmConvert: Conversion of non unique dof not yet implemented\n"); + return SPM_ERR_NOTIMPLEMENTED; + } + /* Backup the input */ memcpy( &oldspm, spm, sizeof(spmatrix_t) ); @@ -57,21 +63,71 @@ z_spmConvertIJV2CSC( spmatrix_t *spm ) */ z_spmSort( spm ); - /* Allocate and compute the new colptr */ - spm->colptr = (spm_int_t *) calloc(spm->n+1,sizeof(spm_int_t)); +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob != NULL ) { + /* + * Check if the distribution is by column or row by exploiting the fact + * that the array is sorted. + * This is not completely safe, but that avoids going through the full + * matrix. + */ + const spm_int_t *glob2loc; + spm_int_t m = spm->rowptr[spm->nnz-1] - spm->rowptr[0] + 1; /* This may be not correct */ + spm_int_t n = spm->colptr[spm->nnz-1] - spm->colptr[0] + 1; + spm_int_t jg; + int distribution = 0; + + if ( m <= spm->n ) { /* By row */ + distribution |= 1; + } + if ( n <= spm->n ) { /* By column */ + distribution |= 2; + } + MPI_Allreduce( MPI_IN_PLACE, &distribution, 1, MPI_INT, + MPI_BAND, spm->comm ); + + if ( !(distribution & 2) ) { + //fprintf( stderr, "spmConvert: Conversion of column distributed matrices to CSC is not yet implemented\n"); + return SPM_ERR_NOTIMPLEMENTED; + } + + /* Allocate and compute the glob2loc array */ + glob2loc = spm_get_glob2loc( spm, baseval ); + + /* Allocate and compute the new colptr */ + spm->colptr = (spm_int_t *) calloc(spm->n+1,sizeof(spm_int_t)); - /* Compute the number of edges per row */ - newcol = spm->colptr - baseval; - oldcol = oldspm.colptr; - for (i=0; i<spm->nnz; i++, oldcol++) + /* Compute the number of edges per row */ + newcol = spm->colptr; + oldcol = oldspm.colptr; + for (k=0; k<spm->nnz; k++, oldcol++) + { + jg = *oldcol - baseval; + j = glob2loc[ jg ]; + assert( j >= 0 ); + newcol[j]++; + } + } + else +#endif { - newcol[ *oldcol ] ++; + /* Allocate and compute the new colptr */ + spm->colptr = (spm_int_t *) calloc(spm->n+1,sizeof(spm_int_t)); + + /* Compute the number of edges per row */ + newcol = spm->colptr; + oldcol = oldspm.colptr; + for (k=0; k<spm->nnz; k++, oldcol++) + { + j = *oldcol - baseval; + assert( j >= 0 ); + newcol[j]++; + } } /* Update the colptr */ - total = baseval; - newcol = spm->colptr; - for (i=0; i<(spm->n+1); i++, newcol++) + total = baseval; + for (j=0; j<(spm->n+1); j++, newcol++) { tmp = *newcol; *newcol = total; @@ -89,148 +145,297 @@ z_spmConvertIJV2CSC( spmatrix_t *spm ) } /** - ******************************************************************************* - * * @ingroup spm_dev_convert * - * @brief convert a matrix in CSR format to a matrix in CSC - * format. + * @brief convert a symmetric matrix in CSR format to a matrix in CSC format. * - * If the matrix is SpmSymmetric or SpmHermitian, then the - * transpose or respectively the conjugate is returned. - * - ******************************************************************************* + * Note that the transposed matrix is returned. * * @param[inout] spm - * The csr matrix at enter, - * the csc matrix at exit. + * The csr matrix on entry, + * the csc matrix on exit. * - ******************************************************************************* - * - * @retval SPM_SUCCESS + * @retval SPM_SUCCESS, if succeeded + * @retval SPM_ERR_NOTIMPLEMENTED, it not yet implemented * - *******************************************************************************/ -int -z_spmConvertCSR2CSC( spmatrix_t *spm ) + */ +static inline int +z_spmConvertCSR2CSC_sym( spmatrix_t *spm ) { - assert( spm->loc2glob == NULL ); + spm_int_t *tmp; + assert( spm->fmttype == SpmCSR ); spm->fmttype = SpmCSC; - switch( spm->mtxtype ) { + /* Just need to swap the pointers */ + tmp = spm->rowptr; + spm->rowptr = spm->colptr; + spm->colptr = tmp; + spm->fmttype = SpmCSC; + + return SPM_SUCCESS; +} + #if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - { - /* Similar to SpmSymmetric case with conjugate of the values */ - spm_complex64_t *valptr = spm->values; - spm_int_t *colptr = spm->colptr; - spm_int_t *rowptr = spm->rowptr; - spm_int_t i, j; - - for(i=0; i<spm->n; i++, rowptr++){ - for(j=rowptr[0]; j<rowptr[1]; j++, colptr++, valptr++) { - if ( *colptr != i ) { - *valptr = conj( *valptr ); +static inline void +z_spmConvert_conj_elt( const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + if ( layout == SpmColMajor ) { + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, valptr++) + { + if ( (col+jj) == (row + ii) ) { + continue; } + *valptr = conj( *valptr ); } } } - spm_attr_fallthrough; -#endif - case SpmSymmetric: + else { + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<dofj; jj++, valptr++) + { + if ( (col+jj) == (row + ii) ) { + continue; + } + *valptr = conj( *valptr ); + } + } + } +} + +/** + * @ingroup spm_dev_convert + * + * @brief convert an hermitian matrix in CSR format to a matrix in CSC format. + * + * Note that the conjugate transposed matrix is returned. + * + * @param[inout] spm + * The csr matrix on entry, + * the csc matrix on exit. + * + * @retval SPM_SUCCESS, if succeeded + * @retval SPM_ERR_NOTIMPLEMENTED, it not yet implemented + * + */ +static inline int +z_spmConvertCSR2CSC_her( spmatrix_t *spm ) +{ + const spm_int_t *dofs; + const spm_int_t *loc2glob; + spm_complex64_t *valptr = spm->values; + spm_int_t *colptr = spm->colptr; + spm_int_t *rowptr = spm->rowptr; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + spm_int_t i, k; + spm_int_t *tmp; + spm_int_t baseval = spmFindBase( spm ); + + assert( spm->fmttype == SpmCSR ); + + spm->fmttype = SpmCSC; + dofs = spm->dofs; + loc2glob = spm->loc2glob; + + for(i=0; i<spm->n; i++, rowptr++, loc2glob++) { - spm_int_t *tmp; + ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } - /* Just need to swap the pointers */ - tmp = spm->rowptr; - spm->rowptr = spm->colptr; - spm->colptr = tmp; - spm->fmttype = SpmCSC; + for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) + { + jg = (*colptr - baseval); + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } - return SPM_SUCCESS; + z_spmConvert_conj_elt( spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; + } } - break; - case SpmGeneral: - default: - { - spm_int_t *row_csc; - spm_int_t *col_csc; + /* Just need to swap the pointers */ + tmp = spm->rowptr; + spm->rowptr = spm->colptr; + spm->colptr = tmp; + spm->fmttype = SpmCSC; + + return SPM_SUCCESS; +} +#endif + +/** + * @ingroup spm_dev_convert + * + * @brief convert a general matrix in CSR format to a matrix in CSC format. + * + * @param[inout] spm + * The csr matrix on entry, + * the csc matrix on exit. + * + * @retval SPM_SUCCESS, if succeeded + * @retval SPM_ERR_NOTIMPLEMENTED, it not yet implemented + * + */ +static inline int +z_spmConvertCSR2CSC_gen( spmatrix_t *spm ) +{ + spm_int_t *row_csc; + spm_int_t *col_csc; #if !defined(PRECISION_p) - spm_complex64_t *val_csc; - spm_complex64_t *valptr = (spm_complex64_t*)(spm->values); + spm_complex64_t *val_csc; + spm_complex64_t *valptr = (spm_complex64_t*)(spm->values); +#endif + spm_int_t j, k, col, row, nnz, baseval; + +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob != NULL ) { + return SPM_ERR_NOTIMPLEMENTED; + } + if ( (spm->dof != 1) && (spm->flttype != SpmPattern) ) { + return SPM_ERR_NOTIMPLEMENTED; + } #endif - spm_int_t j, k, col, row, nnz, baseval; + assert( spm->loc2glob == NULL ); + assert( spm->fmttype == SpmCSR ); - baseval = spmFindBase( spm ); - nnz = spm->nnz; + spm->fmttype = SpmCSC; + + baseval = spmFindBase( spm ); + nnz = spm->nnz; - row_csc = malloc(nnz * sizeof(spm_int_t)); - col_csc = calloc(spm->n+1,sizeof(spm_int_t)); + row_csc = malloc(nnz * sizeof(spm_int_t)); + col_csc = calloc(spm->n+1,sizeof(spm_int_t)); - assert( row_csc ); - assert( col_csc ); + assert( row_csc ); + assert( col_csc ); #if !defined(PRECISION_p) - val_csc = malloc(nnz*sizeof(spm_complex64_t)); - assert( val_csc ); + val_csc = malloc(nnz*sizeof(spm_complex64_t)); + assert( val_csc ); #endif - /* Count the number of elements per column */ - for (j=0; j<nnz; j++) { - col = spm->colptr[j] - baseval; - assert(col < spm->n ); - col_csc[ col+1 ] ++; - } + /* Count the number of elements per column */ + for (j=0; j<nnz; j++) { + col = spm->colptr[j] - baseval; + assert(col < spm->n ); + col_csc[ col+1 ] ++; + } - /* Compute the index of each column */ - col_csc[0] = 0; - for (j=0; j<spm->n; j++){ - col_csc[j+1] += col_csc[j]; - } + /* Compute the index of each column */ + col_csc[0] = 0; + for (j=0; j<spm->n; j++){ + col_csc[j+1] += col_csc[j]; + } - assert( (col_csc[spm->gN]) == nnz ); + assert( (col_csc[spm->gN]) == nnz ); - for (row=0; row<spm->n; row++) { - spm_int_t fcol = spm->rowptr[row ] - baseval; - spm_int_t lcol = spm->rowptr[row+1] - baseval; + for (row=0; row<spm->n; row++) { + spm_int_t fcol = spm->rowptr[row ] - baseval; + spm_int_t lcol = spm->rowptr[row+1] - baseval; - for (k=fcol; k<lcol; k++) { - col = spm->colptr[k] - baseval; - j = col_csc[col]; - row_csc[j] = row + baseval; + for (k=fcol; k<lcol; k++) { + col = spm->colptr[k] - baseval; + j = col_csc[col]; + row_csc[j] = row + baseval; #if !defined(PRECISION_p) - val_csc[j] = valptr[k]; + val_csc[j] = valptr[k]; #endif - col_csc[col] ++; - } + col_csc[col] ++; } + } - /* Restore the colptr indexes */ - { - spm_int_t tmp, tmp2; - - tmp = col_csc[0]; - col_csc[0] = baseval; - for (j=0; j<spm->n; j++) { - tmp2 = col_csc[j+1]; - col_csc[j+1] = tmp + baseval; - tmp = tmp2; - } + /* Restore the colptr indexes */ + { + spm_int_t tmp, tmp2; + + tmp = col_csc[0]; + col_csc[0] = baseval; + for (j=0; j<spm->n; j++) { + tmp2 = col_csc[j+1]; + col_csc[j+1] = tmp + baseval; + tmp = tmp2; } + } - spmExit( spm ); - spm->colptr = col_csc; - spm->rowptr = row_csc; + spmExit( spm ); + spm->fmttype = SpmCSC; + spm->colptr = col_csc; + spm->rowptr = row_csc; #if !defined(PRECISION_p) - spm->values = val_csc; + spm->values = val_csc; #else - spm->values = NULL; + spm->values = NULL; #endif - } - } return SPM_SUCCESS; } + +/** + ******************************************************************************* + * + * @ingroup spm_dev_convert + * + * @brief convert a matrix in CSR format to a matrix in CSC + * format. + * + * If the matrix is SpmSymmetric or SpmHermitian, then the + * transpose or respectively the conjugate is returned. + * + ******************************************************************************* + * + * @param[inout] spm + * The csr matrix at enter, + * the csc matrix at exit. + * + ******************************************************************************* + * + * @retval SPM_SUCCESS + * + *******************************************************************************/ +int +z_spmConvertCSR2CSC( spmatrix_t *spm ) +{ + assert( spm->fmttype == SpmCSR ); + + switch( spm->mtxtype ) { + case SpmGeneral: + return z_spmConvertCSR2CSC_gen( spm ); +#if defined(PRECISION_z) || defined(PRECISION_c) + case SpmHermitian: + return z_spmConvertCSR2CSC_her( spm ); +#endif + case SpmSymmetric: + spm_attr_fallthrough; + default: + return z_spmConvertCSR2CSC_sym( spm ); + } + + return SPM_ERR_UNKNOWN; +} diff --git a/src/z_spm_convert_to_csr.c b/src/z_spm_convert_to_csr.c index 1cae234dd77859572e3b21b4bb923b8535e46a6e..1f5f390c5d109bd6ce91217778589d6e89da8b14 100644 --- a/src/z_spm_convert_to_csr.c +++ b/src/z_spm_convert_to_csr.c @@ -18,90 +18,6 @@ #include "common.h" #include "z_spm.h" -/** - ******************************************************************************* - * - * @ingroup spm_dev_convert - * - * @brief convert a matrix in CSC format to a matrix in CSR format. - * - * If the matrix is SpmSymmetric or SpmHermitian, then the - * transpose or respectively the conjugate is returned. - * - ******************************************************************************* - * - * @param[inout] spm - * The csc matrix at enter, - * the csr matrix at exit. - * - ******************************************************************************* - * - * @retval SPM_SUCCESS - * - *******************************************************************************/ -int -z_spmConvertCSC2CSR( spmatrix_t *spm ) -{ - spm_int_t *tmp; - spm_int_t result; - - switch( spm->mtxtype ) { -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - { - /* Similar to SpmSymmetric case with conjugate of the values */ - spm_complex64_t *valptr = spm->values; - spm_int_t *colptr = spm->colptr; - spm_int_t *rowptr = spm->rowptr; - spm_int_t i, j; - - for(j=0; j<spm->n; j++, colptr++){ - for(i=colptr[0]; i<colptr[1]; i++, rowptr++, valptr++) { - if ( *rowptr != j ) { - *valptr = conj( *valptr ); - } - } - } - } - spm_attr_fallthrough; -#endif - case SpmSymmetric: - { - spm_int_t *tmp; - - /* Just need to swap the pointers */ - tmp = spm->rowptr; - spm->rowptr = spm->colptr; - spm->colptr = tmp; - spm->fmttype = SpmCSR; - - return SPM_SUCCESS; - } - break; - - case SpmGeneral: - default: - { - /* Transpose the spm in CSC to trans(spm) in CSR */ - tmp = spm->rowptr; - spm->rowptr = spm->colptr; - spm->colptr = tmp; - spm->fmttype = SpmCSR; - - /* Convert trans(spm) in CSR to trans(spm) in CSC */ - result = z_spmConvertCSR2CSC( spm ); - - /* Transpose trans(spm) in CSC to obtain the spm in CSR */ - tmp = spm->rowptr; - spm->rowptr = spm->colptr; - spm->colptr = tmp; - spm->fmttype = SpmCSR; - } - } - - return result; -} - /** ******************************************************************************* * @@ -118,16 +34,22 @@ z_spmConvertCSC2CSR( spmatrix_t *spm ) * ******************************************************************************* * - * @retval SPM_SUCCESS + * @retval SPM_SUCCESS on success + * @retval SPM_ERR_NOTIMPLEMENTED on non supported cases * *******************************************************************************/ int z_spmConvertIJV2CSR( spmatrix_t *spm ) { spm_int_t *newrow, *oldrow; - spm_int_t i, tmp, baseval, total; + spm_int_t k, i, tmp, baseval, total; spmatrix_t oldspm; + if ( (spm->dof != 1) && (spm->flttype != SpmPattern) ) { + //fprintf( stderr, "spmConvert: Conversion of non unique dof not yet implemented\n"); + return SPM_ERR_NOTIMPLEMENTED; + } + /* Backup the input */ memcpy( &oldspm, spm, sizeof(spmatrix_t) ); @@ -137,7 +59,7 @@ z_spmConvertIJV2CSR( spmatrix_t *spm ) baseval = spmFindBase( spm ); /* - * Sort the IJV structure by column/row indexes + * Sort the IJV structure by row/column indexes */ newrow = spm->colptr; spm->colptr = spm->rowptr; @@ -146,20 +68,70 @@ z_spmConvertIJV2CSR( spmatrix_t *spm ) spm->rowptr = spm->colptr; spm->colptr = newrow; - /* Compute the new rowptr */ - spm->rowptr = (spm_int_t *) calloc(spm->n+1,sizeof(spm_int_t)); +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob != NULL ) { + /* + * Check if the distribution is by column or row by exploiting the fact + * that the array is sorted. + * This is not completely safe, but that avoids going through the full + * matrix. + */ + const spm_int_t *glob2loc; + spm_int_t m = spm->rowptr[spm->nnz-1] - spm->rowptr[0] + 1; /* This may be not correct */ + spm_int_t n = spm->colptr[spm->nnz-1] - spm->colptr[0] + 1; + spm_int_t ig; + int distribution = 0; + + if ( m <= spm->n ) { /* By row */ + distribution |= 1; + } + if ( n <= spm->n ) { /* By column */ + distribution |= 2; + } + MPI_Allreduce( MPI_IN_PLACE, &distribution, 1, MPI_INT, + MPI_BAND, spm->comm ); + + if ( !(distribution & 1) ) { + //fprintf( stderr, "spmConvert: Conversion of column distributed matrices to CSC is not yet implemented\n"); + return SPM_ERR_NOTIMPLEMENTED; + } - /* Compute the number of edges per row */ - newrow = spm->rowptr - baseval; - oldrow = oldspm.rowptr; - for (i=0; i<spm->nnz; i++, oldrow++) + /* Allocate and compute the glob2loc array */ + glob2loc = spm_get_glob2loc( spm, baseval ); + + /* Allocate and compute the new rowptr */ + spm->rowptr = (spm_int_t *) calloc(spm->n+1,sizeof(spm_int_t)); + + /* Compute the number of edges per row */ + newrow = spm->rowptr; + oldrow = oldspm.rowptr; + for (k=0; k<spm->nnz; k++, oldrow++) + { + ig = *oldrow - baseval; + i = glob2loc[ ig ]; + assert( i >= 0 ); + newrow[i]++; + } + } + else +#endif { - newrow[ *oldrow ] ++; + /* Allocate and compute the new colptr */ + spm->rowptr = (spm_int_t *) calloc(spm->n+1,sizeof(spm_int_t)); + + /* Compute the number of edges per row */ + newrow = spm->rowptr; + oldrow = oldspm.rowptr; + for (k=0; k<spm->nnz; k++, oldrow++) + { + i = *oldrow - baseval; + assert( i >= 0 ); + newrow[i]++; + } } /* Update the rowptr */ total = baseval; - newrow = spm->rowptr; for (i=0; i<(spm->n+1); i++, newrow++) { tmp = *newrow; @@ -176,3 +148,48 @@ z_spmConvertIJV2CSR( spmatrix_t *spm ) return SPM_SUCCESS; } + +/** + ******************************************************************************* + * + * @ingroup spm_dev_convert + * + * @brief convert a matrix in CSC format to a matrix in CSR format. + * + * If the matrix is SpmSymmetric or SpmHermitian, then the + * transpose or respectively the conjugate is returned. + * + ******************************************************************************* + * + * @param[inout] spm + * The csc matrix at enter, + * the csr matrix at exit. + * + ******************************************************************************* + * + * @retval SPM_SUCCESS + * + *******************************************************************************/ +int +z_spmConvertCSC2CSR( spmatrix_t *spm ) +{ + spm_int_t *tmp; + spm_int_t result; + + /* Transpose the spm in CSC to trans(spm) in CSR */ + tmp = spm->rowptr; + spm->rowptr = spm->colptr; + spm->colptr = tmp; + spm->fmttype = SpmCSR; + + /* Convert trans(spm) in CSR to trans(spm) in CSC */ + result = z_spmConvertCSR2CSC( spm ); + + /* Transpose trans(spm) in CSC to obtain the spm in CSR */ + tmp = spm->rowptr; + spm->rowptr = spm->colptr; + spm->colptr = tmp; + spm->fmttype = SpmCSR; + + return result; +} diff --git a/src/z_spm_convert_to_ijv.c b/src/z_spm_convert_to_ijv.c index 6404b2083fd191055b52eaa39d9c3400aa48e19a..4c1efb01baddb575165a0e8a54fe5648512839f0 100644 --- a/src/z_spm_convert_to_ijv.c +++ b/src/z_spm_convert_to_ijv.c @@ -22,7 +22,7 @@ * * @ingroup spm_dev_convert * - * @brief convert a matrix in CSC format to a matrix in IJV format. + * @brief Convert a matrix in CSC format to a matrix in IJV format. * ******************************************************************************* * @@ -38,31 +38,50 @@ int z_spmConvertCSC2IJV( spmatrix_t *spm ) { - spm_int_t *col_ijv, *colptr; - spm_int_t i, j, baseval, nnz; + const spm_int_t *colcscptr, *colcsc; + spm_int_t *colijvptr, *colijv; + spm_int_t i, j, nnz; - /* - * Check the baseval - */ - baseval = spmFindBase( spm ); nnz = spm->nnz; spm->fmttype = SpmIJV; - col_ijv = malloc(nnz*sizeof(spm_int_t)); - assert( col_ijv ); + colijvptr = malloc( nnz * sizeof(spm_int_t) ); + colijv = colijvptr; + assert( colijvptr ); - colptr = col_ijv; - for(i=0; i<spm->n; i++) - { - for(j=spm->colptr[i]; j<spm->colptr[i+1]; j++) + colcscptr = spm->colptr; + colcsc = colcscptr; + + if ( spm->loc2glob ) { + const spm_int_t *loc2glob = spm->loc2glob; + spm_int_t ig; + + for(i=0; i<spm->n; i++, colcsc++, loc2glob++) + { + ig = *loc2glob; + for(j=colcsc[0]; j<colcsc[1]; j++) + { + *colijv = ig; + colijv++; + } + } + } + else { + spm_int_t baseval = spmFindBase( spm ); + spm_int_t n = spm->n + baseval; + + for(i=baseval; i<n; i++, colcsc++) { - *colptr = i+baseval; - colptr++; + for(j=colcsc[0]; j<colcsc[1]; j++) + { + *colijv = i; + colijv++; + } } } - free(spm->colptr); - spm->colptr = col_ijv; + free( (spm_int_t*)colcscptr ); + spm->colptr = colijvptr; return SPM_SUCCESS; } @@ -88,31 +107,50 @@ z_spmConvertCSC2IJV( spmatrix_t *spm ) int z_spmConvertCSR2IJV( spmatrix_t *spm ) { - spm_int_t *row_ijv, *rowptr; - spm_int_t i, j, baseval, nnz; + const spm_int_t *rowcscptr, *rowcsc; + spm_int_t *rowijvptr, *rowijv; + spm_int_t i, j, nnz; - /* - * Check the baseval - */ - baseval = spmFindBase( spm ); nnz = spm->nnz; spm->fmttype = SpmIJV; - row_ijv = malloc(nnz*sizeof(spm_int_t)); - assert( row_ijv ); + rowijvptr = malloc( nnz * sizeof(spm_int_t) ); + rowijv = rowijvptr; + assert( rowijvptr ); + + rowcscptr = spm->rowptr; + rowcsc = rowcscptr; + + if ( spm->loc2glob ) { + const spm_int_t *loc2glob = spm->loc2glob; + spm_int_t jg; + + for(j=0; j<spm->n; j++, rowcsc++, loc2glob++) + { + jg = *loc2glob; + for(i=rowcsc[0]; i<rowcsc[1]; i++) + { + *rowijv = jg; + rowijv++; + } + } + } + else { + spm_int_t baseval = spmFindBase( spm ); + spm_int_t n = spm->n + baseval; - rowptr = row_ijv; - for(i=0; i<spm->n; i++) - { - for(j=spm->rowptr[i]; j<spm->rowptr[i+1]; j++) + for(j=baseval; j<n; j++, rowcsc++) { - *rowptr = i+baseval; - rowptr++; + for(i=rowcsc[0]; i<rowcsc[1]; i++) + { + *rowijv = j; + rowijv++; + } } } - free(spm->rowptr); - spm->rowptr = row_ijv; + free( (spm_int_t*)rowcscptr ); + spm->rowptr = rowijvptr; return SPM_SUCCESS; } diff --git a/src/z_spm_dof_extend.c b/src/z_spm_dof_extend.c index 711187dfb53b410f427ff0cd9860b46a6f69791f..8e7320fa3dc468b4cb0e82ba692318d6b4959c9d 100644 --- a/src/z_spm_dof_extend.c +++ b/src/z_spm_dof_extend.c @@ -33,7 +33,7 @@ void z_spmDofExtend( spmatrix_t *spm ) { - spm_int_t i, j, k, ii, jj, dofi, dofj, baseval; + spm_int_t j, ig, jg, k, ii, jj, dofi, dofj, baseval; spm_int_t *colptr, *rowptr, *dofs; spm_complex64_t *newval, *oldval, *oldvalptr; @@ -60,21 +60,22 @@ z_spmDofExtend( spmatrix_t *spm ) */ for(j=0; j<spm->n; j++, colptr++) { - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; + jg = ( spm->loc2glob == NULL ) ? j : spm->loc2glob[j] - baseval; + dofj = ( spm->dof > 0 ) ? spm->dof : dofs[jg+1] - dofs[jg]; /** * Loop on rows */ for(k=colptr[0]; k<colptr[1]; k++, rowptr++, oldval++) { - i = *rowptr - baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; + ig = *rowptr - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : dofs[ig+1] - dofs[ig]; for(jj=0; jj<dofj; jj++) { for(ii=0; ii<dofi; ii++, newval++) { - if ( i == j ) { + if ( ig == jg ) { *newval = *oldval / (labs((long)(ii - jj)) + 1.); } else { @@ -91,16 +92,16 @@ z_spmDofExtend( spmatrix_t *spm ) */ for(k=0; k<spm->nnz; k++, rowptr++, colptr++, oldval++) { - i = *rowptr - baseval; - j = *colptr - baseval; - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; + ig = *rowptr - baseval; + jg = *colptr - baseval; + dofi = ( spm->dof > 0 ) ? spm->dof : dofs[ig+1] - dofs[ig]; + dofj = ( spm->dof > 0 ) ? spm->dof : dofs[jg+1] - dofs[jg]; for(jj=0; jj<dofj; jj++) { for(ii=0; ii<dofi; ii++, newval++) { - if ( i == j ) { + if ( ig == jg ) { *newval = *oldval / (labs((long)(ii - jj)) + 1.); } else { diff --git a/src/z_spm_expand.c b/src/z_spm_expand.c index 4fc8e52cba29733a1e930cd3064cc216f235e2d4..00a4cdc395b835eadfd4c0c1b378221df985e85d 100644 --- a/src/z_spm_expand.c +++ b/src/z_spm_expand.c @@ -17,6 +17,44 @@ #include "common.h" #include "z_spm.h" +static inline void +spm_expand_loc2glob( const spmatrix_t *spm_in, spmatrix_t *spm_out ) +{ + spm_int_t i, j, ig, baseval, ndof; + + spm_int_t *l2g_in = spm_in->loc2glob; + spm_int_t *l2g_out = spm_out->loc2glob; + + baseval = spmFindBase( spm_in ); + + /* Constant dof */ + if ( spm_in->dof > 0 ) { + ndof = spm_in->dof; + for(i=0; i<spm_in->n; i++, l2g_in++) + { + ig = *l2g_in - baseval; + for(j=0; i<ndof; i++, l2g_out++) + { + *l2g_out = ig * ndof + j + baseval; + } + } + } + /* Variadic dof */ + else { + spm_int_t *dofs = spm_in->dofs; + for(i=0; i<spm_in->n; i++, l2g_in++) + { + ig = *l2g_in - baseval; + ndof = dofs[ig+1] - dofs[ig]; + for(j=0; i<ndof; i++, l2g_out++) + { + *l2g_out = dofs[ig] + j; + } + } + } + assert( (l2g_out - spm_out->loc2glob) == spm_out->n ); +} + /** ******************************************************************************* * @@ -42,7 +80,8 @@ static void z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) { - spm_int_t i, j, k, ii, jj, dofi, dofj, col, row, baseval, lda; + spm_int_t j, k, ii, jj, ig, jg; + spm_int_t dofi, dofj, col, row, baseval, lda; spm_int_t diag, height; spm_int_t *newcol, *newrow, *oldcol, *oldrow, *dofs; #if !defined(PRECISION_p) @@ -69,17 +108,18 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) for(j=0; j<spm_in->n; j++, oldcol++) { diag = 0; - dofj = (spm_in->dof > 0 ) ? spm_in->dof : dofs[j+1] - dofs[j]; + jg = (spm_in->loc2glob == NULL) ? j : spm_in->loc2glob[j] - baseval; + dofj = (spm_in->dof > 0 ) ? spm_in->dof : dofs[jg+1] - dofs[jg]; /* Sum the heights of the elements in the column */ newcol[1] = newcol[0]; for(k=oldcol[0]; k<oldcol[1]; k++) { - i = oldrow[k-baseval] - baseval; - dofi = (spm_in->dof > 0 ) ? spm_in->dof : dofs[i+1] - dofs[i]; + ig = oldrow[k-baseval] - baseval; + dofi = (spm_in->dof > 0 ) ? spm_in->dof : dofs[ig+1] - dofs[ig]; newcol[1] += dofi; - diag = (diag || (i == j)); + diag = (diag || (ig == jg)); } diag = (diag & (spm_in->mtxtype != SpmGeneral)); @@ -117,16 +157,17 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) * Backup current position in oldval because we will pick * interleaved data inside the buffer */ - lda = newcol[1] - newcol[0]; + lda = newcol[1] - newcol[0]; oldval2 = oldval; + jg = (spm_in->loc2glob == NULL) ? j : spm_in->loc2glob[j] - baseval; if ( spm_in->dof > 0 ) { dofj = spm_in->dof; assert( col == spm_in->dof * j ); } else { - dofj = dofs[j+1] - dofs[j]; - assert( col == (dofs[j] - baseval) ); + dofj = dofs[jg+1] - dofs[jg]; + assert( col == (dofs[jg] - baseval) ); } for(jj=0; jj<dofj; jj++, col++, newcol++) @@ -139,15 +180,15 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) for(k=oldcol[0]; k<oldcol[1]; k++) { - i = oldrow[k-baseval] - baseval; + ig = oldrow[k-baseval] - baseval; if ( spm_in->dof > 0 ) { dofi = spm_in->dof; - row = spm_in->dof * i; + row = spm_in->dof * ig; } else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; } /* Move to the top of the jj column in the current element */ @@ -156,8 +197,8 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) for(ii=0; ii<dofi; ii++, row++) { if ( (spm_in->mtxtype == SpmGeneral) || - (i != j) || - ((i == j) && (row >= col)) ) + (ig != jg) || + ((ig == jg) && (row >= col)) ) { (*newrow) = row + baseval; newrow++; @@ -203,7 +244,8 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) static void z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) { - spm_int_t i, j, k, ii, jj, dofi, dofj, col, row, baseval, lda; + spm_int_t i, k, ii, jj, ig, jg; + spm_int_t dofi, dofj, col, row, baseval, lda; spm_int_t diag, height; spm_int_t *newcol, *newrow, *oldcol, *oldrow, *dofs; #if !defined(PRECISION_p) @@ -230,17 +272,18 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) for(i=0; i<spm_in->n; i++, oldrow++) { diag = 0; - dofi = (spm_in->dof > 0 ) ? spm_in->dof : dofs[i+1] - dofs[i]; + ig = (spm_in->loc2glob == NULL) ? i : spm_in->loc2glob[i] - baseval; + dofi = (spm_in->dof > 0 ) ? spm_in->dof : dofs[ig+1] - dofs[ig]; /* Sum the width of the elements in the row */ newrow[1] = newrow[0]; for(k=oldrow[0]; k<oldrow[1]; k++) { - j = oldcol[k-baseval] - baseval; - dofj = (spm_in->dof > 0 ) ? spm_in->dof : dofs[j+1] - dofs[j]; + jg = oldcol[k-baseval] - baseval; + dofj = (spm_in->dof > 0 ) ? spm_in->dof : dofs[jg+1] - dofs[jg]; newrow[1] += dofj; - diag = (diag || (i == j)); + diag = (diag || (ig == jg)); } diag = (diag & (spm_in->mtxtype != SpmGeneral)); @@ -278,16 +321,17 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) * Backup current position in oldval because we will pick * interleaved data inside the buffer */ - lda = newrow[1] - newrow[0]; + lda = newrow[1] - newrow[0]; oldval2 = oldval; + ig = (spm_in->loc2glob == NULL) ? i : spm_in->loc2glob[i] - baseval; if ( spm_in->dof > 0 ) { dofi = spm_in->dof; assert( row == spm_in->dof * i ); } else { - dofi = dofs[i+1] - dofs[i]; - assert( row == dofs[i] - baseval ); + dofi = dofs[ig+1] - dofs[ig]; + assert( row == dofs[ig] - baseval ); } for(ii=0; ii<dofi; ii++, row++, newrow++) @@ -300,22 +344,22 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) for(k=oldrow[0]; k<oldrow[1]; k++) { - j = oldcol[k-baseval] - baseval; + jg = oldcol[k-baseval] - baseval; if ( spm_in->dof > 0 ) { dofj = spm_in->dof; - col = spm_in->dof * j; + col = spm_in->dof * jg; } else { - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; } for(jj=0; jj<dofj; jj++, col++) { if ( (spm_in->mtxtype == SpmGeneral) || - (i != j) || - ((i == j) && (row <= col)) ) + (ig != jg) || + ((ig == jg) && (row <= col)) ) { (*newcol) = col + baseval; newcol++; @@ -522,7 +566,6 @@ z_spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) { assert( spm_in != NULL ); assert( spm_out != NULL ); - assert( spm_in->loc2glob == NULL ); assert( spm_in->flttype == SpmComplex64 ); if ( spm_in->dof == 1 ) { @@ -543,6 +586,11 @@ z_spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) memcpy( spm_out, spm_in, sizeof(spmatrix_t) ); spm_out->n = spm_in->nexp; + if ( spm_in->loc2glob ) { + spm_out->loc2glob = malloc( spm_out->n * sizeof(spm_int_t) ); + spm_expand_loc2glob( spm_in, spm_out ); + } + switch (spm_in->fmttype) { case SpmCSC: z_spmCSCExpand( spm_in, spm_out ); @@ -555,9 +603,10 @@ z_spmExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) break; } - spm_out->dof = 1; - spm_out->dofs = NULL; - spm_out->layout = SpmColMajor; + spm_out->dof = 1; + spm_out->layout = SpmColMajor; + spm_out->dofs = NULL; + spm_out->glob2loc = NULL; spmUpdateComputedFields( spm_out ); diff --git a/src/z_spm_print.c b/src/z_spm_print.c index 55b1f51f0e3c9375d7e5d706be52494743ebc63f..8cfd51d803dc4c72fe196d01b89289a26d3b8ef9 100644 --- a/src/z_spm_print.c +++ b/src/z_spm_print.c @@ -19,6 +19,220 @@ #include "common.h" #include "z_spm.h" +typedef spm_complex64_t (*__conj_fct_t)( spm_complex64_t ); + +static inline spm_complex64_t +__fct_id( spm_complex64_t val ) { + return val; +} + +#if defined(PRECISION_c) || defined(PRECISION_z) +static inline spm_complex64_t +__fct_conj( spm_complex64_t val ) { + return conj( val ); +} +#endif + +static inline void +z_spm_print_elt_sym_diag_col( FILE *f, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(jj=0; jj<dofj; jj++) + { + /* Skip unused upper triangular part */ + for(ii=0; ii<jj; ii++) { + valptr++; + } + + /* Diagonal element */ + z_spmPrintElt( f, row + ii, col + jj, *valptr ); + valptr++; + + for(ii=jj+1; ii<dofi; ii++, valptr++) + { + /* Lower part */ + z_spmPrintElt( f, row + ii, col + jj, *valptr ); + /* Upper part */ + z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + } + } + (void)conjfct; +} + +static inline void +z_spm_print_elt_sym_diag_row( FILE *f, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<ii; jj++, valptr++) + { + /* Lower part */ + z_spmPrintElt( f, row + ii, col + jj, *valptr ); + /* Upper part */ + z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + } + + /* Diagonal element */ + z_spmPrintElt( f, row + ii, col + jj, *valptr ); + valptr++; + + /* Skip unused upper triangular part */ + for(jj=ii+1; jj<dofj; jj++) { + valptr++; + } + } + (void)conjfct; +} + +static inline void +z_spm_print_elt_sym_diag( FILE *f, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + if ( layout == SpmColMajor ) { + z_spm_print_elt_sym_diag_col( f, row, dofi, col, dofj, conjfct, valptr ); + } + else { + z_spm_print_elt_sym_diag_row( f, row, dofi, col, dofj, conjfct, valptr ); + } + (void)conjfct; +} + +static inline void +z_spm_print_elt_gen_col( FILE *f, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, valptr++) + { + z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + } + } + (void)conjfct; +} + +static inline void +z_spm_print_elt_gen_row( FILE *f, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + spm_int_t ii, jj; + + for(ii=0; ii<dofi; ii++) + { + for(jj=0; jj<dofj; jj++, valptr++) + { + z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + } + } + (void)conjfct; +} + +static inline void +z_spm_print_elt_gen( FILE *f, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + if ( layout == SpmColMajor ) { + z_spm_print_elt_gen_col( f, row, dofi, col, dofj, conjfct, valptr ); + } + else { + z_spm_print_elt_gen_row( f, row, dofi, col, dofj, conjfct, valptr ); + } +} + +static inline void +z_spm_print_elt_sym_offd( FILE *f, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const __conj_fct_t conjfct, + const spm_complex64_t *valptr ) +{ + if ( layout == SpmColMajor ) { + z_spm_print_elt_gen_col( f, row, dofi, col, dofj, __fct_id, valptr ); + z_spm_print_elt_gen_row( f, col, dofj, row, dofi, conjfct, valptr ); + } + else { + z_spm_print_elt_gen_row( f, row, dofi, col, dofj, __fct_id, valptr ); + z_spm_print_elt_gen_col( f, col, dofj, row, dofi, conjfct, valptr ); + } +} + +static inline void +z_spm_print_elt( FILE *f, + const spm_mtxtype_t mtxtype, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr ) +{ + if ( mtxtype == SpmGeneral ) { + z_spm_print_elt_gen( f, layout, row, dofi, col, dofj, __fct_id, valptr ); + } + else { + __conj_fct_t conjfct; + +#if defined(PRECISION_c) || defined(PRECISION_z) + if ( mtxtype == SpmHermitian ) { + conjfct = __fct_conj; + } + else +#endif + { + conjfct = __fct_id; + } + + if ( row == col ) { + z_spm_print_elt_sym_diag( f, layout, row, dofi, col, dofj, conjfct, valptr ); + } + else { + z_spm_print_elt_sym_offd( f, layout, row, dofi, col, dofj, conjfct, valptr ); + } + } +} + /** ******************************************************************************* * @@ -28,7 +242,7 @@ * ******************************************************************************* * - * @param[in] f + * @param[inout] f * Output file * * @param[in] spm @@ -36,170 +250,53 @@ * *******************************************************************************/ void -z_spmCSCPrint( FILE *f, const spmatrix_t *spm ) +z_spmCSCPrint( FILE *f, + const spmatrix_t *spm ) { - spm_int_t i, j, baseval; - spm_int_t k, ii, jj, dofi, dofj, col, row; - spm_complex64_t *valptr; - spm_int_t *colptr, *rowptr, *dofs; + spm_int_t j, k, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr, *rowptr, *dofs, *loc2glob; + const spm_complex64_t *valptr; assert( spm->fmttype == SpmCSC ); assert( spm->flttype == SpmComplex64 ); baseval = spmFindBase( spm ); - i = 0; - j = 0; - - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); - dofs = spm->dofs; - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(j=0; j<spm->n; j++, colptr++) - { - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 0 ) ? spm->dof * j : dofs[j] - baseval; - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - i = (*rowptr - baseval); - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 0 ) ? spm->dof * i : dofs[i] - baseval; - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - } - } + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; + + for(j=0; j<spm->n; j++, colptr++, loc2glob++) + { + jg = (spm->loc2glob == NULL) ? j : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; } - break; -#endif - case SpmSymmetric: - for(j=0; j<spm->n; j++, colptr++) - { - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 0 ) ? spm->dof * j : dofs[j] - baseval; - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - i = (*rowptr - baseval); - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 0 ) ? spm->dof * i : dofs[i] - baseval; - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - } - } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; } - break; - case SpmGeneral: - default: - for(j=0; j<spm->n; j++, colptr++) + + for(k=colptr[0]; k<colptr[1]; k++, rowptr++) { - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 0 ) ? spm->dof * j : dofs[j] - baseval; - - for(k=colptr[0]; k<colptr[1]; k++, rowptr++) - { - i = (*rowptr - baseval); - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 0 ) ? spm->dof * i : dofs[i] - baseval; - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - } - } - } + ig = (*rowptr - baseval); + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; } + + z_spm_print_elt( f, spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; } } return; @@ -224,170 +321,56 @@ z_spmCSCPrint( FILE *f, const spmatrix_t *spm ) void z_spmCSRPrint( FILE *f, const spmatrix_t *spm ) { - spm_int_t i, j, baseval; - spm_int_t k, ii, jj, dofi, dofj, col, row; - spm_complex64_t *valptr; - spm_int_t *colptr, *rowptr, *dofs; + spm_int_t i, k, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_int_t *loc2glob; + const spm_complex64_t *valptr; assert( spm->fmttype == SpmCSR ); assert( spm->flttype == SpmComplex64 ); baseval = spmFindBase( spm ); - i = 0; - j = 0; - - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); - dofs = spm->dofs; - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(i=0; i<spm->n; i++, rowptr++) - { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 0 ) ? spm->dof * i : dofs[i] - baseval; - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - j = (*colptr - baseval); - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 0 ) ? spm->dof * j : dofs[j] - baseval; - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - } - } + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; + + for(i=0; i<spm->n; i++, rowptr++, loc2glob++) + { + ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; } - break; -#endif - case SpmSymmetric: - for(i=0; i<spm->n; i++, rowptr++) - { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 0 ) ? spm->dof * i : dofs[i] - baseval; - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - j = (*colptr - baseval); - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 0 ) ? spm->dof * j : dofs[j] - baseval; - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - } - } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; } - break; - case SpmGeneral: - default: - for(i=0; i<spm->n; i++, rowptr++) + + for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) { - dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i]; - row = ( spm->dof > 0 ) ? spm->dof * i : dofs[i] - baseval; - - for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) - { - j = (*colptr - baseval); - dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j]; - col = ( spm->dof > 0 ) ? spm->dof * j : dofs[j] - baseval; - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - } - } - } + jg = (*colptr - baseval); + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; } + + z_spm_print_elt( f, spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; } } + return; } @@ -410,187 +393,45 @@ z_spmCSRPrint( FILE *f, const spmatrix_t *spm ) void z_spmIJVPrint( FILE *f, const spmatrix_t *spm ) { - spm_int_t i, j, baseval; - spm_int_t k, ii, jj, dofi, dofj, col, row; - spm_complex64_t *valptr; - spm_int_t *colptr, *rowptr, *dofs; + spm_int_t k, baseval; + spm_int_t i, dofi, row; + spm_int_t j, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_complex64_t *valptr; assert( spm->fmttype == SpmIJV ); assert( spm->flttype == SpmComplex64 ); baseval = spmFindBase( spm ); - i = 0; - j = 0; colptr = spm->colptr; rowptr = spm->rowptr; valptr = (spm_complex64_t*)(spm->values); dofs = spm->dofs; - switch( spm->mtxtype ){ -#if defined(PRECISION_z) || defined(PRECISION_c) - case SpmHermitian: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( spm->dof > 0 ) { - dofi = spm->dof; - row = spm->dof * i; - dofj = spm->dof; - col = spm->dof * j; - } - else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; - } + for(k=0; k<spm->nnz; k++, rowptr++, colptr++) + { + i = *rowptr - baseval; + j = *colptr - baseval; - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, conj(*valptr) ); - } - } - } - } + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * i; + dofj = spm->dof; + col = spm->dof * j; } - break; -#endif - case SpmSymmetric: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( spm->dof > 0 ) { - dofi = spm->dof; - row = spm->dof * i; - dofj = spm->dof; - col = spm->dof * j; - } - else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; - } - - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - if ( row == col ) { - if (row+ii >= col+jj) { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - if (row+ii > col+jj) { - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - else { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - z_spmPrintElt( f, col + jj, row + ii, *valptr ); - } - } - } - } + else { + dofi = dofs[i+1] - dofs[i]; + row = dofs[i] - baseval; + dofj = dofs[j+1] - dofs[j]; + col = dofs[j] - baseval; } - break; - case SpmGeneral: - default: - for(k=0; k<spm->nnz; k++, rowptr++, colptr++) - { - i = *rowptr - baseval; - j = *colptr - baseval; - - if ( spm->dof > 0 ) { - dofi = spm->dof; - row = spm->dof * i; - dofj = spm->dof; - col = spm->dof * j; - } - else { - dofi = dofs[i+1] - dofs[i]; - row = dofs[i] - baseval; - dofj = dofs[j+1] - dofs[j]; - col = dofs[j] - baseval; - } - if ( spm->layout == SpmColMajor ) { - for(jj=0; jj<dofj; jj++) - { - for(ii=0; ii<dofi; ii++, valptr++) - { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - } - } - } - else { - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<dofj; jj++, valptr++) - { - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - } - } - } - } + z_spm_print_elt( f, spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr ); + valptr += dofi * dofj; } return; }