diff --git a/src/spm.c b/src/spm.c index 03328a0f3a8028d15fadb566005a85f1bf321aa8..7db59d5915745d4a8c42c0b81aaa3ca582710cf2 100644 --- a/src/spm.c +++ b/src/spm.c @@ -11,7 +11,8 @@ * @author Xavier Lacoste * @author Pierre Ramet * @author Mathieu Faverge - * @date 2013-06-24 + * @author Tony Delarue + * @date 2020-05-19 * * @addtogroup spm * @{ @@ -505,33 +506,27 @@ double spmNorm( spm_normtype_t ntype, const spmatrix_t *spm ) { - spmatrix_t *spmtmp = (spmatrix_t*)spm; double norm = -1.; if ( spm->flttype == SpmPattern ) { - return SPM_ERR_BADPARAMETER; + return norm; } - if ( spm->dof != 1 ) { - fprintf(stderr, "WARNING: spm expanded due to non implemented norm for non-expanded spm\n"); - spmtmp = malloc( sizeof(spmatrix_t) ); - spmExpand( spm, spmtmp ); - } switch (spm->flttype) { case SpmFloat: - norm = (double)s_spmNorm( ntype, spmtmp ); + norm = (double)s_spmNorm( ntype, spm ); break; case SpmDouble: - norm = d_spmNorm( ntype, spmtmp ); + norm = d_spmNorm( ntype, spm ); break; case SpmComplex32: - norm = (double)c_spmNorm( ntype, spmtmp ); + norm = (double)c_spmNorm( ntype, spm ); break; case SpmComplex64: - norm = z_spmNorm( ntype, spmtmp ); + norm = z_spmNorm( ntype, spm ); break; case SpmPattern: @@ -539,10 +534,6 @@ spmNorm( spm_normtype_t ntype, ; } - if ( spmtmp != spm ) { - spmExit( spmtmp ); - free( spmtmp ); - } return norm; } diff --git a/src/z_spm_norm.c b/src/z_spm_norm.c index 29e9ccfd5e7531539a0fd014779d079fd18c3151..9c1035144270dc3548360cb3a186895321bffe10 100644 --- a/src/z_spm_norm.c +++ b/src/z_spm_norm.c @@ -11,7 +11,9 @@ * @author Pierre Ramet * @author Xavier Lacoste * @author Theophile Terraz - * @date 2015-06-01 + * @author Tony Delarue + * @date 2020-05-19 + * * @precisions normal z -> c d s * * @addtogroup spm_dev_norm @@ -22,11 +24,295 @@ #include "z_spm.h" #include "frobeniusupdate.h" +#if defined(SPM_WITH_MPI) +void +z_spm_frobenius_merge( double *dist, + double *loc, + int *len, + MPI_Datatype *dtype ) +{ + assert( *len == 2 ); + frobenius_merge( dist[0], dist[1], loc, loc+1 ); + (void)len; + (void)dtype; +} +#endif + +/** + * @brief Compute the Frobenius norm of a diagonal element within a + * symmetric/hermitian matrix with column/row major storage + * + * Note that column major is using the low triangular part only of the diagonal + * element matrices, and row major, by symmetry, is using only the upper + * triangular part. + * + * The comments in the code are made for column major storage. + */ +static inline void +z_spm_frobenius_elt_sym_diag( const spm_int_t dofs, + const double *valptr, + double *data ) +{ + spm_int_t ii, jj; + + for(jj=0; jj<dofs; jj++) + { + /* Skip unused upper triangular part */ + for(ii=0; ii<jj; ii++) { + valptr++; +#if defined(PRECISION_z) || defined(PRECISION_c) + valptr++; +#endif + } + + /* Diagonal element */ + frobenius_update( 1, data, data + 1, valptr ); +#if defined(PRECISION_z) || defined(PRECISION_c) + valptr++; + frobenius_update( 1, data, data + 1, valptr ); +#endif + valptr++; + + for(ii=jj+1; ii<dofs; ii++, valptr++) + { + frobenius_update( 2, data, data + 1, valptr ); + +#if defined(PRECISION_z) || defined(PRECISION_c) + valptr++; + frobenius_update( 2, data, data + 1, valptr ); +#endif + } + } +} + +/** + * @brief Compute the Frobenius norm of an off-diagonal element matrix in the + * symmetric/hermitian case + */ +static inline void +z_spm_frobenius_elt_sym_offd( const spm_int_t nbelts, + const double *valptr, + double *data ) +{ + spm_int_t ii; + + for(ii=0; ii<nbelts; ii++, valptr++) + { + frobenius_update( 2, data, data + 1, valptr ); + +#if defined(PRECISION_z) || defined(PRECISION_c) + valptr++; + frobenius_update( 2, data, data + 1, valptr ); +#endif + } +} + +/** + * @brief Compute the Frobenius norm of any element matrix in the + * symmetric/hermitian case + */ +static inline void +z_spm_frobenius_elt_sym( const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr, + double *data ) +{ + if ( row == col ) { + assert( dofi == dofj ); + z_spm_frobenius_elt_sym_diag( dofi, (const double*)valptr, data ); + } + else { + z_spm_frobenius_elt_sym_offd( dofi * dofj, (const double*)valptr, data ); + } +} + +/** + * @brief Compute the Frobenius norm of a symmetrix/hermitian CSC matrix + * + * @param[in] spm + * The spm from which the norm need to be computed. + * + * @param[in,out] data + * + */ +static inline void +z_spmFrobeniusNorm_csc( const spmatrix_t *spm, + double *data ) +{ + spm_int_t j, k, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_int_t *loc2glob; + const spm_complex64_t *valptr; + + + assert( spm->fmttype == SpmCSC ); + assert( spm->flttype == SpmComplex64 ); + + baseval = spmFindBase( spm ); + + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; + + for(j=0; j<spm->n; j++, colptr++, loc2glob++) + { + jg = (spm->loc2glob == NULL) ? j : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + for(k=colptr[0]; k<colptr[1]; k++, rowptr++) + { + ig = (*rowptr - baseval); + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + z_spm_frobenius_elt_sym( row, dofi, col, dofj, valptr, data ); + valptr += dofi * dofj; + } + } +} + +/** + * @brief Compute the Frobenius norm of a symmetrix/hermitian CSR matrix + * + * @param[in] spm + * The spm from which the norm need to be computed. + * + * @param[in,out] data + * + */ +static inline void +z_spmFrobeniusNorm_csr( const spmatrix_t *spm, + double *data ) +{ + spm_int_t i, k, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_int_t *loc2glob; + const spm_complex64_t *valptr; + + assert( spm->fmttype == SpmCSR ); + assert( spm->flttype == SpmComplex64 ); + + baseval = spmFindBase( spm ); + + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; + + for(i=0; i<spm->n; i++, rowptr++, loc2glob++) + { + ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) + { + jg = (*colptr - baseval); + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + z_spm_frobenius_elt_sym( row, dofi, col, dofj, valptr, data ); + valptr += dofi * dofj; + } + } +} + +/** + * @brief Compute the Frobenius norm of a symmetrix/hermitian IJV matrix + * + * @param[in] spm + * The spm from which the norm need to be computed. + * + * @param[in,out] data + * + */ +static inline void +z_spmFrobeniusNorm_ijv( const spmatrix_t *spm, + double *data ) +{ + spm_int_t k, baseval; + spm_int_t i, dofi, row; + spm_int_t j, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_complex64_t *valptr; + + assert( spm->fmttype == SpmIJV ); + assert( spm->flttype == SpmComplex64 ); + + baseval = spmFindBase( spm ); + + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + + for(k=0; k<spm->nnz; k++, rowptr++, colptr++) + { + i = *rowptr - baseval; + j = *colptr - baseval; + + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * i; + dofj = spm->dof; + col = spm->dof * j; + } + else { + dofi = dofs[i+1] - dofs[i]; + row = dofs[i] - baseval; + dofj = dofs[j+1] - dofs[j]; + col = dofs[j] - baseval; + } + + z_spm_frobenius_elt_sym( row, dofi, col, dofj, valptr, data ); + valptr += dofi * dofj; + } +} + /** ******************************************************************************* * - * @brief Compute the Frobenius norm of the non distributed given - * spm structure. + * @brief Compute the Frobenius norm of the given spm structure. * * ||A|| = sqrt( sum( a_ij ^ 2 ) ) * @@ -43,78 +329,52 @@ double z_spmFrobeniusNorm( const spmatrix_t *spm ) { - spm_int_t i, j, baseval; - double *valptr = (double*)spm->values; - double scale = 1.; - double sumsq = 0.; + double data[] = { 0., 1. }; /* Scale, Sum */ if (spm->mtxtype == SpmGeneral) { + const double *valptr = (double*)spm->values; + spm_int_t i; + for(i=0; i <spm->nnzexp; i++, valptr++) { - frobenius_update( 1, &scale, &sumsq, valptr ); + frobenius_update( 1, data, data + 1, valptr ); #if defined(PRECISION_z) || defined(PRECISION_c) valptr++; - frobenius_update( 1, &scale, &sumsq, valptr ); + frobenius_update( 1, data, data + 1, valptr ); #endif } } else { - spm_int_t *colptr = spm->colptr; - spm_int_t *rowptr = spm->rowptr; - int nb; - baseval = spmFindBase( spm ); - switch( spm->fmttype ){ case SpmCSC: - for(i=0; i<spm->n; i++, colptr++) { - for(j=colptr[0]; j<colptr[1]; j++, rowptr++, valptr++) { - nb = ( i == (*rowptr-baseval) ) ? 1 : 2; - frobenius_update( nb, &scale, &sumsq, valptr ); - -#if defined(PRECISION_z) || defined(PRECISION_c) - valptr++; - frobenius_update( nb, &scale, &sumsq, valptr ); -#endif - } - } + z_spmFrobeniusNorm_csc( spm, data ); break; - case SpmCSR: - for(i=0; i<spm->n; i++, rowptr++) { - for(j=rowptr[0]; j<rowptr[1]; j++, colptr++, valptr++) { - nb = ( i == (*colptr-baseval) ) ? 1 : 2; - frobenius_update( nb, &scale, &sumsq, valptr ); -#if defined(PRECISION_z) || defined(PRECISION_c) - valptr++; - frobenius_update( nb, &scale, &sumsq, valptr ); -#endif - } - } + case SpmCSR: + z_spmFrobeniusNorm_csr( spm, data ); break; - case SpmIJV: - for(i=0; i <spm->nnz; i++, valptr++, colptr++, rowptr++) { - nb = ( *rowptr == *colptr ) ? 1 : 2; - frobenius_update( nb, &scale, &sumsq, valptr ); -#if defined(PRECISION_z) || defined(PRECISION_c) - valptr++; - frobenius_update( nb, &scale, &sumsq, valptr ); -#endif - } - break; - default: - fprintf(stderr, "z_spmFrobeniusNorm: Unknown Format\n"); + case SpmIJV: + z_spmFrobeniusNorm_ijv( spm, data ); } } - return scale * sqrt( sumsq ); +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob != NULL ) { + MPI_Op merge; + MPI_Op_create( (MPI_User_function *)z_spm_frobenius_merge, 1, &merge ); + MPI_Allreduce( MPI_IN_PLACE, data, 2, SPM_MPI_DOUBLE, merge, spm->comm ); + MPI_Op_free( &merge ); + } +#endif + + return data[0] * sqrt( data[1] ); } /** ******************************************************************************* * - * @brief Compute the Max norm of the non distributed given spm - * structure. + * @brief Compute the Max norm of the given spm structure. * * ||A|| = max( abs(a_ij) ) * @@ -132,139 +392,249 @@ double z_spmMaxNorm( const spmatrix_t *spm ) { spm_int_t i; - spm_complex64_t *valptr = (spm_complex64_t*)spm->values; + const spm_complex64_t *valptr = (spm_complex64_t*)spm->values; double tmp, norm = 0.; for(i=0; i <spm->nnzexp; i++, valptr++) { tmp = cabs( *valptr ); - norm = norm > tmp ? norm : tmp; + norm = (norm > tmp) ? norm : tmp; } +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob != NULL ) { + MPI_Allreduce( MPI_IN_PLACE, &norm, 1, MPI_DOUBLE, MPI_MAX, spm->comm ); + } +#endif + return norm; } /** - ******************************************************************************* - * - * @brief Compute the Infinity norm of the non distributed given spm - * structure given by the maximum column sum. + * @brief Compute the sum array for the one/inf norms of a diagonal element + * within a symmetric/hermitian matrix with column/row major storage. * - * ||A|| = max_i( sum_j(|a_ij|) ) + * Note that column major is using the low triangular part only of the diagonal + * element matrices, and row major, by symmetry, is using only the upper + * triangular part. * - ******************************************************************************* + * The comments in the code are made for column major storage. + */ +static inline void +z_spm_oneinf_elt_sym_diag( const spm_int_t row, + const spm_int_t dofi, + const spm_complex64_t *valptr, + double *sumtab ) +{ + spm_int_t ii, jj; + + sumtab += row; + + for(jj=0; jj<dofi; jj++) + { + /* Skip unused upper triangular part */ + for(ii=0; ii<jj; ii++) { + valptr++; + } + + /* Diagonal element */ + sumtab[jj] += cabs( *valptr ); + valptr++; + + for(ii=jj+1; ii<dofi; ii++, valptr++) + { + /* Lower part */ + sumtab[ii] += cabs( *valptr ); + /* Upper part */ + sumtab[jj] += cabs( *valptr ); + } + } +} + +/** + * @brief Compute the sum array for the one/inf norms of a general element. * - * @param[in] spm - * The spm from which the norm need to be computed. + * We can observe two cases A and B; + * ________ _ ________ _ + * | | | | | | |________| | | + * | | | | |A| OR |________| |B| + * |__|__|__| |_| |________| |_| + * ________ ________ + * |___B____| |___A____| * - ******************************************************************************* + * | One Norm | Inf norm | + * -------------+----------+----------+ + * Column Major | B | A | + * -------------+----------+----------+ + * Row Major | A | B | + * -------------+----------+----------+ * - * @return The computed infinity norm + * @warning: The sumtab must be shifted at the right place on input * - *******************************************************************************/ -double -z_spmInfNorm( const spmatrix_t *spm ) + */ +static inline void +z_spm_oneinf_elt_gen_A( const spm_int_t dofi, + const spm_int_t dofj, + const spm_complex64_t *valptr, + double *sumtab ) { - spm_int_t col, row, i, baseval; - spm_complex64_t *valptr = (spm_complex64_t*)spm->values; - double norm = 0.; - double *sumrow; + spm_int_t ii, jj; - sumrow = malloc( spm->gN * sizeof(double) ); - memset( sumrow, 0, spm->gN * sizeof(double) ); - baseval = spmFindBase( spm ); - - switch( spm->fmttype ){ - case SpmCSC: - for( col=0; col < spm->gN; col++ ) + for(jj=0; jj<dofj; jj++) + { + for(ii=0; ii<dofi; ii++, valptr++) { - for( i=spm->colptr[col]-baseval; i<spm->colptr[col+1]-baseval; i++ ) - { - row = spm->rowptr[i] - baseval; - sumrow[row] += cabs( valptr[i] ); - - /* Add the symmetric/hermitian part */ - if ( ( spm->mtxtype != SpmGeneral ) && - ( row != col ) ) - { - sumrow[col] += cabs( valptr[i] ); - } - } + sumtab[ii] += cabs( *valptr ); } - break; + } +} - case SpmCSR: - for( row=0; row < spm->gN; row++ ) - { - for( i=spm->rowptr[row]-baseval; i<spm->rowptr[row+1]-baseval; i++ ) - { - sumrow[row] += cabs( valptr[i] ); - } - } +/** + * @brief Compute the sum array for the one/inf norms of a general element. + * + * See z_spm_oneinf_elt_gen_A() + */ +static inline void +z_spm_oneinf_elt_gen_B( const spm_int_t dofi, + const spm_int_t dofj, + const spm_complex64_t *valptr, + double *sumtab ) +{ + spm_int_t ii, jj; - /* Add the symmetric/hermitian part */ - if ( spm->mtxtype != SpmGeneral ) + for(jj=0; jj<dofj; jj++, sumtab++) + { + for(ii=0; ii<dofi; ii++, valptr++) { - for( row=0; row < spm->gN; row++ ) - { - for( i=spm->rowptr[row]-baseval; i<spm->rowptr[row+1]-baseval; i++ ) - { - col = spm->colptr[i] - baseval; - if ( row != col ) { - sumrow[col] += cabs( valptr[i] ); - } - } - } + *sumtab += cabs( *valptr ); } - break; + } +} - case SpmIJV: - for(i=0; i < spm->nnz; i++) +/** + * @brief Compute the sum array for both the one and inf norms for the + * off-diagonal elements of symmetric/hermitian element matrices. + * + * See z_spm_oneinf_elt_gen_A() + */ +static inline void +z_spm_oneinf_elt_gen_AB( const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr, + double *sumtab ) +{ + double *sumrow = sumtab + row; + double *sumcol = sumtab + col; + spm_int_t ii, jj; + + for(jj=0; jj<dofj; jj++, sumcol++) + { + for(ii=0; ii<dofi; ii++, valptr++) { - row = spm->rowptr[i]-baseval; - sumrow[row] += cabs( valptr[i] ); + double v = cabs( *valptr ); + sumrow[ii] += v; + *sumcol += v; } + } +} - /* Add the symmetric/hermitian part */ - if ( spm->mtxtype != SpmGeneral ) - { - for(i=0; i < spm->nnz; i++) - { - row = spm->rowptr[i]-baseval; - col = spm->colptr[i]-baseval; - if( row != col ) { - sumrow[col] += cabs( valptr[i] ); - } - } +/** + * @brief Compute the sum array for the one and inf norms of a general element. + */ +static inline void +z_spm_oneinf_elt_gen( const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr, + const spm_normtype_t ntype, + double *sumtab ) +{ + if ( layout == SpmColMajor ) { + if ( ntype == SpmInfNorm ) { + z_spm_oneinf_elt_gen_A( dofi, dofj, valptr, sumtab + row ); } - break; + else { + assert( ntype == SpmOneNorm ); + z_spm_oneinf_elt_gen_B( dofi, dofj, valptr, sumtab + col ); + } + } + else { + if ( ntype == SpmInfNorm ) { + z_spm_oneinf_elt_gen_B( dofj, dofi, valptr, sumtab + row ); + } + else { + assert( ntype == SpmOneNorm ); + z_spm_oneinf_elt_gen_A( dofj, dofi, valptr, sumtab + col ); + } + } +} - default: - free( sumrow ); - return SPM_ERR_BADPARAMETER; +/** + * @brief Compute the sum array for both the one and inf norms for the + * off-diagonal elements of symmetric/hermitian element matrices in either + * column or row major layout. + */ +static inline void +z_spm_oneinf_elt_sym_offd( const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr, + double *sumtab ) +{ + if ( layout == SpmColMajor ) { + z_spm_oneinf_elt_gen_AB( row, dofi, col, dofj, valptr, sumtab ); } + else { + z_spm_oneinf_elt_gen_AB( col, dofj, row, dofi, valptr, sumtab ); + } +} - for( i=0; i<spm->gN; i++) - { - if(norm < sumrow[i]) - { - norm = sumrow[i]; +/** + * @brief Compute the sum array for the one/inf norm for an element matrix. + */ +static inline void +z_spm_oneinf_elt( const spm_mtxtype_t mtxtype, + const spm_layout_t layout, + const spm_int_t row, + const spm_int_t dofi, + const spm_int_t col, + const spm_int_t dofj, + const spm_complex64_t *valptr, + const spm_normtype_t ntype, + double *sumtab ) +{ + if ( mtxtype == SpmGeneral ) { + z_spm_oneinf_elt_gen( layout, row, dofi, col, dofj, valptr, ntype, sumtab ); + } + else { + if(row == col) { + z_spm_oneinf_elt_sym_diag( row, dofi, valptr, sumtab ); + } + else { + z_spm_oneinf_elt_sym_offd( layout, row, dofi, col, dofj, valptr, sumtab ); } } - free( sumrow ); - - return norm; } /** ******************************************************************************* * - * @brief Compute the one norm of the non distributed given spm - * structure fiven by the maximum row sum + * @brief Compute the one/inf norm of the given spm structure given by + * the maximum row sum * - * ||A|| = max_j( sum_i(|a_ij|) ) + * * SpmOneNorm: ||A|| = max_j( sum_i(|a_ij|) ) + * * SpmInfNorm: ||A|| = max_i( sum_j(|a_ij|) ) * ******************************************************************************* * + * @param[in] ntype + * The type of norm to compute. + * * @param[in] spm * The spm from which the norm need to be computed. * @@ -273,94 +643,144 @@ z_spmInfNorm( const spmatrix_t *spm ) * @return The computed one norm * *******************************************************************************/ -double -z_spmOneNorm( const spmatrix_t *spm ) +static inline double +z_spmOneInfNorm( spm_normtype_t ntype, + const spmatrix_t *spm ) { - spm_int_t col, row, i, baseval; - spm_complex64_t *valptr = (spm_complex64_t*)spm->values; - double norm = 0.; - double *sumcol; + spm_int_t i, j, k, baseval; + spm_int_t ig, dofi, row; + spm_int_t jg, dofj, col; + const spm_int_t *colptr; + const spm_int_t *rowptr; + const spm_int_t *dofs; + const spm_int_t *loc2glob; + const spm_complex64_t *valptr; + double *sumtab = calloc( spm->gNexp, sizeof(double) ); + double norm = 0.; - sumcol = malloc( spm->gN * sizeof(double) ); - memset( sumcol, 0, spm->gN * sizeof(double) ); baseval = spmFindBase( spm ); + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (spm_complex64_t*)(spm->values); + dofs = spm->dofs; + loc2glob = spm->loc2glob; + switch( spm->fmttype ){ case SpmCSC: - for( col=0; col<spm->gN; col++ ) + for(j=0; j<spm->n; j++, colptr++, loc2glob++) { - for( i=spm->colptr[col]-baseval; i<spm->colptr[col+1]-baseval; i++ ) - { - sumcol[col] += cabs( valptr[i] ); + jg = (spm->loc2glob == NULL) ? j : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; + } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; } - } - /* Add the symmetric/hermitian part */ - if ( spm->mtxtype != SpmGeneral ) - { - for( col=0; col < spm->gN; col++ ) + for(k=colptr[0]; k<colptr[1]; k++, rowptr++) { - for( i=spm->colptr[col]-baseval; i<spm->colptr[col+1]-baseval; i++ ) - { - row = spm->rowptr[i] - baseval; - if (row != col) { - sumcol[row] += cabs( valptr[i] ); - } + ig = (*rowptr - baseval); + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; } + + z_spm_oneinf_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + ntype, sumtab ); + valptr += dofi * dofj; } } break; case SpmCSR: - for( row=0; row < spm->gN; row++ ) + for(i=0; i<spm->n; i++, rowptr++, loc2glob++) { - for( i=spm->rowptr[row]-baseval; i<spm->rowptr[row+1]-baseval; i++ ) + ig = (spm->loc2glob == NULL) ? i : (*loc2glob) - baseval; + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + } + + for(k=rowptr[0]; k<rowptr[1]; k++, colptr++) { - col = spm->colptr[i] - baseval; - sumcol[col] += cabs( valptr[i] ); - - /* Add the symmetric/hermitian part */ - if ( ( spm->mtxtype != SpmGeneral ) && - ( row != col ) ) - { - sumcol[row] += cabs( valptr[i] ); + jg = (*colptr - baseval); + if ( spm->dof > 0 ) { + dofj = spm->dof; + col = spm->dof * jg; } + else { + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; + } + + z_spm_oneinf_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + ntype, sumtab ); + valptr += dofi * dofj; } } break; case SpmIJV: - for(i=0; i < spm->nnz; i++) + for(k=0; k<spm->nnz; k++, rowptr++, colptr++) { - sumcol[spm->colptr[i]-baseval] += cabs( valptr[i] ); - } - - /* Add the symmetric/hermitian part */ - if ( spm->mtxtype != SpmGeneral ) - { - for(i=0; i < spm->nnz; i++) - { - if( spm->rowptr[i] != spm->colptr[i] ) { - sumcol[spm->rowptr[i]-baseval] += cabs( valptr[i] ); - } + ig = *rowptr - baseval; + jg = *colptr - baseval; + + if ( spm->dof > 0 ) { + dofi = spm->dof; + row = spm->dof * ig; + dofj = spm->dof; + col = spm->dof * jg; + } + else { + dofi = dofs[ig+1] - dofs[ig]; + row = dofs[ig] - baseval; + dofj = dofs[jg+1] - dofs[jg]; + col = dofs[jg] - baseval; } + + z_spm_oneinf_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + ntype, sumtab ); + valptr += dofi * dofj; } break; default: - free( sumcol ); return SPM_ERR_BADPARAMETER; } - for( i=0; i<spm->gN; i++) +#if defined(SPM_WITH_MPI) + if ( spm->loc2glob != NULL ) { + MPI_Allreduce( MPI_IN_PLACE, sumtab, spm->gNexp, MPI_DOUBLE, MPI_SUM, spm->comm ); + } +#endif + + /* Look for the maximum */ { - if(norm < sumcol[i]) + const double *sumtmp = sumtab; + for( i=0; i<spm->gNexp; i++, sumtmp++ ) { - norm = sumcol[i]; + if( norm < *sumtmp ) { + norm = *sumtmp; + } } } - free( sumcol ); + free( sumtab ); return norm; } @@ -383,6 +803,7 @@ z_spmOneNorm( const spmatrix_t *spm ) ******************************************************************************* * * @return The norm of the spm matrix + * -1 when error occurs or with pattern only * *******************************************************************************/ double @@ -391,7 +812,7 @@ z_spmNorm( spm_normtype_t ntype, { double norm = 0.; - if(spm == NULL) + if( spm == NULL ) { return -1.; } @@ -402,11 +823,10 @@ z_spmNorm( spm_normtype_t ntype, break; case SpmInfNorm: - norm = z_spmInfNorm( spm ); - break; + spm_attr_fallthrough; case SpmOneNorm: - norm = z_spmOneNorm( spm ); + norm = z_spmOneInfNorm( ntype, spm ); break; case SpmFrobeniusNorm: diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7213784d6203d1887dc9da8bc4f2f96af1ad3f74..4b6323f9d4c22506c9438b2eca3875934c0b69e5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,7 +5,7 @@ # # @version 1.0.0 # @author Mathieu Faverge -# @date 2013-06-24 +# @date 2020-05-19 # ### include(RulesPrecisions) @@ -51,6 +51,7 @@ set (TESTS if ( SPM_WITH_MPI ) list( APPEND TESTS spm_scatter_gather_tests.c + spm_dist_norm_tests.c ) endif() @@ -66,7 +67,8 @@ set( SPM_TESTS set( SPM_DOF_TESTS spm_dof_expand_tests spm_dof_norm_tests spm_dof_matvec_tests) set( SPM_MPI_TESTS - spm_scatter_gather_tests ) + spm_scatter_gather_tests + spm_dist_norm_tests ) # List of run types set( RUNTYPE shm ) diff --git a/tests/spm_dist_norm_tests.c b/tests/spm_dist_norm_tests.c new file mode 100644 index 0000000000000000000000000000000000000000..8c7c7dd36f2fbefdf07874a06784e2eed71439ba --- /dev/null +++ b/tests/spm_dist_norm_tests.c @@ -0,0 +1,183 @@ +/** + * + * @file spm_dist_norm_tests.c + * + * Tests and validate the spm_norm routines. + * + * @copyright 2015-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 6.0.0 + * @author Mathieu Faverge + * @author Theophile Terraz + * @author Tony Delarue + * @date 2020-05-19 + * + **/ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <assert.h> +#include <time.h> +#include <spm_tests.h> + +#if !defined(SPM_WITH_MPI) +#error "This test should not be compiled in non distributed version" +#endif + +int main (int argc, char **argv) +{ + char *filename; + spmatrix_t original, *spm, *spmdist; + spm_driver_t driver; + int clustnbr = 1; + int clustnum = 0; + spm_mtxtype_t mtxtype; + spm_fmttype_t fmttype; + int baseval, distbycol, root; + int rc = SPM_SUCCESS; + int err = 0; + int dof, dofmax = 4; + int to_free = 0; + + MPI_Init( &argc, &argv ); + + /** + * Get options from command line + */ + spmGetOptions( argc, argv, + &driver, &filename ); + + rc = spmReadDriver( driver, filename, &original ); + free(filename); + + if ( rc != SPM_SUCCESS ) { + fprintf(stderr, "ERROR: Could not read the file, stop the test !!!\n"); + return EXIT_FAILURE; + } + + MPI_Comm_size( MPI_COMM_WORLD, &clustnbr ); + MPI_Comm_rank( MPI_COMM_WORLD, &clustnum ); + + if ( original.flttype == SpmPattern ) { + spmGenFakeValues( &original ); + } + + spmPrintInfo( &original, stdout ); + + printf(" -- SPM Norms Test --\n"); + + for( fmttype=SpmCSC; fmttype<=SpmIJV; fmttype++ ) { + + distbycol = (fmttype == SpmCSR) ? 0 : 1; + if ( spmConvert( fmttype, &original ) != SPM_SUCCESS ) { + fprintf( stderr, "Issue to convert to %s format\n", fmtnames[fmttype] ); + continue; + } + + for( dof=-1; dof<2; dof++ ) + { + if ( dof >= 0 ) { + spm = spmDofExtend( &original, dof, dofmax ); + to_free = 1; + } + else { + spm = malloc(sizeof(spmatrix_t)); + memcpy( spm, &original, sizeof(spmatrix_t) ); + to_free = 0; + } + + if ( spm == NULL ) { + fprintf( stderr, "Issue to extend the matrix\n" ); + continue; + } + + for( root=-1; root<clustnbr; root++ ) + { + spmdist = spmScatter( spm, -1, NULL, distbycol, -1, MPI_COMM_WORLD ); + if ( spmdist == NULL ) { + fprintf( stderr, "Failed to scatter the spm\n" ); + err++; + continue; + } + + for( baseval=0; baseval<2; baseval++ ) + { + spmBase( spmdist, baseval ); + + for( mtxtype=SpmGeneral; mtxtype<=SpmHermitian; mtxtype++ ) + { + if ( (mtxtype == SpmHermitian) && + ( ((original.flttype != SpmComplex64) && + (original.flttype != SpmComplex32)) || + (original.mtxtype != SpmHermitian) ) ) + { + continue; + } + + if ( (mtxtype != SpmGeneral) && + (original.mtxtype == SpmGeneral) ) + { + continue; + } + + if ( spm ) { + spm->mtxtype = mtxtype; + } + spmdist->mtxtype = mtxtype; + + if(clustnum == 0) { + printf( " Case: %s / %s / %d / %s / %d\n", + fltnames[spmdist->flttype], + fmtnames[spmdist->fmttype], baseval, + mtxnames[mtxtype - SpmGeneral], dof ); + } + + switch( spmdist->flttype ){ + case SpmComplex64: + rc = z_spm_dist_norm_check( spm, spmdist ); + break; + + case SpmComplex32: + rc = c_spm_dist_norm_check( spm, spmdist ); + break; + + case SpmFloat: + rc = s_spm_dist_norm_check( spm, spmdist ); + break; + + case SpmDouble: + default: + rc = d_spm_dist_norm_check( spm, spmdist ); + } + err = (rc != 0) ? err+1 : err; + } + } + spmExit( spmdist ); + free( spmdist ); + } + if ( spm != &original ) { + if( to_free ){ + spmExit( spm ); + } + free( spm ); + } + } + } + + MPI_Finalize(); + + if( err == 0 ) { + if(clustnum == 0) { + printf(" -- All tests PASSED --\n"); + } + return EXIT_SUCCESS; + } + else + { + printf(" -- %d tests FAILED --\n", err); + return EXIT_FAILURE; + } +} diff --git a/tests/spm_tests.h b/tests/spm_tests.h index 011be821e8693576a0e7fb83a57073aa9c10616f..876c240b8a77f039562f17d7f66ecb66c99b37df 100644 --- a/tests/spm_tests.h +++ b/tests/spm_tests.h @@ -9,7 +9,8 @@ * * @version 1.0.0 * @author Mathieu Faverge - * @date 2013-06-24 + * @author Tony Delarue + * @date 2020-05-19 * **/ #ifndef _spm_tests_h_ @@ -81,35 +82,42 @@ int core_sgeadd( spm_trans_t trans, void z_spm_print_check( char *filename, const spmatrix_t *spm ); int z_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int z_spm_norm_check( const spmatrix_t *spm ); +int z_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); void c_spm_print_check( char *filename, const spmatrix_t *spm ); int c_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int c_spm_norm_check( const spmatrix_t *spm ); +int c_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); void d_spm_print_check( char *filename, const spmatrix_t *spm ); int d_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int d_spm_norm_check( const spmatrix_t *spm ); +int d_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); void s_spm_print_check( char *filename, const spmatrix_t *spm ); int s_spm_matvec_check( spm_trans_t trans, const spmatrix_t *spm ); int s_spm_norm_check( const spmatrix_t *spm ); +int s_spm_dist_norm_check( const spmatrix_t *spm, const spmatrix_t *spmdist ); void p_spm_print_check( char *filename, const spmatrix_t *spm ); static inline int -spm_norm_print_result( double norms, double normd, double result ) +spm_norm_print_result( double norms, double normd, double result, int clustnum ) { int rc = 0; if ( (result >= 0.) && (result < 1.) ) { - printf("SUCCESS !\n"); + if(clustnum == 0) { + printf("SUCCESS !\n"); + } } else { - printf("FAILED !\n"); + if(clustnum == 0) { + printf("FAILED !\n"); + printf(" Nsparse = %e, Ndense = %e\n", norms, normd ); + printf(" | Nsparse - Ndense | / Ndense = %e\n", result); + } rc = 1; } - printf(" Nsparse = %e, Ndense = %e\n", norms, normd ); - printf(" | Nsparse - Ndense | / Ndense = %e\n", result); - return rc; } diff --git a/tests/z_spm_tests.c b/tests/z_spm_tests.c index 22effb5ed1b77bd9bc26eb8f3c1618ed822e159b..7ef502f5b31c2a8232a087f39e40baa37dedcac9 100644 --- a/tests/z_spm_tests.c +++ b/tests/z_spm_tests.c @@ -10,7 +10,8 @@ * @version 6.0.0 * @author Mathieu Faverge * @author Theophile Terraz - * @date 2015-01-01 + * @author Tony Delarue + * @date 2020-05-19 * * @precisions normal z -> c d s * @@ -228,7 +229,7 @@ z_spm_norm_check( const spmatrix_t *spm ) norms = spmNorm( SpmMaxNorm, spm ); normd = LAPACKE_zlange( LAPACK_COL_MAJOR, 'M', spm->gNexp, spm->gNexp, A, spm->gNexp ); result = fabs(norms - normd) / (normd * eps); - ret += spm_norm_print_result( norms, normd, result ); + ret += spm_norm_print_result( norms, normd, result, 0 ); /** * Test Norm Inf @@ -238,7 +239,7 @@ z_spm_norm_check( const spmatrix_t *spm ) normd = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', spm->gNexp, spm->gNexp, A, spm->gNexp ); result = fabs(norms - normd) / (normd * eps); result = result * ((double)(spm->gNexp)) / nnz; - ret += spm_norm_print_result( norms, normd, result ); + ret += spm_norm_print_result( norms, normd, result, 0 ); /** * Test Norm One @@ -248,7 +249,7 @@ z_spm_norm_check( const spmatrix_t *spm ) normd = LAPACKE_zlange( LAPACK_COL_MAJOR, 'O', spm->gNexp, spm->gNexp, A, spm->gNexp ); result = fabs(norms - normd) / (normd * eps); result = result * ((double)(spm->gNexp)) / nnz; - ret += spm_norm_print_result( norms, normd, result ); + ret += spm_norm_print_result( norms, normd, result, 0 ); /** * Test Norm Frobenius @@ -258,8 +259,79 @@ z_spm_norm_check( const spmatrix_t *spm ) normd = LAPACKE_zlange( LAPACK_COL_MAJOR, 'F', spm->gNexp, spm->gNexp, A, spm->gNexp ); result = fabs(norms - normd) / (normd * eps); result = result / nnz; - ret += spm_norm_print_result( norms, normd, result ); + ret += spm_norm_print_result( norms, normd, result, 0 ); free(A); return ret; } + +/*------------------------------------------------------------------------ + * Check the accuracy of the solution + */ +#if defined(SPM_WITH_MPI) +int +z_spm_dist_norm_check( const spmatrix_t *spm, + const spmatrix_t *spmdist ) +{ + double norms, normd; + double eps, result, nnz; + int ret = 0; + int clustnum = spmdist->clustnum; + + eps = LAPACKE_dlamch_work('e'); + + nnz = spm->gnnzexp; + if ( spm->mtxtype != SpmGeneral ) { + nnz *= 2.; + } + + /** + * Test Norm Max + */ + if(clustnum == 0) { + printf(" -- Test norm Max : "); + } + norms = spmNorm( SpmMaxNorm, spm ); + normd = spmNorm( SpmMaxNorm, spmdist ); + result = fabs(norms - normd) / (normd * eps); + ret += spm_norm_print_result( norms, normd, result, clustnum ); + + /** + * Test Norm Inf + */ + if(clustnum == 0) { + printf(" -- Test norm Inf : "); + } + norms = spmNorm( SpmInfNorm, spm ); + normd = spmNorm( SpmInfNorm, spmdist ); + result = fabs(norms - normd) / (normd * eps); + result = result * ((double)(spm->gNexp)) / nnz; + ret += spm_norm_print_result( norms, normd, result, clustnum ); + + /** + * Test Norm One + */ + if(clustnum == 0) { + printf(" -- Test norm One : "); + } + norms = spmNorm( SpmOneNorm, spm ); + normd = spmNorm( SpmOneNorm, spmdist ); + result = fabs(norms - normd) / (normd * eps); + result = result * ((double)(spm->gNexp)) / nnz; + ret += spm_norm_print_result( norms, normd, result, clustnum ); + + /** + * Test Norm Frobenius + */ + if(clustnum == 0) { + printf(" -- Test norm Frb : "); + } + norms = spmNorm( SpmFrobeniusNorm, spm ); + normd = spmNorm( SpmFrobeniusNorm, spmdist ); + result = fabs(norms - normd) / (normd * eps); + result = result / nnz; + ret += spm_norm_print_result( norms, normd, result, clustnum ); + + return ret; +} +#endif