diff --git a/include/spm/const.h b/include/spm/const.h index adff0081d26e53c1c5c6f536f15c9dee15a0eba5..01f9de534e2cc089ad918008cc6f264b52115e4b 100644 --- a/include/spm/const.h +++ b/include/spm/const.h @@ -189,7 +189,7 @@ typedef enum spm_normtype_e { SpmOneNorm = 171, /**< One norm: max_j( sum_i( |a_{ij}| ) ) */ SpmFrobeniusNorm = 174, /**< Frobenius norm: sqrt( sum_{i,j} (a_{ij}^2) ) */ SpmInfNorm = 175, /**< Inifinite norm: max_i( sum_j( |a_{ij}| ) ) */ - SpmMaxNorm = 177 /**< Inifinite norm: max_{i,j}( | a_{ij} | ) */ + SpmMaxNorm = 177 /**< Max norm: max_{i,j}( | a_{ij} | ) */ } spm_normtype_t; /** diff --git a/src/common.h b/src/common.h index 3871c2404e29f0c938c393d2f24b3d798d0c676c..4abadf79404831a5b937fe2bdc12b88c20345196 100644 --- a/src/common.h +++ b/src/common.h @@ -55,6 +55,24 @@ spm_get_datatype( const spmatrix_t *spm ) spm_int_t *spm_get_glob2loc( spmatrix_t *spm, spm_int_t baseval ); +/******************************************************************** + * Conjuguate/Id functions + */ +typedef spm_complex64_t (*spm_zconj_fct_t)( spm_complex64_t ); +typedef spm_complex32_t (*spm_cconj_fct_t)( spm_complex32_t ); +typedef double (*spm_dconj_fct_t)( double ); +typedef float (*spm_sconj_fct_t)( float ); +typedef void (*spm_pconj_fct_t)( int ); + +static inline spm_complex64_t __spm_zid( spm_complex64_t val ) { return val; } +static inline spm_complex32_t __spm_cid( spm_complex32_t val ) { return val; } +static inline double __spm_did( double val ) { return val; } +static inline float __spm_sid( float val ) { return val; } +static inline void __spm_pid( int val __attribute__((unused)) ) { } + +static inline spm_complex64_t __spm_zconj( spm_complex64_t val ) { return conj( val ); } +static inline spm_complex32_t __spm_cconj( spm_complex32_t val ) { return conjf( val ); } + /******************************************************************** * Errors functions */ @@ -89,7 +107,7 @@ spm_print_warning( const char *fmt, ... ) #define CBLAS_SADDR( a_ ) (&(a_)) #endif -/* +/******************************************************************** * Get environment variable */ #if defined(SPM_OS_WINDOWS) diff --git a/src/frobeniusupdate.h b/src/frobeniusupdate.h index 90cbdad1bfc0f65fccceb0bb3f2ca28308665099..e232ad26333bfcbf84bc895ffb4b33fb7683d9dc 100644 --- a/src/frobeniusupdate.h +++ b/src/frobeniusupdate.h @@ -20,16 +20,20 @@ /** ******************************************************************************* * - * @ingroup pastix_internal + * @ingroup spm_internal * - * frobenius_update - Update the couple (scale, sumsq) with one element when - * computing the Froebnius norm. + * @brief Update the couple (scale, sumsq) with one element when computing the + * Froebnius norm. * * The frobenius norm is equal to scale * sqrt( sumsq ), this method allows to * avoid overflow in the sum square computation. * ******************************************************************************* * + * @param[in] nb + * The number of time the value must be integrated into the couple + * (scale, sumsq) + * * @param[inout] scale * On entry, the former scale * On exit, the update scale to take into account the value @@ -44,7 +48,7 @@ *******************************************************************************/ static inline void #if defined(PRECISION_d) || defined(PRECISION_z) -frobenius_update( int nb, double *scale, double *sumsq, double *value ) +frobenius_update( int nb, double *scale, double *sumsq, const double *value ) { double absval = fabs(*value); double ratio; @@ -60,7 +64,7 @@ frobenius_update( int nb, double *scale, double *sumsq, double *value ) } } #elif defined(PRECISION_s) || defined(PRECISION_c) -frobenius_update( int nb, float *scale, float *sumsq, float *value ) +frobenius_update( int nb, float *scale, float *sumsq, const float *value ) { float absval = fabs(*value); float ratio; @@ -77,4 +81,64 @@ frobenius_update( int nb, float *scale, float *sumsq, float *value ) } #endif +/** + ******************************************************************************* + * + * @ingroup spm_internal + * + * @brief Merge together two sum square stored as a couple (scale, sumsq). + * + * The frobenius norm is equal to scale * sqrt( sumsq ), this method allows to + * avoid overflow in the sum square computation. + * + ******************************************************************************* + * + * @param[in] scl_in + * The scale factor of the first couple to merge + * + * @param[in] ssq_in + * The sumsquare factor of the first couple to merge + * + * @param[inout] scl_out + * On entry, the scale factor of the second couple to merge + * On exit, the updated scale factor. + * + * @param[inout] ssq_out + * The sumsquare factor of the second couple to merge + * On exit, the updated sumsquare factor. + * + *******************************************************************************/ +static inline void +#if defined(PRECISION_d) || defined(PRECISION_z) +frobenius_merge( double scl_in, double ssq_in, + double *scl_out, double *ssq_out ) +{ + double ratio; + if ( (*scl_out) < scl_in ) { + ratio = (*scl_out) / scl_in; + *ssq_out = (*ssq_out) * ratio * ratio + ssq_in; + *scl_out = scl_in; + } + else { + ratio = scl_in / (*scl_out); + *ssq_out = (*ssq_out) + ssq_in * ratio * ratio; + } +} +#else +frobenius_merge( float scl_in, float ssq_in, + float *scl_out, float *ssq_out ) +{ + float ratio; + if ( (*scl_out) < scl_in ) { + ratio = (*scl_out) / scl_in; + *ssq_out = (*ssq_out) * ratio * ratio + ssq_in; + *scl_out = scl_in; + } + else { + ratio = scl_in / (*scl_out); + *ssq_out = (*ssq_out) + ssq_in * ratio * ratio; + } +} +#endif + #endif /* _frobeniusupdate_h_ */ diff --git a/src/spm.c b/src/spm.c index 2fc61e1cd6346a6d44a3ccebe66470f6a97251ec..03328a0f3a8028d15fadb566005a85f1bf321aa8 100644 --- a/src/spm.c +++ b/src/spm.c @@ -321,7 +321,7 @@ spmBase( spmatrix_t *spm, } } if (spm->dofs != NULL) { - for (i = 0; i <= n; i++) { + for (i = 0; i <= spm->gN; i++) { spm->dofs[i] += baseadj; } } diff --git a/src/spm_dof_extend.c b/src/spm_dof_extend.c index 18eed840759c2e2d8d8998c7f8c88d6687184725..64ddfdaa9e239bee62962f71dd9b236d1f76b9be 100644 --- a/src/spm_dof_extend.c +++ b/src/spm_dof_extend.c @@ -86,8 +86,8 @@ spmDofExtend( const spmatrix_t *spm, * Initialize the dofs array where the degree of freedom of vertex i is * dof[i+1] - dof[i] */ - *dofptr = baseval; if( spm->clustnum == 0 ) { + *dofptr = baseval; for(i=0; i<spm->gN; i++, dofptr++) { dofi = 1 + ( rand() % dof ); dofptr[1] = dofptr[0] + dofi; diff --git a/src/spm_gather.c b/src/spm_gather.c index 91f231842c5d930029d0fb454272ed57a7a9115d..a918ca69c5ec05239aa123252ac7277e75dbd683 100644 --- a/src/spm_gather.c +++ b/src/spm_gather.c @@ -116,7 +116,7 @@ spm_gather_csx_update( const spmatrix_t *spm, for ( c=1; c<spm->clustnbr; c++ ) { /* Let's start shifting the value after the first array */ spm_int_t shift = recvdispls[c]; - spm_int_t end = ( c == spm->clustnbr-1 ) ? spm->gN+1 : recvdispls[c+1]; + spm_int_t end = ( c < (spm->clustnbr-1) ) ? recvdispls[c+1] : spm->gN+1; spm_int_t i; to_add += recvcounts[c-1]; diff --git a/src/z_spm.h b/src/z_spm.h index eff04607e23c6c0c1140d8d451945598dcd97c47..f7eb42fe30ec1267c144b4c596fd5f00ce00f58f 100644 --- a/src/z_spm.h +++ b/src/z_spm.h @@ -64,7 +64,7 @@ int spm_zspmm( spm_side_t side, /** * Norm computation routines */ -double z_spmNorm( int ntype, const spmatrix_t *spm ); +double z_spmNorm( spm_normtype_t ntype, const spmatrix_t *spm ); /** * Extra routines diff --git a/src/z_spm_2dense.c b/src/z_spm_2dense.c index c4a562f1e4541dd95f7ac935ff74ee4b9d8a319d..4f7715a95215ce1e71188eb63b81634b0f380e23 100644 --- a/src/z_spm_2dense.c +++ b/src/z_spm_2dense.c @@ -19,33 +19,27 @@ #include "common.h" #include "z_spm.h" -typedef spm_complex64_t (*__conj_fct_t)( spm_complex64_t ); - -static inline spm_complex64_t -__fct_id( spm_complex64_t val ) { - return val; -} - -#if defined(PRECISION_c) || defined(PRECISION_z) -static inline spm_complex64_t -__fct_conj( spm_complex64_t val ) { - return conj( val ); -} -#endif - +/** + * @brief Convert to dense a diagonal element within a symmetric/hermitian + * matrix with column/row major storage + * + * Note that column major is using the low triangular part only of the diagonal + * element matrices, and row major, by symmetry, is using only the upper + * triangular part. + * + * The comments in the code are made for column major storage. + */ static inline void -z_spm_2dense_elt_sym_diag_col( spm_complex64_t *A, - const spm_int_t lda, - const spm_int_t row, - const spm_int_t dofi, - const spm_int_t col, - const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) +z_spm_2dense_elt_sym_diag( const spm_int_t row, + const spm_int_t dofi, + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + spm_complex64_t *A, + const spm_int_t lda ) { spm_int_t ii, jj; - for(jj=0; jj<dofj; jj++) + for(jj=0; jj<dofi; jj++) { /* Skip unused upper triangular part */ for(ii=0; ii<jj; ii++) { @@ -53,84 +47,32 @@ z_spm_2dense_elt_sym_diag_col( spm_complex64_t *A, } /* Diagonal element */ - A[ lda * (col + jj) + (row + ii) ] = *valptr; + A[ lda * (row + jj) + (row + ii) ] = *valptr; valptr++; for(ii=jj+1; ii<dofi; ii++, valptr++) { /* Lower part */ - A[ lda * (col + jj) + (row + ii) ] = *valptr; - /* Upper part */ - A[ lda * (row + ii) + (col + jj) ] = conjfct(*valptr); - } - } - (void)conjfct; -} - -static inline void -z_spm_2dense_elt_sym_diag_row( spm_complex64_t *A, - const spm_int_t lda, - const spm_int_t row, - const spm_int_t dofi, - const spm_int_t col, - const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) -{ - spm_int_t ii, jj; - - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<ii; jj++, valptr++) - { - /* Lower part */ - A[ lda * (col + jj) + (row + ii) ] = *valptr; - + A[ lda * (row + jj) + (row + ii) ] = *valptr; /* Upper part */ - A[ lda * (row + ii) + (col + jj) ] = conjfct(*valptr); - } - - /* Diagonal element */ - A[ lda * (col + jj) + (row + ii) ] = *valptr; - valptr++; - - /* Skip unused upper triangular part */ - for(jj=ii+1; jj<dofj; jj++) { - valptr++; + A[ lda * (row + ii) + (row + jj) ] = conjfct(*valptr); } } (void)conjfct; } +/** + * @brief Convert to dense a general element matrix with column major storage + */ static inline void -z_spm_2dense_elt_sym_diag( spm_complex64_t *A, - const spm_int_t lda, - const spm_layout_t layout, - const spm_int_t row, - const spm_int_t dofi, - const spm_int_t col, - const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) -{ - if ( layout == SpmColMajor ) { - z_spm_2dense_elt_sym_diag_col( A, lda, row, dofi, col, dofj, conjfct, valptr ); - } - else { - z_spm_2dense_elt_sym_diag_row( A, lda, row, dofi, col, dofj, conjfct, valptr ); - } - (void)conjfct; -} - -static inline void -z_spm_2dense_elt_gen_col( spm_complex64_t *A, - const spm_int_t lda, - const spm_int_t row, +z_spm_2dense_elt_gen_col( const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + spm_complex64_t *A, + const spm_int_t lda ) { spm_int_t ii, jj; @@ -141,18 +83,20 @@ z_spm_2dense_elt_gen_col( spm_complex64_t *A, A[ lda * (col + jj) + (row + ii) ] = conjfct(*valptr); } } - (void)conjfct; } +/** + * @brief Convert to dense a general element matrix with row major storage + */ static inline void -z_spm_2dense_elt_gen_row( spm_complex64_t *A, - const spm_int_t lda, - const spm_int_t row, +z_spm_2dense_elt_gen_row( const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + spm_complex64_t *A, + const spm_int_t lda ) { spm_int_t ii, jj; @@ -163,81 +107,96 @@ z_spm_2dense_elt_gen_row( spm_complex64_t *A, A[ lda * (col + jj) + (row + ii) ] = conjfct(*valptr); } } - (void)conjfct; } +/** + * @brief Convert to dense a general element matrix + */ static inline void -z_spm_2dense_elt_gen( spm_complex64_t *A, - const spm_int_t lda, - const spm_layout_t layout, +z_spm_2dense_elt_gen( const spm_layout_t layout, const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + spm_complex64_t *A, + const spm_int_t lda ) { if ( layout == SpmColMajor ) { - z_spm_2dense_elt_gen_col( A, lda, row, dofi, col, dofj, conjfct, valptr ); + z_spm_2dense_elt_gen_col( row, dofi, col, dofj, conjfct, valptr, A, lda ); } else { - z_spm_2dense_elt_gen_row( A, lda, row, dofi, col, dofj, conjfct, valptr ); + z_spm_2dense_elt_gen_row( row, dofi, col, dofj, conjfct, valptr, A, lda ); } } +/** + * @brief Convert to dense an off-diagonal element matrix in the + * symmetric/hermitian case + */ static inline void -z_spm_2dense_elt_sym_offd( spm_complex64_t *A, - const spm_int_t lda, - const spm_layout_t layout, +z_spm_2dense_elt_sym_offd( const spm_layout_t layout, const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + spm_complex64_t *A, + const spm_int_t lda ) { if ( layout == SpmColMajor ) { - z_spm_2dense_elt_gen_col( A, lda, row, dofi, col, dofj, __fct_id, valptr ); - z_spm_2dense_elt_gen_row( A, lda, col, dofj, row, dofi, conjfct, valptr ); + /* A[ row, col ] */ + z_spm_2dense_elt_gen_col( row, dofi, col, dofj, __spm_zid, valptr, A, lda ); + /* + * A[ col, row ] = conj( A[ row, col ]^t ) + * => Let's exploit the row major kernel to make it transpose + */ + z_spm_2dense_elt_gen_row( col, dofj, row, dofi, conjfct, valptr, A, lda ); } else { - z_spm_2dense_elt_gen_row( A, lda, row, dofi, col, dofj, __fct_id, valptr ); - z_spm_2dense_elt_gen_col( A, lda, col, dofj, row, dofi, conjfct, valptr ); + z_spm_2dense_elt_gen_row( row, dofi, col, dofj, __spm_zid, valptr, A, lda ); + z_spm_2dense_elt_gen_col( col, dofj, row, dofi, conjfct, valptr, A, lda ); } } +/** + * @brief Convert to dense an element matrix + */ static inline void -z_spm_2dense_elt( spm_complex64_t *A, - const spm_int_t lda, - const spm_mtxtype_t mtxtype, +z_spm_2dense_elt( const spm_mtxtype_t mtxtype, const spm_layout_t layout, const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const spm_complex64_t *valptr ) + const spm_complex64_t *valptr, + spm_complex64_t *A, + const spm_int_t lda ) { if ( mtxtype == SpmGeneral ) { - z_spm_2dense_elt_gen( A, lda, layout, row, dofi, col, dofj, __fct_id, valptr ); + z_spm_2dense_elt_gen( layout, row, dofi, col, dofj, __spm_zid, valptr, A, lda ); } else { - __conj_fct_t conjfct; + spm_zconj_fct_t conjfct; #if defined(PRECISION_c) || defined(PRECISION_z) if ( mtxtype == SpmHermitian ) { - conjfct = __fct_conj; + conjfct = __spm_zconj; } else #endif { - conjfct = __fct_id; + conjfct = __spm_zid; } if ( row == col ) { - z_spm_2dense_elt_sym_diag( A, lda, layout, row, dofi, col, dofj, conjfct, valptr ); + assert( dofi == dofj ); + z_spm_2dense_elt_sym_diag( row, dofi, conjfct, valptr, A, lda ); } else { - z_spm_2dense_elt_sym_offd( A, lda, layout, row, dofi, col, dofj, conjfct, valptr ); + z_spm_2dense_elt_sym_offd( layout, row, dofi, col, dofj, conjfct, valptr, A, lda ); } } } @@ -315,8 +274,9 @@ z_spmCSC2dense( const spmatrix_t *spm ) row = dofs[ig] - baseval; } - z_spm_2dense_elt( A, lda, spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr ); + z_spm_2dense_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + A, lda ); valptr += dofi * dofj; } } @@ -397,8 +357,9 @@ z_spmCSR2dense( const spmatrix_t *spm ) col = dofs[jg] - baseval; } - z_spm_2dense_elt( A, lda, spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr ); + z_spm_2dense_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + A, lda ); valptr += dofi * dofj; } } @@ -471,8 +432,9 @@ z_spmIJV2dense( const spmatrix_t *spm ) col = dofs[j] - baseval; } - z_spm_2dense_elt( A, lda, spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr ); + z_spm_2dense_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, + A, lda ); valptr += dofi * dofj; } diff --git a/src/z_spm_expand.c b/src/z_spm_expand.c index 00a4cdc395b835eadfd4c0c1b378221df985e85d..fe0af3a9879b1e89de62bab3fc4e3d6a4b55e688 100644 --- a/src/z_spm_expand.c +++ b/src/z_spm_expand.c @@ -20,7 +20,7 @@ static inline void spm_expand_loc2glob( const spmatrix_t *spm_in, spmatrix_t *spm_out ) { - spm_int_t i, j, ig, baseval, ndof; + spm_int_t i, j, ig, jg, baseval, ndof; spm_int_t *l2g_in = spm_in->loc2glob; spm_int_t *l2g_out = spm_out->loc2glob; @@ -33,25 +33,29 @@ spm_expand_loc2glob( const spmatrix_t *spm_in, spmatrix_t *spm_out ) for(i=0; i<spm_in->n; i++, l2g_in++) { ig = *l2g_in - baseval; - for(j=0; i<ndof; i++, l2g_out++) + jg = ig * ndof + baseval; + for(j=0; j<ndof; j++, l2g_out++) { - *l2g_out = ig * ndof + j + baseval; + *l2g_out = jg + j; } } } /* Variadic dof */ else { - spm_int_t *dofs = spm_in->dofs; + spm_int_t *dofs = spm_in->dofs - baseval; for(i=0; i<spm_in->n; i++, l2g_in++) { - ig = *l2g_in - baseval; + ig = *l2g_in; ndof = dofs[ig+1] - dofs[ig]; - for(j=0; i<ndof; i++, l2g_out++) + jg = dofs[ig]; + + for(j=0; j<ndof; j++, l2g_out++) { - *l2g_out = dofs[ig] + j; + *l2g_out = jg + j; } } } + assert( (l2g_out - spm_out->loc2glob) == spm_out->n ); } @@ -167,7 +171,6 @@ z_spmCSCExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) } else { dofj = dofs[jg+1] - dofs[jg]; - assert( col == (dofs[jg] - baseval) ); } for(jj=0; jj<dofj; jj++, col++, newcol++) @@ -331,7 +334,6 @@ z_spmCSRExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) } else { dofi = dofs[ig+1] - dofs[ig]; - assert( row == dofs[ig] - baseval ); } for(ii=0; ii<dofi; ii++, row++, newrow++) @@ -494,8 +496,8 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) (i != j) || ((i == j) && (row+ii >= col+jj)) ) { - assert( row + ii < spm_out->n ); - assert( col + jj < spm_out->n ); + assert( row + ii < spm_out->gNexp ); + assert( col + jj < spm_out->gNexp ); (*newrow) = row + ii + baseval; (*newcol) = col + jj + baseval; newrow++; @@ -517,8 +519,8 @@ z_spmIJVExpand( const spmatrix_t *spm_in, spmatrix_t *spm_out ) (i != j) || ((i == j) && (row+ii >= col+jj)) ) { - assert( row + ii < spm_out->n ); - assert( col + jj < spm_out->n ); + assert( row + ii < spm_out->gNexp ); + assert( col + jj < spm_out->gNexp ); (*newrow) = row + ii + baseval; (*newcol) = col + jj + baseval; newrow++; diff --git a/src/z_spm_norm.c b/src/z_spm_norm.c index 861a4f536b1042f44c139ad9699f2c577387db01..29e9ccfd5e7531539a0fd014779d079fd18c3151 100644 --- a/src/z_spm_norm.c +++ b/src/z_spm_norm.c @@ -386,7 +386,7 @@ z_spmOneNorm( const spmatrix_t *spm ) * *******************************************************************************/ double -z_spmNorm( int ntype, +z_spmNorm( spm_normtype_t ntype, const spmatrix_t *spm ) { double norm = 0.; diff --git a/src/z_spm_print.c b/src/z_spm_print.c index 8cfd51d803dc4c72fe196d01b89289a26d3b8ef9..12c647cbd3100e5604bdc48bb8706ce098a75302 100644 --- a/src/z_spm_print.c +++ b/src/z_spm_print.c @@ -19,32 +19,26 @@ #include "common.h" #include "z_spm.h" -typedef spm_complex64_t (*__conj_fct_t)( spm_complex64_t ); - -static inline spm_complex64_t -__fct_id( spm_complex64_t val ) { - return val; -} - -#if defined(PRECISION_c) || defined(PRECISION_z) -static inline spm_complex64_t -__fct_conj( spm_complex64_t val ) { - return conj( val ); -} -#endif - +/** + * @brief Print a diagonal element within a symmetric/hermitian + * matrix with column/row major storage + * + * Note that column major is using the low triangular part only of the diagonal + * element matrices, and row major, by symmetry, is using only the upper + * triangular part. + * + * The comments in the code are made for column major storage. + */ static inline void -z_spm_print_elt_sym_diag_col( FILE *f, - const spm_int_t row, - const spm_int_t dofi, - const spm_int_t col, - const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) +z_spm_print_elt_sym_diag( const spm_int_t row, + const spm_int_t dofi, + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + FILE *f ) { spm_int_t ii, jj; - for(jj=0; jj<dofj; jj++) + for(jj=0; jj<dofi; jj++) { /* Skip unused upper triangular part */ for(ii=0; ii<jj; ii++) { @@ -52,80 +46,31 @@ z_spm_print_elt_sym_diag_col( FILE *f, } /* Diagonal element */ - z_spmPrintElt( f, row + ii, col + jj, *valptr ); + z_spmPrintElt( f, row + ii, row + jj, *valptr ); valptr++; for(ii=jj+1; ii<dofi; ii++, valptr++) { /* Lower part */ - z_spmPrintElt( f, row + ii, col + jj, *valptr ); + z_spmPrintElt( f, row + ii, row + jj, *valptr ); /* Upper part */ - z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + z_spmPrintElt( f, row + jj, row + ii, conjfct(*valptr) ); } } (void)conjfct; } +/** + * @brief Print a general element matrix with column major storage + */ static inline void -z_spm_print_elt_sym_diag_row( FILE *f, - const spm_int_t row, - const spm_int_t dofi, - const spm_int_t col, - const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) -{ - spm_int_t ii, jj; - - for(ii=0; ii<dofi; ii++) - { - for(jj=0; jj<ii; jj++, valptr++) - { - /* Lower part */ - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - /* Upper part */ - z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); - } - - /* Diagonal element */ - z_spmPrintElt( f, row + ii, col + jj, *valptr ); - valptr++; - - /* Skip unused upper triangular part */ - for(jj=ii+1; jj<dofj; jj++) { - valptr++; - } - } - (void)conjfct; -} - -static inline void -z_spm_print_elt_sym_diag( FILE *f, - const spm_layout_t layout, - const spm_int_t row, - const spm_int_t dofi, - const spm_int_t col, - const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) -{ - if ( layout == SpmColMajor ) { - z_spm_print_elt_sym_diag_col( f, row, dofi, col, dofj, conjfct, valptr ); - } - else { - z_spm_print_elt_sym_diag_row( f, row, dofi, col, dofj, conjfct, valptr ); - } - (void)conjfct; -} - -static inline void -z_spm_print_elt_gen_col( FILE *f, - const spm_int_t row, +z_spm_print_elt_gen_col( const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + FILE *f ) { spm_int_t ii, jj; @@ -133,20 +78,23 @@ z_spm_print_elt_gen_col( FILE *f, { for(ii=0; ii<dofi; ii++, valptr++) { - z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + z_spmPrintElt( f, row + ii, col + jj, conjfct(*valptr) ); } } (void)conjfct; } +/** + * @brief Print a general element matrix with row major storage + */ static inline void -z_spm_print_elt_gen_row( FILE *f, - const spm_int_t row, +z_spm_print_elt_gen_row( const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + FILE *f ) { spm_int_t ii, jj; @@ -154,81 +102,96 @@ z_spm_print_elt_gen_row( FILE *f, { for(jj=0; jj<dofj; jj++, valptr++) { - z_spmPrintElt( f, col + jj, row + ii, conjfct(*valptr) ); + z_spmPrintElt( f, row + ii, col + jj, conjfct(*valptr) ); } } (void)conjfct; } +/** + * @brief Print a general element matrix + */ static inline void -z_spm_print_elt_gen( FILE *f, - const spm_layout_t layout, +z_spm_print_elt_gen( const spm_layout_t layout, const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + FILE *f ) { if ( layout == SpmColMajor ) { - z_spm_print_elt_gen_col( f, row, dofi, col, dofj, conjfct, valptr ); + z_spm_print_elt_gen_col( row, dofi, col, dofj, conjfct, valptr, f ); } else { - z_spm_print_elt_gen_row( f, row, dofi, col, dofj, conjfct, valptr ); + z_spm_print_elt_gen_row( row, dofi, col, dofj, conjfct, valptr, f ); } } +/** + * @brief Print an off-diagonal element matrix in the symmetric/hermitian case + */ static inline void -z_spm_print_elt_sym_offd( FILE *f, - const spm_layout_t layout, +z_spm_print_elt_sym_offd( const spm_layout_t layout, const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const __conj_fct_t conjfct, - const spm_complex64_t *valptr ) + const spm_zconj_fct_t conjfct, + const spm_complex64_t *valptr, + FILE *f ) { if ( layout == SpmColMajor ) { - z_spm_print_elt_gen_col( f, row, dofi, col, dofj, __fct_id, valptr ); - z_spm_print_elt_gen_row( f, col, dofj, row, dofi, conjfct, valptr ); + /* A[ row, col ] */ + z_spm_print_elt_gen_col( row, dofi, col, dofj, __spm_zid, valptr, f ); + /* + * A[ col, row ] = conj( A[ row, col ]^t ) + * => Let's exploit the row major kernel to make it transpose + */ + z_spm_print_elt_gen_row( col, dofj, row, dofi, conjfct, valptr, f ); } else { - z_spm_print_elt_gen_row( f, row, dofi, col, dofj, __fct_id, valptr ); - z_spm_print_elt_gen_col( f, col, dofj, row, dofi, conjfct, valptr ); + z_spm_print_elt_gen_row( row, dofi, col, dofj, __spm_zid, valptr, f ); + z_spm_print_elt_gen_col( col, dofj, row, dofi, conjfct, valptr, f ); } } +/** + * @brief Print an element matrix + */ static inline void -z_spm_print_elt( FILE *f, - const spm_mtxtype_t mtxtype, +z_spm_print_elt( const spm_mtxtype_t mtxtype, const spm_layout_t layout, const spm_int_t row, const spm_int_t dofi, const spm_int_t col, const spm_int_t dofj, - const spm_complex64_t *valptr ) + const spm_complex64_t *valptr, + FILE *f ) { if ( mtxtype == SpmGeneral ) { - z_spm_print_elt_gen( f, layout, row, dofi, col, dofj, __fct_id, valptr ); + z_spm_print_elt_gen( layout, row, dofi, col, dofj, __spm_zid, valptr, f ); } else { - __conj_fct_t conjfct; + spm_zconj_fct_t conjfct; #if defined(PRECISION_c) || defined(PRECISION_z) if ( mtxtype == SpmHermitian ) { - conjfct = __fct_conj; + conjfct = __spm_zconj; } else #endif { - conjfct = __fct_id; + conjfct = __spm_zid; } if ( row == col ) { - z_spm_print_elt_sym_diag( f, layout, row, dofi, col, dofj, conjfct, valptr ); + assert( dofi == dofj ); + z_spm_print_elt_sym_diag( row, dofi, conjfct, valptr, f ); } else { - z_spm_print_elt_sym_offd( f, layout, row, dofi, col, dofj, conjfct, valptr ); + z_spm_print_elt_sym_offd( layout, row, dofi, col, dofj, conjfct, valptr, f ); } } } @@ -250,8 +213,8 @@ z_spm_print_elt( FILE *f, * *******************************************************************************/ void -z_spmCSCPrint( FILE *f, - const spmatrix_t *spm ) +z_spmCSCPrint( FILE *f, + const spmatrix_t *spm ) { spm_int_t j, k, baseval; spm_int_t ig, dofi, row; @@ -294,8 +257,8 @@ z_spmCSCPrint( FILE *f, row = dofs[ig] - baseval; } - z_spm_print_elt( f, spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr ); + z_spm_print_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, f ); valptr += dofi * dofj; } } @@ -319,7 +282,8 @@ z_spmCSCPrint( FILE *f, * *******************************************************************************/ void -z_spmCSRPrint( FILE *f, const spmatrix_t *spm ) +z_spmCSRPrint( FILE *f, + const spmatrix_t *spm ) { spm_int_t i, k, baseval; spm_int_t ig, dofi, row; @@ -365,8 +329,8 @@ z_spmCSRPrint( FILE *f, const spmatrix_t *spm ) col = dofs[jg] - baseval; } - z_spm_print_elt( f, spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr ); + z_spm_print_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, f ); valptr += dofi * dofj; } } @@ -391,7 +355,8 @@ z_spmCSRPrint( FILE *f, const spmatrix_t *spm ) * *******************************************************************************/ void -z_spmIJVPrint( FILE *f, const spmatrix_t *spm ) +z_spmIJVPrint( FILE *f, + const spmatrix_t *spm ) { spm_int_t k, baseval; spm_int_t i, dofi, row; @@ -429,8 +394,8 @@ z_spmIJVPrint( FILE *f, const spmatrix_t *spm ) col = dofs[j] - baseval; } - z_spm_print_elt( f, spm->mtxtype, spm->layout, - row, dofi, col, dofj, valptr ); + z_spm_print_elt( spm->mtxtype, spm->layout, + row, dofi, col, dofj, valptr, f ); valptr += dofi * dofj; } return; diff --git a/tests/spm_compare.c b/tests/spm_compare.c index 6ef899ef1ada2b8a821f050889d6c1dd12ffc326..d16adad322d211742633ae52c8f427b121da4db7 100644 --- a/tests/spm_compare.c +++ b/tests/spm_compare.c @@ -13,9 +13,11 @@ **/ #include "spm_tests.h" -char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; -char* fmtnames[] = { "CSC", "CSR", "IJV" }; -char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; +const char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; +const char* fmtnames[] = { "CSC", "CSR", "IJV" }; +const char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; +const char *dofname[] = { "None", "Constant", "Variadic" }; +const char* transnames[] = { "NoTrans", "Trans", "ConjTrans" }; static inline int spmCompareFloatArray( spm_int_t n, diff --git a/tests/spm_dof_expand_tests.c b/tests/spm_dof_expand_tests.c index 61f600ed7ef2fcfbfa4a5c7dc10f42bdacc8b1c2..d49612e0731dbf289800448f7851c736b6a90aeb 100644 --- a/tests/spm_dof_expand_tests.c +++ b/tests/spm_dof_expand_tests.c @@ -34,10 +34,6 @@ printf("SUCCESS\n"); \ } -char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; -char* fmtnames[] = { "CSC", "CSR", "IJV" }; -char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; - int main (int argc, char **argv) { spmatrix_t original, *spm; diff --git a/tests/spm_dof_matvec_tests.c b/tests/spm_dof_matvec_tests.c index b0bde26ccf17bbdb72094b15c4affeb88cae6778..5841ce0452096361e688e63223fce4c9a79eda99 100644 --- a/tests/spm_dof_matvec_tests.c +++ b/tests/spm_dof_matvec_tests.c @@ -32,10 +32,6 @@ printf("SUCCESS\n"); \ } -char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; -char* fmtnames[] = { "CSC", "CSR", "IJV" }; -char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; - int main (int argc, char **argv) { spmatrix_t original, *spm; diff --git a/tests/spm_dof_norm_tests.c b/tests/spm_dof_norm_tests.c index e0303723397aab30de8c11efbcb2948d28d065aa..8383fb326f1a5f181961c1c9b31a73df7247c183 100644 --- a/tests/spm_dof_norm_tests.c +++ b/tests/spm_dof_norm_tests.c @@ -22,11 +22,6 @@ #include <time.h> #include <spm_tests.h> -int z_spm_norm_check( const spmatrix_t *spm ); -int c_spm_norm_check( const spmatrix_t *spm ); -int d_spm_norm_check( const spmatrix_t *spm ); -int s_spm_norm_check( const spmatrix_t *spm ); - #define PRINT_RES(_ret_) \ if(_ret_) { \ printf("FAILED(%d)\n", _ret_); \ @@ -36,10 +31,6 @@ int s_spm_norm_check( const spmatrix_t *spm ); printf("SUCCESS\n"); \ } -char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; -char* fmtnames[] = { "CSC", "CSR", "IJV" }; -char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; - int main (int argc, char **argv) { spmatrix_t original, *spm; @@ -108,10 +99,12 @@ int main (int argc, char **argv) continue; } - printf(" Case: %d / %s / %s / %d / %s\n", - i, fltnames[spm->flttype], - fmtnames[spm->fmttype], baseval, - mtxnames[mtxtype - SpmGeneral] ); + printf( " Case: %s / %s / %s / %d / %s\n", + fltnames[spm->flttype], + dofname[i+1], + mtxnames[mtxtype - SpmGeneral], + baseval, + fmtnames[spm->fmttype] ); switch( spm->flttype ){ case SpmComplex64: diff --git a/tests/spm_matvec_tests.c b/tests/spm_matvec_tests.c index 61740c69f076ab6639feb504dcf2c620ca6b84f6..84108b9b88bc2b2ff7ddb93c87bd424c2caa5c55 100644 --- a/tests/spm_matvec_tests.c +++ b/tests/spm_matvec_tests.c @@ -31,11 +31,6 @@ printf("SUCCESS\n"); \ } -char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; -char* transnames[] = { "NoTrans", "Trans", "ConjTrans" }; -char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; -char* fmtnames[] = { "CSC", "CSR", "IJV" }; - int main (int argc, char **argv) { spmatrix_t spm; diff --git a/tests/spm_norm_tests.c b/tests/spm_norm_tests.c index 4e49a15adfe33fefbd1694f93642f409e837a768..415239044417931b1379d1f0300717b3911e95eb 100644 --- a/tests/spm_norm_tests.c +++ b/tests/spm_norm_tests.c @@ -22,11 +22,6 @@ #include <time.h> #include <spm_tests.h> -int z_spm_norm_check( const spmatrix_t *spm ); -int c_spm_norm_check( const spmatrix_t *spm ); -int d_spm_norm_check( const spmatrix_t *spm ); -int s_spm_norm_check( const spmatrix_t *spm ); - #define PRINT_RES(_ret_) \ if(_ret_) { \ printf("FAILED(%d)\n", _ret_); \ @@ -36,10 +31,6 @@ int s_spm_norm_check( const spmatrix_t *spm ); printf("SUCCESS\n"); \ } -char* fltnames[] = { "Pattern", "", "Float", "Double", "Complex32", "Complex64" }; -char* fmtnames[] = { "CSC", "CSR", "IJV" }; -char* mtxnames[] = { "General", "Symmetric", "Hermitian" }; - int main (int argc, char **argv) { spmatrix_t spm; @@ -101,7 +92,8 @@ int main (int argc, char **argv) printf(" Case: %s / %s / %d / %s\n", fltnames[spm.flttype], - fmtnames[spm.fmttype], baseval, + fmtnames[spm.fmttype], + baseval, mtxnames[mtxtype - SpmGeneral] ); switch( spm.flttype ){ diff --git a/tests/spm_scatter_gather_tests.c b/tests/spm_scatter_gather_tests.c index f9c4ec407a060c0fcd939263cc21d882555269d9..0678923a495345abfe8d4f249bcd5b309f90bc28 100644 --- a/tests/spm_scatter_gather_tests.c +++ b/tests/spm_scatter_gather_tests.c @@ -75,7 +75,6 @@ spmdist_check_scatter_gather( spmatrix_t *original, int root, int clustnum ) { - const char *dofname[] = { "None", "Constant", "Variadic" }; const char *distname[] = { "Round-Robin", "Continuous "}; spmatrix_t *spms = NULL; spmatrix_t *spmg = NULL; diff --git a/tests/spm_tests.h b/tests/spm_tests.h index c7610f53c775764efd81fdb66868ca983162bf0e..011be821e8693576a0e7fb83a57073aa9c10616f 100644 --- a/tests/spm_tests.h +++ b/tests/spm_tests.h @@ -20,9 +20,11 @@ #include <stdio.h> #include <spm.h> -extern char* fltnames[]; -extern char* fmtnames[]; -extern char* mtxnames[]; +extern const char* fltnames[]; +extern const char* fmtnames[]; +extern const char* mtxnames[]; +extern const char *dofname[]; +extern const char* transnames[]; void spmGetOptions( int argc, char **argv, spm_driver_t *driver, char **filename );