diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d09034c24b9f7a27397c9e23ad5a7e90a70bae7..fdcd443e09423452246d517baaca8961403809ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,18 +114,16 @@ include(CheckSystem) # SPM depends on Lapacke and CBLAS #--------------------------------- -find_package(LAPACKE REQUIRED) -if(LAPACKE_FOUND) - message(STATUS "lapacke: ${LAPACKE_INCLUDE_DIRS}") - include_directories(${LAPACKE_INCLUDE_DIRS}) -endif() - -find_package(CBLAS REQUIRED) +find_package(CBLAS) # Should be REQUIRED for BLAS sequential only if(CBLAS_FOUND) - message(STATUS "cblas: ${CBLAS_INCLUDE_DIRS}") include_directories(${CBLAS_INCLUDE_DIRS}) endif() +find_package(LAPACKE) # Should be also REQUIRED +if(LAPACKE_FOUND) + include_directories(${LAPACKE_INCLUDE_DIRS}) +endif() + ### Store dependencies not handled with pkg-config set( DEPS_LIBRARIES ${LAPACKE_LIBRARIES_DEP} diff --git a/src/drivers/laplacian.c b/src/drivers/laplacian.c index e5edc26eea1e079867d4c971b5c9d4d808bff85f..22a94af91515604a1df365eb8d5a47348741dbf5 100644 --- a/src/drivers/laplacian.c +++ b/src/drivers/laplacian.c @@ -220,24 +220,14 @@ static void (*laplacian_7points[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_ z_spmLaplacian_7points }; -static void (*extended_laplacian_table2D[6])(spmatrix_t *, spm_int_t, spm_int_t) = +static void (*laplacian_27points[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_t, spm_fixdbl_t, spm_fixdbl_t) = { - p_spmExtendedLaplacian2D, + p_spmLaplacian_27points, NULL, - s_spmExtendedLaplacian2D, - d_spmExtendedLaplacian2D, - c_spmExtendedLaplacian2D, - z_spmExtendedLaplacian2D -}; - -static void (*extended_laplacian_table3D[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_t) = -{ - p_spmExtendedLaplacian3D, - NULL, - s_spmExtendedLaplacian3D, - d_spmExtendedLaplacian3D, - c_spmExtendedLaplacian3D, - z_spmExtendedLaplacian3D + s_spmLaplacian_27points, + d_spmLaplacian_27points, + c_spmLaplacian_27points, + z_spmLaplacian_27points }; /** @@ -343,12 +333,7 @@ genExtendedLaplacian( const char *filename, spm->flttype = flttype; spm->n = dim1 * dim2 * dim3; - if( dim3 > 0 ) { - extended_laplacian_table3D[spm->flttype](spm, dim1, dim2, dim3); - } - else if (dim2 > 0) { - extended_laplacian_table2D[spm->flttype](spm, dim1, dim2); - } + laplacian_27points[spm->flttype](spm, dim1, dim2, dim3, alpha, beta); return SPM_SUCCESS; } diff --git a/src/drivers/laplacian.h b/src/drivers/laplacian.h index 2954365eaadb9468dc1ab9162eceb5c8d7f4886d..cd2ef45577bc5f76f95fe6025655868a9b995002 100644 --- a/src/drivers/laplacian.h +++ b/src/drivers/laplacian.h @@ -20,16 +20,10 @@ void d_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, sp void s_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); void p_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); -void z_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void c_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void d_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void s_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void p_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); - -void z_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void c_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void d_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void s_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void p_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); +void z_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void c_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void d_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void s_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void p_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); #endif /* _laplacian_h_ */ diff --git a/src/z_spm_laplacian.c b/src/z_spm_laplacian.c index c191ef409f6db5e63a480141547d675417f9ba08..e2f209a7c5d04d884522afdd4dc7ff9ee088c6e4 100644 --- a/src/z_spm_laplacian.c +++ b/src/z_spm_laplacian.c @@ -22,7 +22,7 @@ * * @ingroup spm_dev_driver * - * @brief Generate a laplacian matrix for a 7-points stencil + * @brief Generate a laplacian matrix for a 3D 7-points stencil * \f[ M = \alpha * D - \beta * A \f] * * Example: @@ -57,6 +57,10 @@ * @param[in] beta * The beta coefficient for the adjacency matrix * + * @remark: In complex, the Laplacian is set to hermitian. See + * z_spmLaplacian_27points() to get a symmetric Laplacian, or change the + * mtxtype field by hand. + * *******************************************************************************/ void z_spmLaplacian_7points( spmatrix_t *spm, @@ -68,8 +72,10 @@ z_spmLaplacian_7points( spmatrix_t *spm, { spm_complex64_t *valptr; + spm_complex64_t lalpha = (spm_complex64_t)alpha; + spm_complex64_t lbeta = (spm_complex64_t)beta; spm_int_t *colptr, *rowptr; - spm_int_t i, j, k, l; + spm_int_t i, j, k, l, degree; spm_int_t nnz = (2*(dim1)-1)*dim2*dim3 + (dim2-1)*dim1*dim3 + dim2*dim1*(dim3-1); spm->mtxtype = SpmHermitian; @@ -110,19 +116,27 @@ z_spmLaplacian_7points( spmatrix_t *spm, /* Diagonal value */ *rowptr = l; #if !defined(PRECISION_p) - *valptr = 6. * alpha; - if (k == 1) - *valptr -= alpha; - if (k == dim1) - *valptr -= alpha; - if (j == 1) - *valptr -= alpha; - if (j == dim2) - *valptr -= alpha; - if (i == 1) - *valptr -= alpha; - if (i == dim3) - *valptr -= alpha; + degree = 0; + if (k > 1) { + degree++; + } + if (k < dim1) { + degree++; + } + if (j > 1) { + degree++; + } + if (j < dim2) { + degree++; + } + if (i > 1) { + degree++; + } + if (i < dim3) { + degree++; + } + + *valptr = (spm_complex64_t)degree * lalpha; #endif valptr++; rowptr++; colptr[1]++; @@ -130,7 +144,7 @@ z_spmLaplacian_7points( spmatrix_t *spm, if (k < dim1) { *rowptr = l+1; #if !defined(PRECISION_p) - *valptr = -beta; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -139,7 +153,7 @@ z_spmLaplacian_7points( spmatrix_t *spm, if (j < dim2) { *rowptr = l+dim1; #if !defined(PRECISION_p) - *valptr = -beta; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -148,7 +162,7 @@ z_spmLaplacian_7points( spmatrix_t *spm, if (i < dim3) { *rowptr = l+dim1*dim2; #if !defined(PRECISION_p) - *valptr = -beta; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -159,7 +173,8 @@ z_spmLaplacian_7points( spmatrix_t *spm, } assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz ); - (void)alpha; (void)beta; + (void)lalpha; (void)lbeta; + (void)degree; } /** @@ -167,130 +182,30 @@ z_spmLaplacian_7points( spmatrix_t *spm, * * @ingroup spm_dev_driver * - * z_spmExtendedLaplacian2D - Generate a 2D extended laplacian matrix. - * - ******************************************************************************* - * - * @param[inout] spm - * At start, an allocated spm structure. - * Contains the size of the laplacian in spm->n. - * At exit, contains the matrix in csc format. - * - * @param[in] dim1 - * contains the first dimension of the 2D grid of the laplacian. - * - * @param[in] dim2 - * contains the second dimension of the 2D grid of the laplacian. - * - *******************************************************************************/ -void -z_spmExtendedLaplacian2D( spmatrix_t *spm, - spm_int_t dim1, - spm_int_t dim2 ) -{ - spm_complex64_t *valptr; - spm_int_t *colptr, *rowptr; - spm_int_t i, j, k; - spm_int_t nnz = (2*(dim1)-1)*dim2 + (dim2-1)*(3*dim1-2); - - spm->mtxtype = SpmSymmetric; - spm->flttype = SpmComplex64; - spm->fmttype = SpmCSC; - spm->nnz = nnz; - spm->dof = 1; - - assert( spm->n == dim1*dim2 ); - - /* Allocating */ - spm->colptr = malloc((spm->n+1)*sizeof(spm_int_t)); - spm->rowptr = malloc(nnz *sizeof(spm_int_t)); - assert( spm->colptr ); - assert( spm->rowptr ); - -#if !defined(PRECISION_p) - spm->values = malloc(nnz *sizeof(spm_complex64_t)); - assert( spm->values ); -#endif - - /* Building ia, ja and values*/ - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); - - /* Building ia, ja and values*/ - *colptr = 1; - k = 1; /* Column index in the matrix ((i-1) * dim1 + j-1) */ - for(i=1; i<=dim2; i++) - { - for(j=1; j<=dim1; j++) - { - colptr[1] = colptr[0]; - - /* Diagonal value */ - *rowptr = k; -#if !defined(PRECISION_p) - if ( (j == dim1 || j == 1) && (i == dim2 || i == 1) ) - *valptr = (spm_complex64_t) 2.5; - else if (j == dim1 || j == 1 || i == dim2 || i == 1) - *valptr = (spm_complex64_t) 4.; - else - *valptr = (spm_complex64_t) 6.; -#endif - valptr++; rowptr++; colptr[1]++; - - /* Connexion along dimension 1 */ - if (j < dim1) { - *rowptr = k+1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; -#endif - valptr++; rowptr++; colptr[1]++; - } - - /* Connexion along dimension 2 */ - if (i < dim2) - { - if (j > 1) - { - *rowptr = k+dim1-1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; -#endif - valptr++; rowptr++; colptr[1]++; - - } - - *rowptr = k+dim1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; -#endif - valptr++; rowptr++; colptr[1]++; - - if (j < dim1) - { - *rowptr = k+dim1+1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; -#endif - valptr++; rowptr++; colptr[1]++; - - } - } - - colptr++; k++; - } - } - - assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz ); -} - -/** - ******************************************************************************* + * @brief Generate an extended laplacian matrix for a 3D 27-points stencil with + * \f[ M = \alpha * D - \beta * A \f], where D is the matrix of degrees, and A + * the matrix of adjacency with coefficients of 1 for B connexions, 1/ sqrt(2) + * for X connexions, and 1/sqrt(3) for D connexions. * - * @ingroup spm_dev_driver + * D-------X-------D + * /| /| /| + * X-------B-------X | + * /| | /| | /| | + * D-------X-|-----X | | + * | | X---|-|-B---|-|-X + * | |/| | |/ | |/| + * | B-----|-A-----|-B | + * |/| | |/| |/| | + * X-------B-------X | | + * | | D---|-|-X---|-|-D + * | |/ | |/ | |/ + * | X-----|-B-----|-X + * |/ |/ |/ + * D-------X-------D * - * @brief Generate an extended laplacian matrix for a 27-points stencil with - * \f[ M = \alpha * D - \beta * A \f] + * @remark: In complex, the Laplacian is set to symmetric. See + * z_spmLaplacian_7points() to get an hermitian Laplacian, or change the + * mtxtype field by hand. * ******************************************************************************* * @@ -308,20 +223,38 @@ z_spmExtendedLaplacian2D( spmatrix_t *spm, * @param[in] dim3 * contains the third dimension of the grid of the laplacian. * + * @param[in] alpha + * The alpha coefficient for the degree matrix + * + * @param[in] beta + * The beta coefficient for the adjacency matrix + * *******************************************************************************/ void -z_spmExtendedLaplacian3D( spmatrix_t *spm, - spm_int_t dim1, - spm_int_t dim2, - spm_int_t dim3 ) +z_spmLaplacian_27points( spmatrix_t *spm, + spm_int_t dim1, + spm_int_t dim2, + spm_int_t dim3, + spm_fixdbl_t alpha, + spm_fixdbl_t beta ) { spm_complex64_t *valptr; + /* + * See https://crd.lbl.gov/assets/pubs_presos/iwapt09-27pt.pdf for the + * meaning of alpha, beta, gamma, and delta. + * "Auto-tuning the 27-point Stencil for Multicore", K. Datta, S. Williams, + * V. Volkov, J. Carter, L. Oliker, J. Shalf, and K. Yelick + */ + spm_complex64_t lalpha = (spm_complex64_t)alpha; + spm_complex64_t lbeta = (spm_complex64_t)beta; + spm_complex64_t lgamma = (spm_complex64_t)beta / sqrt(2); + spm_complex64_t ldelta = (spm_complex64_t)beta / sqrt(3); spm_int_t *colptr, *rowptr; - spm_int_t i, j, k, l; - spm_int_t nnz = (2*dim1-1) * dim2 * dim3 - + (3*dim1-2) * (dim2-1) * dim3 - + ((3*dim1-2) * dim2 + 2 * (3*dim1-2) *(dim2-1)) * (dim3-1); + spm_int_t i, j, k, l, degree, d; + spm_int_t nnz = (2*dim1-1) * dim2 * dim3 + + (3*dim1-2) * (dim2-1) * dim3 + + ((3*dim1-2) * dim2 + 2 * (3*dim1-2) *(dim2-1)) * (dim3-1); spm->mtxtype = SpmSymmetric; spm->flttype = SpmComplex64; @@ -361,22 +294,36 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, /* Diagonal value */ *rowptr = l; #if !defined(PRECISION_p) - if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 4.75; - else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 10.; - else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 10.; - else if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) ) - *valptr = (spm_complex64_t) 10.; - else if ( (j != dim2 || j != 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 7.; - else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k != dim1 || i != 1) ) - *valptr = (spm_complex64_t) 7.; - else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) ) - *valptr = (spm_complex64_t) 7.; - else - *valptr = (spm_complex64_t) 14.; + degree = 1; + d = 1; + + if (k > 1) { + d++; + } + if (k < dim1) { + d++; + } + degree = degree * d; + d = 1; + + if (j > 1) { + d++; + } + if (j < dim2) { + d++; + } + degree = degree * d; + d = 1; + + if (i > 1) { + d++; + } + if (i < dim3) { + d++; + } + degree = degree * d - 1; + + *valptr = (spm_complex64_t)degree * lalpha; #endif valptr++; rowptr++; colptr[1]++; @@ -384,7 +331,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, if (k < dim1) { *rowptr = l+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -396,7 +343,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -404,7 +351,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; @@ -412,7 +359,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -427,7 +374,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2-dim1-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -435,7 +382,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1*dim2-dim1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -443,7 +390,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2-dim1+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -453,7 +400,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -461,7 +408,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1*dim2; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; @@ -469,7 +416,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -481,7 +428,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2+dim1-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -489,7 +436,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1*dim2+dim1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -497,7 +444,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2+dim1+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -511,4 +458,6 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, } assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz ); + (void)lalpha; (void)lbeta; (void)lgamma; (void)ldelta; + (void)degree; (void)d; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 608ab0ab7a78604831546e6f8c8741a141445252..6b83ca5f00d1e3184416ac31053e56b327eb86bd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,7 +62,8 @@ set( SPM_DOF_TESTS # Laplacian foreach(example ${SPM_TESTS} ${SPM_DOF_TESTS} ) foreach(arithm ${RP_SPM_PRECISIONS} ) - add_test(test_lap_${arithm}_${example} ./${example} --lap ${arithm}:10:10:10) + add_test(test_lap_${arithm}_${example} ./${example} --lap ${arithm}:10:10:10:10.:2.) + add_test(test_xlap_${arithm}_${example} ./${example} --xlap ${arithm}:6:10:12:5.:0.33) endforeach() endforeach()