From 1ddfc2fa76b0794f5baaa6bc15f0014e1a9a9d3c Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Mon, 7 May 2018 16:35:46 +0200 Subject: [PATCH] Factorize the laplacian code for the extended version and alpha/beta to the extended stencil --- src/drivers/laplacian.c | 29 +--- src/drivers/laplacian.h | 16 +-- src/z_spm_laplacian.c | 289 +++++++++++++++------------------------- 3 files changed, 122 insertions(+), 212 deletions(-) diff --git a/src/drivers/laplacian.c b/src/drivers/laplacian.c index e5edc26e..22a94af9 100644 --- a/src/drivers/laplacian.c +++ b/src/drivers/laplacian.c @@ -220,24 +220,14 @@ static void (*laplacian_7points[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_ z_spmLaplacian_7points }; -static void (*extended_laplacian_table2D[6])(spmatrix_t *, spm_int_t, spm_int_t) = +static void (*laplacian_27points[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_t, spm_fixdbl_t, spm_fixdbl_t) = { - p_spmExtendedLaplacian2D, + p_spmLaplacian_27points, NULL, - s_spmExtendedLaplacian2D, - d_spmExtendedLaplacian2D, - c_spmExtendedLaplacian2D, - z_spmExtendedLaplacian2D -}; - -static void (*extended_laplacian_table3D[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_t) = -{ - p_spmExtendedLaplacian3D, - NULL, - s_spmExtendedLaplacian3D, - d_spmExtendedLaplacian3D, - c_spmExtendedLaplacian3D, - z_spmExtendedLaplacian3D + s_spmLaplacian_27points, + d_spmLaplacian_27points, + c_spmLaplacian_27points, + z_spmLaplacian_27points }; /** @@ -343,12 +333,7 @@ genExtendedLaplacian( const char *filename, spm->flttype = flttype; spm->n = dim1 * dim2 * dim3; - if( dim3 > 0 ) { - extended_laplacian_table3D[spm->flttype](spm, dim1, dim2, dim3); - } - else if (dim2 > 0) { - extended_laplacian_table2D[spm->flttype](spm, dim1, dim2); - } + laplacian_27points[spm->flttype](spm, dim1, dim2, dim3, alpha, beta); return SPM_SUCCESS; } diff --git a/src/drivers/laplacian.h b/src/drivers/laplacian.h index 2954365e..cd2ef455 100644 --- a/src/drivers/laplacian.h +++ b/src/drivers/laplacian.h @@ -20,16 +20,10 @@ void d_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, sp void s_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); void p_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); -void z_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void c_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void d_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void s_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); -void p_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 ); - -void z_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void c_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void d_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void s_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); -void p_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 ); +void z_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void c_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void d_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void s_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); +void p_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta ); #endif /* _laplacian_h_ */ diff --git a/src/z_spm_laplacian.c b/src/z_spm_laplacian.c index c191ef40..2e43a230 100644 --- a/src/z_spm_laplacian.c +++ b/src/z_spm_laplacian.c @@ -22,7 +22,7 @@ * * @ingroup spm_dev_driver * - * @brief Generate a laplacian matrix for a 7-points stencil + * @brief Generate a laplacian matrix for a 3D 7-points stencil * \f[ M = \alpha * D - \beta * A \f] * * Example: @@ -57,6 +57,10 @@ * @param[in] beta * The beta coefficient for the adjacency matrix * + * @remark: In complex, the Laplacian is set to hermitian. See + * z_spmLaplacian_27points() to get a symmetric Laplacian, or change the + * mtxtype field by hand. + * *******************************************************************************/ void z_spmLaplacian_7points( spmatrix_t *spm, @@ -68,8 +72,10 @@ z_spmLaplacian_7points( spmatrix_t *spm, { spm_complex64_t *valptr; + spm_complex64_t lalpha = (spm_complex64_t)alpha; + spm_complex64_t lbeta = (spm_complex64_t)beta; spm_int_t *colptr, *rowptr; - spm_int_t i, j, k, l; + spm_int_t i, j, k, l, degree; spm_int_t nnz = (2*(dim1)-1)*dim2*dim3 + (dim2-1)*dim1*dim3 + dim2*dim1*(dim3-1); spm->mtxtype = SpmHermitian; @@ -110,19 +116,27 @@ z_spmLaplacian_7points( spmatrix_t *spm, /* Diagonal value */ *rowptr = l; #if !defined(PRECISION_p) - *valptr = 6. * alpha; - if (k == 1) - *valptr -= alpha; - if (k == dim1) - *valptr -= alpha; - if (j == 1) - *valptr -= alpha; - if (j == dim2) - *valptr -= alpha; - if (i == 1) - *valptr -= alpha; - if (i == dim3) - *valptr -= alpha; + degree = 0; + if (k > 1) { + degree++; + } + if (k < dim1) { + degree++; + } + if (j > 1) { + degree++; + } + if (j < dim2) { + degree++; + } + if (i > 1) { + degree++; + } + if (i < dim3) { + degree++; + } + + *valptr = (spm_complex64_t)degree * lalpha; #endif valptr++; rowptr++; colptr[1]++; @@ -130,7 +144,7 @@ z_spmLaplacian_7points( spmatrix_t *spm, if (k < dim1) { *rowptr = l+1; #if !defined(PRECISION_p) - *valptr = -beta; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -139,7 +153,7 @@ z_spmLaplacian_7points( spmatrix_t *spm, if (j < dim2) { *rowptr = l+dim1; #if !defined(PRECISION_p) - *valptr = -beta; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -148,7 +162,7 @@ z_spmLaplacian_7points( spmatrix_t *spm, if (i < dim3) { *rowptr = l+dim1*dim2; #if !defined(PRECISION_p) - *valptr = -beta; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -159,129 +173,8 @@ z_spmLaplacian_7points( spmatrix_t *spm, } assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz ); - (void)alpha; (void)beta; -} - -/** - ******************************************************************************* - * - * @ingroup spm_dev_driver - * - * z_spmExtendedLaplacian2D - Generate a 2D extended laplacian matrix. - * - ******************************************************************************* - * - * @param[inout] spm - * At start, an allocated spm structure. - * Contains the size of the laplacian in spm->n. - * At exit, contains the matrix in csc format. - * - * @param[in] dim1 - * contains the first dimension of the 2D grid of the laplacian. - * - * @param[in] dim2 - * contains the second dimension of the 2D grid of the laplacian. - * - *******************************************************************************/ -void -z_spmExtendedLaplacian2D( spmatrix_t *spm, - spm_int_t dim1, - spm_int_t dim2 ) -{ - spm_complex64_t *valptr; - spm_int_t *colptr, *rowptr; - spm_int_t i, j, k; - spm_int_t nnz = (2*(dim1)-1)*dim2 + (dim2-1)*(3*dim1-2); - - spm->mtxtype = SpmSymmetric; - spm->flttype = SpmComplex64; - spm->fmttype = SpmCSC; - spm->nnz = nnz; - spm->dof = 1; - - assert( spm->n == dim1*dim2 ); - - /* Allocating */ - spm->colptr = malloc((spm->n+1)*sizeof(spm_int_t)); - spm->rowptr = malloc(nnz *sizeof(spm_int_t)); - assert( spm->colptr ); - assert( spm->rowptr ); - -#if !defined(PRECISION_p) - spm->values = malloc(nnz *sizeof(spm_complex64_t)); - assert( spm->values ); -#endif - - /* Building ia, ja and values*/ - colptr = spm->colptr; - rowptr = spm->rowptr; - valptr = (spm_complex64_t*)(spm->values); - - /* Building ia, ja and values*/ - *colptr = 1; - k = 1; /* Column index in the matrix ((i-1) * dim1 + j-1) */ - for(i=1; i<=dim2; i++) - { - for(j=1; j<=dim1; j++) - { - colptr[1] = colptr[0]; - - /* Diagonal value */ - *rowptr = k; -#if !defined(PRECISION_p) - if ( (j == dim1 || j == 1) && (i == dim2 || i == 1) ) - *valptr = (spm_complex64_t) 2.5; - else if (j == dim1 || j == 1 || i == dim2 || i == 1) - *valptr = (spm_complex64_t) 4.; - else - *valptr = (spm_complex64_t) 6.; -#endif - valptr++; rowptr++; colptr[1]++; - - /* Connexion along dimension 1 */ - if (j < dim1) { - *rowptr = k+1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; -#endif - valptr++; rowptr++; colptr[1]++; - } - - /* Connexion along dimension 2 */ - if (i < dim2) - { - if (j > 1) - { - *rowptr = k+dim1-1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; -#endif - valptr++; rowptr++; colptr[1]++; - - } - - *rowptr = k+dim1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; -#endif - valptr++; rowptr++; colptr[1]++; - - if (j < dim1) - { - *rowptr = k+dim1+1; -#if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; -#endif - valptr++; rowptr++; colptr[1]++; - - } - } - - colptr++; k++; - } - } - - assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz ); + (void)lalpha; (void)lbeta; + (void)degree; } /** @@ -289,9 +182,13 @@ z_spmExtendedLaplacian2D( spmatrix_t *spm, * * @ingroup spm_dev_driver * - * @brief Generate an extended laplacian matrix for a 27-points stencil with + * @brief Generate an extended laplacian matrix for a 3D 27-points stencil with * \f[ M = \alpha * D - \beta * A \f] * + * @remark: In complex, the Laplacian is set to symmetric. See + * z_spmLaplacian_7points() to get an hermitian Laplacian, or change the + * mtxtype field by hand. + * ******************************************************************************* * * @param[inout] spm @@ -308,20 +205,38 @@ z_spmExtendedLaplacian2D( spmatrix_t *spm, * @param[in] dim3 * contains the third dimension of the grid of the laplacian. * + * @param[in] alpha + * The alpha coefficient for the degree matrix + * + * @param[in] beta + * The beta coefficient for the adjacency matrix + * *******************************************************************************/ void -z_spmExtendedLaplacian3D( spmatrix_t *spm, - spm_int_t dim1, - spm_int_t dim2, - spm_int_t dim3 ) +z_spmLaplacian_27points( spmatrix_t *spm, + spm_int_t dim1, + spm_int_t dim2, + spm_int_t dim3, + spm_fixdbl_t alpha, + spm_fixdbl_t beta ) { spm_complex64_t *valptr; + /* + * See https://crd.lbl.gov/assets/pubs_presos/iwapt09-27pt.pdf for the + * meaning of alpha, beta, gamma, and delta. + * "Auto-tuning the 27-point Stencil for Multicore", K. Datta, S. Williams, + * V. Volkov, J. Carter, L. Oliker, J. Shalf, and K. Yelick + */ + spm_complex64_t lalpha = (spm_complex64_t)alpha; + spm_complex64_t lbeta = (spm_complex64_t)beta; + spm_complex64_t lgamma = (spm_complex64_t)beta; + spm_complex64_t ldelta = (spm_complex64_t)beta; spm_int_t *colptr, *rowptr; - spm_int_t i, j, k, l; - spm_int_t nnz = (2*dim1-1) * dim2 * dim3 - + (3*dim1-2) * (dim2-1) * dim3 - + ((3*dim1-2) * dim2 + 2 * (3*dim1-2) *(dim2-1)) * (dim3-1); + spm_int_t i, j, k, l, degree, d; + spm_int_t nnz = (2*dim1-1) * dim2 * dim3 + + (3*dim1-2) * (dim2-1) * dim3 + + ((3*dim1-2) * dim2 + 2 * (3*dim1-2) *(dim2-1)) * (dim3-1); spm->mtxtype = SpmSymmetric; spm->flttype = SpmComplex64; @@ -361,22 +276,36 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, /* Diagonal value */ *rowptr = l; #if !defined(PRECISION_p) - if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 4.75; - else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 10.; - else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 10.; - else if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) ) - *valptr = (spm_complex64_t) 10.; - else if ( (j != dim2 || j != 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) ) - *valptr = (spm_complex64_t) 7.; - else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k != dim1 || i != 1) ) - *valptr = (spm_complex64_t) 7.; - else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) ) - *valptr = (spm_complex64_t) 7.; - else - *valptr = (spm_complex64_t) 14.; + degree = 1; + d = 1; + + if (k > 1) { + d++; + } + if (k < dim1) { + d++; + } + degree = degree * d; + d = 1; + + if (j > 1) { + d++; + } + if (j < dim2) { + d++; + } + degree = degree * d; + d = 1; + + if (i > 1) { + d++; + } + if (i < dim3) { + d++; + } + degree = degree * d - 1; + + *valptr = (spm_complex64_t)degree * lalpha; #endif valptr++; rowptr++; colptr[1]++; @@ -384,7 +313,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, if (k < dim1) { *rowptr = l+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; } @@ -396,7 +325,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -404,7 +333,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; @@ -412,7 +341,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -427,7 +356,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2-dim1-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -435,7 +364,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1*dim2-dim1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -443,7 +372,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2-dim1+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -453,7 +382,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -461,7 +390,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1*dim2; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-1.; + *valptr = -lbeta; #endif valptr++; rowptr++; colptr[1]++; @@ -469,7 +398,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -481,7 +410,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2+dim1-1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -489,7 +418,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, *rowptr = l+dim1*dim2+dim1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.5; + *valptr = -lgamma; #endif valptr++; rowptr++; colptr[1]++; @@ -497,7 +426,7 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, { *rowptr = l+dim1*dim2+dim1+1; #if !defined(PRECISION_p) - *valptr = (spm_complex64_t)-0.25; + *valptr = -ldelta; #endif valptr++; rowptr++; colptr[1]++; @@ -511,4 +440,6 @@ z_spmExtendedLaplacian3D( spmatrix_t *spm, } assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz ); + (void)lalpha; (void)lbeta; (void)lgamma; (void)ldelta; + (void)degree; (void)d; } -- GitLab