From 1ddfc2fa76b0794f5baaa6bc15f0014e1a9a9d3c Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Mon, 7 May 2018 16:35:46 +0200
Subject: [PATCH] Factorize the laplacian code for the extended version and
 alpha/beta to the extended stencil

---
 src/drivers/laplacian.c |  29 +---
 src/drivers/laplacian.h |  16 +--
 src/z_spm_laplacian.c   | 289 +++++++++++++++-------------------------
 3 files changed, 122 insertions(+), 212 deletions(-)

diff --git a/src/drivers/laplacian.c b/src/drivers/laplacian.c
index e5edc26e..22a94af9 100644
--- a/src/drivers/laplacian.c
+++ b/src/drivers/laplacian.c
@@ -220,24 +220,14 @@ static void (*laplacian_7points[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_
     z_spmLaplacian_7points
 };
 
-static void (*extended_laplacian_table2D[6])(spmatrix_t *, spm_int_t, spm_int_t) =
+static void (*laplacian_27points[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_t, spm_fixdbl_t, spm_fixdbl_t) =
 {
-    p_spmExtendedLaplacian2D,
+    p_spmLaplacian_27points,
     NULL,
-    s_spmExtendedLaplacian2D,
-    d_spmExtendedLaplacian2D,
-    c_spmExtendedLaplacian2D,
-    z_spmExtendedLaplacian2D
-};
-
-static void (*extended_laplacian_table3D[6])(spmatrix_t *, spm_int_t, spm_int_t, spm_int_t) =
-{
-    p_spmExtendedLaplacian3D,
-    NULL,
-    s_spmExtendedLaplacian3D,
-    d_spmExtendedLaplacian3D,
-    c_spmExtendedLaplacian3D,
-    z_spmExtendedLaplacian3D
+    s_spmLaplacian_27points,
+    d_spmLaplacian_27points,
+    c_spmLaplacian_27points,
+    z_spmLaplacian_27points
 };
 
 /**
@@ -343,12 +333,7 @@ genExtendedLaplacian( const char    *filename,
     spm->flttype = flttype;
     spm->n = dim1 * dim2 * dim3;
 
-    if( dim3 > 0 ) {
-        extended_laplacian_table3D[spm->flttype](spm, dim1, dim2, dim3);
-    }
-    else if (dim2 > 0) {
-        extended_laplacian_table2D[spm->flttype](spm, dim1, dim2);
-    }
+    laplacian_27points[spm->flttype](spm, dim1, dim2, dim3, alpha, beta);
 
     return SPM_SUCCESS;
 }
diff --git a/src/drivers/laplacian.h b/src/drivers/laplacian.h
index 2954365e..cd2ef455 100644
--- a/src/drivers/laplacian.h
+++ b/src/drivers/laplacian.h
@@ -20,16 +20,10 @@ void d_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, sp
 void s_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
 void p_spmLaplacian_7points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
 
-void z_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 );
-void c_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 );
-void d_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 );
-void s_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 );
-void p_spmExtendedLaplacian2D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2 );
-
-void z_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 );
-void c_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 );
-void d_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 );
-void s_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 );
-void p_spmExtendedLaplacian3D( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3 );
+void z_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
+void c_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
+void d_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
+void s_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
+void p_spmLaplacian_27points( spmatrix_t *spm, spm_int_t dim1, spm_int_t dim2, spm_int_t dim3, spm_fixdbl_t alpha, spm_fixdbl_t beta );
 
 #endif /* _laplacian_h_ */
diff --git a/src/z_spm_laplacian.c b/src/z_spm_laplacian.c
index c191ef40..2e43a230 100644
--- a/src/z_spm_laplacian.c
+++ b/src/z_spm_laplacian.c
@@ -22,7 +22,7 @@
  *
  * @ingroup spm_dev_driver
  *
- * @brief Generate a laplacian matrix for a 7-points stencil
+ * @brief Generate a laplacian matrix for a 3D 7-points stencil
  * \f[ M = \alpha * D - \beta * A \f]
  *
  * Example:
@@ -57,6 +57,10 @@
  * @param[in] beta
  *          The beta coefficient for the adjacency matrix
  *
+ * @remark: In complex, the Laplacian is set to hermitian. See
+ * z_spmLaplacian_27points() to get a symmetric Laplacian, or change the
+ * mtxtype field by hand.
+ *
  *******************************************************************************/
 void
 z_spmLaplacian_7points( spmatrix_t   *spm,
@@ -68,8 +72,10 @@ z_spmLaplacian_7points( spmatrix_t   *spm,
 {
 
     spm_complex64_t *valptr;
+    spm_complex64_t  lalpha = (spm_complex64_t)alpha;
+    spm_complex64_t  lbeta  = (spm_complex64_t)beta;
     spm_int_t *colptr, *rowptr;
-    spm_int_t i, j, k, l;
+    spm_int_t i, j, k, l, degree;
     spm_int_t nnz = (2*(dim1)-1)*dim2*dim3 + (dim2-1)*dim1*dim3 + dim2*dim1*(dim3-1);
 
     spm->mtxtype  = SpmHermitian;
@@ -110,19 +116,27 @@ z_spmLaplacian_7points( spmatrix_t   *spm,
                 /* Diagonal value */
                 *rowptr = l;
 #if !defined(PRECISION_p)
-                *valptr = 6. * alpha;
-                if (k == 1)
-                    *valptr -= alpha;
-                if (k == dim1)
-                    *valptr -= alpha;
-                if (j == 1)
-                    *valptr -= alpha;
-                if (j == dim2)
-                    *valptr -= alpha;
-                if (i == 1)
-                    *valptr -= alpha;
-                if (i == dim3)
-                    *valptr -= alpha;
+                degree = 0;
+                if (k > 1) {
+                    degree++;
+                }
+                if (k < dim1) {
+                    degree++;
+                }
+                if (j > 1) {
+                    degree++;
+                }
+                if (j < dim2) {
+                    degree++;
+                }
+                if (i > 1) {
+                    degree++;
+                }
+                if (i < dim3) {
+                    degree++;
+                }
+
+                *valptr = (spm_complex64_t)degree * lalpha;
 #endif
                 valptr++; rowptr++; colptr[1]++;
 
@@ -130,7 +144,7 @@ z_spmLaplacian_7points( spmatrix_t   *spm,
                 if (k < dim1) {
                     *rowptr = l+1;
 #if !defined(PRECISION_p)
-                    *valptr = -beta;
+                    *valptr = -lbeta;
 #endif
                     valptr++; rowptr++; colptr[1]++;
                 }
@@ -139,7 +153,7 @@ z_spmLaplacian_7points( spmatrix_t   *spm,
                 if (j < dim2) {
                     *rowptr = l+dim1;
 #if !defined(PRECISION_p)
-                    *valptr = -beta;
+                    *valptr = -lbeta;
 #endif
                     valptr++; rowptr++; colptr[1]++;
                 }
@@ -148,7 +162,7 @@ z_spmLaplacian_7points( spmatrix_t   *spm,
                 if (i < dim3) {
                     *rowptr = l+dim1*dim2;
 #if !defined(PRECISION_p)
-                    *valptr = -beta;
+                    *valptr = -lbeta;
 #endif
                     valptr++; rowptr++; colptr[1]++;
                 }
@@ -159,129 +173,8 @@ z_spmLaplacian_7points( spmatrix_t   *spm,
     }
 
     assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz );
-    (void)alpha; (void)beta;
-}
-
-/**
- *******************************************************************************
- *
- * @ingroup spm_dev_driver
- *
- * z_spmExtendedLaplacian2D - Generate a 2D extended laplacian matrix.
- *
- *******************************************************************************
- *
- * @param[inout] spm
- *          At start, an allocated spm structure.
- *          Contains the size of the laplacian in spm->n.
- *          At exit, contains the matrix in csc format.
- *
- * @param[in] dim1
- *          contains the first dimension of the 2D grid of the laplacian.
- *
- * @param[in] dim2
- *          contains the second dimension of the 2D grid of the laplacian.
- *
- *******************************************************************************/
-void
-z_spmExtendedLaplacian2D( spmatrix_t  *spm,
-                          spm_int_t   dim1,
-                          spm_int_t   dim2 )
-{
-    spm_complex64_t *valptr;
-    spm_int_t *colptr, *rowptr;
-    spm_int_t i, j, k;
-    spm_int_t nnz = (2*(dim1)-1)*dim2 + (dim2-1)*(3*dim1-2);
-
-    spm->mtxtype  = SpmSymmetric;
-    spm->flttype  = SpmComplex64;
-    spm->fmttype  = SpmCSC;
-    spm->nnz      = nnz;
-    spm->dof      = 1;
-
-    assert( spm->n == dim1*dim2 );
-
-    /* Allocating */
-    spm->colptr = malloc((spm->n+1)*sizeof(spm_int_t));
-    spm->rowptr = malloc(nnz       *sizeof(spm_int_t));
-    assert( spm->colptr );
-    assert( spm->rowptr );
-
-#if !defined(PRECISION_p)
-    spm->values = malloc(nnz       *sizeof(spm_complex64_t));
-    assert( spm->values );
-#endif
-
-    /* Building ia, ja and values*/
-    colptr = spm->colptr;
-    rowptr = spm->rowptr;
-    valptr = (spm_complex64_t*)(spm->values);
-
-    /* Building ia, ja and values*/
-    *colptr = 1;
-    k = 1; /* Column index in the matrix ((i-1) * dim1 + j-1) */
-    for(i=1; i<=dim2; i++)
-    {
-        for(j=1; j<=dim1; j++)
-        {
-            colptr[1] = colptr[0];
-
-            /* Diagonal value */
-            *rowptr = k;
-#if !defined(PRECISION_p)
-            if ( (j == dim1 || j == 1) && (i == dim2 || i == 1) )
-                *valptr = (spm_complex64_t) 2.5;
-            else if (j == dim1 || j == 1 || i == dim2 || i == 1)
-                *valptr = (spm_complex64_t) 4.;
-            else
-                *valptr = (spm_complex64_t) 6.;
-#endif
-            valptr++; rowptr++; colptr[1]++;
-
-            /* Connexion along dimension 1 */
-            if (j < dim1) {
-                *rowptr = k+1;
-#if !defined(PRECISION_p)
-                *valptr = (spm_complex64_t)-1.;
-#endif
-                valptr++; rowptr++; colptr[1]++;
-            }
-
-            /* Connexion along dimension 2 */
-            if (i < dim2)
-            {
-                if (j > 1)
-                {
-                    *rowptr = k+dim1-1;
-#if !defined(PRECISION_p)
-                    *valptr = (spm_complex64_t)-0.5;
-#endif
-                    valptr++; rowptr++; colptr[1]++;
-
-                }
-
-                *rowptr = k+dim1;
-#if !defined(PRECISION_p)
-                *valptr = (spm_complex64_t)-1.;
-#endif
-                valptr++; rowptr++; colptr[1]++;
-
-                if (j < dim1)
-                {
-                    *rowptr = k+dim1+1;
-#if !defined(PRECISION_p)
-                    *valptr = (spm_complex64_t)-0.5;
-#endif
-                    valptr++; rowptr++; colptr[1]++;
-
-                }
-            }
-
-            colptr++; k++;
-        }
-    }
-
-    assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz );
+    (void)lalpha; (void)lbeta;
+    (void)degree;
 }
 
 /**
@@ -289,9 +182,13 @@ z_spmExtendedLaplacian2D( spmatrix_t  *spm,
  *
  * @ingroup spm_dev_driver
  *
- * @brief Generate an extended laplacian matrix for a 27-points stencil with
+ * @brief Generate an extended laplacian matrix for a 3D 27-points stencil with
  * \f[ M = \alpha * D - \beta * A \f]
  *
+ * @remark: In complex, the Laplacian is set to symmetric. See
+ * z_spmLaplacian_7points() to get an hermitian Laplacian, or change the
+ * mtxtype field by hand.
+ *
  *******************************************************************************
  *
  * @param[inout] spm
@@ -308,20 +205,38 @@ z_spmExtendedLaplacian2D( spmatrix_t  *spm,
  * @param[in] dim3
  *          contains the third dimension of the grid of the laplacian.
  *
+ * @param[in] alpha
+ *          The alpha coefficient for the degree matrix
+ *
+ * @param[in] beta
+ *          The beta coefficient for the adjacency matrix
+ *
  *******************************************************************************/
 void
-z_spmExtendedLaplacian3D( spmatrix_t   *spm,
-                          spm_int_t    dim1,
-                          spm_int_t    dim2,
-                          spm_int_t    dim3 )
+z_spmLaplacian_27points( spmatrix_t   *spm,
+                         spm_int_t    dim1,
+                         spm_int_t    dim2,
+                         spm_int_t    dim3,
+                         spm_fixdbl_t alpha,
+                         spm_fixdbl_t beta )
 {
 
     spm_complex64_t *valptr;
+    /*
+     * See https://crd.lbl.gov/assets/pubs_presos/iwapt09-27pt.pdf for the
+     * meaning of alpha, beta, gamma, and delta.
+     * "Auto-tuning the 27-point Stencil for Multicore", K. Datta, S. Williams,
+     * V. Volkov, J. Carter, L. Oliker, J. Shalf, and K. Yelick
+     */
+    spm_complex64_t  lalpha = (spm_complex64_t)alpha;
+    spm_complex64_t  lbeta  = (spm_complex64_t)beta;
+    spm_complex64_t  lgamma = (spm_complex64_t)beta;
+    spm_complex64_t  ldelta = (spm_complex64_t)beta;
     spm_int_t *colptr, *rowptr;
-    spm_int_t i, j, k, l;
-    spm_int_t nnz = (2*dim1-1) * dim2     * dim3
-        +              (3*dim1-2) * (dim2-1) * dim3
-        +             ((3*dim1-2) * dim2 + 2 * (3*dim1-2) *(dim2-1)) * (dim3-1);
+    spm_int_t i, j, k, l, degree, d;
+    spm_int_t nnz = (2*dim1-1) *  dim2    * dim3
+        +           (3*dim1-2) * (dim2-1) * dim3
+        +          ((3*dim1-2) *  dim2 + 2 * (3*dim1-2) *(dim2-1)) * (dim3-1);
 
     spm->mtxtype  = SpmSymmetric;
     spm->flttype  = SpmComplex64;
@@ -361,22 +276,36 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                 /* Diagonal value */
                 *rowptr = l;
 #if !defined(PRECISION_p)
-                if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) )
-                    *valptr = (spm_complex64_t) 4.75;
-                else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) )
-                    *valptr = (spm_complex64_t) 10.;
-                else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) )
-                    *valptr = (spm_complex64_t) 10.;
-                else if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) )
-                    *valptr = (spm_complex64_t) 10.;
-                else if ( (j != dim2 || j != 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) )
-                    *valptr = (spm_complex64_t) 7.;
-                else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k != dim1 || i != 1) )
-                    *valptr = (spm_complex64_t) 7.;
-                else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) )
-                    *valptr = (spm_complex64_t) 7.;
-                else
-                    *valptr = (spm_complex64_t) 14.;
+                degree = 1;
+                d = 1;
+
+                if (k > 1) {
+                    d++;
+                }
+                if (k < dim1) {
+                    d++;
+                }
+                degree = degree * d;
+                d = 1;
+
+                if (j > 1) {
+                    d++;
+                }
+                if (j < dim2) {
+                    d++;
+                }
+                degree = degree * d;
+                d = 1;
+
+                if (i > 1) {
+                    d++;
+                }
+                if (i < dim3) {
+                    d++;
+                }
+                degree = degree * d - 1;
+
+                *valptr = (spm_complex64_t)degree * lalpha;
 #endif
                 valptr++; rowptr++; colptr[1]++;
 
@@ -384,7 +313,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                 if (k < dim1) {
                     *rowptr = l+1;
 #if !defined(PRECISION_p)
-                    *valptr = (spm_complex64_t)-1.;
+                    *valptr = -lbeta;
 #endif
                     valptr++; rowptr++; colptr[1]++;
                 }
@@ -396,7 +325,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                     {
                         *rowptr = l+dim1-1;
 #if !defined(PRECISION_p)
-                        *valptr = (spm_complex64_t)-0.5;
+                        *valptr = -lgamma;
 #endif
                         valptr++; rowptr++; colptr[1]++;
 
@@ -404,7 +333,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
 
                     *rowptr = l+dim1;
 #if !defined(PRECISION_p)
-                    *valptr = (spm_complex64_t)-1.;
+                    *valptr = -lbeta;
 #endif
                     valptr++; rowptr++; colptr[1]++;
 
@@ -412,7 +341,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                     {
                         *rowptr = l+dim1+1;
 #if !defined(PRECISION_p)
-                        *valptr = (spm_complex64_t)-0.5;
+                        *valptr = -lgamma;
 #endif
                         valptr++; rowptr++; colptr[1]++;
 
@@ -427,7 +356,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                         {
                             *rowptr = l+dim1*dim2-dim1-1;
 #if !defined(PRECISION_p)
-                            *valptr = (spm_complex64_t)-0.25;
+                            *valptr = -ldelta;
 #endif
                             valptr++; rowptr++; colptr[1]++;
 
@@ -435,7 +364,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
 
                         *rowptr = l+dim1*dim2-dim1;
 #if !defined(PRECISION_p)
-                        *valptr = (spm_complex64_t)-0.5;
+                        *valptr = -lgamma;
 #endif
                         valptr++; rowptr++; colptr[1]++;
 
@@ -443,7 +372,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                         {
                             *rowptr = l+dim1*dim2-dim1+1;
 #if !defined(PRECISION_p)
-                            *valptr = (spm_complex64_t)-0.25;
+                            *valptr = -ldelta;
 #endif
                             valptr++; rowptr++; colptr[1]++;
 
@@ -453,7 +382,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                     {
                         *rowptr = l+dim1*dim2-1;
 #if !defined(PRECISION_p)
-                        *valptr = (spm_complex64_t)-0.5;
+                        *valptr = -lgamma;
 #endif
                         valptr++; rowptr++; colptr[1]++;
 
@@ -461,7 +390,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
 
                     *rowptr = l+dim1*dim2;
 #if !defined(PRECISION_p)
-                    *valptr = (spm_complex64_t)-1.;
+                    *valptr = -lbeta;
 #endif
                     valptr++; rowptr++; colptr[1]++;
 
@@ -469,7 +398,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                     {
                         *rowptr = l+dim1*dim2+1;
 #if !defined(PRECISION_p)
-                        *valptr = (spm_complex64_t)-0.5;
+                        *valptr = -lgamma;
 #endif
                         valptr++; rowptr++; colptr[1]++;
 
@@ -481,7 +410,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                         {
                             *rowptr = l+dim1*dim2+dim1-1;
 #if !defined(PRECISION_p)
-                            *valptr = (spm_complex64_t)-0.25;
+                            *valptr = -ldelta;
 #endif
                             valptr++; rowptr++; colptr[1]++;
 
@@ -489,7 +418,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
 
                         *rowptr = l+dim1*dim2+dim1;
 #if !defined(PRECISION_p)
-                        *valptr = (spm_complex64_t)-0.5;
+                        *valptr = -lgamma;
 #endif
                         valptr++; rowptr++; colptr[1]++;
 
@@ -497,7 +426,7 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
                         {
                             *rowptr = l+dim1*dim2+dim1+1;
 #if !defined(PRECISION_p)
-                            *valptr = (spm_complex64_t)-0.25;
+                            *valptr = -ldelta;
 #endif
                             valptr++; rowptr++; colptr[1]++;
 
@@ -511,4 +440,6 @@ z_spmExtendedLaplacian3D( spmatrix_t   *spm,
     }
 
     assert( (spm->colptr[ spm->n ] - spm->colptr[0]) == nnz );
+    (void)lalpha; (void)lbeta; (void)lgamma; (void)ldelta;
+    (void)degree; (void)d;
 }
-- 
GitLab