From 53289a3ea68bdadd6f58383a6c55c3fa3221bf0d Mon Sep 17 00:00:00 2001
From: Alban Bellot <alban.bellot@inria.fr>
Date: Tue, 19 Jul 2016 11:09:40 +0200
Subject: [PATCH] matvec test with dofs

---
 CMakeLists.txt         |   2 +
 z_spm.h                |   4 ++
 z_spm_convert_to_csc.c |   1 +
 z_spm_matrixvector.c   | 116 ++++++++++++++++++++++++++++++++---------
 z_spm_norm.c           |  73 +++++++++++++++++---------
 5 files changed, 145 insertions(+), 51 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4dbd384a..21caac4e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ set(SOURCES
   z_spm_genrhs.c
   z_spm_matrixvector.c
   z_spm_norm.c
+  z_spm_2dense.c
+  z_spm_print.c
   )
 
 set(spm_headers
diff --git a/z_spm.h b/z_spm.h
index 877da184..73b31693 100644
--- a/z_spm.h
+++ b/z_spm.h
@@ -37,4 +37,8 @@ pastix_int_t z_spmSymmetrize( pastix_spm_t *csc );
 int z_spmGenRHS(int type, int nrhs, const pastix_spm_t *spm, void *x, int ldx, void *b, int ldb );
 int z_spmCheckAxb( int nrhs, const pastix_spm_t *spm, void *x0, int ldx0, void *b, int ldb, const void *x, int ldx );
 
+pastix_complex64_t *z_spm2dense( const pastix_spm_t *spm );
+void z_spmDensePrint( pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda );
+void z_spmPrint( const pastix_spm_t *spm );
+
 #endif /* _z_spm_H_ */
diff --git a/z_spm_convert_to_csc.c b/z_spm_convert_to_csc.c
index 72b96a23..d07996f4 100644
--- a/z_spm_convert_to_csc.c
+++ b/z_spm_convert_to_csc.c
@@ -204,6 +204,7 @@ z_spmConvertCSR2CSC( pastix_spm_t *spm )
         assert( val_csc );
 #endif
 
+
         /* Count the number of elements per column */
         for (j=0; j<nnz; j++) {
             col = spm->colptr[j] - baseval;
diff --git a/z_spm_matrixvector.c b/z_spm_matrixvector.c
index 7db1de70..4b0cf889 100644
--- a/z_spm_matrixvector.c
+++ b/z_spm_matrixvector.c
@@ -73,7 +73,9 @@ z_spmGeCSCv(      int                 trans,
     const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values;
     const pastix_complex64_t *xptr   = x;
     pastix_complex64_t *yptr = y;
-    pastix_int_t col, row, i, baseval;
+    pastix_int_t col, row, i, j, baseval;
+    pastix_int_t ii, jj, k, dofi, dofj;
+    pastix_int_t *dofs=csc->dofs;
 
     if ( (csc == NULL) || (x == NULL) || (y == NULL ) )
     {
@@ -89,10 +91,10 @@ z_spmGeCSCv(      int                 trans,
 
     /* first, y = beta*y */
     if( beta == 0. ) {
-        memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) );
+        memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) );
     }
     else {
-        for( i=0; i<csc->gN; i++, yptr++ ) {
+        for( i=0; i<csc->gNexp; i++, yptr++ ) {
             (*yptr) *= beta;
         }
         yptr = y;
@@ -104,12 +106,22 @@ z_spmGeCSCv(      int                 trans,
          */
         if( trans == PastixNoTrans )
         {
-            for( col=0; col < csc->gN; col++ )
+            for( i=0; i < csc->gN; i++ )
             {
-                for( i=csc->colptr[col]; i<csc->colptr[col+1]; i++ )
+                dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i];
+                col=dofs[i];
+                for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ )
                 {
-                    row = csc->rowptr[i-baseval]-baseval;
-                    yptr[row] += alpha * valptr[i-baseval] * xptr[col];
+                    j = csc->rowptr[k-baseval]-baseval;
+                    dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j];
+                    row=dofs[j];
+                    for(ii=0; ii<dofi; ii++)
+                    {
+                        for(jj=0; jj<dofj; jj++, valptr++)
+                        {
+                            yptr[row+jj] += alpha * (*valptr) * xptr[col+ii];
+                        }
+                    }
                 }
             }
         }
@@ -118,15 +130,26 @@ z_spmGeCSCv(      int                 trans,
          */
         else if( trans == PastixTrans )
         {
-            for( col=0; col < csc->gN; col++ )
+            for( i=0; i < csc->gN; i++ )
             {
-                for( i=csc->colptr[col]; i<csc->colptr[col+1]; i++ )
+                dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i];
+                col=dofs[i];
+                for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ )
                 {
-                    row = csc->rowptr[i-baseval]-baseval;
-                    yptr[col] += alpha * valptr[i-baseval] * xptr[row];
+                    j = csc->rowptr[k-baseval]-baseval;
+                    dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j];
+                    row=dofs[j];
+                    for(ii=0; ii<dofi; ii++)
+                    {
+                        for(jj=0; jj<dofj; jj++, valptr++)
+                        {
+                            yptr[col+ii] += alpha * (*valptr) * xptr[row+jj];
+                        }
+                    }
                 }
             }
         }
+
 #if defined(PRECISION_c) || defined(PRECISION_z)
         else if( trans == PastixConjTrans )
         {
@@ -194,7 +217,10 @@ z_spmSyCSCv(      pastix_complex64_t  alpha,
     const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values;
     const pastix_complex64_t *xptr   = x;
     pastix_complex64_t *yptr = y;
-    pastix_int_t col, row, i, baseval;
+    pastix_int_t col, row, i, j, baseval;
+    pastix_int_t ii, jj, k, dofi, dofj;
+    pastix_int_t *dofs=csc->dofs;
+
 
     if ( (csc == NULL) || (x == NULL) || (y == NULL ) )
     {
@@ -210,30 +236,40 @@ z_spmSyCSCv(      pastix_complex64_t  alpha,
 
     /* First, y = beta*y */
     if( beta == 0. ) {
-        memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) );
+        memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) );
     }
     else {
-        for( i=0; i<csc->gN; i++, yptr++ ) {
+        for( i=0; i<csc->gNexp; i++, yptr++ ) {
             (*yptr) *= beta;
         }
         yptr = y;
     }
 
-    if( alpha != 0. ) {
-        for( col=0; col < csc->gN; col++ )
+    if(alpha != 0.)
+    {
+        for( i=0; i < csc->gN; i++ )
         {
-            for( i=csc->colptr[col]; i < csc->colptr[col+1]; i++ )
+            dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i];
+            col = dofs[i];
+            for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ )
             {
-                row = csc->rowptr[i-baseval]-baseval;
-                yptr[row] += alpha * valptr[i-baseval] * xptr[col];
-                if( col != row )
+                j = csc->rowptr[k-baseval]-baseval;
+                dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j];
+                row = dofs[j];
+                for(ii=0; ii<dofi; ii++)
                 {
-                    yptr[col] += alpha * valptr[i-baseval] * xptr[row];
+                    for(jj=0; jj<dofj; jj++, valptr++)
+                    {
+                        yptr[row+jj] += alpha * (*valptr) * xptr[col+ii];
+                        if( i != j )
+                        {
+                            yptr[col+ii] += alpha * (*valptr) * xptr[row+jj];
+                        }
+                    }
                 }
             }
         }
     }
-
     return PASTIX_SUCCESS;
 }
 
@@ -283,7 +319,10 @@ z_spmHeCSCv(      pastix_complex64_t  alpha,
     const pastix_complex64_t *valptr = (pastix_complex64_t*)csc->values;
     const pastix_complex64_t *xptr   = x;
     pastix_complex64_t *yptr = y;
-    pastix_int_t col, row, i, baseval;
+    pastix_int_t col, row, i, j, baseval;
+    pastix_int_t ii, jj, k, dofi, dofj;
+    pastix_int_t *dofs=csc->dofs;
+
 
     if ( (csc == NULL) || (x == NULL) || (y == NULL ) )
     {
@@ -297,17 +336,43 @@ z_spmHeCSCv(      pastix_complex64_t  alpha,
 
     /* First, y = beta*y */
     if( beta == 0. ) {
-        memset( yptr, 0, csc->gN * sizeof(pastix_complex64_t) );
+        memset( yptr, 0, csc->gNexp * sizeof(pastix_complex64_t) );
     }
     else {
-        for( i=0; i<csc->gN; i++, yptr++ ) {
+        for( i=0; i<csc->gNexp; i++, yptr++ ) {
             (*yptr) *= beta;
         }
         yptr = y;
     }
 
     baseval = spmFindBase( csc );
+    if( alpha != 0.)
+    {
+        for( i=0; i < csc->gN; i++ )
+        {
+            dofi = ( csc->dof > 0 ) ? csc->dof : dofs[i+1] - dofs[i];
+            col = dofs[i];
+            for( k=csc->colptr[i]; k<csc->colptr[i+1]; k++ )
+            {
+                j = csc->rowptr[k-baseval]-baseval;
+                dofj = ( csc->dof > 0 ) ? csc->dof : dofs[j+1] - dofs[j];
+                row = dofs[j];
+                for(ii=0; ii<dofi; ii++)
+                {
+                    for(jj=0; jj<dofj; jj++, valptr++)
+                    {
+                        yptr[row+jj] += alpha * (*valptr) * xptr[col+ii];
+                        if( i != j )
+                        {
+                            yptr[col+ii] += alpha * conj( *valptr ) * xptr[row+jj];
+                        }
+                    }
+                }
+            }
+        }
+    }
 
+    /*
     if( alpha != 0. ) {
         for( col=0; col < csc->gN; col++ )
         {
@@ -320,6 +385,7 @@ z_spmHeCSCv(      pastix_complex64_t  alpha,
             }
         }
     }
+     */
 
     return PASTIX_SUCCESS;
 }
diff --git a/z_spm_norm.c b/z_spm_norm.c
index 4e4a594d..9d45ade4 100644
--- a/z_spm_norm.c
+++ b/z_spm_norm.c
@@ -50,7 +50,7 @@ z_spmFrobeniusNorm( const pastix_spm_t *spm )
     double sumsq = 0.;
 
     if (spm->mtxtype == PastixGeneral) {
-        for(i=0; i <spm->nnz; i++, valptr++) {
+        for(i=0; i <spm->nnzexp; i++, valptr++) {
             frobenius_update( 1, &scale, &sumsq, valptr );
 
 #if defined(PRECISION_z) || defined(PRECISION_c)
@@ -62,20 +62,31 @@ z_spmFrobeniusNorm( const pastix_spm_t *spm )
     else {
         pastix_int_t *colptr = spm->colptr;
         pastix_int_t *rowptr = spm->rowptr;
-        int nb;
+        int nb, dofi, dofj, ii, jj;
+        pastix_int_t *dofs=spm->dofs;
         baseval = spmFindBase( spm );
 
         switch( spm->fmttype ){
         case PastixCSC:
-            for(i=0; i<spm->n; i++, colptr++) {
-                for(j=colptr[0]; j<colptr[1]; j++, rowptr++, valptr++) {
-                    nb = ( i == (*rowptr-baseval) ) ? 1 : 2;
-                    frobenius_update( nb, &scale, &sumsq, valptr );
+            for(i=0; i<spm->n; i++, colptr++)
+            {
+                dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i];
+                for(j=colptr[0]; j<colptr[1]; j++, rowptr++)
+                {
+                    dofj = ( spm->dof > 0 ) ? spm->dof : dofs[(*rowptr-baseval)+1] - dofs[(*rowptr-baseval)];
+                    for(ii=0 ; ii < dofi; ii++)
+                    {
+                        for(jj=0; jj < dofj ;jj++,valptr++)
+                        {
+                            nb = ( i == (*rowptr-baseval) ) ? 1 : 2;
+                            frobenius_update( nb, &scale, &sumsq, valptr );
 
 #if defined(PRECISION_z) || defined(PRECISION_c)
-                    valptr++;
-                    frobenius_update( nb, &scale, &sumsq, valptr );
+                            valptr++;
+                            frobenius_update( nb, &scale, &sumsq, valptr );
 #endif
+                        }
+                    }
                 }
             }
             break;
@@ -211,7 +222,7 @@ z_spmInfNorm( const pastix_spm_t *spm )
          */
 
         /* Dofs */
-        for( i=0; i < spm->n; i++ )
+        for( i=0; i < spm->n; i++)
         {
             dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i];
             for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++)
@@ -230,7 +241,6 @@ z_spmInfNorm( const pastix_spm_t *spm )
                 }
             }
         }
-
         /* Add the symmetric/hermitian part */
         if ( (spm->mtxtype == PastixHermitian) ||
              (spm->mtxtype == PastixSymmetric) )
@@ -244,16 +254,20 @@ z_spmInfNorm( const pastix_spm_t *spm )
                 {
                     j = spm->rowptr[k - baseval] - baseval;
                     dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j];
-                    for(ii=0; ii < dofi; ii++)
-                        for(jj=0; jj < dofj; jj++,valptr++)
+                    if(i != j)
+                    {
+                        for(ii=0; ii < dofi; ii++)
                         {
+                            for(jj=0; jj < dofj; jj++, valptr++)
                             {
-                                if(i != j)
-                                {
-                                    sumcol[col + ii] += cabs( *valptr );
-                                }
+                                sumcol[col + ii] += cabs( *valptr );
                             }
                         }
+                    }
+                    else
+                    {
+                        valptr += dofi * dofj;
+                    }
                 }
             }
         }
@@ -368,18 +382,20 @@ z_spmOneNorm( const pastix_spm_t *spm )
         for(i=0; i < spm->n; i++)
         {
             col = dofs[i];
-            dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - spm->dofs[i];
+            dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i];
             for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++)
             {
                 j = spm->rowptr[k - baseval] - baseval;
-                dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - spm->dofs[j];
+                dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j];
                 for(ii=0; ii < dofi; ii++)
+                {
                     for(jj=0; jj < dofj; jj++, valptr++)
                     {
                         {
                             sumrow[col + ii] += cabs( *valptr );
                         }
                     }
+                }
             }
         }
         /* Add the symmetric/hermitian part */
@@ -389,22 +405,27 @@ z_spmOneNorm( const pastix_spm_t *spm )
             valptr = (pastix_complex64_t*)spm->values;
             for(i=0; i < spm->n; i++)
             {
-                dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - spm->dofs[i];
+                dofi = ( spm->dof > 0 ) ? spm->dof : dofs[i+1] - dofs[i];
                 for(k=spm->colptr[i]; k < spm->colptr[i+1]; k++)
                 {
                     j = spm->rowptr[k - baseval] - baseval;
                     row = dofs[j];
-                    dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - spm->dofs[j];
-                    for(ii=0; ii < dofi; ii++)
-                        for(jj=0; jj < dofj; jj++, valptr++)
+                    dofj = ( spm->dof > 0 ) ? spm->dof : dofs[j+1] - dofs[j];
+                    if(i != j)
+                    {
+                        for(ii=0; ii < dofi; ii++)
                         {
+                            for(jj=0; jj < dofj; jj++, valptr++)
                             {
-                                if(i != j)
-                                {
-                                    sumrow[row + jj] += cabs( *valptr );
-                                }
+
+                                sumrow[row + jj] += cabs( *valptr );
                             }
                         }
+                    }
+                    else
+                    {
+                        valptr += dofi * dofj;
+                    }
                 }
             }
         }
-- 
GitLab