From e2922b3f2e2bdbfe4249a1bf62fece79bdcdb61f Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Thu, 14 May 2020 16:49:06 +0200
Subject: [PATCH] Add frobenius_merge functions from pastix

---
 src/frobeniusupdate.h | 74 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/frobeniusupdate.h b/src/frobeniusupdate.h
index 90cbdad1..e232ad26 100644
--- a/src/frobeniusupdate.h
+++ b/src/frobeniusupdate.h
@@ -20,16 +20,20 @@
 /**
  *******************************************************************************
  *
- * @ingroup pastix_internal
+ * @ingroup spm_internal
  *
- * frobenius_update - Update the couple (scale, sumsq) with one element when
- * computing the Froebnius norm.
+ * @brief Update the couple (scale, sumsq) with one element when computing the
+ * Froebnius norm.
  *
  * The frobenius norm is equal to scale * sqrt( sumsq ), this method allows to
  * avoid overflow in the sum square computation.
  *
  *******************************************************************************
  *
+ * @param[in] nb
+ *          The number of time the value must be integrated into the couple
+ *          (scale, sumsq)
+ *
  * @param[inout] scale
  *           On entry, the former scale
  *           On exit, the update scale to take into account the value
@@ -44,7 +48,7 @@
  *******************************************************************************/
 static inline void
 #if defined(PRECISION_d) || defined(PRECISION_z)
-frobenius_update( int nb, double *scale, double *sumsq, double *value )
+frobenius_update( int nb, double *scale, double *sumsq, const double *value )
 {
     double absval = fabs(*value);
     double ratio;
@@ -60,7 +64,7 @@ frobenius_update( int nb, double *scale, double *sumsq, double *value )
     }
 }
 #elif defined(PRECISION_s) || defined(PRECISION_c)
-frobenius_update( int nb, float *scale, float *sumsq, float *value )
+frobenius_update( int nb, float *scale, float *sumsq, const float *value )
 {
     float absval = fabs(*value);
     float ratio;
@@ -77,4 +81,64 @@ frobenius_update( int nb, float *scale, float *sumsq, float *value )
 }
 #endif
 
+/**
+ *******************************************************************************
+ *
+ * @ingroup spm_internal
+ *
+ * @brief Merge together two sum square stored as a couple (scale, sumsq).
+ *
+ * The frobenius norm is equal to scale * sqrt( sumsq ), this method allows to
+ * avoid overflow in the sum square computation.
+ *
+ *******************************************************************************
+ *
+ * @param[in] scl_in
+ *           The scale factor of the first couple to merge
+ *
+ * @param[in] ssq_in
+ *           The sumsquare factor of the first couple to merge
+ *
+ * @param[inout] scl_out
+ *           On entry, the scale factor of the second couple to merge
+ *           On exit, the updated scale factor.
+ *
+ * @param[inout] ssq_out
+ *           The sumsquare factor of the second couple to merge
+ *           On exit, the updated sumsquare factor.
+ *
+ *******************************************************************************/
+static inline void
+#if defined(PRECISION_d) || defined(PRECISION_z)
+frobenius_merge( double  scl_in,  double  ssq_in,
+                 double *scl_out, double *ssq_out )
+{
+    double ratio;
+    if ( (*scl_out) < scl_in ) {
+        ratio  = (*scl_out) / scl_in;
+        *ssq_out = (*ssq_out) * ratio * ratio + ssq_in;
+        *scl_out = scl_in;
+    }
+    else {
+        ratio  = scl_in / (*scl_out);
+        *ssq_out = (*ssq_out) + ssq_in * ratio * ratio;
+    }
+}
+#else
+frobenius_merge( float  scl_in,  float ssq_in,
+                 float *scl_out, float *ssq_out )
+{
+    float ratio;
+    if ( (*scl_out) < scl_in ) {
+        ratio  = (*scl_out) / scl_in;
+        *ssq_out = (*ssq_out) * ratio * ratio + ssq_in;
+        *scl_out = scl_in;
+    }
+    else {
+        ratio  = scl_in / (*scl_out);
+        *ssq_out = (*ssq_out) + ssq_in * ratio * ratio;
+    }
+}
+#endif
+
 #endif /* _frobeniusupdate_h_ */
-- 
GitLab