From a9ef2a57d258eff4d5db8dd4273d65725f17370c Mon Sep 17 00:00:00 2001
From: Cyrille Piacibello <cyrille@cyrille-laptop.cyrille>
Date: Thu, 7 Apr 2016 17:18:00 +0200
Subject: [PATCH] Works in progress, Skeleton for BlockWP class and
 HessExtended written, some tests added

---
 CMakeLists.txt          |  12 +++-
 src/Block.hpp           |  61 +++++++++-----------
 src/BlockArnoldiIb.hpp  |  10 +++-
 src/BlockGMRes.hpp      |   2 +-
 src/BlockWP.hpp         | 104 ++++++++++++++++++++++++++++++++++
 src/HessExtended.hpp    | 121 ++++++++++++++++++++++++++++++++++++++++
 src/LapackInterface.hpp |  40 ++++++++++---
 test/CMakeLists.txt     |   2 +
 test/testBlockGMRes.cpp |   2 +-
 test/testBlockSvd.cpp   |  40 +++++++++++++
 test/testBlockWP.cpp    |  35 ++++++++++++
 11 files changed, 380 insertions(+), 49 deletions(-)
 create mode 100644 src/BlockWP.hpp
 create mode 100644 src/HessExtended.hpp
 create mode 100644 test/testBlockSvd.cpp
 create mode 100644 test/testBlockWP.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2291599..09e8e42 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,9 +50,17 @@ endif(LAPACKE_FOUND)
 
 option(DEBUG_MODE "Set to On to compile with debug info" OFF)
 option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
+option(BUILD_WARNINGS "Build with warning options" ON)
+
 
 if(DEBUG_MODE)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -W -Wall -fdiagnostics-color=always")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
+else()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+endif()
+
+if(BUILD_WARNINGS)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always -Wall -W")
 endif()
 
 if(BUILD_SHARED_LIBS)
@@ -70,7 +78,7 @@ if(BUILD_SHARED_LIBS)
   set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+
 
 #Ajout d'un repertoire src
 add_subdirectory(src)
diff --git a/src/Block.hpp b/src/Block.hpp
index 95fe7c9..bde5d4a 100644
--- a/src/Block.hpp
+++ b/src/Block.hpp
@@ -107,16 +107,17 @@ public:
     /**
      * @brief Copy input Block datas inside
      */
-    void CopyBlock(Block<Scalar,Primary>& in){
+    void CopyBlock(Block<Scalar,Primary>& in, int nbColToCopy,int start=0){
         //Check
-        if(in.getSizeBlock() != SizeBlock){
-            std::cout<<"SizeBlock different between input block and current block\n";
-            exit(0);
-        }
         if(in.getLeadingDim() != ldb){
             std::cout<<"Leading Dim different between input block and current block\n";
             exit(0);
         }
+        if(nbColToCopy != SizeBlock){
+            std::cout<<"SizeBlock different between input clo to copy"
+                     <<" and current block\n";
+            exit(0);
+        }
         for(int i=0 ; i<SizeBlock ; ++i){
             memcpy(getPtr(i),in.getPtr(i),sizeof(Scalar)*ldb);
         }
@@ -133,39 +134,29 @@ public:
      * @brief Compute SVD of current block, and write down U inside
      * output, and Singular Values inside sigma.
      */
-    void DecompositionSVD(Block<Scalar,Primary>& output, Block<Scalar,Primary> sigma){
+    void DecompositionSVD(Block<Scalar,Primary>& output, Block<Primary>& sigma){
         //What I need:
-        //jobu : A --> all the U part is computed
-        //jobvt : N --> no part of V^{H} is computed
-
-    }
-
-
-    //Methods for Pj
-
-    /**
-     * @brief get the number of vectors used
-     */
-    int getUsedVects()const{
-        return nbVectUsed;
-    }
-
-    void OrthoBlock(Block<Scalar,Primary>& input,Scalar * toWrite, int ldToWrite){
-        Fill the coeff
-        call_Tgemm<>(getUsedVects(),input.getSizeBlock(),getLeadingDim(),
-                     getPtr(),getLeadingDim(),
-                     input.getPtr(),input.getLeadingDim(),
-                     toWrite,ldToWrite,Scalar(1),Scalar(0));
-        //Read again the coeffs in order to modify input
-        //Input = (-1) * Pj * toWrite + Wj;
-        call_gemm<Scalar>(getLeadingDim(),input.getSizeBlock(),getUsedVects(),
-                          getPtr(i),getLeadingDim(),
-                          toWrite, ldToWrite,
-                          Wj.getPtr(),Wj.getLeadingDim(),
-                          Scalar(-1), Scalar(1));
+        char jobu = 'S';  //jobu : S --> Only first size_block vectors computed
+        char jobvt = 'N'; //jobvt : N --> no part of V^{H} is computed
+        Scalar* Vt; //Won't be used
+        int ldVt = 1;         //Won't be used
+        //Needed in case Lapack did not converge
+        Primary * superb = new Primary[output.getSizeBlock()-1];
+        int res = callLapack_gesvd<>(jobu,jobvt,getLeadingDim(),getSizeBlock(),
+                                     getPtr(),getLeadingDim(),
+                                     sigma.getPtr(),
+                                     output.getPtr(),output.getLeadingDim(),
+                                     Vt,ldVt, superb);
+        if(res!=0){
+            std::cout<<"Problem ... in SVD, did not converge\n";
+            if(res>0){
+                for(int i=0 ; i<res; ++i) std::cout<<superb[i]<<"\t";
+                std::cout<<"\n";
+            }
+        }
+        delete [] superb;
     }
 
-
 };
 
 
diff --git a/src/BlockArnoldiIb.hpp b/src/BlockArnoldiIb.hpp
index 9a715fa..779b5be 100644
--- a/src/BlockArnoldiIb.hpp
+++ b/src/BlockArnoldiIb.hpp
@@ -15,7 +15,7 @@ int BlockArnoldiIb(Matrix& A,Block<Scalar,Primary>& B, Block<Scalar,Primary>& X0
     //store norms
     std::vector<Primary> normB;
     for(int i=0 ; i< SizeBlock ; ++i){
-        normB.push_back(B.template getNorm<Primary>(i));
+        normB.push_back(B.getNorm(i));
     }
     //Compute epsilon_R (step 1)
     Primary epsilon_R = normB.front();
@@ -68,8 +68,12 @@ int BlockArnoldiIb(Matrix& A,Block<Scalar,Primary>& B, Block<Scalar,Primary>& X0
         base.ComputeMGSOrtho(Wj,toWrite,L.getLeadingDim());
         std::cout<<"Ortho against V done"<<std::endl;
 
-        //Same against Pj
-        Pj.OrthoBlock(Wj,);
+        //Orthogonalisation de Wj par rapport à Pj
+        //Stockage des coeffs de l'ortho dans la partie en haut à droite de H|_j
+        //QR de Wj --> Wj~ et Dj
+        //Reconstitution de la matrice F ? ou bien tout sera fait in place
+        //
+
     }
 }
 
diff --git a/src/BlockGMRes.hpp b/src/BlockGMRes.hpp
index 4eacf02..48a3f64 100644
--- a/src/BlockGMRes.hpp
+++ b/src/BlockGMRes.hpp
@@ -188,7 +188,7 @@ int BlockGMRes(Matrix& A, Block<Scalar,Primary>& B, Block<Scalar,Primary>& X0,
             }
         }
     }
-    Sol.CopyBlock(X0);
+    Sol.CopyBlock(X0,X0.getSizeBlock());
     return globalStep;
 }
 
diff --git a/src/BlockWP.hpp b/src/BlockWP.hpp
new file mode 100644
index 0000000..318a420
--- /dev/null
+++ b/src/BlockWP.hpp
@@ -0,0 +1,104 @@
+#ifndef BLOCKWP_HPP
+#define BLOCKWP_HPP
+
+#include "Block.hpp"
+
+/**
+ * @brief This class represent a block of datas containing P and W
+ * concatenated.
+ * Thus a cursor to know where P ends and W starts is needed.
+ */
+template<typename Scalar,typename Primary>
+class BlockWP : public Block<Scalar,Primary>{
+    int cursor;
+    using Parent=Block<Scalar,Primary>;
+public:
+
+    BlockWP(int sizeBlock, int sizeVector) : Parent(sizeBlock,sizeVector),cursor(0){
+    }
+
+    ~BlockWP(){
+    }
+
+    Scalar * getW(){
+        return Parent::getPtr(cursor);
+    }
+
+    Scalar * getP(){
+        return Parent::getPtr();
+    }
+
+    int getSizeP() const {
+        return cursor;
+    }
+    int getSizeW() const {
+        return Parent::getSizeBlock()-cursor;
+    }
+
+    /**
+     * @brief Compute C = P^{H} * W and then W = W-C*P
+     *
+     */
+    void OrthoWP(Scalar * toWrite, int ldToWrite){
+        if(cursor>0){
+            //Generate coeef inside C
+            call_Tgemm<>(getSizeP(),getSizeW(),Parent::getLeadingDim(),
+                         getP(),Parent::getLeadingDim(),
+                         getW(),Parent::getLeadingDim(),
+                         toWrite,ldToWrite,Scalar(1),Scalar(0));
+            //Modify W part relatively to C and P
+            //Wj=(-1) * Pj * toWrite + Wj;
+            call_gemm<>(Parent::getLeadingDim(),getSizeW(),getSizeP(),
+                        getP(),Parent::getLeadingDim(),
+                        toWrite,ldToWrite,
+                        getW(),Parent::getLeadingDim(),
+                        Scalar(-1),Scalar(1));
+        }
+    }
+
+    /**
+     * @brief Compute QR facto of W part. W will be overwritten by
+     * Q. R part generated will go inside input toWrite ptr
+     */
+    void QRofW(Scalar * toWriteR, int ldR){
+    }
+
+    /**
+     * @brief Compute Product between [P,W] and input block and write
+     * result inside toWrite (will be used for updating V)
+     *
+     */
+    void ComputeProduct(Block<Scalar,Primary>& input,
+                        Scalar toWrite,int ldToWrite){
+        call_gemm<>(Parent::getLeadingDim(),input.getSizeBlock(),Parent::getSizeBlock(),
+                    Parent::getPtr(),Parent::getLeadingDim(),
+                    input.getPtr(),input.getLeadingDim(),
+                    toWrite,ldToWrite,
+                    Scalar(1),Scalar(0));
+    }
+
+    /**
+    * @brief Compute the product between [P,W] and input block and
+    * write the result in place of P. (obviously used to update P)
+    */
+    void ComputeProduct(Block<Scalar,Primary>& input){
+        //Create temporary blck to store result,
+        Block<Scalar,Primary> temp(input.getSizeBlock(),Parent::getLeadingDim());
+
+        call_gemm<>(Parent::getLeadingDim(),input.getSizeBlock(),Parent::getSizeBlock(),
+                    Parent::getPtr(),Parent::getLeadingDim(),
+                    input.getPtr(),input.getLeadingDim(),
+                    temp.getPtr(),temp.getLeadingDim(),
+                    Scalar(1),Scalar(0));
+        //temp will be copied back inside P part, and cursor will be
+        //updated
+        memcpy(getP(),temp.getPtr(),
+               sizeof(Scalar)*temp.getSizeBlock()*temp.getLeadingDim());
+        cursor = temp.getSizeBlock();
+        //Not necessary, should be erased but still...
+        memset(getW(),0,getSizeW()*Parent::getLeadingDim()*sizeof(Scalar));
+    }
+};
+
+
+#endif //BLOCKWP_HPP
diff --git a/src/HessExtended.hpp b/src/HessExtended.hpp
new file mode 100644
index 0000000..d7b6d25
--- /dev/null
+++ b/src/HessExtended.hpp
@@ -0,0 +1,121 @@
+#ifndef HESSEXTENDED_HPP
+#define HESSEXTENDED_HPP
+
+#include "Block.hpp"
+#include "LapackInterface.hpp"
+
+/**
+ * This class will store what correspond to the Hessenberg in
+ * classical Ib-BGMRes.
+ *
+ */
+template<typename Scalar>
+class HessExtended{
+    //Relative to global parameters
+    int p; //Size of first block
+
+    //Relative to total size
+    int nbTotalCol;
+    int ldHess;
+
+    //Relative to current state for columns
+    int nbBlckColUsed;
+    //Number of vector in each block col
+    // [p,p,p-1,p-2,p-2,...]
+    std::vector<int> nbVectInBlck;
+    //Will be used to get the starting of block col;
+    // [0,p,2p ,3p-1,4p-3,...]
+    std::vector<int> sumNbVectInBlck;
+
+    //Relative to current state of line
+    int nbLineUsed;//Correspond to the Ortho coeff', i.e. without Hj
+                   //on the last row.
+
+    //Datas:
+    Scalar * data;
+
+public:
+    HessExtended(int inP,int inRestart) : p(inP),
+                                          nbTotalCol(p*restart),
+                                          ldHess(p*(restart+1)),
+                                          nbBlckColUsed(0),
+                                          nbLineUsed(0),data(nullptr){
+        data = new Scalar[nbTotalLine*nbTotalCol];
+        memset(data,0,sizeof(Scalar)*nbTotalLine*nbTotalCol);
+    }
+
+    ~HessExtended(){
+        delete [] data;
+    }
+
+    int getLeadingDim() const {
+        return ldHess;
+    }
+
+    /**
+     * @brief return the ptr to the start of new blockcol, and augment
+     * the cursors
+     */
+    Scalar * getNewStartingCol(int inSizeBlock){
+        int ref = sumNbVectInBlck.back();
+        nbVectInBlck.push_back(inSizeBlock);
+        sumNbVectInBlck.push_back(ref.inSizeBlock);
+        return &data[ref*ldHess];
+    }
+
+    /**
+     * @brief Remove the last part (containing H), and filling it with
+     * W^{H}*H
+     *
+     * @param Wj
+     */
+    template<typename Primary>
+    void UpdateBottomLine(Block<Scalar,Primary>& W){
+        //Update nbLine !!
+    }
+
+    /**
+     * @brief Compute Product :
+     * |Lj|
+     * |Hj| x |Y|
+     */
+    template<typename Primary>
+    void ComputeResidual(Block<Scalar,Primary>& input,
+                         Block<Scalar,Primary>& gamma,
+                         Block<Scalar,Primary>& outputY,
+                         Block<Scalar,Primary>& outputResidual){
+        //Maybe Gamma need to be augmented
+    }
+
+
+
+    /**
+     * Following member function are relative to the |Hj block at the
+     * bottom.
+     */
+
+    /**
+     * @brief Get the ptr to write/read G
+     *
+     */
+    Scalar * getPtrToG(){
+    }
+
+    /**
+     * @brief Get the ptr to write/read C coeffs
+     *
+     */
+    Scalar * getPtrToC(){
+    }
+
+   /**
+     * @brief Get the ptr to write/read D triang part
+     *
+     */
+    Scalar * getPtrToD(){
+    }
+
+};
+
+
+#endif //HESSEXTENDED_HPP
diff --git a/src/LapackInterface.hpp b/src/LapackInterface.hpp
index b844388..afc2dbf 100644
--- a/src/LapackInterface.hpp
+++ b/src/LapackInterface.hpp
@@ -89,7 +89,8 @@ void call_gemm(int , int , int , //nb lines in mat, nbcol in b,
                Primary * , int , //Second one
                Primary * , int , //Res
                Primary , Primary){ //Factor : first:alpha, second:beta
-    std::cout<<"Support for complex double, complex float, float and double only (gemm)\n";
+    std::cout<<"Support for complex double, complex float,"
+             <<"float and double only (gemm)\n";
     exit(0);
 }
 template<> //float
@@ -314,15 +315,40 @@ int callLapack_orgqr(int m, int p,
  *
  * Ref : https://software.intel.com/en-us/node/521150
  */
-template<typename Scalar>
-int callLapack_gesvd(int ,int ,
-                     Scalar *, int,
-                     Scalar *, int){
-    std::cout<<"Only complx float and compl double supported\n";
+template<typename Scalar,typename Primary>
+int callLapack_gesvd(char,char,       //Jobu and jobvt
+                     int ,int ,       //nb line and nb col
+                     Scalar *, int,   //input matrix and leading dim associated
+                     Primary*,        //output sigma
+                     Scalar *, int,   //output U and leading dim associated
+                     Scalar *,int,    //output V^{H} and leading dim associated
+                     Primary *){      //array fill in case of no convergence
+    std::cout<<"Only complx float and complx double supported\n";
     exit(0);
     return 0;
 }
-
+template<> //specialization on std::complex<double>
+int callLapack_gesvd(char jobu,char jobvt,
+                     int m ,int n,
+                     std::complex<double> * A, int ldA,
+                     double* sigma,
+                     std::complex<double> * U, int ldU,
+                     std::complex<double> * V, int ldV,
+                     double * sup){
+    return LAPACKE_zgesvd(LAPACK_COL_MAJOR,jobu,jobvt,m,n,A,ldA,sigma,
+                          U,ldU,V,ldV,sup);
+}
+template<> //specialization on std::complex<float>
+int callLapack_gesvd(char jobu,char jobvt,
+                     int m ,int n,
+                     std::complex<float> * A, int ldA,
+                     float* sigma,
+                     std::complex<float> * U, int ldU,
+                     std::complex<float> * V, int ldV,
+                     float * sup){
+    return LAPACKE_cgesvd(LAPACK_COL_MAJOR,jobu,jobvt,m,n,A,ldA,sigma,
+                          U,ldU,V,ldV,sup);
+}
 
 
 #endif //LapackInterface.hpp
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 491ae02..cb62398 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -4,6 +4,8 @@ cmake_minimum_required(VERSION 2.8)
 set(GMRES_TEST_SRC
   testBlockClass.cpp
   testBlockGMRes.cpp
+  testBlockSvd.cpp
+  testBlockWP.cpp
   )
 
 #set(LIBS_FOR_TESTS ibgmresdr)
diff --git a/test/testBlockGMRes.cpp b/test/testBlockGMRes.cpp
index a94d812..0f4b0bb 100644
--- a/test/testBlockGMRes.cpp
+++ b/test/testBlockGMRes.cpp
@@ -151,7 +151,7 @@ int main(int ac, char ** av){
         Block<Scalar> MatxSolMinusB(SizeBlock,dim);
         //X_Diff : X_Exact - Sol_computed
         Block<Scalar> X_Diff(SizeBlock,dim);
-        MatxSolMinusB.CopyBlock(MatxSol);
+        MatxSolMinusB.CopyBlock(MatxSol,MatxSolMinusB.getSizeBlock());
         for(int idxRHS=0 ; idxRHS<SizeBlock ; ++idxRHS){
             for(int j=0 ; j<dim; ++j){
                 MatxSolMinusB.getPtr(idxRHS)[j] -= RHS.getPtr(idxRHS)[j];
diff --git a/test/testBlockSvd.cpp b/test/testBlockSvd.cpp
new file mode 100644
index 0000000..3f0d6bd
--- /dev/null
+++ b/test/testBlockSvd.cpp
@@ -0,0 +1,40 @@
+#include "../src/Block.hpp"
+#include <vector>
+#include <random>
+
+int main(int ac, char ** av){
+    std::vector<std::string > args(av,av+ac);
+    for(auto ag : args)
+        std::cout << ag  << "\n";
+
+    using Primary=double;
+    using Scalar=std::complex<Primary>;
+
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<> dis(0, 2);
+
+    int SizeBlock = 5;
+    int sizeVect = 25;
+
+    Block<Scalar,Primary> B(SizeBlock,sizeVect);
+    //Fill the block
+    for(int i=0; i<SizeBlock ; ++i){
+        for(int j=0 ; j<sizeVect ; ++j){
+            B.getPtr()[ i* B.getLeadingDim() + j] = dis(gen);
+        }
+    }
+
+    //Store Sigma
+    Block<Primary> Sigma(1,SizeBlock);
+
+    //Store U
+    Block<Scalar,Primary> U(SizeBlock,sizeVect);
+
+    B.DecompositionSVD(U,Sigma);
+
+    Sigma.displayBlock("Singular values");
+
+    return 0;
+}
diff --git a/test/testBlockWP.cpp b/test/testBlockWP.cpp
new file mode 100644
index 0000000..f0a8973
--- /dev/null
+++ b/test/testBlockWP.cpp
@@ -0,0 +1,35 @@
+#include "BlockWP.hpp"
+#include <complex>
+#include <random>
+
+/**
+ *  This exec is used to test the features of BlockWP class.
+ */
+int main(int ac ,char ** av){
+
+    std::vector<std::string > args(av,av+ac);
+    for(auto ag : args)
+        std::cout << ag  << "\n";
+
+    int SizeBlock = 5;
+    int Dim = 25;
+
+    using Primary=double;
+    using Scalar=std::complex<Primary>;
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<> dis(0, 2);
+
+    BlockWP<Scalar,Primary> WP(SizeBlock,Dim);
+
+    //Fill it
+    for(int i=0; i<SizeBlock ; ++i){
+        for(int j=0 ; j<Dim ; ++j){
+            WP.getPtr()[ i* WP.getLeadingDim() + j] = dis(gen);
+        }
+    }
+
+
+    return 0;
+}
-- 
GitLab