Commit c4fdc197 authored by PIACIBELLO Cyrille's avatar PIACIBELLO Cyrille

Inexact Breakdown on R0 is now detected, and is present in default version of Arnoldi_IB

parent a0a0c3a5
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
typedef void* IbGMResDr_handle; typedef void* IbGMResDr_handle;
/** /**
* @file Header for using the Ib-GMRes-Dr library from C. * @file Ib-GMRes-Dr.h Header for using the Ib-GMRes-Dr library from C.
* *
* @brief This Library is Matrix free, meaning that the user must * @brief This Library is Matrix free, meaning that the user must
* provide a method to compute a Matrix x Block of Vector * provide a method to compute a Matrix x Block of Vector
......
...@@ -54,7 +54,7 @@ option(BUILD_WARNINGS "Build with warning options" ON) ...@@ -54,7 +54,7 @@ option(BUILD_WARNINGS "Build with warning options" ON)
if(DEBUG_MODE) if(DEBUG_MODE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -O0")
else() else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
endif() endif()
......
...@@ -864,7 +864,7 @@ EXAMPLE_RECURSIVE = NO ...@@ -864,7 +864,7 @@ EXAMPLE_RECURSIVE = NO
# that contain images that are to be included in the documentation (see the # that contain images that are to be included in the documentation (see the
# \image command). # \image command).
IMAGE_PATH = IMAGE_PATH = @CMAKE_CURRENT_SOURCE_DIR@/image_dox
# The INPUT_FILTER tag can be used to specify a program that doxygen should # The INPUT_FILTER tag can be used to specify a program that doxygen should
# invoke to filter for each input file. Doxygen will invoke the filter program # invoke to filter for each input file. Doxygen will invoke the filter program
......
...@@ -24,7 +24,19 @@ This library does not implement the Matrix Product. Thus, the user ...@@ -24,7 +24,19 @@ This library does not implement the Matrix Product. Thus, the user
must provide to the library a Matrix x block of vector product. To must provide to the library a Matrix x block of vector product. To
provide it, the user must pass it through a function pointer with the provide it, the user must pass it through a function pointer with the
C Api, or by providing a Matrix class similar to the one displayed and C Api, or by providing a Matrix class similar to the one displayed and
used inside the examples provided. (testMatrixMarket ...). used inside the examples provided. (testMatrixMarket ...). Similarly,
the user can provide a right preconditionner, to be used during
computation.
\subsection perfs Performances
This graphe plots the min and max residual of the block solution
during the algorithm. On the x axis, is displayed the number of
Matrice Vector products needed to achieve the error displayed.
@image html MatCone_matvect.png
@image html MatCone_IB_matvect.png
\section qstart Quick Start \section qstart Quick Start
......
...@@ -99,6 +99,7 @@ struct Arnoldi{ ...@@ -99,6 +99,7 @@ struct Arnoldi{
//Tests over criterion //Tests over criterion
Hess.ComputeBlockResidual(R1,Yj); Hess.ComputeBlockResidual(R1,Yj);
std::cout<<"["<<j<<"]\t"<<"Residual computed"<<std::endl; std::cout<<"["<<j<<"]\t"<<"Residual computed"<<std::endl;
std::pair<Primary,Primary> MinMax; std::pair<Primary,Primary> MinMax;
{//Display residual to see convergence {//Display residual to see convergence
...@@ -138,6 +139,7 @@ struct Arnoldi{ ...@@ -138,6 +139,7 @@ struct Arnoldi{
} }
} }
std::cout<<"################# Iterations done ... "<<j<<"\n"; std::cout<<"################# Iterations done ... "<<j<<"\n";
returned_struct.sizeKSpace = (j+1)*SizeBlock;
returned_struct.nbIteDone = j; returned_struct.nbIteDone = j;
returned_struct.hasConverged = convergence; returned_struct.hasConverged = convergence;
return returned_struct; return returned_struct;
......
...@@ -37,16 +37,10 @@ struct Arnoldi_IB{ ...@@ -37,16 +37,10 @@ struct Arnoldi_IB{
//Return struct //Return struct
ArnReturn returned_struct; ArnReturn returned_struct;
returned_struct.sizeKSpace = 0;
//We store the size of each block -- Could be delegated to a class ?
// std::vector<int> sizeOfEachBlock;
// sizeOfEachBlock.reserve(max_iter);
// sizeOfEachBlock.push_back(SizeBlock);
std::vector<int> sumOfSizeOfEachBlock;
sumOfSizeOfEachBlock.push_back(SizeBlock);
//Set the hessenberg //Set the hessenberg
HessExtended<Scalar> L(MaxKSize,SizeBlock); HessExtended<Scalar,Primary> L(MaxKSize,SizeBlock);
Base<Scalar,Primary> base(SizeBlock,MaxKSize,dim); Base<Scalar,Primary> base(SizeBlock,MaxKSize,dim);
//Create Block to Store W and P (if IB happens) //Create Block to Store W and P (if IB happens)
BlockWP<Scalar,Primary> WP(SizeBlock,dim); BlockWP<Scalar,Primary> WP(SizeBlock,dim);
...@@ -62,18 +56,86 @@ struct Arnoldi_IB{ ...@@ -62,18 +56,86 @@ struct Arnoldi_IB{
R0.getPtr(i)[k] = B.getPtr(i)[k] - R0.getPtr(i)[k]; R0.getPtr(i)[k] = B.getPtr(i)[k] - R0.getPtr(i)[k];
} }
} }
Block<Scalar,Primary> V1(SizeBlock,dim); Block<Scalar,Primary> q0(SizeBlock,dim);
Block<Scalar,Primary> R1(SizeBlock,SizeBlock); Block<Scalar,Primary> r0(SizeBlock,SizeBlock);
//Q-R Decomposition of (R0) //Q-R Decomposition of (R0)
R0.ComputeQRFacto(V1,R1); R0.ComputeQRFacto(q0,r0);
//here, insert inexact breakdown on R0
{
//Copy r0 because needed to compute Lambda, but is
//destroyed if SVD is called
Block<Scalar,Primary> r0_cpy(SizeBlock,SizeBlock);
r0_cpy.CopyBlock(r0);
//U is of full size, in order to get all the left singular vectors
Block<Scalar,Primary> U{r0.getSizeBlock(),r0.getLeadingDim()};
//SVD
int nbKDir = r0_cpy.DecompositionSVD(U,epsilon);
//Test if needed to launch algorithm again
if(nbKDir == 0){
std::cout<<"\t\tINEXACT BREAKDOWN on R0\n";
std::cout<<"\t\tNb Kept Dir :"<<nbKDir<<"/"<<SizeBlock<<std::endl;
returned_struct.sizeKSpace = 0;
returned_struct.nbIteDone = 0;
returned_struct.hasConverged = true;
return returned_struct;
}
if(nbKDir == SizeBlock){
std::cout<<"\t\tNo INEXACT BREAKDOWN on R0\n";
//First block of base added //First block of base added
base.addBlockDatas(V1); base.addBlockDatas(q0);
L.initLambda(r0);
}else{
std::cout<<"\t\tINEXACT BREAKDOWN on R0\n";
std::cout<<"\t\tNb Kept Dir :"<<nbKDir<<"/"<<SizeBlock<<std::endl;
//Block empty that will contains the first base vectors
Block<Scalar,Primary> V1ToBeAdded{nbKDir,dim};
//V1ToBeAdded = V1 * U1
call_gemm<>(dim,nbKDir,SizeBlock,
q0.getPtr(),q0.getLeadingDim(),
U.getPtr(),U.getLeadingDim(),
V1ToBeAdded.getPtr(),V1ToBeAdded.getLeadingDim(),
Scalar(1),Scalar(0));
//P = V1*U2
call_gemm<>(dim,SizeBlock-nbKDir,SizeBlock,
q0.getPtr(),q0.getLeadingDim(),
U.getPtr(nbKDir),U.getLeadingDim(),
WP.getP(),WP.getLeadingDim(),
Scalar(1),Scalar(0));
WP.incSizeP(SizeBlock-nbKDir);
base.addBlockDatas(V1ToBeAdded);
//Create a Lambda1 = [U1,U2]^{H} * R0;
Block<Scalar,Primary> Lambda1{SizeBlock,SizeBlock};
call_Tgemm<>(SizeBlock,SizeBlock,SizeBlock,
U.getPtr(),U.getLeadingDim(),
r0.getPtr(),r0.getLeadingDim(),
Lambda1.getPtr(),Lambda1.getLeadingDim(),
Scalar(1),Scalar(0));
L.initLambda(Lambda1);
//Create and initialize Phi (\Phi_1)
L.initPhi(nbKDir);
}
}
//We store the size of each block -- Could be delegated to a class ?
std::vector<int> sumOfSizeOfEachBlock;
sumOfSizeOfEachBlock.push_back(SizeBlock - WP.getSizeP());
//Nb directions discarded //Nb directions discarded
int nbDeflDir = 0; int nbDeflDir = WP.getSizeP();
bool convergence = false; bool convergence = (nbDeflDir == SizeBlock);
//Counter //Counter
int j=0; int j=0;
while(j<max_iter){ //Main Loop while(j<max_iter){ //Main Loop
double timestamp = Logger<Primary>::GetTime(); double timestamp = Logger<Primary>::GetTime();
std::cout<<"\t\tStart of Iteration\n"; std::cout<<"\t\tStart of Iteration\n";
...@@ -92,6 +154,7 @@ struct Arnoldi_IB{ ...@@ -92,6 +154,7 @@ struct Arnoldi_IB{
A.MatBaseProduct(base.getPtr(base.getCurrentBlock()-1),WP.getSizeW(), A.MatBaseProduct(base.getPtr(base.getCurrentBlock()-1),WP.getSizeW(),
WP.getW()); WP.getW());
} }
returned_struct.sizeKSpace += WP.getSizeW();
//Ortho against V and store coeff inside new block column of Hess L //Ortho against V and store coeff inside new block column of Hess L
base.ComputeMGSOrtho(WP,toWrite,L.getLeadingDim(),WP.getSizeP()); base.ComputeMGSOrtho(WP,toWrite,L.getLeadingDim(),WP.getSizeP());
std::cout<<"Ortho against V and P"<<std::endl; std::cout<<"Ortho against V and P"<<std::endl;
...@@ -104,10 +167,13 @@ struct Arnoldi_IB{ ...@@ -104,10 +167,13 @@ struct Arnoldi_IB{
//Create Block and Residual //Create Block and Residual
Block<Scalar,Primary> Res(SizeBlock,L.getNbLineUsed() + SizeBlock); Block<Scalar,Primary> Res(SizeBlock,L.getNbLineUsed() + SizeBlock);
//Compute residual //Compute residual
L.ComputeResidual(R1,Y,Res); L.ComputeResidual(Y,Res);
std::cout<<"\nResidual Computed ...\n"; std::cout<<"\nResidual Computed ...\n";
Block<Scalar,Primary> Directions(SizeBlock,SizeBlock);
//Do we really need that ? //Do we really need that ?
std::pair<Primary,Primary> MinMax = Res.getMinMaxNorm(); std::pair<Primary,Primary> MinMax = Res.getMinMaxNorm();
std::cout<<"Ite ["<<j<<"] Done\t"<< std::cout<<"Ite ["<<j<<"] Done\t"<<
...@@ -125,11 +191,11 @@ struct Arnoldi_IB{ ...@@ -125,11 +191,11 @@ struct Arnoldi_IB{
//Block to Store U //Block to Store U
Block<Scalar,Primary> U; Block<Scalar,Primary> U;
Res.DecompositionSVD(U,epsilon); int nbDirKept = Res.DecompositionSVD(U,epsilon);
std::cout<<"Decomposition SVD done !\n\t\tSingular Values kept " std::cout<<"Decomposition SVD done !\n\t\tSingular Values kept "
<<U.getSizeBlock()<<" !"<<std::endl; <<nbDirKept<<" !"<<std::endl;
//Here : If no inexact breakdowns occured, updates procedures are easiers //Here : If no inexact breakdowns occured, updates procedures are easier
if(U.getSizeBlock() == SizeBlock){ if(U.getSizeBlock() == SizeBlock){
//Augment base : [P_{j-1},~Wj] = [~Wj] //Augment base : [P_{j-1},~Wj] = [~Wj]
base.addBlockDatas(WP); base.addBlockDatas(WP);
...@@ -139,11 +205,16 @@ struct Arnoldi_IB{ ...@@ -139,11 +205,16 @@ struct Arnoldi_IB{
} }
std::cout<<"### Inexact Breakdowns Occured ###!\t\t Ite : \t"<<j<<"\n"; std::cout<<"### Inexact Breakdowns Occured ###!\t\t Ite : \t"<<j<<"\n";
//Create Block to store \W //Create Block to store \W
Block<Scalar,Primary> Directions(SizeBlock,SizeBlock);
Block<Scalar,Primary> tempRpart; Block<Scalar,Primary> tempRpart;
//Qr facto of bottom block of U //Qr facto of bottom block of U
std::cout<<"About to compute QR of bottom part of U\n"
<<"Starting point : "<<sumOfSizeOfEachBlock.back()
<<"\n";
U.ComputeQRFacto(Directions,tempRpart,sumOfSizeOfEachBlock.back()); U.ComputeQRFacto(Directions,tempRpart,sumOfSizeOfEachBlock.back());
//Update Phi with directions (W1W2), doc Annexe Eq 2.7
L.updatePhi(Directions,nbDirKept);
//Update //Update
//V_{j+1} = [P_{j-1},~Wj] * Directions_1 //V_{j+1} = [P_{j-1},~Wj] * Directions_1
...@@ -157,7 +228,6 @@ struct Arnoldi_IB{ ...@@ -157,7 +228,6 @@ struct Arnoldi_IB{
} }
//Pj = [P_{j-1},~Wj] * Directions_2 //Pj = [P_{j-1},~Wj] * Directions_2
WP.ComputeProduct(Directions,U.getSizeBlock()); WP.ComputeProduct(Directions,U.getSizeBlock());
//L.displayHessExtended("hess before update bottom line");
//L_{j+1,} = | Directions_1^{H} | * H_j //L_{j+1,} = | Directions_1^{H} | * H_j
//Gj = | Directions_2^{H} | //Gj = | Directions_2^{H} |
L.UpdateBottomLine(Directions); L.UpdateBottomLine(Directions);
......
...@@ -102,6 +102,7 @@ struct Arnoldi_QRInc{ ...@@ -102,6 +102,7 @@ struct Arnoldi_QRInc{
//Tests over criterion //Tests over criterion
std::pair<Primary,Primary> MinMax = Hess.LeastSquare(Yj,epsilon); std::pair<Primary,Primary> MinMax = Hess.LeastSquare(Yj,epsilon);
std::cout<<"["<<j<<"]\t"<<"Residual computed\n" std::cout<<"["<<j<<"]\t"<<"Residual computed\n"
<<"\t\tMin\t"<<MinMax.first<<"\tMax\t"<<MinMax.second <<"\t\tMin\t"<<MinMax.first<<"\tMax\t"<<MinMax.second
<<std::endl; <<std::endl;
...@@ -140,6 +141,7 @@ struct Arnoldi_QRInc{ ...@@ -140,6 +141,7 @@ struct Arnoldi_QRInc{
} }
} }
std::cout<<"################# Iterations done ... "<<j<<"\n"; std::cout<<"################# Iterations done ... "<<j<<"\n";
returned_struct.sizeKSpace = (j+1)*SizeBlock;
returned_struct.nbIteDone = j; returned_struct.nbIteDone = j;
returned_struct.hasConverged = convergence; returned_struct.hasConverged = convergence;
return returned_struct; return returned_struct;
......
...@@ -70,11 +70,14 @@ int BGMRes(Matrix& A,Block<Scalar,Primary>& B, Block<Scalar,Primary>& X0, ...@@ -70,11 +70,14 @@ int BGMRes(Matrix& A,Block<Scalar,Primary>& B, Block<Scalar,Primary>& X0,
ArnReturn ret = Arn::Exec_Procedure(A,X0,B,arnoldi_ite,MaxKSize,epsilon,log); ArnReturn ret = Arn::Exec_Procedure(A,X0,B,arnoldi_ite,MaxKSize,epsilon,log);
nbIteDone+=ret.nbIteDone; nbIteDone+=ret.nbIteDone;
std::cout<<"\n################## Restart Done "<<nb_restart std::cout<<"######################################################\n";
<<" #################\n"; std::cout<<"\t\t Restart Done "<<nb_restart
std::cout<<"\n################## Total iterations "<<nbIteDone <<"\n";
<<" #################\n"; std::cout<<"\t\t Total iterations "<<nbIteDone
<<"\n";
std::cout<<"\t\t Size of Krylov Space "<<ret.sizeKSpace
<<"\n";
std::cout<<"######################################################\n";
//Three case : //Three case :
//--1 Arnoldi converged //--1 Arnoldi converged
//--2 Arnoldi reached Max Krylov Size allowed //--2 Arnoldi reached Max Krylov Size allowed
......
...@@ -58,6 +58,7 @@ public: ...@@ -58,6 +58,7 @@ public:
std::cout<<name<<"\t LeadingDim x SizeBlock : "<<getLeadingDim() std::cout<<name<<"\t LeadingDim x SizeBlock : "<<getLeadingDim()
<<" x "<<getSizeBlock()<<"\n"; <<" x "<<getSizeBlock()<<"\n";
for(int a=0 ; a<SizeBlock ; ++a){ for(int a=0 ; a<SizeBlock ; ++a){
std::cout<<"["<<a<<"] ";
for(int b=0 ; b<ldb ; ++b){ for(int b=0 ; b<ldb ; ++b){
std::cout<<datas[a*ldb+b]<<"\t"; std::cout<<datas[a*ldb+b]<<"\t";
} }
...@@ -247,8 +248,13 @@ public: ...@@ -247,8 +248,13 @@ public:
/** /**
* @brief Compute SVD of current block, and write down U inside * @brief Compute SVD of current block, and write down U inside
* output, and Singular Values inside sigma. * output, and Singular Values inside sigma.
* @param U, output block. Will contain only the vectors
* corresponding to the singular values superior to the threshold
* given.
* @param epsilon, treshold
*/ */
void DecompositionSVD(Block<Scalar,Primary>& output, Primary epsilon){ int DecompositionSVD(Block<Scalar,Primary>& output, Primary epsilon){
//What I need: //What I need:
char jobu = 'S'; //jobu : S --> Only first size_block vectors computed char jobu = 'S'; //jobu : S --> Only first size_block vectors computed
char jobvt = 'N'; //jobvt : N --> no part of V^{H} is computed char jobvt = 'N'; //jobvt : N --> no part of V^{H} is computed
...@@ -278,15 +284,17 @@ public: ...@@ -278,15 +284,17 @@ public:
while(id<getSizeBlock() && sigma[id]>epsilon){ while(id<getSizeBlock() && sigma[id]>epsilon){
id++; id++;
} }
//Display all the singular values //Test if output is already allocated
// for(int i=0 ; i<getSizeBlock() ; ++i){ if(output.getSizeBlock() != 0){ //Fill the output with vectors from U
// std::cout<<"sigma["<<i<<"]\t : "<<sigma[i]<<"\n"; memcpy(output.getPtr(),U.getPtr(),
// } sizeof(Scalar)*output.getSizeBlock() * U.getLeadingDim());
//std::cout<<"Number of sv sup. to "<<epsilon<<"\t: "<<id<<"\n"; }else{//Only fill ouptut with vctors corresponding to the directions kept
output.initData(id,getLeadingDim()); output.initData(id,getLeadingDim());
memcpy(output.getPtr(),U.getPtr(),sizeof(Scalar)*id*U.getLeadingDim()); memcpy(output.getPtr(),U.getPtr(),sizeof(Scalar)*id*U.getLeadingDim());
}
delete [] sigma; delete [] sigma;
delete [] superb; delete [] superb;
return id;
} }
}; };
......
...@@ -35,6 +35,10 @@ public: ...@@ -35,6 +35,10 @@ public:
return Parent::getSizeBlock()-cursor; return Parent::getSizeBlock()-cursor;
} }
void incSizeP(int inc){
cursor += inc;
}
/** /**
* @brief Compute C = P^{H} * W and then W = W-C*P * @brief Compute C = P^{H} * W and then W = W-C*P
* *
...@@ -121,6 +125,7 @@ public: ...@@ -121,6 +125,7 @@ public:
input.getPtr(nbKeptDir),input.getLeadingDim(), input.getPtr(nbKeptDir),input.getLeadingDim(),
temp.getPtr(),temp.getLeadingDim(), temp.getPtr(),temp.getLeadingDim(),
Scalar(1),Scalar(0)); Scalar(1),Scalar(0));
//temp will be copied back inside P part, and cursor will be //temp will be copied back inside P part, and cursor will be
//updated //updated
memcpy(getP(),temp.getPtr(), memcpy(getP(),temp.getPtr(),
......
...@@ -9,10 +9,10 @@ ...@@ -9,10 +9,10 @@
* Ib-BGMRes version. * Ib-BGMRes version.
* *
*/ */
template<typename Scalar> template<typename Scalar,typename Primary=Scalar>
class HessExtended{ class HessExtended{
//Relative to global parameters //Relative to global parameters
int p; //Size of first block int p; //Number of RHS
//Relative to total size //Relative to total size
int MaxKSize; int MaxKSize;
...@@ -34,6 +34,11 @@ class HessExtended{ ...@@ -34,6 +34,11 @@ class HessExtended{
//Datas: //Datas:
Scalar * data; Scalar * data;
//Storage of what's needed to compte the rhs for Least square
//Ref : IB BGMRes Dr : Page 10.
Block<Scalar,Primary> * Phi;
Block<Scalar,Primary> * Lambda1;
public: public:
/** /**
* @param inMaxKSize : Maximum size of Krylov search space * @param inMaxKSize : Maximum size of Krylov search space
...@@ -43,7 +48,8 @@ public: ...@@ -43,7 +48,8 @@ public:
MaxKSize(inMaxKSize), MaxKSize(inMaxKSize),
ldHess(inMaxKSize+inP), ldHess(inMaxKSize+inP),
nbBlckColUsed(0), nbBlckColUsed(0),
nbLineUsed(0),data(nullptr){ nbLineUsed(0),data(nullptr),
Phi(nullptr),Lambda1(nullptr){
data = new Scalar[ldHess*MaxKSize]; data = new Scalar[ldHess*MaxKSize];
memset(data,0,sizeof(Scalar)*ldHess*MaxKSize); memset(data,0,sizeof(Scalar)*ldHess*MaxKSize);
sumNbVectInBlck.push_back(0); sumNbVectInBlck.push_back(0);
...@@ -56,8 +62,43 @@ public: ...@@ -56,8 +62,43 @@ public:
~HessExtended(){ ~HessExtended(){
delete [] data; delete [] data;
if(Lambda1)
delete Lambda1;
if(Phi)
delete Phi;
}
void initLambda(Block<Scalar,Primary>& lambda){
//Check
if(lambda.getSizeBlock() != p){
std::cout<<"Lambda sizeBlock is wrong while initiating lambda in Hess\n";
}
if(lambda.getLeadingDim() != p){
std::cout<<"Lambda leadingDim is wrong while initiating lambda in Hess\n";
}
Lambda1 = new Block<Scalar,Primary>{p,p};
memcpy(Lambda1->getPtr(),lambda.getPtr(),p*p*sizeof(Scalar));
}
/**
* @brief Initialize Phi
* (refer to Annex, Eq 2.5)
*/
void initPhi(int p1){
if(!Phi){
Phi = new Block<Scalar,Primary>{Lambda1->getSizeBlock(),
Lambda1->getSizeBlock() + p1};
for(int i=0 ; i<Lambda1->getSizeBlock() ; ++i){
Phi->getPtr(i)[i] = Scalar(1);
}
}else{
std::cout<<"Should not be there !!\nExiting\n";
exit(0);
}
} }
void reset(){ void reset(){
memset(data,0,sizeof(Scalar)*ldHess*MaxKSize); memset(data,0,sizeof(Scalar)*ldHess*MaxKSize);
sumNbVectInBlck.clear(); sumNbVectInBlck.clear();
...@@ -85,7 +126,7 @@ public: ...@@ -85,7 +126,7 @@ public:
} }
std::cout<<std::endl; std::cout<<std::endl;
} }
template<class Primary>
void displayHessExtendedBitMap(std::string name = ""){ void displayHessExtendedBitMap(std::string name = ""){
std::cout<<name<<"\n"; std::cout<<name<<"\n";
for(int j=0 ; j<nbLineUsed+p ; ++j){ for(int j=0 ; j<nbLineUsed+p ; ++j){
...@@ -134,7 +175,6 @@ public: ...@@ -134,7 +175,6 @@ public:
* @param W FullBlock * @param W FullBlock
* @param sizeToRead : limite between W1 and W2 * @param sizeToRead : limite between W1 and W2
*/ */
template<typename Primary>
void UpdateBottomLine(Block<Scalar,Primary>& W){ void UpdateBottomLine(Block<Scalar,Primary>& W){
int nj = sumNbVectInBlck.back(); int nj = sumNbVectInBlck.back();
//Save H in order to write directly inside L //Save H in order to write directly inside L
...@@ -151,23 +191,82 @@ public: ...@@ -151,23 +191,82 @@ public:
//nbLineUsed += sizeToRead; //nbLineUsed += sizeToRead;
} }
/**
* @brief Update the Phi (if needed, thus if IB happened on the R0)
*
* Could be optimized in order to avoid copying, allocating and destroying
*/
void updatePhi(Block<Scalar,Primary> & Directions, int p_jplus1){
if(Phi){ //If Phi has been set, it means that an I.B. happened
//on R0, and we have a modified RHS for least square
//that depends on Phi
int nj = sumNbVectInBlck.back();
// std::cout<<"Update Phi ::\n"
// <<"p\t"<<p<<"\n"
// <<"Phi->getLeadingDim()\t"<<Phi->getLeadingDim()<<"\n"
// <<"nbVectInBlck.back()\t"<<nbVectInBlck.back()<<"\n"
// <<"nj (== sumNbVectInBlck.back()\t"<<sumNbVectInBlck.back()<<"\n"
// <<"p_jplus1\t"<<p_jplus1<<"\n"
// <<"\n";
//Create New Phi
Block<Scalar,Primary> * newPhi =
new Block<Scalar,Primary>{p,Phi->getLeadingDim() + p_jplus1};
//Init first n_j rows
for(int i=0 ; i<p ; ++i){
memcpy(newPhi->getPtr(i),Phi->getPtr(i),
nj*sizeof(Scalar));
}
//Compute W1W2^{H} * Phi(nj+1:nj+p,:)
call_Tgemm<>(p,p,p,
Directions.getPtr(),Directions.getLeadingDim(),
&Phi->getPtr()[nj],Phi->getLeadingDim(),
&newPhi->getPtr()[nj],newPhi->getLeadingDim(),
Scalar(1),Scalar(0));
delete Phi;
Phi = newPhi;
}else{
//Do nothing
}
}
/**
* @brief Modify RHS given using Phi and Lambda1
* Return Phi * Lambda
*/
void ComputeRHS(Scalar * RHS, int ldRHS){
call_gemm<>(Phi->getLeadingDim(),p,p,
Phi->getPtr(),Phi->getLeadingDim(),
Lambda1->getPtr(),Lambda1->getLeadingDim(),
RHS,ldRHS,
Scalar(1),Scalar(0));
std::cout<<"Computing RHS done at ite "
<< sumNbVectInBlck.size() <<"\n";
}
/** /**
* @brief Compute Least Square and product to get residual : * @brief Compute Least Square and product to get residual :
* |Lj| * |Lj|
* |Hj| x |Y| * |Hj| x |Y|
*/ */
template<typename Primary> void ComputeResidual(Block<Scalar,Primary>& outputY,
void ComputeResidual(Block<Scalar,Primary>& gamma,
Block<Scalar,Primary>& outputY,
Block<Scalar,Primary>& outputResidual){ Block<Scalar,Primary>& outputResidual){
//Complete gamma //Complete gamma
int ldR = nbLineUsed + p; int ldR = nbLineUsed + p;
Scalar * r = new Scalar[ldR*p]; Scalar * r = new Scalar[ldR*p];
if(!Phi){
memset(r,0,((nbLineUsed + p)*p)*sizeof(Scalar)); memset(r,0,((nbLineUsed + p)*p)*sizeof(Scalar));
for(int i=0 ; i<p ; ++i){ for(int i=0 ; i<p ; ++i){
memcpy(&r[i*ldR],gamma.getPtr(i), memcpy(&r[i*ldR],Lambda1->getPtr(i),
sizeof(Scalar)*(i+1)); sizeof(Scalar)*(i+1));
} }
}else{
ComputeRHS(r,ldR);
}
//get a working copy of F //get a working copy of F
Scalar*workingCopy = new Scalar[ldR*sumNbVectInBlck.back()]; Scalar*workingCopy = new Scalar[ldR*sumNbVectInBlck.back()];
memset(workingCopy,0,ldR*sumNbVectInBlck.back()*sizeof(Scalar)); memset(workingCopy,0,ldR*sumNbVectInBlck.back()*sizeof(Scalar));
...@@ -195,10 +294,13 @@ public: ...@@ -195,10 +294,13 @@ public:
outputY.getPtr(),outputY.getLeadingDim(), outputY.getPtr(),outputY.getLeadingDim(),
outputResidual.getPtr(),outputResidual.getLeadingDim(), outputResidual.getPtr(),outputResidual.getLeadingDim(),
Scalar(1),Scalar(0)); Scalar(1),Scalar(0));
std::cout<<"Compute F*Y done\n";
//Add gamma to residual //Add gamma to residual
for(int i=0 ; i<outputResidual.getSizeBlock();++i){ for(int i=0 ; i<outputResidual.getSizeBlock();++i){
for(int j=0 ; j<i+1 ; ++j){ for(int j=0 ; j<i+1 ; ++j){
outputResidual.getPtr(i)[j] -= gamma.getPtr(i)[j]; outputResidual.getPtr(i)[j] -= Lambda1->getPtr(i)[j];
} }
} }
......
...@@ -81,7 +81,9 @@ void call_Tgemm(int m, int n, int k, ...@@ -81,7 +81,9 @@ void call_Tgemm(int m, int n, int k,
double * res, int ldRes, double * res, int ldRes,
double alpha, double beta){ double alpha, double beta){
cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans, cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,
m,n,k,alpha,base,ldBase,block,ldBlock,beta, m,n,k,
alpha,base,ldBase,
block,ldBlock,beta,
res,ldRes); res,ldRes);
} }
......
...@@ -9,16 +9,18 @@ ...@@ -9,16 +9,18 @@
* procedure. * procedure.
*/ */
struct ArnReturn{ struct ArnReturn{
int sizeKSpace;
int nbIteDone; int nbIteDone;
bool hasConverged; bool hasConverged;
}; };