Commit 2c557ffa authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

Merge branch 'result_validation' into develop

parents caab0740 e801e6f0
# -*- mode: org -*-
# -*- coding: utf-8 -*-
#+TITLE: FABuLOuS numericals results
#+STARTUP: showeverything
* RESULTS
** Get Test files
[[file:./build/src/data/young1c.mtx][young1c.mtx]]
[[file:./build/src/data/matrices_BEM/MatconeSpherePC_MAIN_MAIN_0][MatconeSpherePC_MAIN_MAIN_0]] (binary)
[[file:./build/src/data/matrices_BEM/coneSpherePC_RHS_MAIN.0.res][coneSpherePC_RHS_MAIN.0.res]] (binary)
[[file:./build/src/data/sherman4.mtx][sherman4.mtx]]
[[file:./build/src/data/young4c.mtx][young4c.mtx]]
[[file:./build/src/data/bcsstk14.mtx][bcsstk14.mtx]]
[[file:./build/src/data/sherman5.mtx][sherman5.mtx]]
[[file:./build/src/data/bidiagonalmatrix1.mtx][bidiagonalmatrix1.mtx]]
[[file:./build/src/data/bidiagonalmatrix2.mtx][bidiagonalmatrix2.mtx]]
[[file:./build/src/data/bidiagonalmatrix3.mtx][bidiagonalmatrix3.mtx]]
[[file:./build/src/data/bidiagonalmatrix4.mtx][bidiagonalmatrix3.mtx]]
*** Downloads
#+begin_src shell :session *DOWNLOADS* :results none
files=(
"ftp://math.nist.gov/pub/MatrixMarket2/Harwell-Boeing/bcsstruc2/bcsstk14.mtx.gz"
"ftp://math.nist.gov/pub/MatrixMarket2/Harwell-Boeing/sherman/sherman4.mtx.gz"
"ftp://math.nist.gov/pub/MatrixMarket2/Harwell-Boeing/sherman/sherman5.mtx.gz"
"ftp://math.nist.gov/pub/MatrixMarket2/Harwell-Boeing/acoust/young1c.mtx.gz"
"ftp://math.nist.gov/pub/MatrixMarket2/Harwell-Boeing/acoust/young4c.mtx.gz"
)
mkdir -p ./build/src/data/
for i in "${files[@]}"; do
filename=$(basename "$i")
filename="${filename%.*}" # strip gz extension
curl $i | zcat > ./build/src/data/$filename
done
#+end_src
** Run Examples
#+begin_src bash :session foo :results output :exports both
cd fabulous/build/src/test_core/
./testMatrixMarket 100 500
# most matrix market with all ortho scheme + variable max_space (Krylov search space) parameter:
./testOrthoSchemes
# bcsstk14 with IB CGS_RUHE:
./test_RUHE_MGS_bcsstk_EXACT_BREAKDOWN
#+end_src
** Plot the results
#+begin_src R :results output graphics :file (org-babel-temp-file "figure" ".png") :exports both :width 600 :height 400 :session *R*
args <- commandArgs(TRUE)
if (length(args) < 1) {
stop("need filename as parameter")
}
library(ggplot2)
library(reshape2)
library(tools)
data <- read.table(args[1], header=T)
data$ID <- basename(file_path_sans_ext(args[1]))
if (length(args) >= 2) {
for (i in 2:length(args)) {
d <- read.table(args[i], header=T)
d$ID <- basename(file_path_sans_ext(args[i]))
data <- rbind(data, d)
}
}
id = data$ID;
data$ID1 <- paste(id, "minRes")
data$ID2 <- paste(id, "maxRes")
data$ID3 <- paste(id, "minRealRes")
data$ID4 <- paste(id, "maxRealRes")
p <- ggplot(data, aes(nb_mvp)) +
geom_line(aes(y=minRes, color=ID1)) +
geom_line(aes(y=maxRes, color=ID2)) +
geom_line(aes(y=minRealRes, color=ID3)) +
geom_line(aes(y=maxRealRes, color=ID4))
p + scale_y_log10()
#+end_src
Set this variable to your work directory (where you downloaded fabulous):
#+begin_src sh :session *TEST* :results none
export WORKDIR=/home/tmijieux/fabulous
#+end_src
To be able to run the following test cases, you must have compiled fabulous:
#+begin_src sh :session *TEST* :results output :exports both
cd ${WORKDIR}
mkdir -p build
cd build
cmake .. # -DCHAMELEON_DIR=$(spack location -i chameleon)
make
#+end_src
*** Impact of restart parameter
**** run test case
#+begin_src sh :session *TEST* :results none
cd ${WORKDIR}/build/src/test_core/
mkdir -p ../data/res
sizes=(200 400 600 800 1000)
for siz in "${sizes[@]}"; do
./testMatrixMarketChoice -t RUHE -s IMGS -m $siz -A IB -u -o "r=$siz"
done
#+end_src
**** plot the graphic
#+begin_src R :results output graphics :file (org-babel-temp-file "figure" ".png") :exports both :width 600 :height 400 :session *R*
library(ggplot2)
df <- read.table("./build/src/data/res/r=200.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/r=400.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/r=600.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/r=800.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/r=1000.res", header=T))
ggplot(df, aes(x=nb_mvp, y=maxRes, color=name)) +
geom_line() +
geom_hline(aes(yintercept=1e-4, color="threshold")) +
scale_y_log10() + ggtitle("Memory usage influence (young1c IB IMGS-RUHE)")
#+end_src
*** Impact of incremental QR factorization
**** run test case
#+begin_src sh :session *TEST* :results none
cd ${WORKDIR}/build/src/test_core/
./testMatrixMarketChoice -t BLOCK -s CGS -m 900 -A STDDR -u -o "Basic_GELS"
./testMatrixMarketChoice -t BLOCK -s CGS -m 900 -A QR -u -o "QR_factorization"
#+end_src
**** plot the graphic
#+begin_src R :results output graphics :file (org-babel-temp-file "figure" ".png") :exports both :width 600 :height 400 :session *R*
library(ggplot2)
df <- read.table("./build/src/data/res/Basic_GELS.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/QR_factorization.res", header=T))
ggplot(df, aes(x=global_iteration, y=least_square_time, color=name)) +
geom_line() + ggtitle("Least Square duration (young1c)")
#+end_src
*** Influence of the Algorithm
**** run test case
#+begin_src sh :session *TEST* :results none
cd ${WORKDIR}/build/src/test_core/
mkdir -p ../data/res
./testMatrixIW_DR
#+end_src
**** plot the graphic
#+begin_src R :results output graphics :file (org-babel-temp-file "figure" ".png") :exports both :width 600 :height 400 :session *R*
library(ggplot2)
df <- read.table("./build/src/data/res/STD.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/STD+DR.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/IB.res", header=T))
name <- df$name
df$name_min <- paste(name, " minRes")
df$name_max <- paste(name, " maxRes")
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name_max)) +
geom_line(aes(y=minRes, color=name_min)) +
geom_hline(aes(yintercept=1e-7, color="threshold")) +
scale_y_log10() +
ggtitle("coneSphere problem with CGS RUHE (STD, STD+DR and IB algorithm)")
#+end_src
#+TITLE: fabulous todo-list
#+STARTUP: showeverything
* DONE Implement Classic version with TOP LEVEL Chameleon Interface
* DONE Implement QR version with LOW LEVEL (_Tile_Async) Chameleon Interface
* DONE Make document about Deflated Restarting and Inexact Breakdown's blocks layout
** DONE Deflated Restarting with Incremental QR factorization: Hessenberg blocks layout
** DONE Inexact breakdown block layout (Hessenberg and Base layout)
** DONE Inexact breakdown on R0 (Hessenberg and RHS layout)
* DONE Implement DR+QR
* HOLD Reproducible installation
** DONE Instruction in INSTALL.org
** DONE Update spack package
** HOLD merge request to make Chameleon headers compatible with C++ complex types
* TODO reproducible results and visualization
** TODO Improve Logging
*** DONE Add timer for iterations (global)
*** TODO Improve timer semantics and logs
*** DONE Add timer for orthogonalization, least square and matrix vector product
*** TODO Add global timer
*** TODO Add flops/s counter
** TODO Improve RESULTS.org
Eventually, anyone must be able to gather all interesting results into RESULTS.org
just by evaluating code blocks from RESULTS.org and/or tangling RESULTS.org
* TODO Implement IB+DR
* TODO Implement IB+DR+QR
* TODO iterated orthogonalization stop criterion
* TODO find out how to link fabulous with parallel mkl with spack
......@@ -90,6 +90,14 @@ private:
o << restart_param;
}
template <class Matrix >
void check_params(Matrix &A, const Block<S> &X, const Block<S> B)
{
FABULOUS_ASSERT( A.size() == X.get_nb_row() );
FABULOUS_ASSERT( A.size() == B.get_nb_row() );
FABULOUS_ASSERT( X.get_nb_col() == B.get_nb_col() );
}
public:
BGMRes(bool log_real_residual = false):
_logger{log_real_residual},
......@@ -155,28 +163,32 @@ public:
*/
template< class Algo, class RestartParam, class Matrix >
int solve( Matrix &A, Block<S> &X, Block<S> &B,
Algo &, const int maxMVP, const int max_krylov_space_size,
Algo&, const int max_mvp, const int max_krylov_space_size,
const std::vector<P> &epsilon,
Orthogonalizer ortho, RestartParam restart_param )
{
check_params(A, X, B);
const int nbRHS = B.get_nb_col();
const int dim = B.get_nb_row();
reset();
print_start_info( dim, nbRHS, maxMVP, max_krylov_space_size,
print_start_info( dim, nbRHS, max_mvp, max_krylov_space_size,
ortho, restart_param, epsilon, A );
std::vector<P> normB, inv_normB;
normB = B.compute_norms(A);
inv_normB = array_inverse(normB);
X.scale(inv_normB); B.scale(inv_normB);
const int nb_eigen_pair = restart_param.get_k();
Base<S> base{dim, max_krylov_space_size+nbRHS};
Restarter<RestartParam, S> restarter{restart_param, base};
bool convergence = false;
while (!convergence && _mvp < maxMVP) {
while (!convergence && _mvp < max_mvp) {
//Compute nb of mat vect product to give to Arnoldi procedure
int size_to_span = std::min(max_krylov_space_size, maxMVP-_mvp);
int size_to_span = std::min(max_krylov_space_size, max_mvp-_mvp);
size_to_span = std::max(size_to_span, nbRHS);
print_iteration_start_info(size_to_span);
if (nbRHS + nb_eigen_pair > size_to_span)
break;
using Arnoldi = typename Algo::template t3mpl4te<S>;
Arnoldi arnoldi{_logger, restarter, dim, nbRHS, size_to_span};
convergence = arnoldi.run( A, X, B, size_to_span, epsilon,
......
......@@ -23,7 +23,8 @@ namespace fabulous {
*
* This class support DeflatedRestarting
*/
template<template<class> class HESSENBERG, class S > class ArnoldiDR
template<template<class> class HESSENBERG, class S >
class ArnoldiDR
{
static_assert(
arnoldiXhessenberg<fabulous::ArnoldiDR, HESSENBERG>::value,
......@@ -94,13 +95,16 @@ private:
void compute_real_residual(Matrix &A, const Block &X, const Block &B, Block &R)
{
R.copy(B); // R <- B
_logger.notify_mvp_begin();
A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- B-A*X
_logger.notify_mvp_end();
_nb_mvp += X.get_nb_col();
}
template< class Matrix, class Block >
void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V
{
_logger.notify_mvp_begin();
if (A.useRightPrecond()) {
Block MV{W.get_nb_row(), V.get_nb_col()};
A.PrecondBlockVect(V, MV); // MV = M * V
......@@ -108,6 +112,7 @@ private:
} else {
A.MatBlockVect(V, W); // W = A * V
}
_logger.notify_mvp_end();
_nb_mvp += V.get_nb_col();
}
......@@ -151,6 +156,15 @@ private:
return false;
}
std::tuple<P,P,bool>
check_least_square_residual(const std::vector<P> &epsilon)
{
_logger.notify_least_square_check_begin();
auto minmaxconv = _hess.check_least_square_residual(epsilon);
_logger.notify_least_square_check_end();
return minmaxconv;
}
public:
template<class RestartParam>
ArnoldiDR(Logger<P> &logger, Restarter<RestartParam, S> &restarter,
......@@ -189,8 +203,8 @@ public:
*
* \return whether the convergence was reached
*/
template<class Matrix, class Block, class Restarter>
bool run( const Matrix &A, Block &X, const Block &B,
template<class Matrix, class Restarter>
bool run( const Matrix &A, Block<S> &X, const Block<S> &B,
const int max_krylov_space_size,
const std::vector<P> &epsilon,
Restarter &restarter,
......@@ -198,14 +212,14 @@ public:
{
print_header(max_krylov_space_size);
Block R1{}; // R1 = RHS of projected problem
Block<S> R1{}; // R1 = RHS of projected problem
_logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
if (restarter && _base.get_nb_block() > 0) {
restarter.run(R1, _hess);
//check_arnoldi_formula(A, _base, _hess);
} else { //No DR, or first run of arnoldi
_base.reset();
Block R0 = _base.get_W(_nbRHS);
Block<S> R0 = _base.get_W(_nbRHS);
compute_real_residual(A, X, B, R0);
_base.increase(_nbRHS);
R1.realloc(_nbRHS, _nbRHS);
......@@ -216,28 +230,30 @@ public:
bool convergence = std::get<2>(MinMaxConv);
_size_krylov_space = _base.get_nb_vect();
_logger.notify_iteration_end(
_iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
_iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
_logger.log_real_residual(A, B, X);
while (!convergence && _size_krylov_space < max_krylov_space_size) {
++ _iteration_count;
_logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
Block Vj = _base.get_Vj(); // V_j
Block<S> Vj = _base.get_Vj(); // V_j
assert( Vj.get_nb_col() == _nbRHS );
Block W = _base.get_W(_nbRHS); // W (candidate)
Block<S> W = _base.get_W(_nbRHS); // W (candidate)
W_AMV(A, Vj, W); // W := A*M*V_j
_logger.notify_ortho_begin();
ortho.run(_hess, _base, W, A);
_logger.notify_ortho_end();
_base.increase(_nbRHS); // add W to base
_size_krylov_space = _base.get_nb_vect();
//_hess.display_hess_bitmap("Hess Before Least Square");
auto MinMaxConv = _hess.check_least_square_residual(epsilon);
auto MinMaxConv = check_least_square_residual(epsilon);
bool proj_convergence = std::get<2>(MinMaxConv);
convergence = check_real_residual_if(proj_convergence, A, B, X, epsilon);
_logger.notify_iteration_end(
_iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
_iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
_logger.log_real_residual(A, B, X, _base, _hess);
}
......@@ -250,10 +266,10 @@ public:
_base.reset();
} else if (restarter && !convergence) {
int M = _hess.get_nb_vect() + _nbRHS;
Block LS{M, _nbRHS};
Block<S> LS{M, _nbRHS};
_hess.compute_proj_residual(LS);
restarter.save_LS(LS);
Block H = _hess.get_hess_block();
Block<S> H = _hess.get_hess_block();
restarter.save_hess(H);
}
......
......@@ -20,7 +20,8 @@ namespace fabulous {
*
* \warning This class does NOT support DeflatedRestarting (not implemented)
*/
template < template<class> class HESSENBERG, class S > class ArnoldiIB
template < template<class> class HESSENBERG, class S >
class ArnoldiIB
{
static_assert(
arnoldiXhessenberg<fabulous::ArnoldiIB, HESSENBERG>::value,
......@@ -48,7 +49,7 @@ private:
private:
Logger<P> &_logger;
HESSENBERG<S> L;
HESSENBERG<S> _F;
bool _solution_computed;
Block<S> _solution;
Block<S> _last_Y;
......@@ -76,7 +77,6 @@ public:
int get_nb_mvp() const { return _nb_mvp; }
int get_nb_iteration() const { return _iteration_count; }
private:
template< class Matrix, class Block, class Base >
void X_plus_MVY(Matrix &A, Block &X, Base &V, Block &Y)
......@@ -98,11 +98,13 @@ private:
}
template<class Matrix, class Block>
void compute_solution(Matrix &A, Block &X)
void compute_real_residual(Matrix &A, Block &X, const Block &B, Block &R)
{
Block Y = L.alloc_least_square_sol();
L.solve_least_square(Y);
X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
R.copy(B); // R <- B
_logger.notify_mvp_begin();
A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- B-A*X
_logger.notify_mvp_end();
_nb_mvp += X.get_nb_col();
}
template< class Matrix, class Block, class BlockWP >
......@@ -110,6 +112,7 @@ private:
{
Block W = WP.get_W();
_logger.notify_mvp_begin();
if (A.useRightPrecond()) {
Block MV{W.get_nb_row(), V.get_nb_col()};
A.PrecondBlockVect(V, MV); // MV = M * V
......@@ -117,15 +120,16 @@ private:
} else {
A.MatBlockVect(V, W); // W = A * V
}
_logger.notify_mvp_end();
_nb_mvp += V.get_nb_col();
}
template<class Matrix, class Block>
void compute_real_residual(Matrix &A, Block &X, const Block &B, Block &R)
void compute_solution(Matrix &A, Block &X)
{
R.copy(B); // R <- B
A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- A*X-B
_nb_mvp += X.get_nb_col();
Block Y = _F.alloc_least_square_sol();
_F.solve_least_square(Y);
X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
}
template<class Matrix, class Block>
......@@ -181,7 +185,7 @@ private:
W2 = W1_W2.sub_block(0, _nb_direction_kept, _nbRHS, _nb_direction_discarded);
U1_2.OutOfPlaceQRFacto(W1_W2, R); // Qr facto of bottom block of U
L.update_phi(W1_W2, _nb_direction_kept);
_F.update_phi(W1_W2, _nb_direction_kept);
// Update Phi with \W_1 and \W_2, doc Annexe Eq 2.7
Block V = _base.get_W(_nb_direction_kept);
......@@ -191,7 +195,7 @@ private:
// L_{j+1,} = | \W_1^{H} | * H_j
// Gj = | \W_2^{H} | * H_j
L.update_bottom_line(W1_W2);
_F.update_bottom_line(W1_W2);
}
void handle_IB_on_R0( Block<S> &U, BlockWP<S> &WP,
......@@ -234,17 +238,16 @@ private:
Lambda1.get_ptr(), Lambda1.get_leading_dim(),
S{1.0}, S{0.0}
);
L.init_lambda(Lambda1);
L.init_phi(_nb_direction_kept); // Create and initialize Phi (\Phi_1)
_F.init_lambda(Lambda1);
_F.init_phi(_nb_direction_kept); // Create and initialize Phi (\Phi_1)
}
template< class Matrix, class Block, class BlockWP,
class Epsilon, class Hess >
template< class Matrix, class Block, class BlockWP, class Epsilon >
std::tuple<P,P,bool>
inexact_breakdown_on_R0(
const Matrix &A, Block &R0,
const Epsilon &epsilon, const Epsilon &inv_epsilon,
Hess &L, BlockWP &WP)
BlockWP &WP)
{
Block Q0 = R0;
Block Lambda0{_nbRHS, _nbRHS};
......@@ -258,7 +261,7 @@ private:
_old_nb_direction_kept = _nb_direction_kept;
_nb_direction_discarded = _nbRHS - _nb_direction_kept;
// Test if needed to launch algorithm again
// Test if algorithm need to be launched again
if (_nb_direction_kept == 0) {
_IB_happened = true;
std::cout << "\t\tTOTAL INEXACT BREAKDOWN on R0\n";
......@@ -276,7 +279,7 @@ private:
Block W = _base.get_W(_nbRHS);
W.copy(Q0);
_base.increase(_nbRHS);
L.init_lambda(Lambda0);
_F.init_lambda(Lambda0);
}
_block_size_sum.clear();
_block_size_sum.push_back(_nb_direction_kept);
......@@ -290,10 +293,10 @@ private:
const std::vector<P> &epsilon, const std::vector<P> &inv_epsilon,
const Matrix &A, const Block<S> &X)
{
Block<S> Y = L.alloc_least_square_sol();
Block<S> PR{L.get_nb_hess_line(), _nbRHS};
L.solve_least_square(Y);
L.compute_proj_residual(PR);
Block<S> Y = _F.alloc_least_square_sol();
Block<S> PR{_F.get_nb_hess_line(), _nbRHS};
_F.solve_least_square(Y);
_F.compute_proj_residual(PR);
Block<S> U;
_nb_direction_kept = PR.decomposition_SVD(U, epsilon, inv_epsilon);
......@@ -302,8 +305,8 @@ private:
if (_nb_direction_kept == 0) { // Suspicious ...
if (!proj_convergence) { // Even more suspicious
FABULOUS_FATAL_ERROR(
"Directions kepts during SVD of residuals is equal to 0, "
"but residual itself has not converged. "
"Directions kepts during SVD of projected residual is "
" equal to 0, but residual itself has not converged. "
"That should not be possible."
);
} else {
......@@ -334,13 +337,21 @@ private:
_size_krylov_space = _base.get_nb_vect();
}
std::tuple<P,P,bool>
check_least_square_residual(const std::vector<P> &epsilon)
{
_logger.notify_least_square_check_begin();
auto minmaxconv = _F.check_least_square_residual(epsilon);
_logger.notify_least_square_check_end();
return minmaxconv;
}
public:
template<class RestartParam>
ArnoldiIB(Logger<P> &logger, Restarter<RestartParam, S> &restarter,
int dim, int nbRHS, int max_krylov_space_size):
_logger(logger),
L{max_krylov_space_size+1, nbRHS},
_F{max_krylov_space_size+1, nbRHS},
_solution_computed{false},
_base{restarter.get_base()},
_dim(dim),
......@@ -376,9 +387,9 @@ public:
*
* \return whether the convergence was reached
*/
template< class Matrix, class Block, class Restarter >
bool run( Matrix &A, Block &X, Block &B,
const int max_krylov_space_size,
template< class Matrix, class Restarter >
bool run( Matrix &A, Block<S> &X, Block<S> &B,
const int max_krylov_space_size,
const std::vector<P> &epsilon,
Restarter&,
Orthogonalizer &ortho)
......@@ -396,13 +407,13 @@ public:
_logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
// STEP1: Compute first block residual
Block R0{_dim, _nbRHS};
Block<S> R0{_dim, _nbRHS};
compute_real_residual(A, X, B, R0);
// auto minmaxR0 = R0.get_min_max_norm(A);
auto MinMaxConv = inexact_breakdown_on_R0(A, R0, epsilon, inv_epsilon, L, WP);
auto MinMaxConv = inexact_breakdown_on_R0(A, R0, epsilon, inv_epsilon, WP);
_logger.notify_iteration_end(
_iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
_iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
_logger.log_real_residual(A, B, X);
if (_nb_direction_kept == 0)
......@@ -415,21 +426,23 @@ public:
_size_krylov_space < max_krylov_space_size) // Main Loop
{
++ _iteration_count;
if (! L.check_room( WP.get_size_W()) )
if (! _F.check_room( WP.get_size_W()) )
break;
_logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
Block Vj = _base.get_Vj();
Block<S> Vj = _base.get_Vj();
W_AMV(A, Vj, WP); // W <- A*M*Vj: perform matrix multiplication
// Ortho and filling of L (the (IB) Hessenberg)
ortho.run(L, _base, WP, A);
_logger.notify_ortho_begin();
ortho.run(_F, _base, WP, A);
_logger.notify_ortho_end();
// WP.check_ortho_WP();// WP.get_W().check_ortho();
//L.display_hess_extended_bitmap();
//_F.display_hess_extended_bitmap();
// Y is the solution of LeastSquare problem: Y = argmin(||F*Y-\Lambda||)
// PR is the projected residual PR <- F*Y-\Lambda
auto MinMaxConv = L.check_least_square_residual(epsilon);
auto MinMaxConv = check_least_square_residual(epsilon);
bool proj_convergence = std::get<2>(MinMaxConv);
convergence = check_real_residual_if(proj_convergence, A, X, B, epsilon);
......@@ -439,8 +452,8 @@ public:
}
_size_krylov_space = _base.get_nb_vect();
_logger.notify_iteration_end(
_iteration_count, _size_krylov_space, WP.get_size_W(), MinMaxConv);
_logger.log_real_residual(A, B, X, _base, L);
_iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
_logger.log_real_residual(A, B, X, _base, _F);
}
if (_solution_computed)
......@@ -463,7 +476,6 @@ public:
}
};
}; // namespace fabulous
#endif // FABULOUS_ARNOLDI_IB_HPP
......@@ -131,15 +131,14 @@ public:
{
int M = _nb_vect + _nbRHS;
int N = _nb_vect;
Block<S> Y = alloc_least_square_sol();
assert( _YY.get_nb_row() == N );
assert( Y.get_nb_row() == N );
assert( LS.get_nb_row() == M );
LS.copy(_R1);
assert(LS.get_nb_col() == _R1.get_nb_col());
assert(LS.get_nb_row() >= _R1.get_nb_row());
//FABULOUS_DEBUG("R1.nb_row()="<<_R1.get_nb_row());
Block<S> Y = alloc_least_square_sol();
solve_least_square(Y);
LapackKernI::gemm(
......
......@@ -4,6 +4,7 @@
#include <cassert>
#include "fabulous/utils/Utils.hpp"
#include "fabulous/kernel/LapackInterface.hpp"
namespace fabulous {
......@@ -24,8 +25,8 @@ public:
*
* \note R MUST BE already allocated ( Q.get_nb_col() x Q.get_nb_col() )
*/
template< class Matrix, class S >
static void InPlaceQRFactoMGS_User(Block<S> &Q, Block<S> &R, const Matrix &A)
template< class Matrix, template <class> class Block, class S >
static void InPlaceQRFactoMGS_User(Block<S> &Q, Block<S>&R, const Matrix &A)
{
int M = Q.get_nb_row();
int N = Q.get_nb_col();
......
......@@ -73,7 +73,7 @@ protected:
assert( H.get_nb_row() == base.get_nb_vect() );
// Block tmp to be added to Hess
Block tmp{dim, W_size};
Block tmp{nb_vect, W_size};
LapackKernI::Tgemm( // H = Vm^{t}*W
nb_vect, W_size, dim,
......
......@@ -337,7 +337,6 @@ private:
case OrthoScheme::ICGS: ICGS(A, base, H, C, D, P, W); break;
default: FABULOUS_FATAL_ERROR("Invalid orthogonalization scheme\n"); break;
}
std::cout<<"Arnoldi RUHE done\n";
}
};
......
......@@ -31,6 +31,7 @@ namespace fabulous {
struct ClassicRestart
{
ClassicRestart() = default;
int get_k() { return 0; }
};
/**
......@@ -178,7 +179,9 @@ public:
template<class HESS >
int run(Block<S> &R1, HESS &hess)
{
double tic = Timer::get_time();
Timer time;
time.start();
int p = _LS.get_nb_col();
int nm = _base.get_nb_vect() - p;
int dim = _base.get_nb_row();
......@@ -314,11 +317,10 @@ public:
hess.set_rhs(R1);// important to do this before orthogonalization end
hess.notify_orthogonalization_end();
double tac = Timer::get_time();
std::cout<<"\tElapsed Time for Computing the Deflation at restart :: "
<<tac-tic<<" seconds\n";
std::cout<<Color::bold<<Color::yellow;
std::cout<<"\t\tDR done\n"<<Color::reset;
time.stop();
double length = time.get_length();
std::cout<<"\tElapsed Time for Computing the Deflation at restart: "<<length<<" seconds\n";
std::cout<<Color::bold<<Color::yellow<<"\t\tDR done\n"<<Color::reset;
return newK;
}
};
......