Commit 3f08f506 authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

add beginning of flops counting instrumentation

parent b90b2958
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
CLOSED: [2017-05-22 Mon 16:23] CLOSED: [2017-05-22 Mon 16:23]
see [[file:NOTES.org::*fabulous%20linking%20with%20lapacke/cblas%20kernels][fabulous linking with lapacke/cblas kernels]] see [[file:NOTES.org::*fabulous%20linking%20with%20lapacke/cblas%20kernels][fabulous linking with lapacke/cblas kernels]]
* TODO parallel(distributed) test case * TODO parallel(distributed) test case
** test with maphys ** test with maphys
see [[file:LABBOOK.org::*integrate%20and%20test%20latest%20fabulous%20api%20in%20a%20maphys%20fork][integrate and test latest fabulous api in a maphys fork]] see [[file:LABBOOK.org::*integrate%20and%20test%20latest%20fabulous%20api%20in%20a%20maphys%20fork][integrate and test latest fabulous api in a maphys fork]]
* DONE error handling system * DONE error handling system
CLOSED: [2017-05-30 Tue 15:53] CLOSED: [2017-05-30 Tue 15:53]
......
...@@ -237,20 +237,21 @@ private: ...@@ -237,20 +237,21 @@ private:
void apply_right_precond(Block<S> &Zj, Matrix &A, Block<S> &W) void apply_right_precond(Block<S> &Zj, Matrix &A, Block<S> &W)
{ {
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
int64_t flops = 0;
if (A.useRightPrecond()) { if (A.useRightPrecond()) {
A.PrecondBlockVect(W, Zj); flops = A.PrecondBlockVect(W, Zj);
} else { } else {
Zj.copy(W); Zj.copy(W);
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
} }
template<class Matrix> template<class Matrix>
void matrix_vector_product(Block<S> &W, Matrix &A, Block<S> &Zj) void matrix_vector_product(Block<S> &W, Matrix &A, Block<S> &Zj)
{ {
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
A.MatBlockVect(Zj, W); int64_t flops = A.MatBlockVect(Zj, W);
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += W.get_nb_col(); _nb_mvp += W.get_nb_col();
} }
......
...@@ -198,12 +198,13 @@ private: ...@@ -198,12 +198,13 @@ private:
void apply_right_precond(Block<S> &Zj, Matrix &A, Block<S> &R) void apply_right_precond(Block<S> &Zj, Matrix &A, Block<S> &R)
{ {
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
int64_t flops = 0;
if (A.useRightPrecond()) { if (A.useRightPrecond()) {
A.PrecondBlockVect(R, Zj); flops = A.PrecondBlockVect(R, Zj);
} else { } else {
Zj.copy(R); Zj.copy(R);
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
} }
template<class Matrix> template<class Matrix>
...@@ -213,8 +214,8 @@ private: ...@@ -213,8 +214,8 @@ private:
Block<S> Qi = Q.sub_block(0, 0, _dim, _nb_kept_direction); Block<S> Qi = Q.sub_block(0, 0, _dim, _nb_kept_direction);
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
A.MatBlockVect(Pi, Qi); // Qi <- A * Pi int64_t flops = A.MatBlockVect(Pi, Qi); // Qi <- A * Pi
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += Pi.get_nb_col(); _nb_mvp += Pi.get_nb_col();
} }
......
...@@ -99,28 +99,30 @@ private: ...@@ -99,28 +99,30 @@ private:
R.copy(B); // R <- B R.copy(B); // R <- B
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
A.MatBlockVect(X, R); // R <- A*X int64_t flops;
flops = A.MatBlockVect(X, R); // R <- A*X
for (int j = 0; j < R.get_nb_col(); ++j) { for (int j = 0; j < R.get_nb_col(); ++j) {
for (int i = 0; i < R.get_nb_row(); ++i) { for (int i = 0; i < R.get_nb_row(); ++i) {
R.at(i, j) = B.at(i, j) - R.at(i, j); // R -> B - A*X R.at(i, j) = B.at(i, j) - R.at(i, j); // R -> B - A*X
} }
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += X.get_nb_col(); _nb_mvp += X.get_nb_col();
} }
template< class Matrix, class Block > template< class Matrix, class Block >
void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V
{ {
int64_t flops;
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
if (A.useRightPrecond()) { if (A.useRightPrecond()) {
Block MV{W.get_nb_row(), V.get_nb_col()}; Block MV{W.get_nb_row(), V.get_nb_col()};
A.PrecondBlockVect(V, MV); // MV = M * V flops = A.PrecondBlockVect(V, MV); // MV = M * V
A.MatBlockVect(MV, W); // W = A*MV flops += A.MatBlockVect(MV, W); // W = A*MV
} else { } else {
A.MatBlockVect(V, W); // W = A * V flops = A.MatBlockVect(V, W); // W = A * V
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += V.get_nb_col(); _nb_mvp += V.get_nb_col();
} }
......
...@@ -110,13 +110,13 @@ private: ...@@ -110,13 +110,13 @@ private:
{ {
R.copy(B); // R <- B R.copy(B); // R <- B
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
A.MatBlockVect(X, R); // R <- A*X int64_t flops = A.MatBlockVect(X, R); // R <- A*X
for (int j = 0; j < R.get_nb_col(); ++j) { for (int j = 0; j < R.get_nb_col(); ++j) {
for (int i = 0; i < R.get_nb_row(); ++i) { for (int i = 0; i < R.get_nb_row(); ++i) {
R.at(i, j) = B.at(i, j) - R.at(i, j); // R -> B - A*X R.at(i, j) = B.at(i, j) - R.at(i, j); // R -> B - A*X
} }
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += X.get_nb_col(); _nb_mvp += X.get_nb_col();
} }
...@@ -124,16 +124,16 @@ private: ...@@ -124,16 +124,16 @@ private:
void W_AMV(Matrix &A, Block &V, BlockWP &WP) // W = A*M*V void W_AMV(Matrix &A, Block &V, BlockWP &WP) // W = A*M*V
{ {
Block W = WP.get_W(); Block W = WP.get_W();
int64_t flops;
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
if (A.useRightPrecond()) { if (A.useRightPrecond()) {
Block MV{W.get_nb_row(), V.get_nb_col()}; Block MV{W.get_nb_row(), V.get_nb_col()};
A.PrecondBlockVect(V, MV); // MV = M * V flops = A.PrecondBlockVect(V, MV); // MV = M * V
A.MatBlockVect(MV, W); // B = A*MV flops += A.MatBlockVect(MV, W); // B = A*MV
} else { } else {
A.MatBlockVect(V, W); // W = A * V flops = A.MatBlockVect(V, W); // W = A * V
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += V.get_nb_col(); _nb_mvp += V.get_nb_col();
} }
...@@ -291,10 +291,12 @@ private: ...@@ -291,10 +291,12 @@ private:
return; return;
} }
_ib_update_scheduled = false; _ib_update_scheduled = false;
_logger.notify_ib_begin();
if (_nb_direction_discarded == 0) { if (_nb_direction_discarded == 0) {
Block<S> W = _base.get_W(_nbRHS); Block<S> W = _base.get_W(_nbRHS);
W.copy(_WP); W.copy(_WP);
_base.increase(_nbRHS); _base.increase(_nbRHS);
_logger.notify_ib_end(0);
return; return;
} }
...@@ -326,6 +328,7 @@ private: ...@@ -326,6 +328,7 @@ private:
// L_{j+1,} = | \W_1^{H} | * H_j // L_{j+1,} = | \W_1^{H} | * H_j
// Gj = | \W_2^{H} | * H_j // Gj = | \W_2^{H} | * H_j
_F.update_bottom_line(W1_W2, _nb_direction_kept); _F.update_bottom_line(W1_W2, _nb_direction_kept);
_logger.notify_ib_end( - 1 /* FIXME */);
} }
void do_R_criterion(int proj_convergence, void do_R_criterion(int proj_convergence,
...@@ -335,6 +338,8 @@ private: ...@@ -335,6 +338,8 @@ private:
Block<S> Y = _F.alloc_least_square_sol(); Block<S> Y = _F.alloc_least_square_sol();
Block<S> PR{_F.get_nb_hess_line(), _nbRHS}; Block<S> PR{_F.get_nb_hess_line(), _nbRHS};
_F.solve_least_square(Y); _F.solve_least_square(Y);
_logger.notify_ib_begin();
_F.compute_proj_residual(PR, Y); _F.compute_proj_residual(PR, Y);
Block<S> U; Block<S> U;
...@@ -358,6 +363,7 @@ private: ...@@ -358,6 +363,7 @@ private:
} }
} }
_ib_update_scheduled = true; _ib_update_scheduled = true;
_logger.notify_ib_end( -1 /*FIXME */);
} }
/*********************************************************/ /*********************************************************/
......
...@@ -110,31 +110,33 @@ private: ...@@ -110,31 +110,33 @@ private:
void W_AMV(Matrix &A, Block &V, BlockWP &WP) // W = A*M*V void W_AMV(Matrix &A, Block &V, BlockWP &WP) // W = A*M*V
{ {
Block W = WP.get_W(); Block W = WP.get_W();
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
int64_t flops;
if (A.useRightPrecond()) { if (A.useRightPrecond()) {
Block MV{W.get_nb_row(), V.get_nb_col()}; Block MV{W.get_nb_row(), V.get_nb_col()};
A.PrecondBlockVect(V, MV); // MV = M * V flops = A.PrecondBlockVect(V, MV); // MV = M * V
A.MatBlockVect(MV, W); // B = A*MV flops += A.MatBlockVect(MV, W); // B = A*MV
} else { } else {
A.MatBlockVect(V, W); // W = A * V flops = A.MatBlockVect(V, W); // W = A * V
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += V.get_nb_col(); _nb_mvp += V.get_nb_col();
} }
template<class Matrix, class Block> template<class Matrix, class Block>
void compute_real_residual(Matrix &A, Block &X, const Block &B, Block &R) void compute_real_residual(Matrix &A, Block &X, const Block &B, Block &R)
{ {
R.copy(B); // R <- B
_logger.notify_mvp_begin(); _logger.notify_mvp_begin();
A.MatBlockVect(X, R); // R <- A*X
R.copy(B); // R <- B
int64_t flops = A.MatBlockVect(X, R); // R <- A*X
for (int j = 0; j < R.get_nb_col(); ++j) { for (int j = 0; j < R.get_nb_col(); ++j) {
for (int i = 0; i < R.get_nb_row(); ++i) { for (int i = 0; i < R.get_nb_row(); ++i) {
R.at(i, j) = B.at(i, j) - R.at(i, j); // R -> B - A*X R.at(i, j) = B.at(i, j) - R.at(i, j); // R -> B - A*X
} }
} }
_logger.notify_mvp_end(); _logger.notify_mvp_end(flops);
_nb_mvp += X.get_nb_col(); _nb_mvp += X.get_nb_col();
} }
...@@ -338,7 +340,7 @@ private: ...@@ -338,7 +340,7 @@ private:
// L_{j+1,} = | \W_1^{H} | * H_j // L_{j+1,} = | \W_1^{H} | * H_j
// Gj = | \W_2^{H} | * H_j // Gj = | \W_2^{H} | * H_j
_F.update_bottom_line(W1_W2); _F.update_bottom_line(W1_W2);
_logger.notify_ib_end(); _logger.notify_ib_end(0);
// hook that may perform incremental factorization // hook that may perform incremental factorization
_F.after_bottom_line_update(_nb_direction_kept); _F.after_bottom_line_update(_nb_direction_kept);
...@@ -379,7 +381,7 @@ private: ...@@ -379,7 +381,7 @@ private:
} }
} }
_ib_update_scheduled = true; _ib_update_scheduled = true;
_logger.notify_ib_end(); _logger.notify_ib_end( - 1/*FIXME*/ );
} }
/*********************************************************/ /*********************************************************/
......
...@@ -333,7 +333,6 @@ public: ...@@ -333,7 +333,6 @@ public:
return std::make_tuple(mm.first, mm.second, mm.second < epsilon[0]); return std::make_tuple(mm.first, mm.second, mm.second < epsilon[0]);
} }
/*! \brief compare norm of each vector against values in epsilon /*! \brief compare norm of each vector against values in epsilon
* *
* if epsilon.size() == 1; then epsilon[0] is used for all vectors; * if epsilon.size() == 1; then epsilon[0] is used for all vectors;
......
...@@ -107,7 +107,8 @@ public: ...@@ -107,7 +107,8 @@ public:
#else #else
FABULOUS_THROW( FABULOUS_THROW(
Unsupported, Unsupported,
"You must define the FABULOUS_DEBUG_MODE preprocessor constant to use this function" "You must define the FABULOUS_DEBUG_MODE preprocessor"
" constant to use this function"
); );
(void) Hm; (void) Hm;
(void) m; (void) m;
...@@ -169,9 +170,10 @@ private: ...@@ -169,9 +170,10 @@ private:
_logger.notify_facto_begin(); _logger.notify_facto_begin();
MORSE_enum trans = Arithmetik<S>::mtrans; MORSE_enum trans = Arithmetik<S>::mtrans;
int64_t flops = 0;
/* STEP 1: Loop over each block in last column to /* STEP 1: Loop over each block in last column to
* apply already computed Householders * apply already computed Householders vector
* This is almost the only loop that can benefit from * This is almost the only loop that can benefit from
* Chameleon Asynchronicity * Chameleon Asynchronicity
*/ */
...@@ -182,6 +184,7 @@ private: ...@@ -182,6 +184,7 @@ private:
MORSE_desc_t *T = _tau[i].get(); MORSE_desc_t *T = _tau[i].get();
int err = chameleon::ormqr<S>(trans, A, T, C, _seq.get()); int err = chameleon::ormqr<S>(trans, A, T, C, _seq.get());
flops += lapacke::ormqr_flops<S>(_nbRHS, _nbRHS, _nbRHS);
if (err != 0) { if (err != 0) {
FABULOUS_THROW(Kernel, "ormqr 'step' err="<<err); FABULOUS_THROW(Kernel, "ormqr 'step' err="<<err);
} }
...@@ -192,6 +195,7 @@ private: ...@@ -192,6 +195,7 @@ private:
MORSE_desc_t *A = get_sub_hess(k, k, 2, 1); MORSE_desc_t *A = get_sub_hess(k, k, 2, 1);
int err = chameleon::geqrf<S>(A, tau, _seq.get()); int err = chameleon::geqrf<S>(A, tau, _seq.get());
flops += lapacke::geqrf_flops<S>(_nbRHS, _nbRHS);
if (err != 0) { if (err != 0) {
FABULOUS_THROW(Kernel, "geqrf 'last block' err="<<err); FABULOUS_THROW(Kernel, "geqrf 'last block' err="<<err);
} }
...@@ -202,12 +206,12 @@ private: ...@@ -202,12 +206,12 @@ private:
/* STEP 3: Apply Q^H generated at step 2 to last block of RHS */ /* STEP 3: Apply Q^H generated at step 2 to last block of RHS */
MORSE_desc_t *C = get_sub_rhs(k, 0, 2, 1); MORSE_desc_t *C = get_sub_rhs(k, 0, 2, 1);
err = chameleon::ormqr<S>(trans, A, tau.get(), C, _seq.get()); err = chameleon::ormqr<S>(trans, A, tau.get(), C, _seq.get());
flops += lapacke::ormqr_flops<S>(_nbRHS, _nbRHS, _nbRHS);
if (err != 0) { if (err != 0) {
FABULOUS_THROW(Kernel, "ormqr 'RHS' err="<<err); FABULOUS_THROW(Kernel, "ormqr 'RHS' err="<<err);
} }
_last_column_factorized = true; _last_column_factorized = true;
_logger.notify_facto_end(); _logger.notify_facto_end(flops);
} }
public: public:
...@@ -298,7 +302,8 @@ public: ...@@ -298,7 +302,8 @@ public:
MORSE_Sequence_Wait(_seq.get()); MORSE_Sequence_Wait(_seq.get());
MORSE_Tile_to_Lapack(YYd, _Y.get_ptr(), _Y.get_leading_dim()); MORSE_Tile_to_Lapack(YYd, _Y.get_ptr(), _Y.get_leading_dim());
_solution_computed = true; _solution_computed = true;
_logger.notify_least_square_end(); int64_t flops = lapacke::trsm_flops<S>(_nb_vect, _nbRHS);
_logger.notify_least_square_end(flops);
} }
void compute_proj_residual(const Block<S>&, const Block<S>&) void compute_proj_residual(const Block<S>&, const Block<S>&)
......
...@@ -26,19 +26,13 @@ template<class S> class HessChamQR_submat; ...@@ -26,19 +26,13 @@ template<class S> class HessChamQR_submat;
#include "fabulous/utils/Logger.hpp" #include "fabulous/utils/Logger.hpp"
#include "fabulous/kernel/chameleon.hpp" #include "fabulous/kernel/chameleon.hpp"
#include "fabulous/kernel/flops.hpp"
#include "fabulous/data/Block.hpp" #include "fabulous/data/Block.hpp"
#include "fabulous/data/MorseDesc2.hpp" #include "fabulous/data/MorseDesc2.hpp"
namespace fabulous { namespace fabulous {
namespace bgmres { namespace bgmres {
inline std::string make_quad_string_key(int i, int j, int m, int n)
{
std::stringstream ss;
ss<<i<<"|"<<j<<"|"<<m<<"|"<<n;
return ss.str();
}
/*! /*!
* \brief Hessenberg for Incremental QR version with chamelon back-end * \brief Hessenberg for Incremental QR version with chamelon back-end
* *
...@@ -68,8 +62,8 @@ private: ...@@ -68,8 +62,8 @@ private:
Logger<P> &_logger; Logger<P> &_logger;
int _max_nb_iter; /*!< maximum number of iterations */ int _max_nb_iter; /*!< maximum number of iterations */
int _mb; /*!< max number of (big) block line (_nbRHSx_nbRHS) in hessenberg */ int _mt; /*!< max number of (big) block line (_nbRHSx_nbRHS) in hessenberg */
int _nb; /*!< max number of (big) block column (_nbRHSx_nbRHS) in hessenberg */ int _nt; /*!< max number of (big) block column (_nbRHSx_nbRHS) in hessenberg */
int _nbRHS; /*!< number of right-hand-sides; AND size of one (big) block */ int _nbRHS; /*!< number of right-hand-sides; AND size of one (big) block */
int _nb_block_col; /*!< number of block columns in current hessenberg */ int _nb_block_col; /*!< number of block columns in current hessenberg */
int _nb_vect; /*!< number of columns in current hessenberg */ int _nb_vect; /*!< number of columns in current hessenberg */
...@@ -163,6 +157,7 @@ private: ...@@ -163,6 +157,7 @@ private:
_logger.notify_facto_begin(); _logger.notify_facto_begin();
MORSE_enum trans = Arithmetik<S>::mtrans; MORSE_enum trans = Arithmetik<S>::mtrans;
int64_t flops = 0;
/* STEP 1: Loop over each block in last column to /* STEP 1: Loop over each block in last column to
* apply already computed Householders * apply already computed Householders
...@@ -176,6 +171,7 @@ private: ...@@ -176,6 +171,7 @@ private:
MORSE_desc_t *T = _tau[i].get(); MORSE_desc_t *T = _tau[i].get();
int err = chameleon::ormqr<S>(trans, A.get(), T, C.get(), _seq.get()); int err = chameleon::ormqr<S>(trans, A.get(), T, C.get(), _seq.get());
flops += lapacke::ormqr_flops<S>(_nbRHS, _nbRHS, _nbRHS);
if (err != 0) { if (err != 0) {
FABULOUS_THROW(Kernel, "ormqr 'step' err="<<err); FABULOUS_THROW(Kernel, "ormqr 'step' err="<<err);
} }
...@@ -186,6 +182,7 @@ private: ...@@ -186,6 +182,7 @@ private:
MorseDesc2<S> A = get_sub_hess(k, k, 2, 1); MorseDesc2<S> A = get_sub_hess(k, k, 2, 1);
int err = chameleon::geqrf<S>(A.get(), tau, _seq.get()); int err = chameleon::geqrf<S>(A.get(), tau, _seq.get());
flops += lapacke::geqrf_flops<S>(_nbRHS, _nbRHS);
if (err != 0) { if (err != 0) {
FABULOUS_THROW(Kernel, "geqrf 'last block' err="<<err); FABULOUS_THROW(Kernel, "geqrf 'last block' err="<<err);
} }
...@@ -196,36 +193,37 @@ private: ...@@ -196,36 +193,37 @@ private:
/* STEP 3: Apply Q^H generated at step 2 to last block of RHS */ /* STEP 3: Apply Q^H generated at step 2 to last block of RHS */
MorseDesc2<S> C = get_sub_rhs(k, 0, 2, 1); MorseDesc2<S> C = get_sub_rhs(k, 0, 2, 1);
err = chameleon::ormqr<S>(trans, A.get(), tau.get(), C.get(), _seq.get()); err = chameleon::ormqr<S>(trans, A.get(), tau.get(), C.get(), _seq.get());
flops += lapacke::ormqr_flops<S>(_nbRHS, _nbRHS, _nbRHS);
if (err != 0) { if (err != 0) {
FABULOUS_THROW(Kernel, "ormqr 'RHS' err="<<err); FABULOUS_THROW(Kernel, "ormqr 'RHS' err="<<err);
} }
_last_column_factorized = true; _last_column_factorized = true;
_logger.notify_facto_end(); _logger.notify_facto_end(flops);
} }
public: public:
HessChamQR_submat(int max_krylov_space_size, int nbRHS, Logger<P> &logger): HessChamQR_submat(int max_krylov_space_size, int nbRHS, Logger<P> &logger):
_logger{logger}, _logger{logger},
_max_nb_iter{ (max_krylov_space_size/nbRHS) +1 }, _max_nb_iter{ (max_krylov_space_size/nbRHS) +1 },
_mb{ _max_nb_iter+1 }, _mt{ _max_nb_iter+1 },
_nb{ _max_nb_iter }, _nt{ _max_nb_iter },
_nbRHS{nbRHS}, _nbRHS{nbRHS},
_nb_block_col{0}, _nb_block_col{0},
_nb_vect{0}, _nb_vect{0},
_hessenbergs_tiles{_mb, _nb, nbRHS}, _hessenbergs_tiles{_mt, _nt, nbRHS},
_rhs_tiles{_mb, 1, nbRHS}, _rhs_tiles{_mt, 1, nbRHS},
_rhs_tmp_tiles{_mb, 1, nbRHS}, _rhs_tmp_tiles{_mt, 1, nbRHS},
_hess{MorseDesc2<S>::create_papa_descriptor(_hessenbergs_tiles, 0, 0, _mb, _nb)}, _hess{MorseDesc2<S>::create_papa_descriptor(_hessenbergs_tiles, 0, 0, _mt, _nt)},
_rhs{MorseDesc2<S>::create_papa_descriptor(_rhs_tiles, 0, 0, _mb, 1)}, _rhs{MorseDesc2<S>::create_papa_descriptor(_rhs_tiles, 0, 0, _mt, 1)},
_rhs_tmp{MorseDesc2<S>::create_papa_descriptor(_rhs_tmp_tiles, 0, 0, _mb, 1)}, _rhs_tmp{MorseDesc2<S>::create_papa_descriptor(_rhs_tmp_tiles, 0, 0, _mt, 1)},
#ifdef FABULOUS_DEBUG_MODE #ifdef FABULOUS_DEBUG_MODE
_original{_mb*nbRHS, _nb*nbRHS}, _original{_mt*nbRHS, _nt*nbRHS},
#endif #endif
_last_column_factorized{false}, _last_column_factorized{false},
_solution_computed{false}, _solution_computed{false},
_Y{}, _Y{_nt*_nbRHS, _nbRHS},
_HR{} _HR{ _mt*_nbRHS, _nbRHS}
{ {
_tau.reserve(_max_nb_iter); _tau.reserve(_max_nb_iter);
MORSE_sequence_t *sequence = nullptr; MORSE_sequence_t *sequence = nullptr;
...@@ -265,9 +263,7 @@ public: ...@@ -265,9 +263,7 @@ public:
Block<S> alloc_least_square_sol() Block<S> alloc_least_square_sol()
{ {
if (!_solution_computed) return _Y.sub_block(0, 0, _nb_vect, _nbRHS);
_Y = Block<S>{_nb_vect, _nbRHS};
return _Y;
} }
template< class Block > template< class Block >
...@@ -288,11 +284,12 @@ public: ...@@ -288,11 +284,12 @@ public:
// compute the solution // compute the solution
chameleon::trsm<S>(R.get(), Lambda_tmp.get(), _seq.get()); chameleon::trsm<S>(R.get(), Lambda_tmp.get(), _seq.get());
int flops = lapacke::trsm_flops<S>(_nb_vect, _nbRHS);
MORSE_Tile_to_Lapack(Lambda_tmp.get(), _Y.get_ptr(), _Y.get_leading_dim()); MORSE_Tile_to_Lapack(Lambda_tmp.get(), _Y.get_ptr(), _Y.get_leading_dim());
_solution_computed = true; _solution_computed = true;
_logger.notify_least_square_end(); _logger.notify_least_square_end(flops);
} }
void compute_proj_residual(const Block<S>&, const Block<S>&) void compute_proj_residual(const Block<S>&, const Block<S>&)
...@@ -343,7 +340,7 @@ public: ...@@ -343,7 +340,7 @@ public:
FABULOUS_ASSERT( block_size == _nbRHS ); FABULOUS_ASSERT( block_size == _nbRHS );
++ _nb_block_col; ++ _nb_block_col;
_nb_vect += _nbRHS; _nb_vect += _nbRHS;
_HR = Block<S>{_nb_vect+_nbRHS, _nbRHS}; //_HR = Block<S>{_nb_vect+_nbRHS, _nbRHS};
_last_column_factorized = false; _last_column_factorized = false;
_solution_computed = false; _solution_computed = false;
......
...@@ -136,7 +136,8 @@ public: ...@@ -136,7 +136,8 @@ public:
} }
_solution_computed = true; _solution_computed = true;
_logger.notify_least_square_end(); int64_t flops = lapacke::gels_flops<S>(M, N, _nbRHS);
_logger.notify_least_square_end(flops);
} }
void compute_proj_residual(Block<S> &LS, Block<S> &Y) void compute_proj_residual(Block<S> &LS, Block<S> &Y)
......
...@@ -133,7 +133,8 @@ public: ...@@ -133,7 +133,8 @@ public:
} }
_solution_computed = true; _solution_computed = true;
_logger.notify_least_square_end(); int64_t flops = lapacke::gels_flops<S>(M, N, _nbRHS);
_logger.notify_least_square_end(flops);
} }
void compute_proj_residual(Block<S> &LS, Block<S> &Y) void compute_proj_residual(Block<S> &LS, Block<S> &Y)
...@@ -164,7 +165,6 @@ public: ...@@ -164,7 +165,6 @@ public:
Block<S> residual = _YY.sub_block(_nb_vect, 0, _nbRHS, _nbRHS); Block<S> residual = _YY.sub_block(_nb_vect, 0, _nbRHS, _nbRHS);
auto MinMaxConv = residual.check_precision(epsilon); auto MinMaxConv = residual.check_precision(epsilon);
return MinMaxConv; return MinMaxConv;
} }
......
...@@ -275,7 +275,8 @@ public: ...@@ -275,7 +275,8 @@ public:
FABULOUS_THROW(Kernel, "gels (least square) err="<<err); FABULOUS_THROW(Kernel, "gels (least square) err="<<err);
} }
_solution_computed = true;