Commit d3432670 authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

bugfix, doc, and typesetting

parent dae3024b
......@@ -93,6 +93,12 @@
source /opt/intel/mkl/bin/mklvars.sh intel64
#+END_SRC
You may also want to reset the build system
(in order to flush cmake cache for instance)
#+BEGIN_SRC shell
rm -rf ${WORKDIR}/build/*
#+END_SRC
Setup the build system:
#+BEGIN_SRC shell
mkdir -p ${WORKDIR}/build
......@@ -100,7 +106,7 @@ cd ${WORKDIR}/build
cmake .. -DCMAKE_BUILD_TYPE=Debug \
-DFABULOUS_DEBUG_MODE=ON \
-DFABULOUS_LAPACKE_NANCHECK=OFF # \
# -DCHAMELEON_DIR=$(spack location -i chameleon)
# -DCHAMELEON_DIR=$(spack location -i chameleon)
#+END_SRC
Eventually compile fabulous:
......
This diff is collapsed.
......@@ -430,8 +430,8 @@ public:
// IS_INVARIANT( _nb_direction_discarded + _nb_direction_kept == nbRHS );
const auto inv_epsilon = array_inverse(epsilon);
bool convergence = initial_iteration(A, X, B, restarter, epsilon, inv_epsilon);
bool convergence = initial_iteration(A, X, B, restarter,
epsilon, inv_epsilon);
while (!convergence && // Main Loop
_size_krylov_space+_nb_direction_kept <= max_krylov_space_size)
{
......@@ -446,7 +446,8 @@ public:
auto MinMaxConv = _F.check_least_square_residual(epsilon);
bool proj_convergence = std::get<2>(MinMaxConv);
convergence = check_real_residual_if(proj_convergence, A, X, B, epsilon);
convergence = check_real_residual_if(proj_convergence,
A, X, B, epsilon);
if (!convergence) {
do_R_criterion(proj_convergence, epsilon, inv_epsilon);
}
......
......@@ -37,22 +37,28 @@ template<class> class Block;
namespace fabulous {
/**
* \brief Matrix storage: Lapack/FORTRAN style (Column major layout) 2D array of scalars
/*!
* \brief Matrix storage: Lapack/FORTRAN style (Column major layout)
* 2D array of scalars
*
* block objects have pointer semantics:
* multiple block object can refer to the same block or to part
* (subblocks) of the sameblocks
*/
template<class S>
class Block
{
private:
mutable int64_t _last_flops;
mutable int64_t _last_flops; /*!< number of flops of last operation */
protected:
int _m;
int _n;
int _ld;
int _m; /*!< number of lines */
int _n; /*!< number of columns */
int _ld; /*!< leading dimension of array */
private:
typedef std::shared_ptr<S> SPtr;
SPtr _sptr;
SPtr _sptr; /*!< shared ptr, memory deleter for block data */
static_assert(
std::is_same<S, typename Arithmetik<S>::value_type>::value,
"template parameter passed to Block class template is unsupported"
......@@ -63,7 +69,7 @@ private:
void resize(int m, int n, int /*nnz*/) { realloc(m, n); }
protected:
S *_ptr;
S *_ptr; /*!< pointer to block data */
public:
void realloc(int m, int n) { *this = Block{m, n}; }
......@@ -128,6 +134,17 @@ public:
}
}
/*!
* \brief create a block describing of sub_block of current block
*
* No copy is performed; The new block will reference the same
* subblock from the current block
*
* \param i starting line of subblock
* \param j starting column of subblock
* \param m number of line in subblock
* \param j number of column in subblock
*/
Block sub_block(int i, int j, int m, int n)
{
assert( ((i+m) <= _m) && ((j+n) <= _n) );
......@@ -150,9 +167,8 @@ public:
inline S &at(int i = 0, int j = 0) { return _ptr[j*_ld + i]; }
inline const S &at(int i = 0, int j = 0) const { return _ptr[j*_ld + i]; }
/* \brief return norm of k-th vector inside the Block
*/
template < class Matrix >
/*! \brief return norm of k-th vector inside the Block */
template<class Matrix>
P get_norm(int k, const Matrix &A) const
{
const S *ptr = get_vect(k);
......@@ -161,20 +177,17 @@ public:
return std::sqrt(fabulous::real(res));
}
/* \brief return norm of k-th vector inside the Block
*/
/*! \brief return norm of k-th vector inside the Block */
P get_norm(int k) const
{
const S *ptr = get_vect(k);
S res = S{0.0};
lapacke::dot(_m, ptr, 1, ptr, 1, &res);
_last_flops = lapacke::dot_flops<S>(_m);
return std::sqrt(fabulous::real(res));
}
/* @ brief compute norms of vector in the Block
*/
/*! \brief compute norms of vector in the Block */
template< class Matrix >
std::vector<P> compute_norms(Matrix &A) const
{
......@@ -190,9 +203,8 @@ public:
return norms;
}
/* @ brief scale vectors in the Block with given coefficients
*/
template < class U >
/*! \brief scale vectors in the Block with given coefficients */
template<class U>
void scale(const std::vector<U> &coef)
{
for (int j = 0; j < _n; ++j)
......@@ -200,9 +212,8 @@ public:
lapacke::scal(_m, S{coef[j]}, get_vect(j), 1);
}
/* @ brief compute min and max norms of vector in the Block
*/
template< class Matrix >
/*! \brief compute min and max norms of vector in the Block */
template<class Matrix>
std::pair<P,P> get_min_max_norm(const Matrix &A) const
{
std::pair<P, P> mm;
......@@ -217,8 +228,7 @@ public:
return mm;
}
/** \brief compute min and max (euclidian) norms of vector in the Block
*/
/*! \brief compute min and max (euclidian) norms of vector in the Block */
std::pair<P,P> get_min_max_norm() const
{
std::pair<P,P> mm;
......@@ -233,15 +243,14 @@ public:
return mm;
}
/** \brief fill the block with zeros
*/
/*! \brief fill the block with zeros */
void zero()
{
for (int j = 0; j < _n; ++j)
std::fill(get_ptr(0, j), get_ptr(0, j)+_m, S{0.0});
}
/** \brief copy the Block given as a parameter into 'this' Block */
/*! \brief copy the Block given as a parameter into 'this' Block */
void copy(const Block &o)
{
int M = std::min(_m, o._m);
......@@ -249,6 +258,7 @@ public:
std::copy(o.get_vect(j), o.get_vect(j)+M, get_vect(j));
}
/*! \brief return a deep copy of the block (memory is duplicated) */
Block copy()
{
Block cpy{_m, _n};
......@@ -256,8 +266,7 @@ public:
return cpy;
}
/** \brief pretty print the block values in a tabular format
*/
/*! \brief pretty print the block values in a tabular format */
void display(std::string name ="", std::ostream &o = std::cout) const
{
if (name != "")
......@@ -270,8 +279,7 @@ public:
o << "\n";
}
/** \brief print information about the Block for debug purposes
*/
/*! \brief print information about the Block for debug purposes */
void debug(std::string name="", std::ostream &o = std::cerr)
{
if (name != "")
......@@ -282,6 +290,7 @@ public:
o << "\n";
}
/*! \brief check orthogonality of the block columns */
void check_ortho(std::string name = "B")
{
Block A{_n, _n};
......@@ -290,10 +299,13 @@ public:
for (int k = 0; k < _n; ++k)
A.at(k, k) -= S{1.0};
auto MM = A.get_min_max_norm();
FABULOUS_DEBUG("CHECK_ORTHO["<<Color::green<<name<<Color::reset<<"]: (B^H*B - I) "
<<"Min="<<MM.first<<" Max="<<MM.second);
FABULOUS_DEBUG(
"CHECK_ORTHO["<<Color::green<<name<<Color::reset<<"]: (B^H*B - I) "
<<"Min="<<MM.first<<" Max="<<MM.second
);
}
/*! check that the block is filled with zeros */
void check_null()
{
for (int j = 0; j < _n; ++j)
......@@ -302,7 +314,7 @@ public:
FABULOUS_DEBUG("check_null OK");
}
/** \brief compare norm of each vector against values in epsilon
/*! \brief compare norm of each vector against values in epsilon
*
* if epsilon.size() == 1; then epsilon[0] is used for all vectors;
* otherwise it is assumed epsilon.size() == this->get_nb_col()
......@@ -334,7 +346,7 @@ public:
}
/** \brief compare norm of each vector against values in epsilon
/*! \brief compare norm of each vector against values in epsilon
*
* if epsilon.size() == 1; then epsilon[0] is used for all vectors;
* otherwise it is assumed epsilon.size() == this->get_nb_col()
......@@ -366,9 +378,7 @@ public:
return std::make_tuple(mm.first, mm.second, mm.second < epsilon[0]);
}
/**
* \brief Wrapper for the other DecompositionSVD method
*/
/*! \brief Wrapper for the other DecompositionSVD method */
int decomposition_SVD(Block &U1,
const std::vector<P> &epsilon,
const std::vector<P> &inv_epsilon)
......@@ -379,15 +389,18 @@ public:
return decomposition_SVD(U1, P{1.0});
}
/**
/*!
* \brief Compute SVD of current block
*
* \f$ M = \mathbb{U}_1 \Sigma_1 \mathbb{V}_1 + \mathbb{U}_2 \Sigma \mathbb{V}_2 \f$
*
* \param[out] U1 vectors associated with singular values superior to threshold.
* \param[out] U1 vectors associated with singular values
* superior to threshold.
* \param threshold minimum norm of kept singular values
*
* \return: count of singular values greater(in module) than the epsilon threshold
* \return count of singular values greater(in module)
* than the epsilon threshold
*/
int decomposition_SVD(Block &U1, P threshold)
{
......@@ -433,9 +446,7 @@ public:
return nb_krylov_direction;
}
/**
* \brief return number of flops performed by last operation
*/
/*! \brief return number of flops performed by last operation */
int64_t get_last_flops() const
{
return _last_flops;
......@@ -443,7 +454,7 @@ public:
}; // end class Block
template< class U >
template<class U>
std::ostream& operator<<(std::ostream &o, const Block<U> &b)
{
b.display("", o);
......
......@@ -68,7 +68,7 @@ public:
_nb_vect{0},
_nb_eigen_pair{0},
_restarted{false},
_phi(nullptr),
_phi{nullptr},
_lambda1{},
_Lambda{},
_solution_computed{false},
......
......@@ -27,7 +27,7 @@ class HessQRIBDR : public Block<S>
public:
FABULOUS_INHERITS_BLOCK(S);
private:
Logger<P> _logger;
Logger<P> &_logger;
int _nbRHS; /*!< number of right hand sides */
int _max_krylov_space_size; /*!< size of allocated number of columns */
int _nb_block_col; /*!< number of block columns */
......
......@@ -20,6 +20,7 @@ void Tgemm(int m, int n, int k,
S *C, int ldc,
S alpha = S{1.0}, S beta = S{0.0})
{
FABULOUS_DEBUG("");
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
}
......
......@@ -2,6 +2,7 @@
#define FABULOUS_GELS_HPP
#include "fabulous/utils/Arithmetic.hpp"
#include "fabulous/utils/Error.hpp"
#include "fabulous/ext/lapacke.h"
namespace fabulous {
......
......@@ -88,7 +88,8 @@ int ormqr(char trans, int m, int n, int k,
int m, int n, type_ *A, int lda, std::vector<type_> &tau) \
{ \
tau.resize(n); \
return LAPACKE_##prefix_##geqrf(LAPACK_COL_MAJOR, m, n, A, lda, tau.data()); \
return LAPACKE_##prefix_##geqrf(LAPACK_COL_MAJOR, m, n, \
A, lda, tau.data()); \
} \
FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_GEQRF);
......@@ -98,7 +99,8 @@ FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_GEQRF);
/******** GQR **************/
#define FABULOUS_SPECIALIZE_GQR(_1, type_, _3, _4, _5, _6, prefix_, unitary_, ...) \
#define FABULOUS_SPECIALIZE_GQR(_1, type_, _3, _4, _5, \
_6, prefix_, unitary_, ...) \
template<> \
int orgqr<type_>( \
int m, int n, int p, type_ *A, int lda, type_ *tau) \
......@@ -112,7 +114,8 @@ FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_GQR);
/************* MQR ********************/
#define FABULOUS_SPECIALIZE_MQR(_1, type_, _3, _4, _5, _6, prefix_, unitary_, ...) \
#define FABULOUS_SPECIALIZE_MQR(_1, type_, _3, _4, _5, \
_6, prefix_, unitary_, ...) \
template<> \
int ormqr(char trans, \
int m, int n, int k, \
......@@ -132,7 +135,8 @@ FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_MQR);
/******** QRF **************/
#define FABULOUS_SPECIALIZE_GEQRF(_1, S_, P_, _4, _5, _6, prefix_, ...) \
#define FABULOUS_SPECIALIZE_GEQRF(_1, S_, P_, _4, _5, \
_6, prefix_, ...) \
template<> \
int geqrf<S_>( \
int m, int n, S_ *A, int lda, std::vector<S_> &tau) \
......@@ -163,7 +167,8 @@ FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_GEQRF);
/******** GQR **************/
#define FABULOUS_SPECIALIZE_GQR(_1, S_, P_, _4, _5, _6, prefix_, unitary_, ...) \
#define FABULOUS_SPECIALIZE_GQR(_1, S_, P_, _4, _5, \
_6, prefix_, unitary_, ...) \
template<> \
int orgqr<S_>( \
int m, int n, int p, S_ *A, int lda, S_ *tau) \
......@@ -192,7 +197,8 @@ FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_GQR);
/************* MQR ********************/
#define FABULOUS_SPECIALIZE_MQR(_1, S_, P_, _4, _5, _6, prefix_, unitary_, ...) \
#define FABULOUS_SPECIALIZE_MQR(_1, S_, P_, _4, _5, \
_6, prefix_, unitary_, ...) \
template<> \
int ormqr(char trans, \
int m, int n, int k, \
......@@ -224,10 +230,10 @@ FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_GQR);
FABULOUS_ARITHMETIC_LIST(FABULOUS_SPECIALIZE_MQR);
#endif // FABULOUS_LAPACKE_NANCHECK
}; // end namespace lapacke
}; // end namespace fabulous
......
......@@ -64,7 +64,7 @@ public:
}
template<class HESS, class Base, class Block, class Matrix, class Logger>
void run(HESS &H, Base &base, Block &W, Matrix &A, Logger logger)
void run(HESS &H, Base &base, Block &W, Matrix &A, Logger &logger)
{
logger.notify_ortho_begin();
int64_t nb_flops;
......
......@@ -43,7 +43,7 @@ public:
int64_t ortho_flops; /*!< number of flops of orthogonalization */
static void print_header(std::ostream &o = std::cerr)
static void print_header(std::ostream &o = std::cout)
{
o <<"global_iteration,local_iteration,krylov_space_size,nb_mvp,"
"current_block_size,minRes,maxRes,minRealRes,maxRealRes,time,"
......@@ -51,7 +51,7 @@ public:
" ortho_flops,name\n";
}
void print(std::string name, std::ostream &o = std::cerr)
void print(std::string name, std::ostream &o = std::cout)
{
o << glob_iteration<<","
<< loc_iteration<<","
......@@ -83,8 +83,8 @@ class Logger
private:
using Entry = LogEntry<P>;
bool _started;
double _total_time;
bool _started; /*!< whether global timer was started */
double _total_time; /*!< total time (over multiple restart) */
Timer _global_timer;
Timer _iterations_timer;
......@@ -95,7 +95,7 @@ private:
Timer _ib_timer;
int64_t _last_ortho_flops;
bool _log_real_residual;
bool _log_real_residual; /*!< always compute real residual (debug purpose) */
int _global_iteration;
int _total_mvp;
int _last_mvp;
......@@ -169,31 +169,26 @@ public:
void notify_mvp_begin()
{
FABULOUS_DEBUG("");
_mvp_timer.start();
}
void notify_ortho_begin()
{
FABULOUS_DEBUG("");
_ortho_timer.start();
}
void notify_least_square_begin()
{
FABULOUS_DEBUG("");
_least_square_timer.start();
}
void notify_facto_begin()
{
FABULOUS_DEBUG("");
_facto_timer.start();
}
void notify_ib_begin()
{
FABULOUS_DEBUG("");
_ib_timer.start();
}
......@@ -201,32 +196,27 @@ public:
void notify_mvp_end()
{
FABULOUS_DEBUG("");
_mvp_timer.stop();
}
void notify_ortho_end(int64_t ortho_flops)
{
FABULOUS_DEBUG("");
_ortho_timer.stop();
_last_ortho_flops = ortho_flops;
}
void notify_least_square_end()
{
FABULOUS_DEBUG("");
_least_square_timer.stop();
}
void notify_facto_end()
{
FABULOUS_DEBUG("");
_facto_timer.stop();
}
void notify_ib_end()
{
FABULOUS_DEBUG("");
_ib_timer.stop();
}
......@@ -258,9 +248,10 @@ public:
mvp_elapsed, ortho_elapsed, ib_elapsed,
_last_ortho_flops }
);
FABULOUS_NOTE("\t\t "<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min
<<"\t"<<Color::red<<"MAX="<<Color::reset<<std::scientific<<max);
FABULOUS_NOTE(
"\t\t "<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min
<<"\t"<<Color::red<<"MAX="<<Color::reset<<std::scientific<<max
);
if (std::isnan(min) || std::isnan(max))
FABULOUS_FATAL_ERROR("nan encoutered");
std::cerr<< "End of Iteration ["<<Color::note<<id<<" :: "
......@@ -284,14 +275,16 @@ public:
auto pair = eval_current_solution(A, B, solution);
auto min = pair.first;
auto max = pair.second;
FABULOUS_NOTE("REAL RES:\t "<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min
<<"\t"<<Color::red<<"MAX="<<Color::reset<<std::scientific<<max);
FABULOUS_NOTE(
"REAL RES:\t "
<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min<<"\t"
<<Color::red<<"MAX="<<Color::reset<<std::scientific<<max
);
_iterations.back().minReal = pair.first;
_iterations.back().maxReal = pair.second;
}
template< class Matrix, class Block >
template<class Matrix, class Block>
void log_real_residual( Matrix &A, const Block &B, const Block &X0)
{
if (!_log_real_residual)
......@@ -300,8 +293,11 @@ public:
auto pair = eval_current_solution(A, B, X0);
auto min = pair.first;
auto max = pair.second;
FABULOUS_NOTE("REAL RES:\t "<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min
<<"\t"<<Color::red<<"MAX="<<Color::reset<<std::scientific<<max);
FABULOUS_NOTE(
"REAL RES:\t "
<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min<<"\t"
<<Color::red<<"MAX="<<Color::reset<<std::scientific<<max
);
_iterations.back().minReal = pair.first;
_iterations.back().maxReal = pair.second;
}
......@@ -324,7 +320,8 @@ public:
/**
* \brief iterator over the iterations with everything.
*/
void for_each_iteration(std::function<void(int,int,int,P,P,P,P,double,int,int)> func)
void for_each_iteration(
std::function<void(int,int,int,P,P,P,P,double,int,int)> func)
{
for (auto &it : _iterations) {
func(it.glob_iteration,
......
......@@ -30,8 +30,8 @@ private:
private:
static double duration_to_seconds(Duration d)
{
auto m = std::chrono::duration_cast<std::chrono::milliseconds>(d);
return m.count() / 1000.0;
auto m = std::chrono::duration_cast<std::chrono::microseconds>(d);
return m.count() / 1000000.0;
}
public:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment