Commit c0ce1ea3 authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

Make BLOCK ortho variant usable from C api

parent 18d54d27
......@@ -192,6 +192,7 @@ mkdir -p ${WORKDIR}/build
cd ${WORKDIR}/build
cmake .. -DCMAKE_BUILD_TYPE=Debug \
-DFABULOUS_DEBUG_MODE=ON \
-DFABULOUS_LAPACKE_NANCHECK=OFF \
-DFABULOUS_BUILD_EXAMPLES=ON \
-DFABULOUS_BUILD_TESTS=ON \
-DFABULOUS_USE_CHAMELEON=ON
......@@ -228,6 +229,7 @@ cd ${WORKDIR}/build
CC="${WORKDIR}/scripts/cc_args.py gcc" CXX="${WORKDIR}/scripts/cc_args.py g++" \
cmake .. -DCMAKE_BUILD_TYPE=Debug \
-DFABULOUS_DEBUG_MODE=ON \
-DFABULOUS_LAPACKE_NANCHECK=OFF \
-DFABULOUS_BUILD_EXAMPLES=ON \
-DFABULOUS_BUILD_TESTS=ON \
-DFABULOUS_USE_CHAMELEON=ON
......
......@@ -73,22 +73,21 @@ public:
* (no user reduction used in this function; cannot be used in distributed problem)
*
* \param[out] C the C block of the projected matrix to be filled
* \param[in] A the user callback matrix
*/
int64_t compute_C(Block<S> &C)
template<class Matrix>
int64_t compute_C(Block<S> &C, Matrix &A)
{
int64_t nb_flops = 0;
if (!_cursor)
return nb_flops;
lapacke::Tgemm( // C := P^{H} * \widetilde{W}
get_size_P(), get_size_W(), get_nb_row(),
nb_flops += A.DotProduct( // C := P^{H} * \widetilde{W}
get_size_P(), get_size_W(),
get_P_ptr(), get_leading_dim(),
get_W_ptr(), get_leading_dim(),
C.get_ptr(), C.get_leading_dim(),
S{1.0}, S{0.0}
C.get_ptr(), C.get_leading_dim()
);
namespace fps = lapacke::flops;
nb_flops += fps::gemm<S>(get_size_P(), get_size_W(), get_nb_row());
lapacke::gemm( // ~W = ~W - P_j*C
get_nb_row(), get_size_W(), get_size_P(),
......@@ -97,6 +96,7 @@ public:
get_W_ptr(), get_leading_dim(),
S{-1.0}, S{1.0}
);
namespace fps = lapacke::flops;
nb_flops += fps::gemm<S>(get_nb_row(), get_size_W(), get_size_P());
return nb_flops;
}
......
......@@ -247,12 +247,16 @@ public:
Block<S> get_H()
{
return _HR.sub_block(0, 0, _nb_vect, _nbRHS);
auto h = _HR.sub_block(0, 0, _nb_vect, _nbRHS);
h.zero();
return h;
}
Block<S> get_R()
{
return _HR.sub_block(_nb_vect, 0, _nbRHS, _nbRHS);
auto r = _HR.sub_block(_nb_vect, 0, _nbRHS, _nbRHS);
r.zero();
return r;
}
Block<S> get_H1new()
......@@ -314,9 +318,9 @@ public:
void notify_ortho_end()
{
int k = _nb_block_col;
assert( _nb_vect == k * _nbRHS );
assert( _HR.get_nb_col() == _nbRHS );
assert( _HR.get_nb_row() == _nb_vect+_nbRHS );
FABULOUS_ASSERT( _nb_vect == k * _nbRHS );
// FABULOUS_ASSERT( _HR.get_nb_col() == _nbRHS );
// FABULOUS_ASSERT( _HR.get_nb_row() == _nb_vect+_nbRHS );
#ifdef FABULOUS_DEBUG_MODE
// _HR.display("ortho col _HR: ");
......
......@@ -39,16 +39,16 @@ private:
* \param[in,out] W block to be orthogonalized against current base
* \param ortho_scheme the orthogonalization scheme,
*/
template<class S>
int64_t run(HESS &hess, Base<S> &base, Block<S> &W)
template<class Matrix, class S>
int64_t run(HESS &hess, Base<S> &base, Block<S> &W, Matrix &A)
{
hess.increase(W.get_nb_col());
Block<S> H = hess.get_H();
Block<S> R = hess.get_R();
int64_t nb_flops = dispatch(base, H, W);
nb_flops += qr::InPlaceQRFacto(W, R);
int64_t nb_flops = dispatch(base, H, W, A);
nb_flops += A.QRFacto(W, R);
return nb_flops;
}
};
......
......@@ -39,8 +39,8 @@ private:
* \param[in,out] WP candidate block in Inexact breakdown case
* \param ortho_scheme the orthogonalization scheme,
*/
template<class S>
int64_t run(HESS &L, Base<S> &base, BlockWP<S> &WP)
template<class Matrix, class S>
int64_t run(HESS &L, Base<S> &base, BlockWP<S> &WP, Matrix &A)
{
L.increase(WP.get_size_W());
......@@ -49,9 +49,9 @@ private:
Block<S> C = L.get_C(); // hessenberg's parts
Block<S> W = WP.get_W(); // candidate part
int64_t nb_flops = dispatch(base, H, W);
nb_flops += WP.compute_C(C); // Orthogonalisation of (candidate) W against P
nb_flops += qr::InPlaceQRFacto(W, D);
int64_t nb_flops = dispatch(base, H, W, A);
nb_flops += WP.compute_C(C, A); // Orthogonalisation of (candidate) W against P
nb_flops += A.QRFacto(W, D);
return nb_flops;
}
......
......@@ -34,8 +34,8 @@ private:
* \param[out] H block to write the orthogonalization coeffs
* \param[in,out] W block to be orthogonalized against current base
*/
template<class S>
void CGS(Base<S> &base, Block<S> &H, Block<S> &W)
template<class Matrix, class S>
void CGS(Matrix &A, Base<S> &base, Block<S> &H, Block<S> &W)
{
int dim = base.get_nb_row();
int nb_vect = base.get_nb_vect();
......@@ -47,14 +47,12 @@ private:
FABULOUS_ASSERT( H.get_nb_row() == base.get_nb_vect() );
FABULOUS_ASSERT( W.get_nb_row() == base.get_nb_row() );
lapacke::Tgemm( // H = Vm^{t}*W
nb_vect, W_size, dim,
_nb_flops += A.DotProduct( // H = Vm^{t}*W
nb_vect, W_size,
base.get_ptr(), base.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
H.get_ptr(), H.get_leading_dim(),
S{1.0}, S{0.0}
H.get_ptr(), H.get_leading_dim()
);
_nb_flops += fps::gemm<S>(nb_vect, W_size, dim);
lapacke::gemm( // W = W - Vm*H
dim, W_size, nb_vect,
......@@ -73,8 +71,8 @@ private:
* \param[in,out] W block to be orthogonalized against current base
* \param[out] H block to write the orthogonalization coeffs
*/
template<class S>
void ICGS(Base<S> &base, Block<S> &H, Block<S> &W)
template<class Matrix, class S>
void ICGS(Matrix &A, Base<S> &base, Block<S> &H, Block<S> &W)
{
int dim = base.get_nb_row();
int nb_vect = base.get_nb_vect();
......@@ -87,14 +85,12 @@ private:
// Block tmp to be added to Hess
Block<S> tmp{nb_vect, W_size};
lapacke::Tgemm( // H = Vm^{t}*W
nb_vect, W_size, dim,
_nb_flops += A.DotProduct( // H = Vm^{t}*W
nb_vect, W_size,
base.get_ptr(), base.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
tmp.get_ptr(), tmp.get_leading_dim(),
S{1.0}, S{0.0}
tmp.get_ptr(), tmp.get_leading_dim()
);
_nb_flops += fps::gemm<S>(nb_vect, W_size, dim);
lapacke::gemm( // W = W - Vm*H
dim, W_size, nb_vect,
......@@ -119,8 +115,8 @@ private:
* \param[in,out] W block to be orthogonalized against current base
* \param[out] H block to write the orthogonalization coeffs
*/
template<class S>
void MGS(Base<S> &base, Block<S> &H, Block<S> &W)
template<class Matrix, class S>
void MGS(Matrix &A, Base<S> &base, Block<S> &H, Block<S> &W)
{
int dim = base.get_nb_row();
int W_size = W.get_nb_col();
......@@ -137,14 +133,12 @@ private:
Block<S> H_k = H.sub_block(size_block_sum, 0, size_block, W_size);
size_block_sum += size_block;
lapacke::Tgemm( // H_{ij} = V_i^{h} * W
size_block, W_size, dim,
_nb_flops += A.DotProduct( // H_{ij} = V_i^{h} * W
size_block, W_size,
base.get_block_ptr(k), base.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
H_k.get_ptr(), H_k.get_leading_dim(),
S{1.0}, S{0.0}
H_k.get_ptr(), H_k.get_leading_dim()
);
_nb_flops += fps::gemm<S>(size_block, W_size, dim);
lapacke::gemm( // W = W - V_i * H_{ij}
dim, W_size, size_block,
......@@ -167,8 +161,8 @@ private:
* \param[in,out] W block to be orthogonalized against current base
* \param[out] H block to write the orthogonalization coeffs
*/
template<class S>
void IMGS(Base<S> &base, Block<S> &H, Block<S> &W)
template<class Matrix, class S>
void IMGS(Matrix &A, Base<S> &base, Block<S> &H, Block<S> &W)
{
int dim = base.get_nb_row();
int W_size = W.get_nb_col();
......@@ -186,14 +180,12 @@ private:
Block<S> tmp{size_block, W_size};
//FABULOUS_DEBUG("base.block["<<k<<"].size="<<size_block);
lapacke::Tgemm( // tmp = V_i^{h} * W
size_block, W_size, dim,
_nb_flops += A.DotProduct( // tmp = V_i^{h} * W
size_block, W_size,
base.get_block_ptr(k), base.get_leading_dim(),
W.get_ptr(),W.get_leading_dim(),
tmp.get_ptr(), tmp.get_leading_dim(),
S{1.0}, S{0.0}
tmp.get_ptr(), tmp.get_leading_dim()
);
_nb_flops += fps::gemm<S>(size_block, W_size, dim);
lapacke::gemm( // W = W - V_i * tmp
dim, W_size, size_block,
......@@ -230,21 +222,21 @@ protected:
* \param[in,out] W block to be orthogonalized against current base
* \param[out] H block to write the orthogonalization coeffs
*/
template<class S>
int64_t dispatch(Base<S> &base, Block<S> &H, Block<S> &W)
template<class Matrix, class S>
int64_t dispatch(Base<S> &base, Block<S> &H, Block<S> &W, Matrix &A)
{
_nb_flops = 0;
switch (_scheme) {
case OrthoScheme::CGS: CGS(base, H, W); break;
case OrthoScheme::MGS: MGS(base, H, W); break;
case OrthoScheme::CGS: CGS(A, base, H, W); break;
case OrthoScheme::MGS: MGS(A, base, H, W); break;
case OrthoScheme::ICGS:
for (int i = 0; i < _nb_iteration; ++i)
ICGS(base, H, W);
ICGS(A, base, H, W);
break;
case OrthoScheme::IMGS:
for (int i = 0; i < _nb_iteration; ++i)
IMGS(base, H, W);
IMGS(A, base, H, W);
break;
default: FABULOUS_THROW(Parameter, "Invalid orthogonalization scheme."); break;
}
......
......@@ -39,20 +39,20 @@ private:
return ruhe.run(H, base, W, A);
}
template<class HESS, class Base, class Block,
template<class HESS, class Base, class Block, class Matrix,
class = enable_if_t<handle_ib_t<HESS>::value > >
int64_t run_block(HESS &H, Base &base, Block &W)
int64_t run_block(HESS &H, Base &base, Block &W, Matrix &A)
{
bgmres::OrthogonalizerBlockIB<HESS> block{_param};
return block.run(H, base, W);
return block.run(H, base, W, A);
}
template<class HESS, class Base, class Block,
template<class HESS, class Base, class Block, class Matrix,
class = enable_if_t<not handle_ib_t<HESS>::value >, class=void>
int64_t run_block(HESS &H, Base &base, Block &W)
int64_t run_block(HESS &H, Base &base, Block &W, Matrix &A)
{
bgmres::OrthogonalizerBlockSTD<HESS> block{_param};
return block.run(H, base, W);
return block.run(H, base, W, A);
}
public:
......@@ -71,7 +71,7 @@ public:
int64_t nb_flops;
switch (_param.get_type()) {
case OrthoType::RUHE: nb_flops = run_ruhe(H, base, W, A); break;
case OrthoType::BLOCK: nb_flops = run_block(H, base, W); break;
case OrthoType::BLOCK: nb_flops = run_block(H, base, W, A); break;
default:
FABULOUS_THROW(
Parameter,
......
......@@ -39,7 +39,7 @@ private:
/**
* \brief Arnoldi Ruhe version with CGS ortho
*/
template< class Matrix, class S >
template<class Matrix, class S>
void CGS(Matrix &A, Base<S> &base,
Block<S> &H, Block<S> &R, // hessenberg
Block<S> &W) // candidate
......@@ -185,7 +185,7 @@ private:
/**
* \brief Arnoldi Ruhe version with MGS ortho
*/
template< class Matrix, class S >
template<class Matrix, class S>
void MGS(Matrix &A, Base<S> &base,
Block<S> &H, Block<S> &R, // hessenberg
Block<S> &W) // candidate
......
......@@ -32,7 +32,8 @@ struct ApiEngineI
virtual void set_dot_product(fabulous_dot_t user_dot) = 0;
virtual void set_parameters(
int max_mvp, int max_krylov_space_size, void *tolerance, int nb_tol) = 0;
virtual void set_ortho_process(fabulous_orthoproc orthoproc, int iteration) = 0;
virtual void set_ortho_process(fabulous_orthoscheme scheme,
fabulous_orthotype type, int nb_iter) = 0;
virtual int solve(int nrhs, void *B, int ldb, void *X, int ldx) = 0;
virtual int solve_GCR(int nrhs, void *B, int ldb, void *X, int ldx) = 0;
......@@ -73,6 +74,7 @@ private:
std::vector<P> _tolerance;
int _max_krylov_space_size;
OrthoScheme _ortho_scheme;
OrthoType _ortho_type;
int _ortho_nb_iter;
Logger<P> _logger;
......@@ -84,6 +86,7 @@ public:
_tolerance{},
_max_krylov_space_size{0},
_ortho_scheme{OrthoScheme::MGS/*default value*/},
_ortho_type{OrthoType::RUHE/*default value*/},
_ortho_nb_iter{2}
{
}
......@@ -122,20 +125,26 @@ public:
_max_krylov_space_size = max_krylov_space_size;
}
void set_ortho_process(fabulous_orthoproc orthoproc, int nb_iter) override
void set_ortho_process(fabulous_orthoscheme scheme,
fabulous_orthotype type, int nb_iter) override
{
switch (orthoproc) {
switch (scheme) {
case FABULOUS_MGS: _ortho_scheme = OrthoScheme::MGS; break;
case FABULOUS_CGS: _ortho_scheme = OrthoScheme::CGS; break;
case FABULOUS_IMGS: _ortho_scheme = OrthoScheme::IMGS; break;
case FABULOUS_ICGS: _ortho_scheme = OrthoScheme::ICGS; break;
default:
FABULOUS_THROW(Parameter, "Parameter orthoproc is invalid");
FABULOUS_THROW(Parameter, "Parameter ortho_scheme is invalid");
break;
}
if (nb_iter < 2 && (orthoproc == FABULOUS_IMGS
|| orthoproc == FABULOUS_ICGS))
{
switch (type) {
case FABULOUS_RUHE: _ortho_type = OrthoType::RUHE; break;
case FABULOUS_BLOCK: _ortho_type = OrthoType::BLOCK; break;
default:
FABULOUS_THROW(Parameter, "Parameter ortho_type is invalid");
break;
}
if (nb_iter < 2 && (scheme == FABULOUS_IMGS || scheme == FABULOUS_ICGS)) {
FABULOUS_THROW(Parameter, "the iter parameter must be greater than 2 or equal to 2");
}
_ortho_nb_iter = nb_iter;
......@@ -149,7 +158,7 @@ private:
int mvp = bgmres.solve(
_matrix, _dim, nrhs, B, ldb, X, ldx, _tolerance,
algo, _maxMVP, _max_krylov_space_size,
orthogonalization(_ortho_scheme, OrthoType::RUHE, _ortho_nb_iter),
orthogonalization(_ortho_scheme, _ortho_type, _ortho_nb_iter),
classic_restart()
);
_logger = bgmres.get_logger();
......@@ -164,7 +173,7 @@ private:
int mvp = bgmres.solve(
_matrix, _dim, nrhs, B, ldb, X, ldx, _tolerance,
algo, _maxMVP, _max_krylov_space_size,
orthogonalization(_ortho_scheme, OrthoType::RUHE, _ortho_nb_iter),
orthogonalization(_ortho_scheme, _ortho_type, _ortho_nb_iter),
deflated_restart(nb_eigen_pair, target)
);
_logger = bgmres.get_logger();
......@@ -179,7 +188,7 @@ private:
int mvp = bgcr.solve(
_matrix, _dim, nrhs, B, ldb, X, ldx, _tolerance,
algo, _maxMVP, _max_krylov_space_size,
orthogonalization(_ortho_scheme, OrthoType::BLOCK, _ortho_nb_iter)
orthogonalization(_ortho_scheme, _ortho_type, _ortho_nb_iter)
);
_logger = bgcr.get_logger();
return mvp;
......
......@@ -105,11 +105,12 @@ int fabulous_set_parameters(int max_mvp, int max_space_size,
TRY_CATCH_VOID(disp, set_parameters, max_mvp, max_space_size, tolerance, nb_tol);
}
int fabulous_set_ortho_process(fabulous_orthoproc orthoproc, int nb_iter,
fabulous_handle handle)
int fabulous_set_ortho_process(fabulous_orthoscheme scheme,
fabulous_orthotype type,
int nb_iter,fabulous_handle handle)
{
ApiEngineI *disp = FABULOUS_API_ENGINE(handle);
TRY_CATCH_VOID(disp, set_ortho_process, orthoproc, nb_iter);
TRY_CATCH_VOID(disp, set_ortho_process, scheme, type, nb_iter);
}
int fabulous_solve(int nrhs, void *B, int ldb, void *X, int ldx, fabulous_handle handle)
......
......@@ -78,17 +78,26 @@ enum fabulous_arithmetic {
};
/**
* \brief The different orthogonalization schemas available
* \brief The different available orthogonalization schemas
*/
enum fabulous_orthoproc {
enum fabulous_orthoscheme {
FABULOUS_MGS = 0, /*!< Modified Gram-Schmidt */
FABULOUS_CGS = 1, /*!< Classical Gram-Schmidt */
FABULOUS_IMGS = 2, /*!< Iterated Modified Gram-Schmidt */
FABULOUS_ICGS = 3, /*!< Iteratied Classical Gram-Schmidt */
};
/**
* \brief The different available orthogonalization types
*/
enum fabulous_orthotype {
FABULOUS_RUHE = 0, /*!< RUHE variant (vector-by-vector) */
FABULOUS_BLOCK = 1, /*!< BLOCK variant (block-by-block) */
};
typedef enum fabulous_arithmetic fabulous_arithmetic;
typedef enum fabulous_orthoproc fabulous_orthoproc;
typedef enum fabulous_orthoscheme fabulous_orthoscheme;
typedef enum fabulous_orthotype fabulous_orthotype;
/**
* \brief User defined function to compute
......@@ -293,8 +302,9 @@ int fabulous_set_parameters(int max_mvp, int max_krylov_space_size,
* as the default value for the orthoproc parameter
* \return 0 if the call was successful or a negative value to indicate an error
*/
int fabulous_set_ortho_process(fabulous_orthoproc orthoproc, int nb_iter,
fabulous_handle handle);
int fabulous_set_ortho_process(fabulous_orthoscheme scheme,
fabulous_orthotype type,
int nb_iter, fabulous_handle handle);
/**
* \brief Set the user dot product.
......
......@@ -111,8 +111,8 @@ public:
int64_t QRFacto(Block<S> &Q, Block<S> &R) const
{
return qr::InPlaceQRFactoMGS_User(Q, R, *this);
//qr::InPlaceQRFacto(Q, R);
// return qr::InPlaceQRFactoMGS_User(Q, R, *this);
return qr::InPlaceQRFacto(Q, R);
}
/**
......
......@@ -145,7 +145,7 @@ int main(int argc, char *argv[])
1, /* size of tolerance array */
handle /* handle as returned by fabulous_init */
);
fabulous_set_ortho_process(FABULOUS_MGS, 2, handle);
fabulous_set_ortho_process(FABULOUS_MGS, FABULOUS_RUHE, 2, handle);
// call solve function:
int nb_mvp = fabulous_solve_IB(nbRHS, RHS, dim, X0, dim, handle);
......
......@@ -246,7 +246,7 @@ int main(int argc, char *argv[])
*/
handle // the fabulous handle as returned by fabulous_create(...)
);
fabulous_set_ortho_process(FABULOUS_MGS, 2, handle);
fabulous_set_ortho_process(FABULOUS_MGS, FABULOUS_RUHE, 2, handle);
int nbRHS = RHS->dim2;
// Create Initial Solution Block
......
......@@ -35,6 +35,7 @@ ADD_TEST(NAME "test_bcg" COMMAND ./fabulous_test -A BCG -n 10 -f ../data/bcsstk1
ADD_TEST(NAME "test_gcr_cgs" COMMAND ./fabulous_test -A GCR -s CGS)
ADD_TEST(NAME "test_gcr_mgs" COMMAND ./fabulous_test -A GCR -s MGS)
ADD_TEST(NAME "test_qribdr" COMMAND ./fabulous_test -m 300 -A QRIBDR -n 10 -r DEFLATED -p 5)
ADD_TEST(NAME "test_qribdr2" COMMAND ./fabulous_test -m 700 -A QRIBDR -n 100 -r DEFLATED -p 5)
ADD_TEST(NAME "test_ib_cgs_block" COMMAND ./fabulous_test -A IB -s CGS -t BLOCK)
ADD_TEST(NAME "test_ib_imgs_block" COMMAND ./fabulous_test -A IB -s IMGS -t BLOCK)
ADD_TEST(NAME "test_ib_csc_young1c" COMMAND ./fabulous_test -A IB -s CGS -t BLOCK -k CSC)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment