#ifndef FABULOUS_ARNOLDI_HPP
#define FABULOUS_ARNOLDI_HPP

namespace fabulous {
template<template<class> class HESSENBERG, class S > class ArnoldiDR;
};

#include "fabulous/data/Base.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/utils/Utils.hpp"
#include "fabulous/utils/Logger.hpp"
12
#include "fabulous/utils/TypeInfo.hpp"
13

#include "fabulous/restart/DeflatedRestart.hpp"
#include "fabulous/orthogonalization/Arnoldi_Ortho.hpp"

namespace fabulous {

/**
 * \brief %Arnoldi iterations
 *
 * The GELS kernel is used to solve the least-squares problem on the
 * projected matrix.
 *
 * This class supports deflated restarting (DeflatedRestart).
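 *
 * As an orientation for the code below: with a right preconditioner M, the
 * block Arnoldi process builds a basis V_m and a block Hessenberg matrix H_m
 * such that A*M*V_m = V_{m+1}*H_m; the solution update is X = X_0 + M*V_m*Y,
 * where Y minimizes ||R1 - H_m*Y|| and R1 comes from the QR factorization of
 * the initial block residual.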
 */
template<template<class> class HESSENBERG, class S > class ArnoldiDR
{
    static_assert(
        arnoldiXhessenberg<fabulous::ArnoldiDR, HESSENBERG>::value,
        "ArnoldiDR cannot be combined with this Hessenberg"
    );
public:
    using value_type = typename Arithmetik<S>::value_type;
    using primary_type = typename Arithmetik<S>::primary_type;
    using P = primary_type;

private:

    void print_header(int nb_mvp, std::ostream &o = std::cout)
    {
        o << "#######################################################\n";
        o << "#################  Arnoldi Standard ###################\n";
        o << "######## Mat Vect product scheduled "<< nb_mvp <<"###########\n";
    }

    void print_footer(std::ostream &o = std::cout)
    {
        o << "################# Iterations done: " << _iteration_count << " (+1)\n";
        o << "#######################################################\n";
    }

private:
    Logger<P> &_logger;
    HESSENBERG<S> _hess;
    bool _solution_computed;
    Block<S> _solution;
    Block<S> _lastY;
    Base<S> &_base;

    int _dim;
    int _nbRHS;
    int _size_krylov_space;
    int _nb_mvp;
    int _iteration_count;

    // DR SPECIFIC
    bool _cold_restart;

public:
    int get_krylov_space_size() const { return _size_krylov_space; }
    int get_nb_mvp() const { return _nb_mvp; }
    int get_nb_iteration() const { return _iteration_count; }

private:
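    // Solution update X := X + M*V*Y; the right preconditioner M is applied
    // only when the user operator requests it (A.useRightPrecond()).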
    template< class Matrix, class Block, class Base >
    void X_plus_MVY(Matrix &A, Block &X, Base &V, const Block &Y)
    {
        int dim = V.get_nb_row();
        Block MVY{dim, Y.get_nb_col()};

        if (A.useRightPrecond()) {
            Block VY{dim, Y.get_nb_col()};
            V.compute_product(Y, VY); // VY =  V*Y
            A.PrecondBlockVect(VY, MVY); // MVY := M*V*Y
        } else {
            V.compute_product(Y, MVY); // MVY := V*Y
        }
        // X_n = X_0 + MVY
        for (int j = 0; j < X.get_nb_col(); ++j)
            LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
    }

    template<class Matrix, class Block>
    void compute_real_residual(Matrix &A, const Block &X, const Block &B, Block &R)
    {
        R.copy(B); // R <- B
        A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- B-A*X
        _nb_mvp += X.get_nb_col();
    }

    template< class Matrix, class Block >
    void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V
    {
        if (A.useRightPrecond()) {
            Block MV{W.get_nb_row(), V.get_nb_col()};
            A.PrecondBlockVect(V, MV); // MV = M * V
            A.MatBlockVect(MV, W); // W = A*MV
        } else {
            A.MatBlockVect(V, W); //  W = A * V
        }
        _nb_mvp += V.get_nb_col();
    }

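    // Forms X := X_0 + M*V*Y, where Y solves the projected least-squares
    // problem held by the Hessenberg.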
    template<class Matrix>
    void compute_solution(Matrix &A, Block<S> &X)
    {
        Block<S> Y = _hess.alloc_least_square_sol();
        _hess.solve_least_square(Y);
        X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
    }

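    // When the projected (least-squares) residual has converged, build the
    // candidate solution, form its true residual R = B - A*X and check it
    // against epsilon; on success the solution is kept, otherwise a cold
    // restart is requested.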
    template<class Matrix, class Block>
    bool check_real_residual_if(bool projected_residual_converged,
                                Matrix &A, const Block &B, Block &X,
                                const std::vector<P> &epsilon)
    {
        if (!projected_residual_converged)
            return false;

        _solution = Block{_dim, _nbRHS};
        _solution.copy(X);

        compute_solution(A, _solution);
        Block R{_dim, _nbRHS};
        compute_real_residual(A, _solution, B, R); // R <- B - A*sol

        auto MinMaxConv = R.check_precision(epsilon, A);

        #ifdef FABULOUS_DEBUG_MODE
        auto min = std::get<0>(MinMaxConv);
        auto max = std::get<1>(MinMaxConv);
        printf("real residual=(%g;%g)\n", min, max);
        #endif

        if (std::get<2>(MinMaxConv)) {
            _solution_computed = true;
            _cold_restart = false;
            return true;
        }
        _cold_restart = true;
        return false;
    }

public:
    template<template <class> class Restarter>
    ArnoldiDR(Logger<P> &logger, Restarter<S> &restarter,
            int dim, int nbRHS, int max_krylov_space_size):
        _logger(logger),
        _hess{max_krylov_space_size, nbRHS},
        _solution_computed(false),
        _base(restarter.get_base()),
        _dim(dim),
        _nbRHS(nbRHS),
        _size_krylov_space{0},
        _nb_mvp{0},
        _iteration_count{0},
        _cold_restart{false}
    {
        static_assert(
            hessenbergXrestarter<HESSENBERG, Restarter>::value,
            "The Hessenberg used is not compatible with the kind of restarting selected"
        );
    }

    /**
     * \brief Arnoldi iterations
     *
     * The solution is stored inside X at the end of the computation.
     * Ref IB-BGMRES-DR (Giraud Jing Agullo): p.21
     *
     * \param[in] A user matrix
     * \param[in,out] X initial guess (input) and best approximate solution (output)
     * \param[in] B the right-hand sides
     * \param[in] max_krylov_space_size maximum size of the Krylov search space
     * \param[in] epsilon tolerance (backward error) on the residual
     * \param[in,out] restarter holds the data to deflate at restart
     * \param[in] ortho_scheme orthogonalization scheme
     * \param[in] ortho_type choose between block-wise Arnoldi (through a QR
     * factorization) and vector-wise Arnoldi; in distributed mode, only the
     * vector-wise variant works
     *
     * \return whether convergence was reached
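     *
     * A minimal calling sketch is given below. The operator, Hessenberg
     * policy, logger, restarter and orthogonalization values are illustrative
     * placeholders and must be replaced by the types and values the
     * application actually provides.
     * \code
     *   // A, X, B, logger, restarter, ortho_scheme, ortho_type, dim, nbRHS and
     *   // max_krylov_space_size are assumed to be set up elsewhere;
     *   // SomeHessenberg stands for a compatible Hessenberg class template.
     *   fabulous::ArnoldiDR<SomeHessenberg, double> arnoldi(
     *       logger, restarter, dim, nbRHS, max_krylov_space_size);
     *   std::vector<double> epsilon(nbRHS, 1e-8);
     *   bool converged = arnoldi.run(A, X, B, max_krylov_space_size,
     *                                epsilon, restarter,
     *                                ortho_scheme, ortho_type);
     * \endcode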
     */
    template<class Matrix, class Block, class Restarter>
    bool run( const Matrix &A, Block &X, const Block &B,
              const int max_krylov_space_size,
              const std::vector<P> &epsilon,
              Restarter &restarter,
              OrthoScheme ortho_scheme,
              OrthoType   ortho_type    )
    {
        print_header(max_krylov_space_size);

        Block R1{}; // R1 = RHS of projected problem
        _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
        if (restarter && _base.get_nb_block() > 0) {
            restarter.run(R1, _hess);
            //check_arnoldi_formula(A, _base, _hess);
        } else { // no DR, or first run of Arnoldi
            _base.reset();
            Block R0 = _base.get_W(_nbRHS);
            compute_real_residual(A, X, B, R0);
            _base.increase(_nbRHS);
            R1.realloc(_nbRHS, _nbRHS);
            A.QRFacto(R0, R1); /* auto minmaxR0 =  R0.get_min_max_norm(A); */
            _hess.set_rhs(R1);
        }
        auto MinMaxConv = R1.check_precision(epsilon);
        bool convergence = std::get<2>(MinMaxConv);
        _size_krylov_space = _base.get_nb_vect();
        _logger.notify_iteration_end(
            _iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
        _logger.log_real_residual(A, B, X);

        while (!convergence && _size_krylov_space < max_krylov_space_size) {
            ++ _iteration_count;
            _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
            Block Vj = _base.get_Vj(); // V_j
            assert( Vj.get_nb_col() == _nbRHS );
            Block W = _base.get_W(_nbRHS); // W (candidate)

            W_AMV(A, Vj, W); // W := A*M*V_j
            Arnoldi_Ortho(_hess, _base, W, ortho_type, ortho_scheme, A);
            _base.increase(_nbRHS); // add W to base
            _size_krylov_space = _base.get_nb_vect();
            //_hess.display_hess_bitmap("Hess Before Least Square");

            auto MinMaxConv = _hess.check_least_square_residual(epsilon);
            bool proj_convergence = std::get<2>(MinMaxConv);
            convergence = check_real_residual_if(proj_convergence, A, B, X, epsilon);
            _logger.notify_iteration_end(
                _iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
            _logger.log_real_residual(A, B, X, _base, _hess);
        }

        if (_solution_computed)
            X.copy(_solution);
        else if (_iteration_count)
            compute_solution(A, X);

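        // If the projected residual converged but the true residual did not,
        // restart cold (drop the Krylov basis); otherwise save the projected
        // residual and the block Hessenberg for the restarter to deflate.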
        if (_cold_restart) {
            _base.reset();
        } else if (restarter && !convergence) {
            int M = _hess.get_nb_vect() + _nbRHS;
            Block LS{M, _nbRHS};
            _hess.compute_proj_residual(LS);
            restarter.save_LS(LS);
            Block H = _hess.get_hess_block();
            restarter.save_hess(H);
        }

        print_footer();
        return convergence;
    }
};

}; // namespace fabulous

#endif // FABULOUS_ARNOLDI_HPP