ArnoldiDR.hpp 9.31 KB
Newer Older
1 2
#ifndef FABULOUS_ARNOLDI_DR_HPP
#define FABULOUS_ARNOLDI_DR_HPP
3

4
namespace fabulous {
5
template< template<class> class HESSENBERG, class S > class ArnoldiDR;
6 7
};

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
8 9 10 11
#include "fabulous/data/Base.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/utils/Utils.hpp"
#include "fabulous/utils/Logger.hpp"
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
12
#include "fabulous/utils/Traits.hpp"
13

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
14
#include "fabulous/restart/DeflatedRestart.hpp"
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
15
#include "fabulous/orthogonalization/Orthogonalizer.hpp"
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
16

17 18
namespace fabulous {

19
/**
20
 * \brief %Arnoldi iterations
21
 *
22
 * GELS kernel is used to solve the LeastSquare problem on the projected matrix
23
 *
24
 * This class support DeflatedRestarting
25
 */
26
template< template<class> class HESSENBERG, class S >
27
class ArnoldiDR
28
{
29
    static_assert(
30 31
        arnoldiXhessenberg<fabulous::ArnoldiDR, HESSENBERG>::value,
        "ArnoldiDR cannot be combined with this Hessenberg"
32
    );
33 34 35 36 37 38 39 40
public:
    using value_type = typename Arithmetik<S>::value_type;
    using primary_type = typename Arithmetik<S>::primary_type;
    using P = primary_type;

private:

    void print_header(int nb_mvp, std::ostream &o = std::cout)
41
    {
42 43 44 45
        o << "#######################################################\n";
        o << "#################  Arnoldi Standard ###################\n";
        o << "######## Mat Vect product scheduled "<< nb_mvp <<"###########\n";
    }
46

47
    void print_footer(std::ostream &o = std::cout)
48
    {
49
        o << "################# Iterations done: "<<_iteration_count<<"(+1)\n";
50 51 52 53 54
        o << "#######################################################\n";
    }

private:
    Logger<P> &_logger;
55
    HESSENBERG<S> _hess;
56 57
    bool _solution_computed;
    Block<S> _solution;
58
    Block<S> _lastY;
59
    Base<S> &_base;
60

61 62
    int _dim;
    int _nbRHS;
63
    int _size_krylov_space;
64 65 66 67
    int _nb_mvp;
    int _iteration_count;

    // DR SPECIFIC
68
    bool _cold_restart;
69

70 71 72 73 74
public:
    int get_krylov_space_size() const { return _size_krylov_space; }
    int get_nb_mvp() const { return _nb_mvp; }
    int get_nb_iteration() const { return _iteration_count; }

75 76
private:
    template< class Matrix, class Block, class Base >
77
    void X_plus_MVY(Matrix &A, Block &X, Base &V, const Block &Y)
78 79 80 81 82 83 84
    {
        int dim = V.get_nb_row();
        Block MVY{dim, Y.get_nb_col()};

        if (A.useRightPrecond()) {
            Block VY{dim, Y.get_nb_col()};
            V.compute_product(Y, VY); // VY =  V*Y
85
            A.PrecondBlockVect(VY, MVY); // MVY := M*V*Y
86
        } else {
87
            V.compute_product(Y, MVY); // MVY := V*Y
88 89 90 91 92 93 94
        }
        // X_n = X_0 + MVY
        for (int j = 0; j < X.get_nb_col(); ++j)
            LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
    }

    template<class Matrix, class Block>
95
    void compute_real_residual(Matrix &A, const Block &X, const Block &B, Block &R)
96 97
    {
        R.copy(B); // R <- B
98
        _logger.notify_mvp_begin();
99
        A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- B-A*X
100
        _logger.notify_mvp_end();
101
        _nb_mvp += X.get_nb_col();
102 103 104 105 106
    }

    template< class Matrix, class Block >
    void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V
    {
107
        _logger.notify_mvp_begin();
108 109 110
        if (A.useRightPrecond()) {
            Block MV{W.get_nb_row(), V.get_nb_col()};
            A.PrecondBlockVect(V, MV); // MV = M * V
111
            A.MatBlockVect(MV, W); // W = A*MV
112 113
        } else {
            A.MatBlockVect(V, W); //  W = A * V
114
        }
115
        _logger.notify_mvp_end();
116
        _nb_mvp += V.get_nb_col();
117
    }
118

119 120
    template<class Matrix>
    void compute_solution(Matrix &A, Block<S> &X)
121
    {
122 123
        Block<S> Y = _hess.alloc_least_square_sol();
        _hess.solve_least_square(Y);
124 125 126 127 128
        X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
    }

    template<class Matrix, class Block>
    bool check_real_residual_if(bool projected_residual_converged,
129
                                Matrix &A, const Block &B, Block &X,
130
                                const std::vector<P> &epsilon)
131 132
    {
        if (!projected_residual_converged)
133
            return false;
134 135 136 137

        _solution = Block{_dim, _nbRHS};
        _solution.copy(X);

138
        compute_solution(A, _solution);
139 140
        Block R{_dim, _nbRHS};
        compute_real_residual(A, _solution, B, R); // R <- A*sol - B
141 142

        auto MinMaxConv = R.check_precision(epsilon, A);
143 144

        #ifdef FABULOUS_DEBUG_MODE
145 146 147
        auto min = std::get<0>(MinMaxConv);
        auto max = std::get<1>(MinMaxConv);
        printf("real residual=(%g;%g)\n", min, max);
148
        #endif
149 150

        if (std::get<2>(MinMaxConv)) {
151
            _solution_computed = true;
152 153
            _cold_restart = false;
            return true;
154
        }
155 156
        _cold_restart = true;
        return false;
157 158
    }

159 160 161 162 163 164 165 166 167
    std::tuple<P,P,bool>
    check_least_square_residual(const std::vector<P> &epsilon)
    {
        _logger.notify_least_square_check_begin();
        auto minmaxconv = _hess.check_least_square_residual(epsilon);
        _logger.notify_least_square_check_end();
        return minmaxconv;
    }

168
public:
169 170
    template<class RestartParam>
    ArnoldiDR(Logger<P> &logger, Restarter<RestartParam, S> &restarter,
171 172
            int dim, int nbRHS, int max_krylov_space_size):
        _logger(logger),
173
        _hess{max_krylov_space_size, nbRHS},
174 175 176
        _solution_computed(false),
        _base(restarter.get_base()),
        _dim(dim),
177 178
        _nbRHS(nbRHS),
        _size_krylov_space{0},
179 180
        _nb_mvp{0},
        _iteration_count{0},
181
        _cold_restart{false}
182
    {
183
        static_assert(
184
            hessenbergXrestarter<HESSENBERG, RestartParam>::value,
185 186
            "The Hessenberg used is not compatible with the kind of restarting selected"
        );
187 188 189 190 191 192 193 194 195
    }

    /**
     * \brief Arnoldi iterations
     *
     * Solution will be stored inside X at the end of computation.
     * Ref IB-BGMRES-DR (Giraud Jing Agullo): p.21
     *
     * \param[in] A User Matrix
196
     * \param[in,out] X initial guess (input), and best approximated solution (output)
197 198
     * \param[in] B the right hand sides
     * \param max_krylov_space_size Maximum size of Krylov Search space.
199 200
     * \param[in] epsilon tolerance (backward error) for residual
     * \param[in,out] restarter hold data to deflate at restart
201
     * \param ortho Orthogonalizer
202 203
     * or vector wise arnoldi. (In distributed, only the vector wise will works)
     *
204
     * \return whether the convergence was reached
205
     */
206 207
    template<class Matrix, class Restarter>
    bool run( const Matrix &A, Block<S> &X, const Block<S> &B,
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
208 209
              const int max_krylov_space_size, const std::vector<P> &epsilon,
              Restarter &restarter, OrthoParam &orthoparm )
210 211
    {
        print_header(max_krylov_space_size);
212

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
213
        std::tuple<P,P,bool> MinMaxConv;
214
        _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
215
        if (restarter && _base.get_nb_block() > 0) {
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
216
            MinMaxConv = restarter.run(_hess, epsilon);
217
            //check_arnoldi_formula(A, _base, _hess);
218
        } else { //No DR, or first run of arnoldi
219
            _base.reset();
220
            Block<S> R0 = _base.get_W(_nbRHS);
221
            compute_real_residual(A, X, B, R0);
222
            _base.increase(_nbRHS);
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
223
            Block<S> R1{_nbRHS, _nbRHS}; // R1 = RHS of projected problem
224
            A.QRFacto(R0, R1); /* auto minmaxR0 =  R0.get_min_max_norm(A); */
225
            _hess.set_rhs(R1);
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
226
            MinMaxConv = R1.check_precision(epsilon);
227
        }
228 229 230
        bool convergence = std::get<2>(MinMaxConv);
        _size_krylov_space = _base.get_nb_vect();
        _logger.notify_iteration_end(
231
            _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
232
        _logger.log_real_residual(A, B, X);
233

234
        while (!convergence && _size_krylov_space < max_krylov_space_size) {
235 236
            ++ _iteration_count;
            _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
237
            Block<S> Vj = _base.get_Vj(); // V_j
238
            assert( Vj.get_nb_col() == _nbRHS );
239
            Block<S> W = _base.get_W(_nbRHS); // W (candidate)
240

241
            W_AMV(A, Vj, W); // W := A*M*V_j
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
242 243

            Orthogonalizer ortho{orthoparm};
244
            _logger.notify_ortho_begin();
245
            ortho.run(_hess, _base, W, A);
246
            _logger.notify_ortho_end();
247

248
            _base.increase(_nbRHS); // add W to base
249 250
            _size_krylov_space = _base.get_nb_vect();
            //_hess.display_hess_bitmap("Hess Before Least Square");
251

252
            auto MinMaxConv = check_least_square_residual(epsilon);
253
            bool proj_convergence = std::get<2>(MinMaxConv);
254
            convergence = check_real_residual_if(proj_convergence, A, B, X, epsilon);
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
255 256 257

            //check_arnoldi_formula(A, _base, _hess, _nbRHS);

258
            _logger.notify_iteration_end(
259
                _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
260
            _logger.log_real_residual(A, B, X, _base, _hess);
261
        }
262 263 264 265

        if (_solution_computed)
            X.copy(_solution);
        else if (_iteration_count)
266
            compute_solution(A, X);
267

268
        if (_cold_restart) {
269
            _base.reset();
270 271
        } else if (restarter && !convergence) {
            int M = _hess.get_nb_vect() + _nbRHS;
272
            Block<S> LS{M, _nbRHS};
273 274
            _hess.compute_proj_residual(LS);
            restarter.save_LS(LS);
275
            Block<S> H = _hess.get_hess_block();
276 277
            restarter.save_hess(H);
        }
278

279 280
        print_footer();
        return convergence;
281
    }
282
};
283

284 285
}; // namespace fabulous

286
#endif // FABULOUS_ARNOLDI_DR_HPP