ArnoldiDR.hpp 9.82 KB
Newer Older
1 2
#ifndef FABULOUS_ARNOLDI_DR_HPP
#define FABULOUS_ARNOLDI_DR_HPP
3

4
namespace fabulous {
5
template< template<class> class HESSENBERG, class S > class ArnoldiDR;
6 7
};

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
8 9 10 11
#include "fabulous/data/Base.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/utils/Utils.hpp"
#include "fabulous/utils/Logger.hpp"
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
12
#include "fabulous/utils/Traits.hpp"
13

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
14
#include "fabulous/restart/DeflatedRestart.hpp"
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
15
#include "fabulous/orthogonalization/Orthogonalizer.hpp"
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
16

17 18
namespace fabulous {

19
/**
20
 * \brief %Arnoldi iterations
21
 *
22
 * GELS kernel is used to solve the LeastSquare problem on the projected matrix
23
 *
24
 * This class support DeflatedRestarting
25
 */
26
template< template<class> class HESSENBERG, class S >
27
class ArnoldiDR
28
{
29
    static_assert(
30 31
        arnoldiXhessenberg<fabulous::ArnoldiDR, HESSENBERG>::value,
        "ArnoldiDR cannot be combined with this Hessenberg"
32
    );
33 34 35 36 37 38 39
public:
    using value_type = typename Arithmetik<S>::value_type;
    using primary_type = typename Arithmetik<S>::primary_type;
    using P = primary_type;

private:
    void print_header(int nb_mvp, std::ostream &o = std::cout)
40
    {
41
        o << "#######################################################\n";
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
42
        o << "#################### Arnoldi ##########################\n";
43 44
        o << "######## Mat Vect product scheduled "<< nb_mvp <<"###########\n";
    }
45

46
    void print_footer(std::ostream &o = std::cout)
47
    {
48
        o << "################# Iterations done: "<<_iteration_count<<"(+1)\n";
49 50 51 52 53
        o << "#######################################################\n";
    }

private:
    Logger<P> &_logger;
54
    HESSENBERG<S> _hess;
55 56
    bool _solution_computed;
    Block<S> _solution;
57
    Block<S> _lastY;
58
    Base<S> &_base;
59

60 61
    int _dim;
    int _nbRHS;
62
    int _size_krylov_space;
63 64 65 66
    int _nb_mvp;
    int _iteration_count;

    // DR SPECIFIC
67
    bool _cold_restart;
68

69 70 71 72 73
public:
    int get_krylov_space_size() const { return _size_krylov_space; }
    int get_nb_mvp() const { return _nb_mvp; }
    int get_nb_iteration() const { return _iteration_count; }

74 75
private:
    template< class Matrix, class Block, class Base >
76
    void X_plus_MVY(Matrix &A, Block &X, Base &V, const Block &Y)
77 78 79 80 81 82 83
    {
        int dim = V.get_nb_row();
        Block MVY{dim, Y.get_nb_col()};

        if (A.useRightPrecond()) {
            Block VY{dim, Y.get_nb_col()};
            V.compute_product(Y, VY); // VY =  V*Y
84
            A.PrecondBlockVect(VY, MVY); // MVY := M*V*Y
85
        } else {
86
            V.compute_product(Y, MVY); // MVY := V*Y
87 88 89 90 91 92 93
        }
        // X_n = X_0 + MVY
        for (int j = 0; j < X.get_nb_col(); ++j)
            LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
    }

    template<class Matrix, class Block>
94
    void compute_real_residual(Matrix &A, const Block &X, const Block &B, Block &R)
95 96
    {
        R.copy(B); // R <- B
97
        _logger.notify_mvp_begin();
98
        A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- B-A*X
99
        _logger.notify_mvp_end();
100
        _nb_mvp += X.get_nb_col();
101 102 103 104 105
    }

    template< class Matrix, class Block >
    void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V
    {
106
        _logger.notify_mvp_begin();
107 108 109
        if (A.useRightPrecond()) {
            Block MV{W.get_nb_row(), V.get_nb_col()};
            A.PrecondBlockVect(V, MV); // MV = M * V
110
            A.MatBlockVect(MV, W); // W = A*MV
111 112
        } else {
            A.MatBlockVect(V, W); //  W = A * V
113
        }
114
        _logger.notify_mvp_end();
115
        _nb_mvp += V.get_nb_col();
116
    }
117

118 119
    template<class Matrix>
    void compute_solution(Matrix &A, Block<S> &X)
120
    {
121 122
        Block<S> Y = _hess.alloc_least_square_sol();
        _hess.solve_least_square(Y);
123 124 125
        X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
    }

126 127 128 129 130 131 132 133 134
    template<class Matrix>
    void do_ortho(Matrix &A, OrthoParam &orthoparm, Block<S> &W)
    {
        Orthogonalizer ortho{orthoparm};
        _logger.notify_ortho_begin();
        ortho.run(_hess, _base, W, A); // Ortho and filling of L (the (IB) Hessenberg)
        _logger.notify_ortho_end();
    }

135 136
    template<class Matrix, class Block>
    bool check_real_residual_if(bool projected_residual_converged,
137
                                Matrix &A, const Block &B, Block &X,
138
                                const std::vector<P> &epsilon)
139 140
    {
        if (!projected_residual_converged)
141
            return false;
142 143 144 145

        _solution = Block{_dim, _nbRHS};
        _solution.copy(X);

146
        compute_solution(A, _solution);
147 148
        Block R{_dim, _nbRHS};
        compute_real_residual(A, _solution, B, R); // R <- A*sol - B
149 150

        auto MinMaxConv = R.check_precision(epsilon, A);
151
        #ifdef FABULOUS_DEBUG_MODE
152 153 154
        auto min = std::get<0>(MinMaxConv);
        auto max = std::get<1>(MinMaxConv);
        printf("real residual=(%g;%g)\n", min, max);
155
        #endif
156 157

        if (std::get<2>(MinMaxConv)) {
158
            _solution_computed = true;
159 160
            _cold_restart = false;
            return true;
161
        }
162 163
        _cold_restart = true;
        return false;
164 165
    }

166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
    template<class Matrix, class Epsilon, class Restarter>
    bool initial_iteration(Matrix &A, Block<S> &X, const Block<S> &B,
                           const Epsilon &epsilon, Restarter &restarter)
    {
        std::tuple<P,P,bool> MinMaxConv;
        _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
        if (restarter && _base.get_nb_block() > 0) {
            MinMaxConv = restarter.run(_hess, epsilon);
            //check_arnoldi_formula(A, _base, _hess);
        } else { //No DR, or first run of arnoldi
            _base.reset();
            Block<S> R0 = _base.get_W(_nbRHS);
            compute_real_residual(A, X, B, R0);
            _base.increase(_nbRHS);
            Block<S> R1{_nbRHS, _nbRHS}; // R1 = RHS of projected problem
            A.QRFacto(R0, R1); /* auto minmaxR0 =  R0.get_min_max_norm(A); */
            _hess.set_rhs(R1);
            MinMaxConv = R1.check_precision(epsilon);
        }
        _size_krylov_space = _base.get_nb_vect();
        _logger.notify_iteration_end(
            _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
        _logger.log_real_residual(A, B, X);
        return std::get<2>(MinMaxConv);
    }

192 193 194 195 196 197 198 199 200
    std::tuple<P,P,bool>
    check_least_square_residual(const std::vector<P> &epsilon)
    {
        _logger.notify_least_square_check_begin();
        auto minmaxconv = _hess.check_least_square_residual(epsilon);
        _logger.notify_least_square_check_end();
        return minmaxconv;
    }

201
public:
202 203
    template<class RestartParam>
    ArnoldiDR(Logger<P> &logger, Restarter<RestartParam, S> &restarter,
204 205
            int dim, int nbRHS, int max_krylov_space_size):
        _logger(logger),
206
        _hess{max_krylov_space_size, nbRHS},
207 208 209
        _solution_computed(false),
        _base(restarter.get_base()),
        _dim(dim),
210 211
        _nbRHS(nbRHS),
        _size_krylov_space{0},
212 213
        _nb_mvp{0},
        _iteration_count{0},
214
        _cold_restart{false}
215
    {
216
        static_assert(
217
            hessenbergXrestarter<HESSENBERG, RestartParam>::value,
218 219
            "The Hessenberg used is not compatible with the kind of restarting selected"
        );
220 221 222 223 224 225 226 227 228
    }

    /**
     * \brief Arnoldi iterations
     *
     * Solution will be stored inside X at the end of computation.
     * Ref IB-BGMRES-DR (Giraud Jing Agullo): p.21
     *
     * \param[in] A User Matrix
229
     * \param[in,out] X initial guess (input), and best approximated solution (output)
230 231
     * \param[in] B the right hand sides
     * \param max_krylov_space_size Maximum size of Krylov Search space.
232 233
     * \param[in] epsilon tolerance (backward error) for residual
     * \param[in,out] restarter hold data to deflate at restart
234
     * \param orthoparm Orthogonalizer
235 236
     * or vector wise arnoldi. (In distributed, only the vector wise will works)
     *
237
     * \return whether the convergence was reached
238
     */
239 240
    template<class Matrix, class Restarter>
    bool run( const Matrix &A, Block<S> &X, const Block<S> &B,
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
241 242
              const int max_krylov_space_size, const std::vector<P> &epsilon,
              Restarter &restarter, OrthoParam &orthoparm )
243 244
    {
        print_header(max_krylov_space_size);
245

246
        bool convergence = initial_iteration(A, X, B, epsilon, restarter);
247

248 249 250
        while (!convergence && // Main Loop
               _size_krylov_space + _nbRHS < max_krylov_space_size)
        {
251 252
            ++ _iteration_count;
            _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
253
            Block<S> Vj = _base.get_Vj(); // V_j
254
            assert( Vj.get_nb_col() == _nbRHS );
255
            Block<S> W = _base.get_W(_nbRHS); // W (candidate)
256

257
            W_AMV(A, Vj, W); // W := A*M*V_j
258
            do_ortho(A, orthoparm, W);
259 260
            _base.increase(_nbRHS); // add W to base

261
            //_hess.display_hess_bitmap("Hess Before Least Square");
262
            auto MinMaxConv = check_least_square_residual(epsilon);
263
            bool proj_convergence = std::get<2>(MinMaxConv);
264
            convergence = check_real_residual_if(proj_convergence, A, B, X, epsilon);
MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
265 266
            //check_arnoldi_formula(A, _base, _hess, _nbRHS);

267
            _size_krylov_space = _base.get_nb_vect();
268
            _logger.notify_iteration_end(
269
                _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
270
            _logger.log_real_residual(A, B, X, _base, _hess);
271
        }
272 273 274 275

        if (_solution_computed)
            X.copy(_solution);
        else if (_iteration_count)
276
            compute_solution(A, X);
277

278
        if (_cold_restart) {
279
            FABULOUS_WARNING("cold restart");
280
            _base.reset();
281
        } else if (restarter && !convergence) {
282 283 284
            Block<S> PR{_hess.get_nb_vect() + _nbRHS, _nbRHS};
            _hess.compute_proj_residual(PR);
            restarter.save_LS(PR);
285
            Block<S> H = _hess.get_hess_block();
286 287
            restarter.save_hess(H);
        }
288

289 290
        print_footer();
        return convergence;
291
    }
292

293
};
294

295
}; // end namespace fabulous
296

297
#endif // FABULOUS_ARNOLDI_DR_HPP