#ifndef FABULOUS_ARNOLDI_DR_HPP
#define FABULOUS_ARNOLDI_DR_HPP

namespace fabulous {
namespace bgmres {
template<class HESSENBERG, class S> class ArnoldiDR;
}
}

#include "fabulous/data/Base.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/utils/Utils.hpp"
#include "fabulous/utils/Logger.hpp"
#include "fabulous/utils/Traits.hpp"

#include "fabulous/restart/Restarter.hpp"
#include "fabulous/orthogonalization/Orthogonalizer.hpp"

namespace fabulous {
namespace bgmres {

/**
 * \brief %Arnoldi iterations
 *
 * The GELS kernel is used to solve the least-squares problem on the projected matrix.
 *
 * This class supports deflated restarting.
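 *
 * Background sketch (standard block GMRES relations, assumed here rather than
 * stated in this file): with right preconditioner M (M = I when none is used),
 * the generated basis satisfies A*M*V_j = V_{j+1}*H_j, so minimizing the true
 * residual over the search space reduces to the small least-squares problem
 * min_Y || R_1 - H_j*Y ||_F on the projected (block Hessenberg) matrix,
 * which is what the GELS kernel solves.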
 */
template<class HESSENBERG, class S>
class ArnoldiDR
{
    static_assert(
        arnoldiXhessenberg<ArnoldiDR, HESSENBERG>::value,
        "ArnoldiDR cannot be combined with this Hessenberg"
    );
public:
    using value_type = typename Arithmetik<S>::value_type;
    using primary_type = typename Arithmetik<S>::primary_type;
    using P = primary_type;

private:
    void print_header(int nb_mvp, std::ostream &o = std::cerr)
    {
        o << "#######################################################\n";
        o << "#################### Arnoldi ##########################\n";
        o << "######## Mat Vect product scheduled "<< nb_mvp <<" ###########\n";
    }

    void print_footer(std::ostream &o = std::cerr)
    {
        o << "################# Iterations done: "<<_iteration_count<<"(+1)\n";
        o << "#######################################################\n";
    }

private:
    Logger<P> &_logger;
    HESSENBERG _hess;
    bool _solution_computed;
    Block<S> _solution;
    Block<S> _lastY;
    Base<S> &_base;

    int _dim;
    int _nbRHS;
    int _size_krylov_space;
    int _nb_mvp;
    int _iteration_count;

    // DR SPECIFIC
    bool _cold_restart;

public:
    int get_krylov_space_size() const { return _size_krylov_space; }
    int get_nb_mvp() const { return _nb_mvp; }
    int get_nb_iteration() const { return _iteration_count; }

private:
    template< class Matrix, class Block, class Base >
    void X_plus_MVY(Matrix &A, Block &X, Base &V, const Block &Y)
    {
        int dim = V.get_nb_row();
        Block MVY{dim, Y.get_nb_col()};

        if (A.useRightPrecond()) {
            Block VY{dim, Y.get_nb_col()};
            V.compute_product(Y, VY); // VY = V*Y
            A.PrecondBlockVect(VY, MVY); // MVY := M*V*Y
        } else {
            V.compute_product(Y, MVY); // MVY := V*Y
        }
        // X_n = X_0 + MVY
        for (int j = 0; j < X.get_nb_col(); ++j)
            lapacke::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
    }

    template<class Matrix, class Block>
    void compute_real_residual(Matrix &A, const Block &X, const Block &B, Block &R)
    {
        R.copy(B); // R <- B
        _logger.notify_mvp_begin();

        int64_t flops;
        flops = A.MatBlockVect(X, R); // R <- A*X
        for (int j = 0; j < R.get_nb_col(); ++j) {
            for (int i = 0; i < R.get_nb_row(); ++i) {
                R.at(i, j) = B.at(i, j) - R.at(i, j); // R <- B - A*X
            }
        }
        _logger.notify_mvp_end(flops);
        _nb_mvp += X.get_nb_col();
    }

    template<class Matrix, class Block>
    void W_AMV(Matrix &A, Block &V, Block &W) // W = A*M*V
    {
        if (A.useRightPrecond()) {
            int64_t flops;

            _logger.notify_precond_begin();
            Block MV{W.get_nb_row(), V.get_nb_col()};
            flops = A.PrecondBlockVect(V, MV); // MV = M * V
            _logger.notify_precond_end(flops);

            _logger.notify_mvp_begin();
            flops = A.MatBlockVect(MV, W); // W = A*MV
            _logger.notify_mvp_end(flops);
        } else {
            int64_t flops;
            _logger.notify_mvp_begin();
            flops = A.MatBlockVect(V, W); //  W = A * V
            _logger.notify_mvp_end(flops);
        }
        _nb_mvp += V.get_nb_col();
    }

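    // Assemble X_n = X_0 + M*V*Y, where Y solves the projected least-squares
    // problem (delegated to the Hessenberg object, which relies on a GELS kernel).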
    template<class Matrix>
    void compute_solution(Matrix &A, Block<S> &X)
    {
        Block<S> Y = _hess.alloc_least_square_sol();
        _hess.solve_least_square(Y);
        X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
    }

    template<class Matrix>
    void do_ortho(Matrix &A, OrthoParam &orthoparm, Block<S> &W)
    {
        Orthogonalizer ortho{orthoparm};
        // Ortho and filling of L (the (IB) Hessenberg)
        ortho.run(_hess, _base, W, A, _logger);
    }

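    // Called once the projected (least-squares) residual has converged: the
    // candidate solution and the true residual B - A*X are formed to confirm
    // convergence. If the true residual has not actually converged,
    // _cold_restart is set so that the next cycle restarts from scratch.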
    template<class Matrix, class Block>
    bool check_real_residual_if(bool projected_residual_converged,
                                Matrix &A, const Block &B, Block &X,
                                const std::vector<P> &epsilon)
    {
        if (!projected_residual_converged)
            return false;

        _solution = Block{_dim, _nbRHS};
        _solution.copy(X);
        compute_solution(A, _solution);
        Block R{_dim, _nbRHS};
        compute_real_residual(A, _solution, B, R); // R <- B - A*_solution

        auto MinMaxConv = R.check_precision(epsilon, A);
        #ifdef FABULOUS_DEBUG_MODE
        auto min = std::get<0>(MinMaxConv);
        auto max = std::get<1>(MinMaxConv);
        printf("real residual=(%g;%g)\n", min, max);
        #endif

        if (std::get<2>(MinMaxConv)) {
            _solution_computed = true;
            _cold_restart = false;
            return true;
        }
        _cold_restart = true;
        return false;
    }

    template<class Matrix, class Epsilon, class Restarter>
    bool initial_iteration(Matrix &A, Block<S> &X, const Block<S> &B,
                           const Epsilon &epsilon, Restarter &restarter)
    {
        std::tuple<P,P,bool> MinMaxConv;
        _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
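        // Two cases: if the restarter already holds a deflation space from a
        // previous cycle, the projected problem is rebuilt from it; otherwise
        // the base is (re)started from the QR factorization of the true
        // residual R0 = B - A*X, whose triangular factor R1 becomes the
        // right-hand side of the projected problem.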
        if (restarter && _base.get_nb_block() > 0) {
            MinMaxConv = restarter.run(_hess, epsilon);
            // check_arnoldi_formula(A, _base, _hess, _nbRHS);
        } else { //No DR, or first run of arnoldi
            _base.reset();
            Block<S> R0 = _base.get_W(_nbRHS);
            compute_real_residual(A, X, B, R0);
            _base.increase(_nbRHS);
            Block<S> R1{_nbRHS, _nbRHS}; // R1 = RHS of projected problem
            A.QRFacto(R0, R1); /* auto minmaxR0 =  R0.get_min_max_norm(A); */
            _hess.set_rhs(R1);
            MinMaxConv = R1.check_precision(epsilon);
        }
        _size_krylov_space = _base.get_nb_vect();
        _logger.notify_iteration_end(
            _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
        _logger.log_real_residual(A, B, X);
        return std::get<2>(MinMaxConv);
    }

public:
    template<class RestartParam>
    ArnoldiDR(Logger<P> &logger, Restarter<RestartParam, S> &restarter,
            int dim, int nbRHS, int max_krylov_space_size):
        _logger(logger),
        _hess{max_krylov_space_size, nbRHS, _logger},
        _solution_computed(false),
        _base(restarter.get_base()),
        _dim(dim),
        _nbRHS(nbRHS),
        _size_krylov_space{0},
        _nb_mvp{0},
        _iteration_count{0},
        _cold_restart{false}
    {
        static_assert(
            hessenbergXrestarter<HESSENBERG, RestartParam>::value,
            "The Hessenberg used is not compatible with the kind "
            "of restarting selected"
        );
    }

    /**
     * \brief Arnoldi iterations
     *
     * The solution is stored inside X at the end of the computation.
     * Ref IB-BGMRES-DR (Giraud, Jing, Agullo): p.21
     *
     * \param[in] A User Matrix
     * \param[in,out] X initial guess (input), and best approximate solution (output)
     * \param[in] B the right hand sides
     * \param[in] max_krylov_space_size Maximum size of the Krylov search space.
     * \param[in] epsilon tolerance (backward error) for residual
     * \param[in,out] restarter holds data to deflate at restart
     * \param orthoparm Orthogonalizer parameters: block-wise
     * or vector-wise Arnoldi. (In distributed mode, only the vector-wise variant works.)
     *
     * \return whether convergence was reached
     */
    template<class Matrix, class Restarter>
    bool run( const Matrix &A, Block<S> &X, const Block<S> &B,
              const int max_krylov_space_size, const std::vector<P> &epsilon,
              Restarter &restarter, OrthoParam &orthoparm )
    {
        print_header(max_krylov_space_size);

        bool convergence = initial_iteration(A, X, B, epsilon, restarter);

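        // Main block-Arnoldi loop (summary added for readability): each
        // iteration (1) applies the operator and the optional right
        // preconditioner to the last basis block V_j, (2) orthogonalizes the
        // candidate W against the current base while filling the Hessenberg,
        // and (3) checks the projected least-squares residual and, if it has
        // converged, confirms against the true residual.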
        while (!convergence && // Main Loop
               _size_krylov_space + _nbRHS <= max_krylov_space_size)
        {
            ++ _iteration_count;
            _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
            Block<S> Vj = _base.get_Vj(); // V_j
            assert( Vj.get_nb_col() == _nbRHS );
            Block<S> W = _base.get_W(_nbRHS); // W (candidate)

            W_AMV(A, Vj, W); // W := A*M*V_j
            do_ortho(A, orthoparm, W);
            _base.increase(_nbRHS); // add W to base

            //_hess.display_hess_bitmap("Hess Before Least Square");
            auto MinMaxConv = _hess.check_least_square_residual(epsilon);
            bool proj_convergence = std::get<2>(MinMaxConv);
            convergence = check_real_residual_if(proj_convergence, A, B, X, epsilon);
            //check_arnoldi_formula(A, _base, _hess, _nbRHS);

            _size_krylov_space = _base.get_nb_vect();
            _logger.notify_iteration_end(
                _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
            _logger.log_real_residual(A, B, X, _base, _hess);
        }

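        // Form the final iterate: reuse the solution already assembled during
        // the true-residual check, otherwise compute X = X_0 + M*V*Y from the
        // current least-squares solution.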
        if (_solution_computed)
            X.copy(_solution);
        else if (_iteration_count)
            compute_solution(A, X);

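        // Restart handling: a "cold" restart discards the base entirely;
        // otherwise the projected residual and the Hessenberg block are handed
        // to the restarter so deflation data can be rebuilt for the next cycle.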
        if (_cold_restart) {
            FABULOUS_WARNING("cold restart");
            _base.reset();
        } else if (restarter && !convergence) {
            Block<S> PR{_hess.get_nb_vect() + _nbRHS, _nbRHS};
            Block<S> Y = _hess.alloc_least_square_sol();
            _hess.solve_least_square(Y);
            _hess.compute_proj_residual(PR, Y);
            restarter.save_LS(PR);
            Block<S> H = _hess.get_hess_block();
            restarter.save_hess(H);
        }

        print_footer();
        _logger.set_convergence(convergence);
        return convergence;
    }

};

} // end namespace bgmres
} // end namespace fabulous

#endif // FABULOUS_ARNOLDI_DR_HPP