ArnoldiIB.hpp 16.3 KB
Newer Older
1 2
#ifndef FABULOUS_ARNOLDI_IB_HPP
#define FABULOUS_ARNOLDI_IB_HPP
3

4
namespace fabulous {
5
template<template<class> class HESSENBERG, class S> class ArnoldiIB;
6 7
};

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
8 9 10 11
#include "fabulous/data/Base.hpp"
#include "fabulous/data/BlockWP.hpp"
#include "fabulous/utils/Utils.hpp"
#include "fabulous/utils/Logger.hpp"
12
#include "fabulous/utils/TypeInfo.hpp"
13

MIJIEUX Thomas's avatar
MIJIEUX Thomas committed
14
namespace fabulous {
15 16 17 18 19 20 21 22

/**
 * \brief %Arnoldi iterations with inexact breakdowns
 *
 * GELS kernel is used to solve the LeastSquare problem on the projected matrix
 *
 * \warning This class does NOT support DeflatedRestarting (not implemented)
 */
23 24
template < template<class> class HESSENBERG, class S >
class ArnoldiIB
25
{
26
    static_assert(
27 28
        arnoldiXhessenberg<fabulous::ArnoldiIB, HESSENBERG>::value,
        "ArnoldiIB cannot be combined with this Hessenberg"
29
    );
30 31 32 33 34 35 36 37 38 39
public:
    using value_type = typename Arithmetik<S>::value_type;
    using primary_type = typename Arithmetik<S>::primary_type;
    using P = primary_type;

private:

    void print_header(int nb_mvp, std::ostream &o = std::cout)
    {
        o << "#######################################################\n";
40 41
        o << "#################### Arnoldi IB #######################\n";
        o << "######### Mat Vect product scheduled: "<< nb_mvp <<" ###########\n";
42
    }
43

44 45 46 47 48
    void print_footer(std::ostream &o = std::cout)
    {
        o << "################# Iterations done: "<<_iteration_count<<"(+1)\n";
        o << "#######################################################\n";
    }
49

50 51
private:
    Logger<P> &_logger;
52
    HESSENBERG<S> _F;
53 54 55 56 57
    bool _solution_computed;
    Block<S> _solution;
    Block<S> _last_Y;
    Base<S> &_base;

58
    // GENERIC
59 60
    int _dim;
    int _nbRHS;
61
    int _nb_mvp;
62 63 64 65 66 67 68 69 70 71 72 73 74
    int _size_krylov_space;
    int _iteration_count;

    // IB SPECIFIC:
    int _nb_direction_kept;
    int _old_nb_direction_kept;
    int _nb_direction_discarded;
    std::vector<int> _block_size_sum;
    bool _IB_happened;

    // DR specific
    bool _cold_restart;

75 76 77 78 79
public:
    int get_krylov_space_size() const { return _size_krylov_space; }
    int get_nb_mvp() const { return _nb_mvp; }
    int get_nb_iteration() const { return _iteration_count; }

80 81 82 83 84 85 86 87 88 89 90 91 92 93
private:
    template< class Matrix, class Block, class Base >
    void X_plus_MVY(Matrix &A, Block &X, Base &V, Block &Y)
    {
        int dim = V.get_nb_row();
        Block MVY{dim, Y.get_nb_col()};

        if (A.useRightPrecond()) {
            Block VY{dim, Y.get_nb_col()};
            V.compute_product(Y, VY); // VY =  V*Y
            A.PrecondBlockVect(VY, MVY); // MVY = M*V*Y
        } else {
            V.compute_product(Y, MVY); // MVY = V*Y
        }
94

95 96 97 98
        // X_n = X_0 + MVY
        for (int j = 0; j < X.get_nb_col(); ++j)
            LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
    }
99

100
    template<class Matrix, class Block>
101
    void compute_real_residual(Matrix &A, Block &X, const Block &B, Block &R)
102
    {
103 104 105 106 107
        R.copy(B); // R <- B
        _logger.notify_mvp_begin();
        A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- B-A*X
        _logger.notify_mvp_end();
        _nb_mvp += X.get_nb_col();
108
    }
109

110 111 112 113 114
    template< class Matrix, class Block, class BlockWP  >
    void W_AMV(Matrix &A, Block &V, BlockWP &WP) // W = A*M*V
    {
        Block W = WP.get_W();

115
        _logger.notify_mvp_begin();
116 117 118 119 120 121 122
        if (A.useRightPrecond()) {
            Block MV{W.get_nb_row(), V.get_nb_col()};
            A.PrecondBlockVect(V, MV); // MV = M * V
            A.MatBlockVect(MV, W); // B = A*MV
        } else {
            A.MatBlockVect(V, W); //  W = A * V
        }
123
        _logger.notify_mvp_end();
124
        _nb_mvp += V.get_nb_col();
125 126
    }

127
    template<class Matrix, class Block>
128
    void compute_solution(Matrix &A, Block &X)
129
    {
130 131 132
        Block Y = _F.alloc_least_square_sol();
        _F.solve_least_square(Y);
        X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
133 134
    }

135 136
    template<class Matrix, class Block>
    bool check_real_residual_if(bool projected_residual_converged,
137
                                Matrix &A, Block &X, const Block &B,
138 139 140 141
                                const std::vector<primary_type> &epsilon)
    {
        if (!projected_residual_converged)
            return false;
142

143 144
        _solution = Block{_dim, _nbRHS};
        _solution.copy(X);
145
        compute_solution(A, _solution);
146 147 148

        Block R{_dim, _nbRHS};
        compute_real_residual(A, _solution, B, R); // R <- A*sol - B
149
        auto MinMaxConv = R.check_precision(epsilon, A);
150 151 152 153 154 155
        #ifdef FABULOUS_DEBUG_MODE
        auto min = std::get<0>(MinMaxConv);
        auto max = std::get<1>(MinMaxConv);
        printf("real residual=(%g;%g)\n", min, max);
        #endif

156
        if (std::get<2>(MinMaxConv)) {
157
            _solution_computed = true;
158
            _cold_restart = false;
159 160
            return true;
        }
161
        _cold_restart = true;
162 163 164 165 166
        return false;
    }

    template < class Block>
    void handle_IB(Block &U, BlockWP<S> &WP)
167
    {
168 169 170 171 172 173 174 175 176
        assert( U.get_nb_col() == _nb_direction_kept );

        if (_nb_direction_kept != _old_nb_direction_kept) {
            std::cout<<Color::warning
                     <<"### Inexact Breakdown Occured!! ###\n"
                     <<Color::reset;
            std::cout<<"Nb direction kept: "<<_nb_direction_kept<<"/"<<_nbRHS<<"\n";
            _old_nb_direction_kept = _nb_direction_kept;
        }
177

178
        int nj = _block_size_sum.back();
179
        Block U1_2 = U.sub_block(nj, 0, _nbRHS, _nb_direction_kept);
180

181 182 183 184 185
        Block R{};
        Block W1_W2{_nbRHS, _nbRHS};
        Block W1, W2;
        W1 = W1_W2.sub_block(0, 0, _nbRHS, _nb_direction_kept);
        W2 = W1_W2.sub_block(0, _nb_direction_kept, _nbRHS, _nb_direction_discarded);
186

187
        U1_2.OutOfPlaceQRFacto(W1_W2, R); // Qr facto of bottom block of U
188
        _F.update_phi(W1_W2, _nb_direction_kept);
189
        // Update Phi with \W_1 and \W_2, doc Annexe Eq 2.7
190

191
        Block V = _base.get_W(_nb_direction_kept);
192
        WP.compute_V( W1, V ); // V_{j+1} = [P_{j-1},~Wj] * \W_1
193 194
        WP.update_P( W2 ); // Pj = [P_{j-1},~Wj] * \W_2
        _base.increase(_nb_direction_kept);
195

196 197
        // L_{j+1,} = | \W_1^{H} | * H_j
        // Gj       = | \W_2^{H} | * H_j
198
        _F.update_bottom_line(W1_W2);
199
    }
200

201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
    void handle_IB_on_R0( Block<S> &U, BlockWP<S> &WP,
                          const Block<S> &Q0, const Block<S> &Lambda0)
    {
        std::cout<<Color::yellow<<Color::bold;
        std::cout<<"\t\tINEXACT BREAKDOWN on R0\n";
        std::cout<<"\t\tNb direction kept: "<<_nb_direction_kept<<"/"<<_nbRHS;
        std::cout<<Color::reset<<std::endl;

        int M = U.get_nb_row();
        Block<S> U1 = U.sub_block(0, 0, M, _nb_direction_kept);
        Block<S> U2 = U.sub_block(0, _nb_direction_kept, M, _nb_direction_discarded);

        Block<S> V1 = _base.get_W(_nb_direction_kept);
        LapackKernI::gemm( // V_1 := Q_0 * \U_1
            _dim, _nb_direction_kept, _nbRHS,
            Q0.get_ptr(), Q0.get_leading_dim(),
            U1.get_ptr(), U1.get_leading_dim(),
            V1.get_ptr(), V1.get_leading_dim(),
            S{1.0}, S{0.0}
        );
        _base.increase(_nb_direction_kept);

        WP.increase_size_P(_nb_direction_discarded);
        Block<S> P = WP.get_P();
        LapackKernI::gemm( // P_0 := Q_0 * \U_2
            _dim, _nb_direction_discarded, _nbRHS,
            Q0.get_ptr(), Q0.get_leading_dim(),
            U2.get_ptr(), U2.get_leading_dim(),
            P.get_ptr(), P.get_leading_dim(),
            S{1.0}, S{0.0}
        );

        Block<S> Lambda1{_nbRHS, _nbRHS};
        LapackKernI::Tgemm( // \Lambda_1 := U^H * \Lambda_0
            _nbRHS, _nbRHS, _nbRHS,
            U.get_ptr(), U.get_leading_dim(),
            Lambda0.get_ptr(), Lambda0.get_leading_dim(),
            Lambda1.get_ptr(), Lambda1.get_leading_dim(),
            S{1.0}, S{0.0}
        );
241 242
        _F.init_lambda(Lambda1);
        _F.init_phi(_nb_direction_kept); // Create and initialize Phi (\Phi_1)
243
    }
244

245
    template< class Matrix, class Block, class BlockWP, class Epsilon >
246 247 248 249
    std::tuple<P,P,bool>
    inexact_breakdown_on_R0(
        const Matrix &A, Block &R0,
        const Epsilon &epsilon, const Epsilon &inv_epsilon,
250
        BlockWP &WP)
251 252 253 254 255 256 257 258 259 260 261 262 263
    {
        Block Q0 = R0;
        Block Lambda0{_nbRHS, _nbRHS};
        A.QRFacto(R0, Lambda0); // QR Decomposition of (R0)

        Block U{_nbRHS, _nbRHS};
        Block L0{_nbRHS, _nbRHS};

        L0.copy(Lambda0);
        _nb_direction_kept = L0.decomposition_SVD(U, epsilon, inv_epsilon);
        _old_nb_direction_kept = _nb_direction_kept;
        _nb_direction_discarded = _nbRHS - _nb_direction_kept;

264
        // Test if algorithm need to be launched again
265 266 267 268 269
        if (_nb_direction_kept == 0) {
            _IB_happened = true;
            std::cout << "\t\tTOTAL INEXACT BREAKDOWN on R0\n";
            std::cout << "\t\tNb direction kept: 0 / " << _nbRHS << std::endl;
            return Lambda0.check_precision(epsilon);
270 271
        }

272 273 274 275 276 277 278 279 280 281
        if (_nb_direction_kept != _nbRHS)
            _IB_happened = true;

        if (_IB_happened) {
            handle_IB_on_R0(U, WP, Q0, Lambda0);
        } else {
            std::cout<<"\t\tNo INEXACT BREAKDOWN on R0\n";
            Block W = _base.get_W(_nbRHS);
            W.copy(Q0);
            _base.increase(_nbRHS);
282
            _F.init_lambda(Lambda0);
283 284 285
        }
        _block_size_sum.clear();
        _block_size_sum.push_back(_nb_direction_kept);
286
        _size_krylov_space = _base.get_nb_vect();
287 288 289 290 291
        return Lambda0.check_precision(epsilon);
    }

    template<class Matrix>
    void handle_no_convergence(
292
        int proj_convergence, BlockWP<S> &WP,
293 294 295
        const std::vector<P> &epsilon, const std::vector<P> &inv_epsilon,
        const Matrix &A, const Block<S> &X)
    {
296 297 298 299
        Block<S> Y = _F.alloc_least_square_sol();
        Block<S> PR{_F.get_nb_hess_line(), _nbRHS};
        _F.solve_least_square(Y);
        _F.compute_proj_residual(PR);
300

301 302 303 304 305 306 307
        Block<S> U;
        _nb_direction_kept = PR.decomposition_SVD(U, epsilon, inv_epsilon);
        _nb_direction_discarded = _nbRHS - _nb_direction_kept;

        if (_nb_direction_kept == 0) { // Suspicious ...
            if (!proj_convergence) { // Even more suspicious
                FABULOUS_FATAL_ERROR(
308 309
                    "Directions kepts during SVD of projected residual is "
                    " equal to 0, but residual itself has not converged. "
310 311 312 313 314 315 316
                    "That should not be possible."
                );
            } else {
                // Cold Restart: WARNING: Do not DR there because Arnoldi
                // relation would be false from the begining
                _solution = Block<S>{_dim, _nbRHS};
                _solution.copy(X);
317
                compute_solution(A, _solution);
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
                _solution_computed = true;
                _cold_restart = true;
            }
        }
        if (!_cold_restart) {
            if (_nb_direction_discarded > 0)
                _IB_happened = true;

            if (!_IB_happened) {
                Block<S> W = _base.get_W(_nbRHS);
                W.copy(WP);
                _base.increase(_nbRHS);
            } else {
                handle_IB(U, WP);
            }
        }

        int sum = U.get_nb_col()+_block_size_sum.back();
        _block_size_sum.push_back(sum);
        _size_krylov_space = _base.get_nb_vect();
    }

340 341 342 343 344 345 346 347
    std::tuple<P,P,bool>
    check_least_square_residual(const std::vector<P> &epsilon)
    {
        _logger.notify_least_square_check_begin();
        auto minmaxconv = _F.check_least_square_residual(epsilon);
        _logger.notify_least_square_check_end();
        return minmaxconv;
    }
348 349

public:
350 351
    template<class RestartParam>
    ArnoldiIB(Logger<P> &logger, Restarter<RestartParam, S> &restarter,
352 353
               int dim, int nbRHS, int max_krylov_space_size):
        _logger(logger),
354
        _F{max_krylov_space_size+1, nbRHS},
355 356 357 358
        _solution_computed{false},
        _base{restarter.get_base()},
        _dim(dim),
        _nbRHS(nbRHS),
359 360 361 362 363 364
        _nb_mvp{0},
        _size_krylov_space{0},
        _iteration_count{0},
        _nb_direction_kept{nbRHS},
        _old_nb_direction_kept{nbRHS},
        _nb_direction_discarded{0},
365 366 367
        _IB_happened{false},
        _cold_restart{false}
    {
368
        static_assert(
369
            hessenbergXrestarter<HESSENBERG, RestartParam>::value,
370 371
            "The Hessenberg used is not compatible with the kind of restarting selected"
        );
372 373 374 375 376 377 378 379 380
    }

    /**
     * \brief Arnoldi iterations with inexact breakdowns
     *
     * Solution will be stored inside X at the end of computation.
     * Ref: IB-BGMRES-DR (Giraud Jing Agullo): p.21
     *
     * \param[in] A User Matrix
381
     * \param[in,out] X initial guess (input). Best approx. solution (output)
382 383 384
     * \param[in] B the right hand sides
     * \param max_krylov_space_size maximum size for the Krylov search space
     * \param[in] epsilon tolerance for Residual
385
     * \param ortho Orthogonalizer
386 387
     * or vector wise arnoldi(RUHE). (In distributed, only the vector wise will works)
     *
388
     * \return whether the convergence was reached
389
     */
390 391 392
    template< class Matrix, class Restarter >
    bool run( Matrix &A, Block<S> &X, Block<S> &B,
              const int max_krylov_space_size,
393
              const std::vector<P>  &epsilon,
394
              Restarter&,
395
              Orthogonalizer &ortho)
396 397 398 399
    {
        bool convergence = false;
        print_header(max_krylov_space_size);
        _base.reset(); // DR not implemented ; we force a cold restart
400 401 402
        assert( _nbRHS == B.get_nb_col() ); // Initial Size of block
        assert( _dim == B.get_nb_row() );

403 404 405 406 407 408 409
        // IS_INVARIANT( _nb_direction_discarded + _nb_direction_kept == nbRHS );

        auto inv_epsilon = array_inverse(epsilon);
        BlockWP<S> WP{_dim, _nbRHS}; // Create Block to Store W (and P if IB happens)

        _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
        // STEP1: Compute first block residual
410
        Block<S> R0{_dim, _nbRHS};
411 412
        compute_real_residual(A, X, B, R0);
        // auto minmaxR0 =  R0.get_min_max_norm(A);
413
        auto MinMaxConv = inexact_breakdown_on_R0(A, R0, epsilon, inv_epsilon, WP);
414

415
        _logger.notify_iteration_end(
416
            _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
417
        _logger.log_real_residual(A, B, X);
418

419
        if (_nb_direction_kept == 0)
420
            return true;
421

422
        // Should not be true (because we already tested it):
423 424 425 426 427 428
        convergence = (_nb_direction_discarded == _nbRHS);

        while (!convergence && !_cold_restart &&
               _size_krylov_space < max_krylov_space_size)  // Main Loop
        {
            ++ _iteration_count;
429
            if (! _F.check_room( WP.get_size_W()) )
430
                break;
431

432
            _logger.notify_iteration_begin(_iteration_count, _size_krylov_space);
433
            Block<S> Vj = _base.get_Vj();
434
            W_AMV(A, Vj, WP); // W <- A*M*Vj: perform matrix multiplication
435

436
            // Ortho and filling of L (the (IB) Hessenberg)
437 438 439
            _logger.notify_ortho_begin();
            ortho.run(_F, _base, WP, A);
            _logger.notify_ortho_end();
440
            // WP.check_ortho_WP();// WP.get_W().check_ortho();
441
            //_F.display_hess_extended_bitmap();
442 443 444

            // Y is the solution of LeastSquare problem: Y = argmin(||F*Y-\Lambda||)
            // PR is the projected residual PR <- F*Y-\Lambda
445
            auto MinMaxConv = check_least_square_residual(epsilon);
446
            bool proj_convergence = std::get<2>(MinMaxConv);
447
            convergence = check_real_residual_if(proj_convergence, A, X, B, epsilon);
448 449

            if (!convergence) {
450 451
                handle_no_convergence(proj_convergence, WP,
                                      epsilon, inv_epsilon, A, X);
452
            }
453 454
            _size_krylov_space = _base.get_nb_vect();
            _logger.notify_iteration_end(
455
                _iteration_count, _size_krylov_space, _nb_mvp, MinMaxConv);
456
            _logger.log_real_residual(A, B, X, _base, _F);
457
        }
458 459 460

        if (_solution_computed)
            X.copy(_solution);
461
        else if (_iteration_count)
462
            compute_solution(A, X);
463

464 465 466
        if (_cold_restart)
            FABULOUS_WARNING("WOULD HAVE COLD RESTART");

467 468 469
        #ifdef FABULOUS_DEBUG_MODE
        if (convergence) {
            auto mm = eval_current_solution(A, B, X);
470
            printf("real residual=(%e;%e)\n", mm.first, mm.second);
471 472 473 474
        }
        #endif

        print_footer();
475
        return convergence;
476 477 478
    }
};

479 480
}; // namespace fabulous

481
#endif // FABULOUS_ARNOLDI_IB_HPP