Commit 70e62ca4 authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

plot scripts+little bugfixes

parent 7dbb1f1f
......@@ -1471,7 +1471,7 @@ FORMULA_TRANSPARENT = YES
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.
USE_MATHJAX = NO
USE_MATHJAX = YES
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
......
#This script display convergence (i.e only residuals) of 3 files given
#in parameters
#To move the legend
set key center top
set term png size 1600,900
set output "Output_IB.png"
set title "IB-BGMRes, Residual" font ",14"
set tmargin 30
set lmargin 15
set rmargin 15
set xlabel "Number of Mat Vect Product" font ",8"
set xtics nomirror
set y2tics
set ylabel "Residual norm"
set y2label "Size of Block"
set y2range [0:7]
set tmargin 2
set title "Min Max Residuals" font ",11"
set logscale y
plot filename2 using 2:4 with l title "Min Res IB + CGS RUHE",\
filename2 using 2:5 with l title "Max Res IB + CGS RUHE",\
filename2 using 2:6 with l title "Min Real Res IB + CGS RUHE",\
filename2 using 2:7 with l title "Max Real Res IB + CGS RUHE",\
filename3 using 2:4 with l title "Min Res IB + CGS BLOCK",\
filename3 using 2:5 with l title "Max Res IB + CGS BLOCK",\
filename3 using 2:6 with l title "Min Real Res IB + CGS BLOCK",\
filename3 using 2:7 with l title "Max Real Res IB + CGS BLOCK",\
filename2 using 2:3 axes x1y2 with l title "RUHE : Size of Block",\
filename3 using 2:3 axes x1y2 with l title "BLOCK : Size of Block"
#This script display convergence (i.e only residuals) of 3 files given
#in parameters
#To move the legend
set key center top
set term png size 1600,900
set output "Output_IB.png"
set title "IB-BGMRes, Residual" font ",14"
set tmargin 30
set lmargin 15
set rmargin 15
set xlabel "Number of Mat Vect Product" font ",8"
set xtics nomirror
set y2tics
set ylabel "Residual norm"
set y2label "Size of Block"
set y2range [0:7]
set tmargin 2
set title "Min Max Residuals" font ",11"
set logscale y
plot filename2 using 2:(($5-$4)/2) with l title "Average Arnoldi residual IB CGS RUHE",\
filename2 using 2:(($6-$7)/2) with l title "Real Arnoldi residual IB CGS RUHE",\
filename3 using 2:(($5-$4)/2) with l title "Average Arnoldi residual IB CGS BLOCK",\
filename3 using 2:(($6-$7)/2) with l title "Real Arnoldi residual IB CGS BLOCK",\
filename2 using 2:3 axes x1y2 with l title "RUHE : Size of Block",\
filename3 using 2:3 axes x1y2 with l title "BLOCK : Size of Block"
#!/usr/bin/env gnuplot
#fake plot in order to sort after and get max size.
plot filename using 1:3 with impulses linewidth 2
# filename="young1c_randomRHS_QR.res"
# filename2="young1c_RANDOM_RHS__CHAM_DESC_BLOCK.res"
#to get the max
VAR = GPVAL_DATA_Y_MAX
# to summ the time over iterations
a=0
cumul_sum(x)=(a=a+x,a)
#To move the legend
set key center top
set term png size 1600,1100
set output "Output_multi.png"
#set multiplot layout 2, 1 title "IB-BGMRes, Residual and Block Size" font ",20"
set tmargin 60
set lmargin 15
set rmargin 15
set xtics nomirror
set ylabel "Residual norm" font ",14"
set tmargin 2
set title "Min Max Residuals" font ",16"
set logscale y
plot filename using 2:4 with l title "Min Res QR",\
filename using 2:5 with l title "Max Res QR"
# filename2 using 3:6 with l title "Min Res QR Cham",\
# filename2 using 3:7 with l title "Max Res QR Cham"
#unset ylabel
#set ylabel "Block Size" font ",14"
#set xlabel "Number of Mat Vect Product" font ",14"
#set y2label "Time" font ",14"
#set y2tics
#set title "Block Size and Elapsed Time" font ",16"
#unset logscale y
#plot filename using 2:(cumul_sum($8)) with lp axes x1y2 title "Cumulative time spent" ,\
# filename using 2:3 with impulses title "SizeBlock" axes x1y1 lw 2
#
# set style histogram columns
# set style fill solid
# set key autotitle column
# set boxwidth 0.8
# set format y " "
# set tics scale 0
# set title "Plot 3"
# plot 'immigration.dat' using 2 with histograms, \
# '' using 7 with histograms , \
# '' using 8 with histograms , \
# '' using 11 with histograms
#
#unset multiplot
#!/usr/bin/env Rscript
args <- commandArgs(TRUE)
if (length(args) < 1) {
stop("need filename as parameter")
}
library(ggplot2)
data <- read.table(args[1], header=T)
if (length(args) >= 2) {
for (i in 2:length(args)) {
data <- rbind(data, read.table(args[i], header=T))
}
}
p <- ggplot(data, aes(nb_mvp), log10='y') +
geom_line(aes(y=minRes, color=ID)) +
geom_line(aes(y=maxRes, color=ID))
p + scale_y_log10()
......@@ -68,13 +68,17 @@ public:
int N = Q.get_nb_col();
int LD = Q.get_leading_dim();
assert( R.get_nb_col() == Q.get_nb_col() );
assert( R.get_nb_row() == Q.get_nb_col() );
if ( R.get_nb_col() != 0 ) {
assert( R.get_nb_col() == Q.get_nb_col() );
assert( R.get_nb_row() == Q.get_nb_col() );
}
std::vector<T> tau;
LapackKernI::geqrf(M, N, Q.get_ptr(), LD, tau);
LapackKernI::lacpy('U', M, N, Q.get_ptr(), LD,
R.get_ptr(), R.get_leading_dim());
if (R.get_nb_col() != 0) {
LapackKernI::lacpy( 'U', M, N, Q.get_ptr(), LD,
R.get_ptr(), R.get_leading_dim());
}
LapackKernI::orgqr(M, N, N, Q.get_ptr(), LD, tau.data());
}
};
......
......@@ -60,13 +60,16 @@ private:
HessStandard<S> _hess;
bool _solution_computed;
Block<S> _solution;
Block<S> _lastY;
Base<S> &_base;
int _dim;
int _nbRHS;
int _size_krylov_space;
bool _cold_restart;
private:
template< class Matrix, class Block, class Base >
void X_plus_MVY(Matrix &A, Block &X, Base &V, Block &Y)
void X_plus_MVY(Matrix &A, Block &X, Base &V, const Block &Y)
{
int dim = V.get_nb_row();
Block MVY{dim, Y.get_nb_col()};
......@@ -85,7 +88,7 @@ private:
}
template<class Matrix, class Block>
void compute_real_residual(Matrix &A, Block &X, const Block &B, Block &R)
void compute_real_residual(Matrix &A, const Block &X, const Block &B, Block &R)
{
R.copy(B); // R <- B
A.MatBlockVect(X, R, S{-1.0}, S{1.0}); // R <- A*X-B
......@@ -106,31 +109,48 @@ private:
template<class Matrix, class Block>
void compute_solution(Matrix &A, Block &X)
{
Block Y = _hess.compute_least_square_sol();
int nb_vect = _hess.get_nb_vect();
Block YY{nb_vect+_nbRHS, _nbRHS};
_hess.compute_least_square_sol(YY);
Block Y = YY.sub_block(0, 0, nb_vect, _nbRHS);
X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
}
template<class Matrix, class Block>
std::tuple<bool,bool>
check_real_residual_if(bool projected_residual_converged,
Matrix &A, Block &X, const Block &B,
const std::vector<primary_type> &epsilon)
void compute_solution(Matrix &A, Block &X, const Block &Y)
{
X_plus_MVY(A, X, _base, Y); // X_n = X_0 + M*V*Y
}
template<class Matrix, class Block>
bool check_real_residual_if(bool projected_residual_converged,
Matrix &A, const Block &B,
Block &X, const Block &Y,
const std::vector<P> &epsilon)
{
if (!projected_residual_converged)
return std::make_tuple(false, true);
return false;
_solution = Block{_dim, _nbRHS};
_solution.copy(X);
compute_solution(A, _solution);
compute_solution(A, _solution, Y);
Block R{_dim, _nbRHS};
compute_real_residual(A, _solution, B, R); // R <- A*sol - B
if (std::get<2>(R.check_precision(epsilon, A))) {
auto MinMaxConv = R.check_precision(epsilon, A);
printf("real residual=(%g;%g)\n",
std::get<0>(MinMaxConv),
std::get<1>(MinMaxConv));
if (std::get<2>(MinMaxConv)) {
_solution_computed = true;
return std::make_tuple(true, true);
_cold_restart = false;
return true;
}
return std::make_tuple(false, false);
_cold_restart = true;
return false;
}
public:
......@@ -142,7 +162,9 @@ public:
_solution_computed(false),
_base(restarter.get_base()),
_dim(dim),
_nbRHS(nbRHS)
_nbRHS(nbRHS),
_size_krylov_space{0},
_cold_restart{false}
{
}
......@@ -174,61 +196,65 @@ public:
OrthoType ortho_type = OrthoType::RUHE )
{
print_header(max_krylov_space_size);
const int nbRHS = B.get_nb_col(); //Initial Size of block
Block R1{nbRHS, nbRHS}; // R1 = RHS of projected problem
Block R1{_nbRHS, _nbRHS}; // R1 = RHS of projected problem
bool convergence = false;
int size_krylov_space = 0, iteration_count = 0;
int iteration_count = 0;
if (restarter && _base.get_current_block() != 0) {
if (restarter && _base.get_nb_block() > 0) {
int k = restarter.run(R1);
restarter.restore_hess(_hess, k, nbRHS);
size_krylov_space = k;
restarter.restore_hess(_hess, k, _nbRHS);
_size_krylov_space = k;
std::cout<<Color::bold<<Color::yellow;
std::cout<<"\t\tDR done\n"<<Color::reset;
} else { //No DR, or first run of arnoldi
_logger.notify_iteration_begin(iteration_count, size_krylov_space);
Block R0 = _base.get_W(nbRHS);
_logger.notify_iteration_begin(iteration_count, _size_krylov_space);
Block R0 = _base.get_W(_nbRHS);
compute_real_residual(A, X, B, R0);
/* auto minmaxR0 = R0.get_min_max_norm(A); */
A.QRFacto(R0, R1);
auto MinMaxConv = R1.check_precision(epsilon);
_base.increase(nbRHS);
size_krylov_space = _base.get_nb_vect();
_base.increase(_nbRHS);
_size_krylov_space = _base.get_nb_vect();
convergence = std::get<2>(MinMaxConv);
_logger.notify_iteration_end(
iteration_count, size_krylov_space, nbRHS, MinMaxConv);
iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
}
_hess.set_rhs(R1);
bool residual_coherence = true;
while (!convergence && size_krylov_space < max_krylov_space_size) {
while (!convergence && _size_krylov_space < max_krylov_space_size) {
++ iteration_count;
_logger.notify_iteration_begin(iteration_count, size_krylov_space);
_logger.notify_iteration_begin(iteration_count, _size_krylov_space);
Block Vj = _base.get_Vj();
Block W = _base.get_W(nbRHS);
/* S *to_write = _hess.get_new_col(nbRHS);
if (to_write == nullptr) break; */
Block W = _base.get_W(_nbRHS);
W_AMV(A, Vj, W);
Arnoldi_Ortho(_hess, _base, W, ortho_type, ortho_scheme, A); // W.checkOrtho();
_base.increase(nbRHS);
size_krylov_space = _base.get_nb_vect();
//hess.template displayHessBitMap<P>("Hess Before Least Square");
// Tests over criterion
Block Yj = _hess.compute_least_square_sol();
Block LS = _hess.compute_proj_residual(Yj);
restarter.store_LS(LS);
auto MinMaxConv = LS.check_precision(epsilon);
Arnoldi_Ortho(_hess, _base, W, ortho_type, ortho_scheme, A);
// W.check_ortho();
_base.increase(_nbRHS);
_size_krylov_space = _base.get_nb_vect();
//_hess.display_hess_bitmap("Hess Before Least Square");
int nb_vect = _hess.get_nb_vect();
Block YY{nb_vect+_nbRHS, _nbRHS};
Block Y = YY.sub_block(0, 0, nb_vect, _nbRHS);
Block res = YY.sub_block(nb_vect, 0, _nbRHS, _nbRHS);
_hess.compute_least_square_sol(YY);
_lastY = Y;
auto MinMaxConv = res.check_precision(epsilon);
bool proj_convergence = std::get<2>(MinMaxConv);
std::tie(convergence, residual_coherence)
= check_real_residual_if(proj_convergence, A, X, B, epsilon);
Block PR{_hess.get_nb_vect()+_nbRHS, _nbRHS};
_hess.compute_proj_residual(Y, PR);
restarter.store_LS(PR);
convergence = check_real_residual_if(proj_convergence, A, B, X, Y, epsilon);
_logger.notify_iteration_end(
iteration_count, size_krylov_space, nbRHS, MinMaxConv);
_logger.log_real_residual(A, B, X, _base, Yj);
iteration_count, _size_krylov_space, _nbRHS, MinMaxConv);
_logger.log_real_residual(A, B, X, _base, Y);
// check_arnoldi_formula(A, _base, _hess);
}
......@@ -236,16 +262,13 @@ public:
if (_solution_computed)
X.copy(_solution);
else
compute_solution(A, X);
if (!convergence && !residual_coherence) {
// if Arnoldi residual is different from real residual, cold Restart
compute_solution(A, X, _lastY);
if (_cold_restart)
_base.reset();
}
print_footer(iteration_count);
print_footer(iteration_count);
restarter.save_hess(_hess);
return ArnReturn{size_krylov_space, iteration_count, convergence};
return ArnReturn{_size_krylov_space, iteration_count, convergence};
}
};
......
......@@ -39,8 +39,8 @@ private:
void print_header(int nb_mvp, std::ostream &o = std::cout)
{
o << "#######################################################\n";
o << "################## Arnoldi IB ### ###################\n";
o << "######## Mat Vect product scheduled: "<< nb_mvp <<" ###########\n";
o << "#################### Arnoldi IB #######################\n";
o << "######### Mat Vect product scheduled: "<< nb_mvp <<" ###########\n";
}
void print_footer(std::ostream &o = std::cout)
......@@ -129,12 +129,12 @@ private:
_solution = Block{_dim, _nbRHS};
_solution.copy(X);
compute_solution(A, _solution, Y);
Block R{_dim, _nbRHS};
compute_real_residual(A, _solution, B, R); // R <- A*sol - B
if (std::get<2>(R.check_precision(epsilon, A))) {
auto MinMaxConv = R.check_precision(epsilon, A);
if (std::get<2>(MinMaxConv)) {
_solution_computed = true;
return true;
}
......@@ -155,25 +155,26 @@ private:
}
int nj = _block_size_sum.back();
Block U1_2 = U.sub_block(nj, 0, _nbRHS, _nb_direction_kept); // CHECKME;
Block U1_2 = U.sub_block(nj, 0, _nbRHS, _nb_direction_kept);
Block R{_nbRHS, _nbRHS};
Block W1_2{_nbRHS, _nbRHS};
Block W1 = W1_2.sub_block(0, 0, _nbRHS, _nb_direction_kept); // CHECKME;
Block W2 = W1_2.sub_block(0, 0, _nbRHS, _nb_direction_discarded); // CHECKME;
Block R{};
Block W1_W2{_nbRHS, _nbRHS};
Block W1, W2;
W1 = W1_W2.sub_block(0, 0, _nbRHS, _nb_direction_kept);
W2 = W1_W2.sub_block(0, _nb_direction_kept, _nbRHS, _nb_direction_discarded);
U1_2.OutOfPlaceQRFacto(W1_2, R); // Qr facto of bottom block of U
L.update_phi(W1_2, _nb_direction_kept);
U1_2.OutOfPlaceQRFacto(W1_W2, R); // Qr facto of bottom block of U
L.update_phi(W1_W2, _nb_direction_kept);
// Update Phi with \W_1 and \W_2, doc Annexe Eq 2.7
Block V = _base.get_W(_nb_direction_kept);
WP.update_V( W1, V ); // V_{j+1} = [P_{j-1},~Wj] * \W_1
WP.compute_V( W1, V ); // V_{j+1} = [P_{j-1},~Wj] * \W_1
WP.update_P( W2 ); // Pj = [P_{j-1},~Wj] * \W_2
_base.increase(_nb_direction_kept);
// L_{j+1,} = | \W_1^{H} | * H_j
// Gj = | \W_2^{H} | * H_j
L.update_bottom_line(W1_2);
L.update_bottom_line(W1_W2);
}
void handle_IB_on_R0( Block<S> &U, BlockWP<S> &WP,
......@@ -399,21 +400,22 @@ public:
// Ortho and filling of L (the (IB) Hessenberg)
Arnoldi_Ortho(L, _base, WP, ortho_type, ortho_scheme, A);
WP.check_ortho_WP();
WP.get_W().check_ortho();
// WP.check_ortho_WP();
// WP.get_W().check_ortho();
//L.display_hess_extended_bitmap();
Block PR{L.get_nb_hess_line(), _nbRHS};
Block YY{L.get_nb_hess_line(), _nbRHS};
Block Y = YY.sub_block(0, 0, L.get_nb_vect(), _nbRHS);
Block res = YY.sub_block(L.get_nb_vect(), 0, _nbRHS, _nbRHS);
_last_Y = Y;
// Y is the solution of LeastSquare problem: Y = argmin(||F*Y-\Lambda||)
// PR is the projected residual PR <- F*Y-\Lambda
L.compute_proj_residual(YY, PR);
auto MinMaxConv = PR.check_precision(epsilon);
//auto MinMaxConv = PR.check_precision(epsilon);
auto MinMaxConv = res.check_precision(epsilon);
bool proj_convergence = std::get<2>(MinMaxConv);
convergence = check_real_residual_if(proj_convergence, A, X, Y, B, epsilon);
......
......@@ -21,7 +21,7 @@ void ArnoldiRuhe_CGS(Matrix &A, Base<S> &base,
int W_size = W.get_nb_col();
int dim = base.get_nb_row();
int nb_vect_in_base = base.get_nb_vect();
int block_size = base.get_block_size(base.get_current_block());
int block_size = base.get_block_size(base.get_nb_block()-1);
assert(W_size == block_size);
......@@ -80,7 +80,7 @@ void ArnoldiRuhe_ICGS(Matrix &A, Base<S> &base,
int W_size = W.get_nb_col();
int dim = base.get_nb_row();
int nb_vect_in_base = base.get_nb_vect();
int block_size = base.get_block_size(base.get_current_block());
int block_size = base.get_block_size(base.get_nb_block()-1);
assert(W_size == block_size);
int ldw = W.get_leading_dim();
......
......@@ -34,7 +34,7 @@ public:
*/
Base(int dim, int max_krylov_space_size):
Block<S>{dim, max_krylov_space_size},
_current_block{-1},
_current_block{0},
_nb_vect{0},
_max_krylov_space_size{max_krylov_space_size}
{
......@@ -46,15 +46,14 @@ public:
*/
void reset()
{
_current_block = -1;
_current_block = 0;
_nb_vect = 0;
_block_sizes.clear();
_block_sizes_sum.clear();
_block_sizes_sum.emplace_back(0);
}
int get_current_block() const { return _current_block; }
int get_nb_block() const { return _current_block+1; }
int get_nb_block() const { return _current_block; }
int get_nb_vect() const { return _nb_vect; }
int get_block_size(int k) const { return _block_sizes[k]; }
......
......@@ -130,8 +130,11 @@ public:
inline int get_nb_col() const { return _n; }
inline int get_leading_dim() const { return _ld; }
inline S *get_ptr(int i = 0, int j = 0) { return _ptr + j*_ld + i; }
inline const S *get_ptr(int i = 0, int j = 0) const { return _ptr + j*_ld + i; }
inline S *get_ptr() { return _ptr; }
inline const S *get_ptr() const { return _ptr; }
inline S *get_ptr(int i, int j) { return _ptr + j*_ld + i; }
inline const S *get_ptr(int i, int j) const { return _ptr + j*_ld + i; }
inline S *get_vect(int j = 0) { return get_ptr(0, j); }
inline const S *get_vect(int j = 0) const { return get_ptr(0, j); }
......@@ -142,9 +145,9 @@ public:
/* \brief return norm of k-th vector inside the Block
*/
template < class Matrix >
P get_norm(int k, Matrix &A)
P get_norm(int k, Matrix &A) const
{
S *ptr = get_vect(k);
const S *ptr = get_vect(k);
S res = S{0.0};
A.DotProduct(1, 1, ptr, _ld, ptr, _ld, &res, 1);
return std::sqrt(fabulous::real(res));
......@@ -152,9 +155,9 @@ public:
/* \brief return norm of k-th vector inside the Block
*/
P get_norm(int k)
P get_norm(int k) const
{
S *ptr = get_vect(k);
const S *ptr = get_vect(k);
S res = S{0.0};
LapackKernI::dot(_m, ptr, 1, ptr, 1, &res);
return std::sqrt(fabulous::real(res));
......@@ -163,7 +166,7 @@ public:
/* @ brief compute norms of vector in the Block
*/
template< class Matrix >
std::vector<P> compute_norms(Matrix &A)
std::vector<P> compute_norms(Matrix &A) const
{
std::vector<P> norms;
norms.resize(_n);
......@@ -362,14 +365,14 @@ public:
/**
* \brief Wrapper for the other DecompositionSVD method
*/
int decomposition_SVD(Block &output,
int decomposition_SVD(Block &U1,
const std::vector<P> &epsilon,
const std::vector<P> &inv_epsilon)
{
if (epsilon.size() == 1)
return decomposition_SVD(output, epsilon.front());
return decomposition_SVD(U1, epsilon.front());
scale(inv_epsilon);
return decomposition_SVD(output, P{1.0});
return decomposition_SVD(U1, P{1.0});
}
/**
......@@ -416,13 +419,12 @@ public:
while (nb_krylov_direction < _n && sigma[nb_krylov_direction] > threshold)
++ nb_krylov_direction;
// Test if U is already allocated
if (U.get_nb_col() == 0)
U.realloc(_m, nb_krylov_direction);
// Test if U1 is already allocated
if (U1.get_nb_col() == 0)
U1.realloc(_m, nb_krylov_direction);
// Fill the U with vectors from U
Block UU = U.sub_block(0, 0, _m, nb_krylov_direction);
U1.copy(UU);
U1.copy(U);
return nb_krylov_direction;
}
......
......@@ -91,7 +91,7 @@ public:
* \param[in] W1 \f$ \mathbb{W}_1 \f$
* \param[out] V \f$ V_{j+1} \f$
*/
void update_V(const Block<S> &W1, Block<S> &V) const
void compute_V(const Block<S> &W1, Block<S> &V) const
{
int nb_kept_direction = W1.get_nb_col();
LapackKernI::gemm( // V_{j+1} := [P_{j-1},~W] * \W_1
......
......@@ -71,7 +71,8 @@ public:
operator bool () { return true; }
Base<S> &get_base() { return _base; }
void store_LS(Block<S> &LS) { _LS = LS; }
void store_LS(Block<S> &LS) { _LS = LS; }
template< class HESS > void save_hess(HESS &hess) { _hess.store_hess(hess); }
template< class HESS > void restore_hess(HESS &hess, int k, int nbRHS)
{
......@@ -90,7 +91,6 @@ public:
double tic = Timer::get_time();
int p = _LS.get_nb_col();
int nm = _base.get_nb_vect() - p;
int ldh = _hess.get_nb_row();
int dim = _base.get_nb_row();
std::cout<<"About to DR ################## 2 !!!\n"
......@@ -101,63 +101,60 @@ public:
<<"\tnm :: "<<nm<<"\n"
<<"\tnbCol in Hess :: "<<_hess.get_nb_col()<<"\n"
<<"\t_Base nbCols :: "<<_base.get_nb_vect()<<"\n"
<<"\tldh :: "<<ldh<<"\n"
<<"\tldh :: "<<_hess.get_leading_dim()<<"\n"
<<"\tdim :: "<<dim<<"\n"
<<"\tLS : leading dim : "<<_LS.get_nb_row()<<"\n"
<<std::endl;
//Compute 2 blocks to hold A and B for computing the
// generalized eigen value problem
Block<S> A{nm,nm};
Block<S> B{nm,nm};
//A <- Hm_^{h} * Hm_
LapackKernI::Tgemm(nm,nm,nm+p,
_hess.get_ptr(),ldh,
_hess.get_ptr(),ldh,
A.get_ptr(),A.get_nb_row(),
S(1),S(0));
Block<S> A{nm, nm};
Block<S> B{nm, nm};
LapackKernI::Tgemm( // A <- Hm_^{h} * Hm_
nm, nm, nm+p,
_hess.get_ptr(), _hess.get_leading_dim(),
_hess.get_ptr(), _hess.get_leading_dim(),
A.get_ptr(), A.get_leading_dim(),
S{1.0}, S{0.0}
);
//B <- Hm^{h}
for (int i=0; i<nm; ++i)
for (int j=0; j<nm; ++j)
B.get_ptr(i)[j] = std::conj(_hess.get_ptr()[j*ldh + i]);
for (int j = 0; j < nm; ++j)
for (int i = 0; i < nm; ++i)
B.at(i,j) = std::conj(_hess.at(i, j));
Block<S> vl{1,0}; // hold left eigen vectors that won't be computed
Block<S> vr{nm,nm}; // hold right eigen vectors before sorting them
Block<S> vleft{1, 0}; // hold left eigen vectors that won't be computed
Block<S> vright{nm, nm}; // hold right eigen vectors before sorting them
/* complex alpha in order to store the eigen value that
* might be complex even in real case */
::fabulous::Block<std::complex<P>> alpha{1,nm};
Block<S> beta{1,nm};
{//call to lapack kernel
int err = LapackKernI::ggev(
nm,
A.get_ptr(), A.get_leading_dim(),
B.get_ptr(), B.get_leading_dim(),
alpha.get_ptr(), beta.get_ptr(),
vl.get_ptr(), vl.get_leading_dim(),
vr.get_ptr(), vr.get_leading_dim());
if (err != 0)
FABULOUS_FATAL_ERROR("Error in ggev ... err="<<err);
}
::fabulous::Block<std::complex<P>> alpha{nm, 1};
Block<S> beta{nm, 1};
int err = LapackKernI::ggev(
nm,
A.get_ptr(), A.get_leading_dim(),
B.get_ptr(), B.get_leading_dim(),
alpha.get_ptr(), beta.get_ptr(),
vleft.get_ptr(), vleft.get_leading_dim(),
vright.get_ptr(), vright.get_leading_dim()
);
if (err != 0)
FABULOUS_FATAL_ERROR("Error in ggev ... err="<<err);