Commit d12d53c7 authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

orthogonalization flops: compilation fix

parent 0abf67bc
......@@ -168,8 +168,8 @@ cd ${WORKDIR}/build/src/test_cham/
#+begin_src R :results output graphics :file (org-babel-temp-file "figure" ".png") :exports both :width 600 :height 400 :session
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/QR.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/CHAMQR.res", header=T))
df <- read.csv2("./build/src/data/res/QR.res", header=T)
df <- rbind(df, read.csv2("./build/src/data/res/CHAMQR.res", header=T))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name)) +
geom_line(aes(y=minRes, color=name)) +
......
......@@ -13,15 +13,26 @@
:END:
** Setup workstation
To be able to run the following test cases, you must have compiled fabulous:
#+begin_src shell
# optionnally: load mkl into environement:
For this, you may possibly need load mkl into environement:
#+BEGIN_SRC shell
source /opt/intel/mkl/bin/mklvars.sh intel64
#+END_SRC
Setup the build system:
#+BEGIN_SRC shell
mkdir -p ${WORKDIR}/build
cd ${WORKDIR}/build
cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DFABULOUS_DEBUG_MODE=ON \
-DFABULOUS_LAPACKE_NANCHECK=OFF # -DCHAMELEON_DIR=$(spack location -i chameleon)
make
#+END_SRC
Eventually compile fabulous:
#+BEGIN_SRC shell
make # -j4
#+END_SRC
#+end_src
** Get Test files
*** File list
......@@ -73,11 +84,11 @@ done
Section 4.5, show that the contrary is possible)
#+begin_src R
library(ggplot2)
df <- read.table("./build/src/data/res/r=200.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/r=400.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/r=600.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/r=800.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/r=1000.res", header=T))
df <- read.csv2("./build/src/data/res/r=200.res")
df <- rbind(df, read.csv2("./build/src/data/res/r=400.res"))
df <- rbind(df, read.csv2("./build/src/data/res/r=600.res"))
df <- rbind(df, read.csv2("./build/src/data/res/r=800.res"))
df <- rbind(df, read.csv2("./build/src/data/res/r=1000.res"))
ggplot(df, aes(x=nb_mvp, y=maxRes, color=name)) +
geom_line() +
geom_hline(aes(yintercept=1e-4, color="threshold")) +
......@@ -94,8 +105,8 @@ mkdir -p ../data/res
**** plot the graphic
#+begin_src R
library(ggplot2)
df <- read.table("./build/src/data/res/Full_GELS.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/GELS_with_Incremental_QR_factorization.res", header=T))
df <- read.csv2("./build/src/data/res/Full_GELS.res")
df <- rbind(df, read.csv2("./build/src/data/res/GELS_with_Incremental_QR_factorization.res"))
ggplot(df, aes(x=global_iteration, y=least_square_time, color=name)) +
geom_line() + ggtitle("Least Square duration (young1c)")
#+end_src
......@@ -109,9 +120,9 @@ mkdir -p ../data/res
**** plot the graphic
#+begin_src R
library(ggplot2)
df <- read.table("./build/src/data/res/STD.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/STD+DR.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/IB.res", header=T))
df <- read.csv2("./build/src/data/res/STD.res")
df <- rbind(df, read.csv2("./build/src/data/res/STD+DR.res"))
df <- rbind(df, read.csv2("./build/src/data/res/IB.res"))
name <- df$name
df$name_min <- paste(name, " minRes")
df$name_max <- paste(name, " maxRes")
......@@ -144,16 +155,18 @@ mkdir -p ../data/res
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/BGMRES.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/IB-BGMRES.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/BGMRES-DR.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/IB-BGMRES-DR.res", header=T))
df <- read.csv2("./build/src/data/res/BGMRES.res")
df <- rbind(df, read.csv2("./build/src/data/res/IB-BGMRES.res"))
df <- rbind(df, read.csv2("./build/src/data/res/BGMRES-DR.res"))
df <- rbind(df, read.csv2("./build/src/data/res/IB-BGMRES-DR.res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name)) +
geom_line(aes(y=minRes, color=name)) +
scale_y_log10() + geom_hline(aes(yintercept=1e-6, color="threshold")) +
ggtitle("young1c (nrhs=6, m=90 k=5)") + ylab(TeX("$\\eta_b$ (min, max)"))
#+end_src
*** lightInTissue (nrhs=6, m=90, k=5)
**** run test case
#+begin_src shell
......@@ -169,10 +182,10 @@ mkdir -p ../data/res
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/BGMRES.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/IB-BGMRES.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/BGMRES-DR.res", header=T))
df <- rbind(df, read.table("./build/src/data/res/IB-BGMRES-DR.res", header=T))
df <- read.csv2("./build/src/data/res/BGMRES.res")
df <- rbind(df, read.csv2("./build/src/data/res/IB-BGMRES.res"))
df <- rbind(df, read.csv2("./build/src/data/res/BGMRES-DR.res"))
df <- rbind(df, read.csv2("./build/src/data/res/IB-BGMRES-DR.res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name)) +
geom_line(aes(y=minRes, color=name)) +
......@@ -194,10 +207,10 @@ mkdir -p ../data/res
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/BGMRES-DR(05).res", header=T)
df <- rbind(df, read.table("./build/src/data/res/BGMRES-DR(10).res", header=T))
df <- rbind(df, read.table("./build/src/data/res/BGMRES-DR(15).res", header=T))
df <- rbind(df, read.table("./build/src/data/res/BGMRES-DR(20).res", header=T))
df <- read.csv2("./build/src/data/res/BGMRES-DR(05).res")
df <- rbind(df, read.csv2("./build/src/data/res/BGMRES-DR(10).res"))
df <- rbind(df, read.csv2("./build/src/data/res/BGMRES-DR(15).res"))
df <- rbind(df, read.csv2("./build/src/data/res/BGMRES-DR(20).res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name)) +
geom_line(aes(y=minRes, color=name)) +
......@@ -217,10 +230,10 @@ mkdir -p ../data/res
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/IB-BGMRES-DR.res", header=T)
#df <- rbind(df, read.table("./build/src/data/res/IB-BGMRES.res", header=T))
#df <- rbind(df, read.table("./build/src/data/res/BGMRES-DR.res", header=T))
#df <- rbind(df, read.table("./build/src/data/res/IB-BGMRES-DR.res", header=T))
df <- read.csv2("./build/src/data/res/IB-BGMRES-DR.res")
#df <- rbind(df, read.csv2("./build/src/data/res/IB-BGMRES.res"))
#df <- rbind(df, read.csv2("./build/src/data/res/BGMRES-DR.res"))
#df <- rbind(df, read.csv2("./build/src/data/res/IB-BGMRES-DR.res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=time, color="total")) +
geom_line(aes(y=least_square_time, color="gels")) +
......@@ -241,8 +254,8 @@ mkdir -p ../data/res
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/IB-BGMRES-DR.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/QR-IB-BGMRES-DR.res", header=T))
df <- read.csv2("./build/src/data/res/IB-BGMRES-DR.res")
df <- rbind(df, read.csv2("./build/src/data/res/QR-IB-BGMRES-DR.res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name)) +
geom_line(aes(y=minRes, color=name)) +
......@@ -262,8 +275,8 @@ mkdir -p ../data/res
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/IB-BGMRES-DR.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/QR-IB-BGMRES-DR.res", header=T))
df <- read.csv2("./build/src/data/res/IB-BGMRES-DR.res")
df <- rbind(df, read.csv2("./build/src/data/res/QR-IB-BGMRES-DR.res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=least_square_time, color=name)) +
geom_line(aes(y=least_square_time, color=name)) +
......@@ -291,7 +304,7 @@ export STARPU_FXT_PREFIX=${WORKDIR}/build/
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/CHAM-TL.res", header=T)
df <- read.csv2("./build/src/data/res/CHAM-TL.res")
ggplot(df, aes(x=global_iteration)) +
geom_line(aes(y=100, color="total")) +
geom_line(aes(y=(least_square_time/time)*100, color="Gels")) +
......@@ -314,8 +327,8 @@ cd ${WORKDIR}/build/src/test_cham/
#+begin_src R
library(ggplot2)
library(latex2exp)
df <- read.table("./build/src/data/res/QR.res", header=T)
df <- rbind(df, read.table("./build/src/data/res/CHAMQR.res", header=T))
df <- read.csv2("./build/src/data/res/QR.res")
df <- rbind(df, read.csv2("./build/src/data/res/CHAMQR.res"))
ggplot(df, aes(x=nb_mvp)) +
geom_line(aes(y=maxRes, color=name)) +
geom_line(aes(y=minRes, color=name)) +
......
......@@ -3,63 +3,82 @@
#include <cstdint>
#include "fabulous/utils/Arithmetic.hpp"
#include "fabulous/utils/Traits.hpp"
namespace fabulous {
namespace lapacke {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
template<class S, class = enable_if_t<is_real_t<S>::value>>
int64_t Tgemm_flops(int m, int n, int k)
{
return m*n*(2*k+2);
}
template<class S>
template<class S, class = enable_if_t<is_complex_t<S>::value>, class=void>
int64_t Tgemm_flops(int m, int n, int k)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return m*n*(8*k*12);
}
template<class S>
template<class S, class = enable_if_t<is_real_t<S>::value>>
int64_t gemm_flops(int m, int n, int k)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return m*n*(2*k+2);
}
template<class S>
int64_t axpy_flops(int m)
template<class S, class = enable_if_t<is_complex_t<S>::value>, class=void>
int64_t gemm_flops(int m, int n, int k)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return m*n*(8*k*12);
}
template<class S>
int64_t scal_flops(int m)
template<class S, class = enable_if_t<is_real_t<S>::value>>
int64_t axpy_flops(int)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return 0;
}
template<class S>
int64_t dot_flops(int m)
template<class S, class = enable_if_t<is_complex_t<S>::value>, class=void>
int64_t axpy_flops(int)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return 0;
}
template<class S>
int64_t geqrf_flops(int m, int n)
template<class S, class = enable_if_t<is_real_t<S>::value>>
int64_t scal_flops(int)
{
return 0;
}
template<class S, class = enable_if_t<is_complex_t<S>::value>, class=void>
int64_t scal_flops(int)
{
return 0;
}
template<class S, class = enable_if_t<is_real_t<S>::value>>
int64_t dot_flops(int)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return 0;
}
template<class S, class = enable_if_t<is_complex_t<S>::value>, class=void>
int64_t dot_flops(int)
{
return 0;
}
template<class S>
int64_t orgqr_flops(int m, int n, int k)
int64_t geqrf_flops(int, int)
{
FABULOUS_DISABLE_INVALID_ARITHMETIC(S);
return -1;
return 0;
}
#pragma GCC diagnostic pop
template<class S>
int64_t orgqr_flops(int, int, int)
{
return 0;
}
};
};
......
......@@ -24,34 +24,34 @@ private:
template<class HESS, class Base, class Block, class Matrix,
class = enable_if_t<handle_ib_t<HESS>::value > >
void run_ruhe(HESS &H, Base &base, Block &W, Matrix &A)
int64_t run_ruhe(HESS &H, Base &base, Block &W, Matrix &A)
{
OrthogonalizerRuheIB<HESS> ruhe{_param};
ruhe.run(H, base, W, A);
return ruhe.run(H, base, W, A);
}
template<class HESS, class Base, class Block, class Matrix,
class = enable_if_t<not handle_ib_t<HESS>::value >, class=void >
void run_ruhe(HESS &H, Base &base, Block &W, Matrix &A)
class = enable_if_t<not handle_ib_t<HESS>::value >, class=void>
int64_t run_ruhe(HESS &H, Base &base, Block &W, Matrix &A)
{
OrthogonalizerRuheSTD<HESS> ruhe{_param};
ruhe.run(H, base, W, A);
return ruhe.run(H, base, W, A);
}
template<class HESS, class Base, class Block,
class = enable_if_t<handle_ib_t<HESS>::value > >
void run_block(HESS &H, Base &base, Block &W)
int64_t run_block(HESS &H, Base &base, Block &W)
{
OrthogonalizerBlockIB<HESS> block{_param};
block.run(H, base, W);
return block.run(H, base, W);
}
template<class HESS, class Base, class Block,
class = enable_if_t<not handle_ib_t<HESS>::value >, class=void >
void run_block(HESS &H, Base &base, Block &W)
class = enable_if_t<not handle_ib_t<HESS>::value >, class=void>
int64_t run_block(HESS &H, Base &base, Block &W)
{
OrthogonalizerBlockSTD<HESS> block{_param};
block.run(H, base, W);
return block.run(H, base, W);
}
public:
......@@ -67,16 +67,17 @@ public:
void run(HESS &H, Base &base, Block &W, Matrix &A, Logger logger)
{
logger.notify_ortho_begin();
int64_t nb_flops;
switch (_param.get_type()) {
case OrthoType::RUHE: run_ruhe(H, base, W, A); break;
case OrthoType::BLOCK: run_block(H, base, W); break;
case OrthoType::RUHE: nb_flops = run_ruhe(H, base, W, A); break;
case OrthoType::BLOCK: nb_flops = run_block(H, base, W); break;
default:
FABULOUS_FATAL_ERROR(
"Choose between blockwise or Ruhe orthogonalization, "
"Exiting\n"
); break;
}
logger.notify_ortho_end();
logger.notify_ortho_end(nb_flops);
H.notify_orthogonalization_end();
}
......
......@@ -37,29 +37,31 @@ public:
double least_square_time; /*!< Time time for this iteration in least_square */
double mvp_time; /*!< Time time for this iteration in mvp */
double ortho_time; /*!< Time time for this iteration in orthogonalization */
int64_t ortho_flops; /*!< number of flops of orthogonalization */
static void print_header(std::ostream &o = std::cerr)
{
o <<"global_iteration\tlocal_iteration\tkrylov_space_size\tnb_mvp\t"
"current_block_size\tminRes\tmaxRes\tminRealRes\tmaxRealRes\ttime\t"
"least_square_time\tmvp_time\tortho_time\tname\n";
o <<"global_iteration;local_iteration;krylov_space_size;nb_mvp;"
"current_block_size;minRes;maxRes;minRealRes;maxRealRes;time;"
"least_square_time;mvp_time;ortho_time;ortho_flops;name\n";
}
void print(std::string name, std::ostream &o = std::cerr)
{
o << glob_iteration<<"\t"
<< loc_iteration<<"\t"
<< krylov_space_size<<"\t"
<< nb_mvp<<"\t"
<< current_block_size<<"\t"
<< min<<"\t"
<< max<<"\t"
<< minReal<<"\t"
<< maxReal<<"\t"
<< total_time<<"\t"
<< least_square_time<<"\t"
<< mvp_time<<"\t"
<< ortho_time<<"\t"
o << glob_iteration<<";"
<< loc_iteration<<";"
<< krylov_space_size<<";"
<< nb_mvp<<";"
<< current_block_size<<";"
<< min<<";"
<< max<<";"
<< minReal<<";"
<< maxReal<<";"
<< total_time<<";"
<< least_square_time<<";"
<< mvp_time<<";"
<< ortho_time<<";"
<< ortho_flops <<";"
<< name << "\n";
}
};
......@@ -82,6 +84,7 @@ private:
Timer _least_square_timer;
Timer _mvp_timer;
Timer _ortho_timer;
int64_t _last_ortho_flops;
bool _log_real_residual;
int _global_iteration;
......@@ -133,6 +136,7 @@ public:
_least_square_timer.reset();
_mvp_timer.reset();
_ortho_timer.reset();
_last_ortho_flops = 0;
std::cerr<<"\nStart of Iteration ["
<<Color::note<<iteration_count<<" :: "
......@@ -170,9 +174,10 @@ public:
_mvp_timer.stop();
}
void notify_ortho_end()
void notify_ortho_end(int64_t ortho_flops)
{
_ortho_timer.stop();
_last_ortho_flops = ortho_flops;
}
void notify_least_square_check_end()
......@@ -203,7 +208,7 @@ public:
Entry{ _global_iteration, id, size_krylov_space,
_total_mvp, mvp_diff, min, max, 0, 0,
elapsed, least_square_elapsed,
mvp_elapsed, ortho_elapsed }
mvp_elapsed, ortho_elapsed, _last_ortho_flops }
);
FABULOUS_NOTE("\t\t "<<Color::red<<"MIN="<<Color::reset<<std::scientific<<min
......
......@@ -48,13 +48,13 @@ Type<T> get_type(const T&)
template<class U>
constexpr bool is_complex(const Type<U>&)
{
return false;
return is_complex_t<U>::value;
}
template<class U>
constexpr bool is_complex(const Type<std::complex<U>>&)
constexpr bool is_real(const Type<U>&)
{
return true;
return is_real_t<U>::value;
}
}; // end namespace fabulous
......
......@@ -2,10 +2,12 @@
#define FABULOUS_TRAITS_HPP
#include <type_traits>
#include <complex>
namespace fabulous {
template<bool B, class T=void> using enable_if_t = typename std::enable_if<B,T>::type;
template<bool B, class T=void>
using enable_if_t = typename std::enable_if<B,T>::type;
template< template<template<class> class, class> class ARNOLDI,
template<class> class HESSENBERG> struct arnoldiXhessenberg;
......@@ -43,23 +45,23 @@ template<
template<class> class HESSENBERG >
struct arnoldiXhessenberg : public std::false_type {};
#define ARNOLDI_X_HESSENBERG(ARNOLDI, HESSENBERG) \
template<> \
struct arnoldiXhessenberg<ARNOLDI, HESSENBERG> \
: public std::true_type {}; \
#define FABULOUS_ARNOLDI_X_HESSENBERG(ARNOLDI, HESSENBERG) \
template<> \
struct arnoldiXhessenberg<ARNOLDI, HESSENBERG> \
: public std::true_type {}; \
ARNOLDI_X_HESSENBERG(ArnoldiDR, HessDR);
ARNOLDI_X_HESSENBERG(ArnoldiDR, HessQR);
ARNOLDI_X_HESSENBERG(ArnoldiDR, HessQRDR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiDR, HessDR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiDR, HessQR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiDR, HessQRDR);
#ifdef FABULOUS_USE_CHAMELEON
ARNOLDI_X_HESSENBERG(ArnoldiDR, HessChamQR);
ARNOLDI_X_HESSENBERG(ArnoldiDR, HessChamTLDR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiDR, HessChamQR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiDR, HessChamTLDR);
#endif
ARNOLDI_X_HESSENBERG(ArnoldiIB, HessIB);
ARNOLDI_X_HESSENBERG(ArnoldiIBDR, HessIBDR);
ARNOLDI_X_HESSENBERG(ArnoldiIBDR, HessQRIBDR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiIB, HessIB);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiIBDR, HessIBDR);
FABULOUS_ARNOLDI_X_HESSENBERG(ArnoldiIBDR, HessQRIBDR);
/**
* \brief boolean field 'value' is set whether HESSENBERG is compatible with RESTARTER
......@@ -70,35 +72,35 @@ template<
>
struct hessenbergXrestarter : public std::false_type {};
#define HESSENBERG_X_RESTARTER(HESSENBERG, RESTARTER) \
template<> \
struct hessenbergXrestarter<HESSENBERG, RESTARTER> \
: public std::true_type {}; \
#define FABULOUS_HESSENBERG_X_RESTARTER(HESSENBERG, RESTARTER) \
template<> \
struct hessenbergXrestarter<HESSENBERG, RESTARTER> \
: public std::true_type {}; \
#define HESSENBERG_X_RESTARTER_S(HESSENBERG, RESTARTER) \
template<class S> \
struct hessenbergXrestarter<HESSENBERG, RESTARTER<S>> \
: public std::true_type {}; \
#define FABULOUS_HESSENBERG_X_RESTARTER_S(HESSENBERG, RESTARTER) \
template<class S> \
struct hessenbergXrestarter<HESSENBERG, RESTARTER<S>> \
: public std::true_type {}; \
HESSENBERG_X_RESTARTER(HessDR, ClassicRestart);
HESSENBERG_X_RESTARTER_S(HessDR, DeflatedRestart);
HESSENBERG_X_RESTARTER(HessQRDR, ClassicRestart);
HESSENBERG_X_RESTARTER_S(HessQRDR, DeflatedRestart);
HESSENBERG_X_RESTARTER(HessIBDR, ClassicRestart);
HESSENBERG_X_RESTARTER_S(HessIBDR, DeflatedRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessDR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER_S(HessDR, DeflatedRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessQRDR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER_S(HessQRDR, DeflatedRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessIBDR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER_S(HessIBDR, DeflatedRestart);
HESSENBERG_X_RESTARTER(HessQRIBDR, ClassicRestart);
HESSENBERG_X_RESTARTER_S(HessQRIBDR, DeflatedRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessQRIBDR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER_S(HessQRIBDR, DeflatedRestart);
HESSENBERG_X_RESTARTER(HessQR, ClassicRestart);
HESSENBERG_X_RESTARTER(HessIB, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessQR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessIB, ClassicRestart);
#ifdef FABULOUS_USE_CHAMELEON
HESSENBERG_X_RESTARTER(HessChamTLDR, ClassicRestart);
HESSENBERG_X_RESTARTER_S(HessChamTLDR, DeflatedRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessChamTLDR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER_S(HessChamTLDR, DeflatedRestart);
HESSENBERG_X_RESTARTER(HessChamQR, ClassicRestart);
FABULOUS_HESSENBERG_X_RESTARTER(HessChamQR, ClassicRestart);
#endif
......@@ -109,13 +111,37 @@ HESSENBERG_X_RESTARTER(HessChamQR, ClassicRestart);
* telling wether the Hessenberg handle Inexact Breakdown.
*/
template<class HESS> struct handle_ib_t : public std::false_type {};
#define HESSENBERG_HANDLE_IB(HESSENBERG) \
template<class S> \
#define FABULOUS_HESSENBERG_HANDLE_IB(HESSENBERG) \
template<class S> \
struct handle_ib_t<HESSENBERG<S>> : public std::true_type {};
HESSENBERG_HANDLE_IB(HessIB);
HESSENBERG_HANDLE_IB(HessIBDR);
HESSENBERG_HANDLE_IB(HessQRIBDR);
FABULOUS_HESSENBERG_HANDLE_IB(HessIB);
FABULOUS_HESSENBERG_HANDLE_IB(HessIBDR);
FABULOUS_HESSENBERG_HANDLE_IB(HessQRIBDR);
template<class S>
struct is_complex_t : public std::false_type {};
#define FABULOUS_IS_COMPLEX(Type_S_) \
template<> \
struct is_complex_t<Type_S_> : public std::true_type {};
FABULOUS_IS_COMPLEX(std::complex<float>);
FABULOUS_IS_COMPLEX(std::complex<double>);
template<class S>
struct is_real_t : public std::false_type {};
#define FABULOUS_IS_REAL(Type_S_) \
template<> \
struct is_real_t<Type_S_> : public std::true_type {};
FABULOUS_IS_REAL(float);
FABULOUS_IS_REAL(double);
#define FABULOUS
}; // end namespace fabulous
......
......@@ -34,12 +34,14 @@ public:
bool useRightPrecond() const { return false; }
// B := M^{-1} * X
void PrecondBlockVect(const Block&/*X*/, Block&/*B*/) const
int64_t PrecondBlockVect(const Block&/*X*/, Block&/*B*/) const
{
FABULOUS_FATAL_ERROR("no preconditionner implemented");
return 0;
}
// B := A * X
void MatBlockVect(const Block &X, Block &B,
int64_t MatBlockVect(const Block &X, Block &B,
S alpha = S{1.0}, S beta = S{0.0}) const
{
int M = B.get_nb_row();
......@@ -52,26 +54,28 @@ public:
B.get_ptr(), B.get_leading_dim(),
alpha, beta
);
return ::fabulous::lapacke::gemm_flops<S>(M, N, K);
}
// [Q,R] := QR(Q)
void QRFacto(Block &Q, Block &R) const
int64_t QRFacto(Block &Q, Block &R) const
{
// default implementation: ( distributed QR; uses DotProduct )
//::fabulous::qr::InPlaceQRFactoMGS_User(Q, R, *this);
// default implementation: (not distributed)
::fabulous::qr::InPlaceQRFacto(Q, R);
return ::fabulous::qr::InPlaceQRFacto(Q, R);
}
// C := A^{H} * B
void DotProduct(int M, int N,
int64_t DotProduct(int M, int N,
const S *A, int lda,
const S *B, int ldb,
S *C, int ldc) const
{
int dim = _data.get_nb_row();
::fabulous::lapacke::Tgemm(M, N, dim, A, lda, B, ldb, C, ldc);
return ::fabulous::lapacke::Tgemm_flops<S>(M, N, dim);
/*
MPI_Datatype type;
MPI_Type_vector(2*N, 2*M, 2*ldc, MPI_DOUBLE, &type);
......
......@@ -93,7 +93,7 @@ public:
bool useRightPrecond() const { return false; }
void MatBlockVect(const Block<S> &X, Block<S> &B,
int64_t MatBlockVect(const Block<S> &X, Block<S> &B,
S alpha = S{1.0}, S beta = S{0.0}) const
{
assert( B.get_nb_col() == X.get_nb_col() );
......@@ -111,11 +111,13 @@ public:
X.get_ptr(), X.get_leading_dim(),
beta, B.get_ptr(), B.get_leading_dim()
);
FABULOUS_WARNING("nb_flops not computed for sparse mvp");
return 0;
}
void QRFacto(Block<S> &Q, Block<S> &R) const
int64_t QRFacto(Block<S> &Q, Block<S> &R) const
{
qr::InPlaceQRFactoMGS_User(Q, R, *this);
return qr::InPlaceQRFactoMGS_User(Q, R, *this);
//qr::InPlaceQRFacto(Q, R);
}
......@@ -126,17 +128,20 @@ public:
* M^{-1} == diagonal_matrix(inverse_of_diagonal_element(A))
* M^{-1} * A ~= I
*/
void PrecondBlockVect(Block<S> &, Block<S> &) const
int64_t PrecondBlockVect(Block<S> &, Block<S> &) const
{
FABULOUS_FATAL_ERROR("no preconditionner associated with sparse COO matrix");
return 0;
}
void DotProduct(int M, int N,
int64_t DotProduct(int M, int N,
const S *A, int lda,
const S *B, int ldb,
S *C, int ldc) const
{
int dim = size();
lapacke::Tgemm(M, N, dim, A, lda, B, ldb, C, ldc, S{1.0}, S{0.0});
return lapacke::Tgemm_flops<S>(M, N, dim);
}
}; // end class UserInputMatrix
......
......@@ -110,7 +110,7 @@ public:
bool useRightPrecond() const { return false; }
void MatBlockVect(const Block<S> &X, Block<S> &B,
int64_t MatBlockVect(const Block<S> &X, Block<S> &B,
S alpha = S{1.0}, S beta = S{0.0}) const
{
assert( B.get_nb_col() == X.get_nb_col() );
......@@ -129,11 +129,13 @@ public:
X.get_ptr(), X.get_leading_dim(),
beta, B.get_ptr(), B.get_leading_dim()
);
FABULOUS_WARNING("nb_flops not computed for sparse mvp");
return 0;
}
void QRFacto(Block<S> &Q, Block<S> &R) const
int64_t QRFacto(Block<S> &Q, Block<S> &R) const
{
qr::InPlaceQRFactoMGS_User(Q, R, *this);
return qr::InPlaceQRFactoMGS_User(Q, R, *this);