Commit 22d5c0ee authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

Rearrange folders

parent cfe916e3
-DHAVE_LAPACK_CONFIG_H
-DLAPACK_COMPLEX_CPP
-DMORSE_COMPLEX_CPP
-I/home/tonton/fabulous/Api/include
-I/home/tonton/fabulous/src
-I/usr/include/chameleon
-I/usr/include/chameleon/coreblas/include
-I/usr/include/starpu/1.2
-Wall
-Wextra
-std=c++11
\ No newline at end of file
...@@ -36,8 +36,8 @@ ...@@ -36,8 +36,8 @@
(template-args-cont c-lineup-template-args +) (template-args-cont c-lineup-template-args +)
(incomposition . +) (incomposition . +)
(inmodule . +) (inmodule . +)
(innamespace . +) (innamespace . 0)
(inextern-lang . +) (inextern-lang . 0)
(composition-close . 0) (composition-close . 0)
(module-close . 0) (module-close . 0)
(namespace-close . 0) (namespace-close . 0)
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
(friend . 0) (friend . 0)
(cpp-define-intro c-lineup-cpp-define +) (cpp-define-intro c-lineup-cpp-define +)
(cpp-macro-cont . +) (cpp-macro-cont . +)
(cpp-macro . [0]) (cpp-macro . 0)
(inclass . +) (inclass . +)
(stream-op . c-lineup-streamop) (stream-op . c-lineup-streamop)
(arglist-cont-nonempty c-lineup-gcc-asm-reg c-lineup-arglist) (arglist-cont-nonempty c-lineup-gcc-asm-reg c-lineup-arglist)
...@@ -84,14 +84,14 @@ ...@@ -84,14 +84,14 @@
(knr-argdecl . 0) (knr-argdecl . 0)
(func-decl-cont . +) (func-decl-cont . +)
(inline-close . 0) (inline-close . 0)
(inline-open . +) (inline-open . 0)
(class-close . 0) (class-close . 0)
(class-open . 0) (class-open . 0)
(defun-block-intro . +) (defun-block-intro . +)
(defun-close . 0) (defun-close . 0)
(defun-open . 0) (defun-open . 0)
(string . c-lineup-dont-change) (string . c-lineup-dont-change)
(arglist-close . c-lineup-arglist) (arglist-close . 0)
(substatement-open . 0) (substatement-open . 0)
(case-label . 0) (case-label . 0)
(block-open . 0) (block-open . 0)
......
build/ build/
data/
*.tar.gz *.tar.gz
*.o *.o
*.d
GPATH GPATH
GRTAGS GRTAGS
GTAGS GTAGS
.clang_complete
-DHAVE_LAPACK_CONFIG_H
-DLAPACK_COMPLEX_CPP
-DMORSE_COMPLEX_CPP
-I/home/tonton/fabulous/Api/include
-I/home/tonton/fabulous/src
-I/usr/include/chameleon
-I/usr/include/chameleon/coreblas/include
-I/usr/include/starpu/1.2
-Wall
-Wextra
-std=c99
\ No newline at end of file
-DHAVE_LAPACK_CONFIG_H
-DLAPACK_COMPLEX_CPP
-DMORSE_COMPLEX_CPP
-I/home/tonton/fabulous/Api/include
-I/home/tonton/fabulous/src
-I/usr/include/chameleon
-I/usr/include/chameleon/coreblas/include
-I/usr/include/starpu/1.2
-Wall
-Wextra
-std=c99
\ No newline at end of file
...@@ -17,11 +17,6 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake_modules/") ...@@ -17,11 +17,6 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake_modules/")
find_package(CBLAS COMPONENTS BLASEXT REQUIRED) find_package(CBLAS COMPONENTS BLASEXT REQUIRED)
find_package(LAPACKE COMPONENTS LAPACKEXT REQUIRED) find_package(LAPACKE COMPONENTS LAPACKEXT REQUIRED)
find_package(Sanitizers) find_package(Sanitizers)
#find_package(PkgConfig)
# (You can use CHAMELEON_DIR to specify the prefix where chameleon was installed.)")
# (You can use STARPU_DIR to specify the prefix where starpu was installed.)")
find_package(CHAMELEON REQUIRED COMPONENTS STARPU MPI)
#find_package(STARPU REQUIRED COMPONENTS HWLOC MPI)
##### RPATH HANDLING #### ##### RPATH HANDLING ####
...@@ -38,14 +33,6 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") ...@@ -38,14 +33,6 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
################ ################
if(CHAMELEON_FOUND)
add_definitions( -DMORSE_COMPLEX_CPP )
link_directories(${CHAMELEON_LIBRARY_DIRS})
link_directories(${CHAMELEON_LIBRARY_DIRS_DEP})
include_directories(${CHAMELEON_INCLUDE_DIRS})
include_directories(${CHAMELEON_INCLUDE_DIRS_DEP})
endif()
if(CBLAS_FOUND) if(CBLAS_FOUND)
message(STATUS "CBLAS_FOUND = ${CBLAS_FOUND}") message(STATUS "CBLAS_FOUND = ${CBLAS_FOUND}")
message(STATUS "CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}") message(STATUS "CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}")
...@@ -75,6 +62,14 @@ else() ...@@ -75,6 +62,14 @@ else()
message(FATAL_ERROR "LAPACKE library has not been found") message(FATAL_ERROR "LAPACKE library has not been found")
endif(LAPACKE_FOUND) endif(LAPACKE_FOUND)
# Remove semicolons (;) coming from lapack's 'findBlas'
list(REMOVE_DUPLICATES CMAKE_EXE_LINKER_FLAGS)
string(REPLACE ";" " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
# Fix a problem on Mac OS X when building shared libraries
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_SHARED_LINKER_FLAGS "-undefined dynamic_lookup")
endif()
option(FABULOUS_DEBUG_MODE "Set to On to compile with debug info" OFF) option(FABULOUS_DEBUG_MODE "Set to On to compile with debug info" OFF)
option(FABULOUS_BUILD_SHARED_LIBS "Build shared libraries" OFF) option(FABULOUS_BUILD_SHARED_LIBS "Build shared libraries" OFF)
option(FABULOUS_BUILD_DOC "Set to ON to build the Doxygen documentation " OFF ) option(FABULOUS_BUILD_DOC "Set to ON to build the Doxygen documentation " OFF )
...@@ -85,41 +80,19 @@ if (FABULOUS_LAPACKE_NANCHECK) ...@@ -85,41 +80,19 @@ if (FABULOUS_LAPACKE_NANCHECK)
endif (FABULOUS_LAPACKE_NANCHECK) endif (FABULOUS_LAPACKE_NANCHECK)
if(FABULOUS_DEBUG_MODE) if(FABULOUS_DEBUG_MODE)
set( set( CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -DFABULOUS_DEBUG_MODE -ggdb -g3 -O0 -fmax-errors=2" ) "${CMAKE_CXX_FLAGS} -DFABULOUS_DEBUG_MODE -ggdb -g3 -O0 -fmax-errors=2" )
set( CMAKE_C_FLAGS
"${CMAKE_C_FLAGS} -DFABULOUS_DEBUG_MODE -ggdb -g3 -O0 -fmax-errors=2" )
else() else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fmax-errors=2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fmax-errors=2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fmax-errors=2")
endif() endif()
if(FABULOUS_BUILD_SHARED_LIBS) include_directories(include)
# Set the RPATH config: include_directories(src/api/include)
# use, i.e. don't skip the full RPATH for the build tree
set(CMAKE_SKIP_BUILD_RPATH FALSE)
# when building, don't use the install RPATH already
# (but later on when installing)
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
# the RPATH to be used when installing
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
endif()
add_subdirectory(src) add_subdirectory(src)
include_directories(src)
# Remove semicolons (;) coming from lapack's 'findBlas'
list(REMOVE_DUPLICATES CMAKE_EXE_LINKER_FLAGS)
string(REPLACE ";" " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
# Fix a problem on Mac OS X when building shared libraries
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_SHARED_LINKER_FLAGS "-undefined dynamic_lookup")
endif()
add_subdirectory(test)
add_subdirectory(cham_hessqr)
if(FABULOUS_BUILD_DOC) if(FABULOUS_BUILD_DOC)
add_subdirectory(doc) add_subdirectory(doc)
endif() endif()
################# API #################
add_subdirectory(Api)
#ifndef FABULOUS_ARITHMETIC_H
#define FABULOUS_ARITHMETIC_H
#include <complex>
#include <morse.h>
namespace fabulous {
#define PASTE_2(a, b) a##b
#define PASTE_(a, b) PASTE_2(a, b)
#define PASTE(a, b) PASTE_(a, b)
#define STRINGIFY_(x) #x
#define STRINGIFY(x) STRINGIFY_(x)
#define FABULOUS_ARITHMETIC_LIST(ARITHMETIC) \
ARITHMETIC( \
REAL_FLOAT, float, float, 0, \
MorseRealFloat, MorseTrans, s, or, 'T' \
) \
ARITHMETIC( \
REAL_DOUBLE, double, double, 1, \
MorseRealDouble, MorseTrans, d, or, 'T' \
) \
ARITHMETIC( \
COMPLEX_FLOAT, std::complex<float>, float, 2, \
MorseComplexFloat, MorseConjTrans, c, un, 'C' \
) \
ARITHMETIC( \
COMPLEX_DOUBLE, std::complex<double>, double, 3, \
MorseComplexDouble, MorseConjTrans, z, un, 'C' \
) \
/* **************** Arithmetic CLASS ********************** */
template<class U> struct Arithmetik
{
static_assert(
sizeof(U) != sizeof(U), // false only if instantiated 8)
"Only float, double, std::complex<float> and "
"std::complex<double> specializations are allowed!"
);
};
#define FABULOUS_DEFINE_ARITHMETIC( \
NAME_, type_, primary_type_, value_, \
morse_type_, morse_trans_, \
_7, _8, la_trans_, ... ) \
template <> \
struct Arithmetik<type_> { \
typedef type_ type; \
typedef type_ value_type; \
typedef primary_type_ primary_type; \
static const constexpr int morse_type = morse_type_; \
static const constexpr int mtrans = morse_trans_; \
static const constexpr char ltrans = la_trans_; \
static const constexpr char *type_name = #type_; \
static const constexpr char *name = #NAME_; \
};
FABULOUS_ARITHMETIC_LIST(FABULOUS_DEFINE_ARITHMETIC);
/* **************** Arithmetic Enum ********************** */
#define FABULOUS_ARITHMETIC_TO_ENUM(NAME_, _3, _4, value_, ...) \
NAME_ = value_,
enum class Arithmetic {
FABULOUS_ARITHMETIC_LIST(FABULOUS_ARITHMETIC_TO_ENUM)
};
}; // end namespace fabulous
#endif // FABULOUS_ARITHMETIC_H
#ifndef FABULOUS_ARNOLDI_ORTHO_HPP
#define FABULOUS_ARNOLDI_ORTHO_HPP
#include "Base.hpp"
#include "Block.hpp"
#include "KernelInterface.hpp"
#include "Utils.hpp"
#include "Arnoldi_Ortho_Enum.hpp"
#include "Error.hpp"
#undef FABULOUS_ORTHO_IMPL_INCLUDE
#define FABULOUS_ORTHO_IMPL_INCLUDE
# include "Arnoldi_Ortho_RUHE.hpp"
# include "Arnoldi_Ortho_BLOCK.hpp"
#undef FABULOUS_ORTHO_IMPL_INCLUDE
template< class Hess, class Base, class Matrix, class Block >
void Arnoldi_Ortho(Hess &hess, Base &base, Block &W,
OrthoType type, OrthoScheme scheme, Matrix &A)
{
switch (type) {
case OrthoType::RUHE:
ArnoldiRuhe(hess, base, W, scheme, A);
break;
case OrthoType::BLOCK:
ArnoldiBlock(hess, base, W, scheme);
break;
default:
FABULOUS_FATAL_ERROR("Choose between blockwise or Ruhe orthogonalization");
break;
}
}
#endif // FABULOUS_ARNOLDI_ORTHO_HPP
#include <cassert>
#ifndef FABULOUS_ORTHO_IMPL_INCLUDE
#error "Never include this header directly; include 'Arnoldi_Ortho.hpp' instead"
#endif
namespace fabulous {
/**
* \brief Orthogonalisation process :: Classical Gram Schmidt
*/
template< class Base, class Block, class U = typename Block::value_type >
void ArnoldiBlock_CGS(Base &base, Block &H, Block &W)
{
int dim = base.get_nb_row();
int nb_vect = base.get_nb_vect();
int W_size = W.get_nb_col();
assert( H.get_nb_col() == W.get_nb_col() );
assert( H.get_nb_row() == base.get_nb_vect() );
LapackKernI::Tgemm( // H = Vm^{t}*W
nb_vect, W_size, dim,
base.get_ptr(), base.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
H.get_ptr(), H.get_leading_dim(),
U{1.0}, U{0.0}
);
LapackKernI::gemm( // W = W - Vm*H
dim, W_size, nb_vect,
base.get_ptr(), base.get_leading_dim(),
H.get_ptr(), H.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
U{1.0}, U{-1.0}
);
}
/**
* \brief orthogonalisation process :: Classical Gram Schmidt
*/
template< class Base, class Block, class U = typename Block::value_type >
void ArnoldiBlock_ICGS(Base &base, Block &H, Block &W)
{
int dim = base.get_nb_row();
int nb_vect = base.get_nb_vect();
int W_size = W.get_nb_col();
assert( H.get_nb_col() == W.get_nb_col() );
assert( H.get_nb_row() == base.get_nb_vect() );
// Block tmp to be added to Hess
Block tmp{dim, W_size};
LapackKernI::Tgemm( // H = Vm^{t}*W
nb_vect, W_size, dim,
base.get_ptr(), base.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
tmp.get_ptr(), tmp.get_leading_dim(),
U{1.0}, U{0.0}
);
LapackKernI::gemm( // W = W - Vm*H
dim, W_size, nb_vect,
base.get_ptr(), base.get_leading_dim(),
tmp.get_ptr(), tmp.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
U{-1.0}, U{1.0}
);
// Add tmp to hess
for (int j = 0; j < tmp.get_nb_col(); ++j) // H = H + tmp;
LapackKernI::axpy(nb_vect, U{1.0}, tmp.get_vect(j), 1, H.get_vect(j), 1);
}
/**
* \brief Orthogonalization process (Modified Gram Schmidt)
*
* \param W block to be orthogonalized against current base
* \param HessToWrite Array of Scalar to write the
* orthogonalization coeffs
* \param ldHess : Leading dimension of Hessenberg
* \param idxToStart : number of vectors to discard in Ortho process
*/
template< class Base, class Block, class U = typename Block::value_type >
void ArnoldiBlock_MGS(Base &base, Block &H, Block &W)
{
int dim = base.get_nb_row();
int W_size = W.get_nb_col();
int nb_block = base.get_nb_block();
assert( H.get_nb_col() == W.get_nb_col() );
assert( H.get_nb_row() == base.get_nb_vect() );
int size_block_sum = 0;
// Loop over different blocks
for (int k = 0; k < nb_block; ++k) {
int size_block = base.get_block_size(k);
Block H_k = H.sub_block(size_block_sum, 0, size_block, W_size);
size_block_sum += size_block;
LapackKernI::Tgemm( // H_{ij} += V_i^{h} * W
size_block, W_size, dim,
base.get_block_ptr(k), base.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
H_k.get_ptr(), H_k.get_leading_dim(),
U{1.0}, U{0.0}
);
// W = W - V_i * H_{ij}
LapackKernI::gemm(
dim, W_size, size_block,
base.get_block_ptr(k), dim,
H_k.get_ptr(), H_k.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
U{-1.0}, U{1.0}
);
}
}
/**
* \brief Orthogonalization process (Modified Gram Schmidt), Same
* as above, but we store the ortho coefficients inside a
* tmporary block in order to be able to call this function twice
* in case of Iterated processus.
*
* \param W block to be orthogonalized against current base
* \param HessToWrite Array of Scalar to write the
* orthogonalization coeffs
* \param ldHess : Leading dimension of Hessenberg
* \param idxToStart : number of vectors to discard in Ortho process
*/
template< class Base, class Block, class U = typename Block::value_type >
void ArnoldiBlock_IMGS(Base &base, Block &H, Block &W)
{
int dim = base.get_nb_row();
int W_size = W.get_nb_col();
int nb_block = base.get_nb_block();
assert( H.get_nb_col() == W.get_nb_col() );
assert( H.get_nb_row() == base.get_nb_vect() );
// Loop over different blocks
int size_block_sum = 0;
for (int k = 0; k < nb_block; ++k) {
int size_block = base.get_block_size(k);
Block tmp{size_block, W_size};
LapackKernI::Tgemm( //tmp = V_i^{h} * W
size_block, W_size, dim,
base.get_block_ptr(k), base.get_leading_dim(),
W.get_ptr(),W.get_leading_dim(),
tmp.get_ptr(),tmp.get_leading_dim(),
U{1.0}, U{0.0}
);
LapackKernI::gemm( // W = W - V_i * tmp
dim, W_size, size_block,
base.get_block_ptr(k), base.get_leading_dim(),
tmp.get_ptr(), tmp.get_leading_dim(),
W.get_ptr(), W.get_leading_dim(),
U{-1.0}, U{1.0}
);
Block H_k = H.sub_block(size_block_sum, 0, size_block, W_size);
size_block_sum += size_block;
// Add tmp to hess
for (int j = 0; j < tmp.get_nb_col(); ++j) // H_k = H_k + tmp
LapackKernI::axpy(size_block, U{1.0}, tmp.get_vect(j), 1, H_k.get_vect(j), 1);
}
}
/**
* \brief Orthogonalization process dispatcher.
*
* Choice is an input, but may be moved as a flag, set at Base construction.
*/
template< class Base, class Block, class U = typename Block::value_type >
void ArnoldiBlock_Dispatch(OrthoScheme scheme, Base &base, Block &H, Block &W)
{
const int iter_count = 2;
switch (scheme) {
case OrthoScheme::CGS:
ArnoldiBlock_CGS(base, H, W);
break;
case OrthoScheme::ICGS:
for (int i = 0; i < iter_count; ++i)
ArnoldiBlock_ICGS(base, H, W);
break;
case OrthoScheme::MGS:
ArnoldiBlock_MGS(base, H, W);
break;
case OrthoScheme::IMGS:
for (int i = 0; i < iter_count; ++i)
ArnoldiBlock_IMGS(base, H, W);
break;
default:
std::cout << "Warning :: No Ortho scheme choosen, MGS is default choice\n";
ArnoldiBlock_MGS(base, H, W);
break;
}
}
/**
* \brief Arnoldi Block version
*/
template<class Hess, class Base, class Block
/* class = typename std::enable_if<!Block::IB_supported::value>::type
class = void */ >
void ArnoldiBlock(Hess &hess, Base &base, Block &W, OrthoScheme scheme)
{
int nb_vect_in_base = base.get_nb_vect();
int block_size = W.get_nb_col();
int mh = nb_vect_in_base + block_size;
int nh = block_size;
Block HR{mh, nh};
Block H = HR.sub_block(0, 0, nb_vect_in_base, block_size);
Block R = HR.sub_block(nb_vect_in_base, 0, block_size, block_size);
ArnoldiBlock_Dispatch(scheme, base, H, W); // Ortho against vectors inside base
W.InPlaceQRFacto(R); // QR facto of orthogonalized W
std::cout << "Block Arnoldi done" << std::endl;
hess.add_HR_part(HR);
}
}; // namespace fabulous
#ifndef FABULOUS_ARNOLDI_ORTHO_ENUM_H
#define FABULOUS_ARNOLDI_ORTHO_ENUM_H
#include <unordered_map>
/**
* \brief Enum over the two different approach to ortho phase during Arnoldi.
*
*/
enum class OrthoType {
RUHE = 0, /*!< Arnoldi-Ruhe : otrtho vector by vector*/
BLOCK = 1, /*!< Block Arnoldi Ortho block wise followed by a a QR factorization*/
};
/**
* \brief Enum over different available Orthogonalization scheme.
*/
enum class OrthoScheme {
MGS = 0, /*!< Modified Gram Schmidt */
CGS = 1, /*!< Classical Gram Schmidt */
IMGS = 2, /*!< Iterated Modified Gram Schmidt */
ICGS = 3 /*!< Iterated Classical Gram Schmidt */
};
/* **************** STRING CONVERSION ******************** */
static std::unordered_map<OrthoType, std::string>
arnortho_names= {
{ OrthoType::BLOCK, "BLOCK" },
{ OrthoType::RUHE, "RUHE" },
};
static std::unordered_map<OrthoScheme, std::string>
ortho_choice_names = {
{ OrthoScheme::MGS, "MGS" },
{ OrthoScheme::CGS, "CGS" },
{ OrthoScheme::IMGS, "IMGS" },
{ OrthoScheme::ICGS, "ICGS" },
};
/* **************** OPERATOR<< OVERLOAD ********************* */
inline std::ostream& operator<<(std::ostream& os, const OrthoType ortho)
{
auto it = arnortho_names.find(ortho);
if (it == arnortho_names.end())
os << "No choice made between BLOCK and RUHE";
else
os << it->second;
return os;
}
inline std::ostream& operator<<(std::ostream& os, const OrthoScheme ortho)
{
auto it = ortho_choice_names.find(ortho);
if (it == ortho_choice_names.end())
os << "Orthogonalization process unknown";
else
os << it->second;
return os;
}
#endif // FABULOUS_ARNOLDI_ORTHO_ENUM_H
#ifndef FABULOUS_ARNOLDI_ORTHO_RUHE_HPP
#define FABULOUS_ARNOLDI_ORTHO_RUHE_HPP
#ifndef FABULOUS_ORTHO_IMPL_INCLUDE
#error "Never include this header directly; include 'Arnoldi_Ortho.hpp' instead"
#endif
namespace fabulous {
/* **************************** RUHE ******************************** */
/**
* \brief Arnoldi Ruhe version with CGS ortho
*/
template< class Base, class Block, class Matrix,
class U = typename Block::value_type >
void ArnoldiRuhe_CGS(Block &HR, Base &base, Block &W, Matrix &A)
{
int W_size = W.get_nb_col();
int dim = base.get_nb_row();
int nb_vect_in_base = base.get_nb_vect();
int block_size = base.get_block_size(base.get_current_block());
assert(W_size == block_size);
int ldh = HR.get_leading_dim();
int ldw = W.get_leading_dim();
int ldv = base.get_leading_dim();
auto *V = base.get_ptr();
// Loop over vector in W block
for (int k = 0; k < W_size; ++k) {
U *W_k = W.get_vect(k);
U *h_k = HR.get_vect(k);
{ // Ortho against Base
A.DotProduct(dim, nb_vect_in_base,
V, ldv, W_k, ldw, h_k);
// W_k = W_k - V * H_k
LapackKernI::gemm(dim, 1, nb_vect_in_base,
V, ldv,
h_k, ldh,
W_k, ldw,
U{-1.0}, U{1.0});
}
{ // Ortho against already processed W vectors
A.DotProduct(dim, k, W.get_ptr(), ldw, W_k, ldw, h_k+nb_vect_in_base);
//W_k = W_k - W_{0->k-1} * h_k[nb_vect_in_base]
LapackKernI::gemm(dim, 1, k,
W.get_ptr(), ldw,
h_k+nb_vect_in_base, ldh,
W_k, ldw,
U{-1.0}, U{1.0});
}
auto n = W.get_norm(k, A);
h_k[nb_vect_in_base+k] = n; // Hess[k+1,k] = norm(wj_k)
if (n == 0.0) {
FABULOUS_FATAL_ERROR(
"Rank loss in block candidate for extending the Krylov Space" );