Commit fa106948 authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

simplify kernel header(end)

parent 2cc5e4ae
......@@ -18,11 +18,6 @@ IF(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
SET(CMAKE_SHARED_LINKER_FLAGS "-undefined dynamic_lookup")
ENDIF()
# Trouver un moyen de tester que le compilateur support la norme 2011
# (regarder éventuellement Simgrid/boost etc)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -Wall -Wextra")
#Ajout à la liste des repertoires dans lesquels CMAKE cherche ses modules.
SET(MORSE_CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake_modules/morse_cmake/modules/")
LIST(APPEND CMAKE_MODULE_PATH "${MORSE_CMAKE_MODULE_PATH}")
......@@ -34,6 +29,11 @@ FIND_PACKAGE(LAPACKE REQUIRED COMPONENTS LAPACKEXT)
FIND_PACKAGE(Sanitizers)
FIND_PACKAGE(OpenMP)
# Trouver un moyen de tester que le compilateur support la norme 2011
# (regarder éventuellement Simgrid/boost etc)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -fmax-errors=2")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -Wall -Wextra -fmax-errors=2")
IF(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
......
......@@ -4,6 +4,7 @@
#include "fabulous/algo/solve.hpp"
#include "fabulous/algo/AlgoType.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/QR.hpp"
#endif // FABULOUS_HPP
......@@ -87,7 +87,7 @@ private:
}
// X_n = X_0 + MVY
for (int j = 0; j < X.get_nb_col(); ++j)
LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
lapacke::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
}
template<class Matrix, class Block>
......
......@@ -11,6 +11,7 @@ template<template<class> class HESSENBERG, class S> class ArnoldiIB;
#include "fabulous/utils/Logger.hpp"
#include "fabulous/utils/Traits.hpp"
#include "fabulous/restart/DeflatedRestart.hpp"
#include "fabulous/kernel/QR.hpp"
#include "fabulous/orthogonalization/Orthogonalizer.hpp"
#include "fabulous/orthogonalization/OrthoParam.hpp"
......@@ -98,7 +99,7 @@ private:
// X_n = X_0 + MVY
for (int j = 0; j < X.get_nb_col(); ++j)
LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
lapacke::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
}
template<class Matrix, class Block>
......@@ -210,7 +211,7 @@ private:
Block<S> U1 = U.sub_block(0, 0, M, _nb_direction_kept);
Block<S> U2 = U.sub_block(0, _nb_direction_kept, M, _nb_direction_discarded);
Block<S> V1 = _base.get_W(_nb_direction_kept);
LapackKernI::gemm( // V_1 := Q_0 * \U_1
lapacke::gemm( // V_1 := Q_0 * \U_1
_dim, _nb_direction_kept, _nbRHS,
Q0.get_ptr(), Q0.get_leading_dim(),
U1.get_ptr(), U1.get_leading_dim(),
......@@ -221,7 +222,7 @@ private:
_WP.increase_size_P(_nb_direction_discarded);
Block<S> P = _WP.get_P();
LapackKernI::gemm( // P_0 := Q_0 * \U_2
lapacke::gemm( // P_0 := Q_0 * \U_2
_dim, _nb_direction_discarded, _nbRHS,
Q0.get_ptr(), Q0.get_leading_dim(),
U2.get_ptr(), U2.get_leading_dim(),
......@@ -230,7 +231,7 @@ private:
);
Block<S> Lambda1{_nbRHS, _nbRHS};
LapackKernI::Tgemm( // \Lambda_1 := U^H * \Lambda_0
lapacke::Tgemm( // \Lambda_1 := U^H * \Lambda_0
_nbRHS, _nbRHS, _nbRHS,
U.get_ptr(), U.get_leading_dim(),
Lambda0.get_ptr(), Lambda0.get_leading_dim(),
......@@ -311,7 +312,7 @@ private:
Block<S> U1_2 = U.sub_block(nb_vect, 0, _nbRHS, _nb_direction_kept);
Block<S> W1_W2{_nbRHS, _nbRHS};
Block<S> R{};
U1_2.OutOfPlaceQRFacto(W1_W2, R); // Qr facto of bottom block of U
qr::OutOfPlaceQRFacto(U1_2, W1_W2, R); // Qr facto of bottom block of U
Block<S> W1, W2;
W1 = W1_W2.sub_block(0, 0, _nbRHS, _nb_direction_kept);
W2 = W1_W2.sub_block(0, _nb_direction_kept, _nbRHS, _nb_direction_discarded);
......
......@@ -13,6 +13,7 @@ template<template<class> class HESSENBERG, class S> class ArnoldiIBDR;
#include "fabulous/orthogonalization/OrthoParam.hpp"
#include "fabulous/orthogonalization/Orthogonalizer.hpp"
#include "fabulous/restart/DeflatedRestart.hpp"
#include "fabulous/kernel/QR.hpp"
namespace fabulous {
......@@ -99,7 +100,7 @@ private:
// X_n = X_0 + MVY
for (int j = 0; j < X.get_nb_col(); ++j)
LapackKernI::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
lapacke::axpy(dim, S{1.0}, MVY.get_vect(j), 1, X.get_vect(j), 1);
}
template< class Matrix, class Block, class BlockWP >
......@@ -223,7 +224,7 @@ private:
Block<S> U1 = U.sub_block(0, 0, M, _nb_direction_kept);
Block<S> U2 = U.sub_block(0, _nb_direction_kept, M, _nb_direction_discarded);
Block<S> V1 = _base.get_W(_nb_direction_kept);
LapackKernI::gemm( // V_1 := Q_0 * \U_1
lapacke::gemm( // V_1 := Q_0 * \U_1
_dim, _nb_direction_kept, _nbRHS,
Q0.get_ptr(), Q0.get_leading_dim(),
U1.get_ptr(), U1.get_leading_dim(),
......@@ -234,7 +235,7 @@ private:
_WP.increase_size_P(_nb_direction_discarded);
Block<S> P = _WP.get_P();
LapackKernI::gemm( // P_0 := Q_0 * \U_2
lapacke::gemm( // P_0 := Q_0 * \U_2
_dim, _nb_direction_discarded, _nbRHS,
Q0.get_ptr(), Q0.get_leading_dim(),
U2.get_ptr(), U2.get_leading_dim(),
......@@ -243,7 +244,7 @@ private:
);
Block<S> Lambda1{_nbRHS, _nbRHS};
LapackKernI::Tgemm( // \Lambda_1 := U^H * \Lambda_0
lapacke::Tgemm( // \Lambda_1 := U^H * \Lambda_0
_nbRHS, _nbRHS, _nbRHS,
U.get_ptr(), U.get_leading_dim(),
Lambda0.get_ptr(), Lambda0.get_leading_dim(),
......@@ -320,7 +321,7 @@ private:
Block<S> U1_2 = U.sub_block(nb_vect, 0, _nbRHS, _nb_direction_kept);
Block<S> W1_W2{_nbRHS, _nbRHS};
Block<S> R{};
U1_2.OutOfPlaceQRFacto(W1_W2, R); // Qr facto of bottom block of U
qr::OutOfPlaceQRFacto(U1_2, W1_W2, R); // Qr facto of bottom block of U
Block<S> W1, W2;
W1 = W1_W2.sub_block(0, 0, _nbRHS, _nb_direction_kept);
W2 = W1_W2.sub_block(0, _nb_direction_kept, _nbRHS, _nb_direction_discarded);
......
......@@ -6,7 +6,8 @@ template<class S> class Base;
};
#include "fabulous/utils/Utils.hpp"
#include "./Block.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/data/Block.hpp"
namespace fabulous {
......@@ -92,7 +93,15 @@ public:
int len_Y = Y.get_nb_row();
assert( len_Y <= _nb_vect );
Block V = sub_block(0, 0, get_nb_row(), len_Y);
LapackKernI::gemm(V, Y, VY, alpha, beta);
int M = VY.get_nb_row();
int N = VY.get_nb_col();
int K = Y.get_nb_row();
lapacke::gemm(M, N, K,
V.get_ptr(), V.get_leading_dim(),
Y.get_ptr(), Y.get_leading_dim(),
VY.get_ptr(), VY.get_leading_dim(),
alpha, beta);
}
}; // end class Base
......
......@@ -34,6 +34,7 @@ template<class> class Block;
#include "fabulous/utils/Error.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/svd.hpp"
namespace fabulous {
......
......@@ -5,6 +5,7 @@ namespace fabulous {
template<class> class BlockWP;
};
#include "fabulous/kernel/blas.hpp"
#include "fabulous/data/Block.hpp"
namespace fabulous {
......@@ -77,14 +78,14 @@ public:
if (!_cursor)
return;
LapackKernI::Tgemm( // C := P^{H} * \widetilde{W}
lapacke::Tgemm( // C := P^{H} * \widetilde{W}
get_size_P(), get_size_W(), get_nb_row(),
get_P_ptr(), get_leading_dim(),
get_W_ptr(), get_leading_dim(),
C.get_ptr(), C.get_leading_dim(),
S{1.0}, S{0.0}
);
LapackKernI::gemm( // ~W = ~W - P_j*C
lapacke::gemm( // ~W = ~W - P_j*C
get_nb_row(), get_size_W(), get_size_P(),
get_P_ptr(), get_leading_dim(),
C.get_ptr(), C.get_leading_dim(),
......@@ -101,7 +102,7 @@ public:
void compute_V(const Block<S> &W1, Block<S> &V) const
{
int nb_kept_direction = W1.get_nb_col();
LapackKernI::gemm( // V_{j+1} := [P_{j-1},~W] * \W_1
lapacke::gemm( // V_{j+1} := [P_{j-1},~W] * \W_1
get_nb_row(), nb_kept_direction, get_nb_col(),
get_ptr(), get_leading_dim(),
W1.get_ptr(), W1.get_leading_dim(),
......@@ -118,7 +119,7 @@ public:
{
int nb_discarded_direction = W2.get_nb_col();
Block<S> tmp{get_nb_row(), nb_discarded_direction};
LapackKernI::gemm( // P_{j} := [P_{j-1},~W] * \W_1
lapacke::gemm( // P_{j} := [P_{j-1},~W] * \W_1
get_nb_row(), nb_discarded_direction, get_nb_col(),
get_ptr(), get_leading_dim(),
W2.get_ptr(), W2.get_leading_dim(),
......@@ -140,7 +141,7 @@ public:
return;
super tmp{get_size_P(), get_size_W()};
LapackKernI::Tgemm(
lapacke::Tgemm(
get_size_P(), get_size_W(), get_nb_row(),
get_P_ptr(), get_leading_dim(),
get_W_ptr(), get_leading_dim(),
......
......@@ -22,7 +22,7 @@ template<class S> class HessChamQR;
#include "fabulous/utils/Error.hpp"
#include "fabulous/utils/Chameleon.hpp"
#include "fabulous/kernel/ChameleonInterface.hpp"
#include "fabulous/kernel/chameleon.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/data/MorseDesc.hpp"
......@@ -35,7 +35,6 @@ inline std::string make_quad_key(int i, int j, int m, int n)
return ss.str();
}
/**
* \brief Hessenberg for Incremental QR version with chamelon back-end
*
......@@ -54,7 +53,7 @@ inline std::string make_quad_key(int i, int j, int m, int n)
*
* \note only the upper hessenberg block are allocated.
*/
template< class S >
template<class S>
class HessChamQR
{
private:
......@@ -173,7 +172,7 @@ private:
MORSE_desc_t *C = get_sub_hess(i, k, 2, 1);
MORSE_desc_t *T = _tau[i].get();
int err = ChameleonKernI::ormqr<S>(trans, A, T, C, _seq.get());
int err = chameleon::ormqr<S>(trans, A, T, C, _seq.get());
if (err != 0)
FABULOUS_FATAL_ERROR("return value of ormqr step "<<i<<" is "<<err);
}
......@@ -182,7 +181,7 @@ private:
DescPtr tau;
MORSE_desc_t *A = get_sub_hess(k, k, 2, 1);
int err = ChameleonKernI::geqrf<S>(A, tau, _seq.get());
int err = chameleon::geqrf<S>(A, tau, _seq.get());
if (err != 0)
FABULOUS_FATAL_ERROR("return values of geqrf (last block) is"<<err);
_tau.push_back(tau);
......@@ -191,7 +190,7 @@ private:
/* STEP 3: Apply Q^H generated at step 2 to last block of RHS */
MORSE_desc_t *C = get_sub_rhs(k, 0, 2, 1);
err = ChameleonKernI::ormqr<S>(trans, A, tau.get(), C, _seq.get());
err = chameleon::ormqr<S>(trans, A, tau.get(), C, _seq.get());
if (err != 0)
FABULOUS_FATAL_ERROR("return value of ormqrf (RHS) is "<<err);
......@@ -278,10 +277,10 @@ public:
MORSE_desc_t *YYd = yy.get();
// copy (least square RHS (Lambda)) to YYd:
ChameleonKernI::lacpy<S>(MorseUpperLower, Lambda, YYd, _seq.get());
chameleon::lacpy<S>(MorseUpperLower, Lambda, YYd, _seq.get());
// compute the solution
ChameleonKernI::trsm<S>(R, YYd, _seq.get());
chameleon::trsm<S>(R, YYd, _seq.get());
MORSE_Sequence_Wait(_seq.get());
MORSE_Tile_to_Lapack(YYd, _Y.get_ptr(), _Y.get_leading_dim());
......
......@@ -7,8 +7,7 @@ namespace fabulous {
template< class > class HessChamTLDR;
};
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/ChameleonInterfaceTopLevel.hpp"
#include "fabulous/kernel/chameleon-toplevel.hpp"
#include "fabulous/data/Block.hpp"
namespace fabulous {
......@@ -119,7 +118,7 @@ public:
H.copy(*this);
assert( _YY.get_nb_row() == M );
int err = ChameleonKernTopLevelI::gels(
int err = chameleon::toplevel::gels(
M, N, _nbRHS,
H.get_ptr(), H.get_leading_dim(),
_YY.get_ptr(), _YY.get_leading_dim()
......@@ -143,7 +142,7 @@ public:
assert(LS.get_nb_row() >= _R1.get_nb_row());
solve_least_square(Y);
LapackKernI::gemm(
lapacke::gemm(
M, _nbRHS, N,
get_ptr(), get_leading_dim(),
Y.get_ptr(), Y.get_leading_dim(),
......
......@@ -5,7 +5,8 @@ namespace fabulous {
template<class S> class HessDR;
};
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/gels.hpp"
#include "fabulous/data/Block.hpp"
namespace fabulous {
......@@ -116,7 +117,7 @@ public:
H.copy(*this);
assert( _YY.get_nb_row() == M );
int err = LapackKernI::gels(
int err = lapacke::gels(
M, N, _nbRHS,
H.get_ptr(), H.get_leading_dim(),
_YY.get_ptr(), _YY.get_leading_dim()
......@@ -140,7 +141,7 @@ public:
assert(LS.get_nb_row() >= _R1.get_nb_row());
solve_least_square(Y);
LapackKernI::gemm(
lapacke::gemm(
M, _nbRHS, N,
get_ptr(), get_leading_dim(),
Y.get_ptr(), Y.get_leading_dim(),
......
......@@ -9,7 +9,8 @@ template<class> class HessIB;
};
#include "fabulous/data/BlockWP.hpp"
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/gels.hpp"
namespace fabulous {
......@@ -123,7 +124,7 @@ public:
subphi.copy(*_phi);
// Compute newphi(nj+1:nj+p,:) := W1_W2^{H} * _phi(nj+1:nj+p,:)
LapackKernI::Tgemm(
lapacke::Tgemm(
_nbRHS, _nbRHS, _nbRHS,
W1_W2.get_ptr(), W1_W2.get_leading_dim(),
_phi->get_ptr(nj, 0), _phi->get_leading_dim(),
......@@ -145,7 +146,7 @@ public:
return;
}
LapackKernI::gemm( // RHS <- Phi * Lambda1
lapacke::gemm( // RHS <- Phi * Lambda1
_phi->get_nb_row(), _nbRHS, _nbRHS,
_phi->get_ptr(), _phi->get_leading_dim(),
_lambda1.get_ptr(), _lambda1.get_leading_dim(),
......@@ -224,7 +225,7 @@ public:
old_Hj.copy(Hj); // Save Hj in order to write directly inside F
LapackKernI::Tgemm(
lapacke::Tgemm(
_nbRHS, nj, _nbRHS,
W1_W2.get_ptr(), W1_W2.get_leading_dim(),
old_Hj.get_ptr(), old_Hj.get_leading_dim(),
......@@ -253,7 +254,7 @@ public:
Block<S> F{M, N}; // working copy of F (to be overwritten by kernel)
F.copy(this->get_F());
int err = LapackKernI::gels( // solve least square
int err = lapacke::gels( // solve least square
M, N, _nbRHS, // this function overwrites both F and Lambda
F.get_ptr(), F.get_leading_dim(),
_YY.get_ptr(), _YY.get_leading_dim()
......@@ -277,7 +278,7 @@ public:
int N = _nb_vect;
compute_Lambda(PR);// save \Lambda in PR in order to compute PR in the end
LapackKernI::gemm( // PR := F*Y - \Lambda
lapacke::gemm( // PR := F*Y - \Lambda
M, _nbRHS, N,
get_ptr(), get_leading_dim(),
_YY.get_ptr(), _YY.get_leading_dim(),
......
......@@ -9,7 +9,8 @@ template<class> class HessIBDR;
};
#include "fabulous/data/BlockWP.hpp"
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/gels.hpp"
namespace fabulous {
......@@ -150,7 +151,7 @@ public:
subphi.copy(*_phi);
// Compute newphi(nj+1:nj+p,:) := W1_W2^{H} * _phi(nj+1:nj+p,:)
LapackKernI::Tgemm(
lapacke::Tgemm(
_nbRHS, _nbRHS, _nbRHS,
W1_W2.get_ptr(), W1_W2.get_leading_dim(),
_phi->get_ptr(_nb_vect, 0), _phi->get_leading_dim(),
......@@ -188,14 +189,14 @@ public:
// restart update method :: Lambda1: (k+p, p)
Block<S> L1sub = _lambda1.sub_block(0, 0, p1, p);
Lambda.copy(L1sub);
LapackKernI::gemm(
lapacke::gemm(
p1, p, p,
_phi->get_ptr(), _phi->get_leading_dim(),
_lambda1.get_ptr()+p1, _lambda1.get_leading_dim(),
Lambda.get_ptr(), Lambda.get_leading_dim(),
S{1.0}, S{1.0}
);
LapackKernI::gemm(
lapacke::gemm(
Lambda.get_nb_row()-p1, p, p,
_phi->get_ptr()+p1, _phi->get_leading_dim(),
_lambda1.get_ptr()+p1, _lambda1.get_leading_dim(),
......@@ -204,7 +205,7 @@ public:
);
} else {
// breakdown on R0 restart method :: Lambda1: (p, p)
LapackKernI::gemm( // RHS <- Phi * Lambda1
lapacke::gemm( // RHS <- Phi * Lambda1
Lambda.get_nb_row(), _nbRHS, _nbRHS,
_phi->get_ptr(), _phi->get_leading_dim(),
_lambda1.get_ptr(), _lambda1.get_leading_dim(),
......@@ -291,7 +292,7 @@ public:
old_Hj.copy(Hj); // Save Hj in order to write directly inside F
LapackKernI::Tgemm(
lapacke::Tgemm(
_nbRHS, nj, _nbRHS,
W1_W2.get_ptr(), W1_W2.get_leading_dim(),
old_Hj.get_ptr(), old_Hj.get_leading_dim(),
......@@ -320,7 +321,7 @@ public:
Block<S> F{M, N}; // working copy of F (to be overwritten by kernel)
F.copy(this->get_F());
int err = LapackKernI::gels( // solve least square
int err = lapacke::gels( // solve least square
M, N, _nbRHS, // this function overwrites both F and Lambda
F.get_ptr(), F.get_leading_dim(),
_YY.get_ptr(), _YY.get_leading_dim()
......@@ -344,7 +345,7 @@ public:
int N = _nb_vect;
compute_Lambda(PR);// save \Lambda in PR in order to compute PR in the end
LapackKernI::gemm( // PR := F*Y - \Lambda
lapacke::gemm( // PR := F*Y - \Lambda
M, _nbRHS, N,
get_ptr(), get_leading_dim(),
_YY.get_ptr(), _YY.get_leading_dim(),
......
......@@ -5,7 +5,8 @@ namespace fabulous {
template<class> class HessQR;
};
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/qrf.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/utils/Arithmetic.hpp"
......@@ -66,7 +67,7 @@ private:
Block<S> C = this->macro_sub_block(i, k, 2, 1);
S *tau = _tau[i].data();
int err = LapackKernI::ormqr(
int err = lapacke::ormqr(
Trans, C.get_nb_row(), C.get_nb_col(), C.get_nb_col(),
A.get_ptr(), A.get_leading_dim(), tau,
C.get_ptr(), C.get_leading_dim()
......@@ -81,14 +82,14 @@ private:
FABULOUS_DEBUG("k="<<k);
Block<S> A = this->macro_sub_block(k, k, 2, 1);
int err = LapackKernI::geqrf( A.get_nb_row(), A.get_nb_col(),
int err = lapacke::geqrf( A.get_nb_row(), A.get_nb_col(),
A.get_ptr(), A.get_leading_dim(), tau);
if (err != 0)
FABULOUS_FATAL_ERROR("return values of geqrf (last block) is"<<err);
/* STEP 3: Apply Q^H generated at step 2 to last block of RHS */
Block<S> C = _Lambda.sub_block(k*_nbRHS, 0, 2*_nbRHS, _nbRHS);
err = LapackKernI::ormqr(
err = lapacke::ormqr(
Trans, C.get_nb_row(), C.get_nb_col(), C.get_nb_col(),
A.get_ptr(), A.get_leading_dim(), tau.data(),
C.get_ptr(), C.get_leading_dim()
......@@ -164,7 +165,7 @@ public:
FABULOUS_DEBUG("R.row = " << R.get_nb_row());
*/
LapackKernI::trsm(
lapacke::trsm(
R.get_nb_row(), Lambda.get_nb_col(),
R.get_ptr(), R.get_leading_dim(),
_Y.get_ptr(), _Y.get_leading_dim()
......
......@@ -5,7 +5,8 @@ namespace fabulous {
template<class> class HessQRDR;
};
#include "fabulous/kernel/LapackInterface.hpp"
#include "fabulous/kernel/blas.hpp"
#include "fabulous/kernel/qrf.hpp"
#include "fabulous/data/Block.hpp"
#include "fabulous/utils/Arithmetic.hpp"
......@@ -86,7 +87,7 @@ private:
Block<S> A = this->qr_sub_block(i, i);
Block<S> C = this->qr_sub_block(i, k);
S *tau = _tau[i].data();
int err = LapackKernI::ormqr(
int err = lapacke::ormqr(
Trans, C.get_nb_row(), C.get_nb_col(), A.get_nb_col(),
A.get_ptr(), A.get_leading_dim(), tau,
C.get_ptr(), C.get_leading_dim()
......@@ -99,14 +100,14 @@ private:
std::vector<S> &tau = _tau.back();
Block<S> A = this->qr_sub_block(k, k);
int err = LapackKernI::geqrf( A.get_nb_row(), A.get_nb_col(),
int err = lapacke::geqrf( A.get_nb_row(), A.get_nb_col(),
A.get_ptr(), A.get_leading_dim(), tau);
if (err != 0)
FABULOUS_FATAL_ERROR("return values of geqrf (last block) is"<<err);
/* STEP 3: Apply Q^H generated at step 2 to last block of RHS */
Block<S> C = rhs_sub_block(k);
err = LapackKernI::ormqr(
err = lapacke::ormqr(
Trans, C.get_nb_row(), C.get_nb_col(), A.get_nb_col(),
A.get_ptr(), A.get_leading_dim(), tau.data(),
C.get_ptr(), C.get_leading_dim()
......@@ -135,10 +136,10 @@ private:
}
Block<S> R{M, N};
LapackKernI::lacpy('U', M, N,
lapacke::lacpy('U', M, N,
get_ptr(), get_leading_dim(),
R.get_ptr(), R.get_leading_dim());
LapackKernI::ormqr('N', M, N, N,
lapacke::ormqr('N', M, N, N,
get_ptr(), get_leading_dim(), tau.data(),
R.get_ptr(), R.get_leading_dim() );
return R;
......@@ -205,7 +206,7 @@ public:
Block<S> Lambda = _Lambda.sub_block(0, 0, _nb_vect, _nbRHS);
Y.copy(Lambda);
LapackKernI::trsm(
lapacke::trsm(
R.get_nb_row(), Lambda.get_nb_col(),
R.get_ptr(), R.get_leading_dim(),
_Y.get_ptr(), _Y.get_leading_dim()
......@@ -226,7 +227,7 @@ public:
assert(PR.get_nb_col() == _Lambda1.get_nb_col());
assert(PR.get_nb_row() >= _Lambda1.get_nb_row());
LapackKernI::gemm(
lapacke::gemm(
M, _nbRHS, N,
_original.get_ptr(), _original.get_leading_dim(),
Y.get_ptr(), Y.get_leading_dim(),
......
#ifndef FABULOUS_LAPACK_INTERFACE_HPP
#define FABULOUS_LAPACK_INTERFACE_HPP
#include <complex>
#include <vector>
namespace fabulous {
template<class> class Block;
struct LapackKernI;
};
#include "fabulous/utils/Arithmetic.hpp"
#include "fabulous/utils/Error.hpp"
namespace fabulous {
/**
* \brief Generic (templatized) wrappers for Lapack kernels
*/
namespace lapacke
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
/**
* \brief Solve Least square problem
*
* solve the least square problem \f$ X = argmin(|| A*X - B ||) \f$
*
* \param m number of lines in A
* \param n number of column in A
* \param nrhs number of column in B
* \param[in,out] A the matrix. Overwritten with factorization data.
* \param lda leading dimension of A
* \param[in,out] B input: the right hand size <br/>
* output: the solution and the residual
* \param ldb leading dimension of B
*/
template< class S >
static int gels(int m, int n, int nrhs, S *A, int lda, S *B, int ldb)
{
FABULOUS_FATAL_ERROR("support for float, double (real and complex) only");
return -1;
}
/******************* BASIC QR FACTORIZATION ************************/
/**
* QR-Factorisation (full)
* \param m number of lines
* \param n number of col
* \param[in,out] A data to factorize
* \param lda leading dim of a
* \param[out] tau on output: extra factorization data (needed for ormqr and orgqr)
*/
template< class S >
static int geqrf(int m, int n, S *A, int lda, std::vector<S> &tau)
{
FABULOUS_FATAL_ERROR("support for float, double (real and complex) only");
return 1;
}
/**
* \brief Generation of Orthogonal part after QR
*
* ref: https://software.intel.com/en-us/node/521010
*
* \param m number of lines in A
* \param n number of columns in Q
* \param p number of householder reflectors
* \param[in,out] A the matrix with factorization data from GEQRF
* \param lda leading dim of matrix A
* \param[in] tau extra factorization data as returned by geqrf
*/
template< class S >
static int orgqr(int m, int n, int p, S *A, int lda, S *tau)
{
FABULOUS_FATAL_ERROR("support for float, double (real and complex) only");
return -1;
}
/**
* \brief Apply Q to a matrix C where Q is the orthogonal part
* obtained from Lapack GEQRF call
*
* \param trans char Can be 'N' or 'T', to know if we need Q^{H}*C or Q*C.
* \param m Integer number of rows in C
* \param n Integer number of columns in C
* \param k Integer Number of elementary reflectors in Q
* \param A array returned by geqrf
* \param lda leading dim of a
* \param tau array returned by geqrf
* \param C the MxN matrix to be multiplied
* \param ldc leading dimension of C
*
* Ref: https://software.intel.com/en-us/node/521011
*
* lapack_int LAPACKE_sormqr (int matrix_layout, char side, char
* trans, lapack_int m, lapack_int n, lapack_int k, const float* a,
* lapack_int lda, const float* tau, float* c, lapack_int ldc);
*/
template< class S >
static int ormqr(char trans, int m, int n, int k,
const S *A, int lda, const S *tau, S *C, int ldc)
{
FABULOUS_FATAL_ERROR("support for float, double (real and complex) only");
return -1;
}
/****** TILED TRIANGULAR over PENTAGONAL QR FACTORIZATION (for QR+IB(+DR)) *********/
/**
* QR-Factorisation (full)
* \param m number of lines
* \param n number of col
* \param[in,out] A data to factorize
* \param lda leading dim of a
* \param[out] tau on output: extra factorization data (needed for ormqr and orgqr)
*/
template< class S >
static int tpqrt(int m, int n, int l, int nb, S *A, int lda, S *B, int ldb, Block<S> &T)
{
FABULOUS_FATAL_ERROR("support for float, double (real and complex) only"