Commit 77b3a35a authored by MIJIEUX Thomas's avatar MIJIEUX Thomas

fix missing copy for LapackKernI in (or/un)gqr call

parent 4f3d938c
......@@ -65,23 +65,29 @@ public:
{
//Create temporary tau
typename KI:: template TauHolder<Scalar> tau;
int ld = Parent::getLeadingDim();
//3 steps::
//1 - Call qr facto on W bloc
int geqrf = KI::geqrf(Parent::getLeadingDim(),getSizeW(),getW(),
Parent::getLeadingDim(), tau);
int geqrf = KI::geqrf(ld,getSizeW(),getW(),
ld, tau);
if (geqrf != 0)
std::cout<<"Problem on QR facto\n";
//2 - get the R part and write it in input
for (int i=0 ; i<getSizeW() ; ++i) {
memcpy(&toWriteR[i*ldR],&getW()[i*Parent::getLeadingDim()],
memcpy(&toWriteR[i*ldR],&getW()[i*ld],
(i+1)*sizeof(Scalar));
}
//3 - Call orgqr on W block
int orgqr = KI::orgqr(Parent::getLeadingDim(),getSizeW(),
getW(),Parent::getLeadingDim(),tau,
(Scalar*)nullptr, 0);
Block<Scalar,Primary,KI> tmp{getSizeW(), ld};
int orgqr = KI::orgqr(ld, getSizeW(),
getW(), ld, tau,
tmp.getPtr(), tmp.getLeadingDim());
for (int i = 0; i < getSizeW(); ++i)
memcpy(getW()+i*ld, tmp.getPtr(i), ld*sizeof(Scalar));
//check
if (orgqr != 0) {
std::cout<<"Problem in BlockWP::QRofW\n";
......
......@@ -10,9 +10,8 @@
#error "Never include this header directly; Use KernelInterface.hpp instead"
#endif
#define ChameleonKernI LapackKernI
// #define ChameleonKernI LapackKernI
//struct ChameleonKernI // : implements KernelInterface "Concept"
struct ChameleonKernI // : implements KernelInterface "Concept"
{
template<class T>
......
......@@ -32,7 +32,7 @@ struct Trans<double>{
};
#define IBBGMRESDR_KERNELINTERFACE_HPP_INSIDE
//#include "LapackInterface.hpp"
#include "LapackInterface.hpp"
#include "ChameleonInterface.hpp"
#undef IBBGMRESDR_KERNELINTERFACE_HPP_INSIDE
......
......@@ -101,8 +101,10 @@ struct LapackKernI // : implements KernelInterface "Concept"
* doc : https://software.intel.com/en-us/node/521010
*/
template<typename Scalar>
static int orgqr(int /*m*/, int /*p*/, Scalar * /*mat*/,int /*lda*/,
LapackKernI::TauHolder<Scalar>& /*tau*/,bool/*full*/=false)
static int orgqr(int /*m*/, int /*p*/, Scalar * /*A*/,int /*lda*/,
LapackKernI::TauHolder<Scalar>& /*tau*/,
Scalar* /*Q*/, int /*ldq*/,
bool/*full*/=false)
{
std::cout<<"Only Float or Double supported (orgqr)"<< std::endl;
exit(0);
......@@ -434,52 +436,67 @@ int LapackKernI::geqrf<std::complex<double> >(
}
template<>
int LapackKernI::orgqr(int m, int p,float * mat, int ldMat,
int LapackKernI::orgqr(int m, int p, float *A, int lda,
LapackKernI::TauHolder<float> &tau,
float *Q, int ldq,
bool full)
{
for (int i = 0; i < p; ++i)
memcpy(Q+i*ldq, A+i*lda, m*sizeof(float));
if (full)
return LAPACKE_sorgqr(LAPACK_COL_MAJOR, m, m, p, mat, ldMat, tau.ptr());
return LAPACKE_sorgqr(LAPACK_COL_MAJOR, m, m, p, Q, ldq, tau.ptr());
else
return LAPACKE_sorgqr(LAPACK_COL_MAJOR, m, p, p, mat, ldMat, tau.ptr());
return LAPACKE_sorgqr(LAPACK_COL_MAJOR, m, p, p, Q, ldq, tau.ptr());
}
template<>
int LapackKernI::orgqr(int m, int p,
double *mat, int ldMat,
double *A, int lda,
LapackKernI::TauHolder<double> &tau,
double *Q, int ldq,
bool full)
{
if (full) {
return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, m, p, mat, ldMat, tau.ptr());
} else {
return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, p, p, mat, ldMat, tau.ptr());
}
for (int i = 0; i < p; ++i)
memcpy(Q+i*ldq, A+i*lda, m*sizeof(float));
if (full)
return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, m, p, Q, ldq, tau.ptr());
else
return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, p, p, Q, ldq, tau.ptr());
}
//https://software.intel.com/en-us/node/521012#FCFCCE90-1C8F-4879-824A-7EBA7C119AE8
template<>
int LapackKernI::orgqr(int m, int p,
std::complex<float> * mat, int ldMat,
LapackKernI::TauHolder<std::complex<float>> &tau, bool full)
std::complex<float> *A, int lda,
LapackKernI::TauHolder<std::complex<float>> &tau,
std::complex<float> *Q, int ldq,
bool full)
{
if (full) {
return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, m, p, mat, ldMat, tau.ptr());
} else {
return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, p, p, mat, ldMat, tau.ptr());
}
for (int i = 0; i < p; ++i)
memcpy(Q+i*ldq, A+i*lda, m*sizeof(float));
if (full)
return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, m, p, Q, ldq, tau.ptr());
else
return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, p, p, Q, ldq, tau.ptr());
}
template<>
int LapackKernI::orgqr(int m, int p,
std::complex<double> * mat, int ldMat,
LapackKernI::TauHolder<std::complex<double>> &tau, bool full)
std::complex<double> *A, int lda,
LapackKernI::TauHolder<std::complex<double>> &tau,
std::complex<double> *Q, int ldq,
bool full)
{
if (full) {
return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, m, p, mat, ldMat, tau.ptr());
} else {
return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, p, p, mat, ldMat, tau.ptr());
}
for (int i = 0; i < p; ++i)
memcpy(Q+i*ldq, A+i*lda, m*sizeof(float));
if (full)
return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, m, p, Q, ldq, tau.ptr());
else
return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, p, p, Q, ldq, tau.ptr());
}
template<> //specialization on double
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment