Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 6abda28b authored by hhakim's avatar hhakim
Browse files

Optimize TransformHelperPoly<FPP>::multiply(MatDense) avoiding 3 copies per...

Optimize TransformHelperPoly<FPP>::multiply(MatDense) avoiding 3 copies per iteration/column (using Eigen Map -- views)
parent c607c0c4
No related branches found
No related tags found
No related merge requests found
......@@ -30,6 +30,7 @@ namespace Faust
// Pointer-based overloads of multiply; their bodies are not part of this hunk.
// NOTE(review): the (x, y) form is the one called per column by the raw-buffer
// overload below, with a d-value input and a (K+1)*d-value output -- confirm.
Vect<FPP,Cpu> multiply(const FPP* x, const bool transpose=false, const bool conjugate=false);
void multiply(const FPP* x, FPP* y, const bool transpose=false, const bool conjugate=false);
// Matrix overload. NOTE(review): the matching definition appears to allocate a
// result of d*(K+1) rows by X.getNbCol() columns and delegate to the raw-buffer
// overload -- confirm against the full source.
MatDense<FPP, Cpu> multiply(const MatDense<FPP,Cpu> &X, const bool transpose=false, const bool conjugate=false);
// Raw-buffer overload: X is read as n consecutive blocks of d values and out is
// written as n consecutive blocks of (K+1)*d values, where d = L->getNbRow()
// and K = size()-1. transpose and conjugate are forwarded to the per-block call.
void multiply(const FPP* X, int n, FPP* out, const bool transpose=false, const bool conjugate=false);
TransformHelper<FPP, Cpu>* next(int K);
TransformHelper<FPP, Cpu>* next();
Vect<FPP, Cpu> poly(MatDense<FPP,Cpu> & basisX, Vect<FPP, Cpu> coeffs);
......
......@@ -100,16 +100,27 @@ namespace Faust
int d = L->getNbRow();
int K = this->size()-1;
int n = X.getNbCol();
MatDense<FPP,Cpu> V0_ord(d*(K+1), n);
MatDense<FPP,Cpu> Y(d*(K+1), n);
multiply(X.getData(), n, Y.getData(), transpose, conjugate);
return Y;
}
/**
 * Applies the single-vector multiply to each of the n blocks stored in X,
 * writing the results directly into Y (no intermediate copies).
 *
 * \param X input buffer, read as n consecutive blocks of d values,
 *          where d = L->getNbRow().
 * \param n number of blocks to process; nothing is done when n <= 0.
 * \param Y output buffer, written as n consecutive blocks of (K+1)*d values,
 *          where K = this->size()-1. Must be allocated by the caller.
 * \param transpose forwarded unchanged to the per-block multiply.
 * \param conjugate forwarded unchanged to the per-block multiply.
 */
template<typename FPP>
void TransformHelperPoly<FPP>::multiply(const FPP* X, int n, FPP* Y, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
	const int d = L->getNbRow();
	const int K = this->size()-1;
	const int scale = (K+1)*d;
	// Each iteration is handed its own input block and its own output block,
	// so the raw pointers can be passed straight through: no temporary Vect,
	// no memcpy and no Eigen::Map wrapper are needed just to obtain them.
	#pragma omp parallel for
	for(int i=0;i<n;i++)
	{
		multiply(X+i*d, Y+i*scale, transpose, conjugate);
	}
}
template<typename FPP>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment