Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 8d5269a8 authored by hhakim's avatar hhakim
Browse files

Optimize C++ Faust*Dense-matrix especially for pyfaust.

Removing many useless copies. For instance, the new multiply function works directly in numpy allocated memory instead of copying back and forth from/to Faust::MatDense.
In that purpose it uses Eigen Map (views).

 hinria  …  build  wrapper  python  python3 ./test_mul_matdense.py
time F@M: 72.12258507497609
 hinria  …  build  wrapper  python  python3 ./test_mul_matdense.py
time F@M: 55.56243183999322

Taking into account that there is several methods implemented to multiply a Faust by a vector or by a matrix. For now, this optimization is available only for the FaustMulMode::DEFAULT method (see issue #188).
parent 6abda28b
No related branches found
No related tags found
No related merge requests found
......@@ -135,6 +135,7 @@ namespace Faust
friend class MatSparse<FPP,Cpu>;
friend TransformHelper<FPP,Cpu>; // TODO: limit to needed member functions only
friend Transform<FPP,Cpu>; //TODO: limit to needed member functions only (multiply)
friend void MatDiag<FPP>::multiply(MatDense<FPP,Cpu> & M, char opThis) const;
/// All derived class template of MatDense are considered as friends
......
......@@ -117,6 +117,7 @@ namespace Faust
friend GivensFGFTParallel<FPP,Cpu, float>;
friend GivensFGFTComplex<FPP,Cpu, double>;
friend GivensFGFTComplex<FPP,Cpu, float>;
friend Transform<FPP,Cpu>; //TODO: limit to needed member functions only (multiply)
friend TransformHelper<FPP,Cpu>; // TODO: limit to needed member functions only
friend TransformHelperPoly<FPP>; // TODO: limit to needed member functions only
friend void wht_factors<>(unsigned int n, std::vector<MatGeneric<FPP,Cpu>*>& factors, const bool, const bool);
......
......@@ -166,6 +166,8 @@ namespace Faust
const bool transpose=false) const;
faust_unsigned_int getNbRow() const;
faust_unsigned_int getNbCol() const;
int max_nrows() const;
int max_ncols() const;
void print_file(const char* filename) const;
void print_data_ptrs() const
{
......@@ -285,6 +287,18 @@ namespace Faust
*/
MatDense<FPP,Cpu> multiply(const MatDense<FPP,Cpu> &A,const char opThis='N') const;
/**
* \brief Multiplies this by A into C.
*
* This function uses Eigen Maps to optimize the computation (avoiding internal copies as it is with MatDense).
*
* \param A properly allocated memory area of size getNbCol()*A_ncols.
* \param C properly allocated memory area of size getNbRow()*A_ncols.
* \param opThis the same as multiply(MatDense<FPP,Cpu>, const char)
*
*/
void multiply(const FPP* A, int A_ncols, FPP* C, const char opThis='N');
MatSparse<FPP,Cpu> multiply(const MatSparse<FPP,Cpu> A,const char opThis='N') const;
......
......@@ -1165,6 +1165,106 @@ Faust::MatSparse<FPP,Cpu> Faust::Transform<FPP,Cpu>::multiply(const Faust::MatSp
return mat;
}
template<typename FPP>
int Faust::Transform<FPP,Cpu>::max_nrows() const
{
int max_nrows = this->getNbRow();
for(int i=1;i<this->size();i++)
{
auto i_nrows = data[i]->getNbRow();
max_nrows = max_nrows>i_nrows?max_nrows:i_nrows;
}
return max_nrows;
}
template<typename FPP>
int Faust::Transform<FPP,Cpu>::max_ncols() const
{
int max_ncols = this->getNbCol();
for(int i=1;i<this->size();i++)
{
auto i_ncols = data[i]->getNbRow();
max_ncols = max_ncols>i_ncols?max_ncols:i_ncols;
}
return max_ncols;
}
template<typename FPP>
void Faust::Transform<FPP,Cpu>::multiply(const FPP* A, int A_ncols, FPP* C, const char opThis/*='N'*/)
{
auto lhs_ncols = A_ncols;
int lhs_nrows, out_nrows;
int i, fac_i;
int max_nrows;
FPP *tmp_buf1, *tmp_buf2;
auto lhs_buf = A;
FPP* out_buf = nullptr;
MatSparse<FPP, Cpu>* sp_mat;
MatDense<FPP, Cpu>* ds_mat;
std::function<int(int)> get_fac_i, calc_out_nrows;
using SparseMat = Eigen::SparseMatrix<FPP,Eigen::RowMajor>;
using DenseMat = Eigen::Matrix<FPP, Eigen::Dynamic, Eigen::Dynamic>;
using DenseMatMap = Eigen::Map<Eigen::Matrix<FPP, Eigen::Dynamic, Eigen::Dynamic>>;
std::function<void(SparseMat&, DenseMatMap&, DenseMatMap&)> mul_sp_mat;
std::function<void(DenseMat&, DenseMatMap&, DenseMatMap&)> mul_ds_mat;
std::function<DenseMat(DenseMat&)> op_dsmat;
std::function<SparseMat(SparseMat&)> op_spmat;
if(opThis == 'N')
{
max_nrows = this->max_nrows();
lhs_nrows = this->getNbCol();
get_fac_i = [this](int i) {return this->size()-1-i;};
calc_out_nrows = [this](int i) { return this->data[i]->getNbRow();};
op_spmat = [](SparseMat& sp_mat) {return sp_mat;};
op_dsmat = [](DenseMat& ds_mat) {return ds_mat;};
}
else
{
max_nrows = this->max_ncols();
lhs_nrows = this->getNbRow();
get_fac_i = [this](int i) {return i;};
calc_out_nrows = [this](int i) { return this->data[i]->getNbCol();};
if(opThis == 'T')
{
op_spmat = [](SparseMat& sp_mat) {return sp_mat.transpose();};
op_dsmat = [](DenseMat& ds_mat) {return ds_mat.transpose();};
}
else if(opThis == 'H')
{
op_spmat = [](SparseMat& sp_mat) {return sp_mat.adjoint();};
op_dsmat = [](DenseMat& ds_mat) {return ds_mat.adjoint();};
}
else
throw std::runtime_error("Unknown opThis");
}
mul_ds_mat = [&op_dsmat] (DenseMat& ds_mat, DenseMatMap& lhs_mat, DenseMatMap& out_mat) {out_mat.noalias() = op_dsmat(ds_mat) * lhs_mat;};
mul_sp_mat = [&op_spmat](SparseMat& sp_mat, DenseMatMap& lhs_mat, DenseMatMap& out_mat) { out_mat.noalias() = op_spmat(sp_mat) * lhs_mat;};
tmp_buf1 = new FPP[max_nrows*A_ncols];
tmp_buf2 = new FPP[max_nrows*A_ncols];
for(i=0; i < this->size(); i++)
{
if(i == size()-1)
out_buf = C;
else
out_buf = i&1?tmp_buf1:tmp_buf2;
fac_i = get_fac_i(i);
out_nrows = calc_out_nrows(fac_i);
Eigen::Map<Eigen::Matrix<FPP, Eigen::Dynamic, Eigen::Dynamic>> out_mat(const_cast<FPP*>(out_buf), out_nrows, lhs_ncols);
Eigen::Map<Eigen::Matrix<FPP, Eigen::Dynamic, Eigen::Dynamic>> lhs_mat(const_cast<FPP*>(lhs_buf), lhs_nrows, lhs_ncols);
if(sp_mat = dynamic_cast<MatSparse<FPP, Cpu>*>(data[fac_i]))
mul_sp_mat(sp_mat->mat, lhs_mat, out_mat);
else if(ds_mat = dynamic_cast<MatDense<FPP, Cpu>*>(data[fac_i]))
mul_ds_mat(ds_mat->mat, lhs_mat, out_mat);
else
throw std::runtime_error(std::string("Unknown matrix at index: ")+std::to_string(fac_i));
lhs_buf = out_buf;
// lhs_ncols never changes
lhs_nrows = out_mat.rows();
}
delete [] tmp_buf1;
delete [] tmp_buf2;
}
template<typename FPP>
Faust::MatDense<FPP,Cpu> Faust::Transform<FPP,Cpu>::multiply(const MatDense<FPP,Cpu> &A, const char opThis) const
{
......
......@@ -103,6 +103,8 @@ namespace Faust
virtual Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> &x, const bool transpose=false, const bool conjugate=false);
virtual Vect<FPP,Cpu> multiply(const FPP* x, const bool transpose=false, const bool conjugate=false);
virtual void multiply(const FPP* x, FPP* y, const bool transpose=false, const bool conjugate=false);
// \brief multiply this by A (of size: this->getNbCol()*A_ncols) into C (buffers must be properly allocated from the callee).
virtual void multiply(const FPP* A, int A_ncols, FPP* C, const bool transpose=false, const bool conjugate=false);
// MatDense<FPP,Cpu> multiply(const MatDense<FPP,Cpu> A) const;
virtual MatDense<FPP, Cpu> multiply(const MatDense<FPP,Cpu> &A, const bool transpose=false, const bool conjugate=false);
void update_total_nnz();
......
......@@ -213,6 +213,7 @@ namespace Faust {
template<typename FPP>
void TransformHelper<FPP,Cpu>::multiply(const FPP *x, FPP* y, const bool transpose, const bool conjugate)
{
//TODO: move to Faust::Transform ?
int x_size;
// assuming that x size is valid, infer it from this size
if(this->is_transposed && transpose || ! this->is_transposed && ! transpose)
......@@ -298,6 +299,16 @@ namespace Faust {
return M;
}
template<typename FPP>
void TransformHelper<FPP,Cpu>::multiply(const FPP* A, int A_ncols, FPP* C, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
this->is_transposed ^= transpose;
this->is_conjugate ^= conjugate;
this->transform->multiply(A, A_ncols, C, this->isTransposed2char());
this->is_conjugate ^= conjugate;
this->is_transposed ^= transpose;
}
template<typename FPP>
TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize(const bool transp /* deft to false */)
{
......
......@@ -46,6 +46,7 @@
#include <iostream>
#include <exception>
#include <vector>
#include "faust_prod_opt.h"
template<typename FPP, FDevice DEV>
......@@ -172,23 +173,39 @@ void FaustCoreCpp<FPP,DEV>::multiply(FPP* value_y,int nbrow_y,int nbcol_y, const
}
if (nbcol_x == 1)
{
// Faust::Vect<FPP,Cpu> X(nbrow_x,value_x);
// Faust::Vect<FPP,Cpu> Y;
// Y = this->transform->multiply(X);
// Y = this->transform->multiply(value_x);
// memcpy(value_y,Y.getData(),sizeof(FPP)*nbrow_y);
// assuming that x and y are pointers to memory allocated to the proper
// sizes
this->transform->multiply(value_x, value_y);
}else
if(this->transform->get_Fv_mul_mode() == Faust::DEFAULT)
{
// assuming that x and y are pointers to memory allocated to the proper
// sizes
this->transform->multiply(value_x, value_y);
}
else
{ // for other ways to multiply, it is not handled yet
Faust::Vect<FPP,Cpu> X(nbrow_x,value_x);
Faust::Vect<FPP,Cpu> Y;
Y = this->transform->multiply(X);
Y = this->transform->multiply(value_x);
memcpy(value_y,Y.getData(),sizeof(FPP)*nbrow_y);
}
}
else
{
Faust::MatDense<FPP,Cpu> X(value_x,nbrow_x,nbcol_x);
Faust::MatDense<FPP,Cpu> Y;
Y = this->transform->multiply(X);
memcpy(value_y,Y.getData(),sizeof(FPP)*nbrow_y*nbcol_y);
if(this->transform->get_mul_order_opt_mode() == Faust::DEFAULT)
{
//assuming that value_x and value_y are allocated properly (to the good
//sizes) in numpy world
this->transform->multiply(value_x, nbcol_x, value_y);
}
else
{ // for other ways to multiply, it is not handled yet
Faust::MatDense<FPP,Cpu> X(value_x,nbrow_x,nbcol_x);
Faust::MatDense<FPP,Cpu> Y;
Y = this->transform->multiply(X);
memcpy(value_y,Y.getData(),sizeof(FPP)*nbrow_y*nbcol_y);
}
}
}
template<typename FPP, FDevice DEV>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment