Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 89024747 authored by hhakim's avatar hhakim
Browse files

Update Faust::TransformHelperPoly.

- Optimization of TransformHelperPoly::multiply(Vect): no realloc() calls, directly allocate the final vector avoiding the whole buffer copy.
- Implementation of TransformHelperPoly::multiply(Matdense)
- Optimization of TransformHelperPoly::basisChebyshev: building factors in parallel with OpenMP.
- Multiple impls of TransformHelperPoly::poly to compute linear combination of polynomials.
parent c85f2569
No related branches found
No related tags found
No related merge requests found
...@@ -102,7 +102,7 @@ namespace Faust ...@@ -102,7 +102,7 @@ namespace Faust
void copy_mul_mode_state(const TransformHelper<FPP,Cpu>& th); void copy_mul_mode_state(const TransformHelper<FPP,Cpu>& th);
virtual Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> x, const bool transpose=false, const bool conjugate=false); virtual Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> x, const bool transpose=false, const bool conjugate=false);
// MatDense<FPP,Cpu> multiply(const MatDense<FPP,Cpu> A) const; // MatDense<FPP,Cpu> multiply(const MatDense<FPP,Cpu> A) const;
MatDense<FPP, Cpu> multiply(const MatDense<FPP,Cpu> A, const bool transpose=false, const bool conjugate=false); virtual MatDense<FPP, Cpu> multiply(const MatDense<FPP,Cpu> A, const bool transpose=false, const bool conjugate=false);
void update_total_nnz(); void update_total_nnz();
void set_FM_mul_mode(const int mul_order_opt_mode, const bool silent=false); void set_FM_mul_mode(const int mul_order_opt_mode, const bool silent=false);
void set_Fv_mul_mode(const int mode); void set_Fv_mul_mode(const int mode);
......
...@@ -20,11 +20,19 @@ namespace Faust ...@@ -20,11 +20,19 @@ namespace Faust
const bool internal_call) : TransformHelper<FPP,Cpu>(facts, lambda_, optimizedCopy, cloning_fact, internal_call) {} const bool internal_call) : TransformHelper<FPP,Cpu>(facts, lambda_, optimizedCopy, cloning_fact, internal_call) {}
Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> x, const bool transpose=false, const bool conjugate=false); Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> x, const bool transpose=false, const bool conjugate=false);
// MatDense<FPP, Cpu> multiply(const MatDense<FPP,Cpu> A, const bool transpose=false, const bool conjugate=false); MatDense<FPP, Cpu> multiply(MatDense<FPP,Cpu> X, const bool transpose=false, const bool conjugate=false);
Vect<FPP, Cpu> poly(MatDense<FPP,Cpu> & basisX, Vect<FPP, Cpu> coeffs);
MatDense<FPP, Cpu> poly(int n, MatDense<FPP,Cpu> & basisX, Vect<FPP, Cpu> coeffs);
void poly(int d, int n, const FPP* basisX, const FPP* coeffs, FPP* out);
TransformHelper<FPP, Cpu>* polyFaust(const FPP* coeffs);
}; };
template<typename FPP> template<typename FPP>
TransformHelper<FPP, Cpu>* basisChebyshev(MatSparse<FPP,Cpu>* L, int32_t K); TransformHelper<FPP, Cpu>* basisChebyshev(MatSparse<FPP,Cpu>* L, int32_t K);
template<typename FPP>
void poly(int d, int K, int n, const FPP* basisX, const FPP* coeffs, FPP* out);
} }
#include "faust_TransformHelperPoly.hpp" #include "faust_TransformHelperPoly.hpp"
#endif #endif
......
...@@ -6,38 +6,161 @@ namespace Faust ...@@ -6,38 +6,161 @@ namespace Faust
template<typename FPP> template<typename FPP>
Vect<FPP,Cpu> TransformHelperPoly<FPP>::multiply(const Vect<FPP,Cpu> x, const bool transpose/*=false*/, const bool conjugate/*=false*/) Vect<FPP,Cpu> TransformHelperPoly<FPP>::multiply(const Vect<FPP,Cpu> x, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{ {
// std::cout << "TransformHelperPoly<FPP>::multiply(Vect)" << std::endl;
/**
* Recurrence relation (k=1 to K):
* v_{0,k} := [v_{0,k-1} ; v_{1, k-1} ] // concatenation
* v_{1,k} := v_{2,k-1}
* v_{2,k} := 2Lv_{2,k-1} - v_{1,k-1}
*
* First terms:
* v_{0,1} = []
* v_{1,1} = x
* v_{2,1} = Lx
*
* Fx = [v_{0, K} ; v_{1, K} ; v_{2, K}]
*/
int d = L.getNbRow(); int d = L.getNbRow();
Vect<FPP,Cpu> v1(d), v2, tmp; Vect<FPP,Cpu> v1(d), v2, tmp;
FPP *v0_buf = nullptr;
int K = this->size(); int K = this->size();
Vect<FPP, Cpu> v0(d*(K+1));
// seeing how basisChebyshev method is instantiating this class // seeing how basisChebyshev method is instantiating this class
// K can't be strictly lower than two // K can't be strictly lower than two
memcpy(v1.getData(), x.getData(), sizeof(FPP)*d); memcpy(v1.getData(), x.getData(), sizeof(FPP)*d);
v2 = L*x; v2 = L*x;
for(int i=2;i<=K;i++) for(int i=2;i<=K;i++)
{ {
v0_buf = (FPP*) realloc(v0_buf, sizeof(FPP)*d*(i-1)); memcpy(v0.getData()+d*(i-2), v1.getData(), sizeof(FPP)*d);
memcpy(v0_buf+d*(i-2), v1.getData(), sizeof(FPP)*d);
tmp = v1; tmp = v1;
v1 = v2; v1 = v2;
// gemv(L, v2, tmp, FPP(1), FPP(-1)); // gemv(L, v2, tmp, FPP(1), FPP(-1));
v2 = twoL*v2; v2 = twoL*v2;
v2 -= tmp; v2 -= tmp;
} }
Vect<FPP, Cpu> v(d*(K+1)); memcpy(v0.getData()+d*(K-1), v1.getData(), sizeof(FPP)*d);
// K >= 2 so v0_buf is not nullptr memcpy(v0.getData()+d*K, v2.getData(), sizeof(FPP)*d);
memcpy(v.getData(), v0_buf, sizeof(FPP)*d*(K-1)); return v0;
memcpy(v.getData()+d*(K-1), v1.getData(), sizeof(FPP)*d);
memcpy(v.getData()+d*K, v2.getData(), sizeof(FPP)*d);
free(v0_buf);
return v;
} }
// Slower method but kept commented here until further tests
// template<typename FPP>
// MatDense<FPP, Cpu> TransformHelperPoly<FPP>::multiply(MatDense<FPP,Cpu> X, const bool transpose/*=false*/, const bool conjugate/*=false*/)
// {
//// std::cout << "TransformHelperPoly<FPP>::multiply(MatDense)" << std::endl;
// int d = L.getNbRow();
// int n = X.getNbCol();
// int K = this->size();
// MatDense<FPP, Cpu> V0(d*(K+1), n);
// MatDense<FPP,Cpu> V1(d, n), V2 = X, tmp;
// memcpy(V1.getData(), X.getData(), sizeof(FPP)*d*n);
// L.multiply(V2, 'N');
// for(int i=2;i<=K;i++)
// {
// memcpy(V0.getData()+d*n*(i-2), V1.getData(), sizeof(FPP)*d*n);
// tmp = V1;
// V1 = V2;
// twoL.multiply(V2, 'N');
// V2 -= tmp;
//// gemm(twoL, V2, tmp, (FPP)1.0, (FPP)-1.0, 'N', 'N');
//// V2 = tmp;
// }
// memcpy(V0.getData()+d*n*(K-1), V1.getData(), sizeof(FPP)*d*n);
// memcpy(V0.getData()+d*n*K, V2.getData(), sizeof(FPP)*d*n);
// // put bocks in proper order
// MatDense<FPP,Cpu> V0_ord(d*(K+1), n);
// #pragma omp parallel for
// for(int i=0;i<(K+1);i++)
// {
// auto i_offset = i*n*d;
// for(int j=0;j<n;j++)
// memcpy(V0_ord.getData()+(K+1)*d*j+d*i, V0.getData()+j*d+i_offset, sizeof(FPP)*d);
// }
// return V0_ord;
// }
template<typename FPP>
MatDense<FPP, Cpu> TransformHelperPoly<FPP>::multiply(MatDense<FPP,Cpu> X, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
// std::cout << "TransformHelperPoly<FPP>::multiply(MatDense)" << std::endl;
int d = L.getNbRow();
int K = this->size();
int n = X.getNbCol();
MatDense<FPP,Cpu> V0_ord(d*(K+1), n);
auto scale = (K+1)*d;
#pragma omp parallel for
for(int i=0;i<n;i++)
{
Vect<FPP,Cpu> x(d, X.getData()+i*d);
auto y = multiply(x);
memcpy(V0_ord.getData()+scale*i, y.getData(), sizeof(FPP)*scale);
}
return V0_ord;
}
template<typename FPP>
Vect<FPP, Cpu> TransformHelperPoly<FPP>::poly(MatDense<FPP,Cpu> & basisX, Vect<FPP, Cpu> coeffs)
{
coeffs.multiplyLeft(basisX);
return coeffs;
}
template<typename FPP>
MatDense<FPP, Cpu> TransformHelperPoly<FPP>::poly(int n, MatDense<FPP,Cpu> & basisX, Vect<FPP, Cpu> coeffs)
{
int d = L.getNbRow();
MatDense<FPP,Cpu> Y(d, n);
this->poly(d, n, basisX.getData(), coeffs.getData(), Y.getData());
return Y;
}
template<typename FPP>
void TransformHelperPoly<FPP>::poly(int d, int n, const FPP* basisX, const FPP* coeffs, FPP* out)
{
int K = this->size();
Faust::poly(d, K, n, basisX, coeffs, out);
}
template<typename FPP>
TransformHelper<FPP, Cpu>* TransformHelperPoly<FPP>::polyFaust(const FPP* coeffs)
{
// Id = sp.eye(L.shape[1], format="csr")
// scoeffs = sp.hstack(tuple(Id*coeffs[i] for i in range(0, K+1)),
// format="csr")
MatSparse<FPP,Cpu> Id, Id1;
MatSparse<FPP,Cpu> coeffDiags;
auto d = L.getNbRow();
int K = this->size();
Id.resize(d, d, d);
Id.setEyes();
auto Id0 = Id;
Id0 *= coeffs[0];
// K >= 2
for(int i=1;i < K+1; i++)
{
Id1 = Id;
Id1 *= coeffs[i];
MatSparse<FPP,Cpu> coeffDiags;
coeffDiags.hstack(Id0, Id1);
Id0 = coeffDiags;
}
std::vector<MatGeneric<FPP,Cpu>*> facts(K+1);
facts.resize(K+1);
facts[0] = new MatSparse<FPP,Cpu>(Id0);
for(int i=1;i<=K;i++)
{
facts[i] = this->get_gen_fact_nonconst(i-1);
}
return new TransformHelper<FPP,Cpu>(facts, (FPP) 1.0,
/* optimizedCopy */ false,
/* cloning_fact */ false,
/* internal_call */ true);
}
template<typename FPP> template<typename FPP>
TransformHelper<FPP,Cpu>* basisChebyshev(MatSparse<FPP,Cpu>* L, int32_t K) TransformHelper<FPP,Cpu>* basisChebyshev(MatSparse<FPP,Cpu>* L, int32_t K)
{ {
// assuming L is symmetric // assuming L is symmetric
TransformHelper<FPP, Cpu>* basis = nullptr;
MatSparse<FPP,Cpu> *T1, *T2; MatSparse<FPP,Cpu> *T1, *T2;
auto d = L->getNbRow(); auto d = L->getNbRow();
MatSparse<FPP,Cpu> Id, twoL, minus_Id, rR, R, zero; MatSparse<FPP,Cpu> Id, twoL, minus_Id, rR, R, zero;
...@@ -73,6 +196,7 @@ namespace Faust ...@@ -73,6 +196,7 @@ namespace Faust
T2->vstack(Id, rR); T2->vstack(Id, rR);
facts[K-2] = T2; facts[K-2] = T2;
// T3 to TK // T3 to TK
#pragma omp parallel for private(Id, zero, R)
for(int i=3;i<K+1;i++) for(int i=3;i<K+1;i++)
{ {
Id.resize(i*d, i*d, i*d); Id.resize(i*d, i*d, i*d);
...@@ -92,4 +216,20 @@ namespace Faust ...@@ -92,4 +216,20 @@ namespace Faust
basisP->twoL *= 2; basisP->twoL *= 2;
return basisP; return basisP;
} }
template<typename FPP>
void poly(int d, int K, int n, const FPP* basisX, const FPP* coeffs, FPP* out)
{
auto K_plus_1 = K+1;
auto d_K_plus_1 = d*K_plus_1;
#pragma omp parallel for
for(int i=0;i<n;i++)
{
Vect<FPP,Cpu> _coeffs(K_plus_1, coeffs);
MatDense<FPP,Cpu> basisXi(d, K_plus_1);
memcpy(basisXi.getData(), basisX+d_K_plus_1*i, sizeof(FPP)*d_K_plus_1);
_coeffs.multiplyLeft(basisXi);
memcpy(out+i*d, _coeffs.getData(), sizeof(FPP)*d);
}
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment