Mentions légales du service

Skip to content
Snippets Groups Projects
Commit bf7cf6c1 authored by hhakim's avatar hhakim
Browse files

Implement the GPU version of the TransformHelperPoly Faust-matrix polynomial...

Implement the GPU version of the TransformHelperPoly Faust-matrix polynomial specialized multiplication.
parent 324fa67f
Branches
Tags
No related merge requests found
......@@ -115,6 +115,8 @@ namespace Faust
// GPU multiplication on raw buffers: x is read-only, y receives the result.
// NOTE(review): the definition is only partially visible in this excerpt — confirm the expected sizes of x and y there.
void multiply_gpu(const FPP* x, FPP* y, const bool transpose=false, const bool conjugate=false);
// Multiplication by a dense CPU matrix X; the result is returned as a dense CPU matrix.
MatDense<FPP, Cpu> multiply(const MatDense<FPP,Cpu> &X, const bool transpose=false, const bool conjugate=false);
// Multiplication of a block of n columns stored in the raw buffer X, result written to out.
// Dispatches to multiply_gpu when mul_and_combi_lin_on_gpu is set, to multiply_cpu otherwise.
void multiply(const FPP* X, int n, FPP* out, const bool transpose=false, const bool conjugate=false);
// CPU implementation behind multiply(const FPP*, int, FPP*, ...).
void multiply_cpu(const FPP* X, int n, FPP* out, const bool transpose=false, const bool conjugate=false);
// GPU implementation behind multiply(const FPP*, int, FPP*, ...).
// Throws std::runtime_error when the library is compiled without USE_GPU_MOD.
void multiply_gpu(const FPP* X, int n, FPP* out, const bool transpose=false, const bool conjugate=false);
// Returns a pointer to a TransformHelper<FPP, Cpu>.
// NOTE(review): presumably the basis extended up to degree K — confirm against the definition.
TransformHelper<FPP, Cpu>* next(uint K);
// Same as above without an explicit degree.
// NOTE(review): presumably extends the basis by one degree — confirm against the definition.
TransformHelper<FPP, Cpu>* next();
// NOTE(review): presumably combines the columns of basisX linearly using coeffs — confirm against the definition.
Vect<FPP, Cpu> poly(MatDense<FPP,Cpu> & basisX, Vect<FPP, Cpu> coeffs);
......
......@@ -173,7 +173,6 @@ namespace Faust
memcpy(y, x, sizeof(FPP)*d);
if(K == 0)
return;
Eigen::Map<Eigen::Matrix<FPP, Eigen::Dynamic, 1>> v2(const_cast<FPP*>(y+d), d);
// gpu_v2 == x
gpu_v2.multiplyLeft(gpu_L);
gpu_v2.tocpu(y+d); // v2 = L->mat*x_vec;
......@@ -303,6 +302,19 @@ namespace Faust
/**
 * \brief Multiplies by the block of n columns stored in X and writes the result to Y.
 *
 * This is only a dispatcher: the work is done by multiply_gpu() when
 * mul_and_combi_lin_on_gpu is set and by multiply_cpu() otherwise. All
 * arguments are forwarded unchanged.
 */
template<typename FPP>
void TransformHelperPoly<FPP>::multiply(const FPP* X, int n, FPP* Y, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
    // CPU path first (guard clause), GPU path as the fall-through.
    if(! this->mul_and_combi_lin_on_gpu)
    {
        multiply_cpu(X, n, Y, transpose, conjugate);
        return;
    }
    multiply_gpu(X, n, Y, transpose, conjugate);
}
template<typename FPP>
void TransformHelperPoly<FPP>::multiply_cpu(const FPP* X, int n, FPP* Y, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
int d = L->getNbRow();
uint K = this->size()-1;
......@@ -319,6 +331,53 @@ namespace Faust
}
}
/**
 * \brief GPU variant of the multiplication by a block of n column vectors.
 *
 * With d = L->getNbRow() and K = this->size()-1, the function produces K+1
 * blocks of d x n entries:
 *   - block 0 is X itself,
 *   - block 1 is L*X,
 *   - block i (i >= 2) is 2*L*block(i-1) - block(i-2).
 * The products are evaluated through the GPU2 matrix types and each block is
 * copied back to the host buffer Y as soon as it is available.
 *
 * \param X host buffer read as n consecutive columns of d entries.
 * \param n number of columns of X.
 * \param Y host output buffer. For column j, block i is written at offset
 *          (K+1)*d*j + d*i, hence Y must hold (K+1)*d*n entries.
 * \param transpose not used by this implementation (NOTE(review): confirm this is intended).
 * \param conjugate not used by this implementation (NOTE(review): confirm this is intended).
 *
 * \throws std::runtime_error if compiled without USE_GPU_MOD.
 */
template<typename FPP>
void TransformHelperPoly<FPP>::multiply_gpu(const FPP* X, int n, FPP* Y, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
#ifdef USE_GPU_MOD
    int d = L->getNbRow();
    uint K = this->size()-1;
    // Scatters the d x n column-major block X_ into the i-th d-sized slot of
    // each of the n output columns of Y.
    auto block_to_cpu = [&Y, &d, &n, &K](int i, const FPP* X_)
    {
#pragma omp parallel for
        for(int j=0;j<n;j++)
        {
            memcpy(Y+(K+1)*d*j+d*i, X_+j*d, sizeof(FPP)*d);
        }
    };
    // block 0 is X itself: no GPU work (nor GPU buffer) is needed for it
    block_to_cpu(0, X);
    if(K == 0)
        return;
    MatDense<FPP, GPU2> gpu_V1(d, n, X);
    MatDense<FPP, GPU2> gpu_V2(gpu_V1);
    MatDense<FPP, Cpu> tmp_cpu_V2(d, n); // host staging buffer for the blocks computed on GPU
    const MatSparse<FPP, GPU2> gpu_L(*this->L);
    // gpu_V2 == X
    gpu_V2.multiplyLeft(gpu_L);
    gpu_V2.tocpu(tmp_cpu_V2); // v2 = L->mat*x_vec;
    block_to_cpu(1, tmp_cpu_V2.getData());
    if(K == 1) // not necessary but clearer
        return;
    // The buffers below are only needed by the recurrence (K >= 2).
    MatDense<FPP, GPU2> gpu_new_V2(d, n);
    MatSparse<FPP, GPU2> gpu_twoL(gpu_L);
    gpu_twoL *= 2;
    const int last_block = static_cast<int>(K);
    for(int block=2;block<=last_block;block++)
    {
        gpu_new_V2 = gpu_V2;
        // the cast presumably selects the const-reference overload of multiplyLeft — TODO confirm
        gpu_new_V2.multiplyLeft(const_cast<const MatSparse<FPP,GPU2>&>(gpu_twoL));
        gpu_new_V2 -= gpu_V1; // new_v2_ = L->mat*v2_*2-v1_;
        // prepare next it
        gpu_V1 = gpu_V2;
        gpu_V2 = gpu_new_V2;
        gpu_new_V2.tocpu(tmp_cpu_V2);
        block_to_cpu(block, tmp_cpu_V2.getData());
    }
#else
    throw std::runtime_error("USE_GPU_MOD option must be enabled at compiling time to use this function (TransformHelperPoly<FPP>::multiply_gpu).");
#endif
}
template<typename FPP>
MatDense<FPP, Cpu> TransformHelperPoly<FPP>::multiply(const MatSparse<FPP,Cpu> &A, const bool transpose/*=false*/, const bool conjugate/*=false*/)
{
......
......@@ -27,7 +27,6 @@ def Chebyshev(L, K, dev='cpu', T0=None, impl='native'):
L can also be a Faust if impl is "py".
K: the degree of the last polynomial, i.e. the K+1 first polynomials are built.
dev (optional): the device to instantiate the returned Faust ('cpu' or 'gpu').
'gpu' is not available yet for impl='native'.
T0 (optional): to define the 0-degree polynomial as something else than the identity.
impl (optional): 'native' (by default) for the C++ impl., "py" for the Python impl.
......@@ -79,7 +78,6 @@ def basis(L, K, basis_name, dev='cpu', T0=None, impl='native'):
K: the degree of the last polynomial, i.e. the K+1 first polynomials are built.
basis_name: 'chebyshev', and others yet to come.
dev (optional): the device to instantiate the returned Faust ('cpu' or 'gpu').
'gpu' is not available yet for impl='native'.
T0 (optional): a sparse matrix to replace the identity as a 0-degree polynomial of the basis.
impl (optional): 'native' (by default) for the C++ impl., "py" for the Python impl.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment