Mentions légales du service

Skip to content
Snippets Groups Projects
Commit db72ef86 authored by hhakim's avatar hhakim
Browse files

Optimize ButterflyMat<FPP, GPU2>::multiply by using a dedicated kernel (gpu_mod@837684f4).

 #275
parent 8970c50d
Branches
Tags
No related merge requests found
Subproject commit 5e2a81bf1ba342d36b1167492cb35d48acbe34be Subproject commit 837684f462c31c451fd246538ec2e3e18c6a5100
...@@ -767,5 +767,12 @@ namespace Faust ...@@ -767,5 +767,12 @@ namespace Faust
dsm_funcs->free(real_mat.gpu_mat); dsm_funcs->free(real_mat.gpu_mat);
real_mat.gpu_mat = real_gpu_mat; real_mat.gpu_mat = real_gpu_mat;
} }
}; template<>
// Explicit specialization of butterfly_diag_prod for the scalar type substituted
// for the FAUST_SCALAR_FOR_GM placeholder when this template file is generated.
// It is a thin forwarder: it fetches the dense-matrix function table matching
// the scalar type from the GPUModHandler singleton and passes the underlying
// gpu_mat handles of X, d1 and d2, together with ids, to the backend's
// butterfly_diag_prod entry point.
// NOTE(review): X is taken by non-const reference, so the backend presumably
// updates X in place -- confirm against the gpu_mod implementation.
void butterfly_diag_prod(MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>& X, const Vect<@FAUST_SCALAR_FOR_GM@, GPU2>& d1, const Vect<@FAUST_SCALAR_FOR_GM@, GPU2>& d2, const int* ids)
{
// The scalar-typed zero argument only selects which function table is returned.
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
dsm_funcs->butterfly_diag_prod(X.gpu_mat, d1.gpu_mat, d2.gpu_mat, ids);
}
}
...@@ -15,6 +15,10 @@ namespace Faust ...@@ -15,6 +15,10 @@ namespace Faust
template<typename FPP, FDevice DEVICE> template<typename FPP, FDevice DEVICE>
class MatDense; class MatDense;
//TODO: move in a specific module
/**
 * Forward declaration of the butterfly diagonal product on GPU.
 *
 * Declared ahead of MatDense<FPP, GPU2> so the class can name it as a friend.
 *
 * NOTE(review): judging from its use in ButterflyMat<FPP, GPU2>::multiply, this
 * presumably performs in a single call what was previously done with a copy of X,
 * two eltwise_mul calls (by d1, and by d2 with ids) and a sum -- confirm against
 * the gpu_mod kernel.
 *
 * \param X the dense GPU matrix operand (non-const reference).
 * \param d1 first GPU vector operand.
 * \param d2 second GPU vector operand.
 * \param ids integer index array forwarded to the backend (the caller passes subdiag_ids).
 */
template<typename FPP>
void butterfly_diag_prod(MatDense<FPP, GPU2>& X, const Vect<FPP, GPU2>& d1, const Vect<FPP, GPU2>& d2, const int* ids);
template<typename FPP> template<typename FPP>
class MatDense<FPP, GPU2> : public MatGeneric<FPP,GPU2> class MatDense<FPP, GPU2> : public MatGeneric<FPP,GPU2>
{ {
...@@ -22,6 +26,7 @@ namespace Faust ...@@ -22,6 +26,7 @@ namespace Faust
friend MatSparse<FPP,GPU2>; friend MatSparse<FPP,GPU2>;
friend MatBSR<FPP,GPU2>; friend MatBSR<FPP,GPU2>;
friend MatDense<std::complex<double>,GPU2>; // TODO limit to real function friend MatDense<std::complex<double>,GPU2>; // TODO limit to real function
friend void butterfly_diag_prod<>(MatDense<FPP, GPU2>& X, const Vect<FPP, GPU2>& d1, const Vect<FPP, GPU2>& d2, const int* ids);
// friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB); // friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
// //
// friend void gemv<>(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &B, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB); // friend void gemv<>(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &B, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
......
...@@ -172,10 +172,11 @@ namespace Faust ...@@ -172,10 +172,11 @@ namespace Faust
template<typename FPP> template<typename FPP>
MatDense<FPP, GPU2> ButterflyMat<FPP, GPU2>::multiply(MatDense<FPP, GPU2> &gpu_X) MatDense<FPP, GPU2> ButterflyMat<FPP, GPU2>::multiply(MatDense<FPP, GPU2> &gpu_X)
{ {
MatDense<FPP, GPU2> gpu_X2(gpu_X); /*MatDense<FPP, GPU2> gpu_X2(gpu_X);
gpu_X.eltwise_mul(d2, subdiag_ids); gpu_X.eltwise_mul(d2, subdiag_ids);
gpu_X2.eltwise_mul(d1); gpu_X2.eltwise_mul(d1);
gpu_X += gpu_X2; gpu_X += gpu_X2;*/
butterfly_diag_prod(gpu_X, d1, d2, subdiag_ids);
return gpu_X; return gpu_X;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment