Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b10ca730 authored by hhakim's avatar hhakim
Browse files

Rewrite Transform<FPP, GPU2>::multiply(MatDense<FPP, GPU2>, ...) without...

Rewrite Transform<FPP, GPU2>::multiply(MatDense<FPP, GPU2>, ...) without relying on gpu_mod + add its unit test.
parent de45fab9
No related branches found
No related tags found
No related merge requests found
......@@ -49,6 +49,14 @@ void free_gpu_factors(vector<MatGeneric<FPP,GPU2>*> &gpu_factors)
}
/**
 * Tells whether a GPU dense matrix matches a CPU reference matrix.
 *
 * The GPU matrix is converted with tocpu(), subtracted from the reference, and
 * the norm() of that difference is compared to the tolerance.
 *
 * @param refm      reference matrix (CPU side).
 * @param testm_gpu matrix to check (GPU side).
 * @param tol       largest accepted norm of the difference (absolute, inclusive bound).
 * @return true if the norm of (refm - testm_gpu) is lower than or equal to tol.
 */
bool verifyMatEq(MatDense<FPP, Cpu> refm, MatDense<FPP, GPU2> testm_gpu, double tol=1e-6)
{
	auto gpu_as_cpu = testm_gpu.tocpu();
	// refm is received by value: it is already a private copy, so it can hold the difference.
	refm -= gpu_as_cpu;
	return refm.norm() <= tol;
}
void assert_gpu_cpu_Transform_almost_eq(const Faust::Transform<FPP, GPU2> & t_gpu, const Faust::Transform<FPP, Cpu>& t_cpu)
{
auto cpu_p = t_cpu.get_product();
......@@ -268,6 +276,26 @@ void test_Transform_multiply_Transform()
cout << "OK" << endl;
}
/**
 * Unit test for Transform<FPP, GPU2>::multiply(const MatDense<FPP, GPU2>&, char).
 *
 * Builds a GPU Transform and a CPU Transform from factors produced by
 * generate_cpu_gpu_factors(), multiplies each by the same random dense matrix
 * (32 columns) and asserts that the two products are equal up to the default
 * tolerance of verifyMatEq().
 */
void test_Transform_multiply_MatDense()
{
	cout << "void test_Transform_multiply_MatDense()" << endl;
	vector<MatGeneric<FPP,GPU2>*> gpu_factors;
	vector<MatGeneric<FPP,Cpu>*> cpu_factors;
	generate_cpu_gpu_factors(gpu_factors, cpu_factors);
	Faust::Transform<FPP, GPU2> t_gpu(gpu_factors);
	Faust::Transform<FPP, Cpu> t_cpu(cpu_factors);
	t_gpu.Display();
	t_cpu.Display();
	// The right-hand side needs as many rows as the Transform has columns.
	auto cpu_mat = MatDense<FPP, Cpu>::randMat(t_cpu.getNbCol(), 32);
	MatDense<FPP, GPU2> gpu_mat(*cpu_mat);
	// multiply() takes its argument by const reference and returns the product:
	// the returned matrices must be compared, not the (unchanged) inputs, otherwise
	// the assertion below would pass whatever multiply() computes.
	// NOTE(review): assumes the Cpu overload returns the product by value like the
	// GPU2 one does -- confirm against Transform<FPP, Cpu>.
	auto cpu_prod = t_cpu.multiply(*cpu_mat, 'N');
	auto gpu_prod = t_gpu.multiply(gpu_mat, 'N');
	assert(verifyMatEq(cpu_prod, gpu_prod));
	free_gpu_factors(gpu_factors);
	delete cpu_mat; // randMat() hands back a pointer that this test deletes itself
	cout << "OK" << endl;
}
void test_Transform_multiplyLeft_Transform()
{
cout << "void test_Transform_multiplyLeft_Transform()" << endl;
......@@ -401,5 +429,6 @@ int main()
test_Transform_spectralNorm();
test_Transform_get_facts();
test_Transform_tocpu();
test_Transform_multiply_MatDense();
return EXIT_SUCCESS;
}
......@@ -562,33 +562,6 @@ void Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_fact(const faust_unsigned
}
/**
 * Multiplies this Transform (optionally transposed or conjugate-transposed) by the dense matrix A.
 *
 * The product itself is delegated to the gpu_mod function chain_matmul_by_dsm_one(),
 * called with the factor array of this Transform, the requested operation and A's GPU buffer.
 *
 * @param A      the right-hand side dense matrix; its GPU buffer must be initialized.
 * @param opThis operation applied to this Transform: 'N' (none), 'T' (transpose) or 'H' (conjugate transpose).
 * @return a dense matrix wrapping the buffer returned by chain_matmul_by_dsm_one().
 * @throws std::runtime_error if A is not initialized, if the Transform has no factor array,
 *         or if opThis is none of 'N', 'T', 'H'.
 */
template<>
MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> &A, const char opThis)
{
	auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
	gm_Op op;
	if(A.gpu_mat == nullptr)
		throw std::runtime_error("MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> argument is not initialized.");
	if(gpu_mat_arr == nullptr)
		throw std::runtime_error("No factors in Transform.");
	// The product always has as many columns as A, whatever opThis is
	// (the 'N' branch used to set it to this->getNbCol(), which is wrong as soon as
	// A's column count differs from the Transform's).
	const int32_t out_ncols = A.getNbCol();
	// Row count: defaults to the transpose/adjoint case, overridden below for 'N'.
	int32_t out_nrows = this->getNbCol();
	if(opThis == 'N')
	{
		op = OP_NOTRANSP;
		out_nrows = getNbRow();
	}
	else if(opThis == 'T')
		op = OP_TRANSP;
	else if(opThis == 'H')
		op = OP_CONJTRANSP;
	else
		throw std::runtime_error("Invalid opThis");
	// Create the result without allocating a GPU buffer (no_alloc): the buffer is the one
	// returned by the chained product.
	MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
	out.gpu_mat = marr_funcs->chain_matmul_by_dsm_one(this->gpu_mat_arr, op, A.gpu_mat);
	return out;
}
template<>
Vect<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const Vect<@FAUST_SCALAR_FOR_GM@,GPU2>& x, const char opThis/*='N'*/)
{
......
......@@ -111,5 +111,6 @@ namespace Faust
};
}
#include "faust_Transform_gpu.hpp"
#endif
#endif
namespace Faust
{
	/**
	 * Multiplies this Transform by the dense matrix A, one factor at a time.
	 *
	 * A is copied, then each factor's multiply() is called on that copy
	 * (presumably updating it in place -- the copy is what gets returned):
	 * from the last factor down to the first one when opThis is 'N',
	 * from the first factor up to the last one for any other opThis value.
	 * An empty Transform only triggers a warning and the copy of A is returned as is.
	 *
	 * @param A      the right-hand side dense matrix (left untouched).
	 * @param opThis operation forwarded to every factor's multiply() ('N' selects the reverse traversal).
	 * @return the matrix obtained after all the factors have been applied.
	 *
	 * TODO: refactor to generic CPU/GPU code (using if needed a non-member function on Transform<FPP, DEV>)
	 * TODO: this member function should be const
	 */
	template<typename FPP>
	MatDense<FPP,GPU2> Transform<FPP,GPU2>::multiply(const MatDense<FPP,GPU2> &A, const char opThis) /*const*/
	{
		if (this->size() == 0)
		{
			handleWarning("Transform<FPP,GPU2> : multiply : empty Transform<FPP,GPU2>");
		}
		MatDense<FPP,GPU2> product(A);
		const int nfacts = this->size();
		if (opThis != 'N')
		{
			// transposed-like operation: factors are applied in their natural order
			for (int k = 0; k < nfacts; ++k)
			{
				data[k]->multiply(product, opThis);
			}
		}
		else
		{
			// no operation on the Transform: the rightmost factor is applied first
			int k = nfacts;
			while (k-- > 0)
			{
				data[k]->multiply(product, opThis);
			}
		}
		return product;
	}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment