Rewrite Transform<FPP, GPU2>::multiply(MatDense<FPP, GPU2>, ...) without...

Rewrite Transform<FPP, GPU2>::multiply(MatDense<FPP, GPU2>, ...) without relying on gpu_mod + add its unit test.

Rewrite Transform<FPP, GPU2>::multiply(MatDense<FPP, GPU2>, ...) without...
b10ca730 · hhakim · de45fab9 · b10ca730 · b10ca730 · b10ca730
Commit b10ca730 authored 2 years ago by hhakim
--- a/misc/test/src/C++/test_transform_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_transform_gpu_mod.cpp.in
@@ -49,6 +49,14 @@ void free_gpu_factors(vector<MatGeneric<FPP,GPU2>*> &gpu_factors)
 }


+/**
+ * Tells whether a GPU matrix matches a CPU reference matrix.
+ *
+ * The GPU matrix is converted with tocpu(), subtracted from the reference and
+ * the norm() of the difference is compared to tol.
+ *
+ * \param refm the reference matrix (CPU).
+ * \param testm_gpu the matrix under test (GPU).
+ * \param tol the largest accepted norm() of the difference (absolute threshold).
+ * \return true if the norm() of (refm - testm_gpu) is lower than or equal to tol.
+ */
+bool verifyMatEq(MatDense<FPP, Cpu> refm, MatDense<FPP, GPU2> testm_gpu, double tol=1e-6)
+{
+	auto host_testm = testm_gpu.tocpu();
+	// refm is received by value, so this local copy can hold the difference itself
+	refm -= host_testm;
+	return refm.norm() <= tol;
+}
+
 void assert_gpu_cpu_Transform_almost_eq(const Faust::Transform<FPP, GPU2> & t_gpu, const Faust::Transform<FPP, Cpu>& t_cpu)
 {
 	auto cpu_p = t_cpu.get_product();
@@ -268,6 +276,26 @@ void test_Transform_multiply_Transform()
 	cout << "OK" << endl;
 }

+/**
+ * Checks Transform<FPP, GPU2>::multiply(MatDense<FPP, GPU2>, 'N') against the CPU Transform.
+ *
+ * The same random matrix is multiplied by a CPU and a GPU Transform built from
+ * the factors given by generate_cpu_gpu_factors(); the two products must agree
+ * according to verifyMatEq().
+ */
+void test_Transform_multiply_MatDense()
+{
+	cout << "void test_Transform_multiply_MatDense()" << endl;
+	vector<MatGeneric<FPP,GPU2>*> gpu_factors;
+	vector<MatGeneric<FPP,Cpu>*> cpu_factors;
+	generate_cpu_gpu_factors(gpu_factors, cpu_factors);
+	Faust::Transform<FPP, GPU2> t_gpu(gpu_factors);
+	Faust::Transform<FPP, Cpu> t_cpu(cpu_factors);
+	t_gpu.Display();
+	t_cpu.Display();
+	auto cpu_mat = MatDense<FPP, Cpu>::randMat(t_cpu.getNbCol(), 32);
+	MatDense<FPP, GPU2> gpu_mat(*cpu_mat);
+	// The GPU multiply() takes its argument by const reference and returns the
+	// product, so the returned matrices (not the inputs) must be compared.
+	// NOTE(review): assumes the CPU overload returns the product as well -- confirm.
+	auto cpu_prod = t_cpu.multiply(*cpu_mat, 'N');
+	auto gpu_prod = t_gpu.multiply(gpu_mat, 'N');
+	assert(verifyMatEq(cpu_prod, gpu_prod));
+	free_gpu_factors(gpu_factors);
+	delete cpu_mat;
+	cout << "OK" << endl;
+}
+
 void test_Transform_multiplyLeft_Transform()
 {
 	cout << "void test_Transform_multiplyLeft_Transform()" << endl;
@@ -401,5 +429,6 @@ int main()
 	test_Transform_spectralNorm();
 	test_Transform_get_facts();
 	test_Transform_tocpu();
+	test_Transform_multiply_MatDense();
 	return EXIT_SUCCESS;
 }
--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.cpp.in
@@ -562,33 +562,6 @@ void Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_fact(const faust_unsigned
 }


-	template<>
-		MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> &A, const char opThis)
-		{
-			auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
-			gm_Op op;
-			if(A.gpu_mat == nullptr)
-				throw std::runtime_error("MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> argument is not initialized.");
-			if(gpu_mat_arr == nullptr)
-				throw std::runtime_error("No factors in Transform.");
-			int32_t out_nrows = this->getNbCol(), out_ncols = A.getNbCol(); // transpose/adjoint case
-			if(opThis == 'N')
-			{
-				op = OP_NOTRANSP;
-				out_nrows = getNbRow();
-				out_ncols = getNbCol();
-			}
-			else if(opThis == 'T')
-				op = OP_TRANSP;
-			else if(opThis == 'H')
-				op = OP_CONJTRANSP;
-			else
-				throw std::runtime_error("Invalid opThis");
-			MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
-			out.gpu_mat = marr_funcs->chain_matmul_by_dsm_one(this->gpu_mat_arr, op, A.gpu_mat);
-			return out;
-		}
-
 	template<>
 	Vect<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const Vect<@FAUST_SCALAR_FOR_GM@,GPU2>& x, const char opThis/*='N'*/)
 	{

--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.h
@@ -111,5 +111,6 @@ namespace Faust
 		};
 }

+#include "faust_Transform_gpu.hpp"
 #endif
 #endif
--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.hpp
+namespace Faust
+{
+	//TODO: refactor to generic CPU/GPU code (using if needed a non-member function on Transform<FPP, DEV>)
+	template<typename FPP>
+		MatDense<FPP,GPU2> Transform<FPP,GPU2>::multiply(const MatDense<FPP,GPU2> &A, const char opThis) /*const*/ //TODO: should be const
+		{
+			if (size() == 0)
+				handleWarning("Transform<FPP,GPU2> : multiply : empty Transform<FPP,GPU2>");
+
+			MatDense<FPP,GPU2> mat(A);
+
+
+			if (opThis == 'N')
+				for (int i=this->size()-1; i >= 0; i--)
+					data[i]->multiply(mat, opThis);
+			else
+				for (int i=0; i < this->size(); i++)
+					data[i]->multiply(mat, opThis);
+
+			return mat;
+		}
+}