Add new signature for MatDense<FPP,GPU2>::multiply and update tests.

a42f4b65 · hhakim · 500667b8 · a42f4b65 · a42f4b65 · a42f4b65
Commit a42f4b65 authored 4 years ago by hhakim
--- a/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
 #include "faust_constant.h"
 #include "faust_gpu_mod.h"
 #include "faust_MatDense_gpu.h"
+#include "faust_MatSparse.h"
 #include <cstdlib>
+#include <cmath>
+using namespace std;
 using namespace Faust;

-int main(int argc, char** argv)
+
+void test_mul_gpu_dense() // compares GPU2 dense products with a CPU reference for three kinds of right-hand operand; the relative errors are only printed
 {
 	faust_unsigned_int nrows = 10, ncols = 10;
 	faust_unsigned_int nrows2 = 10, ncols2 = 15;
 	double data[100]; // NOTE(review): not referenced anywhere in this function
-	Faust::enable_gpu_mod();
 	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols); // NOTE(review): used as a pointer and never released here -- confirm who owns it
-	MatDense<double,GPU2> gpu_mat1(cpu_mat1->getData(), nrows, ncols);
+	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData()); // dimensions first, host data last
 	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows2,ncols2); // NOTE(review): same ownership question as cpu_mat1
-	MatDense<double,GPU2> gpu_mat2(cpu_mat2->getData(), nrows2, ncols2);
+	MatDense<double,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());
+	// test MatDense<FPP,GPU2> * MatDense<FPP,GPU2>
 	gpu_mat1.multiply(gpu_mat2); // the product is read back from gpu_mat2 on the next line
 	auto cpu_mat1_mat2_test = gpu_mat2.tocpu();
 	auto cpu_mat1_mat2_ref = *cpu_mat2; // copy of the right-hand operand, then used as the CPU reference product
 	cpu_mat1->multiply(cpu_mat1_mat2_ref, 'N');
 	auto err_diff = cpu_mat1_mat2_ref;
 	err_diff -= cpu_mat1_mat2_test;
-	cout << "err mul." << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
+	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl; // norm of the difference divided by the norm of the reference
+	// test MatDense<FPP,GPU2> * MatDense<FPP,Cpu>
+	cpu_mat1_mat2_test = *cpu_mat2; // reload the operand; the call below is expected to overwrite it with the product
+	gpu_mat1.multiply(cpu_mat1_mat2_test);
+	err_diff = cpu_mat1_mat2_ref;
+	err_diff -= cpu_mat1_mat2_test;
+	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl; // NOTE(review): same label as the other two printouts, so the three results are told apart only by order
+	// test MatDense<FPP,GPU2> * MatSparse<FPP,Cpu>
+	Faust::MatSparse<double,Cpu> cpu_mat2_sparse(*cpu_mat2); // sparse-typed matrix built from the same right-hand operand
+	gpu_mat1.multiply(cpu_mat2_sparse, cpu_mat1_mat2_test); // this overload stores the product in its second argument
+	err_diff = cpu_mat1_mat2_ref;
+	err_diff -= cpu_mat1_mat2_test;
+	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
+}
+
+int main(int argc, char** argv) // test driver; argc and argv are not used
+{
+	Faust::enable_gpu_mod(); // called once here, before any GPU2 matrix is constructed by the test
+	test_mul_gpu_dense();
 	return EXIT_SUCCESS; // NOTE(review): always reports success -- the test prints its errors but never checks them
 }
--- a/src/faust_linear_operator/CPU/faust_gpu_mod.h
+++ b/src/faust_linear_operator/CPU/faust_gpu_mod.h
@@ -23,6 +23,7 @@ namespace Faust
 		public:
 		static void* marr_funcs; //void because we don't know FPP yet and templates aren't available through shared lib interface (extern C, no name mangling)
 		static void* dsm_funcs;
+		static void* spm_funcs;
 #ifdef _MSC_VER
 		public: // should not be public but Visual Studio 14 (and only it) can't access private members from lambda exp (error C2248) // cf. ref_man ini.
 				//TODO: set back to private later (maybe with a more recent version)

--- a/src/faust_linear_operator/CPU/faust_gpu_mod.hpp
+++ b/src/faust_linear_operator/CPU/faust_gpu_mod.hpp
@@ -10,6 +10,9 @@ void* Faust::FaustGPU<FPP>::marr_funcs = nullptr;
 template <typename FPP>
 void* Faust::FaustGPU<FPP>::dsm_funcs = nullptr;

+template <typename FPP>
+void* Faust::FaustGPU<FPP>::spm_funcs = nullptr; // sparse-matrix function table; null until the per-scalar loading code stores a gm_SparseMatFunc_* object in it
+
 template <typename FPP>
 void* Faust::FaustGPU<FPP>::gp_funcs = nullptr;


--- a/src/faust_linear_operator/CPU/faust_gpu_mod_complexdouble.hpp
+++ b/src/faust_linear_operator/CPU/faust_gpu_mod_complexdouble.hpp
@@ -13,12 +13,15 @@ namespace Faust
 			marr_funcs = new gm_MatArrayFunc_cuDoubleComplex(); // on the heap because it cannot be shared among FaustGPU instances if on the stack
 			dsm_funcs = new gm_DenseMatFunc_cuDoubleComplex();
 			gp_funcs = new gm_GenPurposeFunc_cuDoubleComplex();
+			spm_funcs = new gm_SparseMatFunc_cuDoubleComplex(); // sparse-matrix function table, heap-allocated like the three above
 			load_marr_funcs_cuDoubleComplex(gm_handle, marr_funcs);
 			load_dsm_funcs_cuDoubleComplex(gm_handle, dsm_funcs);
+			load_spm_funcs_cuDoubleComplex(gm_handle, static_cast<gm_SparseMatFunc_cuDoubleComplex*>(spm_funcs)); // passed as a typed pointer, unlike the sibling load_* calls
 			load_gp_funcs_cuDoubleComplex(gm_handle, gp_funcs);
 			FaustGPU<complex<double>>::marr_funcs = marr_funcs;
 			FaustGPU<complex<double>>::dsm_funcs = dsm_funcs;
 			FaustGPU<complex<double>>::gp_funcs = gp_funcs;
+			FaustGPU<complex<double>>::spm_funcs = spm_funcs; // publish the table through the class-wide static member
 		}

 	}

--- a/src/faust_linear_operator/CPU/faust_gpu_mod_double.hpp
+++ b/src/faust_linear_operator/CPU/faust_gpu_mod_double.hpp
@@ -13,12 +13,15 @@ namespace Faust
 			marr_funcs = new gm_MatArrayFunc_double(); // on the heap because it cannot be shared among FaustGPU instances if on the stack
 			dsm_funcs = new gm_DenseMatFunc_double();
 			gp_funcs = new gm_GenPurposeFunc_double();
+			spm_funcs = new gm_SparseMatFunc_double(); // sparse-matrix function table, heap-allocated like the three above
 			load_marr_funcs_double(gm_handle, marr_funcs);
 			load_dsm_funcs_double(gm_handle, dsm_funcs);
+			load_spm_funcs_double(gm_handle, static_cast<gm_SparseMatFunc_double*>(spm_funcs)); // passed as a typed pointer, unlike the sibling load_* calls
 			load_gp_funcs_double(gm_handle, gp_funcs);
 			FaustGPU<double>::marr_funcs = marr_funcs;
 			FaustGPU<double>::dsm_funcs = dsm_funcs;
 			FaustGPU<double>::gp_funcs = gp_funcs;
+			FaustGPU<double>::spm_funcs = spm_funcs; // publish the table through the class-wide static member
 		}

 	}

--- a/src/faust_linear_operator/CPU/faust_gpu_mod_gen.hpp.in
+++ b/src/faust_linear_operator/CPU/faust_gpu_mod_gen.hpp.in
@@ -13,12 +13,15 @@ namespace Faust
 			marr_funcs = new gm_MatArrayFunc_@GM_SCALAR@(); // on the heap because it cannot be shared among FaustGPU instances if on the stack
 			dsm_funcs = new gm_DenseMatFunc_@GM_SCALAR@();
 			gp_funcs = new gm_GenPurposeFunc_@GM_SCALAR@();
+			spm_funcs = new gm_SparseMatFunc_@GM_SCALAR@(); // sparse-matrix function table, heap-allocated like the three above
 			load_marr_funcs_@GM_SCALAR@(gm_handle, marr_funcs);
 			load_dsm_funcs_@GM_SCALAR@(gm_handle, dsm_funcs);
+			load_spm_funcs_@GM_SCALAR@(gm_handle, static_cast<gm_SparseMatFunc_@GM_SCALAR@*>(spm_funcs)); // passed as a typed pointer, unlike the sibling load_* calls
 			load_gp_funcs_@GM_SCALAR@(gm_handle, gp_funcs);
 			FaustGPU<@FAUST_SCALAR_FOR_GM@>::marr_funcs = marr_funcs;
 			FaustGPU<@FAUST_SCALAR_FOR_GM@>::dsm_funcs = dsm_funcs;
 			FaustGPU<@FAUST_SCALAR_FOR_GM@>::gp_funcs = gp_funcs;
+			FaustGPU<@FAUST_SCALAR_FOR_GM@>::spm_funcs = spm_funcs; // publish the table through the class-wide static member
 		}

 	}

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
@@ -14,21 +14,26 @@ namespace Faust
 		class MatDense<FPP, GPU2> : MatDense<FPP, Cpu> // dense matrix whose data is reached through a gpu_mod handle (gpu_mat)
 		{
 			public:
-				MatDense(const FPP* data, const faust_unsigned_int nbRow, const faust_unsigned_int nbCol);
+				MatDense(const faust_unsigned_int nbRow, const faust_unsigned_int nbCol, const FPP* data = nullptr); // data is optional: when null the GPU matrix is only created, otherwise data is sent to the GPU

-//				multiply(const MatDense<Cpu,FPP> &other);
-//				multiply(const MatSparse<Cpu,FPP> &other);
 //				multiply(const Vect<Cpu,FPP> &vec);
 				void multiply(MatDense<FPP, GPU2> &other, const char op_this='N');
+				void multiply(MatDense<FPP,Cpu> &other, const char op_this='N'); // other = this * other, result stored back into the CPU matrix
+//				void multiply(MatSparse<FPP, Cpu> &other, MatDense<FPP, GPU2>& output, const char op_this='N');
+				void multiply(const MatSparse<FPP, Cpu> &other, MatDense<FPP, Cpu>& output, const char op_this='N'); // output = this * other; other is taken by const reference
 				MatDense<FPP, Cpu> tocpu();
+				~MatDense(); // frees gpu_mat; spelled without the template-id, which is no longer valid in a destructor declaration since C++20 (DR 2237)
 			private:
 				static void* dsm_funcs; // untyped pointer to the dense-matrix function table, shared by all instances
+				static void* spm_funcs; // untyped pointer to the sparse-matrix function table, shared by all instances
 				gm_DenseMat_t gpu_mat; // NOTE(review): freed by the destructor but no copy operations are declared -- confirm instances are never copied, otherwise the handle would be freed twice
 		};

 	template <typename FPP>
 		void* Faust::MatDense<FPP,GPU2>::dsm_funcs = nullptr; // starts null; the constructor copies FaustGPU's dense table here when it finds it null

+	template <typename FPP>
+		void* Faust::MatDense<FPP,GPU2>::spm_funcs = nullptr; // starts null; the constructor copies FaustGPU's sparse table here when it finds it null
 };
 #include "faust_MatDense_gpu_double.hpp"
 #endif

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.hpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.hpp.in
 //TODO: move to CPP
 template<>
-Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::MatDense(const @FAUST_SCALAR_FOR_GM@* data,
+Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::MatDense( // builds the GPU matrix: sends data to the GPU when it is given, otherwise only creates an nbRow x nbCol matrix
 		const faust_unsigned_int nbRow,
-		const faust_unsigned_int nbCol)
+		const faust_unsigned_int nbCol,
+		const @FAUST_SCALAR_FOR_GM@* data/*=nullptr*/)
 {
 	this->dim1 = nbRow;
 	this->dim2 = nbCol;
 	FaustGPU<@FAUST_SCALAR_FOR_GM@>::load_gm_functions(); // must run before the tables are read below
 	if(this->dsm_funcs == nullptr) // static member: fetched from FaustGPU only while still null
 		this->dsm_funcs = FaustGPU<@FAUST_SCALAR_FOR_GM@>::dsm_funcs;
-	gpu_mat = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs)->togpu(nbRow, nbCol, const_cast<@FAUST_SCALAR_FOR_GM@*>(data));
+	if(this->spm_funcs == nullptr) // same lazy fetch for the sparse-matrix table
+		this->spm_funcs = FaustGPU<@FAUST_SCALAR_FOR_GM@>::spm_funcs;
+	if(nullptr != data)
+		gpu_mat = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs)->togpu(nbRow, nbCol, const_cast<@FAUST_SCALAR_FOR_GM@*>(data)); // const is cast away for the call; presumably togpu only reads the buffer -- confirm
+	else
+		gpu_mat = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs)->create(nbRow, nbCol); // no host data given: create the GPU matrix without sending anything
+}
+
+template<>
+Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::~MatDense() // frees the GPU matrix obtained in the constructor
+{
+	auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs); // typed view of the untyped static table pointer
+	dsm_funcs->free(gpu_mat);
 }

 template<>
@@ -22,6 +35,31 @@ void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(MatDense<@FAUST_SCALA
 	other.dim1 = dim1;
 }

+template<>
+void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(MatDense<@FAUST_SCALAR_FOR_GM@, Cpu> &other, const char op_this) // NOTE(review): op_this is never read in this body
+{
+	// other = this * other
+	auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs);
+	MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> other_gpu_mat(other.dim1, other.dim2,other.getData()); // temporary GPU copy of other; its destructor frees it when this function returns
+	dsm_funcs->mul_gpu_dsm_tocpu_ext(this->gpu_mat, other_gpu_mat.gpu_mat, other.getData(), OP_NOTRANSP, OP_NOTRANSP); // other's host buffer is passed as the destination; both operands are hard-coded as not transposed
+	//TODO: update dims (in transpose/adjoint case)
+	other.dim1 = dim1; // NOTE(review): only the row count is updated and other's buffer is not resized here -- confirm it can hold dim1 x other.dim2 values
+}
+
+template<>
+void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const MatSparse<@FAUST_SCALAR_FOR_GM@, Cpu> &other, MatDense<@FAUST_SCALAR_FOR_GM@,Cpu>& output, const char op_this)
+{
+	// output = this * other: other is sent to the GPU, the product is computed there and then copied into output
+	//NOTE(review): op_this is ignored -- both operands are passed as OP_NOTRANSP
+	auto spm_funcs = ((gm_SparseMatFunc_@GM_SCALAR@*) this->spm_funcs);
+	auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs);
+	//togpu(int32_t nrows, int32_t ncols, int32_t nnz, int32_t* row_ptr, int32_t* col_inds, @GM_SCALAR@* values)
+	// the values pointer is cast to the scalar type of this instantiation (a fixed double* cast is wrong for the complex one)
+	auto other_gpu = spm_funcs->togpu(other.getNbRow(), other.getNbCol(), other.getNonZeros(), (int32_t*) other.getRowPtr(), (int32_t*) other.getColInd(), (@FAUST_SCALAR_FOR_GM@*) other.getValuePtr());
+	MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> output_gpu_mat(getNbRow(), other.getNbCol()); // GPU-side result, freed by its destructor on return
+	// mul_gpu_spm_ext(gm_DenseMat_t, gm_SparseMat_t, gm_DenseMat_t output, gm_Op, gm_Op);
+	dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, output_gpu_mat.gpu_mat, OP_NOTRANSP, OP_NOTRANSP);
+	output = MatDense<@FAUST_SCALAR_FOR_GM@, Cpu>(getNbRow(), other.getNbCol()); //TODO: manage transpose case
+	dsm_funcs->tocpu(output_gpu_mat.gpu_mat, output.getData());
+	//TODO(review): other_gpu is never released in this function -- confirm whether the sparse function table provides a free routine and call it here
+}
+
 template<>
 Faust::MatDense<@FAUST_SCALAR_FOR_GM@, Cpu> Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::tocpu()
 {
@@ -30,3 +68,5 @@ Faust::MatDense<@FAUST_SCALAR_FOR_GM@, Cpu> Faust::MatDense<@FAUST_SCALAR_FOR_GM
 	dsm_funcs->tocpu(gpu_mat, cpu_mat.getData());
 	return cpu_mat; //TODO: move constructor for MatDense<FPP, Cpu>
 }
+
+