Update MatDense<FPP,GPU2>::multiply(MatSparse<FPP,Cpu>) freeing GPU memory and...

Update MatDense<FPP,GPU2>::multiply(MatSparse<FPP,Cpu>) to free the GPU memory it allocates, and add a new signature that writes the output to a MatDense<FPP,GPU2>.

Update MatDense<FPP,GPU2>::multiply(MatSparse<FPP,Cpu>) freeing GPU memory and...
3376b752 · hhakim · 2ac9f7ba · 3376b752 · 3376b752 · 3376b752
Commit 3376b752 authored 4 years ago by hhakim
--- a/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
@@ -10,32 +10,48 @@ using namespace Faust;

 void test_mul_gpu_dense()
 {
-	faust_unsigned_int nrows = 10, ncols = 10;
-	faust_unsigned_int nrows2 = 10, ncols2 = 15;
+	faust_unsigned_int nrows = 90, ncols = 90;
+	faust_unsigned_int nrows2 = 90, ncols2 = 150;
 	double data[100];
 	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
 	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows2,ncols2);
 	MatDense<double,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());
 	// test MatDense<FPP,GPU2> * MatDense<FPP,GPU2>
+	cout << "Mul. GPUDense*GPUDense in GPUDense" << endl;
 	gpu_mat1.multiply(gpu_mat2);
 	auto cpu_mat1_mat2_test = gpu_mat2.tocpu();
 	auto cpu_mat1_mat2_ref = *cpu_mat2;
 	cpu_mat1->multiply(cpu_mat1_mat2_ref, 'N');
+	cout << "ref norm: " << cpu_mat1_mat2_ref.norm() << endl;
+	cout << "test norm: " << cpu_mat1_mat2_test.norm() << endl;
 	auto err_diff = cpu_mat1_mat2_ref;
 	err_diff -= cpu_mat1_mat2_test;
 	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
-	// test MatDense<FPP,GPU2> * MatDense<FPP,Cpu>
+	cout << "Mul. GPUDense*CPUDense in CPUDense" << endl;
 	cpu_mat1_mat2_test = *cpu_mat2;
 	gpu_mat1.multiply(cpu_mat1_mat2_test);
 	err_diff = cpu_mat1_mat2_ref;
 	err_diff -= cpu_mat1_mat2_test;
 	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
-	// test MatDense<FPP,GPU2> * MatSparse<FPP,Cpu>
+	cout << "Mul. GPUDense*CPUSparse in CPUDense" << endl;
 	Faust::MatSparse<double,Cpu> cpu_mat2_sparse(*cpu_mat2);
+//	cout << cpu_mat2_sparse.to_string(false, true) << endl;
+//	cout << cpu_mat2->to_string(false, true) << endl;
 	gpu_mat1.multiply(cpu_mat2_sparse, cpu_mat1_mat2_test);
 	err_diff = cpu_mat1_mat2_ref;
 	err_diff -= cpu_mat1_mat2_test;
+	cout << "ref norm: " << cpu_mat1_mat2_ref.norm() << endl;
+	cout << "test norm: " << cpu_mat1_mat2_test.norm() << endl;
+//	cout << cpu_mat1_mat2_ref.to_string(false, true) << endl;
+//	cout << cpu_mat1_mat2_test.to_string(false, true) << endl;
+	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
+	cout << "Mul. GPUDense*CPUSparse in GPUDense" << endl;
+	MatDense<double, GPU2> gpu_mat1_mat2_test(nrows, ncols2);
+	gpu_mat1.multiply(cpu_mat2_sparse, gpu_mat1_mat2_test);
+	auto gpu_mat1_mat2_test_to_cpu = gpu_mat1_mat2_test.tocpu();
+	err_diff = gpu_mat1_mat2_test_to_cpu;
+	err_diff -= cpu_mat1_mat2_ref;
 	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
 }


--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
@@ -21,6 +21,7 @@ namespace Faust
 				void multiply(MatDense<FPP,Cpu> &other, const char op_this='N');
 //				void multiply(MatSparse<FPP, Cpu> &other, MatDense<FPP, GPU2>& output, const char op_this='N');
 				void multiply(const MatSparse<FPP, Cpu> &other, MatDense<FPP, Cpu>& output, const char op_this='N');
+				void multiply(const MatSparse<FPP, Cpu> &other, MatDense<FPP, GPU2>& output, const char op_this='N');
 				MatDense<FPP, Cpu> tocpu();
 				~MatDense<FPP, GPU2>();
 			private:

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.hpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.hpp.in
@@ -53,11 +53,24 @@ void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const MatSparse<@FAUS
 	auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs);
 	//togpu(int32_t nrows, int32_t ncols, int32_t nnz, int32_t* row_ptr, int32_t* col_inds, @GM_SCALAR@* values)
 	auto other_gpu = spm_funcs->togpu(other.getNbRow(), other.getNbCol(), other.getNonZeros(), (int32_t*) other.getRowPtr(), (int32_t*) other.getColInd(), (double*) other.getValuePtr());
-	MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> output_gpu_mat(getNbRow(), other.getNbCol());
 	// mul_gpu_spm_ext(gm_DenseMat_t, gm_SparseMat_t, gm_DenseMat_t output, gm_Op, gm_Op);
-	dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, output_gpu_mat.gpu_mat, OP_NOTRANSP, OP_NOTRANSP);
+	auto output_gpu_mat = dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, nullptr, OP_NOTRANSP, OP_NOTRANSP);
 	output = MatDense<@FAUST_SCALAR_FOR_GM@, Cpu>(getNbRow(), other.getNbCol()); //TODO: manage transpose case
-	dsm_funcs->tocpu(output_gpu_mat.gpu_mat, output.getData());
+	dsm_funcs->tocpu(output_gpu_mat, output.getData());
+	dsm_funcs->free(output_gpu_mat);
+	spm_funcs->free(other_gpu);
+}
+
+template<>
+void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const MatSparse<@FAUST_SCALAR_FOR_GM@, Cpu> &other, MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& output, const char op_this)
+{
+	auto spm_funcs = ((gm_SparseMatFunc_@GM_SCALAR@*) this->spm_funcs);
+	auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*) this->dsm_funcs);
+	auto other_gpu = spm_funcs->togpu(other.getNbRow(), other.getNbCol(), other.getNonZeros(), (int32_t*) other.getRowPtr(), (int32_t*) other.getColInd(), (double*) other.getValuePtr());
+	if(output.gpu_mat != nullptr)
+		dsm_funcs->free(output.gpu_mat);
+	output.gpu_mat = dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, nullptr, OP_NOTRANSP, OP_NOTRANSP);
+	spm_funcs->free(other_gpu);
 }

 template<>