Add MatBSR<FPP, GPU2>::multiply(MatDense<FPP, GPU2>) and its unit test.

Update to gpu_mod@4645c7f2.

Add MatBSR<FPP, GPU2>::multiply(MatDense<FPP, GPU2>) and its unit test.
5ccef30f · hhakim · e9dbefaa · 4645c7f2 · e37ee8a8 · 4645c7f2
Commit 5ccef30f authored 3 years ago by hhakim
--- a/gpu_mod @ 4645c7f2
+++ b/gpu_mod @ 4645c7f2
-Subproject commit e37ee8a882a27ee94a75c2b11aac32babefe5e04
+Subproject commit 4645c7f2182c9378cc685eaa82a321af91d26109
--- a/misc/test/src/C++/test_matbsr_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_matbsr_gpu_mod.cpp.in
@@ -12,6 +12,7 @@ typedef @TEST_FPP@ FPP;

 void test_gpu_ctor_and_tocpu()
 {
+	std::cout << "test_gpu_ctor_and_tocpu" << std::endl;
 	auto nrows = 10;
 	auto ncols = 15;
 	auto bnrows = 5;
@@ -30,17 +31,42 @@ void test_gpu_ctor_and_tocpu()
 			cpu_bsr_mat->get_bcolinds());
 	gpu_bsr_mat.tocpu(cpu_bsr_mat2);
 	assert(cpu_bsr_mat->norm() == cpu_bsr_mat2.norm());
-	MatDense<double, Cpu> diff_mat = cpu_bsr_mat->to_dense();
+	MatDense<FPP, Cpu> diff_mat = cpu_bsr_mat->to_dense();
 	diff_mat -= cpu_bsr_mat2.to_dense();
 	assert(diff_mat.norm() < 1e-3);
 	cout << "OK" << endl;
 	delete cpu_bsr_mat;
 }

+void test_gpu_mul_dense() // checks MatBSR<FPP, GPU2>::multiply(MatDense<FPP, GPU2>) against the CPU MatBSR product
+{
+	std::cout << "test_gpu_mul_dense" << std::endl;
+	// generate a random dense matrix on CPU (randMat(15, 10): presumably 15 rows to match the 15 columns of the BSR matrix)
+	MatDense<FPP, Cpu> *cpu_dmat = MatDense<FPP, Cpu>::randMat(15, 10);
+	// copy it to the GPU
+	MatDense<FPP, GPU2> gpu_dmat(*cpu_dmat);
+	// same for the BSR matrix; it must use FPP (not a hard-coded double) so the test is valid for every TEST_FPP
+	auto cpu_bmat = MatBSR<FPP, Cpu>::randMat(10, 15, 5, 5, 2);
+	MatBSR<FPP, GPU2> gpu_bmat(*cpu_bmat);
+
+	// multiply on CPU (reference) then on GPU (tested); each result is read back from the dense operand
+	cpu_bmat->multiply(*cpu_dmat, 'N');
+	gpu_bmat.multiply(gpu_dmat, 'N');
+
+	MatDense<FPP, Cpu> diff_mat;
+	gpu_dmat.tocpu(diff_mat); // bring the GPU result back to the CPU
+	diff_mat -= *cpu_dmat; // difference between the GPU and CPU products
+	assert(diff_mat.norm() < 1e-3);
+	delete cpu_bmat;
+	delete cpu_dmat;
+	std::cout << "OK" << std::endl;
+}
+
 int main() // test driver: runs every MatBSR GPU test of this file in sequence
 {
 	Faust::enable_gpu_mod(); // called first, before the tests below create any GPU2 object
 	test_gpu_ctor_and_tocpu();
+	test_gpu_mul_dense();
 }


--- a/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.cpp.in
@@ -24,6 +24,18 @@ namespace Faust
 			gp_funcs->set_dev(cur_dev_id);
 		}

+	template<> // explicit specialization for the scalar type substituted for the FAUST_SCALAR_FOR_GM placeholder
+		void MatBSR<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& M, char opThis/*='N'*/) const // replaces M's GPU matrix with the result of mul_gpu_dsm_ext (presumably op(this)*M -- TODO confirm)
+		{
+			gm_Op gop_this;
+			char2gm_Op(opThis, gop_this); // convert the char op code into the gm_Op value passed to the GPU call
+			auto bsr_funcs = GPUModHandler::get_singleton()->bsr_funcs((@FAUST_SCALAR_FOR_GM@)(0)); // BSR function table; the zero argument presumably only selects the scalar type
+			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs((@FAUST_SCALAR_FOR_GM@)(0)); // dense-matrix function table, used below to free M's previous matrix
+			auto gpu_dmat = bsr_funcs->mul_gpu_dsm_ext(gpu_mat, M.gpu_mat, nullptr, gop_this, OP_NOTRANSP); // nullptr output: presumably the callee allocates the result -- TODO confirm; M itself is passed non-transposed
+			dsm_funcs->free(M.get_gpu_mat_ptr()); // free the old operand only after the product has been computed from it
+			M.gpu_mat = gpu_dmat; // M now holds the product; NOTE(review): assumes get_gpu_mat_ptr() returned the pointer previously stored in M.gpu_mat
+		}
+
 	template<>
 		void MatBSR<@FAUST_SCALAR_FOR_GM@,GPU2>::tocpu(int32_t* browptr, int32_t* bcolinds, @FAUST_SCALAR_FOR_GM@* bdata, int32_t* nrows/*=nullptr*/, int32_t* ncols/*=nullptr*/, int32_t *bnrows/*=nullptr*/, int32_t *bncols/*=nullptr*/, int32_t* bnnz/*=nullptr*/) const
 		{

--- a/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.h
@@ -65,6 +65,7 @@ namespace Faust

 			virtual void set_gpu_mat_ptr(void*);
 			/*********** own member functions **************/
+			void multiply(MatDense<FPP,GPU2>& M, char opThis='N') const; // multiplies into M in place (M's GPU matrix is replaced by the result); opThis is the op code applied to this matrix, 'N' by default

 			void tocpu(int32_t* browptr, int32_t* bcolinds, FPP* bdata, int32_t* nrows=nullptr, int32_t* ncols=nullptr, int32_t *bnrows=nullptr, int32_t *bncol=nullptr, int32_t* bnnz=nullptr) const;
 			void tocpu(MatBSR<FPP, Cpu> &cpu_mat) const;

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
@@ -7,6 +7,7 @@
 #include "faust_MatGeneric_gpu.h"
 #include "faust_Vect_gpu.h"
 #include "faust_MatSparse_gpu.h"
+#include "faust_MatBSR_gpu.h"
 #include "faust_gpu_mod_utils.h"
 namespace Faust
 {
@@ -19,6 +20,7 @@ namespace Faust
 		{
 			friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
 			friend MatSparse<FPP,GPU2>;
+			friend MatBSR<FPP,GPU2>;
 			friend MatDense<std::complex<double>,GPU2>; // TODO limit to real function
 //			friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
 //