Add prox_spcol and prox_splin pure GPU implementations along with a test/benchmark.

Update to gpu_mod@79e7daee

Add prox_spcol and prox_splin pure GPU implementations along with a test/benchmark.
9d73e9ca · hhakim · b242af8b · b0f0915f · f94b91be · b0f0915f
Commit 9d73e9ca authored 4 years ago by hhakim
--- a/gpu_mod @ b0f0915f
+++ b/gpu_mod @ b0f0915f
-Subproject commit f94b91bec5f095e99199eece785c86d4612ec477
+Subproject commit b0f0915ffad0f13363a865ff3466387d5d405cd6
--- a/misc/test/CMakeLists.txt
+++ b/misc/test/CMakeLists.txt
@@ -206,7 +206,7 @@ if(MATIO_LIB_FILE AND MATIO_INC_DIR AND BUILD_READ_MAT_FILE AND NOT NOCPPTESTS)
 	endif()

 	if(USE_GPU_MOD)
-		list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod hierarchical2020_gpu2 hierarchical2020Hadamard_gpu2 MEG_factorization test_prox_sp_gpu)
+		list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod hierarchical2020_gpu2 hierarchical2020Hadamard_gpu2 MEG_factorization test_prox_sp_gpu test_prox_splin_spcol_gpu)
 	endif()

 	foreach(TEST_FPP float double complex<float> complex<double>)

--- a/misc/test/src/C++/test_prox_splin_spcol_gpu.cpp.in
+++ b/misc/test/src/C++/test_prox_splin_spcol_gpu.cpp.in
+#include "faust_MatDense.h"
+#include "faust_MatDense_gpu.h"
+#include "faust_prox_gpu.h"
+#include <cassert>
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+/** \brief Test/benchmark comparing the pure GPU prox_splin/prox_spcol with their GPU-to-CPU round-trip versions.
+*/
+
+typedef @TEST_FPP@ FPP;
+
+
+using namespace Faust;
+using namespace std;
+
+/**
+ * \brief Runs prox_splin (or prox_spcol) num_its times on the same random matrix, once with
+ * pure_gpu=true and once with pure_gpu=false, checks that both results agree and prints the
+ * cumulated time spent in each variant.
+ *
+ * Optional positional arguments:
+ *  - argv[1]: num_its, number of iterations (default 30000),
+ *  - argv[2]: dim, size of the square matrix (default 204),
+ *  - argv[3]: k, passed as is to the prox functions (default dim/10),
+ *  - argv[4]: row_or_col, 0 to run prox_splin (default), anything else to run prox_spcol.
+ *
+ * \return 0 on success, EXIT_FAILURE on invalid arguments, matrix creation failure or result mismatch.
+ */
+int main(int argc, char* argv[])
+{
+	Faust::enable_gpu_mod();
+
+	int num_its = 30000;
+	int dim = 204;
+	int32_t k = dim/10;
+	int row_or_col = 0; // row by default
+
+	if(argc > 1)
+		num_its = std::atoi(argv[1]);
+	if(argc > 2)
+	{
+		dim = std::atoi(argv[2]);
+		k = dim/10; // the default k follows the requested dimension
+	}
+	if(argc > 3)
+		k = std::atoi(argv[3]);
+	if(argc > 4)
+		row_or_col = std::atoi(argv[4]);
+
+	// atoi gives 0 on garbage and a negative dim would wrap once stored in an unsigned type
+	if(num_its < 0 || dim <= 0 || k < 0)
+	{
+		cerr << "usage: " << argv[0] << " [num_its>=0 [dim>0 [k>=0 [row_or_col]]]]" << endl;
+		return EXIT_FAILURE;
+	}
+
+	const faust_unsigned_int dim1 = dim;
+	const faust_unsigned_int dim2 = dim;
+
+	MatDense<FPP,GPU2> M1, M2;
+	MatDense<FPP,Cpu>* M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
+	if(M == nullptr)
+	{
+		cerr << "randMat failed to create the " << dim1 << "x" << dim2 << " test matrix" << endl;
+		return EXIT_FAILURE;
+	}
+	*M *= FPP(100);
+	M1 = *M;
+	M2 = *M;
+
+	// Start from zero explicitly: a default-initialized duration<double> holds an indeterminate value.
+	std::chrono::duration<double> pure_gpu_prox_dur = std::chrono::duration<double>::zero();
+	std::chrono::duration<double> gpu_cpu_rt_prox_dur = std::chrono::duration<double>::zero();
+
+	for(int i=0; i < num_its; i++)
+	{
+		const auto pure_gpu_prox_start = std::chrono::steady_clock::now();
+		if(row_or_col)
+			prox_spcol(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
+		else
+			prox_splin(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
+		pure_gpu_prox_dur += std::chrono::steady_clock::now()-pure_gpu_prox_start;
+
+		const auto gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
+		if(row_or_col)
+			prox_spcol(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
+		else
+			prox_splin(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
+		gpu_cpu_rt_prox_dur += std::chrono::steady_clock::now()-gpu_cpu_rt_prox_start;
+
+		M1 -= M2;
+		const auto err = M1.norm();
+		// Explicit check instead of assert(): assert is compiled out when NDEBUG is defined.
+		// The negated form also rejects a NaN error.
+		if(!(err < 1e-6))
+		{
+			cerr << "iteration " << i << ": pure gpu and gpu2cpu round-trip results differ, err:" << err << endl;
+			delete M;
+			return EXIT_FAILURE;
+		}
+		// restore the inputs for the next iteration
+		M1 = *M;
+		M2 = *M;
+	}
+
+	cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
+	cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
+
+	// NOTE(review): assumes randMat transfers ownership of the matrix to the caller -- confirm against its definition.
+	delete M;
+	return 0;
+}
--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.cpp.in
@@ -717,4 +717,18 @@ namespace Faust
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
 			dsm_funcs->prox_sp(gpu_mat, k, normalized, pos);
 		}
+
+	template<>
+		void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::prox_splin(int32_t k, bool normalized/*=false*/, bool pos/*=false*/) const
+		{ // thin wrapper: forwards gpu_mat with k, normalized and pos to the gpu_mod prox_splin entry point
+			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0)); // dense-matrix function table; presumably selected by the type of the scalar argument (confirm in GPUModHandler)
+			dsm_funcs->prox_splin(gpu_mat, k, normalized, pos);
+		}
+
+	template<>
+		void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::prox_spcol(int32_t k, bool normalized/*=false*/, bool pos/*=false*/) const
+		{ // thin wrapper: forwards gpu_mat with k, normalized and pos to the gpu_mod prox_spcol entry point
+			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0)); // dense-matrix function table; presumably selected by the type of the scalar argument (confirm in GPUModHandler)
+			dsm_funcs->prox_spcol(gpu_mat, k, normalized, pos);
+		}
 };
--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
@@ -130,6 +130,8 @@ namespace Faust
 				void copyBuf(FPP* dst_cpu_buf, const void* stream=nullptr) const;
 				bool isReal() const;
 				void prox_sp(int32_t k, bool normalized=false, bool pos=false) const;
+				void prox_spcol(int32_t k, bool normalized=false, bool pos=false) const; // hands gpu_mat to the gpu_mod prox_spcol function (no CPU copy is made here)
+				void prox_splin(int32_t k, bool normalized=false, bool pos=false) const; // hands gpu_mat to the gpu_mod prox_splin function (no CPU copy is made here)

 			protected:
 				gm_DenseMat_t gpu_mat;

--- a/src/faust_linear_operator/GPU2/faust_prox_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_prox_gpu.h
@@ -58,9 +58,9 @@ namespace Faust
 	template<typename FPP>
 		void prox_spcol(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false);
 	template<typename FPP>
-		void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false);
+		void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false, const bool pure_gpu=true);
 	template<typename FPP>
-		void prox_splincol(MatDense<FPP,GPU2> &M,faust_unsigned_int k, const bool normalized=true, const bool pos=false);
+		void prox_splincol(MatDense<FPP,GPU2> &M,faust_unsigned_int k, const bool normalized=true, const bool pos=false, const bool pure_gpu=true);
 	template<typename FPP>
 		void prox_supp(MatDense<FPP,GPU2> & M, const MatDense<FPP,GPU2> & supp, const bool normalized=true, const bool pos=false);
 	template<typename FPP>

--- a/src/faust_linear_operator/GPU2/faust_prox_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_prox_gpu.hpp
@@ -70,19 +70,33 @@ namespace Faust
 		}

 	template<typename FPP>
-		void prox_spcol(MatDense<FPP,GPU2> & M, faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/)
+		void prox_spcol(MatDense<FPP,GPU2> & M, faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/, const bool pure_gpu/*=true*/)
 		{
-			MatDense<FPP,Cpu> cpuM = M.tocpu();
-			prox_spcol(cpuM, k, normalized, pos);
-			M = cpuM;
+			if(pure_gpu) // true: M is processed through its own prox_spcol member, without any CPU copy
+			{
+				M.prox_spcol(k, normalized, pos);
+			}
+			else // false: former behaviour, round trip through a CPU copy of M
+			{
+				MatDense<FPP,Cpu> cpuM = M.tocpu();
+				prox_spcol(cpuM, k, normalized, pos); // overload taking a MatDense<FPP,Cpu>
+				M = cpuM; // assign the CPU result back to M
+			}
 		}

 	template<typename FPP>
-		void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/)
+		void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/, const bool pure_gpu/*=true*/)
 		{
-			MatDense<FPP,Cpu> cpuM = M.tocpu();
-			prox_splin(cpuM, k, normalized, pos);
-			M = cpuM;
+			if(pure_gpu) // true: M is processed through its own prox_splin member, without any CPU copy
+			{
+				M.prox_splin(k, normalized, pos);
+			}
+			else // false: former behaviour, round trip through a CPU copy of M
+			{
+				MatDense<FPP,Cpu> cpuM = M.tocpu();
+				prox_splin(cpuM, k, normalized, pos); // overload taking a MatDense<FPP,Cpu>
+				M = cpuM; // assign the CPU result back to M
+			}
 		}

 	template<typename FPP>