Update things about C++ proxs.

Update the GPU proxs with gpu_mod@94f9562a (positivity); update the GPU sp, splin and spcol prox tests to run with and without positivity. Update the doc of the CPU dynamic-format prox_sp and simplify the CPU pre_prox_pos.

Update things about C++ proxs.
db173e4f · hhakim · 61945343 · 94f9562a · 88439463 · 94f9562a
Commit db173e4f authored 4 years ago by hhakim
--- a/gpu_mod @ 94f9562a
+++ b/gpu_mod @ 94f9562a
-Subproject commit 8843946348e1464779f54ceda06dd142ef30f9f7
+Subproject commit 94f9562af81b2db7635ca7c0cd70e1d5559dd10f
--- a/misc/test/src/C++/test_prox_sp_gpu.cpp.in
+++ b/misc/test/src/C++/test_prox_sp_gpu.cpp.in
@@ -31,9 +31,9 @@ int main(int argc, char* argv[])
 	faust_unsigned_int dim1 = 204;
 	faust_unsigned_int dim2 = 204;
-//	auto MatrixFilename = "D.mat";
+	//	auto MatrixFilename = "D.mat";
-//	Faust::MatDense<FPP,Cpu> D;
+	//	Faust::MatDense<FPP,Cpu> D;
-//    init_faust_mat_from_matio(D,MatrixFilename,"D");
+	//    init_faust_mat_from_matio(D,MatrixFilename,"D");
 	auto num_its = 60;
@@ -69,9 +69,9 @@ int main(int argc, char* argv[])
 	MatDense<FPP,GPU2> M1, M2;
 	M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
-//	M = &D;
+	//	M = &D;
-//	M->Display();
+	//	M->Display();
-//	*M *= FPP(100);
+	//	*M *= FPP(100);
 	M1 = *M;
 	M2 = *M;
@@ -81,37 +81,41 @@ int main(int argc, char* argv[])
 	for(int i=0; i < num_its; i++)
 	{
-		pure_gpu_prox_start = std::chrono::steady_clock::now();
+		for(int pos=0;pos < 2; pos++)
-		prox_sp(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
-		pure_gpu_prox_end = std::chrono::steady_clock::now();
-		pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
-		gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
-		prox_sp(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
-		gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
-		gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
-		assert(M1.getNonZeros() == M2.getNonZeros());
-		M1 -= M2;
-		auto err = M1.norm();
-#ifdef DEBUG
-		if(i == 0)
 		{
-			cout << "M1 norm:" << M1.norm() << endl;
+			pure_gpu_prox_start = std::chrono::steady_clock::now();
-			M1.Display();
+			prox_sp(M1, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ true);
-			cout << "M2 norm:" << M2.norm() << endl;
+			pure_gpu_prox_end = std::chrono::steady_clock::now();
-			M2.Display();
+			pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
-			cout << "err:" << err << endl;
-		}
+			gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
+			prox_sp(M2, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ false);
+			gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
+			gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
+			assert(M1.getNonZeros() == M2.getNonZeros());
+			M1 -= M2;
+			auto err = M1.norm();
+#ifdef DEBUG
+			if(i == 0)
+			{
+				cout << "M1 norm:" << M1.norm() << endl;
+				M1.Display();
+				cout << "M2 norm:" << M2.norm() << endl;
+				M2.Display();
+				cout << "err:" << err << endl;
+			}
 #endif
-		assert(err < 1e-6);
+			assert(err < 1e-6);
-		M1 = *M;
+			M1 = *M;
-		M2 = *M;
+			M2 = *M;
+		}
 	}
 	cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
 	cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
 	cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl;
+	delete M;
 	return 0;
 }
--- a/misc/test/src/C++/test_prox_splin_spcol_gpu.cpp.in
+++ b/misc/test/src/C++/test_prox_splin_spcol_gpu.cpp.in
@@ -49,7 +49,7 @@ int main(int argc, char* argv[])
 	std::cout << "M nrows: " << dim1 << std::endl;
 	std::cout << "M ncols: " << dim2 << std::endl;
 	std::cout << (row_or_col==1?"prox_spcol":"prox_splin") << " k: " << k << std::endl;
-	std::cout << "benchmark num_its: " << num_its << std::endl;
+	std::cout << "benchmark num_its (* 2 for pos==true and pos==false): " << num_its << std::endl;
 	MatDense<FPP,Cpu>* M;
 	MatDense<FPP,GPU2> M1, M2;
@@ -64,32 +64,39 @@ int main(int argc, char* argv[])
 	for(int i=0; i < num_its; i++)
 	{
-		pure_gpu_prox_start = std::chrono::steady_clock::now();
+		for(int pos=0;pos < 2; pos++)
-		if(row_or_col)
+		{
-			prox_spcol(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
+			pure_gpu_prox_start = std::chrono::steady_clock::now();
-		else
+			if(row_or_col)
-			prox_splin(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
+				prox_spcol(M1, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ true);
-		pure_gpu_prox_end = std::chrono::steady_clock::now();
+			else
-		pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
+				prox_splin(M1, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ true);
+			pure_gpu_prox_end = std::chrono::steady_clock::now();
-		gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
+			pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
-		if(row_or_col)
-			prox_spcol(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
+			gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
-		else
+			if(row_or_col)
-			prox_splin(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
+				prox_spcol(M2, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ false);
-		gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
+			else
-		gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
+				prox_splin(M2, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ false);
-		M1 -= M2;
+			gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
-		auto err = M1.norm();
+			gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
-//		cout << "err:" << err << endl;
+			cout << "M1.norm():" << M1.norm() << " nnz:"  << M1.getNonZeros() << endl;
-		assert(err < 1e-6);
+			cout << "M2.norm():" << M2.norm() << " nnz:"  << M2.getNonZeros() << endl;
-		M1 = *M;
+			M1 -= M2;
-		M2 = *M;
+			auto err = M1.norm();
+			cout << "err:" << err << endl;
+			assert(err < 1e-6);
+			M1 = *M;
+			M2 = *M;
+		}
 	}
 	cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
 	cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
 	cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl;
+	delete M;
 	return 0;
 }
--- a/src/faust_linear_operator/CPU/faust_prox.h
+++ b/src/faust_linear_operator/CPU/faust_prox.h
@@ -102,11 +102,17 @@ namespace Faust
 	template<typename FPP> faust_unsigned_int sparse_size(faust_unsigned_int nnz, faust_unsigned_int nrows);
 	template<typename FPP> faust_unsigned_int dense_size(faust_unsigned_int nrows, faust_unsigned_int ncols);
-	/**
+    /**
-	 * Decides which output format to use when appliying the SP prox. op. to M. Either M or spM is the output, it depends on the byte size. The minimum memory fingerprint is targeted.
+     * Decides which output format to use when applying the SP prox. op. to M. Either M or spM is the output, depending on their byte sizes. The minimum memory footprint is targeted.
-	 *
+     *
-	 *  \param forcedType: used to choose explicitely the output format with values Sparse or Dense (MatSparse or MatDense).
+     *  \param M: the input matrix to project (which could be the output too, if dense format is chosen).
-	 */
+     *  \param spM: the output matrix if the output format eventually chosen is sparse.
+     *  \param k: the sparsity parameter (pseudo-)norm_1 of the output matrix.
+     *  \param normalized: true to normalize the output matrix.
+     *  \param pos: true to filter out the negative values of M before applying the prox.
+     *  \param forcedType: used to explicitly choose the output format, with values Sparse or Dense (MatSparse or MatDense).
+     * \return the prox image as a MatGeneric matrix.
+     */
 	template<typename FPP>
 		MatGeneric<FPP,Cpu>* prox_sp(MatDense<FPP,Cpu> & M, MatSparse<FPP, Cpu> & spM, faust_unsigned_int k, const bool normalized=true, const bool pos=false, const MatType forcedType=None);
 }

--- a/src/faust_linear_operator/CPU/faust_prox.hpp
+++ b/src/faust_linear_operator/CPU/faust_prox.hpp
@@ -387,9 +387,9 @@ Faust::pre_prox_pos(MatDense<FPP,Cpu> & M)
 	//	bool is_cplx = typeid(ptr_data[0])==typeid(std::complex<double>())||typeid(ptr_data[0])==typeid(std::complex<float>());
 	//don't want to duplicate the function for all realizations of template we need
 	//so we use a little trick to make the code valid for double/float and complex<double>/complex<float>
-	bool is_cplx = std::is_same<FPP,complex<double>>::value || std::is_same<FPP, complex<float>>::value;
+	bool is_real = std::is_same<FPP, Real<FPP>>::value;
 	for (int i=0;i<(M.getNbRow() * M.getNbCol());i++)
-		if (!is_cplx && std::complex<float>(ptr_data[i]).real() < 0)
+		if (is_real && std::complex<float>(ptr_data[i]).real() < 0)
 			ptr_data[i]=0;
 }