Mentions légales du service

Skip to content
Snippets Groups Projects
Commit db173e4f authored by hhakim's avatar hhakim
Browse files

Update things about C++ proxs.

Update of gpu prox with gpu_mod@94f9562a (positivity),
Update gpu sp, splin, spcol proxs to test with and without positivity.
Update of cpu dynamic format prox_sp doc,
simplifying cpu pre_prox_pos.
parent 61945343
Branches
Tags
No related merge requests found
Subproject commit 8843946348e1464779f54ceda06dd142ef30f9f7 Subproject commit 94f9562af81b2db7635ca7c0cd70e1d5559dd10f
...@@ -31,9 +31,9 @@ int main(int argc, char* argv[]) ...@@ -31,9 +31,9 @@ int main(int argc, char* argv[])
faust_unsigned_int dim1 = 204; faust_unsigned_int dim1 = 204;
faust_unsigned_int dim2 = 204; faust_unsigned_int dim2 = 204;
// auto MatrixFilename = "D.mat"; // auto MatrixFilename = "D.mat";
// Faust::MatDense<FPP,Cpu> D; // Faust::MatDense<FPP,Cpu> D;
// init_faust_mat_from_matio(D,MatrixFilename,"D"); // init_faust_mat_from_matio(D,MatrixFilename,"D");
auto num_its = 60; auto num_its = 60;
...@@ -69,9 +69,9 @@ int main(int argc, char* argv[]) ...@@ -69,9 +69,9 @@ int main(int argc, char* argv[])
MatDense<FPP,GPU2> M1, M2; MatDense<FPP,GPU2> M1, M2;
M = MatDense<FPP,Cpu>::randMat(dim1,dim2); M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
// M = &D; // M = &D;
// M->Display(); // M->Display();
// *M *= FPP(100); // *M *= FPP(100);
M1 = *M; M1 = *M;
M2 = *M; M2 = *M;
...@@ -81,37 +81,41 @@ int main(int argc, char* argv[]) ...@@ -81,37 +81,41 @@ int main(int argc, char* argv[])
for(int i=0; i < num_its; i++) for(int i=0; i < num_its; i++)
{ {
pure_gpu_prox_start = std::chrono::steady_clock::now(); for(int pos=0;pos < 2; pos++)
prox_sp(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now();
pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
prox_sp(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
assert(M1.getNonZeros() == M2.getNonZeros());
M1 -= M2;
auto err = M1.norm();
#ifdef DEBUG
if(i == 0)
{ {
cout << "M1 norm:" << M1.norm() << endl; pure_gpu_prox_start = std::chrono::steady_clock::now();
M1.Display(); prox_sp(M1, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ true);
cout << "M2 norm:" << M2.norm() << endl; pure_gpu_prox_end = std::chrono::steady_clock::now();
M2.Display(); pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
cout << "err:" << err << endl;
} gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
prox_sp(M2, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
assert(M1.getNonZeros() == M2.getNonZeros());
M1 -= M2;
auto err = M1.norm();
#ifdef DEBUG
if(i == 0)
{
cout << "M1 norm:" << M1.norm() << endl;
M1.Display();
cout << "M2 norm:" << M2.norm() << endl;
M2.Display();
cout << "err:" << err << endl;
}
#endif #endif
assert(err < 1e-6); assert(err < 1e-6);
M1 = *M; M1 = *M;
M2 = *M; M2 = *M;
}
} }
cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl; cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl; cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl; cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl;
delete M;
return 0; return 0;
} }
...@@ -49,7 +49,7 @@ int main(int argc, char* argv[]) ...@@ -49,7 +49,7 @@ int main(int argc, char* argv[])
std::cout << "M nrows: " << dim1 << std::endl; std::cout << "M nrows: " << dim1 << std::endl;
std::cout << "M ncols: " << dim2 << std::endl; std::cout << "M ncols: " << dim2 << std::endl;
std::cout << (row_or_col==1?"prox_spcol":"prox_splin") << " k: " << k << std::endl; std::cout << (row_or_col==1?"prox_spcol":"prox_splin") << " k: " << k << std::endl;
std::cout << "benchmark num_its: " << num_its << std::endl; std::cout << "benchmark num_its (* 2 for pos==true and pos==false): " << num_its << std::endl;
MatDense<FPP,Cpu>* M; MatDense<FPP,Cpu>* M;
MatDense<FPP,GPU2> M1, M2; MatDense<FPP,GPU2> M1, M2;
...@@ -64,32 +64,39 @@ int main(int argc, char* argv[]) ...@@ -64,32 +64,39 @@ int main(int argc, char* argv[])
for(int i=0; i < num_its; i++) for(int i=0; i < num_its; i++)
{ {
pure_gpu_prox_start = std::chrono::steady_clock::now(); for(int pos=0;pos < 2; pos++)
if(row_or_col) {
prox_spcol(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true); pure_gpu_prox_start = std::chrono::steady_clock::now();
else if(row_or_col)
prox_splin(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true); prox_spcol(M1, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now(); else
pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start; prox_splin(M1, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_start = std::chrono::steady_clock::now(); pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
if(row_or_col)
prox_spcol(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false); gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
else if(row_or_col)
prox_splin(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false); prox_spcol(M2, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now(); else
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start; prox_splin(M2, k, /* normalized */ true, /* pos*/ pos, /* pure_gpu */ false);
M1 -= M2; gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
auto err = M1.norm(); gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
// cout << "err:" << err << endl; cout << "M1.norm():" << M1.norm() << " nnz:" << M1.getNonZeros() << endl;
assert(err < 1e-6); cout << "M2.norm():" << M2.norm() << " nnz:" << M2.getNonZeros() << endl;
M1 = *M; M1 -= M2;
M2 = *M; auto err = M1.norm();
cout << "err:" << err << endl;
assert(err < 1e-6);
M1 = *M;
M2 = *M;
}
} }
cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl; cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl; cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl; cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl;
delete M;
return 0; return 0;
} }
...@@ -102,11 +102,17 @@ namespace Faust ...@@ -102,11 +102,17 @@ namespace Faust
template<typename FPP> faust_unsigned_int sparse_size(faust_unsigned_int nnz, faust_unsigned_int nrows); template<typename FPP> faust_unsigned_int sparse_size(faust_unsigned_int nnz, faust_unsigned_int nrows);
template<typename FPP> faust_unsigned_int dense_size(faust_unsigned_int nrows, faust_unsigned_int ncols); template<typename FPP> faust_unsigned_int dense_size(faust_unsigned_int nrows, faust_unsigned_int ncols);
/** /**
* Decides which output format to use when applying the SP prox. op. to M. Either M or spM is the output, it depends on the byte size. The minimum memory footprint is targeted. * Decides which output format to use when applying the SP prox. op. to M. Either M or spM is the output, it depends on the byte sizes. The minimum memory footprint is targeted.
* *
* \param forcedType: used to choose explicitly the output format with values Sparse or Dense (MatSparse or MatDense). * \param M: the input matrix to project (which could be the output too, if dense format is chosen).
*/ * \param spM: the output matrix if the output format eventually chosen is sparse.
* \param k: the sparsity parameter (pseudo-)norm_1 of the output matrix.
* \param normalized: true to normalize the output matrix.
* \param pos: true to filter negative values of M before applying the prox.
* \param forcedType: used to choose explicitly the output format with values Sparse or Dense (MatSparse or MatDense).
* \return the prox image as a MatGeneric matrix.
*/
template<typename FPP> template<typename FPP>
MatGeneric<FPP,Cpu>* prox_sp(MatDense<FPP,Cpu> & M, MatSparse<FPP, Cpu> & spM, faust_unsigned_int k, const bool normalized=true, const bool pos=false, const MatType forcedType=None); MatGeneric<FPP,Cpu>* prox_sp(MatDense<FPP,Cpu> & M, MatSparse<FPP, Cpu> & spM, faust_unsigned_int k, const bool normalized=true, const bool pos=false, const MatType forcedType=None);
} }
......
...@@ -387,9 +387,9 @@ Faust::pre_prox_pos(MatDense<FPP,Cpu> & M) ...@@ -387,9 +387,9 @@ Faust::pre_prox_pos(MatDense<FPP,Cpu> & M)
// bool is_cplx = typeid(ptr_data[0])==typeid(std::complex<double>())||typeid(ptr_data[0])==typeid(std::complex<float>()); // bool is_cplx = typeid(ptr_data[0])==typeid(std::complex<double>())||typeid(ptr_data[0])==typeid(std::complex<float>());
//don't want to duplicate the function for all realizations of template we need //don't want to duplicate the function for all realizations of template we need
//so we use a little trick to make the code valid for double/float and complex<double>/complex<float> //so we use a little trick to make the code valid for double/float and complex<double>/complex<float>
bool is_cplx = std::is_same<FPP,complex<double>>::value || std::is_same<FPP, complex<float>>::value; bool is_real = std::is_same<FPP, Real<FPP>>::value;
for (int i=0;i<(M.getNbRow() * M.getNbCol());i++) for (int i=0;i<(M.getNbRow() * M.getNbCol());i++)
if (!is_cplx && std::complex<float>(ptr_data[i]).real() < 0) if (is_real && std::complex<float>(ptr_data[i]).real() < 0)
ptr_data[i]=0; ptr_data[i]=0;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment