Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ae0b908e authored by hhakim's avatar hhakim
Browse files

Update pure gpu splin, spcol, sp proxs tests (prog. arguments and output).

parent 51b659dc
Branches
Tags
No related merge requests found
......@@ -2,6 +2,8 @@
#include "faust_MatDense_gpu.h"
#include "faust_prox_gpu.h"
#include <iostream>
//#include "faust_init_from_matio_params.h"
//#include "faust_init_from_matio_core.h"
/** \brief unit test and benchmark comparing the pure GPU prox_sp with the GPU-to-CPU round-trip prox_sp
*/
......@@ -29,10 +31,20 @@ int main(int argc, char* argv[])
faust_unsigned_int dim1 = 204;
faust_unsigned_int dim2 = 204;
// auto MatrixFilename = "D.mat";
// Faust::MatDense<FPP,Cpu> D;
// init_faust_mat_from_matio(D,MatrixFilename,"D");
auto num_its = 30000;
auto num_its = 60;
auto k = dim1/10;
auto k = 1632;
char* str_k = getenv("K");
if(str_k)
{
k = std::atoi(str_k);
}
if(argc > 1)
{
......@@ -47,10 +59,19 @@ int main(int argc, char* argv[])
}
}
std::cout << "M nrows: " << dim1 << std::endl;
std::cout << "M ncols: " << dim2 << std::endl;
cout << "k:" << k << endl;
std::cout << "benchmark num_its: " << num_its << std::endl;
MatDense<FPP,Cpu>* M;
MatDense<FPP,GPU2> M1, M2;
M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
*M *= FPP(100);
// M = &D;
// M->Display();
// *M *= FPP(100);
M1 = *M;
M2 = *M;
......@@ -61,17 +82,28 @@ int main(int argc, char* argv[])
for(int i=0; i < num_its; i++)
{
pure_gpu_prox_start = std::chrono::steady_clock::now();
prox_sp(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
prox_sp(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now();
pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
prox_sp(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
prox_sp(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
assert(M1.getNonZeros() == M2.getNonZeros());
M1 -= M2;
auto err = M1.norm();
// cout << "err:" << err << endl;
#ifdef DEBUG
if(i == 0)
{
cout << "M1 norm:" << M1.norm() << endl;
M1.Display();
cout << "M2 norm:" << M2.norm() << endl;
M2.Display();
cout << "err:" << err << endl;
}
#endif
assert(err < 1e-6);
M1 = *M;
M2 = *M;
......@@ -79,6 +111,7 @@ int main(int argc, char* argv[])
cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl;
return 0;
}
......@@ -17,13 +17,13 @@ int main(int argc, char* argv[])
{
Faust::enable_gpu_mod();
faust_unsigned_int dim1 = 204;
faust_unsigned_int dim2 = 204;
faust_unsigned_int dim2 = 8193;
auto num_its = 30000;
auto num_its = 270;
int32_t k = dim1/10;
int32_t k = 10;
int row_or_col = 0; // row by deft
int row_or_col = 1; // col by deft
if(argc > 1)
{
......@@ -46,6 +46,11 @@ int main(int argc, char* argv[])
}
}
std::cout << "M nrows: " << dim1 << std::endl;
std::cout << "M ncols: " << dim2 << std::endl;
std::cout << (row_or_col==1?"prox_spcol":"prox_splin") << " k: " << k << std::endl;
std::cout << "benchmark num_its: " << num_its << std::endl;
MatDense<FPP,Cpu>* M;
MatDense<FPP,GPU2> M1, M2;
M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
......@@ -61,17 +66,17 @@ int main(int argc, char* argv[])
{
pure_gpu_prox_start = std::chrono::steady_clock::now();
if(row_or_col)
prox_spcol(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
prox_spcol(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
else
prox_splin(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
prox_splin(M1, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now();
pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
if(row_or_col)
prox_spcol(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
prox_spcol(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
else
prox_splin(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
prox_splin(M2, k, /* normalized */ true, /* pos*/ false, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
M1 -= M2;
......@@ -84,6 +89,7 @@ int main(int argc, char* argv[])
cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
cout << "pure gpu speedup: " << gpu_cpu_rt_prox_dur.count()/pure_gpu_prox_dur.count() << endl;
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment