Mentions légales du service

Skip to content
Snippets Groups Projects
Commit a3672b36 authored by hhakim's avatar hhakim
Browse files

Add prox_sp GPU2 implementation with gpu_mod@422d2d79 and a test/benchmark (test_prox_sp_gpu).

parent 49113b78
Branches
Tags
No related merge requests found
Subproject commit 70d319e0dc518f3d490de756695a69ef4e17bd32
Subproject commit 422d2d799162e4a6611c1088b929d2e3a819547b
......@@ -206,7 +206,7 @@ if(MATIO_LIB_FILE AND MATIO_INC_DIR AND BUILD_READ_MAT_FILE AND NOT NOCPPTESTS)
endif()
# GPU-only tests: registered only when the gpu_mod backend is enabled.
# test_prox_sp_gpu checks/benchmarks the GPU2 prox_sp implementation.
if(USE_GPU_MOD)
	list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod hierarchical2020_gpu2 hierarchical2020Hadamard_gpu2 MEG_factorization test_prox_sp_gpu)
endif()
foreach(TEST_FPP float double complex<float> complex<double>)
......
#include "faust_MatDense.h"
#include "faust_MatDense_gpu.h"
#include "faust_prox_gpu.h"
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <memory>
/** \brief unit test and benchmark for prox_sp on a GPU2 MatDense: compares the pure GPU implementation with the GPU->CPU->GPU round-trip one.
*/
typedef @TEST_FPP@ FPP;
using namespace Faust;
using namespace std;
//void print_mat_data(MatDense<FPP,Cpu>& mat)
//{
// int nrows = mat.getNbRow(), ncols = mat.getNbCol();
// for (int i=0;i < nrows; i++)
// {
// for(int j=0;j < ncols; j++)
// cout << mat.getData()[j*nrows+i] << " ";
// cout << endl;
// }
//}
int main(int argc, char* argv[])
{
Faust::enable_gpu_mod();
faust_unsigned_int dim1 = 204;
faust_unsigned_int dim2 = 204;
auto num_its = 30000;
auto k = dim1/10;
if(argc > 1)
{
num_its = std::atoi(argv[1]);
if(argc > 2)
{
dim1 = dim2 = std::atoi(argv[2]);
if(argc > 3)
{
k = std::atoi(argv[3]);
}
}
}
MatDense<FPP,Cpu>* M;
MatDense<FPP,GPU2> M1, M2;
M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
*M *= FPP(100);
M1 = *M;
M2 = *M;
std::chrono::time_point<std::chrono::steady_clock> pure_gpu_prox_start, pure_gpu_prox_end, gpu_cpu_rt_prox_start, gpu_cpu_rt_prox_end;
std::chrono::duration<double> pure_gpu_prox_dur, gpu_cpu_rt_prox_dur;
for(int i=0; i < num_its; i++)
{
pure_gpu_prox_start = std::chrono::steady_clock::now();
prox_sp(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now();
pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
prox_sp(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
M1 -= M2;
auto err = M1.norm();
// cout << "err:" << err << endl;
assert(err < 1e-6);
M1 = *M;
M2 = *M;
}
cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
return 0;
}
......@@ -710,4 +710,11 @@ namespace Faust
{
return typeid(Faust::MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>) == typeid(Faust::MatDense<double, GPU2>);
}
/**
 * \brief Specialization of MatDense<FPP,GPU2>::prox_sp for the scalar type
 * substituted for FAUST_SCALAR_FOR_GM at configuration time.
 *
 * Looks up the gpu_mod dense-matrix function table matching this scalar type
 * and forwards the call, together with this object's GPU matrix handle
 * (gpu_mat), to its prox_sp entry.
 *
 * \param k sparsity argument, forwarded unchanged to gpu_mod.
 * \param normalized forwarded unchanged to gpu_mod.
 * \param pos forwarded unchanged to gpu_mod.
 */
template<>
void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::prox_sp(int32_t k, bool normalized/*=false*/, bool pos/*=false*/) const
{
	// The scalar argument given to dsm_funcs() only selects the function table for this scalar type.
	GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0))->prox_sp(gpu_mat, k, normalized, pos);
}
};
......@@ -129,6 +129,7 @@ namespace Faust
MatDense<FPP,GPU2>* get_cols(faust_unsigned_int* col_ids, faust_unsigned_int n) const;
void copyBuf(FPP* dst_cpu_buf, const void* stream=nullptr) const;
bool isReal() const;
/**
 * \brief Applies the gpu_mod prox_sp function to this matrix's GPU handle (gpu_mat).
 *
 * \param k sparsity argument forwarded to gpu_mod (presumably the number of entries kept -- TODO confirm against gpu_mod).
 * \param normalized forwarded to gpu_mod (presumably requests normalization of the result -- TODO confirm).
 * \param pos forwarded to gpu_mod (presumably restricts the projection to nonnegative entries -- TODO confirm).
 *
 * NOTE(review): declared const although the free function prox_sp() relies on it to update the matrix; presumably the GPU data behind gpu_mat is modified -- confirm.
 */
void prox_sp(int32_t k, bool normalized=false, bool pos=false) const;
protected:
gm_DenseMat_t gpu_mat;
......
......@@ -52,7 +52,7 @@
namespace Faust
{
/**
 * \brief Applies the prox_sp projection to a GPU2 dense matrix, in place.
 *
 * \param M the GPU matrix to project; it receives the result.
 * \param k sparsity argument passed to the underlying prox_sp implementation.
 * \param normalized passed to the underlying prox_sp implementation.
 * \param pos passed to the underlying prox_sp implementation.
 * \param pure_gpu if true the computation is delegated to MatDense<FPP,GPU2>::prox_sp,
 * otherwise M is copied to the CPU, projected with the CPU prox_sp and copied back.
 */
template<typename FPP>
void prox_sp(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false, const bool pure_gpu=true);
template<typename FPP>
void prox_sp_pos(MatDense<FPP, GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false );
template<typename FPP>
......
......@@ -47,11 +47,18 @@
namespace Faust
{
/**
 * \brief Applies the prox_sp projection to the GPU2 dense matrix M, in place.
 *
 * Two code paths are available:
 *  - pure_gpu == true: the call is delegated to MatDense<FPP,GPU2>::prox_sp,
 *  - pure_gpu == false: M is copied to a CPU MatDense, the CPU prox_sp
 *    overload is applied to that copy, and the result is assigned back to M.
 *
 * \param M the matrix to project; it receives the result.
 * \param k sparsity argument forwarded to the selected implementation.
 * \param normalized forwarded to the selected implementation.
 * \param pos forwarded to the selected implementation.
 * \param pure_gpu selects the code path (see above).
 */
template<typename FPP>
void prox_sp(MatDense<FPP,GPU2> & M, faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/, const bool pure_gpu/*=true*/)
{
	if(pure_gpu)
	{
		// NOTE(review): k is implicitly converted from faust_unsigned_int to
		// the int32_t parameter of the member function -- presumably k always
		// fits, confirm for very large matrices.
		M.prox_sp(k, normalized, pos);
	}
	else
	{
		// GPU -> CPU -> GPU round-trip, only performed when explicitly requested.
		MatDense<FPP,Cpu> cpuM = M.tocpu();
		prox_sp(cpuM, k, normalized, pos);
		M = cpuM;
	}
}
template<typename FPP>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment