Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 9d73e9ca authored by hhakim's avatar hhakim
Browse files

Add prox_spcol and prox_splin pure GPU implementations along with a test/benchmark.

Update to gpu_mod@79e7daee
parent b242af8b
Branches
No related tags found
No related merge requests found
Subproject commit f94b91bec5f095e99199eece785c86d4612ec477
Subproject commit b0f0915ffad0f13363a865ff3466387d5d405cd6
......@@ -206,7 +206,7 @@ if(MATIO_LIB_FILE AND MATIO_INC_DIR AND BUILD_READ_MAT_FILE AND NOT NOCPPTESTS)
endif()
if(USE_GPU_MOD)
list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod hierarchical2020_gpu2 hierarchical2020Hadamard_gpu2 MEG_factorization test_prox_sp_gpu)
list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod hierarchical2020_gpu2 hierarchical2020Hadamard_gpu2 MEG_factorization test_prox_sp_gpu test_prox_splin_spcol_gpu)
endif()
foreach(TEST_FPP float double complex<float> complex<double>)
......
#include "faust_MatDense.h"
#include "faust_MatDense_gpu.h"
#include "faust_prox_gpu.h"
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
/** \brief Unit test and benchmark comparing the pure GPU and the GPU/CPU round-trip implementations of prox_splin and prox_spcol.
*/
typedef @TEST_FPP@ FPP;
using namespace Faust;
using namespace std;
int main(int argc, char* argv[])
{
Faust::enable_gpu_mod();
faust_unsigned_int dim1 = 204;
faust_unsigned_int dim2 = 204;
auto num_its = 30000;
int32_t k = dim1/10;
int row_or_col = 0; // row by deft
if(argc > 1)
{
num_its = std::atoi(argv[1]);
if(argc > 2)
{
dim1 = dim2 = std::atoi(argv[2]);
if(argc > 3)
{
k = std::atoi(argv[3]);
if(argc > 4)
{
row_or_col = std::atoi(argv[4]);
}
}
else
{
k = dim1/10;
}
}
}
MatDense<FPP,Cpu>* M;
MatDense<FPP,GPU2> M1, M2;
M = MatDense<FPP,Cpu>::randMat(dim1,dim2);
*M *= FPP(100);
M1 = *M;
M2 = *M;
std::chrono::time_point<std::chrono::steady_clock> pure_gpu_prox_start, pure_gpu_prox_end, gpu_cpu_rt_prox_start, gpu_cpu_rt_prox_end;
std::chrono::duration<double> pure_gpu_prox_dur, gpu_cpu_rt_prox_dur;
for(int i=0; i < num_its; i++)
{
pure_gpu_prox_start = std::chrono::steady_clock::now();
if(row_or_col)
prox_spcol(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
else
prox_splin(M1, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ true);
pure_gpu_prox_end = std::chrono::steady_clock::now();
pure_gpu_prox_dur += pure_gpu_prox_end-pure_gpu_prox_start;
gpu_cpu_rt_prox_start = std::chrono::steady_clock::now();
if(row_or_col)
prox_spcol(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
else
prox_splin(M2, k, /* normalized */ false, /* pos*/ false, /* pure_gpu */ false);
gpu_cpu_rt_prox_end = std::chrono::steady_clock::now();
gpu_cpu_rt_prox_dur += gpu_cpu_rt_prox_end-gpu_cpu_rt_prox_start;
M1 -= M2;
auto err = M1.norm();
// cout << "err:" << err << endl;
assert(err < 1e-6);
M1 = *M;
M2 = *M;
}
cout << "pure gpu prox time:" << pure_gpu_prox_dur.count() << endl;
cout << "gpu2cpu round-trip prox time:" << gpu_cpu_rt_prox_dur.count() << endl;
return 0;
}
......@@ -717,4 +717,18 @@ namespace Faust
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
dsm_funcs->prox_sp(gpu_mat, k, normalized, pos);
}
/**
 * \brief Applies the gpu_mod prox_splin routine to this GPU matrix.
 *
 * The call is forwarded, together with k, normalized and pos, to the prox_splin
 * entry of the dense matrix function table obtained from the GPUModHandler singleton.
 */
template<>
void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::prox_splin(int32_t k, bool normalized/*=false*/, bool pos/*=false*/) const
{
	// The scalar argument of dsm_funcs is presumably used only to pick the function table of this scalar type.
	GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0))->prox_splin(gpu_mat, k, normalized, pos);
}
/**
 * \brief Applies the gpu_mod prox_spcol routine to this GPU matrix.
 *
 * The call is forwarded, together with k, normalized and pos, to the prox_spcol
 * entry of the dense matrix function table obtained from the GPUModHandler singleton.
 */
template<>
void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::prox_spcol(int32_t k, bool normalized/*=false*/, bool pos/*=false*/) const
{
	// The scalar argument of dsm_funcs is presumably used only to pick the function table of this scalar type.
	GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0))->prox_spcol(gpu_mat, k, normalized, pos);
}
};
......@@ -130,6 +130,8 @@ namespace Faust
void copyBuf(FPP* dst_cpu_buf, const void* stream=nullptr) const;
bool isReal() const;
/// Runs the gpu_mod prox_sp routine on gpu_mat with the given k, normalized and pos arguments.
void prox_sp(int32_t k, bool normalized=false, bool pos=false) const;
/// Runs the gpu_mod prox_spcol routine on gpu_mat with the given k, normalized and pos arguments.
void prox_spcol(int32_t k, bool normalized=false, bool pos=false) const;
/// Runs the gpu_mod prox_splin routine on gpu_mat with the given k, normalized and pos arguments.
void prox_splin(int32_t k, bool normalized=false, bool pos=false) const;
protected:
gm_DenseMat_t gpu_mat;
......
......@@ -58,9 +58,9 @@ namespace Faust
/**
 * \brief Applies prox_spcol in place to the GPU matrix M.
 *
 * \param M the matrix to process (overwritten with the result).
 * \param k sparsity argument forwarded to the underlying prox_spcol.
 * \param normalized forwarded to the underlying prox_spcol.
 * \param pos forwarded to the underlying prox_spcol.
 * \param pure_gpu true to run M.prox_spcol directly on the GPU matrix, false to copy M
 *        to a CPU matrix, apply the CPU prox_spcol and assign the result back to M.
 *        Declared here (with a default) so that the declaration matches the
 *        five-parameter definition and existing four-argument calls keep working.
 */
template<typename FPP>
void prox_spcol(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false, const bool pure_gpu=true);
template<typename FPP>
void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false);
void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized=true, const bool pos=false, const bool pure_gpu=true);
template<typename FPP>
void prox_splincol(MatDense<FPP,GPU2> &M,faust_unsigned_int k, const bool normalized=true, const bool pos=false);
void prox_splincol(MatDense<FPP,GPU2> &M,faust_unsigned_int k, const bool normalized=true, const bool pos=false, const bool pure_gpu=true);
template<typename FPP>
void prox_supp(MatDense<FPP,GPU2> & M, const MatDense<FPP,GPU2> & supp, const bool normalized=true, const bool pos=false);
template<typename FPP>
......
......@@ -70,19 +70,33 @@ namespace Faust
}
/**
 * \brief Applies prox_spcol in place to the GPU matrix M.
 *
 * When pure_gpu is true the member function M.prox_spcol(k, normalized, pos) is called.
 * Otherwise M is copied to a CPU matrix with tocpu(), the CPU overload of prox_spcol
 * is applied to that copy and the result is assigned back to M.
 *
 * NOTE(review): this span is a rendered diff hunk -- the first signature (without
 * pure_gpu) and the three statements right after the opening brace look like the
 * pre-change version of the function; confirm against the actual file.
 */
template<typename FPP>
void prox_spcol(MatDense<FPP,GPU2> & M, faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/)
void prox_spcol(MatDense<FPP,GPU2> & M, faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/, const bool pure_gpu/*=true*/)
{
MatDense<FPP,Cpu> cpuM = M.tocpu();
prox_spcol(cpuM, k, normalized, pos);
M = cpuM;
if(pure_gpu)
{
// Pure GPU path: no copy of the matrix to a CPU MatDense is made here.
M.prox_spcol(k, normalized, pos);
}
else
{
// Round-trip path: GPU -> CPU copy, CPU prox, then assignment back to the GPU matrix.
MatDense<FPP,Cpu> cpuM = M.tocpu();
prox_spcol(cpuM, k, normalized, pos);
M = cpuM;
}
}
/**
 * \brief Applies prox_splin in place to the GPU matrix M.
 *
 * When pure_gpu is true the member function M.prox_splin(k, normalized, pos) is called.
 * Otherwise M is copied to a CPU matrix with tocpu(), the CPU overload of prox_splin
 * is applied to that copy and the result is assigned back to M.
 *
 * NOTE(review): this span is a rendered diff hunk -- the first signature (without
 * pure_gpu) and the three statements right after the opening brace look like the
 * pre-change version of the function; confirm against the actual file.
 */
template<typename FPP>
void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/)
void prox_splin(MatDense<FPP,GPU2> & M,faust_unsigned_int k, const bool normalized/*=true*/, const bool pos/*=false*/, const bool pure_gpu/*=true*/)
{
MatDense<FPP,Cpu> cpuM = M.tocpu();
prox_splin(cpuM, k, normalized, pos);
M = cpuM;
if(pure_gpu)
{
// Pure GPU path: no copy of the matrix to a CPU MatDense is made here.
M.prox_splin(k, normalized, pos);
}
else
{
// Round-trip path: GPU -> CPU copy, CPU prox, then assignment back to the GPU matrix.
MatDense<FPP,Cpu> cpuM = M.tocpu();
prox_splin(cpuM, k, normalized, pos);
M = cpuM;
}
}
template<typename FPP>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment