Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 9caa4a4e authored by hhakim's avatar hhakim
Browse files

Implement TransformHelper<FPP,GPU2>::optimize_multiply (only for DEFAULT_L2R...

Implement TransformHelper<FPP,GPU2>::optimize_multiply (only for DEFAULT_L2R and DYNPROG) and refactor TransformHelper::optimize* functions in parent TransformHelperGen (for CPU and GPU) + minor fixes in cpp python wrapper of optimize* functions.
parent 962ecc10
No related branches found
No related tags found
No related merge requests found
......@@ -199,11 +199,7 @@ namespace Faust
* \param npasses: the number of passes to run, by default it goes until the optimal Faust is obtained.
*/
virtual TransformHelper<FPP,Cpu>* pruneout(const int nnz_tres, const int npasses=-1, const bool only_forward=false);
/**
 * \brief Chains pruneout(0), optimize_storage(false) and an in-place optimize_time(transp) on the result, which is returned.
 * \param transp: forwarded to optimize_time.
 */
TransformHelper<FPP,Cpu>* optimize(const bool transp=false);
/**
 * \brief Generic entry point used by optimize_time_full and optimize_time_prod.
 * \param f: the operation handed over by the callers (a get_product() or a multiply() call).
 * \param op_name: label of the operation ("Faust-toarray" or "Faust-matrix product" for the callers visible here).
 * NOTE(review): presumably benchmarks f over the available product methods; the CPU body is not visible here -- confirm.
 */
virtual TransformHelper<FPP,Cpu>* optimize_multiply(std::function<void()> f, const bool transp=false, const bool inplace=false, const int nsamples=1, const char* op_name="unamed_op");
/**
 * \brief Forwards to optimize_time_full with the same arguments.
 */
virtual TransformHelper<FPP,Cpu>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
/**
 * \brief Calls optimize_multiply with get_product() as the operation, labelled "Faust-toarray".
 */
virtual TransformHelper<FPP,Cpu>* optimize_time_full(const bool transp=false, const bool inplace=false, const int nsamples=1);
/**
 * \brief Calls optimize_multiply with multiply(*test_mat) as the operation, labelled "Faust-matrix product".
 * \param test_mat: must be a MatDense or a MatSparse, otherwise std::runtime_error is thrown.
 */
TransformHelper<FPP,Cpu>* optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp=false, const bool inplace=false, const int nsamples=1);
/**
\brief Returns the left hand side factors of this from index 0 to id included (as a new TransformHelper obj).
......
......@@ -407,55 +407,6 @@ namespace Faust {
}
}
/**
 * \brief Builds an optimized Faust from this one.
 *
 * Pending slicing/indexing is evaluated first, then the Faust is pruned
 * (pruneout with nnz_tres=0), passed to optimize_storage(false) and finally
 * optimized in place with optimize_time.
 *
 * \param transp: forwarded to optimize_time.
 * \return the pointer produced by optimize_storage (the intermediate pruned object is deleted here).
 */
template<typename FPP>
	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize(const bool transp /* deft to false */)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		//TODO: need a nsamples argument to feed optimize_time*
		Faust::TransformHelper<FPP,Cpu>* pruned = this->pruneout(/*nnz_tres=*/0);
		Faust::TransformHelper<FPP,Cpu>* optimized = pruned->optimize_storage(false);
		// the pruned Faust was only an intermediate step
		delete pruned;
		optimized->optimize_time(transp, /*inplace=*/ true);
		return optimized;
	}
/**
 * \brief Selects the quickest method for the Faust "toarray" operation.
 *
 * Evaluates pending slicing/indexing, then delegates to optimize_time_full.
 *
 * \param transp, inplace, nsamples: forwarded unchanged to optimize_time_full.
 * \return whatever optimize_time_full returns.
 */
template<typename FPP>
	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_time(const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		return this->optimize_time_full(transp, inplace, nsamples);
	}
/**
 * \brief Runs optimize_multiply with the full product (get_product) as the operation.
 *
 * \param transp, inplace, nsamples: forwarded unchanged to optimize_multiply.
 * \return whatever optimize_multiply returns.
 */
template<typename FPP>
	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_time_full(const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		// the operation handed to optimize_multiply: the result of the product is discarded
		auto toarray_op = [this]()
		{
			this->get_product();
		};
		return this->optimize_multiply(toarray_op, transp, inplace, nsamples, "Faust-toarray");
	}
/**
 * \brief Runs optimize_multiply with the product Faust * test_mat as the operation.
 *
 * \param test_mat: the matrix to multiply by; it must be a MatDense or a MatSparse.
 * \param transp, inplace, nsamples: forwarded unchanged to optimize_multiply.
 * \return whatever optimize_multiply returns.
 * \throws std::runtime_error if test_mat is neither a MatDense nor a MatSparse
 *         (a null pointer fails both casts and ends up here too).
 */
template<typename FPP>
	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		// find the concrete type of the test matrix (at most one of the two casts succeeds)
		auto md = dynamic_cast<const MatDense<FPP,Cpu>*>(test_mat);
		auto ms = dynamic_cast<const MatSparse<FPP,Cpu>*>(test_mat);
		if(! md && ! ms)
			throw std::runtime_error("optimize_time_prod supports only MatDense or MatSparse benchmarking.");
		return this->optimize_multiply([this, ms, md]()
				{
					if(md) this->multiply(*md);
					else /* ms != nullptr */ this->multiply(*ms);
				}, transp, inplace, nsamples, "Faust-matrix product");
	}
template<typename FPP>
TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_multiply(std::function<void()> f, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples, const char* op_name)
{
......
......@@ -42,7 +42,8 @@ namespace Faust
// get_product overloads: one takes a GPU MatDense, the other a CPU MatDense.
void get_product(MatDense<FPP,GPU2>& M, int prod_mod=-1);
void get_product(MatDense<FPP,Cpu>& M, int prod_mod=-1);
// multiply overloads, distinguished by the type of the operand.
MatDense<FPP,GPU2> multiply(const MatDense<FPP,GPU2> &A);
// NOTE(review): this declaration and the MatDense<FPP,Cpu> one two lines below look like the same signature
// (old and new line of the diff) -- only one of them can remain in the class.
MatDense<FPP,Cpu> multiply(const Faust::MatDense<FPP,Cpu> &A);
// Sparse operand: A is converted to a GPU MatDense and the dense overload is called.
MatDense<FPP,GPU2> multiply(const MatSparse<FPP,GPU2> &A) { return multiply(MatDense<FPP,GPU2>(A));}
MatDense<FPP,Cpu> multiply(const MatDense<FPP,Cpu> &A);
TransformHelper<FPP,GPU2>* multiply(const FPP& a);
TransformHelper<FPP,GPU2>* multiply(const TransformHelper<FPP,GPU2>*);
Vect<FPP,GPU2> multiply(const Faust::Vect<FPP,GPU2>& a);
......@@ -86,18 +87,13 @@ namespace Faust
const bool permutation=false,
const bool inplace=false,
const bool check_transpose=true);
// The GPU definition of set_FM_mul_mode shown in this diff throws std::runtime_error ("yet to implement").
void set_FM_mul_mode(const int mul_order_opt_mode, const bool silent=false) const;
// The GPU definition of set_Fv_mul_mode shown in this diff throws std::runtime_error ("yet to implement").
void set_Fv_mul_mode(const int Fv_mul_mode) const;
faust_unsigned_int get_total_nnz() const;
// faust_unsigned_int get_fact_nnz(const faust_unsigned_int id) const;
TransformHelper<FPP,GPU2>* normalize(const int meth /* 1 for 1-norm, 2 for 2-norm (2-norm), -1 for inf-norm */) const;
TransformHelper<FPP,GPU2>* transpose();
TransformHelper<FPP,GPU2>* conjugate();
TransformHelper<FPP,GPU2>* adjoint();
// The GPU definition of optimize_time shown in this diff throws std::runtime_error ("yet to implement").
TransformHelper<FPP,GPU2>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
// The GPU definition of optimize_time_prod shown in this diff throws std::runtime_error ("yet to implement").
// Note that this declaration carries no default arguments (they are only recalled in comments).
TransformHelper<FPP,GPU2>* optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/);
TransformHelper<FPP,GPU2>* optimize(const bool transp=false);
// Times f under the DEFAULT_L2R and DYNPROG product methods (nsamples calls each) and selects the quickest:
// on this object if inplace is true, on a newly allocated TransformHelper otherwise.
TransformHelper<FPP,GPU2>* optimize_multiply(std::function<void()> f, const bool transp=false, const bool inplace=false, const int nsamples=1, const char* op_name="unamed_op");
TransformHelper<FPP,GPU2>* clone(int32_t dev_id=-1, void* stream=nullptr);
void get_fact(const faust_unsigned_int id,
int* rowptr,
......
......@@ -501,18 +501,6 @@ namespace Faust
return v;
}
/**
 * \brief Placeholder: setting the Faust-matrix product mode is not available here.
 *
 * Both arguments are ignored.
 *
 * \throws std::runtime_error unconditionally.
 */
template<typename FPP>
	void TransformHelper<FPP,GPU2>::set_FM_mul_mode(const int mul_order_opt_mode, const bool silent/*=false*/) const
	{
		const std::runtime_error not_implemented("set_FM_mul_mode is yet to implement in Faust C++ core for GPU.");
		throw not_implemented;
	}
/**
 * \brief Placeholder: setting the Faust-vector product mode is not available here.
 *
 * The argument is ignored.
 *
 * \throws std::runtime_error unconditionally.
 */
template<typename FPP>
	void TransformHelper<FPP,GPU2>::set_Fv_mul_mode(const int Fv_mul_mode) const
	{
		const std::runtime_error not_implemented("set_Fv_mul_mode is yet to implement in Faust C++ core for GPU.");
		throw not_implemented;
	}
template<typename FPP>
void TransformHelper<FPP,GPU2>::pop_front()
{
......@@ -714,38 +702,58 @@ namespace Faust
}
return nbytes;
}
/**
 * \brief Placeholder: optimize_time is not available in this GPU specialization.
 *
 * All arguments are ignored.
 *
 * \throws std::runtime_error unconditionally (so nothing is ever returned).
 */
template<typename FPP>
	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize_time(const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/)
	{
		throw std::runtime_error("optimize_time is yet to implement in Faust C++ core for GPU.");
		// Disabled draft of an implementation going through a CPU copy, kept for reference:
		//		TransformHelper<FPP,Cpu> th;
		//		this->tocpu(th);
		//		auto thn = th.optimize_time(transp, /*inplace*/ true, nsamples);
		//		auto gpu_thn = new TransformHelper<FPP,GPU2>(*thn, -1, nullptr);
		//		delete thn;
		//		return gpu_thn;
	}
/**
 * \brief Placeholder: optimize_time_prod is not available in this GPU specialization.
 *
 * All arguments are ignored.
 *
 * \throws std::runtime_error unconditionally (so nothing is ever returned).
 */
template<typename FPP>
	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/)
	{
		throw std::runtime_error("optimize_time_prod is yet to implement in Faust C++ core for GPU.");
	}
/**
 * \brief Placeholder: optimize is not available in this GPU specialization.
 *
 * The argument is ignored.
 *
 * \throws std::runtime_error unconditionally (so nothing is ever returned).
 */
template<typename FPP>
	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize(const bool transp/*=false*/)
	{
		throw std::runtime_error("optimize is yet to implement in Faust C++ core for GPU.");
		// Disabled draft of an implementation going through a CPU copy, kept for reference:
		//		TransformHelper<FPP,Cpu> th;
		//		this->tocpu(th);
		//		auto thn = th.optimize(transp);
		//		auto gpu_thn = new TransformHelper<FPP,GPU2>(*thn, -1, nullptr);
		//		delete thn;
		//		return gpu_thn;
	}

/**
 * \brief Times the operation f under each supported product method and selects the quickest one.
 *
 * Only DEFAULT_L2R and DYNPROG are tried. For each of them the method is set
 * on this object with set_FM_mul_mode and f is called nsamples times.
 *
 * \param f: the operation to time; it is expected to run a product on this object so that the method set here matters.
 * \param transp: not used by this implementation.
 * \param inplace: if true the quickest method is set on this object, which is returned;
 *        otherwise a new TransformHelper is allocated with new, receives the quickest method,
 *        and this object gets back the method it had on entry.
 * \param nsamples: number of calls of f per method (with nsamples <= 0, f is never called and the measured times are meaningless).
 * \param op_name: label of the operation, only used in the message printed when inplace is false.
 * \return this if inplace is true, the newly allocated object otherwise.
 *
 * If f (or set_FM_mul_mode) throws during the measurements, the method this
 * object had on entry is restored before the exception is propagated.
 */
template<typename FPP>
	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize_multiply(std::function<void()> f, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples, const char* op_name)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		const std::vector<std::string> meth_names = {"DEFAULT_L2R", "DYNPROG"}; //TODO: it should be a function of faust_prod_opt module
		const std::vector<int> meth_ids = {DEFAULT_L2R, DYNPROG};
		const int NMETS = static_cast<int>(meth_ids.size());
		// a vector (rather than new[]) so that nothing leaks if f throws
		std::vector<std::chrono::duration<double>> times(NMETS);
		const int old_meth = this->get_mul_order_opt_mode();
		const int nmuls = nsamples;
#if DEBUG_OPT_MUL
		std::cout << "nsamples used to measure time: " << nmuls << std::endl;
#endif
		try
		{
			for(int i=0; i < NMETS; i++)
			{
				this->set_FM_mul_mode(meth_ids[i]);
				// steady_clock: intervals must not be affected by adjustments of the system time
				const auto start = std::chrono::steady_clock::now();
				for(int j=0; j < nmuls; j++)
				{
					f();
				}
				const auto end = std::chrono::steady_clock::now();
				times[i] = end-start;
			}
		}
		catch(...)
		{
			// do not leave this Faust with a method set only for the measurements
			this->set_FM_mul_mode(old_meth);
			throw;
		}
		// index of the smallest time (on a tie the later method wins, as before)
		int opt_meth = 0;
		for(int i=1; i < NMETS; i++)
		{
			if(! (times[opt_meth] < times[i]))
				opt_meth = i;
		}
		TransformHelper<FPP,GPU2>* t_opt = nullptr;
		if(inplace)
		{
			this->set_FM_mul_mode(meth_ids[opt_meth]);
			t_opt = this;
		}
		else
		{
			// NOTE(review): the meaning of the constructor arguments (1.0, false, false, true) is not visible here -- confirm.
			t_opt = new TransformHelper<FPP, GPU2>(this->transform->data, 1.0, false, false, true);
			std::cout << "best method measured in time on operation "<< op_name << " is: " << meth_names[opt_meth] << std::endl;
#if DEBUG_OPT_MUL
			std::cout << "all times: ";
			for(int i = 0; i < NMETS; i ++)
				std::cout << times[i].count() << " ";
			std::cout << std::endl;
#endif
			t_opt->set_FM_mul_mode(meth_ids[opt_meth]);
			// leave the current Faust unchanged
			this->set_FM_mul_mode(old_meth);
		}
		t_opt->copy_transconj_state(*this);
		return t_opt;
	}
template<typename FPP>
......
......@@ -92,7 +92,18 @@ namespace Faust
virtual TransformHelper<FPP, DEV>* slice(faust_unsigned_int start_row_id, faust_unsigned_int end_row_id,
faust_unsigned_int start_col_id, faust_unsigned_int end_col_id);
TransformHelper<FPP, DEV>* fancy_index(faust_unsigned_int* row_ids, faust_unsigned_int num_rows, faust_unsigned_int* col_ids, faust_unsigned_int num_cols);
// Pure virtual: the products below are used by the generic optimize_time* functions and must be
// implemented by the device-specific classes.
virtual MatDense<FPP,DEV> get_product(const int mul_order_opt_mode=-1)=0;
virtual MatDense<FPP, DEV> multiply(const MatDense<FPP,DEV> &A) =0;
virtual MatDense<FPP, DEV> multiply(const MatSparse<FPP,DEV> &A) =0;
// Pure virtual: called by the generic optimize() with nnz_tres=0.
virtual TransformHelper<FPP,DEV>* pruneout(const int nnz_tres, const int npasses=-1, const bool only_forward=false)=0;
virtual TransformHelper<FPP,DEV>* optimize_storage(const bool time=false);
// Pure virtual: receives the operation built by optimize_time_full / optimize_time_prod.
virtual TransformHelper<FPP,DEV>* optimize_multiply(std::function<void()> f, const bool transp=false, const bool inplace=false, const int nsamples=1, const char* op_name="unamed_op")=0;
// Forwards to optimize_time_full with the same arguments.
virtual TransformHelper<FPP,DEV>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
// Calls optimize_multiply with get_product() as the operation, labelled "Faust-toarray".
virtual TransformHelper<FPP,DEV>* optimize_time_full(const bool transp=false, const bool inplace=false, const int nsamples=1);
// Calls optimize_multiply with multiply(*test_mat) as the operation; test_mat must be a MatDense or a MatSparse
// (std::runtime_error otherwise).
virtual TransformHelper<FPP,DEV>* optimize_time_prod(const MatGeneric<FPP, DEV>* test_mat, const bool transp=false, const bool inplace=false, const int nsamples=1);
// Chains pruneout(0), optimize_storage(false) and an in-place optimize_time(transp) on the result, which is returned.
TransformHelper<FPP,DEV>* optimize(const bool transp=false);
virtual TransformHelper<FPP,DEV>* clone();
virtual void convertToSparse()=0;
virtual void convertToDense()=0;
......
......@@ -740,6 +740,55 @@ namespace Faust
}
/**
 * \brief Selects the quickest method for the Faust "toarray" operation.
 *
 * Evaluates pending slicing/indexing, then delegates to optimize_time_full.
 *
 * \param transp, inplace, nsamples: forwarded unchanged to optimize_time_full.
 * \return whatever optimize_time_full returns.
 */
template<typename FPP, FDevice DEV>
	TransformHelper<FPP,DEV>* TransformHelperGen<FPP,DEV>::optimize_time(const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		return this->optimize_time_full(transp, inplace, nsamples);
	}
/**
 * \brief Runs optimize_multiply with the full product (get_product) as the operation.
 *
 * \param transp, inplace, nsamples: forwarded unchanged to optimize_multiply.
 * \return whatever optimize_multiply returns.
 */
template<typename FPP, FDevice DEV>
	TransformHelper<FPP,DEV>* TransformHelperGen<FPP,DEV>::optimize_time_full(const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		// the operation handed to optimize_multiply: the result of the product is discarded
		auto toarray_op = [this]()
		{
			this->get_product();
		};
		return this->optimize_multiply(toarray_op, transp, inplace, nsamples, "Faust-toarray");
	}
/**
 * \brief Runs optimize_multiply with the product Faust * test_mat as the operation.
 *
 * \param test_mat: the matrix to multiply by; it must be a MatDense or a MatSparse on device DEV.
 * \param transp, inplace, nsamples: forwarded unchanged to optimize_multiply.
 * \return whatever optimize_multiply returns.
 * \throws std::runtime_error if test_mat is neither a MatDense nor a MatSparse
 *         (a null pointer fails both casts and ends up here too).
 */
template<typename FPP, FDevice DEV>
	TransformHelper<FPP,DEV>* TransformHelperGen<FPP,DEV>::optimize_time_prod(const MatGeneric<FPP, DEV>* test_mat, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		// find the concrete type of the test matrix (at most one of the two casts succeeds)
		auto md = dynamic_cast<const MatDense<FPP,DEV>*>(test_mat);
		auto ms = dynamic_cast<const MatSparse<FPP,DEV>*>(test_mat);
		if(! md && ! ms)
			throw std::runtime_error("optimize_time_prod supports only MatDense or MatSparse benchmarking.");
		return this->optimize_multiply([this, ms, md]()
				{
					if(md) this->multiply(*md);
					else /* ms != nullptr */ this->multiply(*ms);
				}, transp, inplace, nsamples, "Faust-matrix product");
	}
/**
 * \brief Builds an optimized Faust from this one.
 *
 * Pending slicing/indexing is evaluated first, then the Faust is pruned
 * (pruneout with nnz_tres=0), passed to optimize_storage(false) and finally
 * optimized in place with optimize_time.
 *
 * \param transp: forwarded to optimize_time.
 * \return the pointer produced by optimize_storage (the intermediate pruned object is deleted here).
 */
template<typename FPP, FDevice DEV>
	TransformHelper<FPP,DEV>* TransformHelperGen<FPP,DEV>::optimize(const bool transp /* deft to false */)
	{
		this->eval_sliced_Transform();
		this->eval_fancy_idx_Transform();
		//TODO: need a nsamples argument to feed optimize_time*
		Faust::TransformHelper<FPP,DEV>* pruned = this->pruneout(/*nnz_tres=*/0);
		Faust::TransformHelper<FPP,DEV>* optimized = pruned->optimize_storage(false);
		// the pruned Faust was only an intermediate step
		delete pruned;
		optimized->optimize_time(transp, /*inplace=*/ true);
		return optimized;
	}
template<typename FPP, FDevice DEV>
TransformHelperGen<FPP, DEV>::~TransformHelperGen()
{
......
......@@ -438,7 +438,7 @@ FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const FPP* x_data, i
#ifdef FAUST_VERBOSE
std::cout << "FaustCoreCpp::optimize_time() th=" << th << "core=" << core << std::endl;
#endif
Faust::MatSparse<FPP, Cpu> X(x_nnz, x_nrows, x_ncols, x_data, x_row_ptr, x_id_col);
Faust::MatSparse<FPP, DEV> X(x_nnz, x_nrows, x_ncols, x_data, x_row_ptr, x_id_col);
if(inplace)
{
this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
......@@ -455,7 +455,7 @@ FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const FPP* x_data, i
#ifdef FAUST_VERBOSE
std::cout << "FaustCoreCpp::optimize_time() th=" << th << "core=" << core << std::endl;
#endif
Faust::MatDense<FPP, Cpu> X(x_data, x_nrows, x_ncols);
Faust::MatDense<FPP, DEV> X(x_nrows, x_ncols, x_data);
if(inplace)
{
this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment