Commit b3b9fede authored by hhakim

Add a mat argument to pyfaust.Faust.optimize_time in order to run the benchmark on F@mat instead of F.toarray().

The function is implemented in the C++ core only for the CPU backend.
parent c75d035d
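Note: a minimal usage sketch of the new argument (the shapes are illustrative; pyfaust.rand and scipy.sparse.random are only used here to build test inputs, assuming the usual pyfaust API):

    import numpy as np
    from scipy.sparse import random as sprand
    import pyfaust

    F = pyfaust.rand(1024, 1024)                       # any Faust
    Md = np.random.rand(1024, 32)                      # dense test matrix
    Ms = sprand(1024, 32, density=.01, format='csr')   # sparse test matrix

    F1 = F.optimize_time(mat=Md)   # benchmark run on F@Md
    F2 = F.optimize_time(mat=Ms)   # benchmark run on F@Ms (CSR)
    F3 = F.optimize_time()         # default: benchmark run on F.toarray()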
@@ -172,6 +172,7 @@ namespace Faust
 		virtual TransformHelper<FPP,Cpu>* optimize_multiply(std::function<void()> f, const bool transp=false, const bool inplace=false, const int nsamples=1, const char* op_name="unamed_op");
 		virtual TransformHelper<FPP,Cpu>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
 		virtual TransformHelper<FPP,Cpu>* optimize_time_full(const bool transp=false, const bool inplace=false, const int nsamples=1);
+		TransformHelper<FPP,Cpu>* optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp=false, const bool inplace=false, const int nsamples=1);
 		/**
 			\brief Returns the left hand side factors of this from index 0 to id included (as a new TransformHelper obj).
@@ -213,8 +214,6 @@ namespace Faust
 		void convertToDense();
 		template<typename FPP2>
 		TransformHelper<Real<FPP>, Cpu>* real();
-	private:
-		MatDense<FPP,Cpu> multiply_dynprog(const MatGeneric<FPP,Cpu> &A, MatDense<FPP, Cpu> &out);
 	};
...
@@ -164,7 +164,6 @@ namespace Faust {
 		}
 #endif
 		switch(this->mul_order_opt_mode)
 		{
 			case GREEDY_ALL_ENDS:
@@ -186,7 +185,12 @@ namespace Faust {
 				}
 				break;
 			case DYNPROG:
-				this->multiply_dynprog(A, M);
+				{
+					std::vector<Faust::MatGeneric<FPP,Cpu>*> data = this->transform->data;
+					if(this->is_transposed)
+						std::reverse(data.begin(), data.end());
+					M = std::move(dynprog_multiply(data, this->isTransposed2char(), &A));
+				}
 				break;
 			case CPP_PROD_PAR_REDUC:
 			case OMP_PROD_PAR_REDUC:
@@ -249,16 +253,6 @@ namespace Faust {
 		memcpy(y, y_vec.getData(), sizeof(FPP)*y_vec.size());
 	}
-	template<typename FPP>
-	MatDense<FPP,Cpu> TransformHelper<FPP,Cpu>::multiply_dynprog(const MatGeneric<FPP,Cpu> &A, MatDense<FPP, Cpu> &out)
-	{ // specific scope for variable initialized here
-		std::vector<Faust::MatGeneric<FPP,Cpu>*> data = this->transform->data;
-		if(this->is_transposed)
-			std::reverse(data.begin(), data.end());
-		out = std::move(dynprog_multiply(data, this->isTransposed2char(), &A));
-	}
 	template<typename FPP>
 	MatDense<FPP,Cpu> TransformHelper<FPP,Cpu>::multiply(const MatDense<FPP,Cpu> &A, const bool transpose, const bool conjugate)
 	{
@@ -274,7 +268,6 @@ template<typename FPP>
 #endif
 		switch(this->mul_order_opt_mode)
 		{
 			case GREEDY_ALL_ENDS:
@@ -297,7 +290,10 @@ template<typename FPP>
 				break;
 			case DYNPROG:
 				{
-					this->multiply_dynprog(A, M);
+					std::vector<Faust::MatGeneric<FPP,Cpu>*> data = this->transform->data;
+					if(this->is_transposed)
+						std::reverse(data.begin(), data.end());
+					M = std::move(dynprog_multiply(data, this->isTransposed2char(), &A));
 				}
 				break;
 			case CPP_PROD_PAR_REDUC:
@@ -362,6 +358,21 @@ template<typename FPP>
 		return this->optimize_multiply([this](){this->get_product();}, transp, inplace, nsamples, "Faust-toarray");
 	}
+	template<typename FPP>
+	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
+	{
+		auto md = dynamic_cast<const MatDense<FPP,Cpu>*>(test_mat);
+		auto ms = dynamic_cast<const MatSparse<FPP,Cpu>*>(test_mat);
+		if(! md && ! ms)
+			throw std::runtime_error("optimize_time_prod supports only MatDense or MatSparse benchmarking.");
+		return this->optimize_multiply([this, ms, md]()
+				{
+					if(md) this->multiply(*md);
+					else /* ms != nullptr */ this->multiply(*ms);
+				}, transp, inplace, nsamples, "Faust-matrix product");
+	}
 	template<typename FPP>
 	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_multiply(std::function<void()> f, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples, const char* op_name)
 	{
...
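Note: the DYNPROG branch above delegates to dynprog_multiply, which picks the cheapest parenthesization of the factor chain (test matrix included) by dynamic programming. The sketch below shows the classic matrix-chain-order recurrence this kind of routine is built on; it is a Python illustration only, not the FAuST implementation, and chain_order is a hypothetical name:

    def chain_order(dims):
        # dims has n+1 entries for n matrices: matrix i is dims[i] x dims[i+1].
        # cost[i][j]: minimal scalar multiplications to reduce the chain i..j.
        n = len(dims) - 1
        cost = [[0] * n for _ in range(n)]
        split = [[0] * n for _ in range(n)]
        for length in range(2, n + 1):
            for i in range(n - length + 1):
                j = i + length - 1
                cost[i][j] = float('inf')
                for k in range(i, j):  # try every split point
                    c = cost[i][k] + cost[k + 1][j] + dims[i] * dims[k + 1] * dims[j + 1]
                    if c < cost[i][j]:
                        cost[i][j], split[i][j] = c, k
        return cost, split

    # Three factors (100x20, 20x50, 50x5) times a 5x60 test matrix:
    cost, split = chain_order([100, 20, 50, 5, 60])
    print(cost[0][3])  # cheapest total multiplication cost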
@@ -90,6 +90,8 @@ namespace Faust
 			TransformHelper<FPP,GPU2>* conjugate();
 			TransformHelper<FPP,GPU2>* adjoint();
 			TransformHelper<FPP,GPU2>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
+			TransformHelper<FPP,GPU2>* optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/);
 			TransformHelper<FPP,GPU2>* optimize(const bool transp=false);
 			TransformHelper<FPP,GPU2>* clone(int32_t dev_id=-1, void* stream=nullptr);
 			void get_fact(const faust_unsigned_int id,
...
@@ -440,7 +440,7 @@ namespace Faust
 	}
 	template<typename FPP>
-	void Faust::TransformHelper<FPP,GPU2>::pack_factors(faust_unsigned_int start_id, faust_unsigned_int end_id, const int mul_order_opt_mode/*=DEFAULT_L2R*/)
+	void Faust::TransformHelper<FPP,GPU2>::pack_factors(faust_unsigned_int start_id, faust_unsigned_int end_id, const int mul_order_opt_mode/*=DEFAULT*/)
 	{
 		if(start_id < 0 || start_id >= size())
 			throw out_of_range("start_id is out of range.");
@@ -611,6 +611,13 @@ namespace Faust
 		// return gpu_thn;
 	}
+	template<typename FPP>
+	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/)
+	{
+		throw std::runtime_error("optimize_time_prod is not yet implemented in the Faust C++ core for GPU.");
+		return nullptr;
+	}
 	template<typename FPP>
 	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize(const bool transp/*=false*/)
 	{
...
@@ -2134,7 +2134,7 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
         F_opt = Faust(core_obj=F.m_faust.optimize(transp))
         return F_opt
-    def optimize_time(F, transp=False, inplace=False, nsamples=1):
+    def optimize_time(F, transp=False, inplace=False, nsamples=1, mat=None):
         """
         Returns a Faust configured with the quickest Faust-matrix multiplication mode (benchmark ran on the fly).
@@ -2142,11 +2142,7 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
         available differ by the order used to compute the matrix chain
         multiplication or by the use (or unuse) of libraries to performs the
         calculation.
-        The evaluated methods in the benchmark are listed in
-        pyfaust.FaustMulMode but note that FaustMulMode.CPP_PROD_PAR_REDUC and
-        FaustMulMode.OMP_PROD_PAR_REDUC are excluded from the benchmark because
-        it doesn't worth it in any case when Eigen multithread is enabled
-        (which is the case in any package of pyfaust delivered).
+        The evaluated methods in the benchmark are listed in pyfaust.FaustMulMode.
         Although depending on the package you installed and the capability of your
         hardware the methods based on Torch library can be used.
@@ -2159,6 +2155,10 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
             calculated in order to measure time taken by each method (it could matter
             to discriminate methods when the performances are similar). By default,
             only one product is computed to evaluate the method.
+            mat: if not None, must be a numpy.ndarray or a
+                scipy.sparse.csr_matrix. Use this argument to run the benchmark
+                on the multiplication F@mat instead of F.toarray(), which is
+                the default benchmark when mat is None.
 
         Returns:
             The optimized Faust.
@@ -2167,11 +2167,11 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
         """
         if(inplace):
-            F.m_faust.optimize_time(transp, inplace, nsamples)
+            F.m_faust.optimize_time(transp, inplace, nsamples, M=mat)
             return F
         else:
             F_opt = Faust(core_obj=F.m_faust.optimize_time(transp, inplace,
-                                                           nsamples))
+                                                           nsamples, M=mat))
             return F_opt
 
     def copy(F, dev='cpu'):
...
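Note: as the docstring explains, optimize_time times every multiplication method over nsamples runs and keeps the quickest. A rough Python model of that selection loop (pick_fastest and funcs are illustrative names, not pyfaust API; the real benchmark runs in the C++ optimize_multiply):

    import time

    def pick_fastest(funcs, nsamples=1):
        # funcs maps a method name to a zero-argument callable computing F@mat.
        best_name, best_time = None, float('inf')
        for name, f in funcs.items():
            t0 = time.perf_counter()
            for _ in range(nsamples):
                f()
            elapsed = (time.perf_counter() - t0) / nsamples
            if elapsed < best_time:
                best_name, best_time = name, elapsed
        return best_name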
@@ -142,6 +142,9 @@ class FaustCoreCpp
 		FaustCoreCpp<FPP,DEV>* optimize_storage(const bool time=false);
 		FaustCoreCpp<FPP,DEV>* optimize(const bool transp=false);
 		FaustCoreCpp<FPP,DEV>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
+		FaustCoreCpp<FPP,DEV>* optimize_time(const FPP* value_x, int nbrow_x, int nbcol_x, const bool transp=false, const bool inplace=false, const int nsamples=1);
+		FaustCoreCpp<FPP,DEV>* optimize_time(const FPP* x_data, int* x_row_ptr, int* x_id_col, int x_nnz, int x_nrows, int x_ncols, const bool transp=false, const bool inplace=false, const int nsamples=1);
 		const bool isTransposed();
 		FaustCoreCpp<FPP,DEV>* transpose()const;
 		FaustCoreCpp<FPP,DEV>* conjugate()const;
...
@@ -392,6 +392,40 @@ FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const bool transp /*
 #endif
 }
+template<typename FPP, FDevice DEV>
+FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const FPP* x_data, int* x_row_ptr, int* x_id_col, int x_nnz, int x_nrows, int x_ncols, const bool transp /* deft to false*/, const bool inplace /* default to false */, const int nsamples /* default to 1*/)
+{
+	Faust::MatSparse<FPP, Cpu> X(x_nnz, x_nrows, x_ncols, x_data, x_row_ptr, x_id_col);
+	if(inplace)
+	{
+		this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+		return nullptr; // in-place: the caller keeps using this object
+	}
+	auto th = this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+#ifdef FAUST_VERBOSE
+	std::cout << "FaustCoreCpp::optimize_time() th=" << th << std::endl;
+#endif
+	return new FaustCoreCpp<FPP,DEV>(th);
+}
+template<typename FPP, FDevice DEV>
+FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const FPP* x_data, int x_nrows, int x_ncols, const bool transp /* deft to false*/, const bool inplace /* default to false */, const int nsamples /* default to 1*/)
+{
+	Faust::MatDense<FPP, Cpu> X(x_data, x_nrows, x_ncols);
+	if(inplace)
+	{
+		this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+		return nullptr; // in-place: the caller keeps using this object
+	}
+	auto th = this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+#ifdef FAUST_VERBOSE
+	std::cout << "FaustCoreCpp::optimize_time() th=" << th << std::endl;
+#endif
+	return new FaustCoreCpp<FPP,DEV>(th);
+}
 template<typename FPP, FDevice DEV>
 FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize(const bool transp /* deft to false*/)
 {
...
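Note: the sparse overload above rebuilds a Faust::MatSparse from raw CSR buffers; these are exactly the attributes a scipy.sparse.csr_matrix exposes on the Python side, as this short sketch shows:

    import numpy as np
    from scipy.sparse import csr_matrix

    M = csr_matrix(np.array([[0., 2., 0.],
                             [1., 0., 3.]]))
    # Mapping to the C++ overload's parameters:
    #   x_data    <- M.data      (nonzero values)
    #   x_row_ptr <- M.indptr    (row start offsets, length nrows+1)
    #   x_id_col  <- M.indices   (column index of each nonzero)
    #   x_nnz, x_nrows, x_ncols <- M.nnz, M.shape[0], M.shape[1]
    print(M.data, M.indptr, M.indices, M.nnz, M.shape)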
@@ -61,6 +61,12 @@ cdef extern from "FaustCoreCpp.h":
         @CPP_CORE_CLASS@[FPP]* optimize_time(const bool transp, const bool inplace,
                                              const int nsamples)
+        @CPP_CORE_CLASS@[FPP]* optimize_time(FPP* x_data, int* x_row_ptr, int* x_id_col, int x_nnz, int x_nrows, int x_ncols, const bool transp, const bool inplace,
+                                             const int nsamples)
+        @CPP_CORE_CLASS@[FPP]* optimize_time(FPP* x_data, int x_nrows, int x_ncols,
+                                             const bool transp, const bool inplace,
+                                             const int nsamples)
         @CPP_CORE_CLASS@[FPP]* optimize(const bool transp)
         @CPP_CORE_CLASS@[FPP]* optimize_storage(const bool time)
         const bool isTransposed()
...
@@ -553,15 +553,63 @@ cdef class FaustCoreGen@TYPE_NAME@@PROC@:
         core.@CORE_OBJ@ = self.@CORE_OBJ@.optimize(transp)
         return core
-    def optimize_time(self, transp=False, inplace=False, nsamples=1):
-        if(inplace):
-            self.@CORE_OBJ@.optimize_time(transp, inplace, nsamples)
-        else:
-            core = @CORE_CLASS@(core=True)
-            core.@CORE_OBJ@ = self.@CORE_OBJ@.optimize_time(transp,
-                                                            inplace,
-                                                            nsamples)
-            return core
+    def optimize_time(self, transp=False, inplace=False, nsamples=1, M=None):
+        cdef @TYPE@[:,:] M_data
+        cdef int [:] M_indices
+        cdef int [:] M_indptr
+        cdef @TYPE@ [:] M_csr_data
+        M_is_dense = False
+        if M is None:
+            # optimize time according to Faust.toarray()
+            if(inplace):
+                self.@CORE_OBJ@.optimize_time(transp, inplace, nsamples)
+            else:
+                core = @CORE_CLASS@(core=True)
+                core.@CORE_OBJ@ = self.@CORE_OBJ@.optimize_time(transp,
+                                                                inplace,
+                                                                nsamples)
+                return core
+        else:
+            # optimize time according to F@M
+            if isinstance(M, np.ndarray):
+                M_is_dense = True
+                M_nrows = M.shape[0]
+                M_ncols = M.shape[1]
+                M_data = M
+            elif isinstance(M, csr_matrix):
+                M_nrows = M.shape[0]
+                M_ncols = M.shape[1]
+                M_csr_data = M.data
+                M_indices = M.indices
+                M_indptr = M.indptr
+                M_nnz = M.nnz
+            else:
+                raise TypeError("M must be a np.ndarray or a csr_matrix.")
+            if(inplace):
+                if M_is_dense:
+                    self.@CORE_OBJ@.optimize_time(&M_data[0,0], M_nrows, M_ncols, transp, inplace, nsamples)
+                else:
+                    self.@CORE_OBJ@.optimize_time(&M_csr_data[0], &M_indptr[0],
+                                                  &M_indices[0],
+                                                  M_nnz, M_nrows, M_ncols, transp, inplace, nsamples)
+            else:
+                core = @CORE_CLASS@(core=True)
+                if M_is_dense:
+                    core.@CORE_OBJ@ = \
+                            self.@CORE_OBJ@.optimize_time(&M_data[0,0], M_nrows, M_ncols,
+                                                          transp,
+                                                          inplace,
+                                                          nsamples)
+                else:
+                    core.@CORE_OBJ@ = \
+                            self.@CORE_OBJ@.optimize_time(&M_csr_data[0], &M_indptr[0],
+                                                          &M_indices[0],
+                                                          M_nnz, M_nrows, M_ncols,
+                                                          transp,
+                                                          inplace,
+                                                          nsamples)
+                return core
     def conj(self):
         core = @CORE_CLASS@(core=True)
...