Mentions légales du service

Skip to content
Snippets Groups Projects
Commit f6e5b689 authored by hhakim
Browse files

Optimize cuMatArray mul by scalar for GPU2 -- smallest factor copied (issue #159).

Update to gpu_mod@8a7a155f
parent d521ca10
Branches
Tags 2.5.50
No related merge requests found
Subproject commit 7920c9afc73247d37e3a14dfd4dd458814ba9f01 Subproject commit 8a7a155f54d37605b381b8586b994dc4ceea45e9
...@@ -16,13 +16,25 @@ namespace Faust ...@@ -16,13 +16,25 @@ namespace Faust
TransformHelper<FPP,GPU2>::TransformHelper(const std::vector<MatGeneric<FPP,GPU2> *>& facts, const FPP lambda_/*= (FPP)1.0*/, const bool optimizedCopy/*=false*/, const bool cloning_fact /*= true*/, const bool internal_call/*=false*/) : TransformHelper<FPP,GPU2>() TransformHelper<FPP,GPU2>::TransformHelper(const std::vector<MatGeneric<FPP,GPU2> *>& facts, const FPP lambda_/*= (FPP)1.0*/, const bool optimizedCopy/*=false*/, const bool cloning_fact /*= true*/, const bool internal_call/*=false*/) : TransformHelper<FPP,GPU2>()
{ {
//if lambda is not 1.0 the first factor will be multiplied and so it needs to be copied to preserved the original that could be used elsewhere //if lambda is not 1.0 a factor will be multiplied and so it needs to be copied to preserve the original that could be used elsewhere
this->push_back(facts[0], false, cloning_fact || lambda_ != (FPP) 1.0); // in an optimization purpose, the smallest factor is copied
for(int i=1; i < facts.size(); i++) int min_size_id = 0;
if(lambda_ != FPP(1.0))
{ {
this->push_back(facts[i], false, cloning_fact); std::vector<int> fact_ids(facts.size());
int i = -1;
std::generate(fact_ids.begin(), fact_ids.end(), [&i](){return ++i;});
std::vector<int>::iterator result = std::min_element(fact_ids.begin(), fact_ids.end(), [&facts](const int &a, const int &b){return facts[a]->getNBytes() < facts[b]->getNBytes();});
min_size_id = std::distance(fact_ids.begin(), result);
} }
this->transform->multiply(lambda_); for(int i=0; i < facts.size(); i++)
{
if(i == min_size_id)
this->push_back(facts[min_size_id], false, cloning_fact || lambda_ != (FPP) 1.0);
else
this->push_back(facts[i], false, cloning_fact);
}
this->transform->multiply(lambda_, min_size_id);
} }
template<typename FPP> template<typename FPP>
......
...@@ -371,10 +371,10 @@ namespace Faust ...@@ -371,10 +371,10 @@ namespace Faust
} }
template<> template<>
void Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const @FAUST_SCALAR_FOR_GM@& a) void Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const @FAUST_SCALAR_FOR_GM@& a, const int32_t id/*=-1*/)
{ {
auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0)); auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
marr_funcs->scalar_mul(this->gpu_mat_arr, reinterpret_cast<const @GM_SCALAR@*>(&a)); marr_funcs->scalar_mul_id(this->gpu_mat_arr, reinterpret_cast<const @GM_SCALAR@*>(&a), id);
} }
template<> template<>
......
...@@ -65,7 +65,7 @@ namespace Faust ...@@ -65,7 +65,7 @@ namespace Faust
MatDense<FPP,GPU2> multiply(const MatDense<FPP,GPU2> &A, const char opThis); MatDense<FPP,GPU2> multiply(const MatDense<FPP,GPU2> &A, const char opThis);
void multiply(const Transform<FPP,GPU2> & A); void multiply(const Transform<FPP,GPU2> & A);
void multiplyLeft(const Transform<FPP,GPU2> & A); void multiplyLeft(const Transform<FPP,GPU2> & A);
void multiply(const FPP& a); void multiply(const FPP& a, const int32_t id=-1);
Vect<FPP,GPU2> multiply(const Vect<FPP,GPU2>& x, const char opThis='N'); Vect<FPP,GPU2> multiply(const Vect<FPP,GPU2>& x, const char opThis='N');
Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag); Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag);
Real<FPP> normL1(const bool transpose = false) const; Real<FPP> normL1(const bool transpose = false) const;
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment