Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 528ff0ec authored by hhakim's avatar hhakim
Browse files

Implement the spectral norm (operator L2 norm) using gpu_mod@7d259afe.

Two methods have been implemented, only the more efficient is enabled (except for a special case which poses problem: see issue #140).

Minor change: review member functions and arguments constness in TransformHelper.

[skip ci]
parent 4ba82adb
Branches
Tags
No related merge requests found
...@@ -106,11 +106,11 @@ namespace Faust { ...@@ -106,11 +106,11 @@ namespace Faust {
public: public:
TransformHelper(const std::vector<MatGeneric<FPP,Cpu> *>& facts, const FPP lambda_ = (FPP)1.0, const bool optimizedCopy=false, const bool cloning_fact = true, const bool internal_call=false); TransformHelper(const std::vector<MatGeneric<FPP,Cpu> *>& facts, const FPP lambda_ = (FPP)1.0, const bool optimizedCopy=false, const bool cloning_fact = true, const bool internal_call=false);
TransformHelper(); TransformHelper();
TransformHelper(TransformHelper<FPP,Cpu>* th_left, TransformHelper<FPP,Cpu>* th_right); TransformHelper(const TransformHelper<FPP,Cpu>* th_left, const TransformHelper<FPP,Cpu>* th_right);
TransformHelper(TransformHelper<FPP,Cpu>* th); TransformHelper(TransformHelper<FPP,Cpu>* th);
TransformHelper(TransformHelper<FPP,Cpu>& th); TransformHelper(TransformHelper<FPP,Cpu>& th);
void operator=(TransformHelper<FPP,Cpu>& th); void operator=(TransformHelper<FPP,Cpu>& th);
TransformHelper(TransformHelper<FPP,Cpu>* th, bool transpose, bool conjugate); TransformHelper(const TransformHelper<FPP,Cpu>* th, bool transpose, bool conjugate);
TransformHelper(TransformHelper<FPP,Cpu>* th, Slice s[2]); TransformHelper(TransformHelper<FPP,Cpu>* th, Slice s[2]);
TransformHelper(TransformHelper<FPP,Cpu>* th, faust_unsigned_int* row_ids, faust_unsigned_int num_rows, faust_unsigned_int* col_ids, faust_unsigned_int num_cols); TransformHelper(TransformHelper<FPP,Cpu>* th, faust_unsigned_int* row_ids, faust_unsigned_int num_rows, faust_unsigned_int* col_ids, faust_unsigned_int num_cols);
TransformHelper(Transform<FPP,Cpu> &t, const bool moving=false); TransformHelper(Transform<FPP,Cpu> &t, const bool moving=false);
...@@ -127,7 +127,7 @@ namespace Faust { ...@@ -127,7 +127,7 @@ namespace Faust {
int get_mul_order_opt_mode() const; int get_mul_order_opt_mode() const;
MatDense<FPP, Cpu> multiply(const MatSparse<FPP,Cpu> A, const bool transpose=false, const bool conjugate=false); MatDense<FPP, Cpu> multiply(const MatSparse<FPP,Cpu> A, const bool transpose=false, const bool conjugate=false);
TransformHelper<FPP, Cpu>* multiply(TransformHelper<FPP, Cpu>*); TransformHelper<FPP, Cpu>* multiply(const TransformHelper<FPP, Cpu>*) const;
TransformHelper<FPP, Cpu>* multiply(FPP& scalar); TransformHelper<FPP, Cpu>* multiply(FPP& scalar);
template<typename Head, typename ... Tail> template<typename Head, typename ... Tail>
void push_back_(Head& h, Tail&... t); void push_back_(Head& h, Tail&... t);
...@@ -186,7 +186,7 @@ namespace Faust { ...@@ -186,7 +186,7 @@ namespace Faust {
double spectralNorm(const int nbr_iter_max, double threshold, int &flag) const; double spectralNorm(const int nbr_iter_max, double threshold, int &flag) const;
TransformHelper<FPP,Cpu>* transpose(); TransformHelper<FPP,Cpu>* transpose();
TransformHelper<FPP,Cpu>* conjugate(); TransformHelper<FPP,Cpu>* conjugate();
TransformHelper<FPP,Cpu>* adjoint(); TransformHelper<FPP,Cpu>* adjoint() const;
TransformHelper<FPP,Cpu>* vertcat(const TransformHelper<FPP,Cpu>*); TransformHelper<FPP,Cpu>* vertcat(const TransformHelper<FPP,Cpu>*);
TransformHelper<FPP,Cpu>* horzcat(const TransformHelper<FPP,Cpu>*); TransformHelper<FPP,Cpu>* horzcat(const TransformHelper<FPP,Cpu>*);
bool isTransposed() const; bool isTransposed() const;
...@@ -242,7 +242,7 @@ namespace Faust { ...@@ -242,7 +242,7 @@ namespace Faust {
MatGeneric<FPP,Cpu>* get_gen_fact_nonconst(const faust_unsigned_int id) const; MatGeneric<FPP,Cpu>* get_gen_fact_nonconst(const faust_unsigned_int id) const;
void update(const MatGeneric<FPP,Cpu>& M, const faust_unsigned_int fact_id); void update(const MatGeneric<FPP,Cpu>& M, const faust_unsigned_int fact_id);
private: private:
void copy_slices(TransformHelper<FPP, Cpu>* th, const bool transpose = false); void copy_slices(const TransformHelper<FPP, Cpu>* th, const bool transpose = false);
const MatGeneric<FPP,Cpu>* get_gen_fact(const faust_unsigned_int id) const; const MatGeneric<FPP,Cpu>* get_gen_fact(const faust_unsigned_int id) const;
}; };
} }
......
...@@ -96,7 +96,7 @@ namespace Faust { ...@@ -96,7 +96,7 @@ namespace Faust {
} }
template<typename FPP> template<typename FPP>
TransformHelper<FPP,Cpu>::TransformHelper(TransformHelper<FPP,Cpu>* th_left, TransformHelper<FPP,Cpu>* th_right) TransformHelper<FPP,Cpu>::TransformHelper(const TransformHelper<FPP,Cpu>* th_left, const TransformHelper<FPP,Cpu>* th_right)
: TransformHelper<FPP,Cpu>() : TransformHelper<FPP,Cpu>()
{ {
bool right_left_transposed = th_left->is_transposed && th_right->is_transposed; bool right_left_transposed = th_left->is_transposed && th_right->is_transposed;
...@@ -115,7 +115,7 @@ namespace Faust { ...@@ -115,7 +115,7 @@ namespace Faust {
} }
template<typename FPP> template<typename FPP>
TransformHelper<FPP,Cpu>::TransformHelper(TransformHelper<FPP,Cpu>* th, bool transpose, bool conjugate) : TransformHelper<FPP,Cpu>() TransformHelper<FPP,Cpu>::TransformHelper(const TransformHelper<FPP,Cpu>* th, bool transpose, bool conjugate) : TransformHelper<FPP,Cpu>()
{ {
this->transform = th->transform; this->transform = th->transform;
this->is_transposed = transpose?!th->is_transposed:th->is_transposed; this->is_transposed = transpose?!th->is_transposed:th->is_transposed;
...@@ -735,7 +735,7 @@ namespace Faust { ...@@ -735,7 +735,7 @@ namespace Faust {
} }
template<typename FPP> template<typename FPP>
TransformHelper<FPP, Cpu>* TransformHelper<FPP,Cpu>::multiply(TransformHelper<FPP, Cpu>* th_right) TransformHelper<FPP, Cpu>* TransformHelper<FPP,Cpu>::multiply(const TransformHelper<FPP, Cpu>* th_right) const
{ {
return new TransformHelper<FPP,Cpu>(this, th_right); return new TransformHelper<FPP,Cpu>(this, th_right);
} }
...@@ -1092,7 +1092,7 @@ namespace Faust { ...@@ -1092,7 +1092,7 @@ namespace Faust {
} }
template<typename FPP> template<typename FPP>
void TransformHelper<FPP, Cpu>::copy_slices(TransformHelper<FPP, Cpu> *th, const bool transpose /* default to false */) void TransformHelper<FPP, Cpu>::copy_slices(const TransformHelper<FPP, Cpu> *th, const bool transpose /* default to false */)
{ {
this->slices[0].copy(th->slices[0]); this->slices[0].copy(th->slices[0]);
this->slices[1].copy(th->slices[1]); this->slices[1].copy(th->slices[1]);
...@@ -1252,6 +1252,44 @@ namespace Faust { ...@@ -1252,6 +1252,44 @@ namespace Faust {
template<typename FPP> template<typename FPP>
double TransformHelper<FPP,Cpu>::spectralNorm(const int nbr_iter_max, double threshold, int &flag) const double TransformHelper<FPP,Cpu>::spectralNorm(const int nbr_iter_max, double threshold, int &flag) const
{ {
#ifdef USE_GPU_MOD
if(gpu_faust != nullptr)
{
bool purely_sparse = true;
bool all_squares = true; // it's about factors
for(auto f: this->transform->data)
{
if(dynamic_cast<MatSparse<FPP,Cpu>*>(f) == nullptr)
purely_sparse = false;
all_squares = all_squares && f->getNbCol() == f->getNbRow();
}
// std::cout << "purely_sparse=" << purely_sparse << " all_squares=" << all_squares << std::endl;
if(purely_sparse && ! all_squares)
{
// std::cout << "method 1 used for gpu spectral norm" << std::endl;
// this method is slower because of the copy on CPU side of the transposed factors to construct FF'
TransformHelper<FPP,Cpu> *AtA;
TransformHelper<FPP,Cpu>* At = this->adjoint();
if(this->getNbCol() < this->getNbRow())
{
AtA = At->multiply(this);
}
else
{
AtA = this->multiply(At);
}
AtA->enable_gpu_meth_for_mul(); //TODO: it should not be needed if A is already GPU enabled
FPP maxAbsValue = std::sqrt(AtA->gpu_faust->power_iteration(nbr_iter_max, threshold, flag));
delete AtA;
}
else
{
// std::cout << "method 2 used for gpu spectral norm" << std::endl;
FPP maxAbsValue = std::sqrt(this->gpu_faust->power_iteration2(nbr_iter_max, threshold, flag));
return absValue(maxAbsValue);
}
}
#endif
vector <MatGeneric<FPP, Cpu>*>& orig_facts = this->transform->data; vector <MatGeneric<FPP, Cpu>*>& orig_facts = this->transform->data;
int start_id, end_id; int start_id, end_id;
this->transform->get_nonortho_interior_prod_ids(start_id, end_id); this->transform->get_nonortho_interior_prod_ids(start_id, end_id);
...@@ -1472,7 +1510,7 @@ namespace Faust { ...@@ -1472,7 +1510,7 @@ namespace Faust {
} }
template<typename FPP> template<typename FPP>
TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::adjoint() TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::adjoint() const
{ {
return new TransformHelper<FPP,Cpu>(this, true, true); return new TransformHelper<FPP,Cpu>(this, true, true);
} }
......
...@@ -53,6 +53,14 @@ namespace Faust ...@@ -53,6 +53,14 @@ namespace Faust
void insert(const Faust::MatGeneric<FPP,Cpu>* M, int32_t id); void insert(const Faust::MatGeneric<FPP,Cpu>* M, int32_t id);
/** TODO: this function should be deleted because it definitely slower than the version 2 */
FPP power_iteration(int32_t max_iter, Real<FPP> threshold, int& flag);
/** This version differs from power_iteration in the way it computes F'Fv in this order F'(Fv)
* so that it doesn't need to load all factors of F'F into GPU memory but only F.
*/
FPP power_iteration2(int32_t max_iter, Real<FPP> threshold, int& flag);
/* Update on gpu the copy matrix of M (M must have already been loaded otherwise an exception is raised) */ /* Update on gpu the copy matrix of M (M must have already been loaded otherwise an exception is raised) */
void update(const Faust::MatGeneric<FPP,Cpu>* M, int32_t id); void update(const Faust::MatGeneric<FPP,Cpu>* M, int32_t id);
......
...@@ -334,4 +334,100 @@ namespace Faust ...@@ -334,4 +334,100 @@ namespace Faust
} }
} }
template<>
@FAUST_SCALAR_FOR_GM@ FaustGPU<@FAUST_SCALAR_FOR_GM@>::power_iteration(int32_t max_iter, Real<@FAUST_SCALAR_FOR_GM@> threshold, int& flag)
{
int i = 0;
int32_t nrows, ncols, xk_bufsz=0, xk2_bufsz=0; //TODO: remove (just here fore test info of xk)
gm_MatArrayFunc_@GM_SCALAR@* marr_funcs = (gm_MatArrayFunc_@GM_SCALAR@*) this->marr_funcs;
gm_DenseMatFunc_@GM_SCALAR@* dsm_funcs = (gm_DenseMatFunc_@GM_SCALAR@*) this->dsm_funcs;
Faust::Vect<@FAUST_SCALAR_FOR_GM@,Cpu> xk_cpu(this->ncols);
xk_cpu.setOnes();
for(auto p: cpu_mat_ptrs)
{
auto mat = static_cast<MatGeneric<@FAUST_SCALAR_FOR_GM@,Cpu>*>(p);
if(xk_bufsz < mat->getNbCol()) // xk buf receives the result of F'*xk2
xk_bufsz = mat->getNbCol();
if(xk2_bufsz < mat->getNbRow()) // xk2 buf receives the result of F*xk
xk2_bufsz = mat->getNbRow();
}
auto xk = dsm_funcs->togpu_bufsz(this->ncols, 1, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(xk_cpu.getData()), xk_bufsz, 1);
auto xk_norm = dsm_funcs->togpu_bufsz(this->ncols, 1, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(xk_cpu.getData()), xk_bufsz, 1); // the CPU data is irrelevant for xk_norm but it doesn't matter
@FAUST_SCALAR_FOR_GM@ lambda_old = 1.0;
@FAUST_SCALAR_FOR_GM@ lambda = 0.0;
@FAUST_SCALAR_FOR_GM@ alpha = 1.0;
@FAUST_SCALAR_FOR_GM@ beta = 0.0;
gm_Op op = OP_NOTRANSP;
// if(transpose)
// if(conjugate)
// op = OP_CONJTRANSP;
// else
// op = OP_TRANSP;
@GM_SCALAR@ one;
set_one<@GM_SCALAR@>(&one);
while((Faust::fabs(lambda_old-lambda)> Faust::fabs(threshold) || Faust::fabs(lambda) <= Faust::fabs(threshold)) && i < max_iter)
{
i++;
lambda_old = lambda;
dsm_funcs->copy(xk, xk_norm);
dsm_funcs->normalize(xk_norm);
/*xk = */marr_funcs->chain_matmul_by_dsm_into(gpu_mat_arr, one, op, xk_norm, xk);
dsm_funcs->info(xk, &nrows, &ncols);
dsm_funcs->dot(xk, xk_norm, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&lambda));
}
flag = (i<max_iter)?i:-1;
return lambda;
}
template<>
@FAUST_SCALAR_FOR_GM@ FaustGPU<@FAUST_SCALAR_FOR_GM@>::power_iteration2(int32_t max_iter, Real<@FAUST_SCALAR_FOR_GM@> threshold, int& flag)
{
int i = 0;
int32_t nrows, ncols, xk_bufsz=0, xk2_bufsz=0; //TODO: remove (just here fore test info of xk)
gm_MatArrayFunc_@GM_SCALAR@* marr_funcs = (gm_MatArrayFunc_@GM_SCALAR@*) this->marr_funcs;
gm_DenseMatFunc_@GM_SCALAR@* dsm_funcs = (gm_DenseMatFunc_@GM_SCALAR@*) this->dsm_funcs;
//TODO: allow to work with F'F or FF' // up to this we compute only F'F
Faust::Vect<@FAUST_SCALAR_FOR_GM@,Cpu> xk_cpu(this->nrows);
Faust::Vect<@FAUST_SCALAR_FOR_GM@,Cpu> xk2_cpu(this->ncols);
xk_cpu.setOnes();
for(auto p: cpu_mat_ptrs)
{
auto mat = static_cast<MatGeneric<@FAUST_SCALAR_FOR_GM@,Cpu>*>(p);
if(xk_bufsz < mat->getNbCol()) // xk buf receives the result of F'*xk2
xk_bufsz = mat->getNbCol();
if(xk2_bufsz < mat->getNbRow()) // xk2 buf receives the result of F*xk
xk2_bufsz = mat->getNbRow();
}
auto xk = dsm_funcs->togpu_bufsz(this->ncols, 1, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(xk_cpu.getData()), xk_bufsz, 1);
auto xk_norm = dsm_funcs->togpu_bufsz(this->ncols, 1, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(xk_cpu.getData()), xk_bufsz, 1); // the CPU data is irrelevant for xk_norm but it doesn't matter
auto xk2 = dsm_funcs->togpu_bufsz(this->nrows, 1, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(xk2_cpu.getData()), xk2_bufsz, 1);
@FAUST_SCALAR_FOR_GM@ lambda_old = 1.0;
@FAUST_SCALAR_FOR_GM@ lambda = 0.0;
@FAUST_SCALAR_FOR_GM@ alpha = 1.0;
@FAUST_SCALAR_FOR_GM@ beta = 0.0;
//TODO: consider the optimal order: FF'*xk or F'F*xk
gm_Op op = OP_NOTRANSP;
gm_Op op2 = OP_TRANSP; //TODO: complex case //verify if OP_CONJTRANSP is more costful
// if(transpose)
// if(conjugate)
// op = OP_CONJTRANSP;
// else
// op = OP_TRANSP;
@GM_SCALAR@ one;
set_one<@GM_SCALAR@>(&one);
while((Faust::fabs(lambda_old-lambda)> Faust::fabs(threshold) || Faust::fabs(lambda) <= Faust::fabs(threshold)) && i < max_iter)
{
i++;
lambda_old = lambda;
dsm_funcs->copy(xk, xk_norm);
dsm_funcs->normalize(xk_norm);
marr_funcs->chain_matmul_by_dsm_into(gpu_mat_arr, one, op, xk_norm, xk2);
marr_funcs->chain_matmul_by_dsm_into(gpu_mat_arr, one, op2, xk2, xk);
dsm_funcs->dot(xk, xk_norm, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&lambda));
}
flag = (i<max_iter)?i:-1;
return lambda;
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment