Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 4a8add51 authored by hhakim's avatar hhakim
Browse files

Remove old unused PALM4MSA 2020 implementation (cf. issue #244).

parent 72be7706
No related branches found
No related tags found
No related merge requests found
......@@ -115,12 +115,7 @@ int main(int argc, char* argv[])
vector<ConstraintGeneric*> gpu_constraints{&gpu_c1, &gpu_c2};
#endif
if(meth == 1)
{
cout << "use impl1" << endl;
Faust::palm4msa<FPP,Cpu>(data, constraints, facts, lambda, sc, is_update_way_R2L, factors_format, compute_2norm_on_array, norm2_threshold, norm2_max_iter, false, FAUST_PRECISION, on_gpu, true);
}
else if(meth == 2)
if(meth == 2)
{
cout << "use impl2" << endl;
Faust::palm4msa2<FPP,Cpu>(data, constraints, facts, lambda, sc, is_update_way_R2L, factors_format, packing_RL, no_normalization, no_lambda, MHTPParams<FPP>(), compute_2norm_on_array, norm2_threshold, norm2_max_iter, false, FAUST_PRECISION, on_gpu);
......@@ -138,7 +133,7 @@ int main(int argc, char* argv[])
#endif
else
{
std::cerr << "meth (arg 1) must be 1 or 2." << endl;
std::cerr << "meth (arg 1) must be 2 or 3." << endl;
}
// auto left_th_2 = th->left(2);
// left_th_2->display();
......
......@@ -37,27 +37,6 @@
namespace Faust
{
template <typename FPP, FDevice DEVICE>
void palm4msa(
/** input matrix */
const MatDense<FPP,DEVICE>& A,
/* input constraints */
std::vector<ConstraintGeneric*> & constraints,
/** output factors (if not initialized to size nfacts, set to nfacts identities (with constraints dims) */
TransformHelper<FPP,DEVICE>& S,
/** lambda output, intialized from outside */
FPP& lambda,
//const unsigned int nites,
const StoppingCriterion<Real<FPP>>& sc,
const bool is_update_way_R2L=false,
const FactorsFormat factors_format=AllDynamic,
const bool compute_2norm_on_array=false,
const Real<FPP> norm2_threshold=FAUST_PRECISION,
const unsigned int norm2_max_iter=FAUST_NORM2_MAX_ITER,
const bool constant_step_size=false, const Real<FPP> step_size=FAUST_PRECISION,
const bool on_gpu=false,
const bool is_verbose=false); //TODO: the def of this decl must be updated in faust_palm4msa2020.hpp
template <typename FPP, FDevice DEVICE>
void palm4msa2(
......@@ -162,5 +141,4 @@ namespace Faust
}
#include "faust_palm4msa2020.hpp"
#include "faust_palm4msa2020_2.hpp"
#endif
template <typename FPP, FDevice DEVICE>
void Faust::palm4msa(const Faust::MatDense<FPP,DEVICE>& A,
void Faust::fill_of_eyes(
Faust::TransformHelper<FPP,DEVICE>& S,
const unsigned int nfacts,
const bool sparse,
const std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims,
const bool on_gpu)
{
if(S.size() > 0)
throw std::runtime_error("The Faust must be empty for intializing it to eyes factors.");
for(auto fdims : dims)
{
// std::cout << "fill_of_eyes() fdims: " << fdims.first << " " << fdims.second << std::endl;
// init all facts as identity matrices
// with proper dimensions
Faust::MatGeneric<FPP,DEVICE>* fact;
if(sparse)
{
auto sfact = new Faust::MatSparse<FPP,DEVICE>(fdims.first, fdims.second);
sfact->setEyes();
fact = sfact;
}
else
{
auto dfact = new Faust::MatDense<FPP,DEVICE>(fdims.first, fdims.second);
dfact->setEyes();
fact = dfact;
}
S.push_back(fact); //TODO: copying=false
}
}
template<typename FPP, FDevice DEVICE>
Real<FPP> Faust::calc_rel_err(const TransformHelper<FPP,DEVICE>& S, const MatDense<FPP,DEVICE> &A, const Real<FPP> &lambda/*=1*/, const Real<FPP>* A_norm/*=nullptr*/)
{
MatDense<FPP, DEVICE> err = const_cast<TransformHelper<FPP, DEVICE>&>(S).get_product();
err *= FPP(lambda);
err -= A;
if(A_norm == nullptr)
return err.norm() / A.norm();
return err.norm() / *A_norm;
}
template <typename FPP, FDevice DEVICE>
void Faust::palm4msa2(const Faust::MatDense<FPP,DEVICE>& A,
std::vector<Faust::ConstraintGeneric*> & constraints,
Faust::TransformHelper<FPP,DEVICE>& S,
FPP& lambda,
Real<FPP>& lambda, //TODO: FPP lambda ? is it useful to have a complex lamdba ?
//const unsigned int nites,
const StoppingCriterion<Real<FPP>>& sc,
const bool is_update_way_R2L,
const FactorsFormat factors_format/*=AllDynamic*/,
const FactorsFormat factors_format,
const bool packing_RL,
const bool no_normalization/*=false*/,
const bool no_lambda/*=false*/,
const MHTPParams<Real<FPP>> mhtp_params/*=MHTPParams<FPP>()*/,
const bool compute_2norm_on_array,
const Real<FPP> norm2_threshold,
const unsigned int norm2_max_iter,
const bool constant_step_size, const Real<FPP> step_size,
bool constant_step_size, Real<FPP> step_size,
const bool on_gpu /*=false*/,
const bool is_verbose/*=false*/)
const bool is_verbose/*=false*/, const int id/*=0*/)
{
// TODO: this function must be reconsidered for deletion (keeping only palm4msa2 -- yet to rename)
std::chrono::duration<double> norm2_duration = std::chrono::duration<double>::zero();
std::chrono::duration<double> fgrad_duration = std::chrono::duration<double>::zero();
double norm1, norm2;
/* variable environment parameters (which are interesting enough for debugging/profiling but not yet for the wrapper user API */
char* str_env_prod_mod = getenv("PROD_MOD");
int prod_mod = DYNPROG; // GREEDY_ALL_BEST_GENMAT; DYNPROG is a bit better to factorize the MEG matrix and not slower to factorize a Hadamard matrix
if(str_env_prod_mod)
prod_mod = std::atoi(str_env_prod_mod);
auto str_env_no_lambda_error = getenv("NO_LAMBDA_ERROR");
bool no_lambda_error = false;
if(str_env_no_lambda_error)
no_lambda_error = (bool) std::atoi(str_env_no_lambda_error);
// auto str_env_no_lambda = getenv("NO_LAMBDA");
// if(str_env_no_lambda)
// {
// no_lambda = (bool) std::atoi(str_env_no_lambda);
// }
bool use_grad1 = false;
auto str_env_use_grad1 = getenv("USE_GRAD1");
if(str_env_use_grad1)
use_grad1 = (bool) std::atoi(str_env_use_grad1);
/******************************************************/
// std::cout << "palm4msa2 "<< std::endl;
if(constraints.size() == 0)
throw out_of_range("No constraint passed to palm4msa.");
const Real<FPP> lipschitz_multiplicator = 1.001;
Faust::MatGeneric<FPP,DEVICE>* cur_fac;
Faust::MatSparse<FPP,DEVICE>* scur_fac;
Faust::MatDense<FPP,DEVICE>* dcur_fac;
Faust::MatSparse<FPP,DEVICE> spD;
Real<FPP> error = -1; // negative error is ignored
const unsigned int nfacts = constraints.size();
unsigned int nfacts = constraints.size();
Real<FPP> error = -1; //negative error is ignored
std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims;
int norm2_flag; // return val
for(auto c: constraints)
{
dims.push_back(make_pair(c->get_rows(), c->get_cols()));
if(no_normalization)
{
c->set_normalizing(false);
}
}
//TODO: make it possible to receive a MatSparse A
if(is_verbose && mhtp_params.used)
{
std::cout << mhtp_params.constant_step_size << std::endl;
std::cout << mhtp_params.to_string() << std::endl;
}
Faust::MatDense<FPP,DEVICE> A_H = A;
A_H.adjoint();
if(S.size() != nfacts)
fill_of_eyes(S, nfacts, factors_format != AllDense, dims, on_gpu);
int i = 0, f_id;
std::function<void()> init_fid, next_fid;
else if(factors_format == AllSparse)
{
S.convertToSparse();
}
else if(factors_format == AllDense)
{
S.convertToDense();
}
int i = 0, f_id, j;
std::function<void()> init_ite, next_fid;
std::function<bool()> updating_facs;
std::function<bool()> is_last_fac_updated;
// packed Fausts corresponding to each factor
std::vector<TransformHelper<FPP,DEVICE>*> pL, pR;
pL.resize(nfacts);// pL[i] is the Faust for all factors to the left of the factor *(S.begin()+i)
pR.resize(nfacts);// pR[i] the same for the right of S_i
Faust::MatDense<FPP,DEVICE> D, tmp;
Faust::MatSparse<FPP,DEVICE> spD;
Real<FPP> c = 1/step_size;
for(int i=0;i<nfacts;i++)
{
pL[i] = new TransformHelper<FPP,DEVICE>();
pR[i] = new TransformHelper<FPP,DEVICE>();
}
if(is_update_way_R2L)
{
init_fid = [&f_id, &nfacts]() {f_id = nfacts-1;};
next_fid = [&f_id]() {f_id--;};
init_ite = [&f_id, &nfacts, &pL, &S, &packing_RL, &on_gpu, &prod_mod]()
{
//pre-compute left products for each Si
if(pL[0] != nullptr) delete pL[0];
pL[0] = new TransformHelper<FPP,DEVICE>(); // empty faust // no factors to the left of *(S.begin())
for(int i=1;i < nfacts; i++)
{
auto vec_Si_minus_1 = { *(S.begin()+i-1) };
if(pL[i] != nullptr) delete pL[i]; //TODO: maybe to replace by a TransformHelper stored in the stack to avoid deleting each time
pL[i] = new TransformHelper<FPP,DEVICE>(*pL[i-1], vec_Si_minus_1);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pL[i])->pack_factors(prod_mod);
}
// all pL[i] Fausts are composed at most of one factor matrix
f_id = nfacts-1;
};
next_fid = [&f_id, &pR, &S, &packing_RL, &on_gpu, &prod_mod]()
{
if(f_id > 0)
{
if(pR[f_id-1] != nullptr)
delete pR[f_id-1];
auto vec_Sj = { *(S.begin()+f_id) };
pR[f_id-1] = new Faust::TransformHelper<FPP,DEVICE>(vec_Sj, *pR[f_id]);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pR[f_id-1])->pack_factors(prod_mod);
}
f_id--;
};
is_last_fac_updated = [&f_id]() {return f_id == 0;};
updating_facs = [&f_id]() {return f_id >= 0;};
}
else
{
init_fid = [&f_id]() {f_id = 0;};
next_fid = [&f_id]() {f_id++;};
init_ite = [&f_id, &pR, &S, &packing_RL, &on_gpu, &prod_mod]()
{
if(pR[S.size()-1] != nullptr) delete pR[S.size()-1];
pR[S.size()-1] = new TransformHelper<FPP,DEVICE>(); // empty faust // no factors to the right of *(S.begin()+S.size()]-1)
for(int i=S.size()-2;i >= 0; i--)
{
auto vec_Si_plus_1 = { *(S.begin()+i+1) };
if(pR[i] != nullptr) delete pR[i];
pR[i] = new TransformHelper<FPP,DEVICE>(vec_Si_plus_1, *pR[i+1]);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pR[i])->pack_factors(prod_mod);
}
f_id = 0;
};
next_fid = [&f_id, &S, &pL, &nfacts, packing_RL, &on_gpu, &prod_mod]()
{
if(f_id < nfacts-1)
{
if(pL[f_id+1] != nullptr)
delete pL[f_id+1];
auto vec_Sj = { *(S.begin()+f_id) };
pL[f_id+1] = new Faust::TransformHelper<FPP,DEVICE>(*pL[f_id], vec_Sj);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pL[f_id+1])->pack_factors(prod_mod);
}
f_id++;
};
updating_facs = [&f_id, &nfacts]() {return f_id < nfacts;};
is_last_fac_updated = [&f_id, &nfacts]() {return f_id == nfacts-1;};
}
Faust::MatDense<FPP,Cpu> D, tmp;
Faust::TransformHelper<FPP, Cpu> LSR;
Real<FPP> c = 1/step_size;
// lambda exp to update fact when its id is 0
auto update_1stfac = [&sc, &error, &constant_step_size, &c, &A, &D, &tmp, &LSR, &scur_fac, &dcur_fac, &f_id, &S, &lipschitz_multiplicator, &lambda, &norm2_threshold, &norm2_flag, &norm2_max_iter, &on_gpu](Faust::MatGeneric<FPP, DEVICE> *cur_fac)
while(sc.do_continue(i, error))
{
auto R = S.right(f_id+1);
if(! constant_step_size)
// std::cout << "i: " << i << std::endl;
// std::cout << "nfacts:" << nfacts << std::endl;
init_ite();
while(updating_facs())
{
Real<FPP> nR = R->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
c = lipschitz_multiplicator*lambda*lambda*nR*nR;
// std::cout << "#f_id: " << f_id << std::endl;
cur_fac = S.get_gen_fact_nonconst(f_id);
if(mhtp_params.used && i%mhtp_params.palm4msa_period == 0)
{
perform_MHTP(mhtp_params, A, A_H, S, f_id, pL, pR, packing_RL,
is_verbose, *constraints[f_id], norm2_max_iter, norm2_threshold,
norm2_duration,
fgrad_duration,
sc, error, factors_format, prod_mod, c, lambda);
}
else
update_fact(cur_fac, f_id, A, S, pL, pR, packing_RL,
is_verbose, *constraints[f_id], norm2_max_iter, norm2_threshold,
norm2_duration,
fgrad_duration,
constant_step_size, step_size,
sc, error, factors_format, prod_mod, c, lambda, use_grad1);
next_fid(); // f_id updated to iteration factor index (pL or pR too)
}
// std::cout << "c=" << c << std::endl;
//TODO: check if c is nan
Faust::TransformHelper<FPP, Cpu> L;
scur_fac = nullptr, dcur_fac = nullptr;
if(S.is_fact_sparse(f_id))
//update lambda
if(! no_lambda)
update_lambda(S, pL, pR, A_H, lambda, no_lambda_error);
if(is_verbose)
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(cur_fac);
D = *scur_fac;
Faust::TransformHelper<FPP, Cpu> _L({scur_fac}, 1.0, false, false);
L = _L;
set_calc_err_ite_period(); //macro setting the variable ite_period
if((! (i%ite_period)) && ite_period > 0)
{
std::cout << "PALM4MSA2020 iteration: " << i;
auto err = calc_rel_err(S, A, lambda);
std::cout << " relative error: " << err;
std::cout << " (call id: " << id << ")" << std::endl;
std::cout << " lambda=" << lambda << std::endl;
}
}
i++;
}
S.update_total_nnz();
// free the latest pR and pL TransformHelpers
for(int i=0;i<nfacts;i++)
{
if(pL[i] != nullptr)
delete pL[i];
if(pR[i] != nullptr)
delete pR[i];
}
if(is_verbose)
{
std::cout << "palm4msa spectral time=" << norm2_duration.count() << std::endl;
std::cout << "palm4msa fgrad time=" << fgrad_duration.count() << std::endl;
}
}
template <typename FPP, FDevice DEVICE>
void Faust::compute_n_apply_grad1(const int f_id, const Faust::MatDense<FPP,DEVICE> &A, Faust::TransformHelper<FPP,DEVICE>& S, std::vector<TransformHelper<FPP,DEVICE>*> &pL, std::vector<TransformHelper<FPP,DEVICE>*>& pR, const bool packing_RL, const Real<FPP>& lambda, const Real<FPP> &c, Faust::MatDense<FPP,DEVICE> &out /* D */, const StoppingCriterion<Real<FPP>>& sc, Real<FPP> &error, const int prod_mod)
{
Faust::MatDense<FPP,DEVICE> tmp;
Faust::MatDense<FPP,DEVICE> & D = out;
// Faust::MatGeneric<FPP,DEVICE> * LorR;
Faust::MatDense<FPP,DEVICE> * LorR;
Faust::MatDense<FPP,DEVICE> _LorR;
auto S_j_vec = {*(S.begin()+f_id)};
Faust::TransformHelper<FPP, DEVICE> _LSR(/*lambda_vec,*/ *pL[f_id], S_j_vec, *pR[f_id]);
// tmp = _LSR.get_product(prod_mod);
_LSR.get_product(tmp, prod_mod);
// _LSR.get_product(tmp);
tmp *= FPP(lambda);
tmp -= A;
if(sc.isCriterionErr())
error = tmp.norm();
FPP alpha_R = 1, alpha_L = 1, beta_R = 0, beta_L = 0; //decl in parent scope
auto pR_sz = pR[f_id]->size();
auto pL_sz = pL[f_id]->size();
if(pR_sz > 0)
{
if(pR_sz == 1 && packing_RL) // packing_RL == true
LorR = dynamic_cast<Faust::MatDense<FPP,DEVICE>*>(pR[f_id]->get_gen_fact_nonconst(0));
// LorR = pR[f_id]->get_gen_fact_nonconst(0); //normally pR[f_id] is packed (hence reduced to a single MatDense)
else
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
Faust::TransformHelper<FPP, Cpu> _L({dcur_fac}, 1.0, false, false);
L = _L;
_LorR = pR[f_id]->get_product(prod_mod);
LorR = &_LorR;
}
Faust::TransformHelper<FPP, Cpu> _LSR(L, *R); // L contains cur_fac
// LSR = _LSR;
// _LSR.multiply(lambda);
// tmp = _LSR.get_product();
_LSR.get_product(tmp);
tmp *= lambda;
tmp -= A;
if(sc.isCriterionErr())
error = tmp.norm();
//TODO: do something to lighten the double transpose conjugate
tmp.adjoint();
tmp = R->multiply(tmp, /* H */ false, false);
tmp.adjoint();
tmp *= lambda/c;
D -= tmp;
};
auto update_lastfac = [&sc, &error, &constant_step_size, &c, &A, &D, &tmp, &LSR, &scur_fac, &dcur_fac, &f_id, &S, &lipschitz_multiplicator, &lambda, &norm2_threshold, &norm2_flag, &norm2_max_iter, &on_gpu](Faust::MatGeneric<FPP, DEVICE> *cur_fac)
if(pL_sz == 0)
{ //no L factor for factor f_id
alpha_R = - lambda/c;
beta_R = 1;
gemm(tmp, *LorR, D, alpha_R, beta_R, 'N', 'H');
// gemm_gen(tmp, *LorR, D, alpha_R, beta_R, 'N', 'H');
}
else
gemm(tmp, *LorR, tmp, alpha_R, beta_R, 'N', 'H');
// gemm_gen(tmp, *LorR, tmp, alpha_R, beta_R, 'N', 'H');
}
if(pL_sz > 0)
{
auto L = S.left(f_id-1);
// TODO: factorize with other lambda exp
if(! constant_step_size)
if(pL_sz == 1 && packing_RL) // packing_RL == true
LorR = dynamic_cast<Faust::MatDense<FPP,DEVICE>*>(pL[f_id]->get_gen_fact_nonconst(0));
// LorR = pL[f_id]->get_gen_fact_nonconst(0);
else
{
Real<FPP> nL = L->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
c = lipschitz_multiplicator*lambda*lambda*nL*nL;
_LorR = pL[f_id]->get_product(prod_mod);
LorR = &_LorR;
}
// std::cout << "c=" << c << std::endl;
//TODO: check if c is nan
Faust::TransformHelper<FPP, Cpu> R;
scur_fac = nullptr, dcur_fac = nullptr;
if(S.is_fact_sparse(f_id))
alpha_L = -lambda/c;
beta_L = 1;
gemm(*LorR, tmp, D, alpha_L, beta_L, 'H', 'N');
// gemm_gen(*LorR, tmp, D, alpha_L, beta_L, 'H', 'N');
}
}
template <typename FPP, FDevice DEVICE>
void Faust::compute_n_apply_grad2(const int f_id, const Faust::MatDense<FPP,DEVICE> &A, Faust::TransformHelper<FPP,DEVICE>& S, std::vector<TransformHelper<FPP,DEVICE>*> &pL, std::vector<TransformHelper<FPP,DEVICE>*>& pR, const bool packing_RL, const Real<FPP>& lambda, const Real<FPP> &c, Faust::MatDense<FPP,DEVICE> &out /* D */, const StoppingCriterion<Real<FPP>>& sc, Real<FPP> &error, const int prod_mod)
{
Faust::MatDense<FPP,DEVICE> tmp;
Faust::MatDense<FPP,DEVICE> grad_over_c;
Faust::MatDense<FPP,DEVICE> & D = out;
Faust::MatGeneric<FPP,DEVICE> *_L, *_R;
Faust::MatDense<FPP,DEVICE> __L, __R;
Faust::MatDense<FPP,DEVICE> * LorR;
Faust::MatDense<FPP,DEVICE> _LorR;
std::vector<Faust::MatGeneric<FPP,DEVICE>*> facts;
std::vector<char> tc_flags;
//#define mul_3_facts multiply_order_opt
#define mul_3_facts multiply_order_opt_all_ends// this one only optimizes the product on factor ends but for three factors it doesn't change anything comparing to multiply_order_opt
tmp = A;
auto pR_sz = pR[f_id]->size();
auto pL_sz = pL[f_id]->size();
if(pR_sz > 0)
{
if(pR_sz == 1)
_R = pR[f_id]->get_gen_fact_nonconst(0);
else
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(cur_fac);
D = *scur_fac;
Faust::TransformHelper<FPP, Cpu> _R({scur_fac}, 1.0, false, false);
R = _R;
// __R = pR[f_id]->get_product(prod_mod); // disabled because GREEDY_ALL_BEST_GENMAT is slower than DEFAULT_L2R for Hadamard factorization
__R = pR[f_id]->get_product();
_R = &__R;
}
}
if(pL_sz > 0)
{
if(pL_sz == 1)
_L = pL[f_id]->get_gen_fact_nonconst(0);
else
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
Faust::TransformHelper<FPP, Cpu> _R({dcur_fac}, 1.0, false, false);
R = _R;
// __L = pL[f_id]->get_product(prod_mod); // disabled because GREEDY_ALL_BEST_GENMAT is slower than DEFAULT_L2R for Hadamard factorization
__L = pL[f_id]->get_product();
_L = &__L;
}
Faust::TransformHelper<FPP, Cpu> _LSR(*L, R); // L contains cur_fac
// LSR = _LSR;
// _LSR.multiply(lambda);
tmp = _LSR.get_product();
tmp *= lambda;
tmp -= A;
}
if(pR_sz > 0 && pL_sz > 0)
{
// compute error = m_lambda*L*S*R-data
facts = { _L, &D, _R };
mul_3_facts(facts, tmp, (FPP) lambda, (FPP) -1.0);
if(sc.isCriterionErr())
error = tmp.norm();
// compute m_lambda/c * L'*error*R'
facts = { _L, &tmp, _R };
tc_flags = {'H', 'N', 'H'};
}
else if(pR_sz > 0)
{
// compute error = m_lambda*L*S*R-data
facts = { &D, _R };
mul_3_facts(facts, tmp, (FPP) lambda, (FPP) -1.0);
if(sc.isCriterionErr())
error = tmp.norm();
tmp = L->multiply(tmp, /* NO H */ true, true);
tmp *= lambda/c;
D -= tmp;
};
auto update_interfac = [&sc, &error, &constant_step_size, &c, &A, &D, &tmp, &LSR, &scur_fac, &dcur_fac, &f_id, &S, &lipschitz_multiplicator, &lambda, &norm2_threshold, &norm2_flag, &norm2_max_iter, &on_gpu](Faust::MatGeneric<FPP, DEVICE> *cur_fac)
// compute m_lambda/c * L'*error*R'
facts = { &tmp, _R };
tc_flags = { 'N', 'H'};
}
else //if(pL_sz > 0)
{
auto R = S.right(f_id+1);
auto L = S.left(f_id-1);
if(! constant_step_size)
// compute error = m_lambda*L*S*R-data
facts = { _L, &D};
mul_3_facts(facts, tmp, (FPP) lambda, (FPP) -1.0);
if(sc.isCriterionErr())
error = tmp.norm();
// compute m_lambda/c * L'*error*R'
facts = { _L, &tmp};
tc_flags = {'H', 'N'};
}
// mul_3_facts(facts, D, (FPP) - lambda/c, (FPP)1, tc_flags); // this one has showed error in calculation when using complex matrices (DFT factorization)
// do it two steps: 1) compute the gradient, then 2) apply it separately to D (the current factor)
mul_3_facts(facts, grad_over_c, (FPP) lambda/c, (FPP)0, tc_flags);
D -= grad_over_c;
}
template<typename FPP, FDevice DEVICE>
void Faust::update_lambda(Faust::TransformHelper<FPP,DEVICE>& S, std::vector<TransformHelper<FPP,DEVICE>*> &pL, std::vector<TransformHelper<FPP,DEVICE>*>& pR, const MatDense<FPP, DEVICE> &A_H, Real<FPP>& lambda, bool no_lambda_error/*= false*/)
{
Faust::MatDense<FPP,DEVICE> A_H_S;
MatDense<FPP, DEVICE> S_mat;
FPP tr; // A_H_S trace
Real<FPP> nS; // S fro. norm
auto n = S.size();
bool packing_RL = (pR[0] == nullptr || pR[0]->size() == 1) && (pL[n-1] == nullptr || pL[n-1]->size() == 1);
// compute S full matrix
if(packing_RL)
{
if(pR[0] == nullptr || pL[n-1] == nullptr)
throw std::logic_error("update_lambda: pR and pL weren't properly initialized.");
// optimize S product by re-using R[0] or L[0] (S == S[0]*R[0] or S == L[n-1]*S[n-1])
if(S.get_gen_fact(0)->getNbCol() * pR[0]->getNbRow() < pL[n-1]->getNbCol() * S.get_gen_fact(n-1)->getNbRow())
{
Real<FPP> nR = R->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
Real<FPP> nL = L->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
c = lipschitz_multiplicator*lambda*lambda*nR*nR*nL*nL;
auto S0 = { S.get_gen_fact(0) };
TransformHelper<FPP, DEVICE> _S(S0, *pR[0]);
_S.get_product(S_mat);
}
//TODO: check if c is nan
scur_fac = nullptr, dcur_fac = nullptr;
if(S.is_fact_sparse(f_id))
else
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(cur_fac);
D = *scur_fac;
auto Sn = { S.get_gen_fact(n-1) };
TransformHelper<FPP, DEVICE> _S(*pL[n-1], Sn);
_S.get_product(S_mat);
}
else
}
else
S.get_product(S_mat);
gemm(A_H, S_mat, A_H_S, (FPP) 1.0, (FPP) 0.0, 'N', 'N');
tr = A_H_S.trace();
nS = S_mat.norm();
if(FPP(0) == nS)
if(no_lambda_error)
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
// don't change lambda
std::cout << "WARNING: lambda didn't change because S Fro. norm is zero." << std::endl;
return;
}
Faust::TransformHelper<FPP, Cpu> _LSR(*L, *R); // L contains cur_fac
// LSR = _LSR;
// _LSR.multiply(lambda);
tmp = _LSR.get_product();
tmp *= lambda;
tmp -= A;
if(sc.isCriterionErr())
error = tmp.norm();
//TODO: do something to lighten the double transpose conjugate
tmp.adjoint();
tmp = R->multiply(tmp, /* NO H */ false, false);
tmp.adjoint();
tmp = L->multiply(tmp, true, true);
tmp *= lambda/c;
D -= tmp;
};
while(sc.do_continue(i, error))
{
// std::cout << "nfacts:" << nfacts << std::endl;
init_fid();
while(updating_facs())
else
throw std::runtime_error("Error in update_lambda: S Frobenius norm is zero, can't compute lambda.");
if(std::isnan(std::real(tr)) || std::isnan(nS))
if(no_lambda_error)
{
// std::cout << "f_id: " << f_id << std::endl;
cur_fac = S.get_gen_fact_nonconst(f_id);
if(f_id == 0)
{
update_1stfac(cur_fac);
}
else if(f_id == nfacts-1)
update_lastfac(cur_fac);
else
update_interfac(cur_fac);
// really update now
// don't change lambda
std::cout << "WARNING: lambda didn't change because S contains NaN." << std::endl;
return;
}
else
throw std::runtime_error("Error in update_lambda: S (the Faust) contains nan elements in at least one of its matrices, can't compute lambda.");
lambda = std::real(tr)/(nS*nS);
}
//TODO: delete (after GPU2 power_iteration mod) the following block since the nan error is managed directly in power_iteration now
template<typename FPP>
Real<FPP> Faust::compute_double_spectralNorm(MatDense<FPP, Cpu>& mat, int norm2_max_iter, double norm2_threshold)
{
int flag;
MatDense<Real<FPP>, Cpu> rmat;
mat.real(rmat);
Faust::MatDense<double, Cpu> dmat;
dmat.resize(mat.getNbRow(), mat.getNbCol());
for(int i=0;i < dmat.getNbRow(); i++)
for(int j=0;j < dmat.getNbCol(); j++)
dmat.getData()[j*dmat.getNbRow()+i] = (double) rmat(i,j);
// mat.real(dmat);
auto n = dmat.spectralNorm(norm2_max_iter, norm2_threshold, flag);
return (Real<FPP>)n;
}
#ifdef USE_GPU_MOD
//TODO: handle the nan problem in GPU2 power_iteration directly and the remove this function
template<typename FPP>
Real<FPP> Faust::compute_double_spectralNorm(MatDense<FPP, GPU2>& mat, int norm2_max_iter, double norm2_threshold)
{
int flag;
// Update the S factor
if(factors_format == AllDynamic)
Faust::MatDense<Real<FPP>, GPU2> rmat(mat.getNbRow(), mat.getNbCol(), nullptr, true);
mat.real(rmat);
Faust::MatDense<Real<FPP>, Cpu> cpu_rmat;
rmat.tocpu(cpu_rmat);
return compute_double_spectralNorm(cpu_rmat, norm2_max_iter, norm2_threshold);
}
#endif
template<typename FPP, FDevice DEVICE>
void Faust::update_fact(
Faust::MatGeneric<FPP,DEVICE>* cur_fac,
int f_id,
const Faust::MatDense<FPP,DEVICE>& A,
Faust::TransformHelper<FPP,DEVICE>& S,
std::vector<TransformHelper<FPP,DEVICE>*> &pL,
std::vector<TransformHelper<FPP,DEVICE>*> &pR,
const bool packing_RL,
const bool is_verbose,
const Faust::ConstraintGeneric &constraint,
const int norm2_max_iter,
const Real<FPP>& norm2_threshold,
std::chrono::duration<double>& norm2_duration,
std::chrono::duration<double>& fgrad_duration,
const bool constant_step_size,
const Real<FPP> step_size,
const StoppingCriterion<Real<FPP>>& sc,
Real<FPP> &error,
const FactorsFormat factors_format,
const int prod_mod,
Real<FPP> &c,
const Real<FPP>& lambda,
bool use_grad1/*= false*/)
{
int norm2_flag;
std::chrono::time_point<std::chrono::high_resolution_clock> spectral_stop, spectral_start;
std::chrono::time_point<std::chrono::high_resolution_clock> fgrad_stop, fgrad_start;
Faust::MatSparse<FPP,DEVICE>* scur_fac = nullptr;
Faust::MatDense<FPP,DEVICE>* dcur_fac = nullptr;
Faust::MatDense<FPP,DEVICE> D;
Faust::MatSparse<FPP,DEVICE> spD;
Real<FPP> nR=1,nL=1;
if(constant_step_size)
c = 1 / step_size;
else
{
if(is_verbose)
spectral_start = std::chrono::high_resolution_clock::now();
if(pR[f_id]->size() > 0)
{
nR = pR[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
//TODO: delete (after GPU2 power_iteration mod) the following block since the nan error is managed directly in power_iteration now
if(std::is_same<FPP, float>::value && std::isnan(nR))
{
// new_fac is a MatGeneric but underliying concrete object can be a MatSparse or a MatDense
// new_fac is allocated in the heap
// replace the former fact by the new one
auto new_fac = constraints[f_id]->project_gen<FPP,DEVICE,Real<FPP>>(D);
S.replace(new_fac, f_id);
auto M = pR[f_id]->get_product();
nR = compute_double_spectralNorm(M, norm2_max_iter, norm2_threshold);
if(is_verbose)
std::cout << "Corrected R NaN float 2-norm by recomputing as double 2-norm" << nR << std::endl;
}
else // factors_format == AllDense or AllSparse
}
if(pL[f_id]->size() > 0)
{
nL = pL[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
//TODO: delete (after GPU2 power_iteration mod) the following block since the nan error is managed directly in power_iteration now
if(std::is_same<FPP, float>::value && std::isnan(nL))
{
constraints[f_id]->project<FPP,DEVICE,Real<FPP>>(D);
// D is the prox image (always a MatDense
// convert D to the proper format (MatSparse or MatDense)
if(factors_format == AllSparse && dcur_fac != nullptr || factors_format != AllSparse && scur_fac != nullptr)
throw std::runtime_error("Current factor is inconsistent with the configured factors_format.");
if(factors_format == AllSparse)
{
// convert to sparse then update
spD = D;
S.update(spD, f_id); // update is at higher level than a simple assignment
}
else
{ // factors_format == AllDense
// directly update (D is a MatDense)
S.update(D, f_id);
}
auto M = pL[f_id]->get_product();
nL = compute_double_spectralNorm(M, norm2_max_iter, norm2_threshold);
if(is_verbose)
std::cout << "Corrected L NaN float 2-norm by recomputing as double 2-norm:" << nL << std::endl;
}
next_fid(); //f_id updated to iteration factor index
}
//update lambda
//TODO: variable decl in parent scope
Faust::MatDense<FPP,DEVICE> A_H_S = S.multiply(A_H);
FPP tr = A_H_S.trace();
Real<FPP> trr = std::real(tr);
Real<FPP> n = S.normFro();
lambda = trr/(n*n);
// std::cout << "debug lambda: " << lambda << std::endl;
i++;
if(std::isnan(nL) || std::isnan(nR))
{
std::cout << "R 2-norm:" << nR << std::endl;
std::cout << "L 2-norm:" << nL << std::endl;
std::cout << "S id:" << f_id << std::endl;
throw std::runtime_error("2-norm computation error: R or L 2-norm is NaN.");
}
if(is_verbose)
{
spectral_stop = std::chrono::high_resolution_clock::now();
norm2_duration += spectral_stop-spectral_start;
}
c = LIPSCHITZ_MULTIPLICATOR*lambda*lambda*nR*nR*nL*nL;
}
if(S.is_fact_sparse(f_id))
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,DEVICE>*>(cur_fac);
D = *scur_fac;
dcur_fac = nullptr;
}
else
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,DEVICE>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
scur_fac = nullptr;
}
}
template <typename FPP, FDevice DEVICE>
void Faust::fill_of_eyes(
Faust::TransformHelper<FPP,DEVICE>& S,
const unsigned int nfacts,
const bool sparse,
const std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims,
const bool on_gpu)
{
if(S.size() > 0)
throw std::runtime_error("The Faust must be empty for intializing it to eyes factors.");
for(auto fdims : dims)
if(is_verbose)
fgrad_start = std::chrono::high_resolution_clock::now();
if(typeid(D) != typeid(Faust::MatDense<FPP,Cpu>) || use_grad1)
// compute_n_apply_grad2 is not yet supported with GPU2
compute_n_apply_grad1(f_id, A, S, pL, pR, packing_RL, lambda, c, D, sc, error, prod_mod);
else
compute_n_apply_grad2(f_id, A, S, pL, pR, packing_RL, lambda, c, D, sc, error, prod_mod);
if(is_verbose)
{
// std::cout << "fill_of_eyes() fdims: " << fdims.first << " " << fdims.second << std::endl;
// init all facts as identity matrices
// with proper dimensions
Faust::MatGeneric<FPP,DEVICE>* fact;
if(sparse)
fgrad_stop = std::chrono::high_resolution_clock::now();
fgrad_duration += fgrad_stop-fgrad_start;
}
// Update the S factor
if(factors_format == AllDynamic)
{
// new_fac is a MatGeneric but underliying concrete object can be a MatSparse or a MatDense
// new_fac is allocated in the heap
// replace the former fact by the new one
auto new_fac = constraint.project_gen<FPP,DEVICE,Real<FPP>>(D);
S.replace(new_fac, f_id);
}
else // factors_format == AllDense or AllSparse
{
constraint.project<FPP,DEVICE,Real<FPP>>(D);
// D is the prox image (always a MatDense
// convert D to the proper format (MatSparse or MatDense)
if(factors_format == AllSparse && dcur_fac != nullptr || factors_format == AllDense && scur_fac != nullptr)
throw std::runtime_error("Current factor is inconsistent with the configured factors_format.");
if(factors_format == AllSparse)
{
auto sfact = new Faust::MatSparse<FPP,DEVICE>(fdims.first, fdims.second);
sfact->setEyes();
fact = sfact;
// convert to sparse then update
spD = D;
S.update(spD, f_id); // update is at higher level than a simple assignment
}
else
{
auto dfact = new Faust::MatDense<FPP,DEVICE>(fdims.first, fdims.second);
dfact->setEyes();
fact = dfact;
{ // factors_format == AllDense
// directly update (D is a MatDense)
S.update(D, f_id);
}
S.push_back(fact); //TODO: copying=false
}
}
template<typename FPP, FDevice DEVICE>
Real<FPP> Faust::calc_rel_err(const TransformHelper<FPP,DEVICE>& S, const MatDense<FPP,DEVICE> &A, const Real<FPP> &lambda/*=1*/, const Real<FPP>* A_norm/*=nullptr*/)
{
MatDense<FPP, DEVICE> err = const_cast<TransformHelper<FPP, DEVICE>&>(S).get_product();
err *= FPP(lambda);
err -= A;
if(A_norm == nullptr)
return err.norm() / A.norm();
return err.norm() / *A_norm;
}
template <typename FPP, FDevice DEVICE>
void Faust::palm4msa2(const Faust::MatDense<FPP,DEVICE>& A,
std::vector<Faust::ConstraintGeneric*> & constraints,
Faust::TransformHelper<FPP,DEVICE>& S,
Real<FPP>& lambda, //TODO: FPP lambda ? is it useful to have a complex lamdba ?
//const unsigned int nites,
const StoppingCriterion<Real<FPP>>& sc,
const bool is_update_way_R2L,
const FactorsFormat factors_format,
const bool packing_RL,
const bool no_normalization/*=false*/,
const bool no_lambda/*=false*/,
const MHTPParams<Real<FPP>> mhtp_params/*=MHTPParams<FPP>()*/,
const bool compute_2norm_on_array,
const Real<FPP> norm2_threshold,
const unsigned int norm2_max_iter,
bool constant_step_size, Real<FPP> step_size,
const bool on_gpu /*=false*/,
const bool is_verbose/*=false*/, const int id/*=0*/)
{
std::chrono::duration<double> norm2_duration = std::chrono::duration<double>::zero();
std::chrono::duration<double> fgrad_duration = std::chrono::duration<double>::zero();
double norm1, norm2;
/* variable environment parameters (which are interesting enough for debugging/profiling but not yet for the wrapper user API */
char* str_env_prod_mod = getenv("PROD_MOD");
int prod_mod = DYNPROG; // GREEDY_ALL_BEST_GENMAT; DYNPROG is a bit better to factorize the MEG matrix and not slower to factorize a Hadamard matrix
if(str_env_prod_mod)
prod_mod = std::atoi(str_env_prod_mod);
auto str_env_no_lambda_error = getenv("NO_LAMBDA_ERROR");
bool no_lambda_error = false;
if(str_env_no_lambda_error)
no_lambda_error = (bool) std::atoi(str_env_no_lambda_error);
// auto str_env_no_lambda = getenv("NO_LAMBDA");
// if(str_env_no_lambda)
// {
// no_lambda = (bool) std::atoi(str_env_no_lambda);
// }
bool use_grad1 = false;
auto str_env_use_grad1 = getenv("USE_GRAD1");
if(str_env_use_grad1)
use_grad1 = (bool) std::atoi(str_env_use_grad1);
/******************************************************/
// std::cout << "palm4msa2 "<< std::endl;
if(constraints.size() == 0)
throw out_of_range("No constraint passed to palm4msa.");
const Real<FPP> lipschitz_multiplicator = 1.001;
Faust::MatGeneric<FPP,DEVICE>* cur_fac;
Faust::MatSparse<FPP,DEVICE>* scur_fac;
Faust::MatDense<FPP,DEVICE>* dcur_fac;
unsigned int nfacts = constraints.size();
Real<FPP> error = -1; //negative error is ignored
std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims;
int norm2_flag; // return val
for(auto c: constraints)
{
dims.push_back(make_pair(c->get_rows(), c->get_cols()));
if(no_normalization)
{
c->set_normalizing(false);
}
}
//TODO: make it possible to receive a MatSparse A
if(is_verbose && mhtp_params.used)
{
std::cout << mhtp_params.constant_step_size << std::endl;
std::cout << mhtp_params.to_string() << std::endl;
}
Faust::MatDense<FPP,DEVICE> A_H = A;
A_H.adjoint();
if(S.size() != nfacts)
fill_of_eyes(S, nfacts, factors_format != AllDense, dims, on_gpu);
else if(factors_format == AllSparse)
{
S.convertToSparse();
}
else if(factors_format == AllDense)
{
S.convertToDense();
}
int i = 0, f_id, j;
std::function<void()> init_ite, next_fid;
std::function<bool()> updating_facs;
std::function<bool()> is_last_fac_updated;
// packed Fausts corresponding to each factor
std::vector<TransformHelper<FPP,DEVICE>*> pL, pR;
pL.resize(nfacts);// pL[i] is the Faust for all factors to the left of the factor *(S.begin()+i)
pR.resize(nfacts);// pR[i] the same for the right of S_i
Faust::MatDense<FPP,DEVICE> D, tmp;
Faust::MatSparse<FPP,DEVICE> spD;
Real<FPP> c = 1/step_size;
for(int i=0;i<nfacts;i++)
{
pL[i] = new TransformHelper<FPP,DEVICE>();
pR[i] = new TransformHelper<FPP,DEVICE>();
}
if(is_update_way_R2L)
{
init_ite = [&f_id, &nfacts, &pL, &S, &packing_RL, &on_gpu, &prod_mod]()
{
//pre-compute left products for each Si
if(pL[0] != nullptr) delete pL[0];
pL[0] = new TransformHelper<FPP,DEVICE>(); // empty faust // no factors to the left of *(S.begin())
for(int i=1;i < nfacts; i++)
{
auto vec_Si_minus_1 = { *(S.begin()+i-1) };
if(pL[i] != nullptr) delete pL[i]; //TODO: maybe to replace by a TransformHelper stored in the stack to avoid deleting each time
pL[i] = new TransformHelper<FPP,DEVICE>(*pL[i-1], vec_Si_minus_1);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pL[i])->pack_factors(prod_mod);
}
// all pL[i] Fausts are composed at most of one factor matrix
f_id = nfacts-1;
};
next_fid = [&f_id, &pR, &S, &packing_RL, &on_gpu, &prod_mod]()
{
if(f_id > 0)
{
if(pR[f_id-1] != nullptr)
delete pR[f_id-1];
auto vec_Sj = { *(S.begin()+f_id) };
pR[f_id-1] = new Faust::TransformHelper<FPP,DEVICE>(vec_Sj, *pR[f_id]);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pR[f_id-1])->pack_factors(prod_mod);
}
f_id--;
};
is_last_fac_updated = [&f_id]() {return f_id == 0;};
updating_facs = [&f_id]() {return f_id >= 0;};
}
else
{
init_ite = [&f_id, &pR, &S, &packing_RL, &on_gpu, &prod_mod]()
{
if(pR[S.size()-1] != nullptr) delete pR[S.size()-1];
pR[S.size()-1] = new TransformHelper<FPP,DEVICE>(); // empty faust // no factors to the right of *(S.begin()+S.size()]-1)
for(int i=S.size()-2;i >= 0; i--)
{
auto vec_Si_plus_1 = { *(S.begin()+i+1) };
if(pR[i] != nullptr) delete pR[i];
pR[i] = new TransformHelper<FPP,DEVICE>(vec_Si_plus_1, *pR[i+1]);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pR[i])->pack_factors(prod_mod);
}
f_id = 0;
};
next_fid = [&f_id, &S, &pL, &nfacts, packing_RL, &on_gpu, &prod_mod]()
{
if(f_id < nfacts-1)
{
if(pL[f_id+1] != nullptr)
delete pL[f_id+1];
auto vec_Sj = { *(S.begin()+f_id) };
pL[f_id+1] = new Faust::TransformHelper<FPP,DEVICE>(*pL[f_id], vec_Sj);
if(packing_RL) ((TransformHelperGen<FPP,DEVICE>*)pL[f_id+1])->pack_factors(prod_mod);
}
f_id++;
};
updating_facs = [&f_id, &nfacts]() {return f_id < nfacts;};
is_last_fac_updated = [&f_id, &nfacts]() {return f_id == nfacts-1;};
}
while(sc.do_continue(i, error))
{
// std::cout << "i: " << i << std::endl;
// std::cout << "nfacts:" << nfacts << std::endl;
init_ite();
while(updating_facs())
{
// std::cout << "#f_id: " << f_id << std::endl;
cur_fac = S.get_gen_fact_nonconst(f_id);
if(mhtp_params.used && i%mhtp_params.palm4msa_period == 0)
{
perform_MHTP(mhtp_params, A, A_H, S, f_id, pL, pR, packing_RL,
is_verbose, *constraints[f_id], norm2_max_iter, norm2_threshold,
norm2_duration,
fgrad_duration,
sc, error, factors_format, prod_mod, c, lambda);
}
else
update_fact(cur_fac, f_id, A, S, pL, pR, packing_RL,
is_verbose, *constraints[f_id], norm2_max_iter, norm2_threshold,
norm2_duration,
fgrad_duration,
constant_step_size, step_size,
sc, error, factors_format, prod_mod, c, lambda, use_grad1);
next_fid(); // f_id updated to iteration factor index (pL or pR too)
}
//update lambda
if(! no_lambda)
update_lambda(S, pL, pR, A_H, lambda, no_lambda_error);
if(is_verbose)
{
set_calc_err_ite_period(); //macro setting the variable ite_period
if((! (i%ite_period)) && ite_period > 0)
{
std::cout << "PALM4MSA2020 iteration: " << i;
auto err = calc_rel_err(S, A, lambda);
std::cout << " relative error: " << err;
std::cout << " (call id: " << id << ")" << std::endl;
std::cout << " lambda=" << lambda << std::endl;
}
}
i++;
}
S.update_total_nnz();
// free the latest pR and pL TransformHelpers
for(int i=0;i<nfacts;i++)
{
if(pL[i] != nullptr)
delete pL[i];
if(pR[i] != nullptr)
delete pR[i];
}
if(is_verbose)
{
std::cout << "palm4msa spectral time=" << norm2_duration.count() << std::endl;
std::cout << "palm4msa fgrad time=" << fgrad_duration.count() << std::endl;
}
}
template <typename FPP, FDevice DEVICE>
void Faust::compute_n_apply_grad1(const int f_id, const Faust::MatDense<FPP,DEVICE> &A, Faust::TransformHelper<FPP,DEVICE>& S, std::vector<TransformHelper<FPP,DEVICE>*> &pL, std::vector<TransformHelper<FPP,DEVICE>*>& pR, const bool packing_RL, const Real<FPP>& lambda, const Real<FPP> &c, Faust::MatDense<FPP,DEVICE> &out /* D */, const StoppingCriterion<Real<FPP>>& sc, Real<FPP> &error, const int prod_mod)
{
Faust::MatDense<FPP,DEVICE> tmp;
Faust::MatDense<FPP,DEVICE> & D = out;
// Faust::MatGeneric<FPP,DEVICE> * LorR;
Faust::MatDense<FPP,DEVICE> * LorR;
Faust::MatDense<FPP,DEVICE> _LorR;
auto S_j_vec = {*(S.begin()+f_id)};
Faust::TransformHelper<FPP, DEVICE> _LSR(/*lambda_vec,*/ *pL[f_id], S_j_vec, *pR[f_id]);
// tmp = _LSR.get_product(prod_mod);
_LSR.get_product(tmp, prod_mod);
// _LSR.get_product(tmp);
tmp *= FPP(lambda);
tmp -= A;
if(sc.isCriterionErr())
error = tmp.norm();
FPP alpha_R = 1, alpha_L = 1, beta_R = 0, beta_L = 0; //decl in parent scope
auto pR_sz = pR[f_id]->size();
auto pL_sz = pL[f_id]->size();
if(pR_sz > 0)
{
if(pR_sz == 1 && packing_RL) // packing_RL == true
LorR = dynamic_cast<Faust::MatDense<FPP,DEVICE>*>(pR[f_id]->get_gen_fact_nonconst(0));
// LorR = pR[f_id]->get_gen_fact_nonconst(0); //normally pR[f_id] is packed (hence reduced to a single MatDense)
else
{
_LorR = pR[f_id]->get_product(prod_mod);
LorR = &_LorR;
}
if(pL_sz == 0)
{ //no L factor for factor f_id
alpha_R = - lambda/c;
beta_R = 1;
gemm(tmp, *LorR, D, alpha_R, beta_R, 'N', 'H');
// gemm_gen(tmp, *LorR, D, alpha_R, beta_R, 'N', 'H');
}
else
gemm(tmp, *LorR, tmp, alpha_R, beta_R, 'N', 'H');
// gemm_gen(tmp, *LorR, tmp, alpha_R, beta_R, 'N', 'H');
}
if(pL_sz > 0)
{
if(pL_sz == 1 && packing_RL) // packing_RL == true
LorR = dynamic_cast<Faust::MatDense<FPP,DEVICE>*>(pL[f_id]->get_gen_fact_nonconst(0));
// LorR = pL[f_id]->get_gen_fact_nonconst(0);
else
{
_LorR = pL[f_id]->get_product(prod_mod);
LorR = &_LorR;
}
alpha_L = -lambda/c;
beta_L = 1;
gemm(*LorR, tmp, D, alpha_L, beta_L, 'H', 'N');
// gemm_gen(*LorR, tmp, D, alpha_L, beta_L, 'H', 'N');
}
}
template <typename FPP, FDevice DEVICE>
void Faust::compute_n_apply_grad2(const int f_id, const Faust::MatDense<FPP,DEVICE> &A, Faust::TransformHelper<FPP,DEVICE>& S, std::vector<TransformHelper<FPP,DEVICE>*> &pL, std::vector<TransformHelper<FPP,DEVICE>*>& pR, const bool packing_RL, const Real<FPP>& lambda, const Real<FPP> &c, Faust::MatDense<FPP,DEVICE> &out /* D */, const StoppingCriterion<Real<FPP>>& sc, Real<FPP> &error, const int prod_mod)
{
Faust::MatDense<FPP,DEVICE> tmp;
Faust::MatDense<FPP,DEVICE> grad_over_c;
Faust::MatDense<FPP,DEVICE> & D = out;
Faust::MatGeneric<FPP,DEVICE> *_L, *_R;
Faust::MatDense<FPP,DEVICE> __L, __R;
Faust::MatDense<FPP,DEVICE> * LorR;
Faust::MatDense<FPP,DEVICE> _LorR;
std::vector<Faust::MatGeneric<FPP,DEVICE>*> facts;
std::vector<char> tc_flags;
//#define mul_3_facts multiply_order_opt
#define mul_3_facts multiply_order_opt_all_ends// this one only optimizes the product on factor ends but for three factors it doesn't change anything comparing to multiply_order_opt
tmp = A;
auto pR_sz = pR[f_id]->size();
auto pL_sz = pL[f_id]->size();
if(pR_sz > 0)
{
if(pR_sz == 1)
_R = pR[f_id]->get_gen_fact_nonconst(0);
else
{
// __R = pR[f_id]->get_product(prod_mod); // disabled because GREEDY_ALL_BEST_GENMAT is slower than DEFAULT_L2R for Hadamard factorization
__R = pR[f_id]->get_product();
_R = &__R;
}
}
if(pL_sz > 0)
{
if(pL_sz == 1)
_L = pL[f_id]->get_gen_fact_nonconst(0);
else
{
// __L = pL[f_id]->get_product(prod_mod); // disabled because GREEDY_ALL_BEST_GENMAT is slower than DEFAULT_L2R for Hadamard factorization
__L = pL[f_id]->get_product();
_L = &__L;
}
}
if(pR_sz > 0 && pL_sz > 0)
{
// compute error = m_lambda*L*S*R-data
facts = { _L, &D, _R };
mul_3_facts(facts, tmp, (FPP) lambda, (FPP) -1.0);
if(sc.isCriterionErr())
error = tmp.norm();
// compute m_lambda/c * L'*error*R'
facts = { _L, &tmp, _R };
tc_flags = {'H', 'N', 'H'};
}
else if(pR_sz > 0)
{
// compute error = m_lambda*L*S*R-data
facts = { &D, _R };
mul_3_facts(facts, tmp, (FPP) lambda, (FPP) -1.0);
if(sc.isCriterionErr())
error = tmp.norm();
// compute m_lambda/c * L'*error*R'
facts = { &tmp, _R };
tc_flags = { 'N', 'H'};
}
else //if(pL_sz > 0)
{
// compute error = m_lambda*L*S*R-data
facts = { _L, &D};
mul_3_facts(facts, tmp, (FPP) lambda, (FPP) -1.0);
if(sc.isCriterionErr())
error = tmp.norm();
// compute m_lambda/c * L'*error*R'
facts = { _L, &tmp};
tc_flags = {'H', 'N'};
}
// mul_3_facts(facts, D, (FPP) - lambda/c, (FPP)1, tc_flags); // this one has showed error in calculation when using complex matrices (DFT factorization)
// do it two steps: 1) compute the gradient, then 2) apply it separately to D (the current factor)
mul_3_facts(facts, grad_over_c, (FPP) lambda/c, (FPP)0, tc_flags);
D -= grad_over_c;
}
template<typename FPP, FDevice DEVICE>
void Faust::update_lambda(Faust::TransformHelper<FPP,DEVICE>& S, std::vector<TransformHelper<FPP,DEVICE>*> &pL, std::vector<TransformHelper<FPP,DEVICE>*>& pR, const MatDense<FPP, DEVICE> &A_H, Real<FPP>& lambda, bool no_lambda_error/*= false*/)
{
Faust::MatDense<FPP,DEVICE> A_H_S;
MatDense<FPP, DEVICE> S_mat;
FPP tr; // A_H_S trace
Real<FPP> nS; // S fro. norm
auto n = S.size();
bool packing_RL = (pR[0] == nullptr || pR[0]->size() == 1) && (pL[n-1] == nullptr || pL[n-1]->size() == 1);
// compute S full matrix
if(packing_RL)
{
if(pR[0] == nullptr || pL[n-1] == nullptr)
throw std::logic_error("update_lambda: pR and pL weren't properly initialized.");
// optimize S product by re-using R[0] or L[0] (S == S[0]*R[0] or S == L[n-1]*S[n-1])
if(S.get_gen_fact(0)->getNbCol() * pR[0]->getNbRow() < pL[n-1]->getNbCol() * S.get_gen_fact(n-1)->getNbRow())
{
auto S0 = { S.get_gen_fact(0) };
TransformHelper<FPP, DEVICE> _S(S0, *pR[0]);
_S.get_product(S_mat);
}
else
{
auto Sn = { S.get_gen_fact(n-1) };
TransformHelper<FPP, DEVICE> _S(*pL[n-1], Sn);
_S.get_product(S_mat);
}
}
else
S.get_product(S_mat);
gemm(A_H, S_mat, A_H_S, (FPP) 1.0, (FPP) 0.0, 'N', 'N');
tr = A_H_S.trace();
nS = S_mat.norm();
if(FPP(0) == nS)
if(no_lambda_error)
{
// don't change lambda
std::cout << "WARNING: lambda didn't change because S Fro. norm is zero." << std::endl;
return;
}
else
throw std::runtime_error("Error in update_lambda: S Frobenius norm is zero, can't compute lambda.");
if(std::isnan(std::real(tr)) || std::isnan(nS))
if(no_lambda_error)
{
// don't change lambda
std::cout << "WARNING: lambda didn't change because S contains NaN." << std::endl;
return;
}
else
throw std::runtime_error("Error in update_lambda: S (the Faust) contains nan elements in at least one of its matrices, can't compute lambda.");
lambda = std::real(tr)/(nS*nS);
}
template<typename FPP>
Real<FPP> Faust::compute_double_spectralNorm(MatDense<FPP, Cpu>& mat, int norm2_max_iter, double norm2_threshold)
{
int flag;
MatDense<Real<FPP>, Cpu> rmat;
mat.real(rmat);
Faust::MatDense<double, Cpu> dmat;
dmat.resize(mat.getNbRow(), mat.getNbCol());
for(int i=0;i < dmat.getNbRow(); i++)
for(int j=0;j < dmat.getNbRow(); j++)
dmat.getData()[j*dmat.getNbRow()+i] = (double) rmat(i,j);
auto n = dmat.spectralNorm(norm2_max_iter, norm2_threshold, flag);
return (Real<FPP>)n;
}
#ifdef USE_GPU_MOD
template<typename FPP>
Real<FPP> Faust::compute_double_spectralNorm(MatDense<FPP, GPU2>& mat, int norm2_max_iter, double norm2_threshold)
{
int flag;
Faust::MatDense<Real<FPP>, GPU2> rmat(mat.getNbRow(), mat.getNbCol(), nullptr, true);
mat.real(rmat);
Faust::MatDense<Real<FPP>, Cpu> cpu_rmat;
rmat.tocpu(cpu_rmat);
return compute_double_spectralNorm(cpu_rmat, norm2_max_iter, norm2_threshold);
}
#endif
template<typename FPP, FDevice DEVICE>
void Faust::update_fact(
Faust::MatGeneric<FPP,DEVICE>* cur_fac,
int f_id,
const Faust::MatDense<FPP,DEVICE>& A,
Faust::TransformHelper<FPP,DEVICE>& S,
std::vector<TransformHelper<FPP,DEVICE>*> &pL,
std::vector<TransformHelper<FPP,DEVICE>*> &pR,
const bool packing_RL,
const bool is_verbose,
const Faust::ConstraintGeneric &constraint,
const int norm2_max_iter,
const Real<FPP>& norm2_threshold,
std::chrono::duration<double>& norm2_duration,
std::chrono::duration<double>& fgrad_duration,
const bool constant_step_size,
const Real<FPP> step_size,
const StoppingCriterion<Real<FPP>>& sc,
Real<FPP> &error,
const FactorsFormat factors_format,
const int prod_mod,
Real<FPP> &c,
const Real<FPP>& lambda,
bool use_grad1/*= false*/)
{
int norm2_flag;
std::chrono::time_point<std::chrono::high_resolution_clock> spectral_stop, spectral_start;
std::chrono::time_point<std::chrono::high_resolution_clock> fgrad_stop, fgrad_start;
Faust::MatSparse<FPP,DEVICE>* scur_fac = nullptr;
Faust::MatDense<FPP,DEVICE>* dcur_fac = nullptr;
Faust::MatDense<FPP,DEVICE> D;
Faust::MatSparse<FPP,DEVICE> spD;
Real<FPP> nR=1,nL=1;
if(constant_step_size)
c = 1 / step_size;
else
{
if(is_verbose)
spectral_start = std::chrono::high_resolution_clock::now();
if(pR[f_id]->size() > 0)
{
nR = pR[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
if(std::is_same<FPP, float>::value && std::isnan(nR))
{
auto M = pR[f_id]->get_product();
nR = compute_double_spectralNorm(M, norm2_max_iter, norm2_threshold);
if(is_verbose)
std::cout << "Corrected R NaN float 2-norm by recomputing as double 2-norm" << nR << std::endl;
}
}
if(pL[f_id]->size() > 0)
{
nL = pL[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
if(std::is_same<FPP, float>::value && std::isnan(nL))
{
auto M = pL[f_id]->get_product();
nL = compute_double_spectralNorm(M, norm2_max_iter, norm2_threshold);
if(is_verbose)
std::cout << "Corrected L NaN float 2-norm by recomputing as double 2-norm:" << nL << std::endl;
}
}
if(std::isnan(nL) || std::isnan(nR))
{
std::cout << "R 2-norm:" << nR << std::endl;
std::cout << "L 2-norm:" << nL << std::endl;
std::cout << "S id:" << f_id << std::endl;
throw std::runtime_error("2-norm computation error: R or L 2-norm is NaN.");
}
if(is_verbose)
{
spectral_stop = std::chrono::high_resolution_clock::now();
norm2_duration += spectral_stop-spectral_start;
}
c = LIPSCHITZ_MULTIPLICATOR*lambda*lambda*nR*nR*nL*nL;
}
if(S.is_fact_sparse(f_id))
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,DEVICE>*>(cur_fac);
D = *scur_fac;
dcur_fac = nullptr;
}
else
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,DEVICE>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
scur_fac = nullptr;
}
if(is_verbose)
fgrad_start = std::chrono::high_resolution_clock::now();
if(typeid(D) != typeid(Faust::MatDense<FPP,Cpu>) || use_grad1)
// compute_n_apply_grad2 is not yet supported with GPU2
compute_n_apply_grad1(f_id, A, S, pL, pR, packing_RL, lambda, c, D, sc, error, prod_mod);
else
compute_n_apply_grad2(f_id, A, S, pL, pR, packing_RL, lambda, c, D, sc, error, prod_mod);
if(is_verbose)
{
fgrad_stop = std::chrono::high_resolution_clock::now();
fgrad_duration += fgrad_stop-fgrad_start;
}
// Update the S factor
if(factors_format == AllDynamic)
{
// new_fac is a MatGeneric but underliying concrete object can be a MatSparse or a MatDense
// new_fac is allocated in the heap
// replace the former fact by the new one
auto new_fac = constraint.project_gen<FPP,DEVICE,Real<FPP>>(D);
S.replace(new_fac, f_id);
}
else // factors_format == AllDense or AllSparse
{
constraint.project<FPP,DEVICE,Real<FPP>>(D);
// D is the prox image (always a MatDense
// convert D to the proper format (MatSparse or MatDense)
if(factors_format == AllSparse && dcur_fac != nullptr || factors_format == AllDense && scur_fac != nullptr)
throw std::runtime_error("Current factor is inconsistent with the configured factors_format.");
if(factors_format == AllSparse)
{
// convert to sparse then update
spD = D;
S.update(spD, f_id); // update is at higher level than a simple assignment
}
else
{ // factors_format == AllDense
// directly update (D is a MatDense)
S.update(D, f_id);
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment