Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 2fbb2a40 authored by hhakim's avatar hhakim
Browse files

Make palm4msa2020 (impl2) capable to do Faust/matrices muls on GPU. Update the test for it.

parent e6aa48a2
Branches
Tags
No related merge requests found
...@@ -56,8 +56,9 @@ int main(int argc, char* argv[]) ...@@ -56,8 +56,9 @@ int main(int argc, char* argv[])
} }
Faust::MatDense<FPP,Cpu> data; Faust::MatDense<FPP,Cpu> data;
char configPalm2Filename[] = "@FAUST_DATA_MAT_DIR@/config_compared_palm2.mat"; // char configPalm2Filename[] = "@FAUST_DATA_MAT_DIR@/config_compared_palm2.mat";
char configPalm2Filename[] = "config_compared_palm2.mat";
init_faust_mat_from_matio(data, configPalm2Filename, "data"); init_faust_mat_from_matio(data, configPalm2Filename, "data");
Faust::ConstraintInt<FPP,Cpu> c1(CONSTRAINT_NAME_SPLIN, 5, 500, 32); Faust::ConstraintInt<FPP,Cpu> c1(CONSTRAINT_NAME_SPLIN, 5, 500, 32);
Faust::ConstraintFPP<FPP, Cpu, Real<FPP>> c2(CONSTRAINT_NAME_NORMCOL, 1.0, 32, 32); Faust::ConstraintFPP<FPP, Cpu, Real<FPP>> c2(CONSTRAINT_NAME_NORMCOL, 1.0, 32, 32);
...@@ -70,25 +71,48 @@ int main(int argc, char* argv[]) ...@@ -70,25 +71,48 @@ int main(int argc, char* argv[])
// MatDense<FPP,Cpu> H = th->get_product(); // MatDense<FPP,Cpu> H = th->get_product();
// H.Display(); // H.Display();
// palm4msa<FPP,Cpu>(H, constraints, facts, lambda, 30, true); // palm4msa<FPP,Cpu>(H, constraints, facts, lambda, 30, true);
// MatDense<FPP, Cpu>* H = Faust::MatDense<FPP,Cpu>::randMat(500,32); // MatDense<FPP, Cpu>* H = Faust::MatDense<FPP,Cpu>::randMat(500,32);
// H->Display(); // H->Display();
data.Display(); data.Display();
bool use_csr = false, is_update_way_R2L = false; bool use_csr = false, is_update_way_R2L = false;
bool packing_RL = true; bool packing_RL = true;
packing_RL = false;
int nites = 200; int nites = 200;
StoppingCriterion<Real<FPP>> sc(nites); StoppingCriterion<Real<FPP>> sc(nites);
bool compute_2norm_on_array = false; bool compute_2norm_on_array = false;
Real<FPP> norm2_threshold = 10e-16; Real<FPP> norm2_threshold = 10e-16;
int norm2_max_iter = 1000; int norm2_max_iter = 1000;
int meth = 2;
bool on_gpu = false;
if(argc > 1) if(argc > 1)
{
meth = std::atoi(argv[1]);
if(argc > 2)
on_gpu = true;
}
cout << "on_gpu=" << on_gpu << endl;
#ifdef USE_GPU_MOD
if(on_gpu) Faust::enable_gpu_mod();
#else
if(on_gpu)
{
std::cerr << "The test is not compiled with gpu_mod (USE_GPU_MOD not defined)." << std::endl;
exit(2);
}
#endif
if(meth == 1)
{ {
cout << "use impl1" << endl; cout << "use impl1" << endl;
Faust::palm4msa<FPP,Cpu>(data, constraints, facts, lambda, sc, is_update_way_R2L, use_csr, compute_2norm_on_array, norm2_threshold, norm2_max_iter); Faust::palm4msa<FPP,Cpu>(data, constraints, facts, lambda, sc, is_update_way_R2L, use_csr, compute_2norm_on_array, norm2_threshold, norm2_max_iter, false, FAUST_PRECISION, on_gpu);
} }
else else if(meth == 2)
{ {
cout << "use impl2" << endl; cout << "use impl2" << endl;
Faust::palm4msa2<FPP,Cpu>(data, constraints, facts, lambda, sc, is_update_way_R2L, use_csr, packing_RL, compute_2norm_on_array, norm2_threshold, norm2_max_iter); Faust::palm4msa2<FPP,Cpu>(data, constraints, facts, lambda, sc, is_update_way_R2L, use_csr, packing_RL, compute_2norm_on_array, norm2_threshold, norm2_max_iter, false, FAUST_PRECISION, on_gpu);
}
else
{
std::cerr << "meth (arg 1) must be 1 or 2." << endl;
} }
// auto left_th_2 = th->left(2); // auto left_th_2 = th->left(2);
// left_th_2->display(); // left_th_2->display();
...@@ -107,6 +131,5 @@ int main(int argc, char* argv[]) ...@@ -107,6 +131,5 @@ int main(int argc, char* argv[])
std::cout<<"**************** RELATIVE ERROR BETWEEN FAUST AND DATA MATRIX **************** "<<std::endl; std::cout<<"**************** RELATIVE ERROR BETWEEN FAUST AND DATA MATRIX **************** "<<std::endl;
std::cout<< "\t\t" << relativeError<<std::endl<<std::endl; std::cout<< "\t\t" << relativeError<<std::endl<<std::endl;
return 0; return 0;
} }
...@@ -31,7 +31,8 @@ namespace Faust ...@@ -31,7 +31,8 @@ namespace Faust
const bool compute_2norm_on_array=false, const bool compute_2norm_on_array=false,
const Real<FPP> norm2_threshold=FAUST_PRECISION, const Real<FPP> norm2_threshold=FAUST_PRECISION,
const unsigned int norm2_max_iter=FAUST_NORM2_MAX_ITER, const unsigned int norm2_max_iter=FAUST_NORM2_MAX_ITER,
const bool constant_step_size=false, const Real<FPP> step_size=FAUST_PRECISION); const bool constant_step_size=false, const Real<FPP> step_size=FAUST_PRECISION,
const bool on_gpu=false);
template <typename FPP, FDevice DEVICE> template <typename FPP, FDevice DEVICE>
void palm4msa2( void palm4msa2(
...@@ -51,7 +52,8 @@ namespace Faust ...@@ -51,7 +52,8 @@ namespace Faust
const bool compute_2norm_on_array=false, const bool compute_2norm_on_array=false,
const Real<FPP> norm2_threshold=FAUST_PRECISION, const Real<FPP> norm2_threshold=FAUST_PRECISION,
const unsigned int norm2_max_iter=FAUST_NORM2_MAX_ITER, const unsigned int norm2_max_iter=FAUST_NORM2_MAX_ITER,
const bool constant_step_size=false, const Real<FPP> step_size=FAUST_PRECISION); const bool constant_step_size=false, const Real<FPP> step_size=FAUST_PRECISION,
const bool on_gpu=false);
/** /**
...@@ -66,8 +68,10 @@ namespace Faust ...@@ -66,8 +68,10 @@ namespace Faust
void fill_of_eyes(Faust::TransformHelper<FPP,DEVICE>& S, void fill_of_eyes(Faust::TransformHelper<FPP,DEVICE>& S,
const unsigned int nfacts, const unsigned int nfacts,
const bool sparse, const bool sparse,
const std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims); const std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims,
const bool on_gpu=false);
//TODO: maybe move this function in a Faust::utils module (for now it serves only for PALM4MSA) //TODO: maybe move this function in a Faust::utils module (for now it serves only for PALM4MSA)
} }
#include "faust_palm4msa2020.hpp" #include "faust_palm4msa2020.hpp"
#include "faust_palm4msa2020_2.hpp"
#endif #endif
...@@ -10,7 +10,8 @@ void Faust::palm4msa(const Faust::MatDense<FPP,DEVICE>& A, ...@@ -10,7 +10,8 @@ void Faust::palm4msa(const Faust::MatDense<FPP,DEVICE>& A,
const bool compute_2norm_on_array, const bool compute_2norm_on_array,
const Real<FPP> norm2_threshold, const Real<FPP> norm2_threshold,
const unsigned int norm2_max_iter, const unsigned int norm2_max_iter,
const bool constant_step_size, const Real<FPP> step_size) const bool constant_step_size, const Real<FPP> step_size,
const bool on_gpu /*=false*/)
{ {
if(constraints.size() == 0) if(constraints.size() == 0)
throw out_of_range("No constraint passed to palm4msa."); throw out_of_range("No constraint passed to palm4msa.");
...@@ -236,232 +237,19 @@ void Faust::palm4msa(const Faust::MatDense<FPP,DEVICE>& A, ...@@ -236,232 +237,19 @@ void Faust::palm4msa(const Faust::MatDense<FPP,DEVICE>& A,
} }
} }
template <typename FPP, FDevice DEVICE>
void Faust::palm4msa2(const Faust::MatDense<FPP,DEVICE>& A,
std::vector<Faust::ConstraintGeneric*> & constraints,
Faust::TransformHelper<FPP,DEVICE>& S,
Real<FPP>& lambda, //TODO: FPP lambda ? is useful to have a complex lamdba ?
//const unsigned int nites,
const StoppingCriterion<Real<FPP>>& sc,
const bool is_update_way_R2L,
const bool use_csr,
const bool packing_RL,
const bool compute_2norm_on_array,
const Real<FPP> norm2_threshold,
const unsigned int norm2_max_iter,
const bool constant_step_size, const Real<FPP> step_size)
{
if(constraints.size() == 0)
throw out_of_range("No constraint passed to palm4msa.");
const Real<FPP> lipschitz_multiplicator = 1.001;
Faust::MatGeneric<FPP,DEVICE>* cur_fac;
Faust::MatSparse<FPP,DEVICE>* scur_fac;
Faust::MatDense<FPP,DEVICE>* dcur_fac;
unsigned int nfacts = constraints.size();
Real<FPP> error = -1; //negative error is ignored
std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims;
int norm2_flag; // return val
for(auto c: constraints)
dims.push_back(make_pair(c->get_rows(), c->get_cols()));
//TODO: make it possible to have a MatSparse A
Faust::MatDense<FPP,DEVICE> A_H = A;
A_H.adjoint();
if(S.size() != nfacts)
fill_of_eyes(S, nfacts, use_csr, dims);
int i = 0, f_id;
std::function<void()> init_ite, next_fid;
std::function<bool()> updating_facs;
std::function<bool()> is_last_fac_updated;
// packed Fausts corresponding to each factor
std::vector<TransformHelper<FPP,Cpu>*> pL, pR;
pL.resize(nfacts);// pL[i] is the Faust for all factors to the left of the factor *(S.begin()+i)
pR.resize(nfacts);// pR[i] the same for the right of S_i
for(int i=0;i<nfacts;i++)
{
pL[i] = new TransformHelper<FPP,Cpu>();
pR[i] = new TransformHelper<FPP,Cpu>();
}
if(is_update_way_R2L)
{
init_ite = [&f_id, &nfacts, &pL, &S, &packing_RL]()
{
//pre-compute left products for each Si
if(pL[0] != nullptr) delete pL[0];
pL[0] = new TransformHelper<FPP,Cpu>(); // empty faust // no factors to the left of *(S.begin())
for(int i=1;i < nfacts; i++)
{
auto vec_Si_minus_1 = { *(S.begin()+i-1) };
if(pL[i] != nullptr) delete pL[i]; //TODO: maybe to replace by a TransformHelper stored in the stack to avoid deleting each time
pL[i] = new TransformHelper<FPP,Cpu>(*pL[i-1], vec_Si_minus_1);
if(packing_RL) pL[i]->pack_factors();
}
// all pL[i] Fausts are composed at most of one factor matrix
f_id = nfacts-1;
};
next_fid = [&f_id, &pR, &S, &packing_RL]()
{
if(f_id > 0)
{
if(pR[f_id-1] != nullptr)
delete pR[f_id-1];
auto vec_Sj = { *(S.begin()+f_id) };
pR[f_id-1] = new Faust::TransformHelper<FPP,Cpu>(vec_Sj, *pR[f_id]);
if(packing_RL) pR[f_id-1]->pack_factors();
}
f_id--;
};
is_last_fac_updated = [&f_id]() {return f_id == 0;};
updating_facs = [&f_id]() {return f_id >= 0;};
}
else
{
init_ite = [&f_id, &pR, &S, &packing_RL]()
{
if(pR[S.size()-1] != nullptr) delete pR[S.size()-1];
pR[S.size()-1] = new TransformHelper<FPP,Cpu>(); // empty faust // no factors to the right of *(S.begin()+S.size()]-1)
for(int i=S.size()-2;i >= 0; i--)
{
auto vec_Si_plus_1 = { *(S.begin()+i+1) };
if(pR[i] != nullptr) delete pR[i];
pR[i] = new TransformHelper<FPP,Cpu>(vec_Si_plus_1, *pR[i+1]);
if(packing_RL) pR[i]->pack_factors();
}
f_id = 0;
};
next_fid = [&f_id, &S, &pL, &nfacts, packing_RL]()
{
if(f_id < nfacts-1)
{
if(pL[f_id+1] != nullptr)
delete pL[f_id+1];
auto vec_Sj = { *(S.begin()+f_id) };
pL[f_id+1] = new Faust::TransformHelper<FPP,Cpu>(*pL[f_id], vec_Sj);
if(packing_RL) pL[f_id+1]->pack_factors();
}
f_id++;
};
updating_facs = [&f_id, &nfacts]() {return f_id < nfacts;};
is_last_fac_updated = [&f_id, &nfacts]() {return f_id == nfacts-1;};
}
Faust::MatDense<FPP,Cpu> D, tmp;
Faust::MatDense<FPP,Cpu> * LorR;
Faust::MatDense<FPP,Cpu> _LorR;
Real<FPP> c = 1/step_size;
while(sc.do_continue(i, error))
{
// std::cout << "nfacts:" << nfacts << std::endl;
init_ite();
while(updating_facs())
{
// std::cout << "f_id: " << f_id << std::endl;
cur_fac = S.get_gen_fact_nonconst(f_id);
Real<FPP> nR=1,nL=1;
if(! constant_step_size)
{
if(pR[f_id]->size() > 0)
nR = pR[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
if(pL[f_id]->size() > 0)
nL = pL[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
c = lipschitz_multiplicator*lambda*lambda*nR*nR*nL*nL;
}
auto S_j_vec = {*(S.begin()+f_id)};
Faust::TransformHelper<FPP, Cpu> _LSR(*pL[f_id], S_j_vec, *pR[f_id]);
// tmp = _LSR.get_product();
_LSR.get_product(tmp);
tmp *= FPP(lambda);
tmp -= A;
if(sc.isCriterionErr())
error = tmp.norm();
FPP alpha_R = 1, alpha_L = 1, beta_R = 0, beta_L = 0; //decl in parent scope
if(S.is_fact_sparse(f_id))
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(cur_fac);
D = *scur_fac;
dcur_fac = nullptr;
}
else
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
scur_fac = nullptr;
}
if(pR[f_id]->size() > 0)
{
if(packing_RL)
LorR = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(pR[f_id]->get_gen_fact_nonconst(0)); //normally pR[f_id] is packed (hence reduced to a single MatDense)
else
{
_LorR = pR[f_id]->get_product();
LorR = &_LorR;
}
if(pL[f_id]->size() == 0)
{ //no L factor for factor f_id
alpha_R = - lambda/c;
beta_R = 1;
gemm(tmp, *LorR, D, alpha_R, beta_R, 'N', 'H');
}
else
gemm(tmp, *LorR, tmp, alpha_R, beta_R, 'N', 'H');
}
if(pL[f_id]->size() > 0)
{
if(packing_RL)
LorR = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(pL[f_id]->get_gen_fact_nonconst(0));
else
{
_LorR = pL[f_id]->get_product();
LorR = &_LorR;
}
alpha_L = -lambda/c;
beta_L = 1;
gemm(*LorR, tmp, D, alpha_L, beta_L, 'H', 'N');
}
// really update now
constraints[f_id]->project<FPP,DEVICE,Real<FPP>>(D);
// cout << "scur_fac=" << scur_fac << " dcur_fac=" << dcur_fac << endl;
if(! use_csr && scur_fac == nullptr)
{
// not CSR and cur_fac DENSE
*dcur_fac = D;
}
else if(use_csr && dcur_fac == nullptr)
{
// CSR and cur_fac SPARSE
*scur_fac = D;
}
else
throw std::runtime_error("Current factor is inconsistent with use_csr.");
cur_fac->set_id(false);
next_fid(); //f_id updated to iteration factor index (pL or pR too)
}
//update lambda
//TODO: variable decl in parent scope
Faust::MatDense<FPP,DEVICE> A_H_S = S.multiply(A_H);
// auto last_Sfac_vec = { *(S.begin()+nfacts-1), dynamic_cast<Faust::MatGeneric<FPP,Cpu>*>(&A_H)};
// Faust::TransformHelper<FPP,Cpu> A_H_S_(*pL[nfacts-1], last_Sfac_vec);
// A_H_S_.disable_dtor();
// Faust::MatDense<FPP,DEVICE> A_H_S = A_H_S_.get_product();
Real<FPP> trr = std::real(A_H_S.trace());
Real<FPP> n = S.normFro();
lambda = trr/(n*n);
// std::cout << "debug lambda: " << lambda << std::endl;
i++;
}
S.update_total_nnz();
}
template <typename FPP, FDevice DEVICE> template <typename FPP, FDevice DEVICE>
void Faust::fill_of_eyes( void Faust::fill_of_eyes(
Faust::TransformHelper<FPP,DEVICE>& S, Faust::TransformHelper<FPP,DEVICE>& S,
const unsigned int nfacts, const unsigned int nfacts,
const bool sparse, const bool sparse,
const std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims) const std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims,
const bool on_gpu)
{ {
if(S.size() > 0) if(S.size() > 0)
throw std::runtime_error("The Faust must be empty for intializing it to eyes factors."); throw std::runtime_error("The Faust must be empty for intializing it to eyes factors.");
for(auto fdims : dims) for(auto fdims : dims)
{ {
// std::cout << "fill_of_eyes() fdims: " << fdims.first << " " << fdims.second << std::endl;
// init all facts as identity matrices // init all facts as identity matrices
// with proper dimensions // with proper dimensions
Faust::MatGeneric<FPP,DEVICE>* fact; Faust::MatGeneric<FPP,DEVICE>* fact;
...@@ -479,4 +267,5 @@ void Faust::fill_of_eyes( ...@@ -479,4 +267,5 @@ void Faust::fill_of_eyes(
} }
S.push_back(fact); //TODO: copying=false S.push_back(fact); //TODO: copying=false
} }
if(on_gpu) S.set_mul_order_opt_mode(10);
} }
template <typename FPP, FDevice DEVICE>
void Faust::palm4msa2(const Faust::MatDense<FPP,DEVICE>& A,
std::vector<Faust::ConstraintGeneric*> & constraints,
Faust::TransformHelper<FPP,DEVICE>& S,
Real<FPP>& lambda, //TODO: FPP lambda ? is useful to have a complex lamdba ?
//const unsigned int nites,
const StoppingCriterion<Real<FPP>>& sc,
const bool is_update_way_R2L,
const bool use_csr,
const bool packing_RL,
const bool compute_2norm_on_array,
const Real<FPP> norm2_threshold,
const unsigned int norm2_max_iter,
const bool constant_step_size, const Real<FPP> step_size,
const bool on_gpu /*=false*/)
{
double norm1, norm2;
std::cout << "palm4msa2 "<< std::endl;
std::cout << "on_gpu: " << on_gpu << std::endl;
if(constraints.size() == 0)
throw out_of_range("No constraint passed to palm4msa.");
const Real<FPP> lipschitz_multiplicator = 1.001;
Faust::MatGeneric<FPP,DEVICE>* cur_fac;
Faust::MatSparse<FPP,DEVICE>* scur_fac;
Faust::MatDense<FPP,DEVICE>* dcur_fac;
unsigned int nfacts = constraints.size();
Real<FPP> error = -1; //negative error is ignored
std::vector<std::pair<faust_unsigned_int,faust_unsigned_int>> dims;
int norm2_flag; // return val
for(auto c: constraints)
dims.push_back(make_pair(c->get_rows(), c->get_cols()));
//TODO: make it possible to receive a MatSparse A
Faust::MatDense<FPP,DEVICE> A_H = A;
A_H.adjoint();
if(S.size() != nfacts)
fill_of_eyes(S, nfacts, use_csr, dims, on_gpu);
int i = 0, f_id;
std::function<void()> init_ite, next_fid;
std::function<bool()> updating_facs;
std::function<bool()> is_last_fac_updated;
// packed Fausts corresponding to each factor
std::vector<TransformHelper<FPP,Cpu>*> pL, pR;
pL.resize(nfacts);// pL[i] is the Faust for all factors to the left of the factor *(S.begin()+i)
pR.resize(nfacts);// pR[i] the same for the right of S_i
for(int i=0;i<nfacts;i++)
{
pL[i] = new TransformHelper<FPP,Cpu>();
pR[i] = new TransformHelper<FPP,Cpu>();
}
if(is_update_way_R2L)
{
init_ite = [&f_id, &nfacts, &pL, &S, &packing_RL, &on_gpu]()
{
//pre-compute left products for each Si
if(pL[0] != nullptr) delete pL[0];
pL[0] = new TransformHelper<FPP,Cpu>(); // empty faust // no factors to the left of *(S.begin())
for(int i=1;i < nfacts; i++)
{
auto vec_Si_minus_1 = { *(S.begin()+i-1) };
if(pL[i] != nullptr) delete pL[i]; //TODO: maybe to replace by a TransformHelper stored in the stack to avoid deleting each time
pL[i] = new TransformHelper<FPP,Cpu>(*pL[i-1], vec_Si_minus_1);
// if the ctor args are GPU-enabled so is pL[i]
// if(on_gpu) assert(10 == pL[i]->get_mul_order_opt_mode());
if(packing_RL) pL[i]->pack_factors();
}
// all pL[i] Fausts are composed at most of one factor matrix
f_id = nfacts-1;
};
next_fid = [&f_id, &pR, &S, &packing_RL, &on_gpu]()
{
if(f_id > 0)
{
if(pR[f_id-1] != nullptr)
delete pR[f_id-1];
auto vec_Sj = { *(S.begin()+f_id) };
pR[f_id-1] = new Faust::TransformHelper<FPP,Cpu>(vec_Sj, *pR[f_id]);
// if(on_gpu) assert(10 == pR[f_id-1]->get_mul_order_opt_mode());
if(packing_RL) pR[f_id-1]->pack_factors();
}
f_id--;
};
is_last_fac_updated = [&f_id]() {return f_id == 0;};
updating_facs = [&f_id]() {return f_id >= 0;};
}
else
{
init_ite = [&f_id, &pR, &S, &packing_RL, &on_gpu]()
{
if(pR[S.size()-1] != nullptr) delete pR[S.size()-1];
pR[S.size()-1] = new TransformHelper<FPP,Cpu>(); // empty faust // no factors to the right of *(S.begin()+S.size()]-1)
for(int i=S.size()-2;i >= 0; i--)
{
auto vec_Si_plus_1 = { *(S.begin()+i+1) };
if(pR[i] != nullptr) delete pR[i];
pR[i] = new TransformHelper<FPP,Cpu>(vec_Si_plus_1, *pR[i+1]);
// if(on_gpu) assert(10 == pR[i]->get_mul_order_opt_mode());
if(packing_RL) pR[i]->pack_factors();
}
f_id = 0;
};
next_fid = [&f_id, &S, &pL, &nfacts, packing_RL, &on_gpu]()
{
if(f_id < nfacts-1)
{
if(pL[f_id+1] != nullptr)
delete pL[f_id+1];
auto vec_Sj = { *(S.begin()+f_id) };
pL[f_id+1] = new Faust::TransformHelper<FPP,Cpu>(*pL[f_id], vec_Sj);
// if(on_gpu) assert(10 == pL[f_id+1]->get_mul_order_opt_mode());
if(packing_RL) pL[f_id+1]->pack_factors();
}
f_id++;
};
updating_facs = [&f_id, &nfacts]() {return f_id < nfacts;};
is_last_fac_updated = [&f_id, &nfacts]() {return f_id == nfacts-1;};
}
Faust::MatDense<FPP,Cpu> D, tmp;
Faust::MatSparse<FPP,Cpu> spD;
Faust::MatDense<FPP,Cpu> * LorR;
Faust::MatDense<FPP,Cpu> _LorR;
Real<FPP> c = 1/step_size;
while(sc.do_continue(i, error))
{
// std::cout << "nfacts:" << nfacts << std::endl;
init_ite();
while(updating_facs())
{
// std::cout << "f_id: " << f_id << std::endl;
cur_fac = S.get_gen_fact_nonconst(f_id);
Real<FPP> nR=1,nL=1;
if(! constant_step_size)
{
if(pR[f_id]->size() > 0)
nR = pR[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
if(pL[f_id]->size() > 0)
nL = pL[f_id]->spectralNorm(norm2_max_iter, norm2_threshold, norm2_flag);
c = lipschitz_multiplicator*lambda*lambda*nR*nR*nL*nL;
}
auto S_j_vec = {*(S.begin()+f_id)};
Faust::TransformHelper<FPP, Cpu> _LSR(*pL[f_id], S_j_vec, *pR[f_id]);
// tmp = _LSR.get_product();
_LSR.get_product(tmp);
tmp *= FPP(lambda);
tmp -= A;
if(sc.isCriterionErr())
error = tmp.norm();
FPP alpha_R = 1, alpha_L = 1, beta_R = 0, beta_L = 0; //decl in parent scope
if(S.is_fact_sparse(f_id))
{
scur_fac = dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(cur_fac);
D = *scur_fac;
dcur_fac = nullptr;
}
else
{
dcur_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(cur_fac); // TOFIX: possible it is not Dense... but MatDiag (sanity check on function start)
D = *dcur_fac;
scur_fac = nullptr;
}
if(pR[f_id]->size() > 0)
{
if(packing_RL)
LorR = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(pR[f_id]->get_gen_fact_nonconst(0)); //normally pR[f_id] is packed (hence reduced to a single MatDense)
else
{
_LorR = pR[f_id]->get_product();
LorR = &_LorR;
}
if(pL[f_id]->size() == 0)
{ //no L factor for factor f_id
alpha_R = - lambda/c;
beta_R = 1;
gemm(tmp, *LorR, D, alpha_R, beta_R, 'N', 'H');
}
else
gemm(tmp, *LorR, tmp, alpha_R, beta_R, 'N', 'H');
}
if(pL[f_id]->size() > 0)
{
if(packing_RL)
LorR = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(pL[f_id]->get_gen_fact_nonconst(0));
else
{
_LorR = pL[f_id]->get_product();
LorR = &_LorR;
}
alpha_L = -lambda/c;
beta_L = 1;
gemm(*LorR, tmp, D, alpha_L, beta_L, 'H', 'N');
}
// really update now
constraints[f_id]->project<FPP,DEVICE,Real<FPP>>(D);
if(use_csr && dcur_fac != nullptr || !use_csr && scur_fac != nullptr)
throw std::runtime_error("Current factor is inconsistent with use_csr.");
if(use_csr)
{
spD = D;
S.update(spD, f_id); // update is at higher level than a simple assignment
}
else
S.update(D, f_id);
next_fid(); //f_id updated to iteration factor index (pL or pR too)
}
//update lambda
//TODO: variable decl in parent scope
Faust::MatDense<FPP,DEVICE> A_H_S = S.multiply(A_H);
// auto last_Sfac_vec = { *(S.begin()+nfacts-1), dynamic_cast<Faust::MatGeneric<FPP,Cpu>*>(&A_H)};
// Faust::TransformHelper<FPP,Cpu> A_H_S_(*pL[nfacts-1], last_Sfac_vec);
// A_H_S_.disable_dtor();
// Faust::MatDense<FPP,DEVICE> A_H_S = A_H_S_.get_product();
Real<FPP> trr = std::real(A_H_S.trace());
Real<FPP> n = S.normFro();
lambda = trr/(n*n);
// std::cout << "debug lambda: " << lambda << std::endl;
i++;
}
S.update_total_nnz();
}
#ifndef __FAUST_MAT_DIAG__ #ifndef __FAUST_MAT_DIAG__
#define __FAUST_MAT_DIAG__ #define __FAUST_MAT_DIAG__
#include "matio.h"
#include <Eigen/Core> #include <Eigen/Core>
#include "faust_constant.h" #include "faust_constant.h"
#include "faust_Vect.h" #include "faust_Vect.h"
......
...@@ -66,6 +66,8 @@ namespace Faust { ...@@ -66,6 +66,8 @@ namespace Faust {
template<typename FPP,FDevice DEVICE> class Vect; template<typename FPP,FDevice DEVICE> class Vect;
template<typename FPP,FDevice DEVICE> class MatDense; template<typename FPP,FDevice DEVICE> class MatDense;
template<typename FPP,FDevice DEVICE> class MatGeneric; template<typename FPP,FDevice DEVICE> class MatGeneric;
template<typename FPP> class FaustGPU;
enum FaustMulMode enum FaustMulMode
{ {
...@@ -236,6 +238,10 @@ namespace Faust { ...@@ -236,6 +238,10 @@ namespace Faust {
transf_iterator<FPP> begin() const; transf_iterator<FPP> begin() const;
transf_iterator<FPP> end() const; transf_iterator<FPP> end() const;
#ifdef USE_GPU_MOD
FaustGPU<FPP>* get_gpu_faust();
#endif
void pack_factors(faust_unsigned_int start_id, faust_unsigned_int end_id); void pack_factors(faust_unsigned_int start_id, faust_unsigned_int end_id);
void pack_factors(); void pack_factors();
void pack_factors(const faust_unsigned_int id, const PackDir dir); void pack_factors(const faust_unsigned_int id, const PackDir dir);
...@@ -251,6 +257,7 @@ namespace Faust { ...@@ -251,6 +257,7 @@ namespace Faust {
unsigned long long get_fact_addr(const faust_unsigned_int id) const; unsigned long long get_fact_addr(const faust_unsigned_int id) const;
/* use carefully */ /* use carefully */
MatGeneric<FPP,Cpu>* get_gen_fact_nonconst(const faust_unsigned_int id) const; MatGeneric<FPP,Cpu>* get_gen_fact_nonconst(const faust_unsigned_int id) const;
void update(const MatGeneric<FPP,Cpu>& M, const faust_unsigned_int fact_id);
private: private:
void copy_slices(TransformHelper<FPP, Cpu>* th, const bool transpose = false); void copy_slices(TransformHelper<FPP, Cpu>* th, const bool transpose = false);
const MatGeneric<FPP,Cpu>* get_gen_fact(const faust_unsigned_int id) const; const MatGeneric<FPP,Cpu>* get_gen_fact(const faust_unsigned_int id) const;
......
...@@ -102,6 +102,14 @@ namespace Faust { ...@@ -102,6 +102,14 @@ namespace Faust {
this->is_transposed = right_left_transposed; this->is_transposed = right_left_transposed;
//likewise for the conjugate //likewise for the conjugate
this->is_conjugate = right_left_conjugate; this->is_conjugate = right_left_conjugate;
#ifdef USE_GPU_MOD
if(th_left->gpu_faust && th_right->gpu_faust)
{ // both left and right side Fausts are gpu enabled
// this->transform is freshly created, reuse it to create the gpu_faust object
this->gpu_faust = new FaustGPU<FPP>(this->transform->data);
mul_order_opt_mode = Fv_mul_mode = 10;
}
#endif
} }
template<typename FPP> template<typename FPP>
...@@ -132,6 +140,9 @@ namespace Faust { ...@@ -132,6 +140,9 @@ namespace Faust {
copy_slices(th); copy_slices(th);
this->mul_order_opt_mode = th->mul_order_opt_mode; this->mul_order_opt_mode = th->mul_order_opt_mode;
this->Fv_mul_mode = th->Fv_mul_mode; this->Fv_mul_mode = th->Fv_mul_mode;
#ifdef USE_GPU_MOD
this->gpu_faust = th->gpu_faust;
#endif
} }
template<typename FPP> template<typename FPP>
...@@ -151,6 +162,9 @@ namespace Faust { ...@@ -151,6 +162,9 @@ namespace Faust {
copy_slices(&th); copy_slices(&th);
this->mul_order_opt_mode = th.mul_order_opt_mode; this->mul_order_opt_mode = th.mul_order_opt_mode;
this->Fv_mul_mode = th.Fv_mul_mode; this->Fv_mul_mode = th.Fv_mul_mode;
#ifdef USE_GPU_MOD
this->gpu_faust = th.gpu_faust;
#endif
} }
template<typename FPP> template<typename FPP>
...@@ -168,6 +182,14 @@ namespace Faust { ...@@ -168,6 +182,14 @@ namespace Faust {
eval_sliced_Transform(); eval_sliced_Transform();
this->mul_order_opt_mode = th->mul_order_opt_mode; this->mul_order_opt_mode = th->mul_order_opt_mode;
this->Fv_mul_mode = th->Fv_mul_mode; this->Fv_mul_mode = th->Fv_mul_mode;
#ifdef USE_GPU_MOD
if(th->gpu_faust)
{ // both left and right side Fausts are gpu enabled
// this->transform is freshly created, reuse it to create the gpu_faust object
this->gpu_faust = new FaustGPU<FPP>(this->transform->data);
mul_order_opt_mode = Fv_mul_mode = 10;
}
#endif
} }
template<typename FPP> template<typename FPP>
...@@ -199,6 +221,14 @@ namespace Faust { ...@@ -199,6 +221,14 @@ namespace Faust {
delete[] this->fancy_indices[1]; delete[] this->fancy_indices[1];
this->mul_order_opt_mode = th->mul_order_opt_mode; this->mul_order_opt_mode = th->mul_order_opt_mode;
this->Fv_mul_mode = th->Fv_mul_mode; this->Fv_mul_mode = th->Fv_mul_mode;
#ifdef USE_GPU_MOD
if(th->gpu_faust)
{ // both left and right side Fausts are gpu enabled
// this->transform is freshly created, reuse it to create the gpu_faust object
this->gpu_faust = new FaustGPU<FPP>(this->transform->data);
mul_order_opt_mode = Fv_mul_mode = 10;
}
#endif
} }
#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL #ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
template<typename FPP> template<typename FPP>
...@@ -206,6 +236,15 @@ namespace Faust { ...@@ -206,6 +236,15 @@ namespace Faust {
TransformHelper<FPP,Cpu>::TransformHelper(GList& ... t) : TransformHelper<FPP,Cpu>() TransformHelper<FPP,Cpu>::TransformHelper(GList& ... t) : TransformHelper<FPP,Cpu>()
{ {
this->push_back_(t...); this->push_back_(t...);
#ifdef USE_GPU_MOD
// it's difficult here to test if all t are TransformHelper with a gpu_faust enabled (for example t can also be a vector<MatGeneric>)
// so verify directly that cpu matrices copied are loaded in GPU to respect the invariant: a Faust is fully loaded on GPU or is not on GPU at all
if(FaustGPU<FPP>::are_cpu_mat_all_known(this->transform->data))
{
gpu_faust = new FaustGPU<FPP>(this->transform->data);
mul_order_opt_mode = Fv_mul_mode = 10;
}
#endif
} }
#endif #endif
template<typename FPP> template<typename FPP>
...@@ -567,7 +606,6 @@ namespace Faust { ...@@ -567,7 +606,6 @@ namespace Faust {
} }
} }
// backward pass // backward pass
if(! only_forward) if(! only_forward)
for(int i = pth->size()-1; i > 0; i--) for(int i = pth->size()-1; i > 0; i--)
{ {
...@@ -663,6 +701,7 @@ namespace Faust { ...@@ -663,6 +701,7 @@ namespace Faust {
std::cout << " (opt. disabled, default mul.)"; std::cout << " (opt. disabled, default mul.)";
std::cout << std::endl; std::cout << std::endl;
} }
} }
template<typename FPP> template<typename FPP>
...@@ -902,6 +941,35 @@ namespace Faust { ...@@ -902,6 +941,35 @@ namespace Faust {
return this->transform->data[is_transposed?size()-id-1:id]; return this->transform->data[is_transposed?size()-id-1:id];
} }
template<typename FPP>
void TransformHelper<FPP,Cpu>::update(const MatGeneric<FPP,Cpu>& M, const faust_unsigned_int fact_id)
{
MatGeneric<FPP,Cpu>& M_ = const_cast<MatGeneric<FPP,Cpu>&>(M);
// I promise I won't change M
MatSparse<FPP,Cpu> *sp_mat, *sp_fact;
MatDense<FPP,Cpu> *ds_mat, *ds_fact;
MatGeneric<FPP,Cpu>* fact = get_gen_fact_nonconst(fact_id);
if(sp_mat = dynamic_cast<MatSparse<FPP,Cpu>*>(&M_))
{
if(! (sp_fact = dynamic_cast<MatSparse<FPP,Cpu>*>(fact)))
throw std::runtime_error("A sparse factor can't be updated with a dense factor");
*sp_fact = *sp_mat;
}
else if(ds_mat = dynamic_cast<MatDense<FPP,Cpu>*>(&M_))
{
if(! (ds_fact = dynamic_cast<MatDense<FPP,Cpu>*>(fact)))
throw std::runtime_error("A dense factor can't be updated with a dense factor");
*ds_fact = *ds_mat;
}
fact->set_id(M.is_id());
#if USE_GPU_MOD
if(gpu_faust)
{
gpu_faust->update(fact, fact_id);
}
#endif
}
template<typename FPP> template<typename FPP>
unsigned long long TransformHelper<FPP,Cpu>::get_fact_addr(const faust_unsigned_int id) const unsigned long long TransformHelper<FPP,Cpu>::get_fact_addr(const faust_unsigned_int id) const
{ {
...@@ -1553,6 +1621,10 @@ namespace Faust { ...@@ -1553,6 +1621,10 @@ namespace Faust {
// transform is deleted auto. when no TransformHelper uses it (no more weak refs left) // transform is deleted auto. when no TransformHelper uses it (no more weak refs left)
#ifdef FAUST_VERBOSE #ifdef FAUST_VERBOSE
cout << "Destroying Faust::TransformHelper object." << endl; cout << "Destroying Faust::TransformHelper object." << endl;
#endif
#ifdef USE_GPU_MOD
if(gpu_faust != nullptr)
delete gpu_faust;
#endif #endif
} }
...@@ -1688,10 +1760,14 @@ namespace Faust { ...@@ -1688,10 +1760,14 @@ namespace Faust {
//nothing to do except converting to MatDense if start_id //nothing to do except converting to MatDense if start_id
//factor is a MatSparse //factor is a MatSparse
packed_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(*(begin()+start_id)); packed_fac = dynamic_cast<Faust::MatDense<FPP,Cpu>*>(*(begin()+start_id));
if(packed_fac == nullptr) // factor start_id is not at MatDense, convert it if(packed_fac == nullptr)
{// factor start_id is not at MatDense, convert it
packed_fac = new MatDense<FPP,Cpu>(*dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(*(begin()+start_id))); packed_fac = new MatDense<FPP,Cpu>(*dynamic_cast<Faust::MatSparse<FPP,Cpu>*>(*(begin()+start_id)));
}
else else
{
return; //no change return; //no change
}
} }
else else
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment