Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 4c68c41b authored by hhakim's avatar hhakim
Browse files

Renew the SVDTJ algorithm: more accurate error stopping criterion and other enhancements.

- Different automatic t (number of Givens per factor) for U and V.
- Ignore U and V extra factors that don't enhance the error.
- Update unit C++ tests (command line options and pre-defined auto tests).
- Addons and refactoring in Givens* classes.
parent c502359f
Branches
Tags
No related merge requests found
Showing with 970 additions and 282 deletions
...@@ -3,57 +3,40 @@ ...@@ -3,57 +3,40 @@
#include "faust_GivensFGFTGen.h" #include "faust_GivensFGFTGen.h"
#include "faust_SVDTJ.h" #include "faust_SVDTJ.h"
#include <cstdlib> #include <cstdlib>
#include <string>
#include <unistd.h>
typedef @TEST_FPP@ FPP; typedef @TEST_FPP@ FPP;
using namespace std; using namespace std;
using namespace Faust; using namespace Faust;
/** Real<FPP> svdtj_and_error(MatDense<FPP, Cpu>* A, int J, int t, Real<FPP> tol, bool relErr, int order, bool enable_large_Faust, int verb, TransformHelper<FPP,Cpu> **U_=nullptr, TransformHelper<FPP, Cpu> **V_=nullptr, Vect<FPP,Cpu> **S__=nullptr, bool del=true)
* This test verifies the SVD factorization of an random matrix of size m x n.
* We authorize large Fausts for U and V (a lot of Givens factors) because we want to test if it works well.
*/
int main(int argc, char **argv)
{ {
// input
int m, n, min_mn; int m = A->getNbRow(), n = A->getNbCol();
m = 16; auto min_mn = m > n?n:m;
n = 32;
auto J = 1024; // nGivens
if(argc >= 3)
{
m = std::atoi(argv[1]);
n = std::atoi(argv[2]);
if(argc >= 4)
J = std::atoi(argv[3]);
}
cout << "A size m x n: " << m << " x " << n << endl;
cout << "J: " << J << endl;
min_mn = m > n?n:m;
auto A = MatDense<FPP, Cpu>::randMat(m, n);
// auto DFT = TransformHelper<std::complex<double>, Cpu>::fourierFaust(4, false);
// auto A = new MatDense<FPP, Cpu>(DFT->real<double>()->get_product()); // in the heap to match the delete in the end (needed for randMat generated A)
// delete DFT;
#if DEBUG_SVDTJ
A->save_to_mat_file("/tmp/A_cpp.mat", "A");
#endif
auto t = m / 2; // nGivens per factor
if(t > J)
throw runtime_error("t > J"); // TODO: the check should be in eigtj C++ code
bool relErr = true; // use relative error instead of absolute error if tol is not 0
Real<FPP> tol = 0; // not the stopping criterion
int verb = 0;
int order = -1;
bool enable_large_Faust = true;
// output // output
TransformHelper<FPP,Cpu> *U, *V;
Faust::Vect<FPP,Cpu> * S_;
// input svdtj nGivens, tol, order, relerr, nGivens_per_fac, verbosity, enable_large_Faust: 4096 0 descend True None 0 True // input svdtj nGivens, tol, order, relerr, nGivens_per_fac, verbosity, enable_large_Faust: 4096 0 descend True None 0 True
// //
//void Faust::svdtj(MatDense<FPP, DEVICE> & dM, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Faust::Vect<FPP,DEVICE> ** S_) //void Faust::svdtj(MatDense<FPP, DEVICE> & dM, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Faust::Vect<FPP,DEVICE> ** S_)
MatDense<FPP, Cpu> err(*A); MatDense<FPP, Cpu> err(*A);
// warning *A is modified by the svdtj // warning *A is modified by the svdtj
svdtj(*A, J, t, tol, verb, relErr, order, enable_large_Faust, &U, &V, &S_); Vect<FPP, Cpu> * S_;
TransformHelper<FPP, Cpu> * U, *V;
if(! S__)
S__ = &S_;
if(! U_)
U_ = &U;
if(! V_)
V_ = &V;
svdtj(*A, J, t, tol, verb, relErr, order, enable_large_Faust, U_, V_, S__);
U = *U_;
V = *V_;
S_ = *S__;
cout << "U, V number of factors: " << U->size() << ", " << V->size() << endl; cout << "U, V number of factors: " << U->size() << ", " << V->size() << endl;
cout << "U, V RCGs: " << double(m * m) / U->get_total_nnz() << " " << double(n * n) / V->get_total_nnz() << endl; cout << "U, V RCGs: " << double(m * m) / U->get_total_nnz() << " " << double(n * n) / V->get_total_nnz() << endl;
MatDense<FPP, Cpu> S(m, n); MatDense<FPP, Cpu> S(m, n);
...@@ -75,15 +58,219 @@ int main(int argc, char **argv) ...@@ -75,15 +58,219 @@ int main(int argc, char **argv)
#endif #endif
err -= USV_; err -= USV_;
cout << "svdtj err: " << err.norm() / A->norm() << endl; cout << "svdtj err: " << err.norm() / A->norm() << endl;
Real<FPP> norm_err = (A->norm() - S.norm());
if (relErr) norm_err /= A->norm();
cout << "svdtj norm err: " << norm_err << endl;
#if DEBUG_SVDTJ #if DEBUG_SVDTJ
U->save_mat_file("/tmp/U_cpp.mat"); U->save_mat_file("/tmp/U_cpp.mat");
V->save_mat_file("/tmp/V_cpp.mat"); V->save_mat_file("/tmp/V_cpp.mat");
S.save_to_mat_file("/tmp/S_cpp.mat", "S"); S.save_to_mat_file("/tmp/S_cpp.mat", "S");
#endif #endif
delete A;
if(del)
{
delete U;
delete V;
delete S_;
}
return norm_err;
}
void auto_tests()
{
int m, n, min_mn;
int J, t, order;
Real<FPP> tol;
bool verb, relErr, enable_large_Faust;
// first test
m = 128;
n = 64;
J = 1024; // nGivens
tol = 0; // not the stopping criterion if 0
relErr = true; // use relative error instead of absolute error if tol is not 0
t = -1; // nGivens per factor // -1 for automatic (diff for U and V)
verb = 1;
order = -1; // not yet implemented
enable_large_Faust = false;
auto A = MatDense<FPP, Cpu>::randMat(m, n);
TransformHelper<FPP,Cpu> *U, *V;
Vect<FPP,Cpu> * S_;
cout << string(20, '=') << " test 1: J limit" << endl;
auto norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(U->size() == 16); // auto-computed t = 64 for U, 64 * 16 == 1024 == J
delete U;
delete V;
delete S_;
cout << string(20, '=') << " test 2: rel. error" << endl;
J = 0;
tol = 1e-3;
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(norm_err <= tol);
delete U;
delete V;
delete S_;
cout << string(20, '=') << " test 3: abs. error" << endl;
J = 0;
tol = 1e-3;
relErr = false;
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(norm_err <= tol);
delete U; delete U;
delete V; delete V;
delete S_; delete S_;
cout << string(20, '=') << " test 4: concurrent J and rel. error" << endl;
J = 128;
tol = 1e-3;
relErr = true;
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(norm_err <= tol || U->size() * m / 2 <= J); //automatic t = m / 2 for U
cout << string(20, '=') << " test 5: J limit, t=1" << endl;
t = 1;
J = 100;
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(U->size() == J); // auto-computed t = 1
delete U;
delete V;
delete S_;
cout << string(20, '=') << " test 6: rel. error" << endl;
J = 0;
tol = 1e-3;
relErr = true;
enable_large_Faust = true; //otherwise it doesn't work
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(norm_err <= tol);
delete U;
delete V;
delete S_;
cout << string(20, '=') << " test 3: abs. error" << endl;
J = 0;
tol = 1e-3;
relErr = false;
enable_large_Faust = true; //otherwise it doesn't work
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(norm_err <= tol);
delete U;
delete V;
delete S_;
cout << string(20, '=') << " test 4: concurrent J and rel. error" << endl;
J = 128;
tol = 1e-3;
relErr = true;
norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
assert(norm_err <= tol || U->size() == J); //automatic t = m / 2 for U
delete A;
}
/**
* This test verifies the SVD factorization of an random matrix of size m x n.
* We authorize large Fausts for U and V (a lot of Givens factors) because we want to test if it works well.
*/
int main(int argc, char **argv)
{
// default input
int m, n, min_mn;
m = 128;
n = 64;
auto J = 1024; // nGivens
Real<FPP> tol = 0; // not the stopping criterion if 0
bool relErr = true; // use relative error instead of absolute error if tol is not 0
auto t = -1; // nGivens per factor // -1 for automatic (diff for U and V)
int verb = 0;
int order = -1; // not yet implemented
bool enable_large_Faust = false;
if(argc == 1)
{
auto_tests();
return EXIT_SUCCESS;
}
int opt;
const char *optstr = "m:n:J:t:ralo:ve:";
const char *optspec = "-m Anrows -n Ancols -J <int> -t <int> -r(elerr)|-a(bserr) -l(largeFaust) -o <1|-1|0> -v(erbose)";
while ((opt = getopt(argc, argv, optstr)) != -1)
{
switch (opt)
{
case 'n':
n = atoi(optarg);
break;
case 'm':
m = atoi(optarg);
break;
case 'J':
J = atoi(optarg);
break;
case 't':
t = atoi(optarg);
break;
case 'o':
order = atoi(optarg);
break;
case 'r':
relErr = true;
break;
case 'a':
relErr = false;
break;
case 'l':
enable_large_Faust = true;
break;
case 'v':
verb = 1;
break;
case 'e':
tol = atof(optarg);
break;
default: /* '?' */
fprintf(stderr, "Usage: %s %s\n",
argv[0], optspec);
exit(EXIT_FAILURE);
}
}
cout << "A size m x n: " << m << " x " << n << endl;
cout << "J: " << J << endl;
cout << "t: " << t << endl;
cout << "relErr: " << relErr << endl;
cout << "enable_large_Faust: " << enable_large_Faust << endl;
std::cout << "error target (stop crit if not 0):" << tol << std::endl;
min_mn = m > n?n:m;
auto A = MatDense<FPP, Cpu>::randMat(m, n);
#if DEBUG_SVDTJ
A->save_to_mat_file("/tmp/A_cpp.mat", "A");
#endif
if(J != 0 && t > J)
throw runtime_error("t > J"); // TODO: the check should be in eigtj C++ code
svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb);
delete A;
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
...@@ -20,6 +20,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::next_step() ...@@ -20,6 +20,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::next_step()
#endif #endif
(this->*substep[i])(); (this->*substep[i])();
} }
this->ite++;
} }
template<typename FPP, FDevice DEVICE, typename FPP2> template<typename FPP, FDevice DEVICE, typename FPP2>
...@@ -132,14 +133,14 @@ void GivensFGFT<FPP,DEVICE,FPP2>::calc_theta() ...@@ -132,14 +133,14 @@ void GivensFGFT<FPP,DEVICE,FPP2>::calc_theta()
// end // end
// //
#define calc_err(theta) (*this->L)(this->p,this->q)*cos(2*theta) + 0.5*((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p))*sin(2*theta) #define calc_theta_err(theta) (*this->L)(this->p,this->q)*cos(2*theta) + 0.5*((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p))*sin(2*theta)
FPP2 theta1, theta2, err_theta1, err_theta2; FPP2 theta1, theta2, err_theta1, err_theta2;
theta1 = atan2((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p),(2*(*this->L)(this->p,this->q)))/2; theta1 = atan2((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p),(2*(*this->L)(this->p,this->q)))/2;
theta2 = theta1 + M_PI_4; // from cmath theta2 = theta1 + M_PI_4; // from cmath
err_theta1 = calc_err(theta1); err_theta1 = calc_theta_err(theta1);
err_theta2 = calc_err(theta2); err_theta2 = calc_theta_err(theta2);
if(err_theta1 < err_theta2 && !always_theta2) if(err_theta1 < err_theta2 && !always_theta2)
theta = theta1; theta = theta1;
else else
...@@ -181,7 +182,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::update_fact() ...@@ -181,7 +182,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::update_fact()
this->fact_mod_row_ids.push_back(this->q); this->fact_mod_row_ids.push_back(this->q);
this->fact_mod_col_ids.push_back(this->q); this->fact_mod_col_ids.push_back(this->q);
this->fact_mod_values.push_back(c); this->fact_mod_values.push_back(c);
if(this->J == 0) this->facts.resize(this->ite+1); if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
this->facts[this->ite] = MatSparse<FPP,DEVICE>(this->fact_mod_row_ids, this->fact_mod_col_ids, this->fact_mod_values, n, n); this->facts[this->ite] = MatSparse<FPP,DEVICE>(this->fact_mod_row_ids, this->fact_mod_col_ids, this->fact_mod_values, n, n);
this->facts[this->ite].set_orthogonal(true); this->facts[this->ite].set_orthogonal(true);
#ifdef DEBUG_GIVENS #ifdef DEBUG_GIVENS
...@@ -354,19 +355,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::update_err() ...@@ -354,19 +355,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::update_err()
// //
if(!((this->ite+1)%GivensFGFT<FPP,DEVICE,FPP2>::ERROR_CALC_PERIOD) || this->stoppingCritIsError || this->verbosity > 1) if(!((this->ite+1)%GivensFGFT<FPP,DEVICE,FPP2>::ERROR_CALC_PERIOD) || this->stoppingCritIsError || this->verbosity > 1)
{ {
FPP2 err = 0, err_d; auto err = this->calc_err();
for(int i=0;i<this->D.size();i++)
err += this->D(i)*this->D(i);
if(this->Lap_squared_fro_norm == FPP(0))
{
err_d = Faust::fabs(this->Lap.norm());
err_d = err_d*err_d;
this->Lap_squared_fro_norm = err_d;
}
else
err_d = Faust::fabs(this->Lap_squared_fro_norm);
err = Faust::fabs(err_d - err);
if(this->errIsRel) err /= err_d;
if(this->verbosity) if(this->verbosity)
{ {
cout << "factor : "<< this->ite << ", " << ((this->errIsRel)?"relative ":"absolute ") << "err.: " << err; cout << "factor : "<< this->ite << ", " << ((this->errIsRel)?"relative ":"absolute ") << "err.: " << err;
......
...@@ -20,6 +20,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::next_step() ...@@ -20,6 +20,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::next_step()
#endif #endif
(this->*substep[i])(); (this->*substep[i])();
} }
this->ite++;
} }
template<typename FPP, FDevice DEVICE, typename FPP2> template<typename FPP, FDevice DEVICE, typename FPP2>
...@@ -207,7 +208,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::update_fact() ...@@ -207,7 +208,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::update_fact()
this->fact_mod_row_ids.push_back(this->q); this->fact_mod_row_ids.push_back(this->q);
this->fact_mod_col_ids.push_back(this->q); this->fact_mod_col_ids.push_back(this->q);
this->fact_mod_values.push_back(c_qq); this->fact_mod_values.push_back(c_qq);
if(this->J == 0) this->facts.resize(this->ite+1); if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
this->facts[this->ite] = MatSparse<FPP,DEVICE>(this->fact_mod_row_ids, this->fact_mod_col_ids, this->fact_mod_values, n, n); this->facts[this->ite] = MatSparse<FPP,DEVICE>(this->fact_mod_row_ids, this->fact_mod_col_ids, this->fact_mod_values, n, n);
this->facts[this->ite].set_orthogonal(true); this->facts[this->ite].set_orthogonal(true);
#ifdef DEBUG_GIVENS #ifdef DEBUG_GIVENS
...@@ -420,19 +421,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::update_err() ...@@ -420,19 +421,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::update_err()
// //
if(!((this->ite+1)%GivensFGFTComplex<FPP,DEVICE,FPP2>::ERROR_CALC_PERIOD) || this->stoppingCritIsError || this->verbosity > 1) if(!((this->ite+1)%GivensFGFTComplex<FPP,DEVICE,FPP2>::ERROR_CALC_PERIOD) || this->stoppingCritIsError || this->verbosity > 1)
{ {
FPP2 err = 0, err_d; auto err = this->calc_err();
for(int i=0;i<this->D.size();i++)
err += /*Faust::fabs(*/this->D(i)*this->D(i)/*)*/;
if(this->Lap_squared_fro_norm == FPP2(0))
{
err_d = Faust::fabs(this->Lap.norm());
err_d = err_d*err_d;
this->Lap_squared_fro_norm = err_d;
}
else
err_d = Faust::fabs(this->Lap_squared_fro_norm);
err = Faust::fabs(err_d - err);
if(this->errIsRel) err /= err_d;
if(this->verbosity) if(this->verbosity)
{ {
cout << "factor : "<< this->ite << ", " << ((this->errIsRel)?"relative ":"absolute ") << "err.: " << err; cout << "factor : "<< this->ite << ", " << ((this->errIsRel)?"relative ":"absolute ") << "err.: " << err;
......
...@@ -39,7 +39,7 @@ namespace Faust ...@@ -39,7 +39,7 @@ namespace Faust
// Vect<FPP4,DEVICE> C_min_row; // Vect<FPP4,DEVICE> C_min_row;
protected: protected:
/** \brief Fourier matrix/eigenvectors factorization matrices (Givens matrices). */ /** \brief Fourier matrix/eigenvectors factorization matrices (Givens matrices). */
vector<MatSparse<FPP4,DEVICE>> facts; std::vector<MatSparse<FPP4,DEVICE>> facts;
/** \brief L iteration factor: L_i = S^T L_{i-1} S, initialized from Lap (with S being facts[i]). */ /** \brief L iteration factor: L_i = S^T L_{i-1} S, initialized from Lap (with S being facts[i]). */
MatGeneric<FPP4, DEVICE>* L; MatGeneric<FPP4, DEVICE>* L;
...@@ -80,8 +80,6 @@ namespace Faust ...@@ -80,8 +80,6 @@ namespace Faust
/** \brief inverse permutation of ord_indices (needed to retrieve start undefined order). */ /** \brief inverse permutation of ord_indices (needed to retrieve start undefined order). */
vector<int> inv_ord_indices; vector<int> inv_ord_indices;
/** \brief True is the last fact (of facts) has been permuted */
bool last_fact_permuted;
/** \brief Cache for the ordered D. */ /** \brief Cache for the ordered D. */
Vect<FPP,DEVICE> ordered_D; Vect<FPP,DEVICE> ordered_D;
/** \brief true if D has already been ordered (order_D() was called). */ /** \brief true if D has already been ordered (order_D() was called). */
...@@ -230,6 +228,12 @@ namespace Faust ...@@ -230,6 +228,12 @@ namespace Faust
public: public:
/**
* Computes the error norm of D compared to L.
* It can be relative or absolute depending on this->errIsRel.
*/
FPP2 calc_err();
/** /**
* Returns the ordered indices of D to get increasing eigenvalues along the diagonal. * Returns the ordered indices of D to get increasing eigenvalues along the diagonal.
* *
...@@ -316,17 +320,29 @@ namespace Faust ...@@ -316,17 +320,29 @@ namespace Faust
*/ */
const vector<MatSparse<FPP4,DEVICE>>& get_facts() const; const vector<MatSparse<FPP4,DEVICE>>& get_facts() const;
/**
* Number of Givens factors used for the eigenvector approximate so far.
*/
const size_t nfacts() const
{
return this->get_facts().size();
}
/** /**
* Returns a Transform object with copy of facts into it. * Returns a Transform object with copy of facts into it.
* *
* \param ord true to get the Transform's facts ordering the last one columns according to ascendant eigenvalues, false to let facts as they went out from the algorithm (without reordering). * \param ord true to get the Transform's facts ordering the last one columns according to ascending eigenvalues, false to let facts as they went out from the algorithm (without reordering).
*/ */
Transform<FPP4,DEVICE> get_transform(bool ord); Transform<FPP4,DEVICE> get_transform(const bool ord);
/** /**
* Returns the eigen vectors as a Transform t of Givens matrices.
*
* \param ord -1 for descending order, 1 for ascending order. * \param ord -1 for descending order, 1 for ascending order.
*
* Warning: if copy == false and ord == true the caller is responsible to free the last permuted factor of t.
*/ */
Transform<FPP4,DEVICE> get_transform(int ord); Transform<FPP4,DEVICE> get_transform(const int ord, const bool copy=true, const int first_nfacts=-1);
}; };
......
...@@ -19,6 +19,7 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::update_D() ...@@ -19,6 +19,7 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::update_D()
D.Display(); D.Display();
cout << "D fro. norm: " << D.norm() << endl; cout << "D fro. norm: " << D.norm() << endl;
#endif #endif
is_D_ordered = false;
} }
template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4> template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
...@@ -79,7 +80,6 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::compute_facts() ...@@ -79,7 +80,6 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::compute_facts()
while(! stopping && (J == 0 || ite < facts.size())) // when J == 0 the stopping criterion is the error against Lap while(! stopping && (J == 0 || ite < facts.size())) // when J == 0 the stopping criterion is the error against Lap
{ {
next_step(); next_step();
ite++;
if(stopping = (ite > 1 && stoppingCritIsError && errs.size() > 2 && errs[ite-1]-errs[ite-2] > FLT_EPSILON)) if(stopping = (ite > 1 && stoppingCritIsError && errs.size() > 2 && errs[ite-1]-errs[ite-2] > FLT_EPSILON))
/*if(verbosity>0) */cerr << "WARNING: the eigtj algorithm stopped because the last error is greater than the previous one." << endl; /*if(verbosity>0) */cerr << "WARNING: the eigtj algorithm stopped because the last error is greater than the previous one." << endl;
if(stopping || stoppingCritIsError && errs.size() > 0 && (*(errs.end()-1) - stoppingError ) < FLT_EPSILON) if(stopping || stoppingCritIsError && errs.size() > 0 && (*(errs.end()-1) - stoppingError ) < FLT_EPSILON)
...@@ -102,7 +102,7 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::compute_facts() ...@@ -102,7 +102,7 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::compute_facts()
template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4> template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::GivensFGFTGen(MatGeneric<FPP4,DEVICE>* Lap, int J, unsigned int verbosity /* deft val == 0 */, const double stoppingError, const bool errIsRel, const bool enable_large_Faust /* deft to false */) : GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::GivensFGFTGen(MatGeneric<FPP4,DEVICE>* Lap, int J, unsigned int verbosity /* deft val == 0 */, const double stoppingError, const bool errIsRel, const bool enable_large_Faust /* deft to false */) :
facts(J>0?(J*4<Lap->getNbRow()*Lap->getNbRow()||enable_large_Faust?J:0):1 /* don't allocate if the complexity doesn't worth it and enable_large_Faust is false*/), q_candidates(new int[Lap->getNbRow()]), J(J), D(Lap->getNbRow()), errs(0), coord_choices(0), Lap(*Lap), dim_size(Lap->getNbRow()), Lap_squared_fro_norm(0), last_fact_permuted(false), is_D_ordered(false), verbosity(verbosity), stoppingCritIsError(stoppingError != 0.0), stoppingError(stoppingError), errIsRel(errIsRel), enable_large_Faust(enable_large_Faust) facts(J>0?(J*4<Lap->getNbRow()*Lap->getNbRow()||enable_large_Faust?J:0):0 /* don't allocate if the complexity doesn't worth it and enable_large_Faust is false*/), q_candidates(new int[Lap->getNbRow()]), J(J), D(Lap->getNbRow()), errs(0), coord_choices(0), Lap(*Lap), dim_size(Lap->getNbRow()), Lap_squared_fro_norm(0), is_D_ordered(false), verbosity(verbosity), stoppingCritIsError(stoppingError != 0.0), stoppingError(stoppingError), errIsRel(errIsRel), enable_large_Faust(enable_large_Faust), ite(0)
{ {
if(Lap->getNbCol() != Lap->getNbRow()) if(Lap->getNbCol() != Lap->getNbRow())
handleError("Faust::GivensFGFTComplex", "Laplacian must be a square matrix."); handleError("Faust::GivensFGFTComplex", "Laplacian must be a square matrix.");
...@@ -257,44 +257,83 @@ const vector<Faust::MatSparse<FPP4,DEVICE>>& GivensFGFTGen<FPP,DEVICE,FPP2,FPP4> ...@@ -257,44 +257,83 @@ const vector<Faust::MatSparse<FPP4,DEVICE>>& GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>
} }
template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4> template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(bool ord) Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(const bool ord)
{ {
return get_transform(ord?1:0); return get_transform(ord?1:0);
} }
template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4> template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(int ord) Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(const int ord, const bool copy/*=true*/, const int first_nfacts/*=-1*/)
{ {
//TODO: an optimization is possible by changing type of facts to vector<MatGeneric*> it would avoid copying facts into Transform and rather use them directly. It will need a destructor that deletes them eventually if they weren't transfered to a Transform object before. //TODO: facts should be a Transform or a TransformHelper to avoid the risk of memory leak in caller code when ord == true and copy == false
if(facts.size() == 0) if(facts.size() == 0)
throw out_of_range("The transform is empty. The algorithm stopped before computing any factor."); throw out_of_range("The transform is empty. The algorithm stopped before computing any factor.");
MatSparse<FPP4,DEVICE> & last_fact = *(facts.end()-1);
MatSparse<FPP4,DEVICE> P(last_fact.getNbCol(), last_fact.getNbCol()); //last_fact permutation matrix
// (to reorder eigenvector transform according to ordered D)
if(last_fact_permuted) std::vector<MatGeneric<FPP4, DEVICE>*> facts;
{ int end_id;
// get back to undefined order
for(int i=0;i<inv_ord_indices.size();i++) size_t nfacts;
P.setCoeff(inv_ord_indices[i],i, FPP4(1.0)); if(first_nfacts < 0)
last_fact.multiplyRight(P); // we want only effectively computed facts
last_fact_permuted = false; nfacts = this->ite;
} else
// we want the number of facts asked by the caller
nfacts = first_nfacts;
if (ord)
// we process the last factor separately because the columns must be reordered
end_id = nfacts - 1;
else
end_id = nfacts;
for(int i=0; i < end_id; i++)
facts.push_back(&this->facts[i]);
auto t = Faust::Transform<FPP4,DEVICE>(facts, /* lambda */ FPP4(1.0), /* optimizedCopy */false, /* cloning_fact */ copy);
if(! copy)
t.disable_dtor(); // don't want to free facts when t is out of scope because we might still need them in this object
if(ord) if(ord)
{ {
// (reorder eigenvector transform according to ordered D)
MatSparse<FPP4,DEVICE> & last_fact = *(this->facts.begin() + end_id);
auto P = new MatSparse<FPP4, DEVICE>(last_fact.getNbCol(), last_fact.getNbCol()); //last_fact permutation matrix
if(!is_D_ordered || ord != D_order_dir) if(!is_D_ordered || ord != D_order_dir)
order_D(ord); order_D(ord);
for(int i=0;i<ord_indices.size();i++) for(int i=0;i<ord_indices.size();i++)
P.setCoeff(ord_indices[i],i, FPP4(1.0)); P->setCoeff(ord_indices[i],i, FPP4(1.0));
// P.set_orthogonal(true); // P.set_orthogonal(true);
// facts.push_back(P); // we prefer to directly multiply the last factor by P // facts.push_back(P); // we prefer to directly multiply the last factor by P
last_fact_permuted = true; // last_fact.multiplyRight(P);
last_fact.multiplyRight(P); last_fact.multiply(*P, 'N');
t.push_back(P, false, false, false, copy); //default to optimizedCopy=false, transpose=false, conjugate=false, copying=false, verify_dims_agree=true
if(copy) delete P;
// Warning: if copy == false the caller is responsible to free the P
} }
Faust::Transform<FPP4,DEVICE> t = Faust::Transform<FPP4,DEVICE>(facts);
// // remove the permutation factor if added temporarily for reordering
// return ord?facts.erase(facts.end()-1),t:t;
return t; return t;
}
template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
FPP2 GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::calc_err()
{
FPP2 err = 0, err_d;
for(int i=0;i<this->D.size();i++)
err += /*Faust::fabs(*/this->D(i)*this->D(i)/*)*/;
if(this->Lap_squared_fro_norm == FPP2(0))
{
err_d = Faust::fabs(this->Lap.norm());
err_d = err_d*err_d;
this->Lap_squared_fro_norm = err_d;
}
else
err_d = Faust::fabs(this->Lap_squared_fro_norm);
err = Faust::fabs(err_d - err);
if(this->errIsRel) err /= err_d;
return err;
} }
...@@ -41,10 +41,11 @@ void GivensFGFTParallel<FPP,DEVICE,FPP2>::next_step() ...@@ -41,10 +41,11 @@ void GivensFGFTParallel<FPP,DEVICE,FPP2>::next_step()
for(int i=0;i<sizeof(substep)/sizeof(substep_fun);i++) for(int i=0;i<sizeof(substep)/sizeof(substep_fun);i++)
{ {
#ifdef DEBUG_GIVENS #ifdef DEBUG_GIVENS
cout << "GivensFGFTParallel ite=" << this->ite << " substep i=" << i << endl; std::cout << "GivensFGFTParallel ite=" << this->ite << " substep i=" << i << std::endl;
#endif #endif
(this->*substep[i])(); (this->*substep[i])();
} }
this->ite++;
} }
template<typename FPP, FDevice DEVICE, typename FPP2> template<typename FPP, FDevice DEVICE, typename FPP2>
...@@ -75,7 +76,7 @@ void GivensFGFTParallel<FPP,DEVICE,FPP2>::update_fact() ...@@ -75,7 +76,7 @@ void GivensFGFTParallel<FPP,DEVICE,FPP2>::update_fact()
this->fact_mod_row_ids.push_back(this->q); this->fact_mod_row_ids.push_back(this->q);
this->fact_mod_col_ids.push_back(this->q); this->fact_mod_col_ids.push_back(this->q);
this->fact_mod_values.push_back(cos(this->theta)); this->fact_mod_values.push_back(cos(this->theta));
if(this->J == 0) this->facts.resize(this->ite+1); if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
} }
template<typename FPP, FDevice DEVICE, typename FPP2> template<typename FPP, FDevice DEVICE, typename FPP2>
......
...@@ -47,6 +47,7 @@ void GivensFGFTParallelComplex<FPP,DEVICE,FPP2>::next_step() ...@@ -47,6 +47,7 @@ void GivensFGFTParallelComplex<FPP,DEVICE,FPP2>::next_step()
#endif #endif
(this->*substep[i])(); (this->*substep[i])();
} }
this->ite++;
} }
template<typename FPP, FDevice DEVICE, typename FPP2> template<typename FPP, FDevice DEVICE, typename FPP2>
...@@ -97,7 +98,7 @@ void GivensFGFTParallelComplex<FPP,DEVICE,FPP2>::update_fact() ...@@ -97,7 +98,7 @@ void GivensFGFTParallelComplex<FPP,DEVICE,FPP2>::update_fact()
this->fact_mod_row_ids.push_back(this->q); this->fact_mod_row_ids.push_back(this->q);
this->fact_mod_col_ids.push_back(this->q); this->fact_mod_col_ids.push_back(this->q);
this->fact_mod_values.push_back(c_qq); this->fact_mod_values.push_back(c_qq);
if(this->J == 0) this->facts.resize(this->ite+1); if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
} }
template<typename FPP, FDevice DEVICE, typename FPP2> template<typename FPP, FDevice DEVICE, typename FPP2>
......
...@@ -9,34 +9,33 @@ namespace Faust ...@@ -9,34 +9,33 @@ namespace Faust
#if(@SVD_COMPLEX@==1) #if(@SVD_COMPLEX@==1)
template<> template<>
void instantiate_algos<complex<@REAL_TYPE@>, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW1, GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW2, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dM_M, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dMM_,int J, int t, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust) void instantiate_algos<complex<@REAL_TYPE@>, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW1, GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW2, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dM_M, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dMM_,int J1, int J2, int t1, int t2, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust)
{ {
if(t <= 1) if(t1 <= 1)
{ *algoW1 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J1, verbosity, tol, relErr, enable_large_Faust);
*algoW1 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J, verbosity, tol, relErr, enable_large_Faust);
*algoW2 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J, verbosity, tol, relErr, enable_large_Faust);
}
else else
{ *algoW1 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J1, t1, verbosity, tol, relErr, enable_large_Faust);
*algoW1 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J, t, verbosity, tol, relErr, enable_large_Faust);
*algoW2 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J, t, verbosity, tol, relErr, enable_large_Faust); if(t2 <= 1)
} *algoW2 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J2, verbosity, tol, relErr, enable_large_Faust);
else
*algoW2 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J2, t2, verbosity, tol, relErr, enable_large_Faust);
} }
#endif #endif
template<> template<>
void instantiate_algos<@REAL_TYPE@, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW1, GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW2, Faust::MatDense<@REAL_TYPE@,Cpu> &dM_M, Faust::MatDense<@REAL_TYPE@,Cpu> &dMM_,int J, int t, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust) void instantiate_algos<@REAL_TYPE@, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW1, GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW2, Faust::MatDense<@REAL_TYPE@,Cpu> &dM_M, Faust::MatDense<@REAL_TYPE@,Cpu> &dMM_,int J1, int J2, int t1, int t2, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust)
{ {
if(t <= 1) if(t1 <= 1)
{ *algoW1 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J1, verbosity, tol, relErr, enable_large_Faust);
*algoW1 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J, verbosity, tol, relErr, enable_large_Faust); else
*algoW2 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J, verbosity, tol, relErr, enable_large_Faust); *algoW1 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J1, t1, verbosity, tol, relErr, enable_large_Faust);
}
if(t2 <= 1)
*algoW2 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J2, verbosity, tol, relErr, enable_large_Faust);
else else
{ *algoW2 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J2, t2, verbosity, tol, relErr, enable_large_Faust);
*algoW1 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J, t, verbosity, tol, relErr, enable_large_Faust);
*algoW2 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J, t, verbosity, tol, relErr, enable_large_Faust);
}
} }
} }
...@@ -17,12 +17,94 @@ namespace Faust ...@@ -17,12 +17,94 @@ namespace Faust
template<typename FPP, FDevice DEVICE, typename FPP2 = float> template<typename FPP, FDevice DEVICE, typename FPP2 = float>
void svdtj_cplx(MatSparse<FPP, DEVICE> & M, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order, const bool enable_large_Faust,TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S); void svdtj_cplx(MatSparse<FPP, DEVICE> & M, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order, const bool enable_large_Faust,TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S);
template<typename FPP, FDevice DEVICE, typename FPP2 = float>
void svdtj_core_gen(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_);
template<typename FPP, FDevice DEVICE, typename FPP2> /**
void instantiate_algos(GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW1, GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW2, Faust::MatDense<FPP,DEVICE> &dM_M, Faust::MatDense<FPP,DEVICE> &dMM_, int J, int t, unsigned int verbosity, FPP2 tol, bool relErr, bool enable_large_Faust); * \brief Wrapper for blind and step versions of SVDTJ.
}; */
template<typename FPP, FDevice DEVICE, typename FPP2 = float>
void svdtj_core_gen(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_, const bool by_step=true);
/**
* This version runs two eigtjs step by step until the tol error or the number of Givens J is reached.
*/
template<typename FPP, FDevice DEVICE, typename FPP2 = float>
void svdtj_core_gen_step(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_);
/**
* This version runs two eigtjs blindly until they reach the tol error for the eigen decomposition or the number of Givens is reached.
*/
template<typename FPP, FDevice DEVICE, typename FPP2 = float>
void svdtj_core_gen_blind(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_);
template<typename FPP, FDevice DEVICE, typename FPP2>
void instantiate_algos(GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW1, GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW2, Faust::MatDense<FPP,DEVICE> &dM_M, Faust::MatDense<FPP,DEVICE> &dMM_, int J1, int J2, int t1, int t2, unsigned int verbosity, FPP2 tol, bool relErr, bool enable_large_Faust);
/**
* \brief Order S in descending order and permute columns of thW1 and thW2 consequently.
*
* \param order: not yet impl., only descending (-1) order is handled. //TODO
*/
template<typename FPP, FDevice DEVICE>
void svdtj_order_W1_W2_S(const faust_unsigned_int &m, const faust_unsigned_int &n, const int order, const Vect<FPP, DEVICE> &S, Vect<FPP, DEVICE> **S_, TransformHelper<FPP, DEVICE> &thW1, TransformHelper<FPP, DEVICE> &thW2);
/**
* Utility function of svdtj_core_gen_step for computing U'MV (or W1' M W2), method 1 (the simplest way).
*
* This method is not used but is kept as reference because the code goes more an more complicated for methods 2 and 3.
*
* \param dM: (in out) matrix of which we calculate the SVD approximate as a MatDense.
* \param dM_adj: (in out) true if dM is already the adjoint of the original matrix, false otherwise (in which case the adjoint is computed internvally and dM is modified). The goal is to avoid computing the adjoint M many times in svdtj_core_gen_step.
* \param tW1: the "Faust" of Givens matrices for U (W1).
* \param tW2: the "Faust" of Givens matrices for V (W2).
*
* \return the MatDense for W1'*M*W2.
*/
template<typename FPP, FDevice DEVICE>
MatDense<FPP,DEVICE> svdtj_compute_W1H_M_W2_meth1(MatDense<FPP,DEVICE> &dM, bool &dM_adj, const Transform<FPP, DEVICE> &tW1, const Transform<FPP, DEVICE> &tW2);
/**
* Utility function of svdtj_core_gen_step for computing U'MV (or W1' M W2), method 2 (not faster than method 1, just a sketching of method 3).
*
* This method is not used but is kept as reference because the code goes more an more complicated in method 3.
*
* \param dM: matrix of which we calculate the SVD approximate as a MatDense.
* \param tW1: the "Faust" of Givens matrices for U (W1).
* \param tW2: the "Faust" of Givens matrices for V (W2).
*
* \return the MatDense for W1'*M*W2.
*/
template<typename FPP, FDevice DEVICE>
MatDense<FPP,DEVICE> svdtj_compute_W1H_M_W2_meth2(const MatDense<FPP,DEVICE> &dM, const Transform<FPP, DEVICE> &tW1, const Transform<FPP, DEVICE> &tW2);
/**
* Utility function of svdtj_core_gen_step for computing U'MV (or W1' M W2), method 3 (fastest method).
*
* It is advised to refer to *meth1 and *meth2 function in order to understand this one.
*
* The principle of this method is to compute W1H_M_W2 recursively reusing previous computations.
* if W1 is a sequence of factors W1,0 W1,1 ... W1,N,
* W2 a sequence of factors W2,0 W2,1 ... W2,N
* S_j the successive products U' M V along the SVDTJ algorithm execution.
* Then S_{n} = W1,N' * S_{n-1} * W2,N.
*
*
* \param dM: matrix of which we calculate the SVD approximate as a MatDense.
* \param tW1: the "Faust" of Givens matrices for U (W1).
* \param tW2: the "Faust" of Givens matrices for V (W2).
* \param prev_W1H_M_W2: previous result of W1'*M*W2 to base the computation of this one.
* \param err_step_period: the period in number of Givens according which the SVDJT computes the error. It matters to determine the order of the recursive calculation of W1' M W2.
* \param k1, k2: number of Givens used for W1 and W2 until the latest iteration of SVDTJ.
* \param t1, t2: the number of Givens matrix in each W1 W2 factors.
*
* \return the MatDense for W1'*M*W2.
*/
template<typename FPP, FDevice DEVICE>
MatDense<FPP,DEVICE> svdtj_compute_W1H_M_W2_meth3(const MatDense<FPP,DEVICE> &dM, const Transform<FPP, DEVICE> &tW1, const Transform<FPP, DEVICE> &tW2, MatDense<FPP, DEVICE> & prev_W1H_M_W2, const int err_step_period, const int k1, const int k2, const int t1, const int t2, const bool new_W1, const bool new_W2);
}
#include "faust_SVDTJ.hpp" #include "faust_SVDTJ.hpp"
#endif #endif
This diff is collapsed.
...@@ -135,7 +135,7 @@ void svdtj(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start of inpu ...@@ -135,7 +135,7 @@ void svdtj(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start of inpu
Faust::MatDense<FPP,Cpu> M(M_data, (faust_unsigned_int) num_rows, (faust_unsigned_int) num_cols); Faust::MatDense<FPP,Cpu> M(M_data, (faust_unsigned_int) num_rows, (faust_unsigned_int) num_cols);
TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr; TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr;
Faust::Vect<FPP,Cpu> * S_ = nullptr; Faust::Vect<FPP,Cpu> * S_ = nullptr;
svdtj(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust ,&U_, &V_, &S_); svdtj(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust ,&U_, &V_, &S_);
create_svdtj_output(U_, V_, U, V, S, S_); create_svdtj_output(U_, V_, U, V, S, S_);
} }
...@@ -145,7 +145,7 @@ void svdtj_sparse(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start ...@@ -145,7 +145,7 @@ void svdtj_sparse(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start
Faust::MatSparse<FPP, Cpu> M(nnz, nrows, ncols, data, id_col, row_ptr); Faust::MatSparse<FPP, Cpu> M(nnz, nrows, ncols, data, id_col, row_ptr);
TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr; TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr;
Faust::Vect<FPP,Cpu> * S_ = nullptr; Faust::Vect<FPP,Cpu> * S_ = nullptr;
svdtj(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_); svdtj(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
create_svdtj_output(U_, V_, U, V, S, S_); create_svdtj_output(U_, V_, U, V, S, S_);
} }
...@@ -155,7 +155,7 @@ void svdtj_cplx(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start of ...@@ -155,7 +155,7 @@ void svdtj_cplx(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start of
Faust::MatDense<FPP,Cpu> M(M_data, (faust_unsigned_int) num_rows, (faust_unsigned_int) num_cols); Faust::MatDense<FPP,Cpu> M(M_data, (faust_unsigned_int) num_rows, (faust_unsigned_int) num_cols);
TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr; TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr;
Faust::Vect<FPP,Cpu> * S_ = nullptr; Faust::Vect<FPP,Cpu> * S_ = nullptr;
svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_); svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
create_svdtj_output(U_, V_, U, V, S, S_); create_svdtj_output(U_, V_, U, V, S, S_);
} }
...@@ -165,7 +165,7 @@ void svdtj_sparse_cplx(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*s ...@@ -165,7 +165,7 @@ void svdtj_sparse_cplx(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*s
Faust::MatSparse<FPP, Cpu> M(nnz, nrows, ncols, data, id_col, row_ptr); Faust::MatSparse<FPP, Cpu> M(nnz, nrows, ncols, data, id_col, row_ptr);
TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr; TransformHelper<FPP,Cpu> *U_ = nullptr, *V_ = nullptr;
Faust::Vect<FPP,Cpu> * S_ = nullptr; Faust::Vect<FPP,Cpu> * S_ = nullptr;
svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_); svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
create_svdtj_output(U_, V_, U, V, S, S_); create_svdtj_output(U_, V_, U, V, S, S_);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment