Renew the SVDTJ algorithm: more accurate error stopping criterion and other enhancements.

- Different automatic t (number of Givens per factor) for U and V. - Ignore U and V extra factors that don't enhance the error. - Update unit C++ tests (command line options and pre-defined auto tests). - Addons and refactoring in Givens* classes.

Renew the SVDTJ algorithm: more accurate error stopping criterion and other enhancements.
4c68c41b · hhakim · c502359f · 4c68c41b · 4c68c41b · 4c68c41b
Commit 4c68c41b authored 2 years ago by hhakim
--- a/misc/test/src/C++/test_svdtj.cpp.in
+++ b/misc/test/src/C++/test_svdtj.cpp.in
@@ -3,57 +3,40 @@
 #include "faust_GivensFGFTGen.h"
 #include "faust_SVDTJ.h"
 #include <cstdlib>
+#include <string>
+#include <unistd.h>
 typedef @TEST_FPP@ FPP;
 using namespace std;
 using namespace Faust;
-/**
+Real<FPP> svdtj_and_error(MatDense<FPP, Cpu>* A, int J, int t, Real<FPP> tol, bool relErr, int order, bool enable_large_Faust, int verb, TransformHelper<FPP,Cpu> **U_=nullptr, TransformHelper<FPP, Cpu> **V_=nullptr, Vect<FPP,Cpu> **S__=nullptr, bool del=true)
- * This test verifies the SVD factorization of an random matrix of size m x n.
- * We authorize large Fausts for U and V (a lot of Givens factors) because we want to test if it works well.
- */
-int main(int argc, char **argv)
 {
-	// input
-	int m, n, min_mn;
+	int m = A->getNbRow(), n = A->getNbCol();
-	m = 16;
+	auto min_mn = m > n?n:m;
-	n = 32;
-	auto J = 1024; // nGivens
-	if(argc >= 3)
-	{
-		m = std::atoi(argv[1]);
-		n = std::atoi(argv[2]);
-		if(argc >= 4)
-			J = std::atoi(argv[3]);
-	}
-	cout << "A size m x n: " << m << " x " << n << endl;
-	cout << "J: " << J << endl;
-	min_mn = m > n?n:m;
-	auto A = MatDense<FPP, Cpu>::randMat(m, n);
-//	auto DFT = TransformHelper<std::complex<double>, Cpu>::fourierFaust(4, false);
-//	auto A = new MatDense<FPP, Cpu>(DFT->real<double>()->get_product()); // in the heap to match the delete in the end (needed for randMat generated A)
-//	delete DFT;
-#if DEBUG_SVDTJ
-	A->save_to_mat_file("/tmp/A_cpp.mat", "A");
-#endif
-	auto t =  m / 2; // nGivens per factor
-	if(t > J)
-		throw runtime_error("t > J"); // TODO: the check should be in eigtj C++ code
-	bool relErr = true; // use relative error instead of absolute error if tol is not 0
-	Real<FPP> tol = 0; // not the stopping criterion
-	int verb = 0;
-	int order = -1;
-	bool enable_large_Faust = true;
 	// output
-	TransformHelper<FPP,Cpu> *U, *V;
-	Faust::Vect<FPP,Cpu> * S_;
 // input svdtj nGivens, tol, order, relerr, nGivens_per_fac, verbosity, enable_large_Faust: 4096 0 descend True None 0 True
 //
 //void Faust::svdtj(MatDense<FPP, DEVICE> & dM, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Faust::Vect<FPP,DEVICE> ** S_)
 	MatDense<FPP, Cpu> err(*A);
 	// warning *A is modified by the svdtj
-	svdtj(*A, J, t, tol, verb, relErr, order, enable_large_Faust, &U, &V, &S_);
+	Vect<FPP, Cpu> * S_;
+	TransformHelper<FPP, Cpu> * U, *V;
+	if(! S__)
+		S__ = &S_;
+	if(! U_)
+		U_ = &U;
+	if(! V_)
+		V_ = &V;
+	svdtj(*A, J, t, tol, verb, relErr, order, enable_large_Faust, U_, V_, S__);
+	U = *U_;
+	V = *V_;
+	S_ = *S__;
 	cout << "U, V number of factors: " << U->size() << ", " << V->size() << endl;
 	cout << "U, V RCGs: " << double(m * m) / U->get_total_nnz() << " " << double(n * n) / V->get_total_nnz() << endl;
 	MatDense<FPP, Cpu> S(m, n);
@@ -75,15 +58,219 @@ int main(int argc, char **argv)
 #endif
 	err -= USV_;
 	cout << "svdtj err: " << err.norm() / A->norm() << endl;
+	Real<FPP> norm_err = (A->norm() - S.norm());
+	if (relErr)  norm_err /= A->norm();
+	cout << "svdtj norm err: " << norm_err << endl;
 #if DEBUG_SVDTJ
 	U->save_mat_file("/tmp/U_cpp.mat");
 	V->save_mat_file("/tmp/V_cpp.mat");
 	S.save_to_mat_file("/tmp/S_cpp.mat", "S");
 #endif
-	delete A;
+	if(del)
+	{
+		delete U;
+		delete V;
+		delete S_;
+	}
+	return norm_err;
+}
+void auto_tests()
+{
+	int m, n, min_mn;
+	int J, t, order;
+	Real<FPP> tol;
+	bool verb, relErr, enable_large_Faust;
+	// first test
+	m = 128;
+	n = 64;
+	J = 1024; // nGivens
+	tol = 0; // not the stopping criterion if 0
+	relErr = true; // use relative error instead of absolute error if tol is not 0
+	t = -1; // nGivens per factor // -1 for automatic (diff for U and V)
+	verb = 1;
+	order = -1; // not yet implemented
+	enable_large_Faust = false;
+	auto A = MatDense<FPP, Cpu>::randMat(m, n);
+	TransformHelper<FPP,Cpu> *U, *V;
+	Vect<FPP,Cpu> * S_;
+	cout << string(20, '=') << " test 1: J limit" << endl;
+	auto norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(U->size() == 16); // auto-computed t = 64 for U, 64 * 16 == 1024 == J
+	delete U;
+	delete V;
+	delete S_;
+	cout << string(20, '=') << " test 2: rel. error" << endl;
+	J = 0;
+	tol = 1e-3;
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(norm_err <= tol);
+	delete U;
+	delete V;
+	delete S_;
+	cout << string(20, '=') << " test 3: abs. error" << endl;
+	J = 0;
+	tol = 1e-3;
+	relErr = false;
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(norm_err <= tol);
 	delete U;
 	delete V;
 	delete S_;
+	cout << string(20, '=') << " test 4: concurrent J and rel. error" << endl;
+	J = 128;
+	tol = 1e-3;
+	relErr = true;
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(norm_err <= tol || U->size() * m / 2  <= J); //automatic t = m / 2 for U
+	cout << string(20, '=') << " test 5: J limit, t=1" << endl;
+	t = 1;
+	J = 100;
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(U->size() == J); // auto-computed t = 1
+	delete U;
+	delete V;
+	delete S_;
+	cout << string(20, '=') << " test 6: rel. error" << endl;
+	J = 0;
+	tol = 1e-3;
+	relErr = true;
+	enable_large_Faust = true; //otherwise it doesn't work
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(norm_err <= tol);
+	delete U;
+	delete V;
+	delete S_;
+	cout << string(20, '=') << " test 3: abs. error" << endl;
+	J = 0;
+	tol = 1e-3;
+	relErr = false;
+	enable_large_Faust = true; //otherwise it doesn't work
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(norm_err <= tol);
+	delete U;
+	delete V;
+	delete S_;
+	cout << string(20, '=') << " test 4: concurrent J and rel. error" << endl;
+	J = 128;
+	tol = 1e-3;
+	relErr = true;
+	norm_err = svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb, &U, &V, &S_, false);
+	assert(norm_err <= tol || U->size() == J); //automatic t = m / 2 for U
+	delete A;
+}
+/**
+ * This test verifies the SVD factorization of an random matrix of size m x n.
+ * We authorize large Fausts for U and V (a lot of Givens factors) because we want to test if it works well.
+ */
+int main(int argc, char **argv)
+{
+	// default input
+	int m, n, min_mn;
+	m = 128;
+	n = 64;
+	auto J = 1024; // nGivens
+	Real<FPP> tol = 0; // not the stopping criterion if 0
+	bool relErr = true; // use relative error instead of absolute error if tol is not 0
+	auto t = -1; // nGivens per factor // -1 for automatic (diff for U and V)
+	int verb = 0;
+	int order = -1; // not yet implemented
+	bool enable_large_Faust = false;
+	if(argc == 1)
+	{
+		auto_tests();
+		return EXIT_SUCCESS;
+	}
+	int opt;
+	const char *optstr = "m:n:J:t:ralo:ve:";
+	const char *optspec = "-m Anrows -n Ancols -J <int> -t <int> -r(elerr)|-a(bserr) -l(largeFaust) -o <1|-1|0> -v(erbose)";
+	while ((opt = getopt(argc, argv, optstr)) != -1)
+	{
+		switch (opt)
+		{
+			case 'n':
+				n = atoi(optarg);
+				break;
+			case 'm':
+				m = atoi(optarg);
+				break;
+			case 'J':
+				J = atoi(optarg);
+				break;
+			case 't':
+				t = atoi(optarg);
+				break;
+			case 'o':
+				order = atoi(optarg);
+				break;
+			case 'r':
+				relErr = true;
+				break;
+			case 'a':
+				relErr = false;
+				break;
+			case 'l':
+				enable_large_Faust = true;
+				break;
+			case 'v':
+				verb = 1;
+				break;
+			case 'e':
+				tol = atof(optarg);
+				break;
+			default: /* '?' */
+				fprintf(stderr, "Usage: %s %s\n",
+						argv[0], optspec);
+				exit(EXIT_FAILURE);
+		}
+	}
+	cout << "A size m x n: " << m << " x " << n << endl;
+	cout << "J: " << J << endl;
+	cout << "t: " << t << endl;
+	cout << "relErr: " << relErr << endl;
+	cout << "enable_large_Faust: " << enable_large_Faust << endl;
+	std::cout << "error target (stop crit if not 0):" << tol << std::endl;
+	min_mn = m > n?n:m;
+	auto A = MatDense<FPP, Cpu>::randMat(m, n);
+#if DEBUG_SVDTJ
+	A->save_to_mat_file("/tmp/A_cpp.mat", "A");
+#endif
+	if(J != 0 && t > J)
+		throw runtime_error("t > J"); // TODO: the check should be in eigtj C++ code
+	svdtj_and_error(A, J, t, tol, relErr, order, enable_large_Faust, verb);
+	delete A;
 	return EXIT_SUCCESS;
 }
--- a/src/algorithm/factorization/faust_GivensFGFT.hpp
+++ b/src/algorithm/factorization/faust_GivensFGFT.hpp
@@ -20,6 +20,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::next_step()
 #endif
 		(this->*substep[i])();
 	}
+	this->ite++;
 }
 template<typename FPP, FDevice DEVICE, typename FPP2>
@@ -132,14 +133,14 @@ void GivensFGFT<FPP,DEVICE,FPP2>::calc_theta()
 //        end
 //
-#define calc_err(theta) (*this->L)(this->p,this->q)*cos(2*theta) + 0.5*((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p))*sin(2*theta)
+#define calc_theta_err(theta) (*this->L)(this->p,this->q)*cos(2*theta) + 0.5*((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p))*sin(2*theta)
 	FPP2 theta1, theta2, err_theta1, err_theta2;
 	theta1 = atan2((*this->L)(this->q,this->q) - (*this->L)(this->p,this->p),(2*(*this->L)(this->p,this->q)))/2;
 	theta2 = theta1 + M_PI_4; // from cmath
-	err_theta1 = calc_err(theta1);
+	err_theta1 = calc_theta_err(theta1);
-	err_theta2 = calc_err(theta2);
+	err_theta2 = calc_theta_err(theta2);
 	if(err_theta1 < err_theta2 && !always_theta2)
 		theta = theta1;
 	else
@@ -181,7 +182,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::update_fact()
 	this->fact_mod_row_ids.push_back(this->q);
 	this->fact_mod_col_ids.push_back(this->q);
 	this->fact_mod_values.push_back(c);
-	if(this->J == 0) this->facts.resize(this->ite+1);
+	if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
 	this->facts[this->ite] = MatSparse<FPP,DEVICE>(this->fact_mod_row_ids, this->fact_mod_col_ids, this->fact_mod_values, n, n);
 	this->facts[this->ite].set_orthogonal(true);
 #ifdef DEBUG_GIVENS
@@ -354,19 +355,7 @@ void GivensFGFT<FPP,DEVICE,FPP2>::update_err()
 	//
 	if(!((this->ite+1)%GivensFGFT<FPP,DEVICE,FPP2>::ERROR_CALC_PERIOD) || this->stoppingCritIsError || this->verbosity > 1)
 	{
-		FPP2 err = 0, err_d;
+		auto err = this->calc_err();
-		for(int i=0;i<this->D.size();i++)
-			err += this->D(i)*this->D(i);
-		if(this->Lap_squared_fro_norm == FPP(0))
-		{
-			err_d = Faust::fabs(this->Lap.norm());
-			err_d = err_d*err_d;
-			this->Lap_squared_fro_norm = err_d;
-		}
-		else
-			err_d = Faust::fabs(this->Lap_squared_fro_norm);
-		err = Faust::fabs(err_d - err);
-		if(this->errIsRel) err /= err_d;
 		if(this->verbosity)
 		{
 			cout << "factor : "<< this->ite <<  ", " << ((this->errIsRel)?"relative ":"absolute ") << "err.: " << err;

--- a/src/algorithm/factorization/faust_GivensFGFTComplex.hpp
+++ b/src/algorithm/factorization/faust_GivensFGFTComplex.hpp
@@ -20,6 +20,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::next_step()
 #endif
 		(this->*substep[i])();
 	}
+	this->ite++;
 }
 template<typename FPP, FDevice DEVICE, typename FPP2>
@@ -207,7 +208,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::update_fact()
 	this->fact_mod_row_ids.push_back(this->q);
 	this->fact_mod_col_ids.push_back(this->q);
 	this->fact_mod_values.push_back(c_qq);
-	if(this->J == 0) this->facts.resize(this->ite+1);
+	if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
 	this->facts[this->ite] = MatSparse<FPP,DEVICE>(this->fact_mod_row_ids, this->fact_mod_col_ids, this->fact_mod_values, n, n);
 	this->facts[this->ite].set_orthogonal(true);
 #ifdef DEBUG_GIVENS
@@ -420,19 +421,7 @@ void GivensFGFTComplex<FPP,DEVICE,FPP2>::update_err()
 	//
 	if(!((this->ite+1)%GivensFGFTComplex<FPP,DEVICE,FPP2>::ERROR_CALC_PERIOD) || this->stoppingCritIsError || this->verbosity > 1)
 	{
-		FPP2 err = 0, err_d;
+		auto err = this->calc_err();
-		for(int i=0;i<this->D.size();i++)
-			err += /*Faust::fabs(*/this->D(i)*this->D(i)/*)*/;
-		if(this->Lap_squared_fro_norm == FPP2(0))
-		{
-			err_d = Faust::fabs(this->Lap.norm());
-			err_d = err_d*err_d;
-			this->Lap_squared_fro_norm = err_d;
-		}
-		else
-			err_d = Faust::fabs(this->Lap_squared_fro_norm);
-		err = Faust::fabs(err_d - err);
-		if(this->errIsRel) err /= err_d;
 		if(this->verbosity)
 		{
 			cout << "factor : "<< this->ite <<  ", " << ((this->errIsRel)?"relative ":"absolute ") << "err.: " << err;

--- a/src/algorithm/factorization/faust_GivensFGFTGen.h
+++ b/src/algorithm/factorization/faust_GivensFGFTGen.h
@@ -39,7 +39,7 @@ namespace Faust
 			//			Vect<FPP4,DEVICE> C_min_row;
 			protected:
 				/** \brief Fourier matrix/eigenvectors factorization matrices (Givens matrices). */
-				vector<MatSparse<FPP4,DEVICE>> facts;
+				std::vector<MatSparse<FPP4,DEVICE>> facts;
 				/** \brief L iteration factor:  L_i = S^T L_{i-1} S, initialized from Lap (with S being facts[i]). */
 				MatGeneric<FPP4, DEVICE>* L;
@@ -80,8 +80,6 @@ namespace Faust
 				/** \brief inverse permutation of ord_indices (needed to retrieve start undefined order). */
 				vector<int> inv_ord_indices;
-				/** \brief True is the last fact (of facts) has been permuted */
-				bool last_fact_permuted;
 				/** \brief Cache for the ordered D. */
 				Vect<FPP,DEVICE> ordered_D;
 				/** \brief true if D has already been ordered (order_D() was called). */
@@ -230,6 +228,12 @@ namespace Faust
 			public:
+				/**
+				 *  Computes the error norm of D compared to L.
+				 *  It can be relative or absolute depending on this->errIsRel.
+				 */
+				FPP2 calc_err();
 				/**
 				 * Returns the ordered indices of D to get increasing eigenvalues along the diagonal.
 				 *
@@ -316,17 +320,29 @@ namespace Faust
 				 */
 				const vector<MatSparse<FPP4,DEVICE>>& get_facts() const;
+				/**
+				 * Number of Givens factors used for the eigenvector approximate so far.
+				 */
+				const size_t nfacts() const
+				{
+					return this->get_facts().size();
+				}
 				/**
 				 * Returns a Transform object with copy of facts into it.
 				 *
-				 * \param ord true to get the Transform's facts ordering the last one columns according to ascendant eigenvalues, false to let facts as they went out from the algorithm (without reordering).
+				 * \param ord true to get the Transform's facts ordering the last one columns according to ascending eigenvalues, false to let facts as they went out from the algorithm (without reordering).
 				 */
-				Transform<FPP4,DEVICE> get_transform(bool ord);
+				Transform<FPP4,DEVICE> get_transform(const bool ord);
 				/**
+				 * Returns the eigen vectors as a Transform t of Givens matrices.
+				 *
 				 * \param ord -1 for descending order, 1 for ascending order.
+				 *
+				 * Warning: if copy == false and ord == true the caller is responsible to free the last permuted factor of t.
 				 */
-				Transform<FPP4,DEVICE> get_transform(int ord);
+				Transform<FPP4,DEVICE> get_transform(const int ord, const bool copy=true, const int first_nfacts=-1);
 		};

--- a/src/algorithm/factorization/faust_GivensFGFTGen.hpp
+++ b/src/algorithm/factorization/faust_GivensFGFTGen.hpp
@@ -19,6 +19,7 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::update_D()
 	D.Display();
 	cout << "D fro. norm: " << D.norm() << endl;
 #endif
+	is_D_ordered = false;
 }
 template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
@@ -79,7 +80,6 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::compute_facts()
 	while(! stopping && (J == 0 || ite < facts.size())) // when J == 0 the stopping criterion is the error against Lap
 	{
 		next_step();
-		ite++;
 		if(stopping = (ite > 1 && stoppingCritIsError && errs.size() > 2 && errs[ite-1]-errs[ite-2] > FLT_EPSILON))
 			/*if(verbosity>0) */cerr << "WARNING: the eigtj algorithm stopped because the last error is greater than the previous one." << endl;
 		if(stopping || stoppingCritIsError && errs.size() > 0 && (*(errs.end()-1) - stoppingError ) < FLT_EPSILON)
@@ -102,7 +102,7 @@ void GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::compute_facts()
 template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
 GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::GivensFGFTGen(MatGeneric<FPP4,DEVICE>* Lap, int J, unsigned int verbosity /* deft val == 0 */, const double stoppingError, const bool errIsRel, const bool enable_large_Faust /* deft to false */) :
-facts(J>0?(J*4<Lap->getNbRow()*Lap->getNbRow()||enable_large_Faust?J:0):1 /* don't allocate if the complexity doesn't worth it and enable_large_Faust is false*/), q_candidates(new int[Lap->getNbRow()]), J(J), D(Lap->getNbRow()), errs(0), coord_choices(0), Lap(*Lap), dim_size(Lap->getNbRow()), Lap_squared_fro_norm(0), last_fact_permuted(false), is_D_ordered(false), verbosity(verbosity), stoppingCritIsError(stoppingError != 0.0), stoppingError(stoppingError), errIsRel(errIsRel), enable_large_Faust(enable_large_Faust)
+facts(J>0?(J*4<Lap->getNbRow()*Lap->getNbRow()||enable_large_Faust?J:0):0 /* don't allocate if the complexity doesn't worth it and enable_large_Faust is false*/), q_candidates(new int[Lap->getNbRow()]), J(J), D(Lap->getNbRow()), errs(0), coord_choices(0), Lap(*Lap), dim_size(Lap->getNbRow()), Lap_squared_fro_norm(0), is_D_ordered(false), verbosity(verbosity), stoppingCritIsError(stoppingError != 0.0), stoppingError(stoppingError), errIsRel(errIsRel), enable_large_Faust(enable_large_Faust), ite(0)
 {
 	if(Lap->getNbCol() != Lap->getNbRow())
 		handleError("Faust::GivensFGFTComplex", "Laplacian must be a square matrix.");
@@ -257,44 +257,83 @@ const vector<Faust::MatSparse<FPP4,DEVICE>>& GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>
 }
 template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
-Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(bool ord)
+Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(const bool ord)
 {
 	return get_transform(ord?1:0);
 }
 template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
-Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(int ord)
+Faust::Transform<FPP4,DEVICE> GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::get_transform(const int ord, const bool copy/*=true*/, const int first_nfacts/*=-1*/)
 {
-	//TODO: an optimization is possible by changing type of facts to vector<MatGeneric*> it would avoid copying facts into Transform and rather use them directly. It will need a destructor that deletes them eventually if they weren't transfered to a Transform object before.
+	//TODO: facts should be a Transform or a TransformHelper to avoid the risk of memory leak in caller code when ord == true and copy == false
 	if(facts.size() == 0)
 		throw out_of_range("The transform is empty. The algorithm stopped before computing any factor.");
-	MatSparse<FPP4,DEVICE> & last_fact = *(facts.end()-1);
-	MatSparse<FPP4,DEVICE> P(last_fact.getNbCol(), last_fact.getNbCol()); //last_fact permutation matrix
-	// (to reorder eigenvector transform according to ordered D)
-	if(last_fact_permuted)
+	std::vector<MatGeneric<FPP4, DEVICE>*> facts;
-	{
+	int end_id;
-		// get back to undefined order
-		for(int i=0;i<inv_ord_indices.size();i++)
+	size_t nfacts;
-			P.setCoeff(inv_ord_indices[i],i, FPP4(1.0));
+	if(first_nfacts < 0)
-		last_fact.multiplyRight(P);
+		// we want only effectively computed facts
-		last_fact_permuted = false;
+		nfacts = this->ite;
-	}
+	else
+		// we want the number of facts asked by the caller
+		nfacts = first_nfacts;
+	if (ord)
+		// we process the last factor separately because the columns must be reordered
+		end_id = nfacts - 1;
+	else
+		end_id = nfacts;
+	for(int i=0; i < end_id; i++)
+		facts.push_back(&this->facts[i]);
+	auto t = Faust::Transform<FPP4,DEVICE>(facts, /* lambda */ FPP4(1.0), /* optimizedCopy */false, /* cloning_fact */ copy);
+	if(! copy)
+		t.disable_dtor(); // don't want to free facts when t is out of scope because we might still need them in this object
 	if(ord)
 	{
+		// (reorder eigenvector transform according to ordered D)
+		MatSparse<FPP4,DEVICE> & last_fact = *(this->facts.begin() + end_id);
+		auto P = new MatSparse<FPP4, DEVICE>(last_fact.getNbCol(), last_fact.getNbCol()); //last_fact permutation matrix
 		if(!is_D_ordered || ord != D_order_dir)
 			order_D(ord);
 		for(int i=0;i<ord_indices.size();i++)
-			P.setCoeff(ord_indices[i],i, FPP4(1.0));
+			P->setCoeff(ord_indices[i],i, FPP4(1.0));
 		//		P.set_orthogonal(true);
 		//		facts.push_back(P); // we prefer to directly multiply the last factor by P
-		last_fact_permuted = true;
+//		last_fact.multiplyRight(P);
-		last_fact.multiplyRight(P);
+		last_fact.multiply(*P, 'N');
+		t.push_back(P, false, false, false, copy); //default to optimizedCopy=false, transpose=false, conjugate=false, copying=false, verify_dims_agree=true
+		if(copy) delete P;
+		// Warning: if copy == false the caller is responsible to free the P
 	}
-	Faust::Transform<FPP4,DEVICE> t = Faust::Transform<FPP4,DEVICE>(facts);
-	//	// remove the permutation factor if added temporarily for reordering
-	//	return ord?facts.erase(facts.end()-1),t:t;
 	return t;
+}
+template<typename FPP, FDevice DEVICE, typename FPP2, typename FPP4>
+FPP2 GivensFGFTGen<FPP,DEVICE,FPP2,FPP4>::calc_err()
+{
+	FPP2 err = 0, err_d;
+	for(int i=0;i<this->D.size();i++)
+		err += /*Faust::fabs(*/this->D(i)*this->D(i)/*)*/;
+	if(this->Lap_squared_fro_norm == FPP2(0))
+	{
+		err_d = Faust::fabs(this->Lap.norm());
+		err_d = err_d*err_d;
+		this->Lap_squared_fro_norm = err_d;
+	}
+	else
+		err_d = Faust::fabs(this->Lap_squared_fro_norm);
+	err = Faust::fabs(err_d - err);
+	if(this->errIsRel) err /= err_d;
+	return err;
 }
--- a/src/algorithm/factorization/faust_GivensFGFTParallel.hpp
+++ b/src/algorithm/factorization/faust_GivensFGFTParallel.hpp
@@ -41,10 +41,11 @@ void GivensFGFTParallel<FPP,DEVICE,FPP2>::next_step()
 	for(int i=0;i<sizeof(substep)/sizeof(substep_fun);i++)
 	{
 #ifdef DEBUG_GIVENS
-		cout << "GivensFGFTParallel ite=" << this->ite << " substep i=" << i << endl;
+		std::cout << "GivensFGFTParallel ite=" << this->ite << " substep i=" << i << std::endl;
 #endif
 		(this->*substep[i])();
 	}
+	this->ite++;
 }
 template<typename FPP, FDevice DEVICE, typename FPP2>
@@ -75,7 +76,7 @@ void GivensFGFTParallel<FPP,DEVICE,FPP2>::update_fact()
 	this->fact_mod_row_ids.push_back(this->q);
 	this->fact_mod_col_ids.push_back(this->q);
 	this->fact_mod_values.push_back(cos(this->theta));
-	if(this->J == 0) this->facts.resize(this->ite+1);
+	if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
 }
 template<typename FPP, FDevice DEVICE, typename FPP2>

--- a/src/algorithm/factorization/faust_GivensFGFTParallelComplex.hpp
+++ b/src/algorithm/factorization/faust_GivensFGFTParallelComplex.hpp
@@ -47,6 +47,7 @@ void GivensFGFTParallelComplex<FPP,DEVICE,FPP2>::next_step()
 #endif
 		(this->*substep[i])();
 	}
+	this->ite++;
 }
 template<typename FPP, FDevice DEVICE, typename FPP2>
@@ -97,7 +98,7 @@ void GivensFGFTParallelComplex<FPP,DEVICE,FPP2>::update_fact()
 	this->fact_mod_row_ids.push_back(this->q);
 	this->fact_mod_col_ids.push_back(this->q);
 	this->fact_mod_values.push_back(c_qq);
-	if(this->J == 0) this->facts.resize(this->ite+1);
+	if(this->J <= this->ite+1) this->facts.resize(this->ite+1);
 }
 template<typename FPP, FDevice DEVICE, typename FPP2>

--- a/src/algorithm/factorization/faust_SVDTJ.cpp.in
+++ b/src/algorithm/factorization/faust_SVDTJ.cpp.in
@@ -9,34 +9,33 @@ namespace Faust
 #if(@SVD_COMPLEX@==1)
 	template<>
-		void instantiate_algos<complex<@REAL_TYPE@>, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW1, GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW2, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dM_M, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dMM_,int J, int t, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust)
+		void instantiate_algos<complex<@REAL_TYPE@>, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW1, GivensFGFTGen<Real<complex<@REAL_TYPE@>>, Cpu, @REAL_TYPE@, complex<@REAL_TYPE@>>** algoW2, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dM_M, Faust::MatDense<complex<@REAL_TYPE@>,Cpu> &dMM_,int J1, int J2, int t1, int t2, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust)
 		{
-			if(t <= 1)
+			if(t1 <= 1)
-			{
+				*algoW1 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J1, verbosity, tol, relErr, enable_large_Faust);
-				*algoW1 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J, verbosity, tol, relErr, enable_large_Faust);
-				*algoW2 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J, verbosity, tol, relErr, enable_large_Faust);
-			}
 			else
-			{
+				*algoW1 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J1, t1, verbosity, tol, relErr, enable_large_Faust);
-				*algoW1 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dMM_, J, t, verbosity, tol, relErr, enable_large_Faust);
-				*algoW2 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J, t, verbosity, tol, relErr, enable_large_Faust);
+			if(t2 <= 1)
-			}
+				*algoW2 = new GivensFGFTComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J2, verbosity, tol, relErr, enable_large_Faust);
+			else
+				*algoW2 = new GivensFGFTParallelComplex<complex<@REAL_TYPE@>,Cpu,@REAL_TYPE@>(dM_M, J2, t2, verbosity, tol, relErr, enable_large_Faust);
 		}
 #endif
 	template<>
-		void instantiate_algos<@REAL_TYPE@, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW1, GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW2, Faust::MatDense<@REAL_TYPE@,Cpu> &dM_M, Faust::MatDense<@REAL_TYPE@,Cpu> &dMM_,int J, int t, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust)
+		void instantiate_algos<@REAL_TYPE@, Cpu, @REAL_TYPE@>(GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW1, GivensFGFTGen<Real<@REAL_TYPE@>, Cpu, @REAL_TYPE@, @REAL_TYPE@>** algoW2, Faust::MatDense<@REAL_TYPE@,Cpu> &dM_M, Faust::MatDense<@REAL_TYPE@,Cpu> &dMM_,int J1, int J2, int t1, int t2, unsigned int verbosity, @REAL_TYPE@ tol, bool relErr, bool enable_large_Faust)
 		{
-			if(t <= 1)
+			if(t1 <= 1)
-			{
+				*algoW1 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J1, verbosity, tol, relErr, enable_large_Faust);
-				*algoW1 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J, verbosity, tol, relErr, enable_large_Faust);
+			else
-				*algoW2 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J, verbosity, tol, relErr, enable_large_Faust);
+				*algoW1 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J1, t1, verbosity, tol, relErr, enable_large_Faust);
-			}
+			if(t2 <= 1)
+				*algoW2 = new GivensFGFT<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J2, verbosity, tol, relErr, enable_large_Faust);
 			else
-			{
+				*algoW2 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J2, t2, verbosity, tol, relErr, enable_large_Faust);
-				*algoW1 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dMM_, J, t, verbosity, tol, relErr, enable_large_Faust);
-				*algoW2 = new GivensFGFTParallel<@REAL_TYPE@,Cpu,@REAL_TYPE@>(dM_M, J, t, verbosity, tol, relErr, enable_large_Faust);
-			}
 		}
 }
--- a/src/algorithm/factorization/faust_SVDTJ.h
+++ b/src/algorithm/factorization/faust_SVDTJ.h
@@ -17,12 +17,94 @@ namespace Faust
 	template<typename FPP, FDevice DEVICE, typename FPP2 = float>
 		void svdtj_cplx(MatSparse<FPP, DEVICE> & M, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order, const bool enable_large_Faust,TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S);
-template<typename FPP, FDevice DEVICE, typename FPP2 = float>
-void svdtj_core_gen(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_);
-template<typename FPP, FDevice DEVICE, typename FPP2>
+	/**
-	void instantiate_algos(GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW1, GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW2, Faust::MatDense<FPP,DEVICE> &dM_M, Faust::MatDense<FPP,DEVICE> &dMM_, int J, int t, unsigned int verbosity, FPP2 tol, bool relErr, bool enable_large_Faust);
+	 * \brief Wrapper for blind and step versions of SVDTJ.
-};
+	 */
+	template<typename FPP, FDevice DEVICE, typename FPP2 = float>
+		void svdtj_core_gen(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_, const bool by_step=true);
+	/**
+	 * This version runs two eigtjs step by step until the tol error or the number of Givens J is reached.
+	 */
+	template<typename FPP, FDevice DEVICE, typename FPP2 = float>
+		void svdtj_core_gen_step(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_);
+	/**
+	 * This version runs two eigtjs blindly until they reach the tol error for the eigen decomposition or the number of Givens is reached.
+	 */
+	template<typename FPP, FDevice DEVICE, typename FPP2 = float>
+		void svdtj_core_gen_blind(MatGeneric<FPP,DEVICE>* M, MatDense<FPP,DEVICE> &dM, MatDense<FPP,DEVICE> &dM_M, MatDense<FPP,DEVICE> &dMM_, int J, int t, FPP2 tol, unsigned int verbosity, bool relErr, int order /* ignored anyway, kept here just in case */, const bool enable_large_Faust, TransformHelper<FPP,DEVICE> ** U, TransformHelper<FPP,DEVICE> **V, Vect<FPP,DEVICE> ** S_);
+	template<typename FPP, FDevice DEVICE, typename FPP2>
+		void instantiate_algos(GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW1, GivensFGFTGen<Real<FPP>, Cpu, FPP2, FPP>** algoW2, Faust::MatDense<FPP,DEVICE> &dM_M, Faust::MatDense<FPP,DEVICE> &dMM_, int J1, int J2, int t1, int t2, unsigned int verbosity, FPP2 tol, bool relErr, bool enable_large_Faust);
+	/**
+	 * \brief Order S in descending order and permute columns of thW1 and thW2 consequently.
+	 *
+	 * \param order: not yet impl., only descending (-1) order is handled. //TODO
+	 */
+	template<typename FPP, FDevice DEVICE>
+		void svdtj_order_W1_W2_S(const faust_unsigned_int &m, const faust_unsigned_int &n, const int order, const Vect<FPP, DEVICE> &S, Vect<FPP, DEVICE> **S_, TransformHelper<FPP, DEVICE> &thW1, TransformHelper<FPP, DEVICE> &thW2);
+	/**
+	 * Utility function of svdtj_core_gen_step for computing U'MV (or W1' M W2), method 1 (the simplest way).
+	 *
+	 * This method is not used but is kept as reference because the code goes more an more complicated for methods 2 and 3.
+	 *
+	 * \param dM: (in out) matrix of which we calculate the SVD approximate as a MatDense.
+	 * \param dM_adj: (in out) true if dM is already the adjoint of the original matrix, false otherwise (in which case the adjoint is computed internvally and dM is modified). The goal is to avoid computing the adjoint M many times in svdtj_core_gen_step.
+	 * \param tW1: the "Faust" of Givens matrices for U (W1).
+	 * \param tW2: the "Faust" of Givens matrices for V (W2).
+	 *
+	 * \return the MatDense for W1'*M*W2.
+	 */
+	template<typename FPP, FDevice DEVICE>
+		MatDense<FPP,DEVICE> svdtj_compute_W1H_M_W2_meth1(MatDense<FPP,DEVICE> &dM, bool &dM_adj, const Transform<FPP, DEVICE> &tW1, const Transform<FPP, DEVICE> &tW2);
+	/**
+	 * Utility function of svdtj_core_gen_step for computing U'MV (or W1' M W2), method 2 (not faster than method 1, just a sketching of method 3).
+	 *
+	 * This method is not used but is kept as reference because the code goes more an more complicated in method 3.
+	 *
+	 * \param dM: matrix of which we calculate the SVD approximate as a MatDense.
+	 * \param tW1: the "Faust" of Givens matrices for U (W1).
+	 * \param tW2: the "Faust" of Givens matrices for V (W2).
+	 *
+	 * \return the MatDense for W1'*M*W2.
+	 */
+	template<typename FPP, FDevice DEVICE>
+		MatDense<FPP,DEVICE> svdtj_compute_W1H_M_W2_meth2(const MatDense<FPP,DEVICE> &dM, const Transform<FPP, DEVICE> &tW1, const Transform<FPP, DEVICE> &tW2);
+	/**
+	 * Utility function of svdtj_core_gen_step for computing U'MV (or W1' M W2), method 3 (fastest method).
+	 *
+	 * It is advised to refer to *meth1 and *meth2 function in order to understand this one.
+	 *
+	 * The principle of this method is to compute W1H_M_W2 recursively reusing previous computations.
+	 * if W1 is a sequence of factors W1,0 W1,1 ... W1,N,
+	 * W2 a sequence of factors W2,0 W2,1 ... W2,N
+	 * S_j the successive products U' M V along the SVDTJ algorithm execution.
+	 * Then S_{n} = W1,N' * S_{n-1} * W2,N.
+	 *
+	 *
+	 * \param dM: matrix of which we calculate the SVD approximate as a MatDense.
+	 * \param tW1: the "Faust" of Givens matrices for U (W1).
+	 * \param tW2: the "Faust" of Givens matrices for V (W2).
+	 * \param prev_W1H_M_W2: previous result of W1'*M*W2 to base the computation of this one.
+	 * \param err_step_period: the period in number of Givens according which the SVDJT computes the error. It matters to determine the order of the recursive calculation of W1' M W2.
+	 * \param k1, k2: number of Givens used for W1 and W2 until the latest iteration of SVDTJ.
+	 * \param t1, t2: the number of Givens matrix in each W1 W2 factors.
+	 *
+	 * \return the MatDense for W1'*M*W2.
+	 */
+	template<typename FPP, FDevice DEVICE>
+		MatDense<FPP,DEVICE> svdtj_compute_W1H_M_W2_meth3(const MatDense<FPP,DEVICE> &dM, const Transform<FPP, DEVICE> &tW1, const Transform<FPP, DEVICE> &tW2, MatDense<FPP, DEVICE> & prev_W1H_M_W2, const int err_step_period, const int k1, const int k2, const int t1, const int t2, const bool new_W1, const bool new_W2);
+}
 #include "faust_SVDTJ.hpp"
 #endif
--- a/src/algorithm/factorization/faust_SVDTJ.hpp
+++ b/src/algorithm/factorization/faust_SVDTJ.hpp
--- a/wrapper/python/src/FaustFactGivensFGFT.hpp
+++ b/wrapper/python/src/FaustFactGivensFGFT.hpp
@@ -135,7 +135,7 @@ void svdtj(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start of inpu
    Faust::MatDense<FPP,Cpu> M(M_data, (faust_unsigned_int) num_rows, (faust_unsigned_int) num_cols);
    TransformHelper<FPP,Cpu> *U_ = nullptr,  *V_ = nullptr;
    Faust::Vect<FPP,Cpu> * S_ = nullptr;
-    svdtj(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust ,&U_, &V_, &S_);
+    svdtj(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust ,&U_, &V_, &S_);
    create_svdtj_output(U_, V_, U, V, S, S_);
 }
@@ -145,7 +145,7 @@ void svdtj_sparse(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start
    Faust::MatSparse<FPP, Cpu> M(nnz, nrows, ncols, data, id_col, row_ptr);
    TransformHelper<FPP,Cpu> *U_ = nullptr,  *V_ = nullptr;
    Faust::Vect<FPP,Cpu> * S_ = nullptr;
-    svdtj(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
+    svdtj(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
    create_svdtj_output(U_, V_, U, V, S, S_);
 }
@@ -155,7 +155,7 @@ void svdtj_cplx(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*start of
    Faust::MatDense<FPP,Cpu> M(M_data, (faust_unsigned_int) num_rows, (faust_unsigned_int) num_cols);
    TransformHelper<FPP,Cpu> *U_ = nullptr,  *V_ = nullptr;
    Faust::Vect<FPP,Cpu> * S_ = nullptr;
-    svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
+    svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
    create_svdtj_output(U_, V_, U, V, S, S_);
 }
@@ -165,7 +165,7 @@ void svdtj_sparse_cplx(FaustCoreCpp<FPP>** U, FaustCoreCpp<FPP> **V, FPP* S, /*s
    Faust::MatSparse<FPP, Cpu> M(nnz, nrows, ncols, data, id_col, row_ptr);
    TransformHelper<FPP,Cpu> *U_ = nullptr,  *V_ = nullptr;
    Faust::Vect<FPP,Cpu> * S_ = nullptr;
-    svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, 1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
+    svdtj_cplx(M, J, t, stoppingError, verbosity, errIsRel, -1 /* order (useless) */, enable_large_Faust, &U_, &V_, &S_);
    create_svdtj_output(U_, V_, U, V, S, S_);
 }