New TransformHelper<FPP,GPU2> class and misc update in other GPU2 classes.

Add TransformHelper<FPP,GPU2> and quick tests (still to be enhanced). Factor the code shared by TransformHelper<FPP,GPU2> and TransformHelper<FPP,Cpu> into the parent class TransformHelperGen (not finished yet). In Transform<FPP,GPU2>: add update() and multiply(MatDense<FPP,GPU2>), update get_product(), and add a factor iterator with begin()/end(). Update MatDense<FPP,GPU2>/MatSparse<FPP,GPU2>: add the move assignment operator, the move constructor and getType(). Update MatDense<FPP,GPU2> tests. Fix some Vect functions. Move the GPU2 hpp.in files to cpp.in because they contain only template specializations (the scalar types are variables substituted by CMake).

New TransformHelper<FPP,GPU2> class and misc update in other GPU2 classes.
042dcd6c · hhakim · 59467cfe · 042dcd6c · 5da8834a · 1fd7b2e7
Commit 042dcd6c authored 4 years ago by hhakim
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -619,10 +619,12 @@ if(USE_GPU_MOD)
 	set(GM_SCALAR double)
 	set(GM_REINTERPRET_CAST_SCALAR double)
 	configure_file(${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_gen.hpp.in ${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_double.hpp)
-	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu_double.hpp)
-	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu_double.hpp)
-	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu_double.hpp)
-	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu_double.hpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu_double.cpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu_double.cpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu_double.cpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu_double.cpp)
+	#target_sources(${FAUST_TARGET} PUBLIC ${GPU_MOD_CPP_FILES})
+	#list(APPEND CPP_FILES )
 	set(FAUST_SCALAR_FOR_GM complex<double>)
 	set(GM_SCALAR cuDoubleComplex)
 	set(GM_REINTERPRET_CAST_SCALAR double)
@@ -649,12 +651,6 @@ if(FAUST_TORCH)
 	message(STATUS TORCH_LIBRARY=${TORCH_LIBRARY})
 endif()

-# before add Matlab directory, because its includes depend on faust's
-if(USE_GPU_MOD)
-	include_directories(${FAUST_TARGET} ${GPU_MOD_INCLUDE_DIR} ${GPU_MOD_INCLUDE_DIR}/../build)
-	include_directories(${FAUST_TARGET} ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR})
-
-endif()

 ########### add_subdirectory ###############
 if (BUILD_WRAPPER_PYTHON)
@@ -750,12 +746,15 @@ if(BUILD_MULTITHREAD)
 endif()

 if(USE_GPU_MOD)
+	include_directories(${FAUST_TARGET} ${GPU_MOD_INCLUDE_DIR} ${GPU_MOD_INCLUDE_DIR}/../build)
+	include_directories(${FAUST_TARGET} ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR})
 	target_include_directories(${FAUST_TARGET} PUBLIC  ${GPU_MOD_INCLUDE_DIR} ${GPU_MOD_INCLUDE_DIR}/../build)
 	target_link_libraries(${FAUST_TARGET} ${GPU_MOD_LIBS})
 	if(UNIX)
 		target_link_libraries(${FAUST_TARGET} dl)
 	endif(UNIX)
 	install(FILES  ${GPU_MOD_INCLUDE_DIR}/../build/libgm.so DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_EXECUTE WORLD_READ)
+	include_directories(${FAUST_TARGET} ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR})
 endif()

 # In case of GPU, the "CUDA" library libfaust_matrix_cu.a is linked with faust (libfaust_matrix_cu.a is not a OBJECT library) 

--- a/gpu_mod @ 5da8834a
+++ b/gpu_mod @ 5da8834a
-Subproject commit 1fd7b2e7d72c6acd004dcc08b4571ec9c84895d0
+Subproject commit 5da8834a803570fe70b2f2b6435b47da39fb509a
--- a/misc/test/CMakeLists.txt
+++ b/misc/test/CMakeLists.txt
@@ -204,7 +204,7 @@ if(MATIO_LIB_FILE AND MATIO_INC_DIR AND BUILD_READ_MAT_FILE AND NOT NOCPPTESTS)
 	endif()

 	if(USE_GPU_MOD)
-		list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod)
+		list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod)
 	endif()

 	foreach(TEST_FPP float double complex<float> complex<double>)

--- a/misc/test/src/C++/test_matdense_gpu_mod.cpp
+++ b/misc/test/src/C++/test_matdense_gpu_mod.cpp
-#include "faust_MatDense_gpu2.h"
-#include <cstdlib>
-using namespace Faust;
-
-int main(int argc, char** argv)
-{
-	MatDense<FPP,GPU2> mat;
-	return EXIT_SUCCESS;
-}
--- a/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
@@ -2,6 +2,7 @@
 #include "faust_gpu_mod.h"
 #include "faust_MatDense_gpu.h"
 #include "faust_MatSparse.h"
+#include "faust_MatSparse_gpu.h"
 #include "faust_Vect_gpu.h"
 #include "faust_gpu_mod_utils.h"
 #include "faust_cuda_device.h"
@@ -452,6 +453,17 @@ void test_mul_vec()
 	assert(err < 1e-3);
 }

+// Converts a random sparse GPU matrix to a dense GPU matrix and checks that
+// both have the same norm (up to 1e-4).
+void test_gpu_matsparse2matdense()
+{
+	cout << "test MatDense<FPP,GPU2>::MatDense<FPP,GPU2>(MatSparse<FPP,GPU2>&)" << endl;
+	auto cpu_sparse = Faust::MatSparse<double,Cpu>::randMat(22, 33, .2);
+	Faust::MatSparse<double,GPU2> gpu_sparse(*cpu_sparse);
+	MatDense<double,GPU2> gpu_dense(gpu_sparse);
+	const auto sparse_norm = gpu_sparse.norm();
+	const auto dense_norm = gpu_dense.norm();
+	cout << sparse_norm << " " << dense_norm << endl;
+	assert(abs(sparse_norm-dense_norm)< 1e-4);
+	cout << "OK" << endl;
+}
+
 int main(int argc, char** argv)
 {
 	Faust::enable_gpu_mod();
@@ -475,5 +487,6 @@ int main(int argc, char** argv)
 	test_get_device();
 	test_move();
 	test_mul_vec();
+	test_gpu_matsparse2matdense();
 	return EXIT_SUCCESS;
 }
--- a/misc/test/src/C++/test_transform_helper_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_transform_helper_gpu_mod.cpp.in
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include "faust_MatDense.h"
+#include "faust_TransformHelper_gpu.h"
+#include "faust_TransformHelper.h"
+#include "faust_gpu_mod_utils.h"
+
+using namespace Faust;
+using namespace std;
+
+// Quick smoke test for TransformHelper<double,GPU2>: builds the same three-factor
+// transform on GPU and on CPU, then prints norms, factor types, updated factors
+// and a product so both implementations can be compared (no assertion yet).
+int main()
+{
+	Faust::enable_gpu_mod();
+	TransformHelper<double, GPU2> th; // only exercises the default constructor
+	auto cpu_mat1 = MatDense<double,Cpu>::randMat(12,24);
+	auto cpu_mat2 = MatDense<double,Cpu>::randMat(24,32);
+	auto cpu_mat3 = Faust::MatSparse<double,Cpu>::randMat(32, 22, .3);
+	MatDense<double,GPU2> gpu_mat1(*cpu_mat1);
+	MatDense<double,GPU2> gpu_mat2(*cpu_mat2);
+	Faust::MatSparse<double,GPU2> gpu_mat3(*cpu_mat3);
+	vector<MatGeneric<double,GPU2>*> gpu_fact_list = {&gpu_mat1, &gpu_mat2, &gpu_mat3};
+	TransformHelper<double, GPU2> th2(gpu_fact_list);
+	int flag;
+	th2.Display();
+	cout << "gpu norm fro:" << th2.normFro() << endl;
+	cout << "gpu spectral norm:" << th2.spectralNorm(100, 1e-3, flag) << endl;
+	vector<MatGeneric<double,Cpu>*> cpu_fact_list = {cpu_mat1, cpu_mat2, cpu_mat3};
+	TransformHelper<double, Cpu> th2_cpu(cpu_fact_list);
+	th2_cpu.display();
+	cout << "cpu norm fro:" <<th2_cpu.normFro() << endl;
+	cout << "cpu spectral norm:" <<th2_cpu.spectralNorm(100,1e-3, flag) << endl;
+	// NOTE(review): the loop bound comes from the GPU helper but the factor types
+	// are read from the CPU helper -- confirm which one was meant to be tested.
+	for(int i=0;i<th2.size();i++)
+		cout << " fact "<< i<< " is sparse: " << th2_cpu.is_fact_sparse(i)  << " is dense: " << th2_cpu.is_fact_dense(i) << endl;
+	// test update
+	auto cpu_mat4 = MatDense<double,Cpu>::randMat(24,32);
+	auto cpu_mat5 = Faust::MatSparse<double,Cpu>::randMat(32, 22, .3);
+	MatDense<double,GPU2> gpu_mat4(*cpu_mat4);
+	Faust::MatSparse<double,GPU2> gpu_mat5(*cpu_mat5);
+	th2.update(gpu_mat4, 1);
+	th2.update(gpu_mat5, 2);
+	// After update() the stored factors should have the same norms as their sources.
+	cout << "norm of updated gpu dmat:" << static_cast<MatDense<double,GPU2>*>(th2.get_gen_fact_nonconst(1))->norm() << endl;
+	cout << "norm of updated gpu smat:" << static_cast<Faust::MatSparse<double,GPU2>*>(th2.get_gen_fact_nonconst(2))->norm() << endl;
+	cout << "norm of gpu source dmat:" << gpu_mat4.norm() << endl;
+	cout << "norm of gpu source smat:" << gpu_mat5.norm() << endl;
+	cout << "norm of cpu source dmat:" << cpu_mat4->norm() << endl;
+	cout << "norm of cpu source smat:" << cpu_mat5->norm() << endl;
+	th2.Display();
+	cout << "============ multiply" << endl;
+	auto cpu_mat6 = Faust::MatDense<double,Cpu>::randMat(22, 32);
+	MatDense<double,GPU2> gpu_mat6(*cpu_mat6);
+	auto M_gpu = th2.multiply(gpu_mat6);
+	auto M_cpu = th2_cpu.multiply(*cpu_mat6);
+	cout << "M_cpu.norm() " << M_cpu.norm() << endl;
+	cout << "M_gpu.norm() " << M_gpu.norm() << endl;
+	// The test is registered in the CMake test list: running to completion must
+	// be reported as a success, otherwise the test can never pass.
+	return EXIT_SUCCESS;
+}
+
+
--- a/src/faust_linear_operator/CPU/faust_TransformHelper.h
+++ b/src/faust_linear_operator/CPU/faust_TransformHelper.h
@@ -43,6 +43,7 @@
 #define __FAUST_TRANSFORM_HELPER___

 #include <memory>
+#include "faust_TransformHelperGen.h"
 #include "faust_RefManager.h"
 #include "faust_exception.h"
 #include "faust_Transform.h"
@@ -62,11 +63,9 @@ namespace Faust {

 	template<typename FPP>
 		using transf_iterator = typename Transform<FPP,Cpu>::transf_iterator;
-	template<typename FPP,FDevice DEVICE> class Transform;
-	template<typename FPP,FDevice DEVICE> class Vect;
-	template<typename FPP,FDevice DEVICE> class MatDense;
-	template<typename FPP,FDevice DEVICE> class MatGeneric;
+#ifdef USE_GPU_MOD
 	template<typename FPP> class FaustGPU;
+#endif

 	enum RandFaustType {
 		DENSE,
@@ -80,21 +79,12 @@ namespace Faust {
 	};

 	template<typename FPP>
-		class TransformHelper<FPP,Cpu> {
+		class TransformHelper<FPP,Cpu> : public TransformHelperGen<FPP,Cpu> {
 			static std::default_random_engine generator;
 			static bool seed_init;

-			bool is_transposed;
-			bool is_conjugate;
-			bool is_sliced;
-			Slice slices[2];
-			bool is_fancy_indexed;
 			int mul_order_opt_mode;
 			int Fv_mul_mode;
-			faust_unsigned_int * fancy_indices[2];
-			faust_unsigned_int fancy_num_rows;
-			faust_unsigned_int fancy_num_cols;
-			shared_ptr<Transform<FPP,Cpu>> transform;
 #ifdef FAUST_TORCH
 			std::vector<torch::Tensor> tensor_data;
 #endif
@@ -131,7 +121,7 @@ namespace Faust {
 			TransformHelper<FPP, Cpu>* multiply(FPP& scalar);
 			template<typename Head, typename ... Tail>
 				void push_back_(Head& h, Tail&... t);
-
+//
 			void push_back_();
 			void push_back(const MatGeneric<FPP,Cpu>* M, const bool optimizedCopy=false, const bool copying=true);
            void pop_back();
@@ -191,7 +181,7 @@ namespace Faust {
 			TransformHelper<FPP,Cpu>* horzcat(const TransformHelper<FPP,Cpu>*);
 			bool isTransposed() const;
 			bool isConjugate() const;
-			const char isTransposed2char() const;
+//			const char isTransposed2char() const;
 			double normL1() const;
 			double normFro() const;
 			double normInf() const;

--- a/src/faust_linear_operator/CPU/faust_TransformHelper.hpp
+++ b/src/faust_linear_operator/CPU/faust_TransformHelper.hpp
--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.hpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.hpp.in
@@ -3,6 +3,7 @@
 typedef @FAUST_SCALAR_FOR_GM@ FSFG;
 #endif
 //TODO: move to cpp.in
+#include "faust_MatDense_gpu.h"
 namespace Faust
 {
 	template<>
@@ -42,6 +43,8 @@ namespace Faust
 			gpu_mat = dsm_funcs->togpu_stream(nbRow, nbCol, const_cast<FSFG*>(data), stream);
 		else if(! no_alloc)
 			gpu_mat = dsm_funcs->create(nbRow, nbCol);
+		else
+			gpu_mat = nullptr;
 		gp_funcs->set_dev(cur_dev_id);
 	}

@@ -49,7 +52,8 @@ namespace Faust
 		MatDense<FSFG,GPU2>::~MatDense()
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
-			dsm_funcs->free(gpu_mat);
+			if(gpu_mat != nullptr)
+				dsm_funcs->free(gpu_mat);
 		}

 	template<>
@@ -65,6 +69,8 @@ namespace Faust
 				const int32_t dev_id/*=-1*/,
 				const void* stream/*=nullptr*/) : MatDense<FSFG,GPU2>(mat.getNbRow(), mat.getNbCol(), mat.getData(), /*no_alloc*/ mat.getData() == nullptr, dev_id, stream){}

+
+
 	template<>
 		void Faust::MatDense<FSFG,GPU2>::multiply(MatDense<FSFG, GPU2> &other, const char op_this)
 		{
@@ -151,7 +157,10 @@ namespace Faust
 		void Faust::MatDense<FSFG,GPU2>::resize(const faust_unsigned_int nbRow, const faust_unsigned_int nbCol)
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
-			dsm_funcs->resize(gpu_mat, getNbRow(), getNbCol());
+			if(gpu_mat != nullptr)
+				dsm_funcs->resize(gpu_mat, nbRow, nbCol); // use the requested size; passing the current size made the call a no-op
+			else
+				gpu_mat = dsm_funcs->create(nbRow, nbCol); // no GPU buffer yet: create one of the requested size
 #ifndef NDEBUG
 			int32_t new_nrows, new_ncols;
 			dsm_funcs->info(gpu_mat, &new_nrows, &new_ncols);
@@ -159,6 +168,17 @@ namespace Faust
 #endif
 		}

+	template<> // constructor converting a sparse GPU2 matrix into a dense GPU2 matrix
+		Faust::MatDense<FSFG,GPU2>::MatDense(const MatSparse<FSFG,GPU2>& mat) : MatDense<FSFG,GPU2>() // start from the default-constructed state
+		{
+			if(mat.get_gpu_mat_ptr() != nullptr) // nothing more to do when mat holds no GPU matrix
+			{
+				resize(mat.getNbRow(), mat.getNbCol()); // give this matrix the dimensions of mat
+				auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs(FSFG(0));
+				spm_funcs->copy2dense(mat.get_gpu_mat_ptr(), this->gpu_mat); // presumably writes the dense form of mat into this->gpu_mat -- TODO confirm against gpu_mod
+			}
+		}
+
 	template<>
 		void Faust::MatDense<FSFG,GPU2>::setOnes()
 		{
@@ -410,8 +430,9 @@ namespace Faust
 		{

 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
-			auto gpu_mat = dsm_funcs->togpu(getNbRow(), getNbCol(), const_cast<FSFG*>(A.getData()));
-			dsm_funcs->free(this->gpu_mat);
+			auto gpu_mat = dsm_funcs->togpu(A.getNbRow(), A.getNbCol(), const_cast<FSFG*>(A.getData()));
+			if(this->gpu_mat != nullptr)
+				dsm_funcs->free(this->gpu_mat);
 			this->gpu_mat = gpu_mat;
 		}

@@ -423,12 +444,17 @@ namespace Faust
 		}

 	template<>
-		void Faust::MatDense<FSFG, GPU2>::operator=(const MatDense<FSFG, GPU2>& A)
+		Faust::MatDense<FSFG, GPU2>& Faust::MatDense<FSFG, GPU2>::operator=(const MatDense<FSFG, GPU2>& A)
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
-			auto gpu_mat = dsm_funcs->clone(A.gpu_mat);
-			dsm_funcs->free(this->gpu_mat);
-			this->gpu_mat = gpu_mat;
+			// Copy assignment: makes this matrix a copy of A and returns *this.
+			if(this == &A)
+				return *this; // self-assignment: nothing to copy
+			if(A.gpu_mat != nullptr)
+			{
+				// resize() creates the buffer when this matrix has none
+				resize(A.getNbRow(), A.getNbCol());
+				dsm_funcs->copy(A.gpu_mat, this->gpu_mat);
+			}
+			else
+			{
+				// A is empty: release our buffer (if any) instead of leaking it
+				if(this->gpu_mat != nullptr)
+					dsm_funcs->free(this->gpu_mat);
+				this->gpu_mat = nullptr;
+			}
+			return *this;
 		}

 	template <>
@@ -492,11 +518,48 @@ namespace Faust
 			throw std::runtime_error("gpu_mat is nullptr");
 		}

-		template<typename FSFG>
+	template<>
 		void MatDense<FSFG,GPU2>::multiply(const Vect<FSFG, GPU2>& vec, Vect<FSFG, GPU2>& out_vec) const
 		{

 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
 			dsm_funcs->mul_gpu_dsm_ext(this->gpu_mat, vec.get_gpu_mat_ptr(), out_vec.get_gpu_mat_ptr(), OP_NOTRANSP, OP_NOTRANSP);
 		}
+
+	template<>
+		MatType MatDense<FSFG,GPU2>::getType() const // overrides the pure virtual MatGeneric<FPP,GPU2>::getType()
+		{
+			return Dense; // used by Transform<FPP,GPU2> to tell dense factors from sparse ones
+		}
+
+	// Move assignment: releases the GPU matrix currently held (if any) and takes
+	// ownership of the one held by mat, which is left empty (gpu_mat == nullptr).
+	template<>
+		MatDense<FSFG,GPU2>& MatDense<FSFG,GPU2>::operator=(MatDense<FSFG,GPU2>&& mat)
+		{
+			// Self-move must be a no-op: otherwise the buffer would be freed and lost.
+			if(this == &mat)
+				return *this;
+			if(this->gpu_mat != nullptr)
+			{
+				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
+				dsm_funcs->free(this->gpu_mat);
+			}
+			this->gpu_mat = mat.gpu_mat;
+			mat.gpu_mat = nullptr;
+			return *this;
+		}
+
+	// Move constructor: takes ownership of the GPU matrix held by mat and leaves
+	// mat empty (gpu_mat == nullptr).
+	template<>
+		MatDense<FSFG,GPU2>::MatDense(MatDense<FSFG,GPU2>&& mat)
+		{
+			// this->gpu_mat has no value yet in a constructor: reading it (and
+			// possibly freeing it) would be undefined behaviour, so just steal mat's pointer.
+			//TODO: the device of mat is not handled here (see mat.getDevice() / gp_funcs)
+			this->gpu_mat = mat.gpu_mat;
+			mat.gpu_mat = nullptr;
+		}
+
 };
--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
@@ -3,18 +3,23 @@
 #ifdef USE_GPU_MOD
 #include "faust_MatDense.h"
 #include "faust_MatGeneric_gpu.h"
+#include "faust_Vect_gpu.h"
+#include "faust_MatSparse_gpu.h"
 #include "faust_gpu_mod_utils.h"
+#include <cstdint>
 namespace Faust
 {
 	template <typename FPP>
 	void gemm(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);


-
+	template<typename FPP, FDevice DEVICE>
+		class MatDense;
 	template<typename FPP>
 		class MatDense<FPP, GPU2> : public MatGeneric<FPP,GPU2>
 		{
 			friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
+			friend MatSparse<FPP,GPU2>;
 			friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);

 			public:
@@ -28,9 +33,12 @@ namespace Faust
 				MatDense();
 				MatDense(const MatDense<FPP,Cpu>& mat, const int32_t dev_id=-1, const void* stream=nullptr);

+				MatDense(MatDense<FPP,GPU2> && mat);
+				MatDense(const MatSparse<FPP,GPU2> & mat);
 				~MatDense();

-				void operator=(const MatDense<FPP,GPU2> & A);
+				MatDense<FPP,GPU2>& operator=(MatDense<FPP,GPU2> && mat);
+				MatDense<FPP,GPU2>& operator=(const MatDense<FPP,GPU2> & A);
 				void operator=(const MatDense<FPP,Cpu> & A);
 				void operator=(const MatSparse<FPP,Cpu> & A);
 				// *this = *this + A
@@ -88,10 +96,11 @@ namespace Faust
 				MatDense<FPP, Cpu> tocpu(const void* stream=nullptr) const;
 				void Display() const;
 				std::string to_string(const bool transpose=false, const bool displaying_small_mat_elts=false) const;
+				MatType getType() const;
 				int32_t getNbRow() const;
 				int32_t getNbCol() const;
 				faust_unsigned_int getNonZeros() const;
-			private:
+			protected:
 				gm_DenseMat_t gpu_mat;
 				void* get_gpu_mat_ptr() const;
 				void set_gpu_mat_ptr(void*);
@@ -99,6 +108,6 @@ namespace Faust


 }
-#include "faust_MatDense_gpu_double.hpp"
+
 #endif
 #endif
--- a/src/faust_linear_operator/GPU2/faust_MatGeneric_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatGeneric_gpu.h
 #ifndef __FAUST_MATGENERIC_GPU__
 #define __FAUST_MATGENERIC_GPU__
+#include "faust_constant.h"
 namespace Faust
 {
 	template<typename FPP, FDevice DEVICE> class Transform;
 	template<typename FPP, FDevice DEVICE> class MatGeneric;
 	template<typename FPP>
 		class Transform<FPP,GPU2>;
-	//TODO: this class is temporary, ideally MatSparse<FPP,GPU2> and MatDense<FPP,GPU2> should extend the MatGeneric<FPP, Device> class
-	//TODO: keep this class until MatSparse<FPP,GPU2> and MatDense<FPP,GPU2> fully implement the MatGeneric<FPP, Device> methods
 	// The interest of this class is mostly to make Transform capable of storing generic matrix
+	// TODO: this class should extends MatGeneric<FPP,Device>
 	template<typename FPP>
 		class MatGeneric<FPP, GPU2>
 		{
-			friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
+			friend Transform<FPP,GPU2>; // needs access to get_gpu_mat_ptr
 			virtual void set_gpu_mat_ptr(void*)=0;
 			protected:
 				bool is_identity;
 				bool is_zeros;
 			public:
+				virtual MatType getType() const=0;
 				virtual int32_t getNbRow() const=0;
 				virtual int32_t getNbCol() const=0;
 				virtual MatGeneric<FPP,GPU2>* clone(const int32_t dev_id=-1, const void* stream=nullptr) const=0;

--- a/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.hpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.hpp.in
 //TODO: move to cpp.in
+#include "faust_MatSparse_gpu.h"
+#include "faust_MatDense_gpu.h"
+#ifndef _FSFG_
+#define _FSFG_
+typedef @FAUST_SCALAR_FOR_GM@ FSFG;
+#endif
 namespace Faust
 {
 	template<>
@@ -79,10 +85,11 @@ namespace Faust
 	}

 	template<>
-	void MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::operator=(const MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>& mat)
+	MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>& MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::operator=(const MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>& mat) // copy assignment, now returning *this
 	{
 			auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
 			spm_funcs->copy(mat.gpu_mat, gpu_mat); // NOTE(review): neither pointer is checked against nullptr here -- confirm that callers guarantee both matrices are allocated
+			return *this;
 	}

 	template<>
@@ -282,4 +289,50 @@ namespace Faust
 		{
 			this->gpu_mat = gpu_mat;
 		}
+
+		template<>
+			MatType MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>::getType() const // overrides the pure virtual MatGeneric<FPP,GPU2>::getType()
+			{
+				return Sparse; // used by Transform<FPP,GPU2> to tell sparse factors from dense ones
+			}
+
+	// Move assignment: releases the GPU matrix currently held (if any) and takes
+	// ownership of the one held by mat, which is left empty (gpu_mat == nullptr).
+	template<>
+		MatSparse<FSFG,GPU2>& MatSparse<FSFG,GPU2>::operator=(MatSparse<FSFG,GPU2>&& mat)
+		{
+			// Self-move must be a no-op: otherwise the buffer would be freed and lost.
+			if(this == &mat)
+				return *this;
+			if(this->gpu_mat != nullptr)
+			{
+				auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs(FSFG(0));
+				spm_funcs->free(this->gpu_mat);
+			}
+			this->gpu_mat = mat.gpu_mat;
+			mat.gpu_mat = nullptr;
+			return *this;
+		}
+
+		// Builds a sparse GPU matrix from a dense GPU matrix. The conversion currently
+		// goes through the CPU (dense GPU -> dense CPU -> sparse CPU -> sparse GPU).
+		// If mat holds no GPU matrix, the new object is left empty (gpu_mat == nullptr).
+		template<>
+			MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>::MatSparse(const MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& mat)
+			{
+				// Give gpu_mat a defined value first: the move assignment below tests it
+				// (and frees it when non-null), and it must also be defined when mat is empty.
+				this->gpu_mat = nullptr;
+				//TODO: process the copy totally on gpu side
+				if(mat.get_gpu_mat_ptr() != nullptr)
+				{
+					auto cpu_dmat = mat.tocpu();
+					MatSparse<@FAUST_SCALAR_FOR_GM@,Cpu> cpu_smat(cpu_dmat);
+					MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2> gpu_smat(cpu_smat);
+					*this = std::move(gpu_smat);
+				}
+			}
+
+	// Move constructor: takes ownership of the GPU matrix held by mat and leaves
+	// mat empty (gpu_mat == nullptr).
+	template<>
+		MatSparse<FSFG,GPU2>::MatSparse(MatSparse<FSFG,GPU2>&& mat)
+		{
+			// A matrix under construction owns nothing yet, so there is nothing to
+			// free: testing this->gpu_mat here would read a member that was never set.
+			this->gpu_mat = mat.gpu_mat;
+			mat.gpu_mat = nullptr;
+		}
 };
--- a/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.h
@@ -4,14 +4,20 @@
 #include "faust_gpu_mod_utils.h"
 #include "faust_constant.h"
 #include "faust_MatGeneric_gpu.h"
+#include "faust_MatSparse.h"
 namespace Faust
 {
+	template<typename FPP, FDevice DEVICE>
+		class MatSparse;
+	template<typename FPP, FDevice DEVICE>
+		class MatDense;

 	template<typename FPP>
 		class MatSparse<FPP, GPU2> : public MatGeneric<FPP,GPU2>
 		{

 			friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
+			friend MatDense<FPP,GPU2>;
 			public:
 				/** \brief Inits from CPU buffers.
 				 *
@@ -37,9 +43,14 @@ namespace Faust
 						const int32_t dev_id=-1,
 						const void* stream=nullptr);

+				MatSparse(const MatDense<FPP,GPU2>& mat);
+
+				MatSparse(MatSparse<FPP,GPU2> && mat);
+				MatSparse<FPP,GPU2>& operator=(MatSparse<FPP,GPU2> && mat);
+
 				MatSparse();

-				void operator=(const MatSparse<FPP, GPU2>& mat);
+				MatSparse<FPP,GPU2>& operator=(const MatSparse<FPP, GPU2>& mat);
 				void operator=(const MatSparse<FPP, Cpu>& mat);
 				void operator*=(const FPP& alpha);
 				void operator/=(const FPP& alpha);
@@ -72,6 +83,7 @@ namespace Faust
 				int32_t getDevice() const;
 				void Display() const;
 				std::string to_string(const bool transpose=false, const bool displaying_small_mat_elts=false) const;
+				MatType getType() const;
 				~MatSparse();

 			private:
@@ -82,6 +94,6 @@ namespace Faust


 };
-#include "faust_MatSparse_gpu_double.hpp"
+
 #endif
 #endif
--- a/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.h
+#ifndef __TRANSFORM_HELPER_GPU2__
+#define __TRANSFORM_HELPER_GPU2__
+#include "faust_constant.h"
+#include "faust_Transform_gpu.h"
+//#include "faust_Transform.h"
+#include "faust_TransformHelperGen.h"
+#include <memory>
+namespace Faust
+{
+	template<typename FPP, FDevice DEVICE> class TransformHelper;
+	template<typename FPP, FDevice DEVICE> class TransformHelperGen;
+	template<typename FPP>
+		class TransformHelper<FPP,GPU2> : public TransformHelperGen<FPP,GPU2> // GPU2 specialization; relies on members inherited from TransformHelperGen (transform, is_transposed, is_conjugate)
+		{
+			public:
+				TransformHelper(); // only runs the TransformHelperGen default constructor
+#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
+			template<typename ...GList> TransformHelper(GList& ... t); // builds the helper from one or more containers of factor pointers (see push_back_)
+#endif
+				void push_back(const MatGeneric<FPP,GPU2>* M, const bool optimizedCopy=false, const bool copying=true); // appends M to the wrapped Transform; optimizedCopy is ignored for now
+				template<typename Head, typename ... Tail>
+					void push_back_(Head& h, Tail&... t); // pushes each element of h with copying=false, then processes t
+				void push_back_(); // end of the variadic recursion, does nothing
+				void Display() const; // forwarded to Transform::Display()
+				MatDense<FPP,GPU2> get_product(); // forwarded to Transform::get_product()
+				MatDense<FPP,GPU2> multiply(const Faust::MatDense<FPP,GPU2> &A, const bool transpose=false, const bool conjugate=false); // multiplies by A; transpose/conjugate are XOR-ed into the helper flags only for the duration of the call
+				Real<FPP> normFro() const; // norm() of the dense matrix returned by Transform::get_product()
+				faust_unsigned_int size() const; // forwarded to Transform::size()
+				void update_total_nnz() const; // forwarded to Transform::update_total_nnz()
+				Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag); // forwarded to Transform::spectralNorm()
+				bool is_fact_sparse(int id) const; // forwarded to Transform::is_fact_sparse()
+				bool is_fact_dense(int id) const; // forwarded to Transform::is_fact_dense()
+				MatGeneric<FPP,GPU2>* get_gen_fact_nonconst(const faust_unsigned_int id) const; // factor id as returned by Transform::get_fact(id, false), i.e. not cloned
+				void update(const MatGeneric<FPP, GPU2>& M, const faust_unsigned_int id); // forwarded to Transform::update()
+		};
+}
+#include "faust_TransformHelper_gpu.hpp"
+#endif
--- a/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.hpp
+namespace Faust
+{
+	template<typename FPP,FDevice DEVICE> class Transform;
+
+	/** \brief Default constructor; only runs the TransformHelperGen default constructor. */
+	template<typename FPP>
+	TransformHelper<FPP,GPU2>::TransformHelper() : TransformHelperGen<FPP,GPU2>()
+	{
+	}
+
+#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
+	/** \brief Builds a helper from one or more containers of factor pointers (see push_back_()). */
+	template<typename FPP>
+		template<typename ... GList>
+		TransformHelper<FPP,GPU2>::TransformHelper(GList& ... t): TransformHelper<FPP,GPU2>()
+		{
+			this->push_back_(t...);
+		}
+#endif
+
+	/** \brief Appends the factor M to the wrapped Transform.
+	 *
+	 * \param optimizedCopy ignored for now.
+	 * \param copying forwarded as is to Transform::push_back().
+	 */
+	template<typename FPP>
+		void TransformHelper<FPP,GPU2>::push_back(const MatGeneric<FPP,GPU2>* M, const bool optimizedCopy/*=false*/, const bool copying/*=true*/)
+		{
+			//optimizedCopy is ignored because not handled yet by Transform<FPP,GPU2> // TODO ? (it's not used by wrappers anyway)
+			this->transform->push_back(M, copying);
+		}
+
+	/** \brief Forwards to Transform::Display(). */
+	template<typename FPP>
+		void TransformHelper<FPP,GPU2>::Display() const
+		{
+			this->transform->Display();
+		}
+
+	/** \brief Pushes every element of the container h (with copying=false), then processes the remaining containers t. */
+	template<typename FPP>
+		template<typename Head, typename ... Tail>
+		void TransformHelper<FPP,GPU2>::push_back_(Head& h, Tail&... t)
+		{
+			for(auto it=h.begin(); it < h.end(); it++)
+			{
+				auto f = *it;
+				this->push_back(f, false, false);
+			}
+			this->push_back_(t...);
+		}
+
+	/** \brief End of the variadic recursion of push_back_(Head&, Tail&...). */
+	template<typename FPP>
+		void TransformHelper<FPP,GPU2>::push_back_()
+		{
+			// do nothing, here just for empty tail of above function
+		}
+
+	/** \brief Returns the dense matrix computed by Transform::get_product(). */
+	template<typename FPP>
+		MatDense<FPP,GPU2> TransformHelper<FPP,GPU2>::get_product()
+		{
+			// NOTE(review): the transpose/conjugate flags of the helper are not passed
+			// to Transform::get_product() here -- confirm whether this is intended.
+			return this->transform->get_product();
+		}
+
+	/** \brief Returns norm() of the dense matrix computed by Transform::get_product(). */
+	template<typename FPP>
+		Real<FPP> TransformHelper<FPP,GPU2>::normFro() const
+		{
+			return this->transform->get_product().norm();
+		}
+
+	/** \brief Forwards to Transform::size(). */
+	template<typename FPP>
+		faust_unsigned_int TransformHelper<FPP,GPU2>::size() const
+		{
+			return this->transform->size();
+		}
+
+	/** \brief Forwards to Transform::update_total_nnz(). */
+	template<typename FPP>
+		void TransformHelper<FPP,GPU2>::update_total_nnz() const
+		{
+			this->transform->update_total_nnz();
+		}
+
+	/** \brief Forwards to Transform::spectralNorm(). */
+	template<typename FPP>
+		Real<FPP> TransformHelper<FPP,GPU2>::spectralNorm(int32_t nb_iter_max, float threshold, int& flag)
+		{
+			return this->transform->spectralNorm(nb_iter_max, threshold, flag);
+		}
+
+	/** \brief Forwards to Transform::is_fact_sparse(). */
+	template<typename FPP>
+		bool TransformHelper<FPP,GPU2>::is_fact_sparse(int id) const
+		{
+			return this->transform->is_fact_sparse(id);
+		}
+
+	/** \brief Forwards to Transform::is_fact_dense(). */
+	template<typename FPP>
+		bool TransformHelper<FPP,GPU2>::is_fact_dense(int id) const
+		{
+			return this->transform->is_fact_dense(id);
+		}
+
+	/** \brief Returns the factor of index id as given by Transform::get_fact(id, false), i.e. without cloning it. */
+	template<typename FPP>
+		MatGeneric<FPP,GPU2>* TransformHelper<FPP,GPU2>::get_gen_fact_nonconst(const faust_unsigned_int id) const
+		{
+			return this->transform->get_fact(id, false);
+		}
+
+	/** \brief Forwards to Transform::update(): replaces the content of the factor of index id by M. */
+	template<typename FPP>
+		void TransformHelper<FPP,GPU2>::update(const MatGeneric<FPP, GPU2>& M,const faust_unsigned_int id)
+		{
+			this->transform->update(M, id);
+		}
+
+	/** \brief Multiplies the transform by the dense matrix A.
+	 *
+	 * \param transpose XOR-ed into the is_transposed flag for the duration of the call.
+	 * \param conjugate XOR-ed into the is_conjugate flag for the duration of the call.
+	 * \return the matrix returned by Transform::multiply().
+	 *
+	 * Both flags are restored on exit, including when Transform::multiply() throws.
+	 */
+	template<typename FPP>
+		MatDense<FPP,GPU2> TransformHelper<FPP,GPU2>::multiply(const Faust::MatDense<FPP,GPU2> &A, const bool transpose /* deft to false */, const bool conjugate)
+		{
+			// Puts a flag back to its saved value when the scope is left, whatever the
+			// way it is left: Transform::multiply() throws on an uninitialized A or on
+			// an empty transform, and the helper must not stay toggled in that case.
+			struct FlagRestorer
+			{
+				bool& flag;
+				const bool saved;
+				~FlagRestorer() { flag = saved; }
+			};
+			FlagRestorer transp_restorer{this->is_transposed, this->is_transposed};
+			FlagRestorer conj_restorer{this->is_conjugate, this->is_conjugate};
+			this->is_transposed ^= transpose;
+			this->is_conjugate ^= conjugate;
+			return this->transform->multiply(A, this->isTransposed2char());
+		}
+}
--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.hpp.in
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.hpp.in
+#include "faust_Transform_gpu.h"
 namespace Faust
 {

@@ -47,6 +48,8 @@ namespace Faust
 				marr_funcs->addgpu_anymat(gpu_mat_arr, M->get_gpu_mat_ptr());
 		}

+
+
 	template<>
 		int32_t Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::size() const
 		{
@@ -80,6 +83,29 @@ namespace Faust
 			return M;
 		}

+	template<> // replaces the content of the factor of index id by the content of M
+		void Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::update(const MatGeneric<@FAUST_SCALAR_FOR_GM@, GPU2>& M, const faust_unsigned_int id)
+		{
+			auto fact = get_fact(id, false); // the factor is requested without cloning so that the assignment below targets it
+			auto fact_type = fact->getType();
+			if(M.getType() != fact_type) // a dense factor can only be updated from a dense matrix, and likewise for sparse
+				throw std::runtime_error("The factor matrix to update is not of the same type (dense or sparse) as the input matrix.");
+			if(fact_type == Dense)
+			{
+				// fact to update is dense
+				auto dfact = dynamic_cast<MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>*>(fact);
+				auto dM = dynamic_cast<const MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>*>(&M);
+				*dfact = *dM; // MatDense copy assignment
+			}
+			else
+			{
+				// fact to update is sparse
+				auto sfact = dynamic_cast<MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>*>(fact);
+				auto sM = dynamic_cast<const MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>*>(&M);
+				*sfact = *sM; // MatSparse copy assignment
+			}
+		}
+
 	template<>
 		void Transform<@FAUST_SCALAR_FOR_GM@, GPU2>::get_facts(std::vector<MatGeneric<@FAUST_SCALAR_FOR_GM@,GPU2>*> &factors, bool cloning_facts/*=true*/) const
 		{
@@ -185,12 +211,32 @@ namespace Faust


 	template<>
-		MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_product() const
+		void Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_product(MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& M, const char opThis/*='N'*/, const bool isConj/*=false*/) const // the result is written into M, whose previous GPU matrix is freed
 		{
 			auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
-			auto gpu_mat = marr_funcs->chain_matmul_one(gpu_mat_arr, OP_NOTRANSP);
-			MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> M;
+			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
+			gm_Op op; // gpu_mod operation matching opThis
+			if(opThis == 'N')
+				op = OP_NOTRANSP;
+			else if(opThis == 'T')
+				op = OP_TRANSP;
+			else if(opThis == 'H')
+				op = OP_CONJTRANSP;
+			else
+				throw std::runtime_error("Invalid opThis"); // only 'N', 'T' and 'H' are accepted
+			auto gpu_mat = marr_funcs->chain_matmul_one(gpu_mat_arr, op);
+			if(M.gpu_mat != nullptr) // M is about to receive a new GPU matrix: release the one it holds
+				dsm_funcs->free(M.gpu_mat);
+			//TODO: rather to delete use a marr_funcs function that allows to pass a pre-allocated output buffer
 			M.gpu_mat = gpu_mat;
+			if(isConj && opThis != 'H') M.conjugate(); // with 'H' the conjugation is already part of op
+		}
+
+	template<> // convenience overload returning the product by value
+		MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_product(const char opThis/*='N'*/, const bool isConj/*=false*/) const
+		{
+			MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> M;
+			this->get_product(M, opThis, isConj); // the overload above fills M
 			return M;
 		}

@@ -272,4 +318,84 @@ namespace Faust
 				}
 			}
 		}
+
+	// Tells whether the factor of index id is a sparse matrix.
+	template<>
+		bool Faust::Transform<@FAUST_SCALAR_FOR_GM@, GPU2>::is_fact_sparse(int id) const
+		{
+			// Only the type is read, so the factor is requested without cloning:
+			// get_fact() clones by default and the clone was never released here.
+			return get_fact(id, /*cloning_fact*/ false)->getType() == Sparse;
+		}
+
+	// Tells whether the factor of index id is a dense matrix.
+	template<>
+		bool Faust::Transform<@FAUST_SCALAR_FOR_GM@, GPU2>::is_fact_dense(int id) const
+		{
+			// Only the type is read, so the factor is requested without cloning:
+			// get_fact() clones by default and the clone was never released here.
+			return get_fact(id, /*cloning_fact*/ false)->getType() == Dense;
+		}
+
+	// Multiplies op(this) by the dense matrix A, where op is selected by opThis:
+	// 'N' (no transpose), 'T' (transpose) or 'H' (conjugate transpose).
+	// Throws std::runtime_error if A holds no GPU matrix, if the transform has no
+	// factor array, or if opThis is none of the three values above.
+	template<>
+		MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> &A, const char opThis)
+		{
+			auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
+			gm_Op op;
+			if(A.gpu_mat == nullptr)
+				throw std::runtime_error("MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> argument is not initialized.");
+			if(gpu_mat_arr == nullptr)
+				throw std::runtime_error("No factors in Transform.");
+			// The result always has as many columns as A; only its number of rows
+			// depends on opThis.
+			int32_t out_nrows = this->getNbCol(), out_ncols = A.getNbCol(); // transpose/adjoint case
+			if(opThis == 'N')
+			{
+				op = OP_NOTRANSP;
+				out_nrows = getNbRow();
+			}
+			else if(opThis == 'T')
+				op = OP_TRANSP;
+			else if(opThis == 'H')
+				op = OP_CONJTRANSP;
+			else
+				throw std::runtime_error("Invalid opThis");
+			// out is created without a GPU buffer: it receives the one produced by the chain product.
+			MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
+			out.gpu_mat = marr_funcs->chain_matmul_by_dsm_one(this->gpu_mat_arr, op, A.gpu_mat);
+			return out;
+		}
+
+	template<> // dereference: gives the factor at the iterator's current index
+		MatGeneric<@FAUST_SCALAR_FOR_GM@,GPU2>* Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::operator*() const
+		{
+			return container.get_fact(index, /*cloning_fact*/ false); // the factor is not cloned
+		}
+
+	template<> // pre-increment: moves to the next index and returns this iterator
+		Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator& Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::operator++()
+		{
+			index++;
+			return *this;
+		}
+
+	// Post-increment: moves to the next index and returns a copy of the iterator
+	// as it was before the increment.
+	template<>
+		Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::operator++(int)
+		{
+			iterator copy(*this);
+			// Must use the pre-increment: (*this)++ would call this very function
+			// again and recurse without end.
+			++(*this);
+			return copy;
+		}
+
+	template<> // builds an iterator on container positioned at index
+		Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::iterator(const Transform<@FAUST_SCALAR_FOR_GM@, GPU2>& container, size_t index) : index(index), container(container)
+		{
+		}
+
+	template<> // iterator positioned at index 0
+		Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::begin() const
+		{
+			return Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator(*this, 0);
+		}
+
+	template<> // iterator positioned at index size(), i.e. one past the last factor
+		Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::end() const
+		{
+			return Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator(*this, size());
+		}
+
+
 }
--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.h
@@ -4,44 +4,71 @@
 #include "faust_gpu_mod_utils.h"
 #include "faust_constant.h"
 #include "faust_MatGeneric_gpu.h"
+#include "faust_MatSparse_gpu.h"
 #include "faust_MatDense_gpu.h"
 #include <vector>

 namespace Faust
 {
-	template<typename FPP, FDevice DEVICE> class Transform;

 	template<typename FPP>
 		class Transform<FPP,GPU2>
 		{
 			gm_MatArray_t gpu_mat_arr;
 			public:
-				Transform();
-				Transform(const std::vector<MatGeneric<FPP,GPU2>*> &factors);
-				Transform(const Transform<FPP,GPU2>& t);
-				~Transform();
-				void operator=(const Transform<FPP,GPU2>& t);
-				void push_back(const MatGeneric<FPP,GPU2>*, bool copying=true);
-				void push_first(const MatGeneric<FPP,GPU2>*, bool copying=true);
-				void pop_front();
-				void pop_back();
-				void clear();
-				MatGeneric<FPP,GPU2>* get_fact(int32_t id, bool cloning_fact=true) const;
-				void get_facts(std::vector<MatGeneric<FPP,GPU2>*> &factors, bool cloning_facts=true) const;
-				void transpose();
-				int32_t getNbRow()const;
-				int32_t getNbCol()const;
-				void Display() const;
-				int32_t size() const;
-				faust_unsigned_int get_total_nnz() const;
-				void update_total_nnz() const;
-				void scalarMultiply(const FPP& alpha);
-				MatDense<FPP,GPU2> get_product() const;
-				void multiply(const Transform<FPP,GPU2> & A);
-				void multiplyLeft(const Transform<FPP,GPU2> & A);
-				Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag);
+			Transform();
+			Transform(const std::vector<MatGeneric<FPP,GPU2>*> &factors);
+			Transform(const Transform<FPP,GPU2>& t);
+			~Transform();
+			void operator=(const Transform<FPP,GPU2>& t);
+			void push_back(const MatGeneric<FPP,GPU2>*, bool copying=true);
+			void push_first(const MatGeneric<FPP,GPU2>*, bool copying=true);
+			void pop_front();
+			void pop_back();
+			void clear();
+			void update(const MatGeneric<FPP, GPU2>& M, const faust_unsigned_int id); // NOTE(review): presumably replaces the factor number id with M -- confirm against the definition
+			MatGeneric<FPP,GPU2>* get_fact(int32_t id, bool cloning_fact=true) const;
+			void get_facts(std::vector<MatGeneric<FPP,GPU2>*> &factors, bool cloning_facts=true) const;
+			bool is_fact_sparse(int id) const;
+			bool is_fact_dense(int id) const;
+			void transpose();
+			int32_t getNbRow()const;
+			int32_t getNbCol()const;
+			void Display() const;
+			int32_t size() const;
+			faust_unsigned_int get_total_nnz() const;
+			void update_total_nnz() const;
+			void scalarMultiply(const FPP& alpha);
+			MatDense<FPP,GPU2> get_product(const char opThis='N', const bool isConj=false) const; // result returned by value; both arguments are optional
+			void get_product(MatDense<FPP,GPU2>& M, const char opThis='N', const bool isConj=false) const; // NOTE(review): presumably the same product, written into M -- confirm
+			MatDense<FPP,GPU2> multiply(const Faust::MatDense<FPP,GPU2> &A, const char opThis); // opThis has no default value here, unlike in get_product
+			void multiply(const Transform<FPP,GPU2> & A);
+			void multiplyLeft(const Transform<FPP,GPU2> & A);
+			Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag);
+//			using transf_iterator = typename std::vector<Faust::MatGeneric<FPP,Cpu>*>::const_iterator;
+//
+//			transf_iterator begin() const;
+//
+//			transf_iterator end() const;
+			public:
+			class iterator : public std::iterator<std::output_iterator_tag, MatGeneric<FPP,GPU2>*> // index-based iterator over the factors; NOTE(review): no operator==/operator!= is declared, so two iterators cannot be compared as-is
+			{
+				public:
+					explicit iterator(const Transform<FPP, GPU2>& container, size_t index = 0); // container is kept by reference (see the member below), so it has to outlive the iterator
+					MatGeneric<FPP,GPU2>* operator*() const; // factor at the current index
+					iterator & operator++(); // pre-increment
+					//post-increment op
+					iterator operator++(int);
+				private:
+					size_t index; // current position among the factors
+					const Transform<FPP, GPU2> & container; // iterated Transform, referenced and not owned
+			};
+
+			Transform<FPP,GPU2>::iterator begin() const; // iterator at index 0
+			Transform<FPP,GPU2>::iterator end() const; // iterator at index size()
+
 		};
 }
-#include "faust_Transform_gpu_double.hpp"
+
 #endif
 #endif
--- a/src/faust_linear_operator/GPU2/faust_Vect_gpu.hpp.in
+++ b/src/faust_linear_operator/GPU2/faust_Vect_gpu.hpp.in
+#include "faust_Vect_gpu.h"
 #ifndef _FSFG_
 #define _FSFG_
 typedef @FAUST_SCALAR_FOR_GM@ FSFG;
@@ -6,12 +7,12 @@ typedef @FAUST_SCALAR_FOR_GM@ FSFG;
 namespace Faust
 {

-	template<typename FSFG>
+	template<> // Default constructor: only delegates to the MatDense<FSFG,GPU2> default constructor.
 		Vect<FSFG,GPU2>::Vect():MatDense<FSFG,GPU2>()
 	{
 	}

-	template<typename FSFG>
+	template<>
 		Vect<FSFG,GPU2>::Vect(const faust_unsigned_int size,
 				const FSFG* cpu_data,
 				const bool no_alloc,
@@ -20,19 +21,19 @@ namespace Faust
 	{
 	}

-	template<typename FSFG>
+	template<> // Vector length, taken as the row count (getNbRow()) of the underlying matrix.
 	faust_unsigned_int Vect<FSFG,GPU2>::size() const
 	{
 		return this->getNbRow();
 	}

-	template<typename FSFG>
-		void resize(const faust_unsigned_int size)
+	template<> // Resizes the vector by forwarding to MatDense<FSFG, GPU2>::resize(size, 1).
+		void Vect<FSFG,GPU2>::resize(const faust_unsigned_int size) // now qualified as a Vect member; the removed line defined an unqualified resize
 		{
 			MatDense<FSFG, GPU2>::resize(size, 1);
 		}

-	template<typename FSFG>
+	template<>
 		void Vect<FSFG,GPU2>::operator=(const Vect<FSFG,GPU2> & v)
 		{
 			if(size() == v.size())
@@ -41,7 +42,7 @@ namespace Faust
 				throw std::runtime_error("Dimensions must agree.");
 		}

-	template<typename FSFG>
+	template<>
 		void Vect<FSFG,GPU2>::operator=(const Vect<FSFG,Cpu> & v)
 		{
 			if(size() == v.size())
@@ -55,7 +56,7 @@ namespace Faust
 				throw std::runtime_error("Dimensions must agree.");
 		}

-	template<typename FSFG>
+	template<>
 		FSFG Vect<FSFG,GPU2>::max()
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
@@ -64,7 +65,7 @@ namespace Faust
 			return max_coeff;
 		}

-		template<typename FSFG>
+		template<>
 		FSFG Vect<FSFG,GPU2>::min()
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
@@ -73,8 +74,8 @@ namespace Faust
 			return min_coeff;
 		}

-		template<typename FSFG>
-		FSFG Vect<FSFG,GPU2>::dot(const Vect<FSFG,Cpu> &v)
+		template<>
+		FSFG Vect<FSFG,GPU2>::dot(const Vect<FSFG,GPU2> &v)
 		{
 			FSFG alpha;
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
@@ -82,7 +83,7 @@ namespace Faust
 			return alpha;
 		}

-		template<typename FSFG>
+		template<>
 		FSFG Vect<FSFG,GPU2>::sum() const
 		{
 			FSFG s;
@@ -91,66 +92,66 @@ namespace Faust
 			return s;
 		}

-		template<typename FSFG>
+		template<> // Mean of the entries: sum() divided by size(). NOTE(review): no guard for size() == 0.
 			FSFG Vect<FSFG,GPU2>::mean() const
 			{
 				return sum()/size();
 			}

-		template<typename FSFG>
+		template<> // In-place operator*=: hands both gpu matrices to dsm_funcs->elt_wise_mul.
 		void Vect<FSFG,GPU2>::operator*=(const Vect<FSFG,GPU2> &v)
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-			dsm_funcs->elt_wise_mul=(this->gpu_mat, v.gpu_mat);
+			dsm_funcs->elt_wise_mul(this->gpu_mat, v.gpu_mat); // plain call; the removed line had a stray '=' after the function name
 		}

-		template<typename FSFG>
+		template<> // In-place operator/=: hands both gpu matrices to dsm_funcs->elt_wise_div.
 		void Vect<FSFG,GPU2>::operator/=(const Vect<FSFG,GPU2> &v)
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-			dsm_funcs->elt_wise_div=(this->gpu_mat, v.gpu_mat);
+			dsm_funcs->elt_wise_div(this->gpu_mat, v.gpu_mat); // plain call; the removed line had a stray '=' after the function name
 		}

-		template<typename FSFG>
-			void Vect<FSFG,GPU2>::operator==(const Vect<FSFG,GPU2> &v) const
+		template<> // Equality test based on get_gpu_mat_ptr(): the two pointers are compared, element values are not inspected here.
+			bool Vect<FSFG,GPU2>::operator==(const Vect<FSFG,GPU2> &v) const // returns bool; the removed declaration returned void
 			{
 				return this->get_gpu_mat_ptr() == v.get_gpu_mat_ptr();
 			}

-		template<typename FSFG>
-			void Vect<FSFG,GPU2>::operator!=(const Vect<FSFG,GPU2> &v) const
+		template<> // Inequality test: logical negation of operator==.
+			bool Vect<FSFG,GPU2>::operator!=(const Vect<FSFG,GPU2> &v) const // returns bool; the removed declaration returned void
 			{
-				return ! (*this)==v;
+				return ! (*this==v); // parentheses make '!' apply to the comparison result instead of to *this
 			}

-		template<typename FSFG>
+		template<> // Returns a Vect<FSFG,Cpu> built from size() and the data of MatDense<FSFG,GPU2>::tocpu(stream).
 			Vect<FSFG,Cpu> Vect<FSFG,GPU2>::tocpu(const void* stream/*=nullptr*/) const
 			{
 				auto matvec = MatDense<FSFG,GPU2>::tocpu(stream);
 				return Vect<FSFG, Cpu>(size(), matvec.getData());
 			}

-		template<typename FSFG>
+		template<> // Displays the vector by converting it with tocpu() and calling Display() on the result.
 			void Vect<FSFG,GPU2>::Display() const
 			{
 				Vect<FSFG, Cpu> v = tocpu();
 				v.Display();
 			}

-		template<typename FSFG>
+		template<> // Passes the gpu matrix and the address of val to dsm_funcs->setval.
 			void Vect<FSFG,GPU2>::setValues(const FSFG& val)
 			{
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				dsm_funcs->setval(this->gpu_mat, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&val));
+				dsm_funcs->setval(this->gpu_mat, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(const_cast<FSFG*>(&val))); // const is stripped first because the target of the reinterpret_cast is a non-const pointer (NOTE(review): presumably setval only reads through it -- confirm)
 			}

-		template<typename FSFG>
+		template<> // Returns the value that dsm_funcs->mean_relerr computes for this vector against ref_vec.
 			FSFG Vect<FSFG,GPU2>::mean_relative_error(const Vect<FSFG,GPU2>& ref_vec) const
 			{

 				FSFG e;
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				dsm_funcs->mean_relerr(this->get_gpu_mat_ptr(), ref_vec->get_gpu_mat_ptr(), reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&e));
+				dsm_funcs->mean_relerr(this->get_gpu_mat_ptr(), ref_vec.get_gpu_mat_ptr(), reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&e)); return e; // e receives the result through the pointer; it has to be returned because the function is declared to return FSFG (flowing off the end was undefined behavior)
 			}



--- a/src/faust_linear_operator/GPU2/faust_Vect_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_Vect_gpu.h
@@ -28,11 +28,11 @@ namespace Faust
 			void operator=(const Vect<FPP,Cpu> &v);
 			void operator*=(const Vect<FPP,GPU2> &v);
 			void operator/=(const Vect<FPP,GPU2> &v);
-			void operator==(const Vect<FPP,GPU2> &v)const;
-			void operator!=(const Vect<FPP,GPU2> &v)const;
+			bool operator==(const Vect<FPP,GPU2> &v)const;
+			bool operator!=(const Vect<FPP,GPU2> &v)const;
 			FPP max();
 			FPP min();
-			FPP dot(const Vect<FPP,Cpu> &v);
+			FPP dot(const Vect<FPP,GPU2> &v);
 			FPP sum() const;
 			FPP mean() const;
 			void Display() const;
@@ -43,5 +43,5 @@ namespace Faust
 			void setEyes() = delete;
 		};
 }
-#include "faust_Vect_gpu_double.hpp"
+
 #endif
--- a/src/faust_linear_operator/faust_TransformHelperGen.h
+++ b/src/faust_linear_operator/faust_TransformHelperGen.h
+#ifndef __FAUST_TRANSFORM_HELPER_DEVICE__
+#define __FAUST_TRANSFORM_HELPER_DEVICE__
+#include "faust_Slice.h"
+#include <memory>
+
+namespace Faust
+{
+	template<typename FPP,FDevice DEVICE> class Transform;
+	template<typename FPP,FDevice DEVICE> class TransformHelper;
+	template<typename FPP,FDevice DEVICE> class Vect;
+	template<typename FPP,FDevice DEVICE> class MatDense;
+	template<typename FPP,FDevice DEVICE> class MatGeneric;
+
+	/**
+	 * \brief Parent class of the TransformHelper<FPP,DEV> specializations (Cpu and GPU2).
+	 *
+	 * It declares the state the helpers have in common and the push_back() operation
+	 * each device-specific helper has to implement.
+	 */
+	template<typename FPP, FDevice DEV>
+	class TransformHelperGen
+	{
+		public:
+		TransformHelperGen();
+#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
+		template<typename ...GList> TransformHelperGen(GList& ... t);
+#endif
+
+		// The class is a polymorphic base (push_back is pure virtual): a virtual destructor
+		// keeps the destruction of a derived helper through a TransformHelperGen pointer well-defined.
+		virtual ~TransformHelperGen() = default;
+
+		// Appends the factor M; to be implemented by each device-specific helper.
+		virtual void push_back(const MatGeneric<FPP,DEV>* M, const bool optimizedCopy=false, const bool copying=true)=0;
+
+		// NOTE(review): presumably maps is_transposed/is_conjugate to an operation character -- confirm in the .hpp.
+		const char isTransposed2char() const;
+
+		protected:
+			bool is_transposed;
+			bool is_conjugate;
+			bool is_sliced;
+			Slice slices[2]; // two slices; NOTE(review): presumably one per dimension (rows, columns) -- confirm
+			bool is_fancy_indexed;
+			faust_unsigned_int * fancy_indices[2]; // raw pointers; ownership is not visible from this declaration
+			faust_unsigned_int fancy_num_rows;
+			faust_unsigned_int fancy_num_cols;
+			std::shared_ptr<Transform<FPP,DEV>> transform; // Transform object held through a shared_ptr
+	};
+}
+#include "faust_TransformHelperGen.hpp"
+#endif
+
+