Adjust GPU2 Faust::MatDense, Faust::MatSparse and Faust::Vect to support complex scalars

Adjust GPU2 Faust::MatDense, Faust::MatSparse and Faust::Vect to support complex scalars (updating to gpu_mod@3920c368).

Adjust GPU2 Faust::MatDense, Faust::MatSparse and Faust::Vect to support complex scalars
fed57eb3 · hhakim · 64642111 · fed57eb3 · 3920c368 · ed251cfa
Commit fed57eb3 authored 4 years ago by hhakim
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -605,6 +605,7 @@ include(CMake/findExternalLibs.cmake)
 #########################################
 if(USE_GPU_MOD)
+	# GPU2
 	find_package(GPU_MOD) #REQUIRED)
 	if(NOT GPU_MOD_INCLUDE_DIR)
 		set(GPU_MOD_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/gpu_mod/src) # fallback to submodule directory in case find_package didn't work
@@ -620,9 +621,14 @@ if(USE_GPU_MOD)
 	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu_double.cpp)
 	#target_sources(${FAUST_TARGET} PUBLIC ${GPU_MOD_CPP_FILES})
 	#list(APPEND CPP_FILES )
-	set(FAUST_SCALAR_FOR_GM complex<double>)
+	set(FAUST_SCALAR_FOR_GM std::complex<double>)
 	set(GM_SCALAR cuDoubleComplex)
 	set(GM_REINTERPRET_CAST_SCALAR double)
+	#configure_file(${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_gen.hpp.in ${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_complexdouble.hpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu_complexdouble.cpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu_complexdouble.cpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu_complexdouble.cpp)
+	configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu_complexdouble.cpp)
 	configure_file(${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_gen.hpp.in ${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_complexdouble.hpp)
 	# set install relative path of the lib (for each OS)
 	if(APPLE)
@@ -634,7 +640,6 @@ if(USE_GPU_MOD)
 	endif()
 	file(TO_NATIVE_PATH lib/${GPU_MOD_LIB} GPU_MOD_LIB_PATH) # used in setup.py
 	file(TO_NATIVE_PATH ${CMAKE_INSTALL_PREFIX}/${GPU_MOD_LIB} GPU_MOD_LIB_INSTALL_PATH) # only for system packages (not for pip package)
-	# GPU2
 endif()

--- a/gpu_mod @ 3920c368
+++ b/gpu_mod @ 3920c368
-Subproject commit ed251cfad23484794e02500f1e413b86eb58eb85
+Subproject commit 3920c368b6d21dd5b4f7d6b86535bfee96518357
--- a/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
+++ b/misc/test/src/C++/test_matdense_gpu_mod.cpp.in
@@ -8,10 +8,13 @@
 #include "faust_cuda_device.h"
 #include <cstdlib>
 #include <cmath>
+typedef @TEST_FPP@ FPP;
 using namespace std;
 using namespace Faust;
-double calc_err(MatDense<double, GPU2> &gpu_mat, MatDense<double, Cpu> &cpu_mat)
+double calc_err(MatDense<FPP, GPU2> &gpu_mat, MatDense<FPP, Cpu> &cpu_mat)
 {
 	auto gpu_mat_to_cpu = gpu_mat.tocpu();
 	auto err_diff = cpu_mat;
@@ -26,10 +29,10 @@ void test_mul_gpu_dense()
 {
 	faust_unsigned_int nrows = 1024, ncols = 1024;
 	faust_unsigned_int nrows2 = 1024, ncols2 = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
-	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows2,ncols2);
+	auto cpu_mat2 = Faust::MatDense<FPP,Cpu>::randMat(nrows2,ncols2);
-	MatDense<double,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());
+	MatDense<FPP,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());
 	// test MatDense<FPP,GPU2> * MatDense<FPP,GPU2>
 	cout << "Mul. GPUDense*GPUDense in GPUDense" << endl;
 	gpu_mat1.multiply(gpu_mat2);
@@ -50,7 +53,7 @@ void test_mul_gpu_dense()
 	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
 	cout << "OK" << endl;
 	cout << "Mul. GPUDense*CPUSparse in CPUDense" << endl;
-	Faust::MatSparse<double,Cpu> cpu_mat2_sparse(*cpu_mat2);
+	Faust::MatSparse<FPP,Cpu> cpu_mat2_sparse(*cpu_mat2);
 //	cout << cpu_mat2_sparse.to_string(false, true) << endl;
 //	cout << cpu_mat2->to_string(false, true) << endl;
 	gpu_mat1.multiply(cpu_mat2_sparse, cpu_mat1_mat2_test);
@@ -63,7 +66,7 @@ void test_mul_gpu_dense()
 	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
 	cout << "OK" << endl;
 	cout << "Mul. GPUDense*CPUSparse in GPUDense" << endl;
-	MatDense<double, GPU2> gpu_mat1_mat2_test(nrows, ncols2);
+	MatDense<FPP, GPU2> gpu_mat1_mat2_test(nrows, ncols2);
 	gpu_mat1.multiply(cpu_mat2_sparse, gpu_mat1_mat2_test);
 	auto gpu_mat1_mat2_test_to_cpu = gpu_mat1_mat2_test.tocpu();
 	err_diff = gpu_mat1_mat2_test_to_cpu;
@@ -71,8 +74,8 @@ void test_mul_gpu_dense()
 	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
 	cout << "OK" << endl;
 	cout << "MatDense*cpu_vec" << endl;
-	Faust::Vect<double, Cpu>* vec = Faust::Vect<double,Cpu>::rand(cpu_mat1->getNbCol());
+	Faust::Vect<FPP, Cpu>* vec = Faust::Vect<FPP,Cpu>::rand(cpu_mat1->getNbCol());
-	Faust::Vect<double, Cpu> vec_copy(*vec);
+	Faust::Vect<FPP, Cpu> vec_copy(*vec);
 	cout << vec->norm() << endl;
 	cout << vec_copy.norm() << endl;
 	gpu_mat1.multiply(*vec);
@@ -94,10 +97,10 @@ void test_mul_eq_op()
 	// convert the two cpu mats to gpu mats
 	faust_unsigned_int nrows = 1024, ncols = 1024;
 	faust_unsigned_int nrows2 = 1024, ncols2 = 784;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
-	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows2,ncols2);
+	auto cpu_mat2 = Faust::MatDense<FPP,Cpu>::randMat(nrows2,ncols2);
-	MatDense<double,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());
+	MatDense<FPP,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());
 	// test *=
 	*cpu_mat1 *= *cpu_mat2;
 	gpu_mat1 *= *cpu_mat2;
@@ -109,9 +112,9 @@ void test_mul_spm()
 {
 	cout << "test_mul_spm()" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 784;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	Faust::MatSparse<double, Cpu> *cpu_mat2_sp = Faust::MatSparse<double, Cpu>::randMat(ncols, nrows, .2);
+	Faust::MatSparse<FPP, Cpu> *cpu_mat2_sp = Faust::MatSparse<FPP, Cpu>::randMat(ncols, nrows, .2);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	gpu_mat1.multiplyLeft(*cpu_mat2_sp);
 	cpu_mat1->multiplyLeft(*cpu_mat2_sp);
 //	cout << gpu_mat1.tocpu().to_string(false, true) << endl;
@@ -125,8 +128,8 @@ void test_resize()
 {
 	cout << "Test resize()" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 784;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	gpu_mat1.resize(45, 12);
 	cout << "OK" << endl;
 }
@@ -135,8 +138,8 @@ void test_setones()
 {
 	cout << "Test setones()" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 784;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols);
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols);
 	gpu_mat1.setOnes();
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
 //	cout << gpu_mat1_to_cpu.to_string(false, true) << endl;
@@ -151,8 +154,8 @@ void test_setzeros()
 {
 	cout << "Test setzeros()" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 784;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols);
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols);
 	gpu_mat1.setZeros();
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
 //	cout << gpu_mat1_to_cpu.to_string(false, true) << endl;
@@ -164,8 +167,8 @@ void test_seteyes()
 {
 	cout << "Test seteyes()" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols);
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols);
 	gpu_mat1.setEyes();
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
 	cout << gpu_mat1_to_cpu.to_string(false, true) << endl;
@@ -180,8 +183,8 @@ void test_clone()
 {
 	cout << "Test clone()" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
 	auto gpu_mat1_clone = gpu_mat1.clone();
 	auto gpu_mat1_clone_to_cpu = gpu_mat1_clone->tocpu();
@@ -198,8 +201,8 @@ void test_transpose()
 {
 	cout << "Test transpose" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 //	cout << cpu_mat1->to_string(false, true) << endl;
 	cpu_mat1->transpose();
 	gpu_mat1.transpose();
@@ -216,8 +219,8 @@ void test_adjoint()
 {
 	cout << "Test adjoint" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 //	cout << cpu_mat1->to_string(false, true) << endl;
 	cpu_mat1->adjoint();
 	gpu_mat1.adjoint();
@@ -234,8 +237,8 @@ void test_conjugate()
 {
 	cout << "Test conjugate" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 //	cout << cpu_mat1->to_string(false, true) << endl;
 	cpu_mat1->conjugate();
 	gpu_mat1.conjugate();
@@ -253,8 +256,8 @@ void test_norm()
 	cout << "test spectral norm" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
 	int flag;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	auto gpu_norm = gpu_mat1.spectralNorm(1000, (float)1e-6);
 	auto cpu_norm = cpu_mat1->spectralNorm(1000, 1e-6, flag);
 //	cout << gpu_norm << endl;
@@ -269,7 +272,7 @@ void test_norm()
 	cout << "test l1 norm" << endl;
 	gpu_norm = gpu_mat1.normL1();
 	cpu_norm = cpu_mat1->normL1();
-//	cout << "gpu: " << gpu_norm << " cpu: " << cpu_norm << endl;
+	cout << "gpu: " << gpu_norm << " cpu: " << cpu_norm << endl;
 	assert(abs(cpu_norm-gpu_norm) < 1e-6);
 	cout << "OK" << endl;
 }
@@ -278,8 +281,8 @@ void test_normalize()
 {
 	cout << "test normalize" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	cpu_mat1->normalize();
 	gpu_mat1.normalize();
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
@@ -294,9 +297,9 @@ void test_scalar_mul()
 {
 	cout << "test scalar mul" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
-	auto lambda = 12.;
+	auto lambda = FPP(12.);
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	cpu_mat1->scalarMultiply(lambda);
 	gpu_mat1.scalarMultiply(lambda);
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
@@ -311,10 +314,10 @@ void test_abs()
 	cout << "test abs" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
 	auto lambda = 12.;
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	cpu_mat1->scalarMultiply(-1);
+	cpu_mat1->scalarMultiply(FPP(-1));
 	cout << cpu_mat1->to_string(false, true) << endl;
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	cpu_mat1->abs();
 	gpu_mat1.abs();
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
@@ -331,10 +334,10 @@ void test_add()
 	cout << "test add" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
 	auto lambda = 12.;
-	// test MatDense<double, GPU2> + MatDense<double, Cpu>
+	// test MatDense<FPP, GPU2> + MatDense<FPP, Cpu>
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat2 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	cpu_mat1->add(*cpu_mat2);
 	gpu_mat1.add(*cpu_mat2);
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
@@ -342,8 +345,8 @@ void test_add()
 	err_diff -= gpu_mat1_to_cpu;
 	auto err = err_diff.norm()/cpu_mat1->norm();
 	assert(err < 1e-6);
-	// test MatDense<double, GPU2> + MatSparse<double, Cpu>
+	// test MatDense<FPP, GPU2> + MatSparse<FPP, Cpu>
-	Faust::MatSparse<double, Cpu> *cpu_mat3_sp = Faust::MatSparse<double, Cpu>::randMat(nrows, ncols, .2);
+	Faust::MatSparse<FPP, Cpu> *cpu_mat3_sp = Faust::MatSparse<FPP, Cpu>::randMat(nrows, ncols, .2);
 //	cpu_mat3_sp->Display();
 //	cout << cpu_mat3_sp->to_string(false, true) << endl;
 	assert(calc_err(gpu_mat1, *cpu_mat1) < 1e-6);
@@ -364,10 +367,10 @@ void test_sub()
 	cout << "test sub" << endl;
 	faust_unsigned_int nrows = 1024, ncols = 1024;
 	auto lambda = 12.;
-	// test MatDense<double, GPU2> + MatDense<double, Cpu>
+	// test MatDense<FPP, GPU2> + MatDense<FPP, Cpu>
-	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat1 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat2 = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
+	MatDense<FPP,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
 	cpu_mat1->sub(*cpu_mat2);
 	gpu_mat1.sub(*cpu_mat2);
 	auto gpu_mat1_to_cpu = gpu_mat1.tocpu();
@@ -375,10 +378,10 @@ void test_sub()
 	err_diff -= gpu_mat1_to_cpu;
 	auto err = err_diff.norm()/cpu_mat1->norm();
 	assert(err < 1e-6);
-	// test MatDense<double, GPU2> + MatSparse<double, Cpu>
+	// test MatDense<FPP, GPU2> + MatSparse<FPP, Cpu>
 	return;
 	//TODO: fix below test error
-	Faust::MatSparse<double, Cpu> *cpu_mat3_sp = Faust::MatSparse<double, Cpu>::randMat(nrows, ncols, .2);
+	Faust::MatSparse<FPP, Cpu> *cpu_mat3_sp = Faust::MatSparse<FPP, Cpu>::randMat(nrows, ncols, .2);
 //	cpu_mat3_sp->Display();
 //	cout << cpu_mat3_sp->to_string(false, true) << endl;
 	assert(calc_err(gpu_mat1, *cpu_mat1) < 1e-6);
@@ -399,8 +402,8 @@ void test_get_device()
 	cout << "test MatDense<FPP,GPU2>::getDevice" << endl;
 	auto nrows = 5;
 	auto ncols = 8;
-	auto cpu_ds_mat = Faust::MatDense<double, Cpu>::randMat(nrows, ncols);
+	auto cpu_ds_mat = Faust::MatDense<FPP, Cpu>::randMat(nrows, ncols);
-	Faust::MatDense<double, GPU2> gpu_ds_mat(nrows, ncols, cpu_ds_mat->getData());
+	Faust::MatDense<FPP, GPU2> gpu_ds_mat(nrows, ncols, cpu_ds_mat->getData());
 	auto dev_id = gpu_ds_mat.getDevice();
 	cout << "device id: " << dev_id << endl;
 	delete cpu_ds_mat;
@@ -411,8 +414,8 @@ void test_move()
 	cout << "test MatDense<FPP,GPU2>::move" << endl;
 	auto nrows = 5;
 	auto ncols = 8;
-	auto cpu_ds_mat = Faust::MatDense<double, Cpu>::randMat(nrows, ncols);
+	auto cpu_ds_mat = Faust::MatDense<FPP, Cpu>::randMat(nrows, ncols);
-	Faust::MatDense<double, GPU2> gpu_ds_mat(nrows, ncols, cpu_ds_mat->getData());
+	Faust::MatDense<FPP, GPU2> gpu_ds_mat(nrows, ncols, cpu_ds_mat->getData());
 	auto ndevs = Faust::count_devices();
 	cout << "device count: " << ndevs << endl;
 	int32_t dest_dev = -1;
@@ -439,12 +442,12 @@ void test_mul_vec()
 	auto size = 2048;
 	auto nrows = 1024;
 	auto ncols = size;
-	auto vec_mat = MatDense<double, Cpu>::randMat(size, 1);
+	auto vec_mat = MatDense<FPP, Cpu>::randMat(size, 1);
-	Faust::Vect<double, Cpu> v_cpu(size, vec_mat->getData());
+	Faust::Vect<FPP, Cpu> v_cpu(size, vec_mat->getData());
-	Faust::Vect<double, GPU2> v_gpu(size, vec_mat->getData());
+	Faust::Vect<FPP, GPU2> v_gpu(size, vec_mat->getData());
-	Faust::Vect<double, GPU2> v_gpu_out(nrows);
+	Faust::Vect<FPP, GPU2> v_gpu_out(nrows);
-	auto cpu_mat = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
+	auto cpu_mat = Faust::MatDense<FPP,Cpu>::randMat(nrows,ncols);
-	MatDense<double,GPU2> gpu_mat(nrows, ncols, cpu_mat->getData());
+	MatDense<FPP,GPU2> gpu_mat(nrows, ncols, cpu_mat->getData());
 	gpu_mat.multiply(v_gpu, v_gpu_out);
 	auto v_gpu_out_cpu = v_gpu_out.tocpu();
 	v_gpu_out_cpu.Display();
@@ -459,9 +462,9 @@ void test_mul_vec()
 void test_gpu_matsparse2matdense()
 {
 	cout << "test MatDense<FPP,GPU2>::MatDense<FPP,GPU2>(MatSparse<FPP,GPU2>&)" << endl;
-	auto sM = Faust::MatSparse<double,Cpu>::randMat(22, 33, .2);
+	auto sM = Faust::MatSparse<FPP,Cpu>::randMat(22, 33, .2);
-	Faust::MatSparse<double,GPU2> sM_gpu(*sM);
+	Faust::MatSparse<FPP,GPU2> sM_gpu(*sM);
-	MatDense<double,GPU2> dM_gpu(sM_gpu);
+	MatDense<FPP,GPU2> dM_gpu(sM_gpu);
 	cout << sM_gpu.norm() << " " << dM_gpu.norm() << endl;
 	assert(abs(sM_gpu.norm()-dM_gpu.norm())< 1e-4);
 	cout << "OK" << endl;

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.cpp.in
--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
 #ifndef __FAUST_MATDENSE_GPU2__
 #define __FAUST_MATDENSE_GPU2__
 #ifdef USE_GPU_MOD
+#include <complex>
 #include <cstdint>
 #include "faust_MatDense.h"
 #include "faust_MatGeneric_gpu.h"
 #include "faust_Vect_gpu.h"
 #include "faust_MatSparse_gpu.h"
 #include "faust_gpu_mod_utils.h"
-#include <cstdint>
 namespace Faust
 {
 	template <typename FPP>

--- a/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.cpp.in
@@ -46,7 +46,7 @@ namespace Faust
 		{
 			if(rowptr == nullptr || colinds == nullptr)
 				throw std::runtime_error("All GPU buffers or none must be defined to instantiate a MatSparse<FPP,GPU2>.");
-			gpu_mat = spm_funcs->togpu_stream(nbRow, nbCol, nnz, rowptr, colinds, values, stream);
+			gpu_mat = spm_funcs->togpu_stream(nbRow, nbCol, nnz, rowptr, colinds, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(values), stream);
 		}
 		else if(!no_zero)
 		{ // no values given, set the matrix as a zero matrix
@@ -134,7 +134,7 @@ namespace Faust
 	{
 			auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
 			if(gpu_mat != nullptr)
-				spm_funcs->mul_scalar(gpu_mat, reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&scalar));
+				spm_funcs->mul_scalar(gpu_mat, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&scalar));
 	}
 	template<>
@@ -147,14 +147,14 @@ namespace Faust
 	void MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::operator+=(const @FAUST_SCALAR_FOR_GM@& scalar)
 	{
 			auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
-			spm_funcs->add_scalar(gpu_mat, reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&scalar));
+			spm_funcs->add_scalar(gpu_mat, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&scalar));
 	}
 	template<>
 	void MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::operator-=(const @FAUST_SCALAR_FOR_GM@& scalar)
 	{
 			auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
-			spm_funcs->sub_scalar(gpu_mat, reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&scalar));
+			spm_funcs->sub_scalar(gpu_mat, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&scalar));
 	}
 	template<>
@@ -178,7 +178,7 @@ namespace Faust
 				*ncols = getNbCol();
 			if(nnz != nullptr)
 				*nnz = getNonZeros();
-			spm_funcs->tocpu(gpu_mat, row_ptr, col_ind, value_ptr);
+			spm_funcs->tocpu(gpu_mat, row_ptr, col_ind, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(value_ptr));
 		}
 	template<>

--- a/src/faust_linear_operator/GPU2/faust_Vect_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_Vect_gpu.cpp.in
@@ -47,7 +47,7 @@ namespace Faust
 			if(size() == v.size())
 			{
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				auto gpu_mat = dsm_funcs->togpu(this->getNbRow(), 1, const_cast<@FAUST_SCALAR_FOR_GM@*>(v.getData()));
+				auto gpu_mat = dsm_funcs->togpu(this->getNbRow(), 1, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(v.getData()));
 				dsm_funcs->free(this->gpu_mat);
 				this->gpu_mat = gpu_mat;
 			}
@@ -60,7 +60,7 @@ namespace Faust
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
 			FSFG _max_coeff;
-			dsm_funcs->max(this->gpu_mat, &_max_coeff);
+			dsm_funcs->max(this->gpu_mat, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&_max_coeff));
 			return _max_coeff;
 		}
@@ -69,7 +69,7 @@ namespace Faust
 		{
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
 			FSFG _min_coeff;
-			dsm_funcs->min(this->gpu_mat, &_min_coeff);
+			dsm_funcs->min(this->gpu_mat, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&_min_coeff));
 			return _min_coeff;
 		}
@@ -78,7 +78,7 @@ namespace Faust
 		{
 			FSFG alpha;
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-			dsm_funcs->dot(this->gpu_mat, v.gpu_mat, &alpha);
+			dsm_funcs->dot(this->gpu_mat, v.gpu_mat, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&alpha));
 			return alpha;
 		}
@@ -87,14 +87,18 @@ namespace Faust
 		{
 			FSFG s;
 			auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-			dsm_funcs->sum(this->gpu_mat, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&s));
+			dsm_funcs->sum(this->gpu_mat, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&s));
 			return s;
 		}
 		template<>
 			FSFG Vect<FSFG,GPU2>::mean() const
 			{
-				return sum()/size();
+				//				return sum()/size();
+				FSFG m;
+				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
+				dsm_funcs->mean(this->gpu_mat, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&m));
+				return m;
 			}
 		template<>
@@ -147,7 +151,7 @@ namespace Faust
 			void Vect<FSFG,GPU2>::setValues(const FSFG& val)
 			{
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				dsm_funcs->setval(this->gpu_mat, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(const_cast<FSFG*>(&val)));
+				dsm_funcs->setval(this->gpu_mat, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(const_cast<FSFG*>(&val)));
 			}
 		template<>
@@ -156,7 +160,7 @@ namespace Faust
 				FSFG e;
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				dsm_funcs->mean_relerr(this->get_gpu_mat_ptr(), ref_vec.get_gpu_mat_ptr(), reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&e));
+				dsm_funcs->mean_relerr(this->get_gpu_mat_ptr(), ref_vec.get_gpu_mat_ptr(), (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&e));
 				return e;
 			}
@@ -165,7 +169,7 @@ namespace Faust
 			{
 				FSFG coeff;
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				dsm_funcs->coeff(this->gpu_mat, i, 0, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&coeff));
+				dsm_funcs->coeff(this->gpu_mat, i, 0, (@GM_SCALAR@*) reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&coeff));
 				return coeff;
 			}
@@ -174,6 +178,6 @@ namespace Faust
 			void Vect<FSFG,GPU2>::set_coeff(faust_unsigned_int i, const FSFG& val)
 			{
 				auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
-				dsm_funcs->set_coeff(this->gpu_mat, i, 0, reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&val));
+				dsm_funcs->set_coeff(this->gpu_mat, i, 0, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&val));
 			}
 }