Fix multiple bugs in GPU matfaust.Faust.real/imag/double/single for Faust SPARSE matrices.

A bug in MatSparse<FPP, GPU2>::set() occurred because the function was setting the matrix from CSR buffers while the mex code was sending a CSC matrix (the MATLAB default format). Another bug in Transform<FPP, GPU2>::get_fact was due to an int32_t/faust_unsigned_int confusion: the faust_unsigned_int output pointers were cast to int32_t* when passed to tocpu().

Fix multiple bugs in GPU matfaust.Faust.real/imag/double/single for Faust SPARSE matrices.
b0a38a7c · hhakim · cb6e53ea · b0a38a7c · b0a38a7c · b0a38a7c
Commit b0a38a7c authored 1 year ago by hhakim
--- a/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.cpp.in
@@ -191,12 +191,6 @@ namespace Faust
 				sp_mat.resize(nnz, nrows, ncols);
 			}
 			tocpu(sp_mat.getRowPtr(), sp_mat.getColInd(), sp_mat.getValuePtr());
-			// although the above tocpu copy works perfectly if sp_mat is copied later with the MatSparse Cpu copy ctor
-			// then there is a segfault on the underlying SparseMatrix eigen object (why?)
-			// workaround: copy the buffer in a clean new MatSparse to avoid later issues
-			// mitigations: normally the copy to cpu is just to verify what's on the GPU memory so the performance is not so important
-			//TOFIX
-			sp_mat = MatSparse<@FAUST_SCALAR_FOR_GM@,Cpu>(nnz, nrows, ncols, sp_mat.getValuePtr(), sp_mat.getRowPtr(), sp_mat.getColInd());
 		}


@@ -212,7 +206,7 @@ namespace Faust
 		void Faust::MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::transpose()
 		{
 			auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
-		    spm_funcs->transpose(gpu_mat);
+			spm_funcs->transpose(gpu_mat);
 		}

 	template<>
@@ -262,34 +256,38 @@ namespace Faust
 		}

 	template<>
-		void Faust::MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::set(int32_t nnz, int32_t nrows, int32_t ncols, @FAUST_SCALAR_FOR_GM@* values, int32_t* rowptr, int32_t* colids)
+		void Faust::MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::set(int32_t nnz, int32_t nrows, int32_t ncols, @FAUST_SCALAR_FOR_GM@* values, int32_t* rowids, int32_t* colptr)
 		{
 			auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
 			if(gpu_mat != nullptr)
-				spm_funcs->cpu_set(gpu_mat, nnz, nrows, ncols, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(values), rowptr, colids);
+				// the resizing is made in gpu_mod if needed
+				// we set a CSR matrix with the buffers of a CSC matrix, so nrows and ncols are swapped
+				spm_funcs->cpu_set(gpu_mat, nnz, ncols, nrows, (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(values), colptr, rowids);
+			// convert to CSR matrix
+			this->transpose();
 		}

 	template<>
-		void Faust::MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::set(int32_t nnz, int32_t nrows, int32_t ncols, @FAUST_SCALAR_FOR_GM@* values, size_t* rowptr, size_t* colids)
+		void Faust::MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::set(int32_t nnz, int32_t nrows, int32_t ncols, @FAUST_SCALAR_FOR_GM@* values, size_t* rowids, size_t* colptr)
 		{
-			int32_t* rowptr2 = new int32_t[nrows+1];
-			int32_t* colids2 = new int32_t[nnz];
+			int32_t* rowids2 = new int32_t[nnz];
+			int32_t* colptr2 = new int32_t[ncols+1];
 			
-			for(int32_t i=0;i < nrows+1; i++)
+			for(int32_t i=0;i < nnz; i++)
 			{
-				//TODO: verify if rowptr[i] is not larger than int32_t capability
+				//TODO: verify if rowids[i] is not larger than int32_t capability
 				// if not raise an exception
-				rowptr2[i] = (int32_t) rowptr[i];
+				rowids2[i] = (int32_t) rowids[i];
 			}

-			for(int32_t i=0;i < nnz; i++)
+			for(int32_t i=0;i < ncols+1; i++)
 			{
-				//TODO: verify if colids[i] is not larger than int32_t capability
-				colids2[i] = (int32_t) colids[i];
+				//TODO: verify if colptr[i] is not larger than int32_t capability
+				colptr2[i] = (int32_t) colptr[i];
 			}
-			set(nnz, nrows, ncols, values, rowptr2, colids2);
-			delete [] rowptr2;
-			delete [] colids2;
+			set(nnz, nrows, ncols, values, rowids2, colptr2);
+			delete [] rowids2;
+			delete [] colptr2;
 		}

 	template<>

--- a/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatSparse_gpu.h
@@ -84,8 +84,12 @@ namespace Faust
 				void setEyes();
 				void setIdentity(int32_t dim);
 				void setZeros();
-				void set(int32_t nnz, int32_t nrows, int32_t ncols, FPP* values, int32_t* rowptr, int32_t* colids);
-				void set(int32_t nnz, int32_t nrows, int32_t ncols, FPP* values, size_t* rowptr, size_t* colids);
+				/**
+				 * Set this from CSC matrix buffers.
+				 * TODO: this function should be called set_from_csc (and a function set_from_csr added), the same things must be done in CPU counterpart
+				 */
+				void set(int32_t nnz, int32_t nrows, int32_t ncols, FPP* values, int32_t* rowids, int32_t* colptr);
+				void set(int32_t nnz, int32_t nrows, int32_t ncols, FPP* values, size_t* rowids, size_t* colptr);
 				MatSparse<FPP, GPU2>* clone(const int32_t dev_id=-1, const void* stream=nullptr) const;
 				MatGeneric<FPP,GPU2>* Clone(const bool isOptimize=false) const;
 				void real(MatSparse<Real<FPP>, GPU2>& real_mat) const;

--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.hpp
@@ -349,17 +349,22 @@ namespace Faust
 			auto gen_mat = get_fact(id, /*cloning*/ false);
 			auto smat = dynamic_cast<MatSparse<FPP,GPU2>*>(gen_mat);
 			MatSparse<FPP,Cpu> cpu_smat;
+			// warning: arguments are of type faust_unsigned_int in this function but tocpu below uses int32_t
+			int32_t nr, nc, nz;
 			if(transpose)
 			{
 				auto t_smat = smat->clone();
 				t_smat->transpose();
-				t_smat->tocpu(d_outer_count_ptr, d_inner_ptr, d_elts, (int32_t*) num_rows, (int32_t*) num_cols, (int32_t*) nnz);
+				t_smat->tocpu(d_outer_count_ptr, d_inner_ptr, d_elts, &nr, &nc, &nz);
 				delete t_smat;
 			}
 			else
 			{
-				smat->tocpu(d_outer_count_ptr, d_inner_ptr, d_elts, (int32_t*) num_rows, (int32_t*) num_cols, (int32_t*) nnz);
+				smat->tocpu(d_outer_count_ptr, d_inner_ptr, d_elts, &nr, &nc, &nz);
 			}
+			*num_rows =  nr;
+			*num_cols =  nc;
+			*nnz = nz;
 		}