Add GPU MatBSR (class skeleton + few implementations: instantiation and tocpu func.).

41106267 · hhakim · e93f86ee · 41106267 · 41106267 · 41106267
Commit 41106267 authored 3 years ago by hhakim
--- a/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.cpp.in
+#include "faust_MatBSR_gpu.h"
+namespace Faust
+{
+	/**
+	 * \brief Builds the GPU-side BSR matrix from host BSR buffers.
+	 *
+	 * The current device is saved, dev_id is selected for the duration of the
+	 * togpu() call, then the saved device is selected again.
+	 *
+	 * \param nrows, ncols   forwarded as-is to the backend togpu() function.
+	 * \param bnrows, bncols forwarded as-is to the backend togpu() function.
+	 * \param bnnz           forwarded as-is to the backend togpu() function (last argument).
+	 * \param bdata          host buffer of block values (reinterpreted to the backend scalar type).
+	 * \param browptr        host buffer passed to togpu() after bdata.
+	 * \param bcolinds       host buffer passed to togpu() after browptr.
+	 * \param dev_id         device id handed to set_dev() before the transfer.
+	 * \param stream         currently unused (see the TODO below).
+	 */
+	template<>
+		MatBSR<@FAUST_SCALAR_FOR_GM@,GPU2>::MatBSR(
+				const faust_unsigned_int nrows,
+				const faust_unsigned_int ncols,
+				const faust_unsigned_int bnrows,
+				const faust_unsigned_int bncols,
+				const faust_unsigned_int bnnz,
+				const @FAUST_SCALAR_FOR_GM@* bdata,
+				const int32_t* browptr,
+				const int32_t* bcolinds,
+				const int32_t dev_id/*=-1*/,
+				const void* stream/*=nullptr*/)
+		{
+			auto gpu_handler = GPUModHandler::get_singleton();
+			auto bsr_api = gpu_handler->bsr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
+			auto dev_api = gpu_handler->gp_funcs();
+
+			// Remember which device was active so it can be re-selected afterwards.
+			const auto previous_dev = dev_api->cur_dev();
+			dev_api->set_dev(dev_id);
+
+			//TODO: gpu_stream
+			//	gpu_mat = spm_funcs->togpu_stream();
+			auto gm_bdata = (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(bdata);
+			gpu_mat = bsr_api->togpu(nrows, ncols, bnrows, bncols, gm_bdata, browptr, bcolinds, bnnz);
+
+			dev_api->set_dev(previous_dev);
+		}
+
+	/**
+	 * \brief Forwards to the backend tocpu() function with caller-provided output buffers.
+	 *
+	 * \param browptr, bcolinds, bdata output buffers handed to the backend (bdata is
+	 *        reinterpreted to the backend scalar type first).
+	 * \param nrows, ncols, bnrows, bncols, bnnz optional output pointers, passed through unchanged.
+	 */
+	template<>
+		void MatBSR<@FAUST_SCALAR_FOR_GM@,GPU2>::tocpu(
+				int32_t* browptr,
+				int32_t* bcolinds,
+				@FAUST_SCALAR_FOR_GM@* bdata,
+				int32_t* nrows/*=nullptr*/,
+				int32_t* ncols/*=nullptr*/,
+				int32_t *bnrows/*=nullptr*/,
+				int32_t *bncols/*=nullptr*/,
+				int32_t* bnnz/*=nullptr*/) const
+		{
+			auto bsr_api = GPUModHandler::get_singleton()->bsr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
+			auto gm_bdata = (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(bdata);
+			bsr_api->tocpu(gpu_mat, browptr, bcolinds, gm_bdata, nrows, ncols, bnrows, bncols, bnnz);
+		}
+
+	/**
+	 * \brief Fills cpu_mat with the content of this GPU matrix.
+	 *
+	 * The sizes are first queried through the backend info() function, a
+	 * temporary CPU matrix is built with them, the backend tocpu() function
+	 * writes into its buffers, and the temporary is finally moved into cpu_mat.
+	 *
+	 * \param cpu_mat the CPU matrix that is overwritten (move-assigned).
+	 */
+	template<>
+		void MatBSR<@FAUST_SCALAR_FOR_GM@,GPU2>::tocpu(MatBSR<@FAUST_SCALAR_FOR_GM@, Cpu> &cpu_mat) const
+		{
+			auto bsr_api = GPUModHandler::get_singleton()->bsr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
+
+			// Sizes are written by info().
+			int32_t nrows;
+			int32_t ncols;
+			int32_t bnrows;
+			int32_t bncols;
+			int32_t bnnz;
+			bsr_api->info(gpu_mat, &nrows, &ncols, &bnrows, &bncols, &bnnz);
+
+			MatBSR<@FAUST_SCALAR_FOR_GM@, Cpu> host_mat(nrows, ncols, bnrows, bncols, bnnz);
+
+			// The getters return const pointers; constness is cast away so the backend can write into them.
+			auto host_browptr = const_cast<int*>(host_mat.get_browptr());
+			auto host_bcolinds = const_cast<int*>(host_mat.get_bcolinds());
+			auto host_bdata = (@GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(const_cast<@FAUST_SCALAR_FOR_GM@*>(host_mat.get_bdata()));
+
+			// The sizes are already known, hence the nullptr size outputs.
+			bsr_api->tocpu(gpu_mat, host_browptr, host_bcolinds, host_bdata, nullptr, nullptr, nullptr, nullptr, nullptr);
+
+			cpu_mat = std::move(host_mat);
+		}
+}
+
--- a/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.h
+#ifndef __FAUST_MATBSR_GPU2__
+#define __FAUST_MATBSR_GPU2__
+#ifdef USE_GPU_MOD
+#include <complex>
+#include <stdexcept>
+#include "faust_gpu_mod_utils.h"
+#include "faust_constant.h"
+#include "faust_MatGeneric_gpu.h"
+#include "faust_MatBSR.h"
+namespace Faust
+{
+	template<typename FPP, FDevice DEVICE> // forward declaration of the primary template
+		class MatSparse;
+	template<typename FPP, FDevice DEVICE> // forward declaration of the primary template
+		class MatDense;
+	template<typename FPP, FDevice DEVICE> // primary template; partially specialized for GPU2 just below
+		class MatBSR;
+
+	template<typename FPP> // GPU2 partial specialization of MatBSR
+		class MatBSR<FPP, GPU2> : public MatGeneric<FPP,GPU2>
+		{
+
+
+			gm_BSRMat_t gpu_mat; // backend matrix handle (private); assigned by the buffer constructor from the result of bsr_funcs->togpu(...)
+			/** \brief Inits from CPU buffers.
+			 *
+			 * (This note describes the first constructor declared in the public
+			 * section below: it takes the sizes and the three host buffers bdata,
+			 * browptr, bcolinds, plus a device id and a stream pointer.)
+			 */
+
+			public:
+			/*********** ctors **************/
+			MatBSR(const faust_unsigned_int nrows,
+					const faust_unsigned_int ncols,
+					const faust_unsigned_int bnrows,
+					const faust_unsigned_int bncols,
+					const faust_unsigned_int bnnz,
+					const FPP* bdata,
+					const int32_t* browptr,
+					const int32_t* bcolinds,
+					const int32_t dev_id=-1,
+					const void* stream=nullptr); // defined per scalar type in faust_MatBSR_gpu.cpp.in; stream is not used by that definition yet
+
+			MatBSR(const MatBSR<FPP, Cpu>& mat,
+					const int32_t dev_id=-1,
+					const void* stream=nullptr); // delegates to the buffer constructor using mat's getters (see faust_MatBSR_gpu.hpp)
+
+			/*********** MatGeneric member functions **************/
+			// NOTE(review): every member function of this section only has a TODO placeholder body in faust_MatBSR_gpu.hpp for now.
+			void setZeros();
+			size_t getNBytes() const;
+			MatType getType() const;
+			int32_t getNbRow() const;
+			int32_t getNbCol() const;
+			MatGeneric<FPP,GPU2>* clone(const int32_t dev_id=-1, const void* stream=nullptr) const;
+			MatGeneric<FPP,GPU2>* Clone(const bool isOptimize=false) const;
+			void* get_gpu_mat_ptr() const;
+			faust_unsigned_int getNonZeros() const;
+			void transpose();
+			void conjugate();
+			void adjoint();
+			MatGeneric<FPP,GPU2>* get_rows(faust_unsigned_int row_id_start, faust_unsigned_int num_rows) const;
+			MatGeneric<FPP,GPU2>* get_rows(faust_unsigned_int* row_ids, faust_unsigned_int num_rows) const;
+			Faust::MatGeneric<FPP,GPU2>* get_cols(faust_unsigned_int col_id_start, faust_unsigned_int num_cols) const;
+			Faust::MatGeneric<FPP,GPU2>* get_cols(faust_unsigned_int* col_ids, faust_unsigned_int num_cols) const;
+
+			void Display() const;
+			Real<FPP> norm() const;
+
+			virtual void set_gpu_mat_ptr(void*); // placeholder body in faust_MatBSR_gpu.hpp: the argument is currently ignored
+			/*********** own member functions **************/
+
+			// Writes into caller-provided buffers through the backend tocpu(); the size pointers are passed through as given.
+			// NOTE(review): the parameter spelled bncol here is spelled bncols in the .cpp.in definition.
+			void tocpu(int32_t* browptr, int32_t* bcolinds, FPP* bdata, int32_t* nrows=nullptr, int32_t* ncols=nullptr, int32_t *bnrows=nullptr, int32_t *bncol=nullptr, int32_t* bnnz=nullptr) const;
+			void tocpu(MatBSR<FPP, Cpu> &cpu_mat) const; // builds a temporary CPU matrix sized from the backend info() and move-assigns it to cpu_mat
+
+		}; // NOTE(review): no destructor is declared in this class; confirm where gpu_mat is released
+}
+#include "faust_MatBSR_gpu.hpp"
+#endif
+#endif
--- a/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_MatBSR_gpu.hpp
+namespace Faust 
+{
+	/**
+	 * \brief Builds the GPU matrix from a CPU MatBSR.
+	 *
+	 * Delegates to the buffer-based constructor, feeding it the sizes and the
+	 * buffers obtained from mat's getters.
+	 *
+	 * \param mat    the CPU matrix to read from.
+	 * \param dev_id forwarded to the buffer-based constructor.
+	 * \param stream forwarded to the buffer-based constructor.
+	 */
+	template<typename FPP>
+		MatBSR<FPP, GPU2>::MatBSR(const MatBSR<FPP, Cpu>& mat, const int32_t dev_id/*=-1*/, const void* stream/*=nullptr*/)
+		: MatBSR<FPP,GPU2>(
+				mat.getNbRow(), mat.getNbCol(),
+				mat.getNbBlockRow(), mat.getNbBlockCol(),
+				mat.getNBlocks(),
+				mat.get_bdata(), mat.get_browptr(), mat.get_bcolinds(),
+				dev_id, stream)
+		{
+			// Nothing else to do: the delegated constructor does all the work.
+		}
+
+	template<typename FPP> // placeholder definition
+		void MatBSR<FPP, GPU2>::setZeros() // NOTE(review): empty body, so a call currently leaves gpu_mat untouched
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+		}
+
+	/**
+	 * \brief Placeholder for getNBytes(): not implemented yet.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		size_t MatBSR<FPP, GPU2>::getNBytes() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::getNBytes is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for getType(): not implemented yet.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		MatType MatBSR<FPP, GPU2>::getType() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::getType is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for getNbRow(): not implemented yet.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		int32_t MatBSR<FPP, GPU2>::getNbRow() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::getNbRow is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for getNbCol(): not implemented yet.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		int32_t MatBSR<FPP, GPU2>::getNbCol() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::getNbCol is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for clone(): not implemented yet.
+	 *
+	 * \param dev_id currently unused.
+	 * \param stream currently unused.
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		MatGeneric<FPP,GPU2>* MatBSR<FPP, GPU2>::clone(const int32_t dev_id/*=-1*/, const void* stream/*=nullptr*/) const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::clone is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for Clone(): not implemented yet.
+	 *
+	 * \param isOptimize currently unused.
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		MatGeneric<FPP,GPU2>* MatBSR<FPP, GPU2>::Clone(const bool isOptimize/*=false*/) const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::Clone is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for get_gpu_mat_ptr(): not implemented yet.
+	 *
+	 * NOTE(review): presumably this should eventually expose gpu_mat; confirm
+	 * that gm_BSRMat_t converts to void* before returning it here.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		void* MatBSR<FPP, GPU2>::get_gpu_mat_ptr() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::get_gpu_mat_ptr is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for getNonZeros(): not implemented yet.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		faust_unsigned_int MatBSR<FPP, GPU2>::getNonZeros() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::getNonZeros is not implemented yet.");
+		}
+
+	template<typename FPP> // placeholder definition
+		void MatBSR<FPP, GPU2>::transpose() // NOTE(review): empty body, so a call currently leaves gpu_mat untouched
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+		}
+
+	template<typename FPP> // placeholder definition
+		void MatBSR<FPP, GPU2>::conjugate() // NOTE(review): empty body, so a call currently leaves gpu_mat untouched
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+		}
+
+	template<typename FPP> // placeholder definition
+		void MatBSR<FPP, GPU2>::adjoint() // NOTE(review): empty body, so a call currently leaves gpu_mat untouched
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+		}
+
+	/**
+	 * \brief Placeholder for get_rows() (start index + count overload): not implemented yet.
+	 *
+	 * \param row_id_start currently unused.
+	 * \param num_rows     currently unused.
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		MatGeneric<FPP,GPU2>* MatBSR<FPP, GPU2>::get_rows(faust_unsigned_int row_id_start, faust_unsigned_int num_rows) const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::get_rows is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for get_rows() (index array overload): not implemented yet.
+	 *
+	 * \param row_ids  currently unused.
+	 * \param num_rows currently unused.
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		MatGeneric<FPP,GPU2>* MatBSR<FPP, GPU2>::get_rows(faust_unsigned_int* row_ids, faust_unsigned_int num_rows) const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::get_rows is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for get_cols() (start index + count overload): not implemented yet.
+	 *
+	 * \param col_id_start currently unused.
+	 * \param num_cols     currently unused.
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		Faust::MatGeneric<FPP,GPU2>* MatBSR<FPP, GPU2>::get_cols(faust_unsigned_int col_id_start, faust_unsigned_int num_cols) const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::get_cols is not implemented yet.");
+		}
+
+	/**
+	 * \brief Placeholder for get_cols() (index array overload): not implemented yet.
+	 *
+	 * \param col_ids  currently unused.
+	 * \param num_cols currently unused.
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		Faust::MatGeneric<FPP,GPU2>* MatBSR<FPP, GPU2>::get_cols(faust_unsigned_int* col_ids, faust_unsigned_int num_cols) const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::get_cols is not implemented yet.");
+		}
+
+
+	template<typename FPP> // placeholder definition
+		void MatBSR<FPP, GPU2>::Display() const // NOTE(review): empty body, so a call currently prints nothing
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+		}
+
+	/**
+	 * \brief Placeholder for norm(): not implemented yet.
+	 *
+	 * \throws std::logic_error always, until a real implementation is written.
+	 */
+	template<typename FPP>
+		Real<FPP> MatBSR<FPP, GPU2>::norm() const
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+			// Flowing off the end of a non-void function is undefined behaviour, so fail explicitly instead.
+			throw std::logic_error("MatBSR<FPP, GPU2>::norm is not implemented yet.");
+		}
+
+	template<typename FPP> // placeholder definition
+		void MatBSR<FPP, GPU2>::set_gpu_mat_ptr(void*) // NOTE(review): the unnamed argument is ignored, gpu_mat is not modified
+		{
+			//TODO: implement (maybe by moving into .cpp.in)
+		}
+};