Add a module faust_linear_algebra with gemm and gemv (as in CPU equivalent...

Add a faust_linear_algebra module for GPU2 with gemm and gemv (as in the equivalent CPU module), relying on the eponymous MatDense<GPU2> methods (now static).

Add a module faust_linear_algebra with gemm and gemv (as in CPU equivalent...
453683e4 · hhakim · f633a9a8 · 453683e4 · 453683e4 · 453683e4
Commit 453683e4 authored 3 years ago by hhakim
--- a/src/algorithm/factorization/faust_palm4msa2020.h
+++ b/src/algorithm/factorization/faust_palm4msa2020.h
@@ -8,6 +8,7 @@
 #include "faust_MatGeneric_gpu.h"
 #include "faust_MatDense_gpu.h"
 #include "faust_MatSparse_gpu.h"
+#include "faust_linear_algebra_gpu.h"
 #include "faust_TransformHelper_gpu.h"
 #endif
 #include "faust_Params.h"

--- a/src/faust_linear_operator/CPU/faust_linear_algebra.h
+++ b/src/faust_linear_operator/CPU/faust_linear_algebra.h
@@ -65,6 +65,7 @@ namespace Faust
 	template<typename FPP>
 		void add(const MatDense<FPP,Cpu> & A, const MatDense<FPP,Cpu> & B, MatDense<FPP,Cpu> & C);
+	// Computes alpha*typeA(A)*typeB(B)+ beta*C into C.
 	template<typename FPP>
 		void gemm_gen(const Faust::MatGeneric<FPP,Cpu> & A,const Faust::MatGeneric<FPP,Cpu> & B, Faust::MatDense<FPP,Cpu> & C,const FPP & alpha, const FPP & beta, char  typeA, char  typeB);

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.cpp.in
@@ -514,7 +514,7 @@ namespace Faust
 		}
 	template <>
-		void gemm(const MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &A, const MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &B, MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &C,
+		void Faust::MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>::gemm(const MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &A, const MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &B, MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &C,
 				const @FAUST_SCALAR_FOR_GM@& alpha, const @FAUST_SCALAR_FOR_GM@& beta, const char opA, const char opB)
 		{
 			gm_Op gop_A, gop_B;
@@ -525,7 +525,7 @@ namespace Faust
 		}
 	template <>
-		void gemv(const MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &A, const Vect<@FAUST_SCALAR_FOR_GM@, GPU2> &B, Vect<@FAUST_SCALAR_FOR_GM@, GPU2> &C,
+		void Faust::MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>::gemv(const MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> &A, const Vect<@FAUST_SCALAR_FOR_GM@, GPU2> &B, Vect<@FAUST_SCALAR_FOR_GM@, GPU2> &C,
 				const @FAUST_SCALAR_FOR_GM@& alpha, const @FAUST_SCALAR_FOR_GM@& beta, const char opA, const char opB/*='N'*/)
 		{
 			gm_Op gop_A, gop_B;

--- a/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_MatDense_gpu.h
@@ -10,11 +10,6 @@
 #include "faust_gpu_mod_utils.h"
 namespace Faust
 {
-	template <typename FPP>
-	void gemm(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
-	template <typename FPP>
-	void gemv(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &B, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB='N');
 	template<typename FPP, FDevice DEVICE>
@@ -24,9 +19,9 @@ namespace Faust
 		{
 			friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
 			friend MatSparse<FPP,GPU2>;
-			friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
+//			friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
+//
-			friend void gemv<>(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &B, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
+//			friend void gemv<>(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &B, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
 			public:
 				MatDense(const faust_unsigned_int nbRow,
@@ -133,6 +128,8 @@ namespace Faust
 				void prox_sp(int32_t k, bool normalized=false, bool pos=false) const;
 				void prox_spcol(int32_t k, bool normalized=false, bool pos=false) const;
 				void prox_splin(int32_t k, bool normalized=false, bool pos=false) const;
+				static void gemm(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
+				static void gemv(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &B, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB='N');
 			protected:
 				gm_DenseMat_t gpu_mat;

--- a/src/faust_linear_operator/GPU2/faust_linear_algebra_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_linear_algebra_gpu.h
+#ifndef LINALGEBRA_GPU2_H
+#define LINALGEBRA_GPU2_H
+#include <stdexcept>
+#include "faust_constant.h"
+#include "faust_MatDense_gpu.h"
+#include "faust_Vect_gpu.h"
+namespace Faust
+{
+	// Computes alpha*opA(A)*opB(B) + beta*C into C, for dense GPU2 matrices.
+	// A, B: the input operands; C: read as the accumulator and overwritten with the result.
+	// alpha, beta: the scalar coefficients of the product and of C respectively.
+	// opA, opB: operation codes applied to A and B (forwarded as-is to MatDense<FPP,GPU2>::gemm;
+	// presumably 'N'/'T'/'H' as in BLAS -- TODO confirm against the MatDense implementation).
+	template<typename FPP>
+		void gemm(const MatDense<FPP,GPU2> & A,const MatDense<FPP,GPU2> & B, MatDense<FPP,GPU2> & C,const FPP  alpha, const FPP  beta, char  opA, char  opB);
+	// Computes alpha*opA(A)*b + beta*c into c.
+	// A: dense GPU2 matrix; b: input GPU2 vector; c: read as the accumulator and overwritten with the result.
+	// alpha, beta: the scalar coefficients of the product and of c respectively.
+	// opA: operation code applied to A (presumably 'N'/'T'/'H' as in BLAS -- TODO confirm).
+	template<typename FPP>
+		void gemv(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &b, Vect<FPP, GPU2> &c, const FPP& alpha, const FPP& beta, const char opA);
+	// Computes alpha*opA(A)*opB(B) + beta*C into C, where A and B are generic GPU2 matrices.
+	// The operands are downcast at runtime: when both are MatDense<FPP,GPU2> the dense gemm is called.
+	// The current implementation throws std::runtime_error as soon as A or B is a MatSparse<FPP,GPU2>
+	// (sparse-dense products are not implemented yet on GPU2, see the spgemm TODO below).
+	template<typename FPP>
+		void gemm_gen(const MatGeneric<FPP,GPU2> & A, const MatGeneric<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP  alpha, const FPP  beta, char  opA, char  opB);
+	//	TODO: implement spgemm using MatSparse::multiply. Warning: 'H' is not supported for opB (see gpu_mod / https://docs.nvidia.com/cuda/archive/9.2/cusparse/index.html cusparseTcsrmm2 for more details), so a manual copy-conjugate must be done beforehand.
+//	template<typename FPP>
+//		void spgemm(const MatSparse<FPP,Cpu> & A,const MatDense<FPP,Cpu> & B, MatDense<FPP,Cpu> & C,const FPP & alpha, const FPP & beta, char opA, char opB);
+//
+//	template<typename FPP>
+//		void spgemm(const MatDense<FPP,Cpu> & A,const MatSparse<FPP,Cpu> & B, MatDense<FPP,Cpu> & C,const FPP & alpha, const FPP & beta, char opA, char opB);
+}
+#include "faust_linear_algebra_gpu.hpp"
+#endif
--- a/src/faust_linear_operator/GPU2/faust_linear_algebra_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_linear_algebra_gpu.hpp
+namespace Faust
+{
+	// Free-function front end for the dense GPU2 matrix product:
+	// computes alpha*typeA(A)*typeB(B) + beta*C into C.
+	// Every argument is handed over unchanged to the static method MatDense<FPP,GPU2>::gemm.
+	template<typename FPP>
+		void gemm(const MatDense<FPP,GPU2> & A,
+				const MatDense<FPP,GPU2> & B,
+				MatDense<FPP,GPU2> & C,
+				const FPP  alpha,
+				const FPP  beta,
+				char  typeA,
+				char  typeB)
+		{
+			typedef MatDense<FPP, GPU2> DenseMatGPU;
+			DenseMatGPU::gemm(A, B, C, alpha, beta, typeA, typeB);
+		}
+	// Computes alpha*opA(A)*b + beta*c into c by delegating to the static method
+	// MatDense<FPP,GPU2>::gemv.
+	// A: dense GPU2 matrix; b: input vector; c: accumulator, overwritten with the result.
+	// alpha, beta: scalar coefficients; opA: operation code applied to A.
+	template<typename FPP>
+		void gemv(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &b, Vect<FPP, GPU2> &c, const FPP& alpha, const FPP& beta, const char opA)
+		{
+			// opA has to be forwarded explicitly: the static method declares no default for it
+			// (only opB defaults to 'N'), so omitting it neither compiles nor honours the
+			// operation requested by the caller.
+			MatDense<FPP, GPU2>::gemv(A, b, c, alpha, beta, opA);
+		}
+	// Computes alpha*typeA(A)*typeB(B) + beta*C into C for generic GPU2 operands.
+	// A and B are downcast at runtime to pick the proper product:
+	//  - MatDense x MatDense: delegates to gemm();
+	//  - any MatSparse operand: throws std::runtime_error (spgemm is not implemented yet on GPU2);
+	//  - an operand that is neither MatSparse nor MatDense: throws std::runtime_error
+	//    (instead of dereferencing a null pointer).
+	template<typename FPP>
+		void gemm_gen(const MatGeneric<FPP,GPU2> & A, const MatGeneric<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP  alpha, const FPP  beta, char  typeA, char  typeB)
+		{
+			// downcast and call the proper function
+			const MatSparse<FPP, GPU2>* spA = dynamic_cast<const Faust::MatSparse<FPP,GPU2>*>(&A);
+			const MatSparse<FPP, GPU2>* spB = dynamic_cast<const Faust::MatSparse<FPP,GPU2>*>(&B);
+			// The dense downcast is only attempted when the operand is not sparse; the pointers are
+			// always initialized so that a failed cast is detectable below.
+			const MatDense<FPP, GPU2>* dsA = spA ? nullptr : dynamic_cast<const Faust::MatDense<FPP,GPU2>*>(&A);
+			const MatDense<FPP, GPU2>* dsB = spB ? nullptr : dynamic_cast<const Faust::MatDense<FPP,GPU2>*>(&B);
+			if(spA && spB)
+				throw std::runtime_error("gemm on two MatSparse is not supported.");
+			else if(spA)
+				throw std::runtime_error("spgemm is not supported yet on GPU2."); //TODO: after spgemm impl. (check dsB is not null before use)
+//				spgemm(*spA, *dsB, C, alpha, beta, typeA, typeB);
+			else if(spB)
+				throw std::runtime_error("spgemm is not supported yet on GPU2."); //TODO: after spgemm impl. (check dsA is not null before use)
+//				spgemm(*dsA, *spB, C, alpha, beta, typeA, typeB);
+			else if(! dsA || ! dsB)
+				// neither sparse nor dense: refuse rather than dereference a null pointer
+				throw std::runtime_error("gemm_gen: operands must be MatDense or MatSparse GPU2 matrices.");
+			else
+				gemm(*dsA, *dsB, C, alpha, beta, typeA, typeB);
+		}
+}