Add TransformHelper<FPP,GPU2>::sliceMultiply and update to gpu_mod@f6f99e42.

ef26d239 · hhakim · 45a7038b · f6f99e42 · 1ec80777 · f6f99e42
Commit ef26d239 authored 3 years ago by hhakim
--- a/gpu_mod @ f6f99e42
+++ b/gpu_mod @ f6f99e42
-Subproject commit 1ec807770f677154c026b8d1679f86f089ca937f
+Subproject commit f6f99e42280ce50bbe96c7364b303671b9397934
--- a/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.h
@@ -49,6 +49,7 @@ namespace Faust
 				Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> &x);
 				void multiply(const FPP* cpu_x, FPP* cpu_y);
 				void multiply(const FPP* cpu_x, int x_ncols, FPP* cpu_y);
+				FPP* sliceMultiply(const Slice s[2], const FPP* cpu_X, FPP* cpu_out/*=nullptr*/, int X_ncols/*=1*/) const; // NOTE: the default values are only comments here, so all four arguments must be passed
 				Real<FPP> normFro(const bool full_array=true, const int batch_size=1) const;
 				Real<FPP> normL1(const bool full_array=true, const int batch_size=1) const;
 				Real<FPP> normInf(const bool full_array=true, const int batch_size=1) const;

--- a/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.hpp
+++ b/src/faust_linear_operator/GPU2/faust_TransformHelper_gpu.hpp
@@ -364,7 +364,7 @@ namespace Faust
    template<typename FPP>
        MatDense<FPP,Cpu> TransformHelper<FPP,GPU2>::multiply(const Faust::MatDense<FPP,Cpu> &A)
        {
-            MatDense<FPP,GPU2> M = this->multiply(MatDense<FPP,GPU2>(A), transpose, conjugate);
+            MatDense<FPP,GPU2> M = this->multiply(MatDense<FPP,GPU2>(A)); // delegates to the GPU2 overload on a temporary MatDense<FPP,GPU2> built from A
            return M.tocpu();
        }

@@ -372,7 +372,7 @@ namespace Faust
        Vect<FPP,Cpu> TransformHelper<FPP,GPU2>::multiply(const Faust::Vect<FPP,Cpu> &A)
        {
            Vect<FPP,GPU2> gpu_A(A.size(), A.getData());
-            Vect<FPP,GPU2> v = this->multiply(gpu_A , transpose, conjugate); //TODO: handle transpose and conjugate
+            Vect<FPP,GPU2> v = this->multiply(gpu_A); //TODO: handle transpose and conjugate
            return v.tocpu();
        }

@@ -381,7 +381,7 @@ namespace Faust
        {
            int32_t in_vec_size = this->getNbCol();
            Vect<FPP,GPU2> gpu_A(in_vec_size, cpu_in_buf);
-            Vect<FPP,GPU2> v = this->multiply(gpu_A , transpose, conjugate); //TODO: handle transpose and conjugate
+            Vect<FPP,GPU2> v = this->multiply(gpu_A); //TODO: handle transpose and conjugate
            v.tocpu(cpu_out_buf);
        }

@@ -394,7 +394,7 @@ namespace Faust
            else
                x_nrows = this->transform->getNbCol();
            MatDense<FPP,GPU2> gpu_x(x_nrows, x_ncols, cpu_x_buf, false);
-            MatDense<FPP,GPU2> gpu_M = this->multiply(gpu_x, transpose, conjugate); //TODO: handle transpose and conjugate
+            MatDense<FPP,GPU2> gpu_M = this->multiply(gpu_x); //TODO: handle transpose and conjugate
                                                                                    // TODO: fix this function, it works until here then it segfaults or gives a cuda error with tocpu (even if I use a cpu matdense set locally)
            gpu_M.tocpu(cpu_out_buf, nullptr);
        }
@@ -855,6 +855,32 @@ namespace Faust
 			this->eval_fancy_idx_Transform();
 		}

+	// Multiplies the slice F[s[0], s[1]] of this Faust by a dense matrix X through the GPU2 backend;
+	// X and the product are exchanged through host (CPU) buffers.
+	//
+	// \param s        row slice (s[0]) and column slice (s[1]); end_id is not included in a slice.
+	// \param cpu_X    host buffer of X; X is taken to have as many rows as s[1] selects columns.
+	// \param cpu_out  host buffer receiving the product. NOTE(review): it is passed to tocpu() as is and
+	//                 nothing is allocated here when it is nullptr, so pass a valid buffer.
+	// \param X_ncols  number of columns of X.
+	// \return cpu_out.
+	template<typename FPP>
+		FPP* Faust::TransformHelper<FPP,GPU2>::sliceMultiply(const Slice s[2], const FPP* cpu_X, FPP* cpu_out/*=nullptr*/, int X_ncols/*=1*/) const
+		{
+			//TODO: take care of eval_sliced_Transform calls
+			int32_t X_nrows;
+			if(s[1].start_id != 0 || s[1].end_id != this->getNbCol())
+				X_nrows = s[1].end_id-s[1].start_id; // partial column slice: X has one row per selected column (end_id is not included)
+			else
+				X_nrows = this->getNbCol(); // transpose and slice aware and not evaluating the slice
+			MatDense<FPP,GPU2> gpu_X(X_nrows, X_ncols, cpu_X, false);
+			// opThis has no default value in Transform<FPP,GPU2>::sliceMultiply: it must be passed explicitly.
+			//TODO: forward the transpose/conjugate state instead of always using 'N'
+			MatDense<FPP,GPU2> gpu_M = this->transform->sliceMultiply(s, gpu_X, 'N');
+			gpu_M.tocpu(cpu_out, nullptr);
+			return cpu_out;
+		}
+
 }

 #include "faust_TransformHelper_cat_gpu.hpp"
--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.cpp.in
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.cpp.in
@@ -383,6 +383,50 @@ namespace Faust
 			}
 		}

+	// Applies the row slice s[0] and the column slice s[1] to this Transform and multiplies the result by
+	// gpu_X, delegating the computation to gpu_mod's sliced_chain_matmul_by_dsm_one.
+	//
+	// \param s       s[0] is the row slice, s[1] the column slice; end_id is not included in a slice.
+	// \param gpu_X   right-hand side GPU2 dense matrix.
+	// \param opThis  'N' (no transpose), 'T' (transpose) or 'H' (conjugate transpose).
+	// \return a GPU2 dense matrix whose gpu_mat is the matrix returned by gpu_mod.
+	// \throws std::runtime_error if gpu_mat_arr is nullptr or if opThis is not 'N', 'T' or 'H'.
+	template<>
+		MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::sliceMultiply(const Slice s[2], MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>& gpu_X, const char opThis) const
+		{
+			auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
+			if(gpu_mat_arr == nullptr) throw std::runtime_error("gpu_mat_arr is nullptr");
+			//TODO: refactor this code with other functions doing the same
+			gm_Op op;
+			faust_unsigned_int out_nrows, out_ncols;
+			if(opThis == 'N')
+			{
+				op = OP_NOTRANSP;
+				out_nrows = getNbRow();
+				out_ncols = getNbCol();
+			}
+			else if(opThis == 'T' || opThis == 'H')
+			{
+				op = opThis == 'T' ? OP_TRANSP : OP_CONJTRANSP;
+				// out_nrows/out_ncols are read below to build out, so they must be set on this path too (transposed dimensions)
+				out_nrows = getNbCol();
+				out_ncols = getNbRow();
+			}
+			else
+				throw std::runtime_error("Invalid opThis");
+			// A size of 0 is passed for a slice spanning the whole dimension (presumably read by gpu_mod as "no slicing" -- confirm).
+			int rs_size = 0, cs_size = 0;
+			if(s[0].start_id != 0 || s[0].end_id != getNbRow())
+				rs_size = s[0].end_id-s[0].start_id; // end_id is not included in the slice
+			if(s[1].start_id != 0 || s[1].end_id != getNbCol()) // column slice: the full range ends at the number of columns
+				cs_size = s[1].end_id-s[1].start_id; // end_id is not included in the slice
+			// out is built with no_alloc so that its gpu_mat can be set to the matrix returned by gpu_mod
+			MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
+			// TODO: call chain_matmul_by_dsm_one if both cs_size and rs_size are null
+			out.gpu_mat = marr_funcs->sliced_chain_matmul_by_dsm_one(this->gpu_mat_arr, (int) s[0].start_id, (int) rs_size, (int) s[1].start_id, (int) cs_size, op, gpu_X.gpu_mat);
+			return out;
+		}
+
 	template<>
 		void Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiplyLeft(const Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2> & A)
 		{

--- a/src/faust_linear_operator/GPU2/faust_Transform_gpu.h
+++ b/src/faust_linear_operator/GPU2/faust_Transform_gpu.h
@@ -7,6 +7,7 @@
 #include "faust_MatSparse_gpu.h"
 #include "faust_MatDense_gpu.h"
 #include "faust_RefManager.h"
+#include "faust_Slice.h"
 #include <vector>

 namespace Faust
@@ -66,6 +67,7 @@ namespace Faust
 			void get_product(MatDense<FPP,GPU2>& M, const char opThis='N', const bool isConj=false) const;
 			MatDense<FPP,GPU2> multiply(const MatDense<FPP,GPU2> &A, const char opThis);
 			void multiply(const Transform<FPP,GPU2> & A);
+			MatDense<FPP, GPU2> sliceMultiply(const Slice s[2], MatDense<FPP, GPU2>& gpu_X, const char opThis) const; // opThis ('N', 'T' or 'H') has no default value here, unlike get_product and the Vect overload of multiply
 			void multiplyLeft(const Transform<FPP,GPU2> & A);
 			void multiply(const FPP& a, const int32_t id=-1);
 			Vect<FPP,GPU2> multiply(const Vect<FPP,GPU2>& x, const char opThis='N');