Commit b3b9fede authored by hhakim

Add a mat argument to pyfaust.Faust.optimize_time in order to run the benchmark on F@mat instead of F.toarray().

The function is implemented in the C++ core only for the CPU backend.
parent c75d035d
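Note: a minimal usage sketch of the new argument (the shapes are illustrative; pyfaust.rand and scipy.sparse.random are only used here to build test inputs, assuming the usual pyfaust API):

    import numpy as np
    from scipy.sparse import random as sprand
    import pyfaust

    F = pyfaust.rand(1024, 1024)                       # any Faust
    Md = np.random.rand(1024, 32)                      # dense test matrix
    Ms = sprand(1024, 32, density=.01, format='csr')   # sparse test matrix

    F1 = F.optimize_time(mat=Md)   # benchmark run on F@Md
    F2 = F.optimize_time(mat=Ms)   # benchmark run on F@Ms (CSR)
    F3 = F.optimize_time()         # default: benchmark run on F.toarray()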
@@ -172,6 +172,7 @@ namespace Faust
 		virtual TransformHelper<FPP,Cpu>* optimize_multiply(std::function<void()> f, const bool transp=false, const bool inplace=false, const int nsamples=1, const char* op_name="unamed_op");
 		virtual TransformHelper<FPP,Cpu>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
 		virtual TransformHelper<FPP,Cpu>* optimize_time_full(const bool transp=false, const bool inplace=false, const int nsamples=1);
+		TransformHelper<FPP,Cpu>* optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp=false, const bool inplace=false, const int nsamples=1);
 		/**
 			\brief Returns the left hand side factors of this from index 0 to id included (as a new TransformHelper obj).
@@ -213,8 +214,6 @@ namespace Faust
 		void convertToDense();
 		template<typename FPP2>
 		TransformHelper<Real<FPP>, Cpu>* real();
-	private:
-		MatDense<FPP,Cpu> multiply_dynprog(const MatGeneric<FPP,Cpu> &A, MatDense<FPP, Cpu> &out);
 	};
...
@@ -164,7 +164,6 @@ namespace Faust {
 		}
 #endif
 		switch(this->mul_order_opt_mode)
 		{
 			case GREEDY_ALL_ENDS:
@@ -186,7 +185,12 @@ namespace Faust {
 				}
 				break;
 			case DYNPROG:
-				this->multiply_dynprog(A, M);
+				{
+					std::vector<Faust::MatGeneric<FPP,Cpu>*> data = this->transform->data;
+					if(this->is_transposed)
+						std::reverse(data.begin(), data.end());
+					M = std::move(dynprog_multiply(data, this->isTransposed2char(), &A));
+				}
 				break;
 			case CPP_PROD_PAR_REDUC:
 			case OMP_PROD_PAR_REDUC:
@@ -249,16 +253,6 @@ namespace Faust {
 		memcpy(y, y_vec.getData(), sizeof(FPP)*y_vec.size());
 	}
-	template<typename FPP>
-	MatDense<FPP,Cpu> TransformHelper<FPP,Cpu>::multiply_dynprog(const MatGeneric<FPP,Cpu> &A, MatDense<FPP, Cpu> &out)
-	{ // specific scope for variable initialized here
-		std::vector<Faust::MatGeneric<FPP,Cpu>*> data = this->transform->data;
-		if(this->is_transposed)
-			std::reverse(data.begin(), data.end());
-		out = std::move(dynprog_multiply(data, this->isTransposed2char(), &A));
-	}
 	template<typename FPP>
 	MatDense<FPP,Cpu> TransformHelper<FPP,Cpu>::multiply(const MatDense<FPP,Cpu> &A, const bool transpose, const bool conjugate)
 	{
@@ -274,7 +268,6 @@ template<typename FPP>
 #endif
 		switch(this->mul_order_opt_mode)
 		{
 			case GREEDY_ALL_ENDS:
@@ -297,7 +290,10 @@ template<typename FPP>
 				break;
 			case DYNPROG:
 				{
-					this->multiply_dynprog(A, M);
+					std::vector<Faust::MatGeneric<FPP,Cpu>*> data = this->transform->data;
+					if(this->is_transposed)
+						std::reverse(data.begin(), data.end());
+					M = std::move(dynprog_multiply(data, this->isTransposed2char(), &A));
 				}
 				break;
 			case CPP_PROD_PAR_REDUC:
@@ -362,6 +358,21 @@ template<typename FPP>
 		return this->optimize_multiply([this](){this->get_product();}, transp, inplace, nsamples, "Faust-toarray");
 	}
+	template<typename FPP>
+	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples)
+	{
+		auto md = dynamic_cast<const MatDense<FPP,Cpu>*>(test_mat);
+		auto ms = dynamic_cast<const MatSparse<FPP,Cpu>*>(test_mat);
+		if(! md && ! ms)
+			throw std::runtime_error("optimize_time_prod supports only MatDense or MatSparse benchmarking.");
+		return this->optimize_multiply([this, ms, md]()
+				{
+					if(md) this->multiply(*md);
+					else /* ms != nullptr */ this->multiply(*ms);
+				}, transp, inplace, nsamples, "Faust-matrix product");
+	}
 	template<typename FPP>
 	TransformHelper<FPP,Cpu>* TransformHelper<FPP,Cpu>::optimize_multiply(std::function<void()> f, const bool transp /* deft to false */, const bool inplace, /* deft to 1 */ const int nsamples, const char* op_name)
 	{
...
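Note: the DYNPROG branch above delegates to dynprog_multiply, which picks the cheapest parenthesization of the factor chain (test matrix included) by dynamic programming. The sketch below shows the classic matrix-chain-order recurrence this kind of routine is built on; it is a Python illustration only, not the FAuST implementation, and chain_order is a hypothetical name:

    def chain_order(dims):
        # dims has n+1 entries for n matrices: matrix i is dims[i] x dims[i+1].
        # cost[i][j]: minimal scalar multiplications to reduce the chain i..j.
        n = len(dims) - 1
        cost = [[0] * n for _ in range(n)]
        split = [[0] * n for _ in range(n)]
        for length in range(2, n + 1):
            for i in range(n - length + 1):
                j = i + length - 1
                cost[i][j] = float('inf')
                for k in range(i, j):  # try every split point
                    c = cost[i][k] + cost[k + 1][j] + dims[i] * dims[k + 1] * dims[j + 1]
                    if c < cost[i][j]:
                        cost[i][j], split[i][j] = c, k
        return cost, split

    # Three factors (100x20, 20x50, 50x5) times a 5x60 test matrix:
    cost, split = chain_order([100, 20, 50, 5, 60])
    print(cost[0][3])  # cheapest total multiplication cost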
@@ -90,6 +90,8 @@ namespace Faust
 			TransformHelper<FPP,GPU2>* conjugate();
 			TransformHelper<FPP,GPU2>* adjoint();
 			TransformHelper<FPP,GPU2>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
+			TransformHelper<FPP,GPU2>* optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/);
 			TransformHelper<FPP,GPU2>* optimize(const bool transp=false);
 			TransformHelper<FPP,GPU2>* clone(int32_t dev_id=-1, void* stream=nullptr);
 			void get_fact(const faust_unsigned_int id,
...
@@ -440,7 +440,7 @@ namespace Faust
 	}
 	template<typename FPP>
-	void Faust::TransformHelper<FPP,GPU2>::pack_factors(faust_unsigned_int start_id, faust_unsigned_int end_id, const int mul_order_opt_mode/*=DEFAULT_L2R*/)
+	void Faust::TransformHelper<FPP,GPU2>::pack_factors(faust_unsigned_int start_id, faust_unsigned_int end_id, const int mul_order_opt_mode/*=DEFAULT*/)
 	{
 		if(start_id < 0 || start_id >= size())
 			throw out_of_range("start_id is out of range.");
@@ -611,6 +611,13 @@ namespace Faust
 		// return gpu_thn;
 	}
+	template<typename FPP>
+	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize_time_prod(const MatGeneric<FPP, Cpu>* test_mat, const bool transp/*=false*/, const bool inplace/*=false*/, const int nsamples/*=1*/)
+	{
+		throw std::runtime_error("optimize_time_prod is not yet implemented in the Faust C++ core for GPU.");
+		return nullptr;
+	}
 	template<typename FPP>
 	TransformHelper<FPP,GPU2>* TransformHelper<FPP,GPU2>::optimize(const bool transp/*=false*/)
 	{
...
@@ -2134,7 +2134,7 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
         F_opt = Faust(core_obj=F.m_faust.optimize(transp))
         return F_opt
-    def optimize_time(F, transp=False, inplace=False, nsamples=1):
+    def optimize_time(F, transp=False, inplace=False, nsamples=1, mat=None):
         """
         Returns a Faust configured with the quickest Faust-matrix multiplication mode (benchmark ran on the fly).
@@ -2142,11 +2142,7 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
         available differ by the order used to compute the matrix chain
         multiplication or by the use (or unuse) of libraries to performs the
         calculation.
-        The evaluated methods in the benchmark are listed in
-        pyfaust.FaustMulMode but note that FaustMulMode.CPP_PROD_PAR_REDUC and
-        FaustMulMode.OMP_PROD_PAR_REDUC are excluded from the benchmark because
-        it doesn't worth it in any case when Eigen multithread is enabled
-        (which is the case in any package of pyfaust delivered).
+        The evaluated methods in the benchmark are listed in pyfaust.FaustMulMode.
         Although depending on the package you installed and the capability of your
         hardware the methods based on Torch library can be used.
@@ -2159,6 +2155,10 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
             calculated in order to measure time taken by each method (it could matter
             to discriminate methods when the performances are similar). By default,
             only one product is computed to evaluate the method.
+            mat: if not None, must be a numpy.ndarray or a
+                scipy.sparse.csr_matrix. Use this argument to run the benchmark
+                on the multiplication F@mat instead of F.toarray(), which is
+                the default benchmark when mat is None.
 
         Returns:
             The optimized Faust.
@@ -2167,11 +2167,11 @@ class Faust(numpy.lib.mixins.NDArrayOperatorsMixin):
         """
         if(inplace):
-            F.m_faust.optimize_time(transp, inplace, nsamples)
+            F.m_faust.optimize_time(transp, inplace, nsamples, M=mat)
             return F
         else:
             F_opt = Faust(core_obj=F.m_faust.optimize_time(transp, inplace,
-                                                           nsamples))
+                                                           nsamples, M=mat))
             return F_opt
 
     def copy(F, dev='cpu'):
...
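Note: as the docstring explains, optimize_time times every multiplication method over nsamples runs and keeps the quickest. A rough Python model of that selection loop (pick_fastest and funcs are illustrative names, not pyfaust API; the real benchmark runs in the C++ optimize_multiply):

    import time

    def pick_fastest(funcs, nsamples=1):
        # funcs maps a method name to a zero-argument callable computing F@mat.
        best_name, best_time = None, float('inf')
        for name, f in funcs.items():
            t0 = time.perf_counter()
            for _ in range(nsamples):
                f()
            elapsed = (time.perf_counter() - t0) / nsamples
            if elapsed < best_time:
                best_name, best_time = name, elapsed
        return best_name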
@@ -142,6 +142,9 @@ class FaustCoreCpp
 		FaustCoreCpp<FPP,DEV>* optimize_storage(const bool time=false);
 		FaustCoreCpp<FPP,DEV>* optimize(const bool transp=false);
 		FaustCoreCpp<FPP,DEV>* optimize_time(const bool transp=false, const bool inplace=false, const int nsamples=1);
+		FaustCoreCpp<FPP,DEV>* optimize_time(const FPP* value_x, int nbrow_x, int nbcol_x, const bool transp=false, const bool inplace=false, const int nsamples=1);
+		FaustCoreCpp<FPP,DEV>* optimize_time(const FPP* x_data, int* x_row_ptr, int* x_id_col, int x_nnz, int x_nrows, int x_ncols, const bool transp=false, const bool inplace=false, const int nsamples=1);
 		const bool isTransposed();
 		FaustCoreCpp<FPP,DEV>* transpose()const;
 		FaustCoreCpp<FPP,DEV>* conjugate()const;
...
@@ -392,6 +392,40 @@ FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const bool transp /*
 #endif
 }
+template<typename FPP, FDevice DEV>
+FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const FPP* x_data, int* x_row_ptr, int* x_id_col, int x_nnz, int x_nrows, int x_ncols, const bool transp /* deft to false*/, const bool inplace /* default to false */, const int nsamples /* default to 1*/)
+{
+	Faust::MatSparse<FPP, Cpu> X(x_nnz, x_nrows, x_ncols, x_data, x_row_ptr, x_id_col);
+	if(inplace)
+	{
+		this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+		return nullptr; // in-place: the caller keeps using this object
+	}
+	auto th = this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+#ifdef FAUST_VERBOSE
+	std::cout << "FaustCoreCpp::optimize_time() th=" << th << std::endl;
+#endif
+	return new FaustCoreCpp<FPP,DEV>(th);
+}
+template<typename FPP, FDevice DEV>
+FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize_time(const FPP* x_data, int x_nrows, int x_ncols, const bool transp /* deft to false*/, const bool inplace /* default to false */, const int nsamples /* default to 1*/)
+{
+	Faust::MatDense<FPP, Cpu> X(x_data, x_nrows, x_ncols);
+	if(inplace)
+	{
+		this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+		return nullptr; // in-place: the caller keeps using this object
+	}
+	auto th = this->transform->optimize_time_prod(&X, transp, inplace, nsamples);
+#ifdef FAUST_VERBOSE
+	std::cout << "FaustCoreCpp::optimize_time() th=" << th << std::endl;
+#endif
+	return new FaustCoreCpp<FPP,DEV>(th);
+}
 template<typename FPP, FDevice DEV>
 FaustCoreCpp<FPP,DEV>* FaustCoreCpp<FPP,DEV>::optimize(const bool transp /* deft to false*/)
 {
...
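Note: the sparse overload above rebuilds a Faust::MatSparse from raw CSR buffers; these are exactly the attributes a scipy.sparse.csr_matrix exposes on the Python side, as this short sketch shows:

    import numpy as np
    from scipy.sparse import csr_matrix

    M = csr_matrix(np.array([[0., 2., 0.],
                             [1., 0., 3.]]))
    # Mapping to the C++ overload's parameters:
    #   x_data    <- M.data      (nonzero values)
    #   x_row_ptr <- M.indptr    (row start offsets, length nrows+1)
    #   x_id_col  <- M.indices   (column index of each nonzero)
    #   x_nnz, x_nrows, x_ncols <- M.nnz, M.shape[0], M.shape[1]
    print(M.data, M.indptr, M.indices, M.nnz, M.shape)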
@@ -61,6 +61,12 @@ cdef extern from "FaustCoreCpp.h":
         @CPP_CORE_CLASS@[FPP]* optimize_time(const bool transp, const bool inplace,
                                              const int nsamples)
+        @CPP_CORE_CLASS@[FPP]* optimize_time(FPP* x_data, int* x_row_ptr, int* x_id_col, int x_nnz, int x_nrows, int x_ncols, const bool transp, const bool inplace,
+                                             const int nsamples)
+        @CPP_CORE_CLASS@[FPP]* optimize_time(FPP* x_data, int x_nrows, int x_ncols,
+                                             const bool transp, const bool inplace,
+                                             const int nsamples)
         @CPP_CORE_CLASS@[FPP]* optimize(const bool transp)
         @CPP_CORE_CLASS@[FPP]* optimize_storage(const bool time)
         const bool isTransposed()
...
@@ -553,15 +553,63 @@ cdef class FaustCoreGen@TYPE_NAME@@PROC@:
         core.@CORE_OBJ@ = self.@CORE_OBJ@.optimize(transp)
         return core
-    def optimize_time(self, transp=False, inplace=False, nsamples=1):
-        if(inplace):
-            self.@CORE_OBJ@.optimize_time(transp, inplace, nsamples)
-        else:
-            core = @CORE_CLASS@(core=True)
-            core.@CORE_OBJ@ = self.@CORE_OBJ@.optimize_time(transp,
-                                                            inplace,
-                                                            nsamples)
-            return core
+    def optimize_time(self, transp=False, inplace=False, nsamples=1, M=None):
+        cdef @TYPE@[:,:] M_data
+        cdef int [:] M_indices
+        cdef int [:] M_indptr
+        cdef @TYPE@ [:] M_csr_data
+        M_is_dense = False
+        if M is None:
+            # optimize time according to Faust.toarray()
+            if(inplace):
+                self.@CORE_OBJ@.optimize_time(transp, inplace, nsamples)
+            else:
+                core = @CORE_CLASS@(core=True)
+                core.@CORE_OBJ@ = self.@CORE_OBJ@.optimize_time(transp,
+                                                                inplace,
+                                                                nsamples)
+                return core
+        else:
+            # optimize time according to F@M
+            if isinstance(M, np.ndarray):
+                M_is_dense = True
+                M_nrows = M.shape[0]
+                M_ncols = M.shape[1]
+                M_data = M
+            elif isinstance(M, csr_matrix):
+                M_nrows = M.shape[0]
+                M_ncols = M.shape[1]
+                M_csr_data = M.data
+                M_indices = M.indices
+                M_indptr = M.indptr
+                M_nnz = M.nnz
+            else:
+                raise TypeError("M must be a np.ndarray or a csr_matrix.")
+            if(inplace):
+                if M_is_dense:
+                    self.@CORE_OBJ@.optimize_time(&M_data[0,0], M_nrows, M_ncols, transp, inplace, nsamples)
+                else:
+                    self.@CORE_OBJ@.optimize_time(&M_csr_data[0], &M_indptr[0],
+                                                  &M_indices[0],
+                                                  M_nnz, M_nrows, M_ncols, transp, inplace, nsamples)
+            else:
+                core = @CORE_CLASS@(core=True)
+                if M_is_dense:
+                    core.@CORE_OBJ@ = \
+                            self.@CORE_OBJ@.optimize_time(&M_data[0,0], M_nrows, M_ncols,
+                                                          transp,
+                                                          inplace,
+                                                          nsamples)
+                else:
+                    core.@CORE_OBJ@ = \
+                            self.@CORE_OBJ@.optimize_time(&M_csr_data[0], &M_indptr[0],
+                                                          &M_indices[0],
+                                                          M_nnz, M_nrows, M_ncols,
+                                                          transp,
+                                                          inplace,
+                                                          nsamples)
+                return core
     def conj(self):
         core = @CORE_CLASS@(core=True)
...