Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 042dcd6c authored by hhakim's avatar hhakim
Browse files

New TransformHelper<FPP,GPU2> class and misc update in other GPU2 classes.

Add TransformHelper<FPP,GPU2> and quick tests (still to be improved).
Refactor TransformHelper<FPP,GPU2> and TransformHelper<FPP,Cpu> into the parent class TransformHelperGen (not finished yet).
Add Transform::update(), multiply(MatDense<FPP,GPU2>), update get_product, add factor iterator and begin()/end().
Update MatDense<FPP,GPU2>/MatSparse<FPP,GPU2>: add move assignment operator and move constructor, add getType.
Update MatDense<FPP,GPU2> tests.
Fix some Vect functions.
Move hpp.in GPU2 files to cpp.in as they are only template specializations (types as variables generated by cmake).
parent 59467cfe
Branches
Tags
No related merge requests found
Showing
with 738 additions and 204 deletions
......@@ -619,10 +619,12 @@ if(USE_GPU_MOD)
set(GM_SCALAR double)
set(GM_REINTERPRET_CAST_SCALAR double)
configure_file(${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_gen.hpp.in ${FAUST_LINEAR_OPERATOR_CPU_SRC_DIR}/faust_gpu_mod_double.hpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu_double.hpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu_double.hpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu_double.hpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu.hpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu_double.hpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatDense_gpu_double.cpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_MatSparse_gpu_double.cpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Transform_gpu_double.cpp)
configure_file(${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu.cpp.in ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR}/faust_Vect_gpu_double.cpp)
#target_sources(${FAUST_TARGET} PUBLIC ${GPU_MOD_CPP_FILES})
#list(APPEND CPP_FILES )
set(FAUST_SCALAR_FOR_GM complex<double>)
set(GM_SCALAR cuDoubleComplex)
set(GM_REINTERPRET_CAST_SCALAR double)
......@@ -649,12 +651,6 @@ if(FAUST_TORCH)
message(STATUS TORCH_LIBRARY=${TORCH_LIBRARY})
endif()
# before add Matlab directory, because its includes depend on faust's
if(USE_GPU_MOD)
include_directories(${FAUST_TARGET} ${GPU_MOD_INCLUDE_DIR} ${GPU_MOD_INCLUDE_DIR}/../build)
include_directories(${FAUST_TARGET} ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR})
endif()
########### add_subdirectory ###############
if (BUILD_WRAPPER_PYTHON)
......@@ -750,12 +746,15 @@ if(BUILD_MULTITHREAD)
endif()
if(USE_GPU_MOD)
include_directories(${FAUST_TARGET} ${GPU_MOD_INCLUDE_DIR} ${GPU_MOD_INCLUDE_DIR}/../build)
include_directories(${FAUST_TARGET} ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR})
target_include_directories(${FAUST_TARGET} PUBLIC ${GPU_MOD_INCLUDE_DIR} ${GPU_MOD_INCLUDE_DIR}/../build)
target_link_libraries(${FAUST_TARGET} ${GPU_MOD_LIBS})
if(UNIX)
target_link_libraries(${FAUST_TARGET} dl)
endif(UNIX)
install(FILES ${GPU_MOD_INCLUDE_DIR}/../build/libgm.so DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_EXECUTE WORLD_READ)
include_directories(${FAUST_TARGET} ${FAUST_SRC_LINEAR_OPERATOR_GPU2_DIR})
endif()
# In case of GPU, the "CUDA" library libfaust_matrix_cu.a is linked with faust (libfaust_matrix_cu.a is not a OBJECT library)
......
Subproject commit 1fd7b2e7d72c6acd004dcc08b4571ec9c84895d0
Subproject commit 5da8834a803570fe70b2f2b6435b47da39fb509a
......@@ -204,7 +204,7 @@ if(MATIO_LIB_FILE AND MATIO_INC_DIR AND BUILD_READ_MAT_FILE AND NOT NOCPPTESTS)
endif()
if(USE_GPU_MOD)
list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod)
list(APPEND tests faust_gpu_mod hierarchical2020_gpu test_matdense_gpu_mod test_matsparse_gpu_mod test_transform_gpu_mod test_vect_gpu_mod test_transform_helper_gpu_mod)
endif()
foreach(TEST_FPP float double complex<float> complex<double>)
......
#include "faust_MatDense_gpu2.h"
#include <cstdlib>
using namespace Faust;
// Smoke test: only checks that a default MatDense<FPP,GPU2> can be built and destroyed.
// The command-line arguments are not used.
int main(int, char**)
{
	{
		MatDense<FPP,GPU2> mat;
	}
	return EXIT_SUCCESS;
}
......@@ -2,6 +2,7 @@
#include "faust_gpu_mod.h"
#include "faust_MatDense_gpu.h"
#include "faust_MatSparse.h"
#include "faust_MatSparse_gpu.h"
#include "faust_Vect_gpu.h"
#include "faust_gpu_mod_utils.h"
#include "faust_cuda_device.h"
......@@ -452,6 +453,17 @@ void test_mul_vec()
assert(err < 1e-3);
}
void test_gpu_matsparse2matdense()
{
cout << "test MatDense<FPP,GPU2>::MatDense<FPP,GPU2>(MatSparse<FPP,GPU2>&)" << endl;
auto sM = Faust::MatSparse<double,Cpu>::randMat(22, 33, .2);
Faust::MatSparse<double,GPU2> sM_gpu(*sM);
MatDense<double,GPU2> dM_gpu(sM_gpu);
cout << sM_gpu.norm() << " " << dM_gpu.norm() << endl;
assert(abs(sM_gpu.norm()-dM_gpu.norm())< 1e-4);
cout << "OK" << endl;
}
int main(int argc, char** argv)
{
Faust::enable_gpu_mod();
......@@ -475,5 +487,6 @@ int main(int argc, char** argv)
test_get_device();
test_move();
test_mul_vec();
test_gpu_matsparse2matdense();
return EXIT_SUCCESS;
}
#include <cstdlib>
#include <iostream>
#include <vector>
#include "faust_MatDense.h"
#include "faust_TransformHelper_gpu.h"
#include "faust_TransformHelper.h"
#include "faust_gpu_mod_utils.h"
using namespace Faust;
using namespace std;
// Quick test of TransformHelper<double,GPU2>: construction from a factor list, norms,
// factor update and multiplication by a dense matrix, with the CPU helper printed
// alongside for visual comparison. Nothing is asserted; results are only printed.
int main()
{
	Faust::enable_gpu_mod();
	TransformHelper<double, GPU2> th;
	auto cpu_mat1 = MatDense<double,Cpu>::randMat(12,24);
	auto cpu_mat2 = MatDense<double,Cpu>::randMat(24,32);
	auto cpu_mat3 = Faust::MatSparse<double,Cpu>::randMat(32, 22, .3);
	MatDense<double,GPU2> gpu_mat1(*cpu_mat1);
	MatDense<double,GPU2> gpu_mat2(*cpu_mat2);
	Faust::MatSparse<double,GPU2> gpu_mat3(*cpu_mat3);
	vector<MatGeneric<double,GPU2>*> gpu_fact_list = {&gpu_mat1, &gpu_mat2, &gpu_mat3};
	TransformHelper<double, GPU2> th2(gpu_fact_list);
	// Initialized so that no indeterminate value is ever passed to spectralNorm.
	int flag = 0;
	th2.Display();
	cout << "gpu norm fro:" << th2.normFro() << endl;
	cout << "gpu spectral norm fro:" << th2.spectralNorm(100, 1e-3, flag) << endl;
	vector<MatGeneric<double,Cpu>*> cpu_fact_list = {cpu_mat1, cpu_mat2, cpu_mat3};
	TransformHelper<double, Cpu> th2_cpu(cpu_fact_list);
	th2_cpu.display();
	cout << "cpu norm fro:" <<th2_cpu.normFro() << endl;
	cout << "cpu spectral norm:" <<th2_cpu.spectralNorm(100,1e-3, flag) << endl;
	// NOTE(review): the bound comes from the GPU helper while the queries go to the CPU
	// helper; both were built from three factors. Confirm which helper was meant to be probed.
	for(int i=0;i<th2.size();i++)
		cout << " fact "<< i<< " is sparse: " << th2_cpu.is_fact_sparse(i) << " is dense: " << th2_cpu.is_fact_dense(i) << endl;
	// test update
	auto cpu_mat4 = MatDense<double,Cpu>::randMat(24,32);
	auto cpu_mat5 = Faust::MatSparse<double,Cpu>::randMat(32, 22, .3);
	MatDense<double,GPU2> gpu_mat4(*cpu_mat4);
	Faust::MatSparse<double,GPU2> gpu_mat5(*cpu_mat5);
	th2.update(gpu_mat4, 1);
	th2.update(gpu_mat5, 2);
	cout << "norm of updated gpu dmat:" << static_cast<MatDense<double,GPU2>*>(th2.get_gen_fact_nonconst(1))->norm() << endl;
	cout << "norm of updated gpu smat:" << static_cast<Faust::MatSparse<double,GPU2>*>(th2.get_gen_fact_nonconst(2))->norm() << endl;
	cout << "norm of gpu source dmat:" << gpu_mat4.norm() << endl;
	cout << "norm of gpu source smat:" << gpu_mat5.norm() << endl;
	cout << "norm of cpu source dmat:" << cpu_mat4->norm() << endl;
	cout << "norm of cpu source smat:" << cpu_mat5->norm() << endl;
	th2.Display();
	cout << "============ multiply" << endl;
	auto cpu_mat6 = Faust::MatDense<double,Cpu>::randMat(22, 32);
	MatDense<double,GPU2> gpu_mat6(*cpu_mat6);
	auto M_gpu = th2.multiply(gpu_mat6);
	auto M_cpu = th2_cpu.multiply(*cpu_mat6);
	cout << "M_cpu.norm() " << M_cpu.norm() << endl;
	cout << "M_gpu.norm() " << M_gpu.norm() << endl;
	// Reaching this point means every step ran: report success so the registered test
	// does not fail unconditionally (it used to return EXIT_FAILURE).
	return EXIT_SUCCESS;
}
......@@ -43,6 +43,7 @@
#define __FAUST_TRANSFORM_HELPER___
#include <memory>
#include "faust_TransformHelperGen.h"
#include "faust_RefManager.h"
#include "faust_exception.h"
#include "faust_Transform.h"
......@@ -62,11 +63,9 @@ namespace Faust {
template<typename FPP>
using transf_iterator = typename Transform<FPP,Cpu>::transf_iterator;
template<typename FPP,FDevice DEVICE> class Transform;
template<typename FPP,FDevice DEVICE> class Vect;
template<typename FPP,FDevice DEVICE> class MatDense;
template<typename FPP,FDevice DEVICE> class MatGeneric;
#ifdef USE_GPU_MOD
template<typename FPP> class FaustGPU;
#endif
enum RandFaustType {
DENSE,
......@@ -80,21 +79,12 @@ namespace Faust {
};
template<typename FPP>
class TransformHelper<FPP,Cpu> {
class TransformHelper<FPP,Cpu> : public TransformHelperGen<FPP,Cpu> {
static std::default_random_engine generator;
static bool seed_init;
bool is_transposed;
bool is_conjugate;
bool is_sliced;
Slice slices[2];
bool is_fancy_indexed;
int mul_order_opt_mode;
int Fv_mul_mode;
faust_unsigned_int * fancy_indices[2];
faust_unsigned_int fancy_num_rows;
faust_unsigned_int fancy_num_cols;
shared_ptr<Transform<FPP,Cpu>> transform;
#ifdef FAUST_TORCH
std::vector<torch::Tensor> tensor_data;
#endif
......@@ -131,7 +121,7 @@ namespace Faust {
TransformHelper<FPP, Cpu>* multiply(FPP& scalar);
template<typename Head, typename ... Tail>
void push_back_(Head& h, Tail&... t);
//
void push_back_();
void push_back(const MatGeneric<FPP,Cpu>* M, const bool optimizedCopy=false, const bool copying=true);
void pop_back();
......@@ -191,7 +181,7 @@ namespace Faust {
TransformHelper<FPP,Cpu>* horzcat(const TransformHelper<FPP,Cpu>*);
bool isTransposed() const;
bool isConjugate() const;
const char isTransposed2char() const;
// const char isTransposed2char() const;
double normL1() const;
double normFro() const;
double normInf() const;
......
This diff is collapsed.
......@@ -3,6 +3,7 @@
typedef @FAUST_SCALAR_FOR_GM@ FSFG;
#endif
//TODO: move to cpp.in
#include "faust_MatDense_gpu.h"
namespace Faust
{
template<>
......@@ -42,6 +43,8 @@ namespace Faust
gpu_mat = dsm_funcs->togpu_stream(nbRow, nbCol, const_cast<FSFG*>(data), stream);
else if(! no_alloc)
gpu_mat = dsm_funcs->create(nbRow, nbCol);
else
gpu_mat = nullptr;
gp_funcs->set_dev(cur_dev_id);
}
......@@ -49,7 +52,8 @@ namespace Faust
MatDense<FSFG,GPU2>::~MatDense()
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
dsm_funcs->free(gpu_mat);
if(gpu_mat != nullptr)
dsm_funcs->free(gpu_mat);
}
template<>
......@@ -65,6 +69,8 @@ namespace Faust
const int32_t dev_id/*=-1*/,
const void* stream/*=nullptr*/) : MatDense<FSFG,GPU2>(mat.getNbRow(), mat.getNbCol(), mat.getData(), /*no_alloc*/ mat.getData() == nullptr, dev_id, stream){}
template<>
void Faust::MatDense<FSFG,GPU2>::multiply(MatDense<FSFG, GPU2> &other, const char op_this)
{
......@@ -151,7 +157,10 @@ namespace Faust
void Faust::MatDense<FSFG,GPU2>::resize(const faust_unsigned_int nbRow, const faust_unsigned_int nbCol)
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
dsm_funcs->resize(gpu_mat, getNbRow(), getNbCol());
if(gpu_mat != nullptr)
dsm_funcs->resize(gpu_mat, getNbRow(), getNbCol());
else
gpu_mat = dsm_funcs->create(nbRow, nbCol);
#ifndef NDEBUG
int32_t new_nrows, new_ncols;
dsm_funcs->info(gpu_mat, &new_nrows, &new_ncols);
......@@ -159,6 +168,17 @@ namespace Faust
#endif
}
/**
 * \brief Builds a dense GPU matrix from a sparse GPU matrix.
 *
 * The object is first default-constructed. If mat holds no GPU buffer, nothing else is done.
 * Otherwise this matrix is resized to mat's dimensions and filled through copy2dense.
 *
 * \param mat the sparse GPU matrix to convert.
 */
template<>
Faust::MatDense<FSFG,GPU2>::MatDense(const MatSparse<FSFG,GPU2>& mat) : MatDense<FSFG,GPU2>()
{
	// Nothing to convert when the source has no GPU buffer.
	if(mat.get_gpu_mat_ptr() == nullptr)
		return;
	resize(mat.getNbRow(), mat.getNbCol());
	GPUModHandler::get_singleton()->spm_funcs(FSFG(0))->copy2dense(mat.get_gpu_mat_ptr(), this->gpu_mat);
}
template<>
void Faust::MatDense<FSFG,GPU2>::setOnes()
{
......@@ -410,8 +430,9 @@ namespace Faust
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
auto gpu_mat = dsm_funcs->togpu(getNbRow(), getNbCol(), const_cast<FSFG*>(A.getData()));
dsm_funcs->free(this->gpu_mat);
auto gpu_mat = dsm_funcs->togpu(A.getNbRow(), A.getNbCol(), const_cast<FSFG*>(A.getData()));
if(this->gpu_mat != nullptr)
dsm_funcs->free(this->gpu_mat);
this->gpu_mat = gpu_mat;
}
......@@ -423,12 +444,17 @@ namespace Faust
}
template<>
void Faust::MatDense<FSFG, GPU2>::operator=(const MatDense<FSFG, GPU2>& A)
Faust::MatDense<FSFG, GPU2>& Faust::MatDense<FSFG, GPU2>::operator=(const MatDense<FSFG, GPU2>& A)
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
auto gpu_mat = dsm_funcs->clone(A.gpu_mat);
dsm_funcs->free(this->gpu_mat);
this->gpu_mat = gpu_mat;
if(A.gpu_mat != nullptr)
{
resize(A.getNbRow(), A.getNbCol());
dsm_funcs->copy(A.gpu_mat, this->gpu_mat);
}
else
this->gpu_mat = nullptr;
return *this;
}
template <>
......@@ -492,11 +518,48 @@ namespace Faust
throw std::runtime_error("gpu_mat is nullptr");
}
template<typename FSFG>
template<>
void MatDense<FSFG,GPU2>::multiply(const Vect<FSFG, GPU2>& vec, Vect<FSFG, GPU2>& out_vec) const
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
dsm_funcs->mul_gpu_dsm_ext(this->gpu_mat, vec.get_gpu_mat_ptr(), out_vec.get_gpu_mat_ptr(), OP_NOTRANSP, OP_NOTRANSP);
}
/** \brief Reports the storage type of this matrix, which is always Dense. */
template<>
MatType MatDense<FSFG,GPU2>::getType() const
{
	const MatType type = Dense;
	return type;
}
/**
 * \brief Move assignment: frees the GPU buffer currently held (if any) and takes over mat's buffer.
 *
 * After the call mat holds a null gpu_mat.
 *
 * \param mat the matrix whose GPU buffer is taken over.
 * \return a reference to this matrix.
 */
template<>
MatDense<FSFG,GPU2>& MatDense<FSFG,GPU2>::operator=(MatDense<FSFG,GPU2>&& mat)
{
	// Self-move guard: without it the buffer would be freed and the pointer then reset to
	// nullptr, silently emptying the matrix.
	if(this == &mat)
		return *this;
	if(this->gpu_mat != nullptr)
	{
		auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(FSFG(0));
		dsm_funcs->free(this->gpu_mat);
	}
	this->gpu_mat = mat.gpu_mat;
	mat.gpu_mat = nullptr;
	return *this;
}
/**
 * \brief Move constructor: takes over mat's GPU buffer and leaves mat with a null gpu_mat.
 *
 * \param mat the matrix whose GPU buffer is taken over.
 */
template<>
MatDense<FSFG,GPU2>::MatDense(MatDense<FSFG,GPU2>&& mat)
{
	// A freshly constructed object owns no previous buffer: gpu_mat has not been given a
	// value yet, so it must be neither tested nor freed here (the former code did both).
	// NOTE(review): the former body carried commented-out device handling
	// (mat.getDevice() / gp_funcs); reintroduce it if the device has to be propagated.
	this->gpu_mat = mat.gpu_mat;
	mat.gpu_mat = nullptr;
}
};
......@@ -3,18 +3,23 @@
#ifdef USE_GPU_MOD
#include "faust_MatDense.h"
#include "faust_MatGeneric_gpu.h"
#include "faust_Vect_gpu.h"
#include "faust_MatSparse_gpu.h"
#include "faust_gpu_mod_utils.h"
#include <cstdint>
namespace Faust
{
template <typename FPP>
void gemm(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
template<typename FPP, FDevice DEVICE>
class MatDense;
template<typename FPP>
class MatDense<FPP, GPU2> : public MatGeneric<FPP,GPU2>
{
friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
friend MatSparse<FPP,GPU2>;
friend void gemm<>(const MatDense<FPP, GPU2> &A, const MatDense<FPP, GPU2> &B, MatDense<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA, const char opB);
public:
......@@ -28,9 +33,12 @@ namespace Faust
MatDense();
MatDense(const MatDense<FPP,Cpu>& mat, const int32_t dev_id=-1, const void* stream=nullptr);
MatDense(MatDense<FPP,GPU2> && mat);
MatDense(const MatSparse<FPP,GPU2> & mat);
~MatDense();
void operator=(const MatDense<FPP,GPU2> & A);
MatDense<FPP,GPU2>& operator=(MatDense<FPP,GPU2> && mat);
MatDense<FPP,GPU2>& operator=(const MatDense<FPP,GPU2> & A);
void operator=(const MatDense<FPP,Cpu> & A);
void operator=(const MatSparse<FPP,Cpu> & A);
// *this = *this + A
......@@ -88,10 +96,11 @@ namespace Faust
MatDense<FPP, Cpu> tocpu(const void* stream=nullptr) const;
void Display() const;
std::string to_string(const bool transpose=false, const bool displaying_small_mat_elts=false) const;
MatType getType() const;
int32_t getNbRow() const;
int32_t getNbCol() const;
faust_unsigned_int getNonZeros() const;
private:
protected:
gm_DenseMat_t gpu_mat;
void* get_gpu_mat_ptr() const;
void set_gpu_mat_ptr(void*);
......@@ -99,6 +108,6 @@ namespace Faust
}
#include "faust_MatDense_gpu_double.hpp"
#endif
#endif
#ifndef __FAUST_MATGENERIC_GPU__
#define __FAUST_MATGENERIC_GPU__
#include "faust_constant.h"
namespace Faust
{
template<typename FPP, FDevice DEVICE> class Transform;
template<typename FPP, FDevice DEVICE> class MatGeneric;
template<typename FPP>
class Transform<FPP,GPU2>;
//TODO: this class is temporary, ideally MatSparse<FPP,GPU2> and MatDense<FPP,GPU2> should extend the MatGeneric<FPP, Device> class
//TODO: keep this class until MatSparse<FPP,GPU2> and MatDense<FPP,GPU2> fully implement the MatGeneric<FPP, Device> methods
// The interest of this class is mostly to make Transform capable of storing generic matrix
// TODO: this class should extends MatGeneric<FPP,Device>
template<typename FPP>
class MatGeneric<FPP, GPU2>
{
friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
friend Transform<FPP,GPU2>; // needs access to get_gpu_mat_ptr
virtual void set_gpu_mat_ptr(void*)=0;
protected:
bool is_identity;
bool is_zeros;
public:
virtual MatType getType() const=0;
virtual int32_t getNbRow() const=0;
virtual int32_t getNbCol() const=0;
virtual MatGeneric<FPP,GPU2>* clone(const int32_t dev_id=-1, const void* stream=nullptr) const=0;
......
//TODO: move to cpp.in
#include "faust_MatSparse_gpu.h"
#include "faust_MatDense_gpu.h"
#ifndef _FSFG_
#define _FSFG_
typedef @FAUST_SCALAR_FOR_GM@ FSFG;
#endif
namespace Faust
{
template<>
......@@ -79,10 +85,11 @@ namespace Faust
}
template<>
void MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::operator=(const MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>& mat)
MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>& MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>::operator=(const MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>& mat)
{
auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
spm_funcs->copy(mat.gpu_mat, gpu_mat);
return *this;
}
template<>
......@@ -282,4 +289,50 @@ namespace Faust
{
this->gpu_mat = gpu_mat;
}
/** \brief Reports the storage type of this matrix, which is always Sparse. */
template<>
MatType MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>::getType() const
{
	const MatType type = Sparse;
	return type;
}
/**
 * \brief Move assignment: frees the GPU buffer currently held (if any) and takes over mat's buffer.
 *
 * After the call mat holds a null gpu_mat.
 *
 * \param mat the matrix whose GPU buffer is taken over.
 * \return a reference to this matrix.
 */
template<>
MatSparse<FSFG,GPU2>& MatSparse<FSFG,GPU2>::operator=(MatSparse<FSFG,GPU2>&& mat)
{
	// Self-move guard: without it the buffer would be freed and the pointer then reset to
	// nullptr, silently emptying the matrix.
	if(this == &mat)
		return *this;
	if(this->gpu_mat != nullptr)
	{
		auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs(FSFG(0));
		spm_funcs->free(this->gpu_mat);
	}
	this->gpu_mat = mat.gpu_mat;
	mat.gpu_mat = nullptr;
	return *this;
}
/**
 * \brief Builds a sparse GPU matrix from a dense GPU matrix.
 *
 * The conversion goes through the CPU: the dense matrix is copied to host memory,
 * converted to a CPU sparse matrix, and sent back to the GPU.
 * If mat holds no GPU buffer, the object stays in its default-constructed state.
 *
 * \param mat the dense GPU matrix to convert.
 */
template<>
MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>::MatSparse(const MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& mat) : MatSparse<@FAUST_SCALAR_FOR_GM@, GPU2>()
{
	// Delegating to the default constructor (as MatDense(const MatSparse&) does) puts
	// gpu_mat in a defined state before the move assignment below inspects it, and also
	// covers the case where mat has no buffer and the body does nothing.
	//TODO: process the copy totally on gpu side
	if(mat.get_gpu_mat_ptr() != nullptr)
	{
		auto cpu_dmat = mat.tocpu();
		MatSparse<@FAUST_SCALAR_FOR_GM@,Cpu> cpu_smat(cpu_dmat);
		MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2> gpu_smat(cpu_smat);
		*this = std::move(gpu_smat);
	}
}
/**
 * \brief Move constructor: takes over mat's GPU buffer and leaves mat with a null gpu_mat.
 *
 * \param mat the matrix whose GPU buffer is taken over.
 */
template<>
MatSparse<FSFG,GPU2>::MatSparse(MatSparse<FSFG,GPU2>&& mat)
{
	// A freshly constructed object owns no previous buffer: this constructor has not given
	// gpu_mat a value yet, so it must be neither tested nor freed here (the former code did both).
	this->gpu_mat = mat.gpu_mat;
	mat.gpu_mat = nullptr;
}
};
......@@ -4,14 +4,20 @@
#include "faust_gpu_mod_utils.h"
#include "faust_constant.h"
#include "faust_MatGeneric_gpu.h"
#include "faust_MatSparse.h"
namespace Faust
{
template<typename FPP, FDevice DEVICE>
class MatSparse;
template<typename FPP, FDevice DEVICE>
class MatDense;
template<typename FPP>
class MatSparse<FPP, GPU2> : public MatGeneric<FPP,GPU2>
{
friend Transform<FPP,GPU2>; // need to access to get_gpu_mat_ptr
friend MatDense<FPP,GPU2>;
public:
/** \brief Inits from CPU buffers.
*
......@@ -37,9 +43,14 @@ namespace Faust
const int32_t dev_id=-1,
const void* stream=nullptr);
MatSparse(const MatDense<FPP,GPU2>& mat);
MatSparse(MatSparse<FPP,GPU2> && mat);
MatSparse<FPP,GPU2>& operator=(MatSparse<FPP,GPU2> && mat);
MatSparse();
void operator=(const MatSparse<FPP, GPU2>& mat);
MatSparse<FPP,GPU2>& operator=(const MatSparse<FPP, GPU2>& mat);
void operator=(const MatSparse<FPP, Cpu>& mat);
void operator*=(const FPP& alpha);
void operator/=(const FPP& alpha);
......@@ -72,6 +83,7 @@ namespace Faust
int32_t getDevice() const;
void Display() const;
std::string to_string(const bool transpose=false, const bool displaying_small_mat_elts=false) const;
MatType getType() const;
~MatSparse();
private:
......@@ -82,6 +94,6 @@ namespace Faust
};
#include "faust_MatSparse_gpu_double.hpp"
#endif
#endif
#ifndef __TRANSFORM_HELPER_GPU2__
#define __TRANSFORM_HELPER_GPU2__
#include "faust_constant.h"
#include "faust_Transform_gpu.h"
//#include "faust_Transform.h"
#include "faust_TransformHelperGen.h"
#include <memory>
namespace Faust
{
template<typename FPP, FDevice DEVICE> class TransformHelper;
template<typename FPP, FDevice DEVICE> class TransformHelperGen;
// GPU2 specialization of TransformHelper. Every method below forwards to the underlying
// Transform<FPP,GPU2> reached through this->transform.
template<typename FPP>
class TransformHelper<FPP,GPU2> : public TransformHelperGen<FPP,GPU2>
{
public:
// Builds an empty helper (delegates to the TransformHelperGen default constructor).
TransformHelper();
#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
// Builds a helper from one or several containers of factor pointers; the factors are
// appended in order through push_back_, without copying.
template<typename ...GList> TransformHelper(GList& ... t);
#endif
// Appends the factor M; optimizedCopy is currently ignored, copying is forwarded to Transform::push_back.
void push_back(const MatGeneric<FPP,GPU2>* M, const bool optimizedCopy=false, const bool copying=true);
// Appends every element of h (iterated with begin()/end()), then recurses on the remaining containers.
template<typename Head, typename ... Tail>
void push_back_(Head& h, Tail&... t);
// End of the push_back_ recursion: does nothing.
void push_back_();
// Forwards to Transform::Display().
void Display() const;
// Returns the result of Transform::get_product() called with its default arguments.
MatDense<FPP,GPU2> get_product();
// Multiplies by A; the transpose/conjugate arguments are XOR-ed into the helper's flags
// for the duration of the call and XOR-ed back afterwards.
MatDense<FPP,GPU2> multiply(const Faust::MatDense<FPP,GPU2> &A, const bool transpose=false, const bool conjugate=false);
// Returns norm() of the full product returned by Transform::get_product().
Real<FPP> normFro() const;
// Returns Transform::size().
faust_unsigned_int size() const;
// Forwards to Transform::update_total_nnz().
void update_total_nnz() const;
// Forwards to Transform::spectralNorm with the same arguments.
Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag);
// Forwards to Transform::is_fact_sparse(id).
bool is_fact_sparse(int id) const;
// Forwards to Transform::is_fact_dense(id).
bool is_fact_dense(int id) const;
// Returns Transform::get_fact(id, false), i.e. the factor is requested without cloning.
MatGeneric<FPP,GPU2>* get_gen_fact_nonconst(const faust_unsigned_int id) const;
// Forwards to Transform::update(M, id).
void update(const MatGeneric<FPP, GPU2>& M, const faust_unsigned_int id);
};
}
#include "faust_TransformHelper_gpu.hpp"
#endif
namespace Faust
{
	template<typename FPP,FDevice DEVICE> class Transform;

	/** \brief Builds an empty helper by delegating to the TransformHelperGen default constructor. */
	template<typename FPP>
	TransformHelper<FPP,GPU2>::TransformHelper() : TransformHelperGen<FPP,GPU2>()
	{
	}

#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
	/**
	 * \brief Builds a helper from one or several containers of factor pointers.
	 *
	 * The factors are appended in order through push_back_, which does not copy them.
	 */
	template<typename FPP>
	template<typename ... GList>
	TransformHelper<FPP,GPU2>::TransformHelper(GList& ... t): TransformHelper<FPP,GPU2>()
	{
		this->push_back_(t...);
	}
#endif

	/**
	 * \brief Appends the factor M to the underlying transform.
	 *
	 * \param M the factor to append.
	 * \param optimizedCopy ignored for now.
	 * \param copying forwarded as is to Transform::push_back.
	 */
	template<typename FPP>
	void TransformHelper<FPP,GPU2>::push_back(const MatGeneric<FPP,GPU2>* M, const bool optimizedCopy/*=false*/, const bool copying/*=true*/)
	{
		//optimizedCopy is ignored because not handled yet by Transform<FPP,GPU2> // TODO ? (it's not used by wrappers anyway)
		this->transform->push_back(M, copying);
	}

	/** \brief Forwards to Transform::Display(). */
	template<typename FPP>
	void TransformHelper<FPP,GPU2>::Display() const
	{
		this->transform->Display();
	}

	/**
	 * \brief Appends every element of h without copying, then processes the remaining containers.
	 *
	 * \param h a container of factor pointers, iterated with begin()/end().
	 * \param t the remaining containers.
	 */
	template<typename FPP>
	template<typename Head, typename ... Tail>
	void TransformHelper<FPP,GPU2>::push_back_(Head& h, Tail&... t)
	{
		// Range-for only needs begin()/end(), operator!= and pre-increment, whereas the
		// former loop required operator< on the iterators.
		for(auto f : h)
			this->push_back(f, false, false);
		this->push_back_(t...);
	}

	template<typename FPP>
	void TransformHelper<FPP,GPU2>::push_back_()
	{
		// do nothing, here just for empty tail of above function
	}

	/** \brief Returns the result of Transform::get_product() called with its default arguments. */
	template<typename FPP>
	MatDense<FPP,GPU2> TransformHelper<FPP,GPU2>::get_product()
	{
		return this->transform->get_product();
	}

	/** \brief Returns norm() of the full product returned by Transform::get_product(). */
	template<typename FPP>
	Real<FPP> TransformHelper<FPP,GPU2>::normFro() const
	{
		return this->transform->get_product().norm();
	}

	/** \brief Returns Transform::size(). */
	template<typename FPP>
	faust_unsigned_int TransformHelper<FPP,GPU2>::size() const
	{
		return this->transform->size();
	}

	/** \brief Forwards to Transform::update_total_nnz(). */
	template<typename FPP>
	void TransformHelper<FPP,GPU2>::update_total_nnz() const
	{
		this->transform->update_total_nnz();
	}

	/** \brief Forwards to Transform::spectralNorm with the same arguments. */
	template<typename FPP>
	Real<FPP> TransformHelper<FPP,GPU2>::spectralNorm(int32_t nb_iter_max, float threshold, int& flag)
	{
		return this->transform->spectralNorm(nb_iter_max, threshold, flag);
	}

	/** \brief Forwards to Transform::is_fact_sparse(id). */
	template<typename FPP>
	bool TransformHelper<FPP,GPU2>::is_fact_sparse(int id) const
	{
		return this->transform->is_fact_sparse(id);
	}

	/** \brief Forwards to Transform::is_fact_dense(id). */
	template<typename FPP>
	bool TransformHelper<FPP,GPU2>::is_fact_dense(int id) const
	{
		return this->transform->is_fact_dense(id);
	}

	/** \brief Returns the factor of index id, requested from the transform without cloning. */
	template<typename FPP>
	MatGeneric<FPP,GPU2>* TransformHelper<FPP,GPU2>::get_gen_fact_nonconst(const faust_unsigned_int id) const
	{
		return this->transform->get_fact(id, false);
	}

	/** \brief Forwards to Transform::update(M, id). */
	template<typename FPP>
	void TransformHelper<FPP,GPU2>::update(const MatGeneric<FPP, GPU2>& M,const faust_unsigned_int id)
	{
		this->transform->update(M, id);
	}

	/**
	 * \brief Multiplies the transform by the dense matrix A.
	 *
	 * transpose and conjugate are XOR-ed into the helper's flags for the duration of the
	 * call and XOR-ed back before leaving, whether the call succeeds or throws.
	 *
	 * \param A the dense GPU matrix to multiply by.
	 * \param transpose toggles the transpose flag for this call only.
	 * \param conjugate toggles the conjugate flag for this call only.
	 * \return the result of Transform::multiply.
	 */
	template<typename FPP>
	MatDense<FPP,GPU2> TransformHelper<FPP,GPU2>::multiply(const Faust::MatDense<FPP,GPU2> &A, const bool transpose /* deft to false */, const bool conjugate /* deft to false */)
	{
		this->is_transposed ^= transpose;
		this->is_conjugate ^= conjugate;
		try
		{
			MatDense<FPP,GPU2> M = this->transform->multiply(A, this->isTransposed2char());
			this->is_transposed ^= transpose;
			this->is_conjugate ^= conjugate;
			return M;
		}
		catch(...)
		{
			// Transform::multiply throws on invalid input; the flags must not stay toggled.
			this->is_transposed ^= transpose;
			this->is_conjugate ^= conjugate;
			throw;
		}
	}
}
#include "faust_Transform_gpu.h"
namespace Faust
{
......@@ -47,6 +48,8 @@ namespace Faust
marr_funcs->addgpu_anymat(gpu_mat_arr, M->get_gpu_mat_ptr());
}
template<>
int32_t Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::size() const
{
......@@ -80,6 +83,29 @@ namespace Faust
return M;
}
/**
 * \brief Overwrites the factor of index id with the content of M.
 *
 * \param M the new content; it must have the same type (dense or sparse) as the factor.
 * \param id the index of the factor to overwrite.
 * \throws std::runtime_error if M and the factor do not have the same type.
 */
template<>
void Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::update(const MatGeneric<@FAUST_SCALAR_FOR_GM@, GPU2>& M, const faust_unsigned_int id)
{
	MatGeneric<@FAUST_SCALAR_FOR_GM@,GPU2>* target = get_fact(id, false);
	const auto target_type = target->getType();
	if(M.getType() != target_type)
		throw std::runtime_error("The factor matrix to update is not of the same type (dense or sparse) as the input matrix.");
	if(target_type != Dense)
	{
		// sparse factor: copy-assign from the sparse source
		auto sparse_src = dynamic_cast<const MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>*>(&M);
		auto sparse_dst = dynamic_cast<MatSparse<@FAUST_SCALAR_FOR_GM@,GPU2>*>(target);
		*sparse_dst = *sparse_src;
		return;
	}
	// dense factor: copy-assign from the dense source
	auto dense_src = dynamic_cast<const MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>*>(&M);
	auto dense_dst = dynamic_cast<MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>*>(target);
	*dense_dst = *dense_src;
}
template<>
void Transform<@FAUST_SCALAR_FOR_GM@, GPU2>::get_facts(std::vector<MatGeneric<@FAUST_SCALAR_FOR_GM@,GPU2>*> &factors, bool cloning_facts/*=true*/) const
{
......@@ -185,12 +211,32 @@ namespace Faust
template<>
MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_product() const
void Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_product(MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& M, const char opThis/*='N'*/, const bool isConj/*=false*/) const
{
auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
auto gpu_mat = marr_funcs->chain_matmul_one(gpu_mat_arr, OP_NOTRANSP);
MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> M;
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
gm_Op op;
if(opThis == 'N')
op = OP_NOTRANSP;
else if(opThis == 'T')
op = OP_TRANSP;
else if(opThis == 'H')
op = OP_CONJTRANSP;
else
throw std::runtime_error("Invalid opThis");
auto gpu_mat = marr_funcs->chain_matmul_one(gpu_mat_arr, op);
if(M.gpu_mat != nullptr)
dsm_funcs->free(M.gpu_mat);
//TODO: rather to delete use a marr_funcs function that allows to pass a pre-allocated output buffer
M.gpu_mat = gpu_mat;
if(isConj && opThis != 'H') M.conjugate();
}
/**
 * \brief Returning overload of get_product: fills a fresh dense matrix through the
 * output-argument overload and returns it.
 *
 * \param opThis forwarded to the output-argument overload.
 * \param isConj forwarded to the output-argument overload.
 */
template<>
MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::get_product(const char opThis/*='N'*/, const bool isConj/*=false*/) const
{
	MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> product;
	get_product(product, opThis, isConj);
	return product;
}
......@@ -272,4 +318,84 @@ namespace Faust
}
}
}
/**
 * \brief Tells whether the factor of index id is sparse.
 *
 * \param id the index of the factor.
 * \return true if the factor's getType() is Sparse.
 */
template<>
bool Faust::Transform<@FAUST_SCALAR_FOR_GM@, GPU2>::is_fact_sparse(int id) const
{
	// Only the type is read, so the factor is requested without cloning (get_fact clones
	// by default, and the clone was never released).
	return get_fact(id, /*cloning_fact*/ false)->getType() == Sparse;
}
/**
 * \brief Tells whether the factor of index id is dense.
 *
 * \param id the index of the factor.
 * \return true if the factor's getType() is Dense.
 */
template<>
bool Faust::Transform<@FAUST_SCALAR_FOR_GM@, GPU2>::is_fact_dense(int id) const
{
	// Only the type is read, so the factor is requested without cloning (get_fact clones
	// by default, and the clone was never released).
	return get_fact(id, /*cloning_fact*/ false)->getType() == Dense;
}
/**
 * \brief Multiplies the chain of factors (optionally transposed or conjugate-transposed) by the dense matrix A.
 *
 * \param A the dense GPU matrix to multiply by; it must hold a GPU buffer.
 * \param opThis 'N' (no transpose), 'T' (transpose) or 'H' (conjugate transpose), applied to this transform.
 * \return the dense GPU matrix produced by chain_matmul_by_dsm_one.
 * \throws std::runtime_error if A has no GPU buffer, if the transform has no factor array,
 *         or if opThis is not one of 'N', 'T', 'H'.
 */
template<>
MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> &A, const char opThis)
{
	auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
	gm_Op op;
	if(A.gpu_mat == nullptr)
		throw std::runtime_error("MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> argument is not initialized.");
	if(gpu_mat_arr == nullptr)
		throw std::runtime_error("No factors in Transform.");
	// The product always has as many columns as A, whatever the operation applied to this.
	int32_t out_nrows = this->getNbCol(), out_ncols = A.getNbCol(); // transpose/adjoint case
	if(opThis == 'N')
	{
		op = OP_NOTRANSP;
		// Without transposition the rows come from this transform; the column count was
		// formerly overwritten with this transform's own column count.
		out_nrows = getNbRow();
	}
	else if(opThis == 'T')
		op = OP_TRANSP;
	else if(opThis == 'H')
		op = OP_CONJTRANSP;
	else
		throw std::runtime_error("Invalid opThis");
	// Built with no_alloc so that no buffer is allocated before the result is attached.
	MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
	out.gpu_mat = marr_funcs->chain_matmul_by_dsm_one(this->gpu_mat_arr, op, A.gpu_mat);
	return out;
}
/** \brief Returns the factor at the iterator's current index, requested from the container without cloning. */
template<>
MatGeneric<@FAUST_SCALAR_FOR_GM@,GPU2>* Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::operator*() const
{
	MatGeneric<@FAUST_SCALAR_FOR_GM@,GPU2>* current_fact = container.get_fact(index, /*cloning_fact*/ false);
	return current_fact;
}
/** \brief Pre-increment: moves to the next factor index and returns this iterator. */
template<>
Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator& Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::operator++()
{
	++index;
	return *this;
}
/** \brief Post-increment: moves to the next factor index and returns a copy of the iterator as it was before. */
template<>
Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::operator++(int)
{
	iterator copy(*this);
	// Must go through the pre-increment: `(*this)++` would call this very function again
	// and recurse without end.
	++(*this);
	return copy;
}
// Builds an iterator over the factors of `container`, positioned at `index`.
// Only a reference to the container is stored.
template<>
Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator::iterator(const Transform<@FAUST_SCALAR_FOR_GM@, GPU2>& container, size_t index) : index(index), container(container)
{
}
/** \brief Returns an iterator positioned on the first factor (index 0). */
template<>
Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::begin() const
{
	Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator first(*this, 0);
	return first;
}
/** \brief Returns an iterator positioned one past the last factor (index size()). */
template<>
Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::end() const
{
	Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::iterator past_the_end(*this, size());
	return past_the_end;
}
}
......@@ -4,44 +4,71 @@
#include "faust_gpu_mod_utils.h"
#include "faust_constant.h"
#include "faust_MatGeneric_gpu.h"
#include "faust_MatSparse_gpu.h"
#include "faust_MatDense_gpu.h"
#include <vector>
namespace Faust
{
template<typename FPP, FDevice DEVICE> class Transform;
template<typename FPP>
class Transform<FPP,GPU2>
{
gm_MatArray_t gpu_mat_arr;
public:
Transform();
Transform(const std::vector<MatGeneric<FPP,GPU2>*> &factors);
Transform(const Transform<FPP,GPU2>& t);
~Transform();
void operator=(const Transform<FPP,GPU2>& t);
void push_back(const MatGeneric<FPP,GPU2>*, bool copying=true);
void push_first(const MatGeneric<FPP,GPU2>*, bool copying=true);
void pop_front();
void pop_back();
void clear();
MatGeneric<FPP,GPU2>* get_fact(int32_t id, bool cloning_fact=true) const;
void get_facts(std::vector<MatGeneric<FPP,GPU2>*> &factors, bool cloning_facts=true) const;
void transpose();
int32_t getNbRow()const;
int32_t getNbCol()const;
void Display() const;
int32_t size() const;
faust_unsigned_int get_total_nnz() const;
void update_total_nnz() const;
void scalarMultiply(const FPP& alpha);
MatDense<FPP,GPU2> get_product() const;
void multiply(const Transform<FPP,GPU2> & A);
void multiplyLeft(const Transform<FPP,GPU2> & A);
Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag);
Transform();
Transform(const std::vector<MatGeneric<FPP,GPU2>*> &factors);
Transform(const Transform<FPP,GPU2>& t);
~Transform();
void operator=(const Transform<FPP,GPU2>& t);
void push_back(const MatGeneric<FPP,GPU2>*, bool copying=true);
void push_first(const MatGeneric<FPP,GPU2>*, bool copying=true);
void pop_front();
void pop_back();
void clear();
void update(const MatGeneric<FPP, GPU2>& M, const faust_unsigned_int id);
MatGeneric<FPP,GPU2>* get_fact(int32_t id, bool cloning_fact=true) const;
void get_facts(std::vector<MatGeneric<FPP,GPU2>*> &factors, bool cloning_facts=true) const;
bool is_fact_sparse(int id) const;
bool is_fact_dense(int id) const;
void transpose();
int32_t getNbRow()const;
int32_t getNbCol()const;
void Display() const;
int32_t size() const;
faust_unsigned_int get_total_nnz() const;
void update_total_nnz() const;
void scalarMultiply(const FPP& alpha);
MatDense<FPP,GPU2> get_product(const char opThis='N', const bool isConj=false) const;
void get_product(MatDense<FPP,GPU2>& M, const char opThis='N', const bool isConj=false) const;
MatDense<FPP,GPU2> multiply(const Faust::MatDense<FPP,GPU2> &A, const char opThis);
void multiply(const Transform<FPP,GPU2> & A);
void multiplyLeft(const Transform<FPP,GPU2> & A);
Real<FPP> spectralNorm(int32_t nb_iter_max, float threshold, int& flag);
// using transf_iterator = typename std::vector<Faust::MatGeneric<FPP,Cpu>*>::const_iterator;
//
// transf_iterator begin() const;
//
// transf_iterator end() const;
public:
// Iterator type handed out by Transform<FPP,GPU2>::begin()/end().
// Dereferencing yields a MatGeneric<FPP,GPU2>* (presumably one factor of the
// transform; the member definitions are not visible in this header -- confirm).
// NOTE(review): std::iterator is deprecated since C++17.
// NOTE(review): the reference member below makes this type non-assignable.
class iterator : public std::iterator<std::output_iterator_tag, MatGeneric<FPP,GPU2>*>
{
public:
// Binds the iterator to `container`; `index` defaults to 0 (presumably the
// position of the first factor -- confirm against the definition).
explicit iterator(const Transform<FPP, GPU2>& container, size_t index = 0);
// Dereference: returns a MatGeneric<FPP,GPU2>* for the current position.
MatGeneric<FPP,GPU2>* operator*() const;
// Pre-increment operator: returns a reference to this iterator.
iterator & operator++();
// Post-increment operator: returns an iterator by value.
iterator operator++(int);
private:
// Current position within `container`.
size_t index;
// Transform being iterated over; held by reference, so it must outlive the iterator.
const Transform<FPP, GPU2> & container;
};
Transform<FPP,GPU2>::iterator begin() const;
Transform<FPP,GPU2>::iterator end() const;
};
}
#include "faust_Transform_gpu_double.hpp"
#endif
#endif
#include "faust_Vect_gpu.h"
#ifndef _FSFG_
#define _FSFG_
typedef @FAUST_SCALAR_FOR_GM@ FSFG;
......@@ -6,12 +7,12 @@ typedef @FAUST_SCALAR_FOR_GM@ FSFG;
namespace Faust
{
/**
 * Default constructor of the Vect<FSFG,GPU2> specialization.
 *
 * Only delegates to the default MatDense<FSFG,GPU2> constructor. FSFG is a
 * typedef (not a template parameter), so this is an explicit specialization
 * and must be introduced by `template<>` alone.
 */
template<>
Vect<FSFG,GPU2>::Vect() : MatDense<FSFG,GPU2>()
{
}
template<typename FSFG>
template<>
Vect<FSFG,GPU2>::Vect(const faust_unsigned_int size,
const FSFG* cpu_data,
const bool no_alloc,
......@@ -20,19 +21,19 @@ namespace Faust
{
}
/**
 * Returns the vector size, i.e. the value of getNbRow().
 *
 * Explicit specialization: FSFG is a typedef, hence the bare `template<>`.
 */
template<>
faust_unsigned_int Vect<FSFG,GPU2>::size() const
{
    return this->getNbRow();
}
template<typename FSFG>
void resize(const faust_unsigned_int size)
template<>
void Vect<FSFG,GPU2>::resize(const faust_unsigned_int size)
{
MatDense<FSFG, GPU2>::resize(size, 1);
}
template<typename FSFG>
template<>
void Vect<FSFG,GPU2>::operator=(const Vect<FSFG,GPU2> & v)
{
if(size() == v.size())
......@@ -41,7 +42,7 @@ namespace Faust
throw std::runtime_error("Dimensions must agree.");
}
template<typename FSFG>
template<>
void Vect<FSFG,GPU2>::operator=(const Vect<FSFG,Cpu> & v)
{
if(size() == v.size())
......@@ -55,7 +56,7 @@ namespace Faust
throw std::runtime_error("Dimensions must agree.");
}
template<typename FSFG>
template<>
FSFG Vect<FSFG,GPU2>::max()
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
......@@ -64,7 +65,7 @@ namespace Faust
return max_coeff;
}
template<typename FSFG>
template<>
FSFG Vect<FSFG,GPU2>::min()
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
......@@ -73,8 +74,8 @@ namespace Faust
return min_coeff;
}
template<typename FSFG>
FSFG Vect<FSFG,GPU2>::dot(const Vect<FSFG,Cpu> &v)
template<>
FSFG Vect<FSFG,GPU2>::dot(const Vect<FSFG,GPU2> &v)
{
FSFG alpha;
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
......@@ -82,7 +83,7 @@ namespace Faust
return alpha;
}
template<typename FSFG>
template<>
FSFG Vect<FSFG,GPU2>::sum() const
{
FSFG s;
......@@ -91,66 +92,66 @@ namespace Faust
return s;
}
template<typename FSFG>
template<>
FSFG Vect<FSFG,GPU2>::mean() const
{
return sum()/size();
}
template<typename FSFG>
template<>
void Vect<FSFG,GPU2>::operator*=(const Vect<FSFG,GPU2> &v)
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
dsm_funcs->elt_wise_mul=(this->gpu_mat, v.gpu_mat);
dsm_funcs->elt_wise_mul(this->gpu_mat, v.gpu_mat);
}
template<typename FSFG>
template<>
void Vect<FSFG,GPU2>::operator/=(const Vect<FSFG,GPU2> &v)
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
dsm_funcs->elt_wise_div=(this->gpu_mat, v.gpu_mat);
dsm_funcs->elt_wise_div(this->gpu_mat, v.gpu_mat);
}
/**
 * Equality test on the values returned by get_gpu_mat_ptr().
 *
 * This compares the two handles, not the stored coefficients: two distinct
 * vectors holding the same values compare unequal.
 *
 * \param v vector to compare with.
 * \return true when both objects report the same get_gpu_mat_ptr() value.
 */
template<>
bool Vect<FSFG,GPU2>::operator==(const Vect<FSFG,GPU2> &v) const
{
    return this->get_gpu_mat_ptr() == v.get_gpu_mat_ptr();
}
/**
 * Negation of operator==.
 *
 * The parentheses matter: `! (*this)==v` would apply `!` to *this first and
 * only then compare with v.
 *
 * \param v vector to compare with.
 * \return true when operator== returns false.
 */
template<>
bool Vect<FSFG,GPU2>::operator!=(const Vect<FSFG,GPU2> &v) const
{
    return ! (*this == v);
}
/**
 * Builds a Vect<FSFG,Cpu> from this GPU vector.
 *
 * Calls MatDense<FSFG,GPU2>::tocpu(stream) and constructs the result from
 * size() and the returned matrix's getData() buffer.
 *
 * \param stream forwarded as-is to MatDense<FSFG,GPU2>::tocpu (the default
 *        value, nullptr, is set on the declaration).
 */
template<>
Vect<FSFG,Cpu> Vect<FSFG,GPU2>::tocpu(const void* stream/*=nullptr*/) const
{
    auto cpu_mat = MatDense<FSFG,GPU2>::tocpu(stream);
    return Vect<FSFG, Cpu>(size(), cpu_mat.getData());
}
template<typename FSFG>
template<>
void Vect<FSFG,GPU2>::Display() const
{
Vect<FSFG, Cpu> v = tocpu();
v.Display();
}
template<typename FSFG>
template<>
void Vect<FSFG,GPU2>::setValues(const FSFG& val)
{
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
dsm_funcs->setval(this->gpu_mat, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&val));
dsm_funcs->setval(this->gpu_mat, reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(const_cast<FSFG*>(&val)));
}
template<typename FSFG>
template<>
FSFG Vect<FSFG,GPU2>::mean_relative_error(const Vect<FSFG,GPU2>& ref_vec) const
{
FSFG e;
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs(@FAUST_SCALAR_FOR_GM@(0));
dsm_funcs->mean_relerr(this->get_gpu_mat_ptr(), ref_vec->get_gpu_mat_ptr(), reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&e));
dsm_funcs->mean_relerr(this->get_gpu_mat_ptr(), ref_vec.get_gpu_mat_ptr(), reinterpret_cast<@GM_REINTERPRET_CAST_SCALAR@*>(&e));
}
......
......@@ -28,11 +28,11 @@ namespace Faust
void operator=(const Vect<FPP,Cpu> &v);
void operator*=(const Vect<FPP,GPU2> &v);
void operator/=(const Vect<FPP,GPU2> &v);
void operator==(const Vect<FPP,GPU2> &v)const;
void operator!=(const Vect<FPP,GPU2> &v)const;
bool operator==(const Vect<FPP,GPU2> &v)const;
bool operator!=(const Vect<FPP,GPU2> &v)const;
FPP max();
FPP min();
FPP dot(const Vect<FPP,Cpu> &v);
FPP dot(const Vect<FPP,GPU2> &v);
FPP sum() const;
FPP mean() const;
void Display() const;
......@@ -43,5 +43,5 @@ namespace Faust
void setEyes() = delete;
};
}
#include "faust_Vect_gpu_double.hpp"
#endif
#ifndef __FAUST_TRANSFORM_HELPER_DEVICE__
#define __FAUST_TRANSFORM_HELPER_DEVICE__
#include "faust_Slice.h"
#include <memory>
namespace Faust
{
template<typename FPP,FDevice DEVICE> class Transform;
template<typename FPP,FDevice DEVICE> class TransformHelper;
template<typename FPP,FDevice DEVICE> class Vect;
template<typename FPP,FDevice DEVICE> class MatDense;
template<typename FPP,FDevice DEVICE> class MatGeneric;
/**
 * Base class holding the state declared below for a transform helper on
 * device DEV (the file's include guard calls it the "device" helper).
 *
 * The class is abstract: push_back() is pure virtual and has to be provided
 * by the derived helper.
 *
 * \tparam FPP scalar type.
 * \tparam DEV device the underlying Transform lives on.
 */
template<typename FPP, FDevice DEV>
class TransformHelperGen
{
    public:
        TransformHelperGen();
#ifndef IGNORE_TRANSFORM_HELPER_VARIADIC_TPL
        // Variadic constructor; the argument semantics are defined in
        // faust_TransformHelperGen.hpp (not visible from this header).
        template<typename ...GList> TransformHelperGen(GList& ... t);
#endif
        // The class is used polymorphically (pure virtual push_back), so
        // destroying a derived helper through a TransformHelperGen pointer
        // requires a virtual destructor.
        virtual ~TransformHelperGen() = default;

        /**
         * Appends the matrix M; to be implemented by the derived class.
         *
         * \param M matrix to append.
         * \param optimizedCopy defaults to false (meaning defined by the implementation).
         * \param copying defaults to true (presumably: store a copy of M rather
         *        than M itself -- confirm against the implementation).
         */
        virtual void push_back(const MatGeneric<FPP,DEV>* M, const bool optimizedCopy=false, const bool copying=true)=0;

        // Returns a char derived from the transpose state (presumably 'T' or
        // 'N' -- confirm in faust_TransformHelperGen.hpp).
        const char isTransposed2char() const;

    protected:
        // State flags; their exact semantics are set by the derived helpers.
        bool is_transposed;
        bool is_conjugate;
        bool is_sliced;
        // Two Slice objects (presumably row and column slices -- confirm).
        Slice slices[2];
        bool is_fancy_indexed;
        // Two index buffers with their sizes below (presumably row and column
        // index lists -- confirm). Ownership is not visible from this header.
        faust_unsigned_int * fancy_indices[2];
        faust_unsigned_int fancy_num_rows;
        faust_unsigned_int fancy_num_cols;
        // Underlying transform, held through a shared pointer.
        std::shared_ptr<Transform<FPP,DEV>> transform;
};
}
#include "faust_TransformHelperGen.hpp"
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment