Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ef26d239 authored by hhakim's avatar hhakim
Browse files

Add TransformHelper<FPP,GPU2>::sliceMultiply and update to gpu_mod@f6f99e42.

parent 45a7038b
Branches
Tags
No related merge requests found
Subproject commit 1ec807770f677154c026b8d1679f86f089ca937f
Subproject commit f6f99e42280ce50bbe96c7364b303671b9397934
......@@ -49,6 +49,7 @@ namespace Faust
Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> &x);
void multiply(const FPP* cpu_x, FPP* cpu_y);
void multiply(const FPP* cpu_x, int x_ncols, FPP* cpu_y);
/**
 * Multiplies a slice of this Faust by a matrix given as a raw CPU buffer.
 *
 * \param s two Slice objects; presumably s[0] selects rows and s[1] selects columns -- TODO confirm.
 * \param cpu_X input buffer holding X_ncols columns.
 * \param cpu_out buffer receiving the product. The "=nullptr" hint is commented out, so there is no actual default value.
 * \param X_ncols number of columns of cpu_X. The "=1" hint is commented out too.
 * \return a pointer to the result buffer.
 */
FPP* sliceMultiply(const Slice s[2], const FPP* cpu_X, FPP* cpu_out/*=nullptr*/, int X_ncols/*=1*/) const;
Real<FPP> normFro(const bool full_array=true, const int batch_size=1) const;
Real<FPP> normL1(const bool full_array=true, const int batch_size=1) const;
Real<FPP> normInf(const bool full_array=true, const int batch_size=1) const;
......
......@@ -364,7 +364,7 @@ namespace Faust
/**
 * Multiplies this Faust by the CPU dense matrix A.
 *
 * A is converted to a MatDense<FPP,GPU2>, the GPU2 multiply overload is called on it,
 * and the product is converted back with tocpu().
 *
 * \param A the right-hand side matrix.
 * \return the product as a MatDense<FPP,Cpu>.
 */
template<typename FPP>
MatDense<FPP,Cpu> TransformHelper<FPP,GPU2>::multiply(const Faust::MatDense<FPP,Cpu> &A)
{
// NOTE(review): the next two statements both declare M. They look like the removed and the
// added version of one line shown together by a diff view -- confirm which one the real file keeps.
MatDense<FPP,GPU2> M = this->multiply(MatDense<FPP,GPU2>(A), transpose, conjugate);
MatDense<FPP,GPU2> M = this->multiply(MatDense<FPP,GPU2>(A));
return M.tocpu();
}
......@@ -372,7 +372,7 @@ namespace Faust
Vect<FPP,Cpu> TransformHelper<FPP,GPU2>::multiply(const Faust::Vect<FPP,Cpu> &A)
{
// Multiplies this Faust by the CPU vector A: a Vect<FPP,GPU2> is built from A's size and data,
// the GPU2 multiply overload is applied, and the result is converted back with tocpu().
Vect<FPP,GPU2> gpu_A(A.size(), A.getData());
// NOTE(review): the next two statements both declare v. They look like the removed and the
// added version of one line shown together by a diff view -- confirm which one the real file keeps.
Vect<FPP,GPU2> v = this->multiply(gpu_A , transpose, conjugate); //TODO: handle transpose and conjugate
Vect<FPP,GPU2> v = this->multiply(gpu_A); //TODO: handle transpose and conjugate
return v.tocpu();
}
......@@ -381,7 +381,7 @@ namespace Faust
{
int32_t in_vec_size = this->getNbCol();
Vect<FPP,GPU2> gpu_A(in_vec_size, cpu_in_buf);
Vect<FPP,GPU2> v = this->multiply(gpu_A , transpose, conjugate); //TODO: handle transpose and conjugate
Vect<FPP,GPU2> v = this->multiply(gpu_A); //TODO: handle transpose and conjugate
v.tocpu(cpu_out_buf);
}
......@@ -394,7 +394,7 @@ namespace Faust
else
x_nrows = this->transform->getNbCol();
MatDense<FPP,GPU2> gpu_x(x_nrows, x_ncols, cpu_x_buf, false);
MatDense<FPP,GPU2> gpu_M = this->multiply(gpu_x, transpose, conjugate); //TODO: handle transpose and conjugate
MatDense<FPP,GPU2> gpu_M = this->multiply(gpu_x); //TODO: handle transpose and conjugate
// TODO: fix this function, it works until here then it segfaults or gives a cuda error with tocpu (even if I use a cpu matdense set locally)
gpu_M.tocpu(cpu_out_buf, nullptr);
}
......@@ -855,6 +855,18 @@ namespace Faust
this->eval_fancy_idx_Transform();
}
/**
 * Multiplies a slice of this Faust by a matrix given as a raw CPU buffer.
 *
 * cpu_X is wrapped in a MatDense<FPP,GPU2> with getNbCol() rows and X_ncols columns.
 * The product is delegated to Transform<FPP,GPU2>::sliceMultiply and then copied into
 * cpu_out with tocpu().
 *
 * \param s two Slice objects forwarded unchanged to Transform::sliceMultiply
 *          (presumably s[0] is the row slice and s[1] the column slice -- TODO confirm).
 * \param cpu_X input buffer; presumably it must hold getNbCol()*X_ncols elements -- TODO confirm.
 * \param cpu_out destination buffer handed directly to tocpu(). Nothing is allocated here,
 *          so despite the "=nullptr" hint the caller has to provide the buffer.
 * \param X_ncols number of columns of cpu_X.
 * \return cpu_out.
 */
template<typename FPP>
FPP* Faust::TransformHelper<FPP,GPU2>::sliceMultiply(const Slice s[2], const FPP* cpu_X, FPP* cpu_out/*=nullptr*/, int X_ncols/*=1*/) const
{
    //TODO: take care of eval_sliced_Transform calls
    // getNbCol() is transpose and slice aware and does not evaluate the slice.
    const int32_t X_nrows = this->getNbCol();
    MatDense<FPP,GPU2> gpu_X(X_nrows, X_ncols, cpu_X, false);
    // Transform::sliceMultiply declares opThis without a default value, so the operation
    // has to be spelled out here; 'N' (no transposition) matches the default used by the
    // other Transform methods.
    //TODO: forward the transpose/conjugate state of this helper instead of always using 'N'
    MatDense<FPP,GPU2> gpu_M = this->transform->sliceMultiply(s, gpu_X, 'N');
    gpu_M.tocpu(cpu_out, nullptr);
    return cpu_out;
}
}
#include "faust_TransformHelper_cat_gpu.hpp"
......@@ -383,6 +383,41 @@ namespace Faust
}
}
/**
 * Multiplies a slice of this Transform by the dense matrix gpu_X.
 *
 * The row slice s[0] and the column slice s[1] are turned into (start, size) pairs and passed,
 * together with the requested operation, to sliced_chain_matmul_by_dsm_one. A size of 0 is
 * passed when a slice covers the whole dimension.
 *
 * \param s s[0] is the row slice, s[1] the column slice; end_id is excluded from the slice.
 * \param gpu_X the right-hand side matrix.
 * \param opThis 'N' (no transposition), 'T' (transpose) or 'H' (conjugate transpose).
 * \return a MatDense whose gpu_mat is the result returned by sliced_chain_matmul_by_dsm_one.
 * \throws std::runtime_error if gpu_mat_arr is nullptr or opThis is not 'N', 'T' or 'H'.
 */
template<>
MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::sliceMultiply(const Slice s[2], MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>& gpu_X, const char opThis) const
{
    auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
    if(gpu_mat_arr == nullptr) throw std::runtime_error("gpu_mat_arr is nullptr");
    //TODO: refactor this code with other functions doing the same
    gm_Op op;
    faust_unsigned_int out_nrows, out_ncols;
    switch(opThis)
    {
        case 'N':
            op = OP_NOTRANSP;
            out_nrows = getNbRow();
            out_ncols = getNbCol();
            break;
        case 'T':
            op = OP_TRANSP;
            // The dimensions were previously left uninitialized for the transposed
            // operations; use the swapped dimensions of this Transform.
            out_nrows = getNbCol();
            out_ncols = getNbRow();
            break;
        case 'H':
            op = OP_CONJTRANSP;
            out_nrows = getNbCol();
            out_ncols = getNbRow();
            break;
        default:
            throw std::runtime_error("Invalid opThis");
    }
    // A size of 0 means "no slicing along this dimension".
    // end_id is not included in the slice.
    const bool all_rows = s[0].start_id == 0 && s[0].end_id == getNbRow();
    const int rs_size = all_rows ? 0 : static_cast<int>(s[0].end_id - s[0].start_id);
    // The column slice must be compared to the number of columns (it was compared to getNbRow()).
    const bool all_cols = s[1].start_id == 0 && s[1].end_id == getNbCol();
    const int cs_size = all_cols ? 0 : static_cast<int>(s[1].end_id - s[1].start_id);
    // NOTE(review): out is declared with the dimensions of the whole Transform, not those of the
    // sliced product; presumably the gpu_mat assigned below carries the actual size -- TODO confirm.
    MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
    // TODO: call chain_matmul_by_dsm_one if both cs_size and rs_size are null
    out.gpu_mat = marr_funcs->sliced_chain_matmul_by_dsm_one(this->gpu_mat_arr, static_cast<int>(s[0].start_id), rs_size, static_cast<int>(s[1].start_id), cs_size, op, gpu_X.gpu_mat);
    return out;
}
template<>
void Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiplyLeft(const Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2> & A)
{
......
......@@ -7,6 +7,7 @@
#include "faust_MatSparse_gpu.h"
#include "faust_MatDense_gpu.h"
#include "faust_RefManager.h"
#include "faust_Slice.h"
#include <vector>
namespace Faust
......@@ -66,6 +67,7 @@ namespace Faust
void get_product(MatDense<FPP,GPU2>& M, const char opThis='N', const bool isConj=false) const;
MatDense<FPP,GPU2> multiply(const MatDense<FPP,GPU2> &A, const char opThis);
void multiply(const Transform<FPP,GPU2> & A);
/**
 * Multiplies a slice of this Transform by the dense matrix gpu_X.
 *
 * \param s two Slice objects describing the part of the Transform to use.
 * \param gpu_X the right-hand side matrix.
 * \param opThis operation applied to this Transform: 'N', 'T' or 'H'. It has no default value,
 *        unlike the neighbouring get_product and Vect multiply declarations.
 * \return the product as a MatDense<FPP,GPU2>.
 */
MatDense<FPP, GPU2> sliceMultiply(const Slice s[2], MatDense<FPP, GPU2>& gpu_X, const char opThis) const;
void multiplyLeft(const Transform<FPP,GPU2> & A);
void multiply(const FPP& a, const int32_t id=-1);
Vect<FPP,GPU2> multiply(const Vect<FPP,GPU2>& x, const char opThis='N');
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment