Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 733be5e1 authored by hhakim's avatar hhakim
Browse files

Implement Faust::TransformHelper<FPP, GPU2>::indexMultiply.

parent 93216a54
Branches
Tags
No related merge requests found
......@@ -49,7 +49,8 @@ namespace Faust
Vect<FPP,Cpu> multiply(const Vect<FPP,Cpu> &x);
void multiply(const FPP* cpu_x, FPP* cpu_y);
void multiply(const FPP* cpu_x, int x_ncols, FPP* cpu_y);
FPP* sliceMultiply(const Slice s[2], const FPP* cpu_X, FPP* cpu_out/*=nullptr*/, int X_ncols/*=1*/) const;
FPP* sliceMultiply(const Slice s[2], const FPP* cpu_X, FPP* cpu_out=nullptr, int X_ncols=1) const;
/**
 * \brief Multiplies this Faust, indexed by ids, by the CPU buffer X and returns the result as a CPU buffer.
 *
 * \param ids pair of index arrays forwarded to the GPU Transform (presumably ids[0] are row indices and ids[1] column indices -- TODO confirm).
 * \param id_lens lengths of ids[0] and ids[1].
 * \param X CPU buffer of the right-hand-side matrix.
 * \param ncols number of columns of X.
 * \param out CPU buffer receiving the result; if nullptr a buffer is allocated with new[] and returned (the caller is then presumably responsible for delete[] -- TODO confirm).
 * \return out, or the newly allocated buffer when out was nullptr.
 */
FPP* indexMultiply(faust_unsigned_int* ids[2], size_t id_lens[2], const FPP* X, int ncols=1, FPP* out=nullptr) const;
Real<FPP> normFro(const bool full_array=true, const int batch_size=1) const;
Real<FPP> normL1(const bool full_array=true, const int batch_size=1) const;
Real<FPP> normInf(const bool full_array=true, const int batch_size=1) const;
......
......@@ -923,6 +923,26 @@ namespace Faust
return cpu_out;
}
/**
 * \brief Multiplies this Faust, indexed by ids, by a CPU-side matrix and copies the product back to the CPU.
 *
 * The input buffer is wrapped in a GPU matrix, the product is delegated to
 * Transform<FPP,GPU2>::indexMultiply and the result is transferred to cpu_out.
 *
 * \param ids pair of index arrays forwarded as-is to the GPU Transform (presumably ids[0] are row indices and ids[1] column indices -- TODO confirm).
 * \param id_lens lengths of ids[0] and ids[1]; a zero length means the whole dimension of the Faust is used to size the buffers.
 * \param cpu_X CPU buffer of the right-hand-side matrix, with id_lens[1] rows (or getNbCol() rows when id_lens[1] is 0) and X_ncols columns.
 * \param X_ncols number of columns of cpu_X.
 * \param cpu_out CPU buffer receiving the result; if nullptr, a buffer of out_nrows*X_ncols elements is allocated with new[] (the caller is then presumably responsible for delete[] -- TODO confirm).
 * \return cpu_out, or the newly allocated buffer when cpu_out was nullptr.
 */
template<typename FPP>
FPP* Faust::TransformHelper<FPP,GPU2>::indexMultiply(faust_unsigned_int* ids[2], size_t id_lens[2], const FPP* cpu_X, int X_ncols/*=1*/, FPP* cpu_out/*=nullptr*/) const
{
int32_t X_nrows;
// The row count of X must match the number of selected columns, hence the
// test on id_lens[1] (testing id_lens[0] gave 0 rows when only row indices
// were provided).
if(id_lens[1] > 0)
X_nrows = id_lens[1];
else
X_nrows = this->getNbCol();
MatDense<FPP,GPU2> gpu_X(X_nrows, X_ncols, cpu_X, false);
MatDense<FPP,GPU2> gpu_M = this->transform->indexMultiply(ids, id_lens, gpu_X, this->isTransposed2char());
if(cpu_out == nullptr)
{
auto out_nrows = id_lens[0]>0?id_lens[0]:this->getNbRow();
auto out_ncols = X_ncols;
// new FPP[n] already allocates n elements of type FPP: no sizeof(FPP) factor needed.
cpu_out = new FPP[out_nrows*out_ncols];
}
gpu_M.tocpu(cpu_out, nullptr);
return cpu_out;
}
}
#include "faust_TransformHelper_cat_gpu.hpp"
......@@ -388,7 +388,7 @@ namespace Faust
{
auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
if(gpu_mat_arr == nullptr) throw std::runtime_error("gpu_mat_arr is nullptr");
//TODO: refactor this code with other functions doing the same
//TODO: refactor this code with other functions doing the same about op
gm_Op op;
faust_unsigned_int out_nrows, out_ncols;
if(opThis == 'N')
......@@ -413,11 +413,38 @@ namespace Faust
else
cs_size = 0;
MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
// TODO: call chain_matmul_by_dsm_one if both cs_size and rs_size are null
// if both cs_size and rs_size are null the following call will call chain_matmul_by_dsm_one
out.gpu_mat = marr_funcs->sliced_chain_matmul_by_dsm_one(this->gpu_mat_arr, (int) s[0].start_id, (int) rs_size, (int) s[1].start_id, (int) cs_size, op, gpu_X.gpu_mat);
return out;
}
/**
 * \brief Multiplies the GPU matrix chain of this Transform, indexed by ids, by gpu_X.
 *
 * The computation is delegated to the backend function indexed_chain_matmul_by_dsm_one.
 *
 * \param ids pair of index arrays passed to the backend (presumably ids[0] are row indices and ids[1] column indices -- TODO confirm).
 * \param id_lens lengths of ids[0] and ids[1].
 * \param gpu_X right-hand-side GPU matrix.
 * \param opThis 'N' (no transpose), 'T' (transpose) or 'H' (conjugate transpose) applied to this Transform.
 * \return a MatDense wrapping the GPU matrix returned by the backend.
 * \throws std::runtime_error if gpu_mat_arr is nullptr or if opThis is not 'N', 'T' or 'H'.
 */
template<>
MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::indexMultiply(faust_unsigned_int* ids[2], size_t id_lens[2], MatDense<@FAUST_SCALAR_FOR_GM@, GPU2>& gpu_X, const char opThis) const
{
auto marr_funcs = GPUModHandler::get_singleton()->marr_funcs((@FAUST_SCALAR_FOR_GM@)(0));
if(gpu_mat_arr == nullptr) throw std::runtime_error("gpu_mat_arr is nullptr");
//TODO: refactor this code with other functions doing the same about op
gm_Op op;
// out_nrows/out_ncols only size the no_alloc placeholder below; they are set
// in every valid branch so the constructor never reads indeterminate values.
faust_unsigned_int out_nrows, out_ncols;
if(opThis == 'N')
{
op = OP_NOTRANSP;
out_nrows = getNbRow();
out_ncols = getNbCol();
}
else if(opThis == 'T')
{
op = OP_TRANSP;
// same convention as the 'N' branch with the two dimensions swapped
out_nrows = getNbCol();
out_ncols = getNbRow();
}
else if(opThis == 'H')
{
op = OP_CONJTRANSP;
out_nrows = getNbCol();
out_ncols = getNbRow();
}
else
throw std::runtime_error("Invalid opThis");
MatDense<@FAUST_SCALAR_FOR_GM@,GPU2> out(out_nrows, out_ncols, nullptr, /*no_alloc*/true);
// the placeholder's gpu_mat is set to the matrix returned by the backend
out.gpu_mat = marr_funcs->indexed_chain_matmul_by_dsm_one(this->gpu_mat_arr, ids, id_lens, op, gpu_X.gpu_mat);
return out;
}
template<>
void Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2>::multiplyLeft(const Faust::Transform<@FAUST_SCALAR_FOR_GM@,GPU2> & A)
{
......
......@@ -68,6 +68,7 @@ namespace Faust
MatDense<FPP,GPU2> multiply(const MatDense<FPP,GPU2> &A, const char opThis);
void multiply(const Transform<FPP,GPU2> & A);
MatDense<FPP, GPU2> sliceMultiply(const Slice s[2], MatDense<FPP, GPU2>& gpu_X, const char opThis) const;
/**
 * \brief Multiplies this Transform, indexed by ids, by the GPU matrix gpu_X.
 *
 * \param ids pair of index arrays (presumably ids[0] are row indices and ids[1] column indices -- TODO confirm).
 * \param id_lens lengths of ids[0] and ids[1].
 * \param gpu_X right-hand-side GPU matrix.
 * \param opThis operation applied to this Transform: 'N', 'T' or 'H'.
 * \return the product as a GPU dense matrix.
 */
MatDense<FPP, GPU2> indexMultiply(faust_unsigned_int* ids[2], size_t id_lens[2], MatDense<FPP, GPU2>& gpu_X, const char opThis) const;
void multiplyLeft(const Transform<FPP,GPU2> & A);
void multiply(const FPP& a, const int32_t id=-1);
Vect<FPP,GPU2> multiply(const Vect<FPP,GPU2>& x, const char opThis='N');
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment