Commit c477df16 authored by hhakim
Implement GPU2 spgemm prototypes as on CPU and use them to implement gemm_gen for GPU2 + update to gpu_mod@0f2c75e4.
parent 3ed7504a
Subproject commit 48c6b03a5b5e17b69ca21e4bc6c300f53e5e10f5
Subproject commit 0f2c75e43083e6bacc0fae9e1ce5ce23873b75ed
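For reference, the new GPU2 free-function prototypes mirror the existing CPU one; condensed from the hunks below:

// CPU (existing):
template<typename FPP>
void spgemm(const MatSparse<FPP,Cpu> & A, const MatDense<FPP,Cpu> & B, MatDense<FPP,Cpu> & C, const FPP & alpha, const FPP & beta, char typeA, char typeB);

// GPU2 (added by this commit):
template<typename FPP>
void spgemm(const MatSparse<FPP,GPU2> & A, const MatDense<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP & alpha, const FPP & beta, char opA, char opB);

template<typename FPP>
void spgemm(const MatDense<FPP,GPU2> & A, const MatSparse<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP & alpha, const FPP & beta, char opA, char opB, int impl_meth = 1);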
......@@ -262,9 +262,9 @@ template<typename FPP>
void Faust::spgemm(const Faust::MatSparse<FPP,Cpu> & A,const Faust::MatDense<FPP,Cpu> & B, Faust::MatDense<FPP,Cpu> & C,const FPP & alpha, const FPP & beta, char typeA, char typeB)
{
//TODO: refactoring should be done to avoid repeating similar block of code for different cases (typeA,typeB,alpha,beta)
//#ifdef __COMPILE_TIMERS__
// A.t_gemm.start();
//#endif
faust_unsigned_int nbRowOpA,nbRowOpB,nbColOpA,nbColOpB;
if (((&(C.mat)) == (&(B.mat))))
......@@ -309,7 +309,7 @@ void Faust::spgemm(const Faust::MatSparse<FPP,Cpu> & A,const Faust::MatDense<FPP
handleError("linear_algebra", "Faust::spgemm : invalid dimension for output matrix C");
}
C.resize(nbRowOpA,nbColOpB);
......@@ -453,9 +453,9 @@ void Faust::spgemm(const Faust::MatSparse<FPP,Cpu> & A,const Faust::MatDense<FPP
}
C.isZeros = false;
C.set_id(false);
//#ifdef __COMPILE_TIMERS__
//A.t_gemm.stop();
//#endif
}
template<typename FPP>
......@@ -905,6 +905,7 @@ namespace Faust
{
template<typename FPP> void gemm_gen(const MatGeneric<FPP, Cpu>& A, const MatGeneric<FPP, Cpu>& B, MatDense<FPP, Cpu>& out, const FPP alpha/*=FPP(1.0)*/, const FPP beta/*=(0.0)*/, const char opA/*='N'*/, const char opB/*='N'*/)
{
//TODO: refactor this function with at least 3 new functions gemm_spA, gemm_dsA, gemm_bsrA
std::runtime_error type_err("faust_linear_algebra mul function doesn't handle other type of factors than MatDense, MatSparse or MatBSR.");
if(opA != 'N' && opA != 'T' && opA != 'H')
throw std::runtime_error("opA must be among 'N', 'T', 'H'");
......
......@@ -485,6 +485,22 @@ namespace Faust
mat.gpu_mat = gpu_dmat;
}
template<>
void MatSparse<FSFG, GPU2>::spgemm(const MatSparse<FSFG,GPU2> & A, const MatDense<FSFG,GPU2> & B, MatDense<FSFG,GPU2> & C, const FSFG & alpha, const FSFG & beta, char opA, char opB)
{
gm_Op gop_A;
gm_Op gop_B;
char2gm_Op(opA, gop_A);
char2gm_Op(opB, gop_B);
auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
auto dsm_funcs = GPUModHandler::get_singleton()->dsm_funcs((@FAUST_SCALAR_FOR_GM@)(0));
spm_funcs->gemm(A.gpu_mat, B.gpu_mat, C.gpu_mat,
(const @GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&alpha),
(const @GM_SCALAR@*) reinterpret_cast<const @GM_REINTERPRET_CAST_SCALAR@*>(&beta),
gop_A, gop_B);
}
template<>
size_t Faust::MatSparse<FSFG,GPU2>::getNBytes() const
{
......
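The @FAUST_SCALAR_FOR_GM@, @GM_SCALAR@ and @GM_REINTERPRET_CAST_SCALAR@ tokens in the spgemm specialization above are placeholders substituted at build time (this file is presumably a template configured by the build system, e.g. CMake's configure_file). Assuming all three expand to double for the real double-precision instantiation, the wrapped gpu_mod call would read roughly as follows (a sketch, not the generated source):

auto spm_funcs = GPUModHandler::get_singleton()->spm_funcs((double)(0));
spm_funcs->gemm(A.gpu_mat, B.gpu_mat, C.gpu_mat,
    (const double*) reinterpret_cast<const double*>(&alpha), // alpha forwarded to the gpu_mod backend
    (const double*) reinterpret_cast<const double*>(&beta),  // beta forwarded to the gpu_mod backend
    gop_A, gop_B);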
......@@ -101,6 +101,7 @@ namespace Faust
MatType getType() const;
void multiply(Vect<FPP,GPU2>& vec, char opThis='N') const;
void multiply(MatDense<FPP,GPU2>& vec, char opThis='N') const;
static void spgemm(const MatSparse<FPP,GPU2> & A, const MatDense<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP & alpha, const FPP & beta, const char opA, const char opB);
MatBSR<FPP, GPU2> to_bsr(int bsize) const;
~MatSparse();
......
......@@ -12,20 +12,22 @@ namespace Faust
template<typename FPP>
void gemm(const MatDense<FPP,GPU2> & A,const MatDense<FPP,GPU2> & B, MatDense<FPP,GPU2> & C,const FPP alpha, const FPP beta, char opA, char opB);
// Computes alpha*opA(A)*b+ beta*C into C.
template<typename FPP>
void gemv(const MatDense<FPP, GPU2> &A, const Vect<FPP, GPU2> &b, Vect<FPP, GPU2> &C, const FPP& alpha, const FPP& beta, const char opA);
// Computes alpha*opA(A)*opB(B)+ beta*C into C.
template<typename FPP>
void gemm_gen(const MatGeneric<FPP,GPU2> & A, const MatGeneric<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP alpha, const FPP beta, char opA, char opB);
// TODO: implement using MatSparse::multiply; warning: 'H' is not supported for opB (see gpu_mod / https://docs.nvidia.com/cuda/archive/9.2/cusparse/index.html, cusparseTcsrmm2, for more details), so do a copy-conjugate manually beforehand
// template<typename FPP>
// void spgemm(const MatSparse<FPP,Cpu> & A,const MatDense<FPP,Cpu> & B, MatDense<FPP,Cpu> & C,const FPP & alpha, const FPP & beta, char opA, char opB);
//
// template<typename FPP>
// void spgemm(const MatDense<FPP,Cpu> & A,const MatSparse<FPP,Cpu> & B, MatDense<FPP,Cpu> & C,const FPP & alpha, const FPP & beta, char opA, char opB);
// Computes alpha*opA(A)*opB(B)+ beta*C into C.
template<typename FPP>
void spgemm(const MatSparse<FPP,GPU2> & A,const MatDense<FPP,GPU2> & B, MatDense<FPP,GPU2> & C,const FPP & alpha, const FPP & beta, char opA, char opB);
// Computes alpha*opA(A)*opB(B)+ beta*C into C.
// \param impl_meth: in any case this function relies on the previous spgemm prototype; if impl_meth is 1, transpose/transconjugate operations are used to avoid converting A and B to another matrix type; otherwise (impl_meth is any other value) A is converted to a MatSparse and B to a MatDense.
template<typename FPP>
void spgemm(const MatDense<FPP,GPU2> & A,const MatSparse<FPP,GPU2> & B, MatDense<FPP,GPU2> & C,const FPP & alpha, const FPP & beta, char opA, char opB, int impl_meth = 1);
}
#include "faust_linear_algebra_gpu.hpp"
......
......@@ -13,6 +13,95 @@ namespace Faust
MatDense<FPP, GPU2>::gemv(A, b, c, alpha, beta);
}
template<typename FPP>
void spgemm(const MatSparse<FPP,GPU2> & A,const MatDense<FPP,GPU2> & B, MatDense<FPP,GPU2> & C,const FPP & alpha, const FPP & beta, char opA, char opB)
{
MatSparse<FPP, GPU2>::spgemm(A, B, C, alpha, beta, opA, opB);
}
template<typename FPP>
void spgemm(const MatDense<FPP,GPU2> & A, const MatSparse<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP & alpha, const FPP & beta, char opA, char opB, int impl_meth/* = 1*/)
{
//TODO: benchmark the two methods (impl_meth == 1 and 2)
if (impl_meth == 1)
{
// transpose / adjoint the product to rely on the other spgemm signature (the MatSparse B becomes the lhs matrix, i.e. A)
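// Identities used by the branches below, for the 'N'/'T'/'H' combinations:
//   op(A)*op(B) = (op(B)^T * op(A)^T)^T   and   op(A)*op(B) = (op(B)^H * op(A)^H)^H
// so the product is evaluated with the sparse operand on the left through the
// spgemm(MatSparse, MatDense, ...) prototype, then transposed/adjointed back into C
// (the mixed 'T'/'H' cases first conjugate one operand to fall back on these identities).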
char nopA, nopB;
MatDense<FPP, GPU2> nA(A);
MatSparse<FPP, GPU2> nB(B);
if(opA == 'N' && opB == 'N')
{
nopA = 'T';
nopB = 'T';
C.resize(nB.getNbCol(), nA.getNbRow());
spgemm(nB, nA, C, alpha, beta, nopB, nopA);
C.transpose();
}
else if(opA == 'N' && opB == 'T')
{
nopA = 'T';
C.resize(nB.getNbRow(), nA.getNbRow());
spgemm(nB, nA, C, alpha, beta, opB, nopA);
C.transpose();
}
else if(opA == 'T' && opB == 'N')
{
nopB = 'T';
C.resize(nB.getNbCol(), nA.getNbCol());
spgemm(nB, nA, C, alpha, beta, nopB, opA);
C.transpose();
}
else if(opA == 'T' && opB == 'T')
{
C.resize(nB.getNbRow(), nA.getNbCol());
spgemm(nB, nA, C, alpha, beta, opB, opA);
C.transpose();
}
else if(opA == 'N' && opB == 'H')
{
nopA = 'H';
C.resize(nB.getNbRow(), nA.getNbRow());
spgemm(nB, nA, C, alpha, beta, opB, nopA);
C.adjoint();
}
else if(opA == 'H' && opB == 'N')
{
nopB = 'H';
C.resize(nB.getNbCol(), nA.getNbCol());
spgemm(nB, nA, C, alpha, beta, nopB, opA);
C.adjoint();
}
else if(opA == 'H' && opB == 'H')
{
C.resize(nB.getNbRow(), nA.getNbCol());
spgemm(nB, nA, C, alpha, beta, opB, opA);
C.adjoint();
}
else if(opA == 'H' && opB == 'T')
{
nopA = 'N';
nB.conjugate();
nopB = 'N';
C.resize(nB.getNbRow(), nA.getNbCol());
spgemm(nB, nA, C, alpha, beta, nopB, nopA);
C.adjoint();
}
else if(opA == 'T' && opB == 'H')
{
nA.conjugate();
nopA = 'N';
nopB = 'N';
C.resize(nB.getNbRow(), nA.getNbCol());
spgemm(nB, nA, C, alpha, beta, nopB, nopA);
C.adjoint();
}
}
else {
spgemm(MatSparse<FPP, GPU2>(A), MatDense<FPP, GPU2>(B), C, alpha, beta, opA, opB);
}
}
template<typename FPP>
void gemm_gen(const MatGeneric<FPP,GPU2> & A, const MatGeneric<FPP,GPU2> & B, MatDense<FPP,GPU2> & C, const FPP alpha, const FPP beta, char typeA, char typeB)
{
......@@ -20,7 +109,7 @@ namespace Faust
const MatSparse<FPP, GPU2>* spB;
const MatDense<FPP, GPU2>* dsA;
const MatDense<FPP, GPU2>* dsB;
// downcast and call the proper function
spA = dynamic_cast<const Faust::MatSparse<FPP,GPU2>*>(&A);
if(! spA)
dsA = dynamic_cast<const Faust::MatDense<FPP,GPU2>*>(&A);
......@@ -30,11 +119,9 @@ namespace Faust
if(spA && spB)
throw std::runtime_error("gemm on two MatSparse is not supported.");
else if(spA)
spgemm(*spA, *dsB, C, alpha, beta, typeA, typeB);
else if(spB)
spgemm(*dsA, *spB, C, alpha, beta, typeA, typeB);
else
gemm(*dsA, *dsB, C, alpha, beta, typeA, typeB);
}
......
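To illustrate the effect of the gemm_gen change, a minimal usage sketch; it is hypothetical in that it assumes the GPU2 backend (gpu_mod) is loaded and that the GPU2 matrices can be built from CPU counterparts A_cpu/B_cpu, which is not shown in this diff:

// assuming the enclosing code is in (or uses) namespace Faust
MatSparse<double, GPU2> A_gpu(A_cpu); // assumed: construction from a CPU MatSparse<double, Cpu>
MatDense<double, GPU2> B_gpu(B_cpu);  // assumed: construction from a CPU MatDense<double, Cpu>
MatDense<double, GPU2> C_gpu;
// A_gpu is sparse, B_gpu is dense: gemm_gen now dispatches to spgemm(*spA, *dsB, C, ...)
// instead of throwing "spgemm is not supported yet on GPU2."
gemm_gen(A_gpu, B_gpu, C_gpu, 2.0, 0.0, 'N', 'T'); // C = 2*A*B^T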