Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 3376b752 authored by hhakim's avatar hhakim
Browse files

Update MatDense<FPP,GPU2>::multiply(MatSparse<FPP,Cpu> freeing gpug memory and...

Update MatDense<FPP,GPU2>::multiply(MatSparse<FPP,Cpu>) to free GPU memory, and add a new signature that outputs into a MatDense<FPP,GPU2>.
parent 2ac9f7ba
No related merge requests found
......@@ -10,32 +10,48 @@ using namespace Faust;
/**
 * Exercises the MatDense<double,GPU2>::multiply overloads and prints, for each
 * one, the relative error against a product computed on the CPU.
 *
 * Four variants are run on the same pair of random matrices:
 *   1. GPU dense * GPU dense  (result read back from the GPU operand),
 *   2. GPU dense * CPU dense  (result read back from the CPU operand),
 *   3. GPU dense * CPU sparse, output in a CPU dense matrix,
 *   4. GPU dense * CPU sparse, output in a GPU dense matrix.
 *
 * The function only prints norms and errors; it does not assert on them.
 */
void test_mul_gpu_dense()
{
	// mat1 is nrows x ncols, mat2 is nrows2 x ncols2; ncols == nrows2 so mat1*mat2 is defined.
	faust_unsigned_int nrows = 90, ncols = 90;
	faust_unsigned_int nrows2 = 90, ncols2 = 150;
	// NOTE(review): the randMat() results are used through '->' and never released in this
	// function; if randMat returns a raw owning pointer this leaks -- confirm ownership.
	auto cpu_mat1 = Faust::MatDense<double,Cpu>::randMat(nrows,ncols);
	MatDense<double,GPU2> gpu_mat1(nrows, ncols, cpu_mat1->getData());
	auto cpu_mat2 = Faust::MatDense<double,Cpu>::randMat(nrows2,ncols2);
	MatDense<double,GPU2> gpu_mat2(nrows2, ncols2, cpu_mat2->getData());

	// test MatDense<FPP,GPU2> * MatDense<FPP,GPU2>
	cout << "Mul. GPUDense*GPUDense in GPUDense" << endl;
	gpu_mat1.multiply(gpu_mat2);
	// The product is read back from the right-hand operand.
	auto cpu_mat1_mat2_test = gpu_mat2.tocpu();
	// CPU reference: start from a copy of mat2 and multiply it by mat1 on the CPU.
	auto cpu_mat1_mat2_ref = *cpu_mat2;
	cpu_mat1->multiply(cpu_mat1_mat2_ref, 'N');
	cout << "ref norm: " << cpu_mat1_mat2_ref.norm() << endl;
	cout << "test norm: " << cpu_mat1_mat2_test.norm() << endl;
	auto err_diff = cpu_mat1_mat2_ref;
	err_diff -= cpu_mat1_mat2_test;
	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;

	// test MatDense<FPP,GPU2> * MatDense<FPP,Cpu>
	cout << "Mul. GPUDense*CPUDense in CPUDense" << endl;
	// Reset the test matrix to mat2 before it is passed as the operand.
	cpu_mat1_mat2_test = *cpu_mat2;
	gpu_mat1.multiply(cpu_mat1_mat2_test);
	err_diff = cpu_mat1_mat2_ref;
	err_diff -= cpu_mat1_mat2_test;
	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;

	// test MatDense<FPP,GPU2> * MatSparse<FPP,Cpu>
	cout << "Mul. GPUDense*CPUSparse in CPUDense" << endl;
	Faust::MatSparse<double,Cpu> cpu_mat2_sparse(*cpu_mat2);
	// cout << cpu_mat2_sparse.to_string(false, true) << endl;
	// cout << cpu_mat2->to_string(false, true) << endl;
	gpu_mat1.multiply(cpu_mat2_sparse, cpu_mat1_mat2_test);
	err_diff = cpu_mat1_mat2_ref;
	err_diff -= cpu_mat1_mat2_test;
	cout << "ref norm: " << cpu_mat1_mat2_ref.norm() << endl;
	cout << "test norm: " << cpu_mat1_mat2_test.norm() << endl;
	// cout << cpu_mat1_mat2_ref.to_string(false, true) << endl;
	// cout << cpu_mat1_mat2_test.to_string(false, true) << endl;
	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;

	// Same sparse product, but with the output kept on the GPU and copied back for comparison.
	cout << "Mul. GPUDense*CPUSparse in GPUDense" << endl;
	MatDense<double, GPU2> gpu_mat1_mat2_test(nrows, ncols2);
	gpu_mat1.multiply(cpu_mat2_sparse, gpu_mat1_mat2_test);
	auto gpu_mat1_mat2_test_to_cpu = gpu_mat1_mat2_test.tocpu();
	err_diff = gpu_mat1_mat2_test_to_cpu;
	err_diff -= cpu_mat1_mat2_ref;
	cout << "err mul.: " << err_diff.norm()/cpu_mat1_mat2_ref.norm() << endl;
}
......
......@@ -21,6 +21,7 @@ namespace Faust
// Multiplies this GPU matrix by the CPU dense matrix `other`.
// NOTE(review): callers appear to read the product back from `other` -- confirm against the implementation.
void multiply(MatDense<FPP,Cpu> &other, const char op_this='N');
// void multiply(MatSparse<FPP, Cpu> &other, MatDense<FPP, GPU2>& output, const char op_this='N');
// Multiplies this GPU matrix by the CPU sparse matrix `other` and stores the product in the CPU matrix `output`.
// NOTE(review): the implementation passes OP_NOTRANSP unconditionally, so `op_this` does not seem to be honoured yet.
void multiply(const MatSparse<FPP, Cpu> &other, MatDense<FPP, Cpu>& output, const char op_this='N');
// Same product as above, but the result stays on the GPU: `output`'s previous GPU buffer (if any) is freed and replaced.
// NOTE(review): `op_this` is likewise not used by the implementation (OP_NOTRANSP is hard-coded).
void multiply(const MatSparse<FPP, Cpu> &other, MatDense<FPP, GPU2>& output, const char op_this='N');
// Returns this matrix as a MatDense<FPP,Cpu> (presumably a device-to-host copy -- confirm).
MatDense<FPP, Cpu> tocpu();
// Destructor; presumably releases the GPU buffer held by this object -- definition not shown here.
~MatDense<FPP, GPU2>();
private:
......
......@@ -53,11 +53,24 @@ void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const MatSparse<@FAUS
auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*)this->dsm_funcs);
//togpu(int32_t nrows, int32_t ncols, int32_t nnz, int32_t* row_ptr, int32_t* col_inds, @GM_SCALAR@* values)
auto other_gpu = spm_funcs->togpu(other.getNbRow(), other.getNbCol(), other.getNonZeros(), (int32_t*) other.getRowPtr(), (int32_t*) other.getColInd(), (double*) other.getValuePtr());
MatDense<@FAUST_SCALAR_FOR_GM@, GPU2> output_gpu_mat(getNbRow(), other.getNbCol());
// mul_gpu_spm_ext(gm_DenseMat_t, gm_SparseMat_t, gm_DenseMat_t output, gm_Op, gm_Op);
dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, output_gpu_mat.gpu_mat, OP_NOTRANSP, OP_NOTRANSP);
auto output_gpu_mat = dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, nullptr, OP_NOTRANSP, OP_NOTRANSP);
output = MatDense<@FAUST_SCALAR_FOR_GM@, Cpu>(getNbRow(), other.getNbCol()); //TODO: manage transpose case
dsm_funcs->tocpu(output_gpu_mat.gpu_mat, output.getData());
dsm_funcs->tocpu(output_gpu_mat, output.getData());
dsm_funcs->free(output_gpu_mat);
spm_funcs->free(other_gpu);
}
/**
 * Multiplies this GPU dense matrix by a CPU sparse matrix and leaves the
 * product on the GPU, owned by `output`.
 *
 * The sparse operand is uploaded to the GPU for the duration of the call and
 * released before returning. Any GPU buffer previously held by `output` is
 * freed once the new product has been computed.
 *
 * @param other    CPU sparse right-hand operand.
 * @param output   GPU dense matrix receiving the product; its former GPU buffer is released.
 * @param op_this  Currently unused: both operands are always taken with OP_NOTRANSP.
 *                 TODO: manage transpose case.
 */
template<>
void Faust::MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>::multiply(const MatSparse<@FAUST_SCALAR_FOR_GM@, Cpu> &other, MatDense<@FAUST_SCALAR_FOR_GM@,GPU2>& output, const char op_this)
{
	auto spm_funcs = ((gm_SparseMatFunc_@GM_SCALAR@*) this->spm_funcs);
	auto dsm_funcs = ((gm_DenseMatFunc_@GM_SCALAR@*) this->dsm_funcs);
	// NOTE(review): the values pointer is cast to double* regardless of the configured scalar type -- confirm this is intended for non-double builds.
	auto other_gpu = spm_funcs->togpu(other.getNbRow(), other.getNbCol(), other.getNonZeros(), (int32_t*) other.getRowPtr(), (int32_t*) other.getColInd(), (double*) other.getValuePtr());
	// Compute the product BEFORE touching output's old buffer: if `output` is this very
	// object, freeing output.gpu_mat first would release gpu_mat while it is still an operand.
	// Passing nullptr as the output argument makes the call return a newly created result matrix.
	auto product_gpu = dsm_funcs->mul_gpu_spm_ext(gpu_mat, other_gpu, nullptr, OP_NOTRANSP, OP_NOTRANSP);
	// The temporary GPU copy of the sparse operand is no longer needed.
	spm_funcs->free(other_gpu);
	// Hand the new buffer to output, then release the one it held before (if any).
	auto stale_gpu = output.gpu_mat;
	output.gpu_mat = product_gpu;
	if(stale_gpu != nullptr)
		dsm_funcs->free(stale_gpu);
}
template<>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment