Mentions légales du service

Skip to content
Snippets Groups Projects
Commit dab3da47 authored by hhakim's avatar hhakim
Browse files

Add GPU2 MatButterfly::getNonZeros()/transpose() and unit tests.

parent 71d4408a
No related branches found
No related tags found
No related merge requests found
......@@ -145,6 +145,43 @@ void test_clone(MatButterfly<FPP, GPU2> &gpu_bm, MatButterfly<FPP, Cpu> &cpu_bm)
cout << "OK" << endl;
}
/**
 * Checks that the GPU2 butterfly matrix reports the same nonzero count as the
 * CPU butterfly matrix it is compared against.
 *
 * \param gpu_bm the GPU2 matrix under test.
 * \param cpu_bm the CPU matrix used as reference.
 */
void test_getNonZeros(MatButterfly<FPP, GPU2> &gpu_bm, MatButterfly<FPP, Cpu> &cpu_bm)
{
	cout << "Test MatButterfly<FPP, GPU2>::getNonZeros()" << endl;
	// Fetch both counts first so a failing assert is easy to inspect in a debugger.
	const auto gpu_nnz = gpu_bm.getNonZeros();
	const auto cpu_nnz = cpu_bm.getNonZeros();
	assert(gpu_nnz == cpu_nnz);
	cout << "OK" << endl;
}
/**
 * Checks MatButterfly<FPP, GPU2>::transpose() against the CPU implementation.
 *
 * Clones of both matrices are transposed and multiplied into copies of the same
 * random square matrix; the GPU result (copied back to the CPU) must match the
 * CPU result. The sequence is then run a second time, which flips the transpose
 * state back, and the results are compared again.
 *
 * \param gpu_bm the GPU2 matrix under test (only a clone of it is modified here).
 * \param cpu_bm the CPU reference matrix (only a clone of it is modified here).
 */
void test_transpose(MatButterfly<FPP, GPU2> &gpu_bm, MatButterfly<FPP, Cpu> &cpu_bm)
{
	cout << "Test MatButterfly<FPP, GPU2>::transpose()" << endl;
	auto dim = cpu_bm.getNbRow();
	auto rand_mat = MatDense<FPP, Cpu>::randMat(dim, dim);
	// Both sides start from the very same random operand.
	MatDense<FPP, Cpu> expected(*rand_mat);
	MatDense<FPP, GPU2> actual(*rand_mat);
	auto gpu_copy = gpu_bm.clone();
	auto cpu_copy = cpu_bm.Clone();

	// One round: toggle the transpose on both clones, multiply in place, compare.
	auto toggle_multiply_and_compare = [&]()
	{
		cpu_copy->transpose();
		cpu_copy->multiply(expected, 'N');
		gpu_copy->transpose();
		gpu_copy->multiply(actual, 'N');
		auto actual_on_cpu = actual.tocpu();
		assert(verifyMatEq(actual_on_cpu, expected));
	};

	// first round: transposed product
	toggle_multiply_and_compare();
	// second round: undo the transpose
	toggle_multiply_and_compare();

	delete rand_mat;
	delete gpu_copy;
	delete cpu_copy;
	cout << "OK" << endl;
}
int main(int argc, char** argv)
{
Faust::enable_gpu_mod();
......@@ -169,6 +206,8 @@ int main(int argc, char** argv)
test_get_colsrows(gpu_bm, cpu_bm);
test_norm(gpu_bm, cpu_bm);
test_clone(gpu_bm, cpu_bm);
test_getNonZeros(gpu_bm, cpu_bm);
test_transpose(gpu_bm, cpu_bm);
return 0;
}
......@@ -28,11 +28,12 @@ namespace Faust
//! \brief Returns the matrix type tag, which is Butterfly for this class.
MatType getType() const {return Butterfly;} //TODO: move def in hpp
//! \brief Returns the number of rows, taken from the size of d1.
int32_t getNbRow() const {return d1.size();} //TODO: move def in hpp
//! \brief Returns the number of columns, also taken from the size of d1 (so equal to getNbRow()).
int32_t getNbCol() const {return d1.size();} //TODO: move def in hpp
//! \brief Returns the sum of the nonzero counts of d1 and d2.
faust_unsigned_int getNonZeros() const;
//! \brief Returns a newly allocated copy of this matrix.
//! NOTE(review): dev_id and stream presumably select the target GPU device and stream; the definition is not visible here -- confirm.
MatButterfly<FPP,GPU2>* clone(const int32_t dev_id=-1, const void* stream=nullptr) const;
//! \brief Returns a copy of this matrix.
//! NOTE(review): the handling of isOptimize is not visible here -- confirm against the definition.
MatButterfly<FPP,GPU2>* Clone(const bool isOptimize=false) const;
//! \brief Toggles the transpose state of this matrix (calls init_transpose() first).
void transpose();
//! \brief Builds d2t from d2 if d2t is still empty; does nothing otherwise.
void init_transpose();
/* void* get_gpu_mat_ptr() const;
faust_unsigned_int getNonZeros() const;
void transpose();
void conjugate();
void adjoint();*/
//! \brief Returns a sub-group of rows of this matrix as the same type of matrix
......
......@@ -13,6 +13,7 @@ namespace Faust
auto sd_ids_vec = cpu_bmat.get_subdiag_ids();
subdiag_ids = new int[sd_ids_vec.size()];
memcpy(subdiag_ids, sd_ids_vec.data(), sizeof(int) * sd_ids_vec.size());
d2t.resize(0);
}
template<typename FPP>
......@@ -25,7 +26,8 @@ namespace Faust
/**
 * Multiplies \p other in place by this butterfly matrix (or its transpose).
 *
 * The transposed diagonal d2t is used instead of d2 when exactly one of the
 * following holds: the matrix is currently flagged as transposed (is_transp),
 * or the caller asks for the transpose with op_this == 'T'.
 *
 * \param other the dense GPU matrix that is passed to butterfly_diag_prod and updated by it.
 * \param op_this 'T' to apply the transpose of this matrix; any other value
 *        applies the matrix as is.
 *        NOTE(review): an adjoint/conjugate op (e.g. 'H') is not handled here -- confirm whether it should be.
 */
template<typename FPP>
void MatButterfly<FPP, GPU2>::multiply(MatDense<FPP, GPU2> &other, const char op_this)
{
	// Explicit parentheses: the transpose flag and the requested op cancel each other out.
	const bool use_d2t = is_transp != (op_this == 'T');
	if(use_d2t)
	{
		// d2t is built lazily; without this, asking for 'T' on a matrix that was
		// never transpose()-d would pass an empty d2t to the product.
		init_transpose(); // does nothing if d2t was already built
	}
	butterfly_diag_prod(other, d1, use_d2t?d2t:d2, subdiag_ids);
}
template<typename FPP>
......@@ -107,4 +109,48 @@ namespace Faust
return clone();
}
}
/**
 * Returns the number of nonzeros of this butterfly matrix, computed as the sum
 * of the nonzero counts of its two stored vectors d1 and d2.
 */
template<typename FPP>
faust_unsigned_int MatButterfly<FPP, GPU2>::getNonZeros() const
{
	const auto d1_nnz = d1.getNonZeros();
	const auto d2_nnz = d2.getNonZeros();
	return d1_nnz + d2_nnz;
}
/**
 * Toggles the transpose state of this matrix.
 *
 * No data is rearranged beyond making sure d2t exists: init_transpose() only
 * does work while d2t is still empty, so repeated calls just flip is_transp.
 */
template<typename FPP>
void MatButterfly<FPP, GPU2>::transpose()
{
	// Ensure the transposed diagonal is available before the flag can select it.
	this->init_transpose();
	is_transp = is_transp ? false : true;
}
/**
 * Builds d2t, the counterpart of d2 used for transposed products, if it has
 * not been built yet (d2t empty). Otherwise returns immediately.
 *
 * d2 is copied to the CPU, every pair of contiguous blocks of
 * size >> (level+1) elements is swapped into a CPU buffer, and that buffer is
 * then assigned to d2t. d1 is left untouched.
 *
 * NOTE(review): if size >> (level+1) evaluates to 0 while size > 0 the loop
 * stride is 0 and the loop would not advance -- confirm level is always small
 * enough for the matrix size.
 */
template<typename FPP>
void MatButterfly<FPP, GPU2>::init_transpose()
{
	//TODO: simplify in case of symmetric matrix (it happens for the FFT)
	if(d2t.size() != 0)
		return; // already built

	//TODO: do it all in GPU memory
	auto n = d2.size();
	auto host_d2 = d2.tocpu();
	FPP *src = host_d2.getData();
	d2t.resize(n);
	Vect<FPP, Cpu> host_d2t(n);
	FPP *dst = host_d2t.getData();
	auto block_len = n >> (level+1);
	// D1 doesn't change
	// swap every pair of D2 contiguous blocks to form D2T
	for(int start = 0; start < n; start += block_len * 2)
	{
		FPP *first_blk = src + start;
		FPP *second_blk = src + start + block_len;
		// the first block of the pair lands in the second slot...
		std::copy(first_blk, src + start + block_len, dst + start + block_len);
		// ...and the second block lands in the first slot
		std::copy(second_blk, src + start + 2 * block_len, dst + start);
	}
	d2t = host_d2t;
}
}
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment