Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 73a57855 authored by hhakim's avatar hhakim
Browse files

Update faust_torch module.

- Handle the mismatch between torch's row-major storage and faust's column-major representation by computing the product as if it were the transpose product. This avoids useless but heavy copies.
- New function to display tensors.
- faust_torch code documentation.
- Minor change in faust_torch test.
parent 4e6a1efb
Branches
Tags
No related merge requests found
...@@ -32,10 +32,10 @@ vector<double> count(vector<std::chrono::duration<double>> durs) ...@@ -32,10 +32,10 @@ vector<double> count(vector<std::chrono::duration<double>> durs)
double calc_relerr(Tensor t, MatDense<FPP,Cpu> m) double calc_relerr(Tensor t, MatDense<FPP,Cpu> m)
{ {
MatDense<FPP,Cpu> aux; MatDense<FPP,Cpu> aux;
torch_Tensor_to_faust_MatDense(t, aux); convTensorToMatDense(t, aux);
aux -= m; aux -= m;
// Tensor taux; // Tensor taux;
// faust_MatDense_to_torch_Tensor(m, taux); // convMatDenseToTensor(m, taux);
// taux -= t; // taux -= t;
// cout << "tens reldiff:" << norm(taux.flatten())/norm(t) << endl; // cout << "tens reldiff:" << norm(taux.flatten())/norm(t) << endl;
return aux.norm()/m.norm(); return aux.norm()/m.norm();
...@@ -51,7 +51,7 @@ double calc_relerr(MatDense<FPP,Cpu> m1, MatDense<FPP,Cpu> m2) ...@@ -51,7 +51,7 @@ double calc_relerr(MatDense<FPP,Cpu> m1, MatDense<FPP,Cpu> m2)
double tensor_norm(Tensor t) double tensor_norm(Tensor t)
{ {
MatDense<FPP,Cpu> aux; MatDense<FPP,Cpu> aux;
torch_Tensor_to_faust_MatDense(t, aux); convTensorToMatDense(t, aux);
// cout << torch::norm(t.flatten())<< endl; //can't static_cast c10::Scalar to double // cout << torch::norm(t.flatten())<< endl; //can't static_cast c10::Scalar to double
return aux.norm(); return aux.norm();
} }
...@@ -79,7 +79,7 @@ double sum_count(vector<double> counts) ...@@ -79,7 +79,7 @@ double sum_count(vector<double> counts)
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
int min_size = 100; int min_size = 128;
int max_size = 1024; int max_size = 1024;
unsigned int min_nfacts = 10; unsigned int min_nfacts = 10;
unsigned int max_nfacts = 20; unsigned int max_nfacts = 20;
...@@ -149,10 +149,12 @@ int main(int argc, char** argv) ...@@ -149,10 +149,12 @@ int main(int argc, char** argv)
facs.push_back(*it); facs.push_back(*it);
} }
/** test Faust to TensorList */ /** test Faust to TensorList */
faust_matvec_to_torch_TensorList(facs, tl, at::kCPU, /*clone */ false); convMatGenListToTensorList(facs, tl, at::kCPU, /*clone */ false);
cout << "Faust to TensorList:" << endl;
display_TensorList(tl);
t = tensor_chain_mul(tl); t = tensor_chain_mul(tl);
// t = torch::chain_matmul(tl); // t = torch::chain_matmul(tl);
torch_Tensor_to_faust_MatDense(t, aux); convTensorToMatDense(t, aux);
cout << "torch toarray norm: " << setprecision(15) << norm(t.flatten()) << endl; cout << "torch toarray norm: " << setprecision(15) << norm(t.flatten()) << endl;
cout << "faust toarray norm: " << setprecision(15) << F->normFro() << endl; cout << "faust toarray norm: " << setprecision(15) << F->normFro() << endl;
cout << "toarray faust-torch reldiff: " << calc_relerr(t,F->get_product()) << endl; cout << "toarray faust-torch reldiff: " << calc_relerr(t,F->get_product()) << endl;
...@@ -165,7 +167,7 @@ int main(int argc, char** argv) ...@@ -165,7 +167,7 @@ int main(int argc, char** argv)
/** test MatDense to torch::Tensor conv. */ /** test MatDense to torch::Tensor conv. */
auto M = Faust::MatDense<FPP,Cpu>::randMat(F->getNbCol(), F->getNbRow()); auto M = Faust::MatDense<FPP,Cpu>::randMat(F->getNbCol(), F->getNbRow());
cout << "norm(M): " << M->norm() << endl; cout << "norm(M): " << M->norm() << endl;
faust_MatDense_to_torch_Tensor(*M, taux); convMatDenseToTensor(*M, taux);
cout << "M to tensor error: " << calc_relerr(taux, *M) << endl; cout << "M to tensor error: " << calc_relerr(taux, *M) << endl;
int i=0; int i=0;
// for(auto ts : tl) // for(auto ts : tl)
...@@ -178,7 +180,7 @@ int main(int argc, char** argv) ...@@ -178,7 +180,7 @@ int main(int argc, char** argv)
// } // }
/** prepare product refs (MatDense and Tensor)*/ /** prepare product refs (MatDense and Tensor)*/
ref = F->multiply(*M); ref = F->multiply(*M);
faust_MatDense_to_torch_Tensor(ref, tref); convMatDenseToTensor(ref, tref);
/** Measure time of nsamples F*M (pure Faust) */ /** Measure time of nsamples F*M (pure Faust) */
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
{ {
...@@ -189,6 +191,7 @@ int main(int argc, char** argv) ...@@ -189,6 +191,7 @@ int main(int argc, char** argv)
} }
errors[PURE_FAUST] = 0; errors[PURE_FAUST] = 0;
pnorms[PURE_FAUST] = ref.norm(); pnorms[PURE_FAUST] = ref.norm();
cout << "(1) done." << endl;
/** Measure time of nsamples F*M (Faust-torch) */ /** Measure time of nsamples F*M (Faust-torch) */
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
{ {
...@@ -197,6 +200,7 @@ int main(int argc, char** argv) ...@@ -197,6 +200,7 @@ int main(int argc, char** argv)
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
times[FAUST_TORCH].push_back(end - start); times[FAUST_TORCH].push_back(end - start);
} }
cout << "(2) done." << endl;
errors[FAUST_TORCH] = calc_relerr(ref, out); errors[FAUST_TORCH] = calc_relerr(ref, out);
pnorms[FAUST_TORCH] = out.norm(); pnorms[FAUST_TORCH] = out.norm();
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
...@@ -206,6 +210,7 @@ int main(int argc, char** argv) ...@@ -206,6 +210,7 @@ int main(int argc, char** argv)
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
times[FAUST_TORCH_CHAIN_OPT].push_back(end - start); times[FAUST_TORCH_CHAIN_OPT].push_back(end - start);
} }
cout << "(3) done." << endl;
errors[FAUST_TORCH_CHAIN_OPT] = calc_relerr(ref, out); errors[FAUST_TORCH_CHAIN_OPT] = calc_relerr(ref, out);
pnorms[FAUST_TORCH_CHAIN_OPT] = out.norm(); pnorms[FAUST_TORCH_CHAIN_OPT] = out.norm();
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
...@@ -215,11 +220,12 @@ int main(int argc, char** argv) ...@@ -215,11 +220,12 @@ int main(int argc, char** argv)
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
times[FAUST_TORCH_CONTIGUOUS_DENSE_TO_TORCH].push_back(end - start); times[FAUST_TORCH_CONTIGUOUS_DENSE_TO_TORCH].push_back(end - start);
} }
cout << "(4) done." << endl;
errors[FAUST_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = calc_relerr(ref, out); errors[FAUST_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = calc_relerr(ref, out);
pnorms[FAUST_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = out.norm(); pnorms[FAUST_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = out.norm();
/** Measure time of nsamples F*M (Faust-torch without accounting matrix-to-tensor conversion time) */ /** Measure time of nsamples F*M (Faust-torch without accounting matrix-to-tensor conversion time) */
Tensor tM; Tensor tM;
faust_MatDense_to_torch_Tensor(*M, tM); convMatDenseToTensor(*M, tM);
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
{ {
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
...@@ -227,6 +233,7 @@ int main(int argc, char** argv) ...@@ -227,6 +233,7 @@ int main(int argc, char** argv)
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
times[PURE_TORCH].push_back(end - start); times[PURE_TORCH].push_back(end - start);
} }
cout << "(5) done." << endl;
errors[PURE_TORCH] = calc_relerr(t, ref); errors[PURE_TORCH] = calc_relerr(t, ref);
pnorms[PURE_TORCH] = tensor_norm(t); pnorms[PURE_TORCH] = tensor_norm(t);
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
...@@ -236,6 +243,7 @@ int main(int argc, char** argv) ...@@ -236,6 +243,7 @@ int main(int argc, char** argv)
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
times[PURE_TORCH_CHAIN_OPT].push_back(end - start); times[PURE_TORCH_CHAIN_OPT].push_back(end - start);
} }
cout << "(6) done." << endl;
errors[PURE_TORCH_CHAIN_OPT] = calc_relerr(t, ref); errors[PURE_TORCH_CHAIN_OPT] = calc_relerr(t, ref);
pnorms[PURE_TORCH_CHAIN_OPT] = tensor_norm(t); pnorms[PURE_TORCH_CHAIN_OPT] = tensor_norm(t);
for(int i=0;i<nsamples;i++) for(int i=0;i<nsamples;i++)
...@@ -245,6 +253,7 @@ int main(int argc, char** argv) ...@@ -245,6 +253,7 @@ int main(int argc, char** argv)
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
times[PURE_TORCH_CONTIGUOUS_DENSE_TO_TORCH].push_back(end - start); times[PURE_TORCH_CONTIGUOUS_DENSE_TO_TORCH].push_back(end - start);
} }
cout << "(7) done." << endl;
errors[PURE_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = calc_relerr(t, ref); errors[PURE_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = calc_relerr(t, ref);
pnorms[PURE_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = tensor_norm(t); pnorms[PURE_TORCH_CONTIGUOUS_DENSE_TO_TORCH] = tensor_norm(t);
for(int i=0; i < median_times.size(); i ++) for(int i=0; i < median_times.size(); i ++)
...@@ -266,6 +275,6 @@ int main(int argc, char** argv) ...@@ -266,6 +275,6 @@ int main(int argc, char** argv)
cout << "stats: median time (secs) / speedup / cumutime / errVSFaust / pnorms" << endl; cout << "stats: median time (secs) / speedup / cumutime / errVSFaust / pnorms" << endl;
for(int i=0;i<nmeths;i++) for(int i=0;i<nmeths;i++)
{ {
cout << "("<<i+1<<"): " <</* count(time_pure_faust) <<*/ median_times[i] << " / " << speedups[i] << " / " << cumu_times[i] << " / " << errors[i] << " / " << pnorms[i] << endl; cout << "("<<i+1<<"): " <</* count(time_pure_faust) <<*/ median_times[i] << " / " << setprecision(3) << speedups[i] << " / " << cumu_times[i] << " / " << errors[i] << " / " << pnorms[i] << endl;
} }
} }
...@@ -13,12 +13,19 @@ namespace Faust ...@@ -13,12 +13,19 @@ namespace Faust
* *
* \param dev the device at::kCPU or at::kCUDA. * \param dev the device at::kCPU or at::kCUDA.
* \param clone true to copy the Faust matrix data to create the tensor, false to use the same data without copying (false by default). * \param clone true to copy the Faust matrix data to create the tensor, false to use the same data without copying (false by default).
* \param transpose is true by default because it's more efficient to handle the difference of data storage between torch and faust ; row-major order for torch, column-major order for faust.
*/ */
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void faust_MatDense_to_torch_Tensor(const Faust::MatDense<FPP,D> & dm, torch::Tensor & t, at::DeviceType dev = at::kCPU, const bool clone = false); void convMatDenseToTensor(const Faust::MatDense<FPP,D> & dm, torch::Tensor & t, at::DeviceType dev = at::kCPU, const bool clone = false, const bool transpose = true);
/**
* Converts a torch::Tensor (t) to Faust::MatDense (dm).
*
* \param transpose see convMatDenseToTensor.
*
*/
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void torch_Tensor_to_faust_MatDense(const torch::Tensor & t, Faust::MatDense<FPP,D> & dm); void convTensorToMatDense(const torch::Tensor & t, Faust::MatDense<FPP,D> & dm, const bool transpose = true);
/** /**
...@@ -28,35 +35,54 @@ namespace Faust ...@@ -28,35 +35,54 @@ namespace Faust
* *
*\param dev the device at::kCPU or at::kCUDA. *\param dev the device at::kCPU or at::kCUDA.
*\param clone true to copy the Faust matrix data to create the tensor, false to use the same data without copying (false by default). *\param clone true to copy the Faust matrix data to create the tensor, false to use the same data without copying (false by default).
*\param transpose is true by default because it's more efficient to handle the difference of data storage between torch and faust ; row-major order for torch, column-major order for faust. For MatSparse it does not really matter (because data copying is still necessary) but it's preferable to be consistent with what is done for MatDense.
*/ */
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void faust_MatSparse_to_torch_Tensor(Faust::MatSparse<FPP,D> & spm, torch::Tensor & t, at::DeviceType dev = at::kCPU, const bool clone = false); void convMatSparseToTensor(const Faust::MatSparse<FPP,D> & spm, torch::Tensor & t, at::DeviceType dev, const bool clone, const bool transpose = true);
/** /**
* Converts a Faust::MatGeneric vector to a torch::TensorList (vector alias). * Converts a Faust::MatGeneric vector to a torch::TensorList (vector alias).
* *
*\param dev the device at::kCPU or at::kCUDA. *\param dev the device at::kCPU or at::kCUDA.
* \param clone true to copy the Faust matrices data to create the tensors, false to use the same data without copying (false by default). * \param clone true to copy the Faust matrices data to create the tensors, false to use the same data without copying (false by default).
* \param transpose if true, the ml factors are converted and stored into tl in reverse order (the goal is to compute the ml product efficiently -- cf. tensor_chain_mul -- through tl defined as the transpose product).
*/ */
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void faust_matvec_to_torch_TensorList(const std::vector<Faust::MatGeneric<FPP,D>*> & ml, std::vector<torch::Tensor> &tl, at::DeviceType dev = at::kCPU, const bool clone = false); void convMatGenListToTensorList(const std::vector<Faust::MatGeneric<FPP,D>*> & ml, std::vector<torch::Tensor> &tl, at::DeviceType dev = at::kCPU, const bool clone = false, const bool transpose = true);
/** /**
* Computes the tensor chain product of ml and applies it optionally to the tensor op. * Computes the tensor chain product of tl and applies it optionally to the tensor op.
* *
* \param tl the sequence of tensors to compute the product.
* \param op if op is not nullptr the function returns the product of tl and op.
*\param dev the device at::kCPU or at::kCUDA. *\param dev the device at::kCPU or at::kCUDA.
* \param chain_opt if true then the function delegates the computation to tensor_chain_mul_opt.
* \param contiguous_dense_to_torch If true, consecutive/contiguous dense factors will be computed (as an intermediary product) through torch::chain_matmul(), as is always done when tl contains only dense factors (e.g.: if tl = {S1, S2, D3, D4, D5, S6}, where the letter D represents a dense factor and S a sparse factor, D3*D4*D5 will be calculated in one call of torch::chain_matmul while the remaining products will be computed one by one). This option and chain_opt are mutually exclusive: if chain_opt is true, this boolean is forced to false. Note: as far as I've tested, torch::chain_matmul can't work with sparse Tensor-s (hence this option and function).
* \param op_on_left if true op*tl is computed, otherwise tl*op is computed. It is set to true by default because this is the optimal scenario (the transpose product scenario) regarding the column-major order of Faust::MatDense and the row-major order of torch. This default value is consistent with the default value of transpose in other functions.
*
* Returns the result as a Tensor. * Returns the result as a Tensor.
*/ */
torch::Tensor tensor_chain_mul(const std::vector<torch::Tensor>& ml, const torch::Tensor* op= nullptr, at::DeviceType dev = at::kCPU, const bool chain_opt = false, const bool contiguous_dense_to_torch = false); torch::Tensor tensor_chain_mul(const std::vector<torch::Tensor>& tl, const torch::Tensor* op= nullptr, at::DeviceType dev = at::kCPU, const bool chain_opt = false, const bool contiguous_dense_to_torch = false, const bool op_on_left = true);
torch::Tensor tensor_chain_mul_opt(const std::vector<torch::Tensor>& ml, const torch::Tensor* op, at::DeviceType dev = at::kCPU); /**
* Computes the tensor chain product of tl and applies it optionally to the tensor op.
*
* This function does the same as tensor_chain_mul except that it optimizes the matrix chain product choosing an order of computation that minimizes the cost.
*
* \param tl the sequence of tensors to compute the product.
* \param op if op is not nullptr the function returns the product of tl and op.
*\param dev the device at::kCPU or at::kCUDA.
* \param op_on_left if true op*tl is computed, otherwise tl*op is computed. It is set to true by default because this is the optimal scenario (the transpose product scenario) regarding the column-major order of Faust::MatDense and the row-major order of torch. This default value is consistent with the default value of transpose in other functions.
*
* Returns the result as a Tensor.
*/
torch::Tensor tensor_chain_mul_opt(const std::vector<torch::Tensor>& tl, const torch::Tensor* op, at::DeviceType dev = at::kCPU, const bool op_on_left = true);
/** /**
* Computes the matrix chain product of ml and applies it optionally to the matrix op if provided. * Computes the matrix chain product of ml and applies it optionally to the matrix op if provided.
* *
* This function converts all the matrices to Tensors before and then computes the tensor product. * This function converts all the matrices to Tensors before and then computes the tensor product (using tensor_chain_mul(TensorList, Tensor) just above).
* *
*\note Complex tensors are not available in libtorch, an exception is thrown when FPP is complex. * \note Complex tensors are not available in libtorch, an exception is thrown when FPP is complex.
* *
* \param on_gpu true to use the GPU backend, false for the CPU backend (false by default). * \param on_gpu true to use the GPU backend, false for the CPU backend (false by default).
* \param clone true to copy the Faust matrices data to create the tensors, false to use the same data without copying (false by default). * \param clone true to copy the Faust matrices data to create the tensors, false to use the same data without copying (false by default).
...@@ -64,7 +90,7 @@ namespace Faust ...@@ -64,7 +90,7 @@ namespace Faust
* Returns the result as a Faust::MatDense. * Returns the result as a Faust::MatDense.
*/ */
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void tensor_chain_mul(const std::vector<Faust::MatGeneric<FPP,D>*>& ml, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op = nullptr, const bool on_gpu = false, const bool clone = false, const bool chain_opt = false, const bool contiguous_dense_to_torch = false); void tensor_chain_mul(const std::vector<Faust::MatGeneric<FPP,D>*>& ml, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op = nullptr, const bool on_gpu = false, const bool clone = false, const bool chain_opt = false, const bool contiguous_dense_to_torch = false, const bool transpose = true);
/** /**
* Computes the matrix chain product of tl and applies it optionally to the matrix op if provided. * Computes the matrix chain product of tl and applies it optionally to the matrix op if provided.
...@@ -76,11 +102,18 @@ namespace Faust ...@@ -76,11 +102,18 @@ namespace Faust
* \param on_gpu true to use the GPU backend, false for the CPU backend (false by default). * \param on_gpu true to use the GPU backend, false for the CPU backend (false by default).
* \param clone true to copy the Faust matrices data to create the tensors, false to use the same data without copying (false by default). * \param clone true to copy the Faust matrices data to create the tensors, false to use the same data without copying (false by default).
* \param contiguous_dense_to_torch if true then torch::chain_matmul is used to compute the intermediary product of dense contiguous factors. Note that if chain_opt is true, this option can't be true and is internally set to false silently. * \param contiguous_dense_to_torch if true then torch::chain_matmul is used to compute the intermediary product of dense contiguous factors. Note that if chain_opt is true, this option can't be true and is internally set to false silently.
*
* Returns the result as a Faust::MatDense. * Returns the result as a Faust::MatDense.
*/ */
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void tensor_chain_mul(const std::vector<torch::Tensor>& tl, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op, const bool on_gpu, const bool clone, const bool chain_opt, const bool contiguous_dense_to_torch); void tensor_chain_mul(const std::vector<torch::Tensor>& tl, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op, const bool on_gpu, const bool clone, const bool chain_opt, const bool contiguous_dense_to_torch, const bool transpose = true);
/**
* This function displays a Tensor list (size and storage format of each Tensor, DENSE or SPARSE).
*
* \param transpose if true the transpose TensorList of tl is displayed. It's true by default to be consistent with other functions.
*/
void display_TensorList(std::vector<torch::Tensor>& tl, const bool transpose = true);
} }
#include "faust_torch.hpp" #include "faust_torch.hpp"
#endif #endif
...@@ -2,7 +2,7 @@ namespace Faust ...@@ -2,7 +2,7 @@ namespace Faust
{ {
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void faust_MatSparse_to_torch_Tensor(const Faust::MatSparse<FPP,D> & spm, torch::Tensor & t, at::DeviceType dev, const bool clone) void convMatSparseToTensor(const Faust::MatSparse<FPP,D> & spm, torch::Tensor & t, at::DeviceType dev, const bool clone, const bool transpose /* = true*/)
{ {
torch::Tensor values = torch::from_blob(const_cast<FPP*>(spm.getValuePtr()), {spm.getNonZeros()}, torch::TensorOptions().dtype(torch::kFloat64).device(dev));//.clone(); torch::Tensor values = torch::from_blob(const_cast<FPP*>(spm.getValuePtr()), {spm.getNonZeros()}, torch::TensorOptions().dtype(torch::kFloat64).device(dev));//.clone();
// cout << "tensor values:" << values << endl; // cout << "tensor values:" << values << endl;
...@@ -28,71 +28,119 @@ namespace Faust ...@@ -28,71 +28,119 @@ namespace Faust
col = col.to(torch::kI64); // mandatory conversion because torch forces to use same size types for indices and values (even if indices are integers and values floats) col = col.to(torch::kI64); // mandatory conversion because torch forces to use same size types for indices and values (even if indices are integers and values floats)
// cout << "tensor row:" << row << endl; // cout << "tensor row:" << row << endl;
delete [] rows; delete [] rows;
torch::Tensor indices = at::stack({row, col}, /* dim */ 0); torch::Tensor indices;
t = torch::sparse_coo_tensor(indices, values); if(transpose)
{
//reverse row and col to take the matrix as a transpose mat
indices = at::stack({col, row}, /* dim */ 0);
t = torch::sparse_coo_tensor(indices, values);
t.sparse_resize_({spm.getNbCol(), spm.getNbRow()}, t.sparse_dim(), t.dense_dim());
}
else
{
indices = at::stack({row, col}, /* dim */ 0);
t = torch::sparse_coo_tensor(indices, values);
t.sparse_resize_({spm.getNbRow(), spm.getNbCol()}, t.sparse_dim(), t.dense_dim());
}
// cout << "tensor size: " << t.size(0) << " x " << t.size(1) << " t is sparse:" << t.is_sparse() << endl; // cout << "tensor size: " << t.size(0) << " x " << t.size(1) << " t is sparse:" << t.is_sparse() << endl;
assert(t._nnz() == spm.getNonZeros());
assert(t.size(0) == spm.getNbCol() && t.size(1) == spm.getNbRow());
} }
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void faust_MatDense_to_torch_Tensor(const Faust::MatDense<FPP,D> & dm, torch::Tensor & t, at::DeviceType dev, const bool clone) void convMatDenseToTensor(const Faust::MatDense<FPP,D> & dm, torch::Tensor & t, at::DeviceType dev, const bool clone, const bool transpose /* = true*/)
{ {
t = torch::from_blob(const_cast<FPP*>(dm.getData()), {dm.getNbCol(), dm.getNbRow()},torch::TensorOptions().dtype(torch::kFloat64).device(dev));//.clone(); uint64_t nrows, ncols;
t = t.t();
if(clone) // number of nrows and ncols are inverted because the data is taken as a transpose matrix (Faust::MatDense is column-major order while torch is row-major order)
// it saves the need/transpose
nrows = dm.getNbCol();
ncols = dm.getNbRow();
t = torch::from_blob(const_cast<FPP*>(dm.getData()), {nrows, ncols},torch::TensorOptions().dtype(torch::kFloat64).device(dev));//.clone();
if(! transpose)
// need to transpose when transpose is false! conversion to torch row-major order
// while Faust is in col-major order (the conversion is equivalent to a transpose)
t = t.t();
if(clone && transpose)
t = t.clone(); t = t.clone();
// if clone == true && transpose == false // the transpose above already cloned the data
} }
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void torch_Tensor_to_faust_MatDense(const torch::Tensor & t, Faust::MatDense<FPP,D> & dm) void convTensorToMatDense(const torch::Tensor & t, Faust::MatDense<FPP,D> & dm, const bool transpose /* = true*/)
{ {
dm = Faust::MatDense<FPP,Cpu>(t.data_ptr<FPP>(), t.size(1), t.size(0)); if(transpose)
dm.transpose(); {
dm = Faust::MatDense<FPP,Cpu>(t.data_ptr<FPP>(), t.size(1), t.size(0));
}
else
{
dm = Faust::MatDense<FPP,Cpu>(t.data_ptr<FPP>(), t.size(1), t.size(0));
dm.transpose();
// need to transpose when transpose is false! conversion from torch row-major order
// while Faust is in col-major order (the conversion is equivalent to a transpose)
}
} }
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void faust_matvec_to_torch_TensorList(const std::vector<Faust::MatGeneric<FPP,D>*> & ml, std::vector<torch::Tensor> &tl, at::DeviceType dev, const bool clone) void convMatGenListToTensorList(const std::vector<Faust::MatGeneric<FPP,D>*> & ml, std::vector<torch::Tensor> &tl, at::DeviceType dev, const bool clone /* = false*/, const bool transpose /* = true*/)
{ {
const Faust::MatSparse<FPP,D> *spm; const Faust::MatSparse<FPP,D> *spm;
const Faust::MatDense<FPP,D> *dm; const Faust::MatDense<FPP,D> *dm;
// torch::Tensor t;
tl.resize(ml.size()); tl.resize(ml.size());
int i = 0; int i;
for(auto m : ml) if(transpose)
{
i = tl.size()-1; // transpose order == reverse order
for(auto m : ml)
{
if(spm = dynamic_cast<Faust::MatSparse<FPP,D>*>(m))
convMatSparseToTensor(*spm, tl[i--], dev, clone);
else if(dm = dynamic_cast<Faust::MatDense<FPP,D>*>(m))
convMatDenseToTensor(*dm, tl[i--], dev, clone);
}
}
else
{ {
if(spm = dynamic_cast<Faust::MatSparse<FPP,D>*>(m)) i = 0;
faust_MatSparse_to_torch_Tensor(*spm, tl[i++], dev, clone); for(auto m : ml)
// faust_MatSparse_to_torch_Tensor(*spm, t, dev, clone); {
else if(dm = dynamic_cast<Faust::MatDense<FPP,D>*>(m)) if(spm = dynamic_cast<Faust::MatSparse<FPP,D>*>(m))
faust_MatDense_to_torch_Tensor(*dm, tl[i++], dev, clone); convMatSparseToTensor(*spm, tl[i++], dev, clone);
// faust_MatDense_to_torch_Tensor(*dm, t, dev, clone); else if(dm = dynamic_cast<Faust::MatDense<FPP,D>*>(m))
// tl.push_back(t); convMatDenseToTensor(*dm, tl[i++], dev, clone);
}
} }
} }
torch::Tensor tensor_chain_mul(const std::vector<torch::Tensor>& ml, const torch::Tensor* op, at::DeviceType dev, const bool chain_opt, const bool contiguous_dense_to_torch) torch::Tensor tensor_chain_mul(const std::vector<torch::Tensor>& tl, const torch::Tensor* op, at::DeviceType dev, const bool chain_opt, const bool contiguous_dense_to_torch, const bool op_on_left /*=true*/)
{ {
bool all_dense = true; bool all_dense = true;
std::vector<torch::Tensor> mlc; std::vector<torch::Tensor> tlc;
for(auto t: ml) for(auto t: tl)
{ {
all_dense &= !t.is_sparse(); all_dense &= !t.is_sparse();
mlc.push_back(t); tlc.push_back(t);
} }
if(op) if(op)
{ {
all_dense &= !op->is_sparse(); all_dense &= !op->is_sparse();
mlc.push_back(*op); if(op_on_left)
tlc.insert(tlc.begin(), *op);
else
tlc.push_back(*op);
} }
if(all_dense) if(all_dense)
return torch::chain_matmul(mlc); //chain_opt is useless because I suppose torch does its own chain opt. return torch::chain_matmul(tlc); //chain_opt is useless because I suppose torch does its own chain opt.
if(chain_opt) if(chain_opt)
return std::move(tensor_chain_mul_opt(mlc, nullptr, dev)); return std::move(tensor_chain_mul_opt(tlc, nullptr, dev));
auto it = mlc.end()-1; auto it = tlc.end()-1;
auto res = *(it); auto res = *(it);
if(res.is_sparse()) if(res.is_sparse())
res = res.to_dense(); res = res.to_dense();
std::vector<torch::Tensor> dense_contiguous_facts; std::vector<torch::Tensor> dense_contiguous_facts;
while(it != mlc.begin()) while(it != tlc.begin())
{ {
auto f = *(--it); auto f = *(--it);
if(f.is_sparse()) if(f.is_sparse())
...@@ -112,12 +160,20 @@ namespace Faust ...@@ -112,12 +160,20 @@ namespace Faust
else else
res = torch::matmul(f, res); res = torch::matmul(f, res);
} }
assert(res.size(0) == ml.size(0)); if(contiguous_dense_to_torch && dense_contiguous_facts.size() > 0)
assert(op == nullptr && res.size(1) == ml.size(1) || res.size(1) == op->size(1)); {
//multiply chain of dense tensors at the end/start of tlc
dense_contiguous_facts.push_back(res);
res = torch::chain_matmul(dense_contiguous_facts);
dense_contiguous_facts.erase(dense_contiguous_facts.begin(), dense_contiguous_facts.end());
}
// don't worry assert is enabled only in debug mode (when DEBUG is defined)
assert((op != nullptr && op_on_left && res.size(0) == op.size(0)) || ((! op_on_left || op == nullptr) && res.size(0) == tl[0].size(0)) || op == nullptr);
assert(((op == nullptr || op_on_left) && res.size(1) == (*(tl.end()-1)).size(1)) || op != nullptr && res.size(1) == op->size(1));
return std::move(res); //explicit move but should work auto because Tensor class overrides move operator= and ctor return std::move(res); //explicit move but should work auto because Tensor class overrides move operator= and ctor
} }
torch::Tensor tensor_chain_mul_opt(const std::vector<torch::Tensor>& ml, const torch::Tensor* op, at::DeviceType dev) torch::Tensor tensor_chain_mul_opt(const std::vector<torch::Tensor>& ml, const torch::Tensor* op, at::DeviceType dev, const bool op_on_left /* = true */)
{ {
// cost to apply a on b // cost to apply a on b
auto cost = [](const torch::Tensor &a, const torch::Tensor &b) auto cost = [](const torch::Tensor &a, const torch::Tensor &b)
...@@ -125,7 +181,7 @@ namespace Faust ...@@ -125,7 +181,7 @@ namespace Faust
uint64_t a_cost = a.size(0)*a.size(1); uint64_t a_cost = a.size(0)*a.size(1);
uint64_t b_cost; uint64_t b_cost;
if(b.is_sparse()) if(b.is_sparse())
b_cost = b._nnz(); b_cost = b._nnz();
else else
b_cost = b.size(1); b_cost = b.size(1);
return a_cost*b_cost; return a_cost*b_cost;
...@@ -149,7 +205,10 @@ namespace Faust ...@@ -149,7 +205,10 @@ namespace Faust
std::vector<const torch::Tensor*> mlc; std::vector<const torch::Tensor*> mlc;
for(int i=0;i<ml.size();i++) mlc.push_back(&ml[i]); for(int i=0;i<ml.size();i++) mlc.push_back(&ml[i]);
if(op != nullptr) if(op != nullptr)
mlc.push_back(op); if(op_on_left)
mlc.insert(mlc.begin(), op);
else
mlc.push_back(op);
std::vector<uint64_t> costs(mlc.size()-1); std::vector<uint64_t> costs(mlc.size()-1);
for(int i=0;i<costs.size();i++) for(int i=0;i<costs.size();i++)
costs[i] = cost(*mlc[i], *mlc[i+1]); costs[i] = cost(*mlc[i], *mlc[i+1]);
...@@ -158,7 +217,6 @@ namespace Faust ...@@ -158,7 +217,6 @@ namespace Faust
res_list.push_back(res); res_list.push_back(res);
while(mlc.size() > 1) while(mlc.size() > 1)
{ {
// for (int i = 0; i < mlc.size(); i++) cout << mlc[i] << "[" << mlc[i]->size(0) << "x" << mlc[i]->size(1) << "] "; cout << endl;
auto i = argmin(costs); auto i = argmin(costs);
auto f1 = mlc[i]; auto f1 = mlc[i];
auto f2 = mlc[i+1]; auto f2 = mlc[i+1];
...@@ -167,7 +225,6 @@ namespace Faust ...@@ -167,7 +225,6 @@ namespace Faust
res = new torch::Tensor(); res = new torch::Tensor();
res_list.push_back(res); res_list.push_back(res);
} }
// cout << "argmin i:" << i << endl;
if(f2->is_sparse()) if(f2->is_sparse())
if(f1->is_sparse()) if(f1->is_sparse())
*res = at::_sparse_mm(*f1, f2->to_dense()); *res = at::_sparse_mm(*f1, f2->to_dense());
...@@ -196,15 +253,15 @@ namespace Faust ...@@ -196,15 +253,15 @@ namespace Faust
} }
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void tensor_chain_mul(const std::vector<Faust::MatGeneric<FPP,D>*>& ml, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op, const bool on_gpu, const bool clone, const bool chain_opt, const bool contiguous_dense_to_torch) void tensor_chain_mul(const std::vector<Faust::MatGeneric<FPP,D>*>& ml, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op, const bool on_gpu, const bool clone, const bool chain_opt, const bool contiguous_dense_to_torch, const bool transpose /* = true */)
{ {
std::vector<torch::Tensor> tl; std::vector<torch::Tensor> tl;
faust_matvec_to_torch_TensorList(ml, tl, on_gpu?at::kCUDA:at::kCPU, clone); convMatGenListToTensorList(ml, tl, on_gpu?at::kCUDA:at::kCPU, clone, transpose);
tensor_chain_mul(tl, out, op, on_gpu, clone, chain_opt, contiguous_dense_to_torch); tensor_chain_mul(tl, out, op, on_gpu, clone, chain_opt, contiguous_dense_to_torch, transpose);
} }
template<typename FPP, FDevice D> template<typename FPP, FDevice D>
void tensor_chain_mul(const std::vector<torch::Tensor>& tl, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op, const bool on_gpu, const bool clone, const bool chain_opt, const bool contiguous_dense_to_torch) void tensor_chain_mul(const std::vector<torch::Tensor>& tl, Faust::MatDense<FPP,Cpu> & out, const Faust::MatGeneric<FPP,D>* op, const bool on_gpu, const bool clone, const bool chain_opt, const bool contiguous_dense_to_torch, const bool transpose /* = true */)
{ {
torch::Tensor top, tres; torch::Tensor top, tres;
const Faust::MatSparse<FPP,D> *spm; const Faust::MatSparse<FPP,D> *spm;
...@@ -212,15 +269,22 @@ namespace Faust ...@@ -212,15 +269,22 @@ namespace Faust
if(op) if(op)
{ {
if(spm = dynamic_cast<const Faust::MatSparse<FPP,D>*>(op)) if(spm = dynamic_cast<const Faust::MatSparse<FPP,D>*>(op))
faust_MatSparse_to_torch_Tensor(*spm, top, on_gpu?at::kCUDA:at::kCPU, clone); convMatSparseToTensor(*spm, top, on_gpu?at::kCUDA:at::kCPU, clone, transpose);
else if(dm = dynamic_cast<const Faust::MatDense<FPP,D>*>(op)) else if(dm = dynamic_cast<const Faust::MatDense<FPP,D>*>(op))
faust_MatDense_to_torch_Tensor(*dm, top, on_gpu?at::kCUDA:at::kCPU, clone); convMatDenseToTensor(*dm, top, on_gpu?at::kCUDA:at::kCPU, clone, transpose);
tres = tensor_chain_mul(tl, &top, on_gpu?at::kCUDA:at::kCPU, chain_opt, contiguous_dense_to_torch); tres = tensor_chain_mul(tl, &top, on_gpu?at::kCUDA:at::kCPU, chain_opt, contiguous_dense_to_torch, transpose /* op_on_left if transpose */);
} }
else else
tres = tensor_chain_mul(tl, static_cast<torch::Tensor*>(nullptr), on_gpu?at::kCUDA:at::kCPU, chain_opt, contiguous_dense_to_torch); tres = tensor_chain_mul(tl, static_cast<torch::Tensor*>(nullptr), on_gpu?at::kCUDA:at::kCPU, chain_opt, contiguous_dense_to_torch, transpose /* op_on_left if transpose */);
out = Faust::MatDense<FPP,Cpu>(tres.data_ptr<FPP>(), tres.size(1), tres.size(0)); convTensorToMatDense(tres, out, transpose);
out.transpose(); }
void display_TensorList(std::vector<torch::Tensor>& tl, const bool transpose /*= true*/)
{
if(transpose)
for (int i = tl.size()-1; i >= 0; i--) {cout << "Tensor: " << tl.size()-1-i << " [" << tl[i].size(1) << "x" << tl[i].size(0) << "] " << (tl[i].is_sparse()?"SPARSE":"DENSE"); cout << endl;}
else
for (int i = 0; i < tl.size(); i++) {cout << "Tensor: " << i << " [" << tl[i].size(0) << "x" << tl[i].size(1) << "] " << (tl[i].is_sparse()?"SPARSE":"DENSE"); cout << endl;};
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment