#ifndef FCHEBSYMM2LHANDLER_HPP #define FCHEBSYMM2LHANDLER_HPP #include #include "../../Utils/FBlas.hpp" #include "./FChebTensor.hpp" #include "./FChebSymmetries.hpp" #include "./FChebM2LHandler.hpp" /** * @author Matthias Messner (matthias.matthias@inria.fr) * Please read the license */ /*! Choose either \a FULLY_PIVOTED_ACASVD or \a PARTIALLY_PIVOTED_ACASVD or \a ONLY_SVD. */ //#define ONLY_SVD //#define FULLY_PIVOTED_ACASVD #define PARTIALLY_PIVOTED_ACASVD /*! The fully pivoted adaptive cross approximation (fACA) compresses a far-field interaction as \f$K\sim UV^\top\f$. The fACA requires all entries to be computed beforehand, then the compression follows in \f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy \f$\varepsilon\f$. The matrix K will be destroyed as a result. @param[in] K far-field to be approximated @param[in] eps prescribed accuracy @param[out] U matrix containing \a k column vectors @param[out] V matrix containing \a k row vectors @param[out] k final low-rank depends on prescribed accuracy \a eps */ template static void fACA(FReal *const K, const double eps, FReal* &U, FReal* &V, unsigned int &k) { static const int nnodes = ORDER*ORDER*ORDER; // control vectors (true if not used, false if used) bool r[nnodes], c[nnodes]; for (int i=0; i eps*eps * norm2K); //////////////////////////////////////////////// } /*! The partially pivoted adaptive cross approximation (pACA) compresses a far-field interaction as \f$K\sim UV^\top\f$. The pACA computes the matrix entries on the fly, as they are needed. The compression follows in \f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy \f$\varepsilon\f$. The matrix K will be destroyed as a result. @tparam ComputerType the functor type which allows to compute matrix entries @param[in] Computer the entry-computer functor @param[in] eps prescribed accuracy @param[in] nx number of rows @param[in] ny number of cols @param[out] U matrix containing \a k column vectors @param[out] V matrix containing \a k row vectors @param[out] k final low-rank depends on prescribed accuracy \a eps */ template void pACA(const ComputerType& Computer, const FReal eps, const unsigned int nx, const unsigned int ny, FReal* &U, FReal* &V, unsigned int &k) { // control vectors (true if not used, false if used) bool *const r = new bool[nx]; bool *const c = new bool[ny]; for (unsigned int i=0; i eps*eps * norm2S); delete [] r; delete [] c; } /*! Precomputes the 16 far-field interactions (due to symmetries in their arrangement all 316 far-field interactions can be represented by permutations of the 16 we compute in this function). Depending on whether FACASVD is defined or not, either ACA+SVD or only SVD is used to compress them. */ template static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, const double Epsilon, FReal* K[343], int LowRank[343]) { std::cout << "\nComputing 16 far-field interactions for cells of width w = " << CellWidth << std::endl; static const unsigned int nnodes = ORDER*ORDER*ORDER; // interpolation points of source (Y) and target (X) cell FPoint X[nnodes], Y[nnodes]; // set roots of target cell (X) FChebTensor::setRoots(FPoint(0.,0.,0.), CellWidth, X); // temporary matrix FReal* U = new FReal [nnodes*nnodes]; // needed for the SVD unsigned int INFO; const unsigned int LWORK = 2 * (3*nnodes + nnodes); FReal *const WORK = new FReal [LWORK]; FReal *const VT = new FReal [nnodes*nnodes]; FReal *const S = new FReal [nnodes]; // initialize timer FTic time; FReal overall_time(0.); FReal elapsed_time(0.); unsigned int counter = 0; for (int i=2; i<=3; ++i) { for (int j=0; j<=i; ++j) { for (int k=0; k<=j; ++k) { // assemble matrix and apply weighting matrices const FPoint cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); FChebTensor::setRoots(cy, CellWidth, Y); FReal weights[nnodes]; FChebTensor::setRootOfWeights(weights); // now the entry-computer is responsible for weighting the matrix entries EntryComputer Computer(nnodes, X, nnodes, Y, weights); // start timer time.tic(); #if (defined ONLY_SVD || defined FULLY_PIVOTED_ACASVD) Computer(0, nnodes, 0, nnodes, U); #endif /* // applying weights //////////////////////////////////////// FReal weights[nnodes]; FChebTensor::setRootOfWeights(weights); for (unsigned int n=0; n(U, Epsilon, UU, VV, rank); #else pACA(Computer, Epsilon, nnodes, nnodes, UU, VV, rank); #endif // QR decomposition FReal* phi = new FReal [rank*rank]; { // QR of U and V FReal* tauU = new FReal [rank]; INFO = FBlas::geqrf(nnodes, rank, UU, tauU, LWORK, WORK); assert(INFO==0); FReal* tauV = new FReal [rank]; INFO = FBlas::geqrf(nnodes, rank, VV, tauV, LWORK, WORK); assert(INFO==0); // phi = Ru Rv' FReal* rU = new FReal [2 * rank*rank]; FReal* rV = rU + rank*rank; FBlas::setzero(2 * rank*rank, rU); for (unsigned int l=0; l V and then Qu V FReal *const V = new FReal [aca_rank * rank]; for (unsigned int r=0; r(S, Epsilon); // store const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); assert(K[idx]==NULL); K[idx] = new FReal [2*rank*nnodes]; LowRank[idx] = rank; for (unsigned int r=0; r class SymmetryHandler; /*! Specialization for homogeneous kernel functions */ template class SymmetryHandler { static const unsigned int nnodes = ORDER*ORDER*ORDER; // M2L operators FReal* K[343]; int LowRank[343]; public: // permutation vectors and permutated indices unsigned int pvectors[343][nnodes]; unsigned int pindices[343]; /** Constructor: with 16 small SVDs */ template SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon, const FReal, const unsigned int) { // init all 343 item to zero, because effectively only 16 exist for (unsigned int t=0; t<343; ++t) { K[t] = NULL; LowRank[t] = 0; } // set permutation vector and indices const FChebSymmetries Symmetries; for (int i=-3; i<=3; ++i) for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); pindices[idx] = 0; if (abs(i)>1 || abs(j)>1 || abs(k)>1) pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); } // precompute 16 M2L operators const FReal ReferenceCellWidth = FReal(2.); precompute(MatrixKernel, ReferenceCellWidth, Epsilon, K, LowRank); } /** Destructor */ ~SymmetryHandler() { for (unsigned int t=0; t<343; ++t) if (K[t]!=NULL) delete [] K[t]; } /*! return the t-th approximated far-field interactions*/ const FReal *const getK(const unsigned int, const unsigned int t) const { return K[t]; } /*! return the t-th approximated far-field interactions*/ const int getLowRank(const unsigned int, const unsigned int t) const { return LowRank[t]; } }; /*! Specialization for non-homogeneous kernel functions */ template class SymmetryHandler { static const unsigned int nnodes = ORDER*ORDER*ORDER; // Height of octree; needed only in the case of non-homogeneous kernel functions const unsigned int TreeHeight; // M2L operators for all levels in the octree FReal*** K; int** LowRank; public: // permutation vectors and permutated indices unsigned int pvectors[343][nnodes]; unsigned int pindices[343]; /** Constructor: with 16 small SVDs */ template SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon, const FReal RootCellWidth, const unsigned int inTreeHeight) : TreeHeight(inTreeHeight) { // init all 343 item to zero, because effectively only 16 exist K = new FReal** [TreeHeight]; LowRank = new int* [TreeHeight]; K[0] = NULL; K[1] = NULL; LowRank[0] = NULL; LowRank[1] = NULL; for (unsigned int l=2; l Symmetries; for (int i=-3; i<=3; ++i) for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); pindices[idx] = 0; if (abs(i)>1 || abs(j)>1 || abs(k)>1) pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); } // precompute 16 M2L operators at all levels having far-field interactions FReal CellWidth = RootCellWidth / FReal(2.); // at level 1 CellWidth /= FReal(2.); // at level 2 for (unsigned int l=2; l(MatrixKernel, CellWidth, Epsilon, K[l], LowRank[l]); CellWidth /= FReal(2.); // at level l+1 } } /** Destructor */ ~SymmetryHandler() { for (unsigned int l=0; l #include /** * Computes, compresses and stores the 16 M2L kernels in a binary file. */ template static void ComputeAndCompressAndStoreInBinaryFile(const MatrixKernelClass *const MatrixKernel, const FReal Epsilon) { static const unsigned int nnodes = ORDER*ORDER*ORDER; // compute and compress //////////// FReal* K[343]; int LowRank[343]; for (unsigned int idx=0; idx<343; ++idx) { K[idx] = NULL; LowRank[idx] = 0; } precompute(MatrixKernel, FReal(2.), Epsilon, K, LowRank); // write to binary file //////////// FTic time; time.tic(); // start computing process const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream sstream; sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin"; const std::string filename(sstream.str()); std::ofstream stream(filename.c_str(), std::ios::out | std::ios::binary | std::ios::trunc); if (stream.good()) { stream.seekp(0); for (unsigned int idx=0; idx<343; ++idx) if (K[idx]!=NULL) { // 1) write index stream.write(reinterpret_cast(&idx), sizeof(int)); // 2) write low rank (int) int rank = LowRank[idx]; stream.write(reinterpret_cast(&rank), sizeof(int)); // 3) write U and V (both: rank*nnodes * FReal) FReal *const U = K[idx]; FReal *const V = K[idx] + rank*nnodes; stream.write(reinterpret_cast(U), sizeof(FReal)*rank*nnodes); stream.write(reinterpret_cast(V), sizeof(FReal)*rank*nnodes); } } else throw std::runtime_error("File could not be opened to write"); stream.close(); // write info std::cout << "Compressed M2L operators stored in binary file " << filename << " in " << time.tacAndElapsed() << "sec." << std::endl; // free memory ///////////////////// for (unsigned int t=0; t<343; ++t) if (K[t]!=NULL) delete [] K[t]; } /** * Reads the 16 compressed M2L kernels from the binary files and writes them * in K and the respective low-rank in LowRank. */ template void ReadFromBinaryFile(const FReal Epsilon, FReal* K[343], int LowRank[343]) { // compile time constants const unsigned int nnodes = ORDER*ORDER*ORDER; // find filename const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream sstream; sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin"; const std::string filename(sstream.str()); // read binary file std::ifstream istream(filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); const std::ifstream::pos_type size = istream.tellg(); if (size<=0) throw std::runtime_error("The requested binary file does not yet exist. Exit."); if (istream.good()) { istream.seekg(0); // 1) read index (int) int _idx; istream.read(reinterpret_cast(&_idx), sizeof(int)); // loop to find 16 compressed m2l operators for (int idx=0; idx<343; ++idx) { K[idx] = NULL; LowRank[idx] = 0; // if it exists if (idx == _idx) { // 2) read low rank (int) int rank; istream.read(reinterpret_cast(&rank), sizeof(int)); LowRank[idx] = rank; // 3) read U and V (both: rank*nnodes * FReal) K[idx] = new FReal [2*rank*nnodes]; FReal *const U = K[idx]; FReal *const V = K[idx] + rank*nnodes; istream.read(reinterpret_cast(U), sizeof(FReal)*rank*nnodes); istream.read(reinterpret_cast(V), sizeof(FReal)*rank*nnodes); // 1) read next index istream.read(reinterpret_cast(&_idx), sizeof(int)); } } } else throw std::runtime_error("File could not be opened to read"); istream.close(); } #endif