Commit a113ea91 authored by Pierre BLANCHARD's avatar Pierre BLANCHARD

Significant modif in Chebyshev FMM (may impact your library): Provide a...

Significant modif in Chebyshev FMM (may impact your library): Provide a variant of ChebM2L without compression (new name FCheb) and use prefix FChebCmp for the compressed variant (former FCheb); Now FChebTensorial does exactly the same thing as FCheb but for tensorial kernel matrices.
parent d36016e5
// ===================================================================================
// Copyright ScalFmm 2016 INRIA, Olivier Coulaud, Bérenger Bramas,
// Matthias Messner olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the
// FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
// An extension to the license is given to allow static linking of scalfmm
// inside a proprietary application (no matter its license).
// See the main license file for more details.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FCHEBKERNEL_HPP
#define FCHEBKERNEL_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Utils/FSmartPointer.hpp"
#include "./FAbstractChebKernel.hpp"
#include "./FChebCmpM2LHandler.hpp"
class FTreeCoordinate;
/**
* @author Matthias Messner(matthias.messner@inria.fr)
* @class FChebCmpKernel
* @brief Chebyshev interpolation based FMM operators for general non oscillatory kernels..
*
* This class implements the Chebyshev interpolation based FMM operators. It
* implements all interfaces (P2P, P2M, M2M, M2L, L2L, L2P) which are required by
* the FFmmAlgorithm, FFmmAlgorithmThread ...
*
* @tparam CellClass Type of cell
* @tparam ContainerClass Type of container to store particles
* @tparam MatrixKernelClass Type of matrix kernel function
* @tparam ORDER Chebyshev interpolation order
*
* The ORDER sets the accuracy of the Chebyshev FMM while the EPSILON parameter introduces extra error but optimize the M2L step.
* In fact, in the Chebyshev FMM we perform compression on the M2L operators using various low rank approximation techniques
* (see https://arxiv.org/abs/1210.7292 for further details). Therefore we use a second accuracy criterion, namely EPSILON,
* in order to set the accuracy of these methods. For most kernels that we tested and in particular for 1/r, setting EPSILON=10^-ORDER d
* oes not introduce extra error in the FMM and captures the rank efficiently. If you think that for your kernel you need a better
* approximation of the M2L operators, then you can try to set EPSILON to 10 ^- (ORDER+{1,2,...}).
*/
template < class FReal, class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1>
class FChebCmpKernel
: public FAbstractChebKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
{
// private types
typedef FChebCmpM2LHandler<FReal, ORDER,MatrixKernelClass> M2LHandlerClass;
// using from
typedef FAbstractChebKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
AbstractBaseClass;
/// Needed for P2P and M2L operators
const MatrixKernelClass *const MatrixKernel;
/// Needed for M2L operator
FSmartPointer< M2LHandlerClass,FSmartPointerMemory> M2LHandler;
public:
/**
* The constructor initializes all constant attributes and it reads the
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
*
* @param[in] epsilon The compression parameter for M2L operator.
*
* The M2L optimized Chebyshev FMM implemented in ScalFMM are kernel dependent, but keeping EPSILON=10^-ORDER is usually fine.
* On the other hand you can short-circuit this feature by setting EPSILON to the machine accuracy,
* but this will significantly slow down the computations.
*
*/
FChebCmpKernel(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, const MatrixKernelClass *const inMatrixKernel,
const FReal Epsilon)
: FAbstractChebKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter),
MatrixKernel(inMatrixKernel),
M2LHandler(new M2LHandlerClass(MatrixKernel,Epsilon))
{
// read precomputed compressed m2l operators from binary file
//M2LHandler->ReadFromBinaryFileAndSet();
M2LHandler->ComputeAndCompressAndSet();
}
/**
* The constructor initializes all constant attributes and it reads the
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
* Same as \see above constructor but the epsilon is automatically set to EPSILON=10^-ORDER
*/
FChebCmpKernel(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, const MatrixKernelClass *const inMatrixKernel)
: FChebCmpKernel(inTreeHeight, inBoxWidth,inBoxCenter,inMatrixKernel,FMath::pow(10.0,static_cast<FReal>(-ORDER)))
{
}
void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles)
{
const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
// 1) apply Sy
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(0), SourceParticles);
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 2) apply B
M2LHandler->applyB(LeafCell->getMultipole(idxRhs), LeafCell->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
}
}
void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs),
ParentCell->getMultipole(idxRhs));
}
}
// 2) apply B
M2LHandler->applyB(ParentCell->getMultipole(idxRhs), ParentCell->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
}
}
// void M2L(CellClass* const FRestrict TargetCell,
// const CellClass* SourceCells[],
// const int NumSourceCells,
// const int TreeLevel) const
// {
// const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
// const FTreeCoordinate& cx = TargetCell->getCoordinate();
// for (int idx=0; idx<NumSourceCells; ++idx) {
// const FTreeCoordinate& cy = SourceCells[idx]->getCoordinate();
// const int transfer[3] = {cy.getX()-cx.getX(),
// cy.getY()-cx.getY(),
// cy.getZ()-cx.getZ()};
// M2LHandler->applyC(transfer, CellWidth,
// SourceCells[idx]->getMultipole() + AbstractBaseClass::nnodes,
// TargetCell->getLocal() + AbstractBaseClass::nnodes);
// }
// }
void M2L(CellClass* const FRestrict TargetCell, const CellClass* SourceCells[],
const int neighborPositions[], const int inSize, const int TreeLevel) override {
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxRhs) + AbstractBaseClass::nnodes;
const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel)));
for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
const int idx = neighborPositions[idxExistingNeigh];
M2LHandler->applyC(idx, CellWidth, SourceCells[idxExistingNeigh]->getMultipole(idxRhs) + AbstractBaseClass::nnodes,
CompressedLocalExpansion);
}
}
}
// void M2L(CellClass* const FRestrict TargetCell, const CellClass* SourceCells[],
// const int neighborPositions[], const int inSize, const int TreeLevel) override {
// const unsigned int rank = M2LHandler.getRank();
// FBlas::scal(343*rank, FReal(0.), MultipoleExpansion);
// const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
// for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
// const int idx = neighborPositions[idxExistingNeigh];
// FBlas::copy(rank, const_cast<FReal *const>(SourceCells[idxExistingNeigh]->getMultipole())+AbstractBaseClass::nnodes,
// MultipoleExpansion+idx*rank);
//
// M2LHandler->applyC(CellWidth, MultipoleExpansion, TargetCell->getLocal() + AbstractBaseClass::nnodes);
// }
void L2L(const CellClass* const FRestrict ParentCell,
CellClass* FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply U
M2LHandler->applyU(ParentCell->getLocal(idxRhs) + AbstractBaseClass::nnodes,
const_cast<CellClass*>(ParentCell)->getLocal(idxRhs));
// 2) apply Sx
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxRhs), ChildCells[ChildIndex]->getLocal(idxRhs));
}
}
}
}
void L2P(const CellClass* const LeafCell,
ContainerClass* const TargetParticles)
{
const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply U
M2LHandler->applyU(LeafCell->getLocal(idxRhs) + AbstractBaseClass::nnodes, const_cast<CellClass*>(LeafCell)->getLocal(idxRhs));
}
//// 2.a) apply Sx
//AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
// LeafCell->getLocal(0),
// TargetParticles);
//// 2.b) apply Px (grad Sx)
//AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
// LeafCell->getLocal(0),
// TargetParticles);
// 2.c) apply Sx and Px (grad Sx)
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getLocal(0), TargetParticles);
}
void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
if(inTargets == inSources){
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
}
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
int nbNeighborsToCompute = 0;
while(nbNeighborsToCompute < inSize
&& neighborPositions[nbNeighborsToCompute] < 14){
nbNeighborsToCompute += 1;
}
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2P(inTargets,inNeighbors,nbNeighborsToCompute,MatrixKernel);
}
void P2PRemote(const FTreeCoordinate& /*inPosition*/,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
const ContainerClass* const inNeighbors[], const int /*neighborPositions*/[],
const int inSize) override {
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
};
#endif //FCHEBKERNELS_HPP
// [--END--]
// ===================================================================================
// Copyright ScalFmm 2016 INRIA, Olivier Coulaud, Bérenger Bramas,
// Matthias Messner olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the
// FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
// An extension to the license is given to allow static linking of scalfmm
// inside a proprietary application (no matter its license).
// See the main license file for more details.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FCHEBM2LHANDLER_HPP
#define FCHEBM2LHANDLER_HPP
#include <numeric>
#include <stdexcept>
#include <string>
#include <sstream>
#include <fstream>
#include <typeinfo>
#include "Utils/FBlas.hpp"
#include "Utils/FTic.hpp"
#include "FChebTensor.hpp"
#include "Utils/FSvd.hpp"
template <class FReal, int ORDER>
unsigned int Compress(const FReal epsilon, const unsigned int ninteractions,
FReal* &U, FReal* &C, FReal* &B);
/**
* @author Matthias Messner (matthias.messner@inria.fr)
* @class FChebCmpM2LHandler
* Please read the license
*
* This class precomputes and compresses the M2L operators
* \f$[K_1,\dots,K_{316}]\f$ for all (\f$7^3-3^3 = 316\f$ possible interacting
* cells in the far-field) interactions for the Chebyshev interpolation
* approach. The class uses the compression via a truncated SVD and represents
* the compressed M2L operator as \f$K_t \sim U C_t B^\top\f$ with
* \f$t=1,\dots,316\f$. The truncation rank is denoted by \f$r\f$ and is
* determined by the prescribed accuracy \f$\varepsilon\f$. Hence, the
* originally \f$K_t\f$ of size \f$\ell^3\times\ell^3\f$ times \f$316\f$ for
* all interactions is reduced to only one \f$U\f$ and one \f$B\f$, each of
* size \f$\ell^3\times r\f$, and \f$316\f$ \f$C_t\f$, each of size \f$r\times
* r\f$.
*
* PB: FChebCmpM2LHandler does not seem to support non_homogeneous kernels!
* In fact nothing appears to handle this here (i.e. adapt scaling and storage
* to MatrixKernelClass::Type). Given the relatively important cost of the
* Chebyshev variant, it is probably a choice not to have implemented this
* feature here but instead in the ChebyshevSym variant. But what if the
* kernel is non homogeneous and non symmetric (e.g. Dislocations)...
*
* TODO Specialize class (see UnifM2LHandler) OR prevent from using this
* class with non homogeneous kernels ?!
*
* @tparam ORDER interpolation order \f$\ell\f$
*/
template <class FReal, int ORDER, class MatrixKernelClass>
class FChebCmpM2LHandler : FNoCopyable
{
enum {order = ORDER,
nnodes = TensorTraits<ORDER>::nnodes,
ninteractions = 316}; // 7^3 - 3^3 (max num cells in far-field)
const MatrixKernelClass *const MatrixKernel;
FReal *U, *C, *B;
const FReal epsilon; //<! accuracy which determines trucation of SVD
unsigned int rank; //<! truncation rank, satisfies @p epsilon
static const std::string getFileName(FReal epsilon)
{
const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f');
std::stringstream stream;
stream << "m2l_k"<< MatrixKernelClass::getID() << "_" << precision_type
<< "_o" << order << "_e" << epsilon << ".bin";
return stream.str();
}
public:
FChebCmpM2LHandler(const MatrixKernelClass *const inMatrixKernel, const FReal _epsilon)
: MatrixKernel(inMatrixKernel), U(nullptr), C(nullptr), B(nullptr), epsilon(_epsilon), rank(0)
{}
~FChebCmpM2LHandler()
{
if (U != nullptr) delete [] U;
if (B != nullptr) delete [] B;
if (C != nullptr) delete [] C;
}
/**
* Computes, compresses and sets the matrices \f$Y, C_t, B\f$
*/
void ComputeAndCompressAndSet()
{
// measure time
FTic time; time.tic();
// check if aready set
if (U||C||B) throw std::runtime_error("Compressed M2L operator already set");
rank = ComputeAndCompress(MatrixKernel, epsilon, U, C, B);
unsigned long sizeM2L = 343*rank*rank*sizeof(FReal);
// write info
std::cout << "Compressed and set M2L operators (" << long(sizeM2L) << " B) in "
<< time.tacAndElapsed() << "sec." << std::endl;
}
/**
* Computes, compresses, writes to binary file, reads it and sets the matrices \f$Y, C_t, B\f$
*/
void ComputeAndCompressAndStoreInBinaryFileAndReadFromFileAndSet()
{
FChebCmpM2LHandler<FReal, ORDER,MatrixKernelClass>::ComputeAndCompressAndStoreInBinaryFile(epsilon);
this->ReadFromBinaryFileAndSet();
}
/**
* Computes and compressed all \f$K_t\f$.
*
* @param[in] epsilon accuracy
* @param[out] U matrix of size \f$\ell^3\times r\f$
* @param[out] C matrix of size \f$r\times 316 r\f$ storing \f$[C_1,\dots,C_{316}]\f$
* @param[out] B matrix of size \f$\ell^3\times r\f$
*/
static unsigned int ComputeAndCompress(const MatrixKernelClass *const MatrixKernel, const FReal epsilon, FReal* &U, FReal* &C, FReal* &B);
/**
* Computes, compresses and stores the matrices \f$Y, C_t, B\f$ in a binary
* file
*/
static void ComputeAndCompressAndStoreInBinaryFile(const MatrixKernelClass *const MatrixKernel, const FReal epsilon);
/**
* Reads the matrices \f$Y, C_t, B\f$ from the respective binary file
*/
void ReadFromBinaryFileAndSet();
/**
* @return rank of the SVD compressed M2L operators
*/
unsigned int getRank() const {return rank;}
/**
* Expands potentials \f$x+=UX\f$ of a target cell. This operation can be
* seen as part of the L2L operation.
*
* @param[in] X compressed local expansion of size \f$r\f$
* @param[out] x local expansion of size \f$\ell^3\f$
*/
void applyU(const FReal *const X, FReal *const x) const
{
FBlas::gemva(nnodes, rank, 1., U, const_cast<FReal*>(X), x);
}
/**
* Compressed M2L operation \f$X+=C_tY\f$, where \f$Y\f$ is the compressed
* multipole expansion and \f$X\f$ is the compressed local expansion, both
* of size \f$r\f$. The index \f$t\f$ denotes the transfer vector of the
* target cell to the source cell.
*
* @param[in] transfer transfer vector
* @param[in] Y compressed multipole expansion
* @param[out] X compressed local expansion
* @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$.
*/
void applyC(const int transfer[3], FReal CellWidth,
const FReal *const Y, FReal *const X) const
{
const unsigned int idx
= (transfer[0]+3)*7*7 + (transfer[1]+3)*7 + (transfer[2]+3);
const FReal scale(MatrixKernel->getScaleFactor(CellWidth));
FBlas::gemva(rank, rank, scale, C + idx*rank*rank, const_cast<FReal*>(Y), X);
}
void applyC(const unsigned int idx, FReal CellWidth,
const FReal *const Y, FReal *const X) const
{
const FReal scale(MatrixKernel->getScaleFactor(CellWidth));
FBlas::gemva(rank, rank, scale, C + idx*rank*rank, const_cast<FReal*>(Y), X);
}
void applyC(FReal CellWidth,
const FReal *const Y, FReal *const X) const
{
const FReal scale(MatrixKernel->getScaleFactor(CellWidth));
FBlas::gemva(rank, rank * 343, scale, C, const_cast<FReal*>(Y), X);
}
/**
* Compresses densities \f$Y=B^\top y\f$ of a source cell. This operation
* can be seen as part of the M2M operation.
*
* @param[in] y multipole expansion of size \f$\ell^3\f$
* @param[out] Y compressed multipole expansion of size \f$r\f$
*/
void applyB(FReal *const y, FReal *const Y) const
{
FBlas::gemtv(nnodes, rank, 1., B, y, Y);
}
};
//////////////////////////////////////////////////////////////////////
// definition ////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
template <class FReal, int ORDER, class MatrixKernelClass>
unsigned int
FChebCmpM2LHandler<FReal, ORDER, MatrixKernelClass>::ComputeAndCompress(const MatrixKernelClass *const MatrixKernel,
const FReal epsilon,
FReal* &U,
FReal* &C,
FReal* &B)
{
// allocate memory and store compressed M2L operators
if (U||C||B) throw std::runtime_error("Compressed M2L operators are already set");
// interpolation points of source (Y) and target (X) cell
FPoint<FReal> X[nnodes], Y[nnodes];
// set roots of target cell (X)
FChebTensor<FReal, order>::setRoots(FPoint<FReal>(0.,0.,0.), FReal(2.), X);
// allocate memory and compute 316 m2l operators
FReal *_U, *_C, *_B;
_U = _B = nullptr;
_C = new FReal [nnodes*nnodes * ninteractions];
unsigned int counter = 0;
for (int i=-3; i<=3; ++i) {
for (int j=-3; j<=3; ++j) {
for (int k=-3; k<=3; ++k) {
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
// set roots of source cell (Y)
const FPoint<FReal> cy(FReal(2.*i), FReal(2.*j), FReal(2.*k));
FChebTensor<FReal, order>::setRoots(cy, FReal(2.), Y);
// evaluate m2l operator
for (unsigned int n=0; n<nnodes; ++n)
for (unsigned int m=0; m<nnodes; ++m)
_C[counter*nnodes*nnodes + n*nnodes + m]
= MatrixKernel->evaluate(X[m], Y[n]);
// increment interaction counter
counter++;
}
}
}
}
if (counter != ninteractions)
throw std::runtime_error("Number of interactions must correspond to 316");
//////////////////////////////////////////////////////////
FReal weights[nnodes];
FChebTensor<FReal, order>::setRootOfWeights(weights);
for (unsigned int i=0; i<316; ++i)
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(nnodes, weights[n], _C+i*nnodes*nnodes + n, nnodes); // scale rows
FBlas::scal(nnodes, weights[n], _C+i*nnodes*nnodes + n * nnodes); // scale cols
}
//////////////////////////////////////////////////////////
// svd compression of M2L
const unsigned int rank = Compress<FReal, ORDER>(epsilon, ninteractions, _U, _C, _B);
if (!(rank>0)) throw std::runtime_error("Low rank must be larger then 0!");
// store U
U = new FReal [nnodes * rank];
FBlas::copy(rank*nnodes, _U, U);
delete [] _U;
// store B
B = new FReal [nnodes * rank];
FBlas::copy(rank*nnodes, _B, B);
delete [] _B;
// store C
counter = 0;
C = new FReal [343 * rank*rank];
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k) {
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
FBlas::copy(rank*rank, _C + counter*rank*rank, C + idx*rank*rank);
counter++;
} else FBlas::setzero(rank*rank, C + idx*rank*rank);
}
if (counter != ninteractions)
throw std::runtime_error("Number of interactions must correspond to 316");
delete [] _C;
//////////////////////////////////////////////////////////
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(rank, FReal(1.) / weights[n], U+n, nnodes); // scale rows
FBlas::scal(rank, FReal(1.) / weights[n], B+n, nnodes); // scale rows
}
//////////////////////////////////////////////////////////
// return low rank
return rank;
}
template <class FReal, int ORDER, class MatrixKernelClass>
void
FChebCmpM2LHandler<FReal, ORDER, MatrixKernelClass>::ComputeAndCompressAndStoreInBinaryFile(const MatrixKernelClass *const MatrixKernel, const FReal epsilon)
{
// measure time
FTic time; time.tic();
// start computing process
FReal *U, *C, *B;
U = C = B = nullptr;
const unsigned int rank = ComputeAndCompress(MatrixKernel, epsilon, U, C, B);
// store into binary file
const std::string filename(getFileName(epsilon));
std::ofstream stream(filename.c_str(),
std::ios::out | std::ios::binary | std::ios::trunc);
if (stream.good()) {
stream.seekp(0);
// 1) write number of interpolation points (int)
int _nnodes = nnodes;
stream.write(reinterpret_cast<char*>(&_nnodes), sizeof(int));
// 2) write low rank (int)
int _rank = rank;
stream.write(reinterpret_cast<char*>(&_rank), sizeof(int));
// 3) write U (rank*nnodes * FReal)
stream.write(reinterpret_cast<char*>(U), sizeof(FReal)*rank*nnodes);
// 4) write B (rank*nnodes * FReal)
stream.write(reinterpret_cast<char*>(B), sizeof(FReal)*rank*nnodes);
// 5) write 343 C (343 * rank*rank * FReal)
stream.write(reinterpret_cast<char*>(C), sizeof(FReal)*rank*rank*343);
} else throw std::runtime_error("File could not be opened to write");
stream.close();
// free memory
if (U != nullptr) delete [] U;
if (B != nullptr) delete [] B;
if (C != nullptr) delete [] C;
// write info
std::cout << "Compressed M2L operators ("<< rank << ") stored in binary file " << filename
<< " in " << time.tacAndElapsed() << "sec." << std::endl;
}
template <class FReal, int ORDER, class MatrixKernelClass>
void
FChebCmpM2LHandler<FReal, ORDER, MatrixKernelClass>::ReadFromBinaryFileAndSet()
{
// measure time
FTic time; time.tic();
// start reading process
if (U||C||B) throw std::runtime_error("Compressed M2L operator already set");
const std::string filename(getFileName(epsilon));
std::ifstream stream(filename.c_str(),
std::ios::in | std::ios::binary | std::ios::ate);
const std::ifstream::pos_type size = stream.tellg();
if (size<=0) {
std::cout << "Info: The requested binary file " << filename
<< " does not yet exist. Compute it now ... " << std::endl;
this->ComputeAndCompressAndStoreInBinaryFileAndReadFromFileAndSet();
return;
}
if (stream.good()) {
stream.seekg(0);
// 1) read number of interpolation points (int)
int npts;
stream.read(reinterpret_cast<char*>(&npts), sizeof(int));
if (npts!=nnodes) throw std::runtime_error("nnodes and npts do not correspond");
// 2) read low rank (int)
stream.read(reinterpret_cast<char*>(&rank), sizeof(int));
// 3) write U (rank*nnodes * FReal)
U = new FReal [rank*nnodes];
stream.read(reinterpret_cast<char*>(U), sizeof(FReal)*rank*nnodes);
// 4) write B (rank*nnodes * FReal)
B = new FReal [rank*nnodes];
stream.read(reinterpret_cast<char*>(B), sizeof(FReal)*rank*nnodes);
// 5) write 343 C (343 * rank*rank * FReal)
C = new FReal [343 * rank*rank];
stream.read(reinterpret_cast<char*>(C), sizeof(FReal)*rank*rank*343);
} else throw std::runtime_error("File could not be opened to read");
stream.close();
// write info
std::cout << "Compressed M2L operators (" << rank << ") read from binary file "
<< filename << " in " << time.tacAndElapsed() << "sec." << std::endl;
}
/*
unsigned int ReadRankFromBinaryFile(const std::string& filename)
{
// start reading process
std::ifstream stream(filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
const std::ifstream::pos_type size = stream.tellg();
if (size<=0) throw std::runtime_error("The requested binary file does not exist.");
unsigned int rank = -1;
if (stream.good()) {
stream.seekg(0);
// 1) read number of interpolation points (int)
int npts;
stream.read(reinterpret_cast<char*>(&npts), sizeof(int));