// ===================================================================================
// Copyright ScalFmm 2016 INRIA, Olivier Coulaud, Bérenger Bramas,
// Matthias Messner olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the
// FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
// An extension to the license is given to allow static linking of scalfmm
// inside a proprietary application (no matter its license).
// See the main license file for more details.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
// Keep in private GIT

#ifndef FUNIFTENSORIALKERNEL_HPP
#define FUNIFTENSORIALKERNEL_HPP

#include "../../Utils/FGlobal.hpp"

#include "../../Utils/FSmartPointer.hpp"

#include "./FAbstractUnifKernel.hpp"
#include "./FUnifM2LHandler.hpp"
#include "./FUnifTensorialM2LHandler.hpp" //PB: temporary version

class FTreeCoordinate;

/**
 * @author Pierre Blanchard (pierre.blanchard@inria.fr)
 * @class FUnifTensorialKernel
 * @brief
 * Please read the license
 *
 * This kernels implement the Lagrange interpolation based FMM operators. It
 * implements all interfaces (P2P,P2M,M2M,M2L,L2L,L2P) which are required by
 * the FFmmAlgorithm and FFmmAlgorithmThread.
 *
 * PB: 3 IMPORTANT remarks !!!
 *
47 48
 * 1) Handling tensorial kernels (DIM,NRHS,NLHS) and having multiple rhs 
 * (NVALS) are considered 2 distinct features and are currently combined.
49
 *
50 51 52 53 54 55
 * 2) When it comes to applying M2L it is NOT much faster to loop over 
 * NRHSxNLHS inside applyM2L (at least for the Lagrange case).
 * 2-bis) During precomputation the tensorial matrix kernels are evaluated 
 * blockwise, but this is not always possible. 
 * In fact, in the ChebyshevSym variant the matrix kernel needs to be 
 * evaluated compo-by-compo since we currently use a scalar ACA.
56
 *
57 58 59
 * 3) We currently use multiple 1D FFT instead of multidim FFT since embedding
 * is circulant. Multidim FFT could be used if embedding were block circulant.
 * TODO investigate possibility of block circulant embedding
60 61 62 63 64 65
 *
 * @tparam CellClass Type of cell
 * @tparam ContainerClass Type of container to store particles
 * @tparam MatrixKernelClass Type of matrix kernel function
 * @tparam ORDER Lagrange interpolation order
 */
66
template < class FReal, class CellClass, class ContainerClass,   class MatrixKernelClass, int ORDER, int NVALS = 1>
67
class FUnifTensorialKernel
68
    : public FAbstractUnifKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
69
{
70 71 72 73
    enum {nRhs = MatrixKernelClass::NRHS,
          nLhs = MatrixKernelClass::NLHS,
          nPot = MatrixKernelClass::NPOT,
          nPV = MatrixKernelClass::NPV};
74 75 76

protected://PB: for OptiDis

77
    // private types
78
    typedef FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,MatrixKernelClass::Type> M2LHandlerClass;
79

80
    // using from
81
    typedef FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
82
    AbstractBaseClass;
83

84 85
    /// Needed for P2P and M2L operators
    const MatrixKernelClass *const MatrixKernel;
86

87 88
    /// Needed for M2L operator
    const M2LHandlerClass M2LHandler;
89

90 91 92
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;

93
public:
94 95 96 97 98 99 100
    /**
     * The constructor initializes all constant attributes and it reads the
     * precomputed and compressed M2L operators from a binary file (an
     * runtime_error is thrown if the required file is not valid).
     */
    FUnifTensorialKernel(const int inTreeHeight,
                         const FReal inBoxWidth,
101
                         const FPoint<FReal>& inBoxCenter,
102
                         const MatrixKernelClass *const inMatrixKernel,
103 104
                         const FReal inBoxWidthExtension,
                         const int inLeafLevelSeparationCriterion = 1)
105
    : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter,inBoxWidthExtension),
106 107
      MatrixKernel(inMatrixKernel),
      M2LHandler(MatrixKernel,
108
                 inTreeHeight,
109
                 inBoxWidth,
110 111 112
                 inBoxWidthExtension,
                 inLeafLevelSeparationCriterion), 
      LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
113 114 115 116 117 118
    { }


    void P2M(CellClass* const LeafCell,
             const ContainerClass* const SourceParticles)
    {
119
        const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
        const FReal ExtendedLeafCellWidth(AbstractBaseClass::BoxWidthLeaf 
                                          + AbstractBaseClass::BoxWidthExtension);

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){

            // 1) apply Sy
            AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, ExtendedLeafCellWidth,
                                                      LeafCell->getMultipole(idxV*nRhs), SourceParticles);

            for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
                // update multipole index
                int idxMul = idxV*nRhs + idxRhs;

                // 2) apply Discrete Fourier Transform
                M2LHandler.applyZeroPaddingAndDFT(LeafCell->getMultipole(idxMul), 
                                                  LeafCell->getTransformedMultipole(idxMul));
136

137
            }
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
        }// NVALS
    }


    void M2M(CellClass* const FRestrict ParentCell,
             const CellClass*const FRestrict *const FRestrict ChildCells,
             const int TreeLevel)
    {
        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
                // update multipole index
                int idxMul = idxV*nRhs + idxRhs;

                // 1) apply Sy
                FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentCell->getMultipole(idxMul));
                for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
                    if (ChildCells[ChildIndex]){
                        AbstractBaseClass::Interpolator->applyM2M(ChildIndex, 
                                                                  ChildCells[ChildIndex]->getMultipole(idxMul),
                                                                  ParentCell->getMultipole(idxMul), 
                                                                  TreeLevel/*Cell width extension specific*/);
                    }
                }
                // 2) Apply Discete Fourier Transform
                M2LHandler.applyZeroPaddingAndDFT(ParentCell->getMultipole(idxMul), 
                                                  ParentCell->getTransformedMultipole(idxMul));
            }
        }// NVALS
    }


169 170
    void M2L(CellClass* const FRestrict TargetCell, const CellClass* SourceCells[],
             const int neighborPositions[], const int inSize, const int TreeLevel)  override {
171 172 173 174 175 176 177 178 179 180 181
        const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel)));
        const FReal ExtendedCellWidth(CellWidth + AbstractBaseClass::BoxWidthExtension);
        const FReal scale(MatrixKernel->getScaleFactor(ExtendedCellWidth));

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){

                // update local index
                const int idxLoc = idxV*nLhs + idxLhs;

                // load transformed local expansion
182
                FComplex<FReal> *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc);
183 184 185 186 187 188 189 190 191 192

                // update idxRhs
                const int idxRhs = idxLhs % nPV; 

                // update multipole index
                const int idxMul = idxV*nRhs + idxRhs;

                // get index in matrix kernel
                const unsigned int d = MatrixKernel->getPosition(idxLhs);

193 194
                for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
                    const int idx = neighborPositions[idxExistingNeigh];
195

196 197 198
                    M2LHandler.applyFC(idx, TreeLevel, scale, d,
                                       SourceCells[idxExistingNeigh]->getTransformedMultipole(idxMul),
                                       TransformedLocalExpansion);
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231

                }
            }// NLHS=NPOT*NPV
        }// NVALS
    }


    void L2L(const CellClass* const FRestrict ParentCell,
             CellClass* FRestrict *const FRestrict ChildCells,
             const int TreeLevel)
    {
        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
                int idxLoc = idxV*nLhs + idxLhs;
                // 1) Apply Inverse Discete Fourier Transform
                M2LHandler.unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxLoc),
                                                    const_cast<CellClass*>(ParentCell)->getLocal(idxLoc));
                // 2) apply Sx
                for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
                    if (ChildCells[ChildIndex]){
                        AbstractBaseClass::Interpolator->applyL2L(ChildIndex, 
                                                                  ParentCell->getLocal(idxLoc), 
                                                                  ChildCells[ChildIndex]->getLocal(idxLoc),
                                                                  TreeLevel/*Cell width extension specific*/);
                    }
                }
            }
        }// NVALS
    }

    void L2P(const CellClass* const LeafCell,
             ContainerClass* const TargetParticles)
    {
232
        const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
        const FReal ExtendedLeafCellWidth(AbstractBaseClass::BoxWidthLeaf 
                                          + AbstractBaseClass::BoxWidthExtension);

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
                int idxLoc = idxV*nLhs + idxLhs;
                // 1)  Apply Inverse Discete Fourier Transform
                M2LHandler.unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxLoc), 
                                                    const_cast<CellClass*>(LeafCell)->getLocal(idxLoc));

            }

            // 2.a) apply Sx
            AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, ExtendedLeafCellWidth,
                                                      LeafCell->getLocal(idxV*nLhs), TargetParticles);

            // 2.b) apply Px (grad Sx)
            AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, ExtendedLeafCellWidth,
                                                              LeafCell->getLocal(idxV*nLhs), TargetParticles);

        }// NVALS
    }

256
    void P2P(const FTreeCoordinate& inPosition,
257
             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
258 259
             ContainerClass* const inNeighbors[], const int neighborPositions[],
             const int inSize) override {
260 261 262 263 264 265 266 267 268 269 270
        // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
        if(LeafLevelSeparationCriterion==1) {
            if(inTargets == inSources){
                P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
            }
            else{
                const ContainerClass* const srcPtr[1] = {inSources};
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
            }
271
        }
272 273 274
        // Nearfield interactions are only computed within the target leaf
        else if(LeafLevelSeparationCriterion==0){
            DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
275
        }
276
        // If criterion equals -1 then no P2P need to be performed.
277 278 279 280 281 282
    }

    void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
             ContainerClass* const FRestrict inTargets,
             ContainerClass* const inNeighbors[], const int neighborPositions[],
             const int inSize) override {
283 284 285 286 287 288
        int nbNeighborsToCompute = 0;
        while(nbNeighborsToCompute < inSize
              && neighborPositions[nbNeighborsToCompute] < 14){
            nbNeighborsToCompute += 1;
        }
        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2P(inTargets,inNeighbors,nbNeighborsToCompute,MatrixKernel);
289 290 291 292 293
    }


    void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                   ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
294
                   const ContainerClass* const inNeighbors[], const int /*neighborPositions*/[],
295
                   const int inSize) override {
296 297 298 299 300 301 302
        // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
        if(LeafLevelSeparationCriterion==1) 
            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
        // Nearfield interactions are only computed within the target leaf
        if(LeafLevelSeparationCriterion==0) 
            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,0,MatrixKernel);
        // If criterion equals -1 then no P2P need to be performed.        
303
    }
304 305 306 307

};


#endif //FUNIFTENSORIALKERNEL_HPP

// [--END--]