FUnifTensorialKernel.hpp 13.5 KB
Newer Older
1
// See LICENCE file at project root
2 3
// Keep in private GIT

4 5 6 7
#ifndef FUNIFTENSORIALKERNEL_HPP
#define FUNIFTENSORIALKERNEL_HPP

#include "../../Utils/FGlobal.hpp"
8

9 10 11
#include "../../Utils/FSmartPointer.hpp"

#include "./FAbstractUnifKernel.hpp"
12 13
#include "./FUnifM2LHandler.hpp"
#include "./FUnifTensorialM2LHandler.hpp" //PB: temporary version
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28

class FTreeCoordinate;

/**
 * @author Pierre Blanchard (pierre.blanchard@inria.fr)
 * @class FUnifTensorialKernel
 * @brief
 * Please read the license
 *
 * This kernels implement the Lagrange interpolation based FMM operators. It
 * implements all interfaces (P2P,P2M,M2M,M2L,L2L,L2P) which are required by
 * the FFmmAlgorithm and FFmmAlgorithmThread.
 *
 * PB: 3 IMPORTANT remarks !!!
 *
29
 * 1) Handling tensorial kernels (DIM,NRHS,NLHS) and having multiple rhs
30
 * (NVALS) are considered 2 distinct features and are currently combined.
31
 *
32
 * 2) When it comes to applying M2L it is NOT much faster to loop over
33
 * NRHSxNLHS inside applyM2L (at least for the Lagrange case).
34 35 36
 * 2-bis) During precomputation the tensorial matrix kernels are evaluated
 * blockwise, but this is not always possible.
 * In fact, in the ChebyshevSym variant the matrix kernel needs to be
37
 * evaluated compo-by-compo since we currently use a scalar ACA.
38
 *
39 40 41
 * 3) We currently use multiple 1D FFT instead of multidim FFT since embedding
 * is circulant. Multidim FFT could be used if embedding were block circulant.
 * TODO investigate possibility of block circulant embedding
42 43 44 45 46 47
 *
 * @tparam CellClass Type of cell
 * @tparam ContainerClass Type of container to store particles
 * @tparam MatrixKernelClass Type of matrix kernel function
 * @tparam ORDER Lagrange interpolation order
 */
48
template < class FReal, class CellClass, class ContainerClass,   class MatrixKernelClass, int ORDER, int NVALS = 1>
49
class FUnifTensorialKernel
50
    : public FAbstractUnifKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
51
{
52 53 54 55
    enum {nRhs = MatrixKernelClass::NRHS,
          nLhs = MatrixKernelClass::NLHS,
          nPot = MatrixKernelClass::NPOT,
          nPV = MatrixKernelClass::NPV};
56 57 58

protected://PB: for OptiDis

59
    // private types
60
    typedef FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,MatrixKernelClass::Type> M2LHandlerClass;
61

62
    // using from
63
    typedef FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
64
    AbstractBaseClass;
65

66 67
    /// Needed for P2P and M2L operators
    const MatrixKernelClass *const MatrixKernel;
68

69 70
    /// Needed for M2L operator
    const M2LHandlerClass M2LHandler;
71

72 73 74
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;

75
public:
76 77 78 79 80 81 82
    /**
     * The constructor initializes all constant attributes and it reads the
     * precomputed and compressed M2L operators from a binary file (an
     * runtime_error is thrown if the required file is not valid).
     */
    FUnifTensorialKernel(const int inTreeHeight,
                         const FReal inBoxWidth,
83
                         const FPoint<FReal>& inBoxCenter,
84
                         const MatrixKernelClass *const inMatrixKernel,
85 86
                         const FReal inBoxWidthExtension,
                         const int inLeafLevelSeparationCriterion = 1)
87
    : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter,inBoxWidthExtension),
88 89
      MatrixKernel(inMatrixKernel),
      M2LHandler(MatrixKernel,
90
                 inTreeHeight,
91
                 inBoxWidth,
92
                 inBoxWidthExtension,
93
                 inLeafLevelSeparationCriterion),
94
      LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
95 96 97
    { }


98 99 100
    template<class SymbolicData>
    void P2M(typename CellClass::multipole_t* const LeafMultipole,
             const SymbolicData* const LeafSymbData,
101 102
             const ContainerClass* const SourceParticles)
    {
103 104
        const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafSymbData->getCoordinate()));
        const FReal ExtendedLeafCellWidth(AbstractBaseClass::BoxWidthLeaf
105 106 107 108 109 110
                                          + AbstractBaseClass::BoxWidthExtension);

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){

            // 1) apply Sy
            AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, ExtendedLeafCellWidth,
111
                                                      LeafMultipole->get(idxV*nRhs), SourceParticles);
112 113 114 115 116 117

            for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
                // update multipole index
                int idxMul = idxV*nRhs + idxRhs;

                // 2) apply Discrete Fourier Transform
118 119
                M2LHandler.applyZeroPaddingAndDFT(LeafMultipole->get(idxMul),
                                                  LeafMultipole->getTransformed(idxMul));
120

121
            }
122 123 124 125
        }// NVALS
    }


126 127 128 129 130
    template<class SymbolicData>
    void M2M(typename CellClass::multipole_t* const FRestrict ParentMultipole,
             const SymbolicData* const ParentSymb,
             const typename CellClass::multipole_t*const FRestrict *const FRestrict ChildMultipoles,
             const SymbolicData* const /*ChildSymbs*/[])
131 132 133 134 135 136 137
    {
        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
                // update multipole index
                int idxMul = idxV*nRhs + idxRhs;

                // 1) apply Sy
138
                FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentMultipole->get(idxMul));
139
                for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
140 141 142
                    if (ChildMultipoles[ChildIndex]){
                        AbstractBaseClass::Interpolator->applyM2M(
                            ChildIndex,
143 144
                            ChildMultipoles[ChildIndex]->get(idxMul),
                            ParentMultipole->get(idxMul),
145
                            ParentSymb->getLevel()/*Cell width extension specific*/);
146 147 148
                    }
                }
                // 2) Apply Discete Fourier Transform
149 150
                M2LHandler.applyZeroPaddingAndDFT(ParentMultipole->get(idxMul),
                                                  ParentMultipole->getTransformed(idxMul));
151 152 153 154 155
            }
        }// NVALS
    }


156 157 158 159 160 161 162 163 164
    template<class SymbolicData>
    void M2L(typename CellClass::local_expansion_t * const FRestrict TargetExpansion,
             const SymbolicData* const TargetSymb,
             const typename CellClass::multipole_t * const FRestrict SourceMultipoles[],
             const SymbolicData* const FRestrict /*SourceSymbs*/[],
             const int neighborPositions[],
             const int inSize)
    {
        const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(1 << TargetSymb->getLevel()));
165 166 167 168 169 170 171 172 173 174
        const FReal ExtendedCellWidth(CellWidth + AbstractBaseClass::BoxWidthExtension);
        const FReal scale(MatrixKernel->getScaleFactor(ExtendedCellWidth));

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){

                // update local index
                const int idxLoc = idxV*nLhs + idxLhs;

                // load transformed local expansion
175
                FComplex<FReal> *const TransformedLocalExpansion = TargetExpansion->getTransformed(idxLoc);
176 177

                // update idxRhs
178
                const int idxRhs = idxLhs % nPV;
179 180 181 182 183 184 185

                // update multipole index
                const int idxMul = idxV*nRhs + idxRhs;

                // get index in matrix kernel
                const unsigned int d = MatrixKernel->getPosition(idxLhs);

186 187
                for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
                    const int idx = neighborPositions[idxExistingNeigh];
188

189
                    M2LHandler.applyFC(idx, static_cast<int>(TargetSymb->getLevel()), scale, d,
190
                                       SourceMultipoles[idxExistingNeigh]->getTransformed(idxMul),
191
                                       TransformedLocalExpansion);
192 193 194 195 196 197
                }
            }// NLHS=NPOT*NPV
        }// NVALS
    }


198 199 200 201 202
    template<class SymbolicData>
    void L2L(const typename CellClass::local_expansion_t * const FRestrict ParentExpansion,
             const SymbolicData* const ParentSymb,
             typename CellClass::local_expansion_t * FRestrict * const FRestrict ChildExpansions,
             const SymbolicData* const /*ChildSymbs*/[])
203 204 205 206 207
    {
        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
                int idxLoc = idxV*nLhs + idxLhs;
                // 1) Apply Inverse Discete Fourier Transform
208
                M2LHandler.unapplyZeroPaddingAndDFT(
209 210
                    ParentExpansion->getTransformed(idxLoc),
                    const_cast<typename CellClass::local_expansion_t*>(ParentExpansion)->get(idxLoc));
211 212
                // 2) apply Sx
                for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
213 214 215
                    if (ChildExpansions[ChildIndex]){
                        AbstractBaseClass::Interpolator->applyL2L(
                            ChildIndex,
216 217
                            ParentExpansion->get(idxLoc),
                            ChildExpansions[ChildIndex]->get(idxLoc),
218
                            static_cast<int>(ParentSymb->getLevel())/*Cell width extension specific*/);
219 220 221 222 223 224
                    }
                }
            }
        }// NVALS
    }

225 226 227
    template<class SymbolicData>
    void L2P(const typename CellClass::local_expansion_t* const LeafExpansion,
             const SymbolicData* const LeafSymbData,
228 229
             ContainerClass* const TargetParticles)
    {
230 231
        const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafSymbData->getCoordinate()));
        const FReal ExtendedLeafCellWidth(AbstractBaseClass::BoxWidthLeaf
232 233 234 235 236 237
                                          + AbstractBaseClass::BoxWidthExtension);

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
                int idxLoc = idxV*nLhs + idxLhs;
                // 1)  Apply Inverse Discete Fourier Transform
238
                M2LHandler.unapplyZeroPaddingAndDFT(
239 240
                    LeafExpansion->getTransformed(idxLoc),
                    const_cast<typename CellClass::local_expansion_t*>(LeafExpansion)->get(idxLoc));
241 242 243 244 245

            }

            // 2.a) apply Sx
            AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, ExtendedLeafCellWidth,
246
                                                      LeafExpansion->get(idxV*nLhs), TargetParticles);
247 248 249

            // 2.b) apply Px (grad Sx)
            AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, ExtendedLeafCellWidth,
250
                                                              LeafExpansion->get(idxV*nLhs), TargetParticles);
251 252 253 254

        }// NVALS
    }

255
    void P2P(const FTreeCoordinate& inPosition,
256
             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
257 258
             ContainerClass* const inNeighbors[], const int neighborPositions[],
             const int inSize) override {
259 260 261 262 263 264 265 266 267 268 269
        // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
        if(LeafLevelSeparationCriterion==1) {
            if(inTargets == inSources){
                P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
            }
            else{
                const ContainerClass* const srcPtr[1] = {inSources};
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
            }
270
        }
271 272 273
        // Nearfield interactions are only computed within the target leaf
        else if(LeafLevelSeparationCriterion==0){
            DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
274
        }
275
        // If criterion equals -1 then no P2P need to be performed.
276 277 278 279 280 281
    }

    void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
             ContainerClass* const FRestrict inTargets,
             ContainerClass* const inNeighbors[], const int neighborPositions[],
             const int inSize) override {
282 283 284 285 286 287
        int nbNeighborsToCompute = 0;
        while(nbNeighborsToCompute < inSize
              && neighborPositions[nbNeighborsToCompute] < 14){
            nbNeighborsToCompute += 1;
        }
        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2P(inTargets,inNeighbors,nbNeighborsToCompute,MatrixKernel);
288 289 290 291 292
    }


    void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                   ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
293
                   const ContainerClass* const inNeighbors[], const int /*neighborPositions*/[],
294
                   const int inSize) override {
295
        // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
296
        if(LeafLevelSeparationCriterion==1)
297 298
            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
        // Nearfield interactions are only computed within the target leaf
299
        if(LeafLevelSeparationCriterion==0)
300
            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,0,MatrixKernel);
301
        // If criterion equals -1 then no P2P need to be performed.
302
    }
303 304 305 306

};


307
#endif //FUNIFTENSORIALKERNEL_HPP
308 309

// [--END--]