FUnifTensorialKernel.hpp 14.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
16 17
// Keep in private GIT

18 19 20 21
#ifndef FUNIFTENSORIALKERNEL_HPP
#define FUNIFTENSORIALKERNEL_HPP

#include "../../Utils/FGlobal.hpp"
22

23 24 25
#include "../../Utils/FSmartPointer.hpp"

#include "./FAbstractUnifKernel.hpp"
26 27
#include "./FUnifM2LHandler.hpp"
#include "./FUnifTensorialM2LHandler.hpp" //PB: temporary version
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42

class FTreeCoordinate;

/**
 * @author Pierre Blanchard (pierre.blanchard@inria.fr)
 * @class FUnifTensorialKernel
 * @brief
 * Please read the license
 *
 * This kernels implement the Lagrange interpolation based FMM operators. It
 * implements all interfaces (P2P,P2M,M2M,M2L,L2L,L2P) which are required by
 * the FFmmAlgorithm and FFmmAlgorithmThread.
 *
 * PB: 3 IMPORTANT remarks !!!
 *
43
 * 1) Handling tensorial kernels (DIM,NRHS,NLHS) and having multiple rhs
44
 * (NVALS) are considered 2 distinct features and are currently combined.
45
 *
46
 * 2) When it comes to applying M2L it is NOT much faster to loop over
47
 * NRHSxNLHS inside applyM2L (at least for the Lagrange case).
48 49 50
 * 2-bis) During precomputation the tensorial matrix kernels are evaluated
 * blockwise, but this is not always possible.
 * In fact, in the ChebyshevSym variant the matrix kernel needs to be
51
 * evaluated compo-by-compo since we currently use a scalar ACA.
52
 *
53 54 55
 * 3) We currently use multiple 1D FFT instead of multidim FFT since embedding
 * is circulant. Multidim FFT could be used if embedding were block circulant.
 * TODO investigate possibility of block circulant embedding
56 57 58 59 60 61
 *
 * @tparam CellClass Type of cell
 * @tparam ContainerClass Type of container to store particles
 * @tparam MatrixKernelClass Type of matrix kernel function
 * @tparam ORDER Lagrange interpolation order
 */
62
template < class FReal, class CellClass, class ContainerClass,   class MatrixKernelClass, int ORDER, int NVALS = 1>
63
class FUnifTensorialKernel
64
    : public FAbstractUnifKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
65
{
66 67 68 69
    enum {nRhs = MatrixKernelClass::NRHS,
          nLhs = MatrixKernelClass::NLHS,
          nPot = MatrixKernelClass::NPOT,
          nPV = MatrixKernelClass::NPV};
70 71 72

protected://PB: for OptiDis

73
    // private types
74
    typedef FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,MatrixKernelClass::Type> M2LHandlerClass;
75

76
    // using from
77
    typedef FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
78
    AbstractBaseClass;
79

80 81
    /// Needed for P2P and M2L operators
    const MatrixKernelClass *const MatrixKernel;
82

83 84
    /// Needed for M2L operator
    const M2LHandlerClass M2LHandler;
85

86 87 88
    /// Leaf level separation criterion
    const int LeafLevelSeparationCriterion;

89
public:
90 91 92 93 94 95 96
    /**
     * The constructor initializes all constant attributes and it reads the
     * precomputed and compressed M2L operators from a binary file (an
     * runtime_error is thrown if the required file is not valid).
     */
    FUnifTensorialKernel(const int inTreeHeight,
                         const FReal inBoxWidth,
97
                         const FPoint<FReal>& inBoxCenter,
98
                         const MatrixKernelClass *const inMatrixKernel,
99 100
                         const FReal inBoxWidthExtension,
                         const int inLeafLevelSeparationCriterion = 1)
101
    : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter,inBoxWidthExtension),
102 103
      MatrixKernel(inMatrixKernel),
      M2LHandler(MatrixKernel,
104
                 inTreeHeight,
105
                 inBoxWidth,
106
                 inBoxWidthExtension,
107
                 inLeafLevelSeparationCriterion),
108
      LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
109 110 111
    { }


112 113 114
    template<class SymbolicData>
    void P2M(typename CellClass::multipole_t* const LeafMultipole,
             const SymbolicData* const LeafSymbData,
115 116
             const ContainerClass* const SourceParticles)
    {
117 118
        const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafSymbData->getCoordinate()));
        const FReal ExtendedLeafCellWidth(AbstractBaseClass::BoxWidthLeaf
119 120 121 122 123 124
                                          + AbstractBaseClass::BoxWidthExtension);

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){

            // 1) apply Sy
            AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, ExtendedLeafCellWidth,
125
                                                      LeafMultipole->getMultipole(idxV*nRhs), SourceParticles);
126 127 128 129 130 131

            for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
                // update multipole index
                int idxMul = idxV*nRhs + idxRhs;

                // 2) apply Discrete Fourier Transform
132 133
                M2LHandler.applyZeroPaddingAndDFT(LeafMultipole->getMultipole(idxMul),
                                                  LeafMultipole->getTransformedMultipole(idxMul));
134

135
            }
136 137 138 139
        }// NVALS
    }


140 141 142 143 144
    template<class SymbolicData>
    void M2M(typename CellClass::multipole_t* const FRestrict ParentMultipole,
             const SymbolicData* const ParentSymb,
             const typename CellClass::multipole_t*const FRestrict *const FRestrict ChildMultipoles,
             const SymbolicData* const /*ChildSymbs*/[])
145 146 147 148 149 150 151
    {
        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
                // update multipole index
                int idxMul = idxV*nRhs + idxRhs;

                // 1) apply Sy
152
                FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentMultipole->getMultipole(idxMul));
153
                for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
154 155 156 157 158 159
                    if (ChildMultipoles[ChildIndex]){
                        AbstractBaseClass::Interpolator->applyM2M(
                            ChildIndex,
                            ChildMultipoles[ChildIndex]->getMultipole(idxMul),
                            ParentMultipole->getMultipole(idxMul),
                            ParentSymb->getLevel()/*Cell width extension specific*/);
160 161 162
                    }
                }
                // 2) Apply Discete Fourier Transform
163 164
                M2LHandler.applyZeroPaddingAndDFT(ParentMultipole->getMultipole(idxMul),
                                                  ParentMultipole->getTransformedMultipole(idxMul));
165 166 167 168 169
            }
        }// NVALS
    }


170 171 172 173 174 175 176 177 178
    template<class SymbolicData>
    void M2L(typename CellClass::local_expansion_t * const FRestrict TargetExpansion,
             const SymbolicData* const TargetSymb,
             const typename CellClass::multipole_t * const FRestrict SourceMultipoles[],
             const SymbolicData* const FRestrict /*SourceSymbs*/[],
             const int neighborPositions[],
             const int inSize)
    {
        const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(1 << TargetSymb->getLevel()));
179 180 181 182 183 184 185 186 187 188
        const FReal ExtendedCellWidth(CellWidth + AbstractBaseClass::BoxWidthExtension);
        const FReal scale(MatrixKernel->getScaleFactor(ExtendedCellWidth));

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){

                // update local index
                const int idxLoc = idxV*nLhs + idxLhs;

                // load transformed local expansion
189
                FComplex<FReal> *const TransformedLocalExpansion = TargetExpansion->getTransformedLocal(idxLoc);
190 191

                // update idxRhs
192
                const int idxRhs = idxLhs % nPV;
193 194 195 196 197 198 199

                // update multipole index
                const int idxMul = idxV*nRhs + idxRhs;

                // get index in matrix kernel
                const unsigned int d = MatrixKernel->getPosition(idxLhs);

200 201
                for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
                    const int idx = neighborPositions[idxExistingNeigh];
202

203 204
                    M2LHandler.applyFC(idx, static_cast<int>(TargetSymb->getLevel()), scale, d,
                                       SourceMultipoles[idxExistingNeigh]->getTransformedMultipole(idxMul),
205
                                       TransformedLocalExpansion);
206 207 208 209 210 211
                }
            }// NLHS=NPOT*NPV
        }// NVALS
    }


212 213 214 215 216
    template<class SymbolicData>
    void L2L(const typename CellClass::local_expansion_t * const FRestrict ParentExpansion,
             const SymbolicData* const ParentSymb,
             typename CellClass::local_expansion_t * FRestrict * const FRestrict ChildExpansions,
             const SymbolicData* const /*ChildSymbs*/[])
217 218 219 220 221
    {
        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
                int idxLoc = idxV*nLhs + idxLhs;
                // 1) Apply Inverse Discete Fourier Transform
222 223 224
                M2LHandler.unapplyZeroPaddingAndDFT(
                    ParentExpansion->getTransformedLocal(idxLoc),
                    const_cast<typename CellClass::local_expansion_t*>(ParentExpansion)->getLocal(idxLoc));
225 226
                // 2) apply Sx
                for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
227 228 229 230 231 232
                    if (ChildExpansions[ChildIndex]){
                        AbstractBaseClass::Interpolator->applyL2L(
                            ChildIndex,
                            ParentExpansion->getLocal(idxLoc),
                            ChildExpansions[ChildIndex]->getLocal(idxLoc),
                            static_cast<int>(ParentSymb->getLevel())/*Cell width extension specific*/);
233 234 235 236 237 238
                    }
                }
            }
        }// NVALS
    }

239 240 241
    template<class SymbolicData>
    void L2P(const typename CellClass::local_expansion_t* const LeafExpansion,
             const SymbolicData* const LeafSymbData,
242 243
             ContainerClass* const TargetParticles)
    {
244 245
        const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafSymbData->getCoordinate()));
        const FReal ExtendedLeafCellWidth(AbstractBaseClass::BoxWidthLeaf
246 247 248 249 250 251
                                          + AbstractBaseClass::BoxWidthExtension);

        for(int idxV = 0 ; idxV < NVALS ; ++idxV){
            for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
                int idxLoc = idxV*nLhs + idxLhs;
                // 1)  Apply Inverse Discete Fourier Transform
252 253 254
                M2LHandler.unapplyZeroPaddingAndDFT(
                    LeafExpansion->getTransformedLocal(idxLoc),
                    const_cast<typename CellClass::local_expansion_t*>(LeafExpansion)->getLocal(idxLoc));
255 256 257 258 259

            }

            // 2.a) apply Sx
            AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, ExtendedLeafCellWidth,
260
                                                      LeafExpansion->getLocal(idxV*nLhs), TargetParticles);
261 262 263

            // 2.b) apply Px (grad Sx)
            AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, ExtendedLeafCellWidth,
264
                                                              LeafExpansion->getLocal(idxV*nLhs), TargetParticles);
265 266 267 268

        }// NVALS
    }

269
    void P2P(const FTreeCoordinate& inPosition,
270
             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
271 272
             ContainerClass* const inNeighbors[], const int neighborPositions[],
             const int inSize) override {
273 274 275 276 277 278 279 280 281 282 283
        // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
        if(LeafLevelSeparationCriterion==1) {
            if(inTargets == inSources){
                P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
            }
            else{
                const ContainerClass* const srcPtr[1] = {inSources};
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
            }
284
        }
285 286 287
        // Nearfield interactions are only computed within the target leaf
        else if(LeafLevelSeparationCriterion==0){
            DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
288
        }
289
        // If criterion equals -1 then no P2P need to be performed.
290 291 292 293 294 295
    }

    void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
             ContainerClass* const FRestrict inTargets,
             ContainerClass* const inNeighbors[], const int neighborPositions[],
             const int inSize) override {
296 297 298 299 300 301
        int nbNeighborsToCompute = 0;
        while(nbNeighborsToCompute < inSize
              && neighborPositions[nbNeighborsToCompute] < 14){
            nbNeighborsToCompute += 1;
        }
        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2P(inTargets,inNeighbors,nbNeighborsToCompute,MatrixKernel);
302 303 304 305 306
    }


    void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                   ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
307
                   const ContainerClass* const inNeighbors[], const int /*neighborPositions*/[],
308
                   const int inSize) override {
309
        // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
310
        if(LeafLevelSeparationCriterion==1)
311 312
            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
        // Nearfield interactions are only computed within the target leaf
313
        if(LeafLevelSeparationCriterion==0)
314
            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,0,MatrixKernel);
315
        // If criterion equals -1 then no P2P need to be performed.
316
    }
317 318 319 320

};


321
#endif //FUNIFTENSORIALKERNEL_HPP
322 323

// [--END--]