FUnifTensorialKernel.hpp 9.91 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FUNIFTENSORIALKERNEL_HPP
#define FUNIFTENSORIALKERNEL_HPP

#include "../../Utils/FGlobal.hpp"
#include "../../Utils/FTrace.hpp"
#include "../../Utils/FSmartPointer.hpp"

#include "./FAbstractUnifKernel.hpp"
24 25
#include "./FUnifM2LHandler.hpp"
#include "./FUnifTensorialM2LHandler.hpp" //PB: temporary version
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

class FTreeCoordinate;

/**
 * @author Pierre Blanchard (pierre.blanchard@inria.fr)
 * @class FUnifTensorialKernel
 * @brief
 * Please read the license
 *
 * This kernels implement the Lagrange interpolation based FMM operators. It
 * implements all interfaces (P2P,P2M,M2M,M2L,L2L,L2P) which are required by
 * the FFmmAlgorithm and FFmmAlgorithmThread.
 *
 * PB: 3 IMPORTANT remarks !!!
 *
41 42
 * 1) Handling tensorial kernels (DIM,NRHS,NLHS) and having multiple rhs (NVALS) 
 * are considered 2 separate features and are currently combined.
43 44 45 46 47
 *
 * 2) The present tensorial version is the most naive one. All tensorial aspects 
 * are handled in the kernel. A more optimal version would be to consider looping 
 * over nRhs/nLhs inside Interpolator::applyP2M/L2P in order to avoid extra 
 * evaluation of the interpolating polynomials. When it comes to applying M2L it is 
48
 * NOT much faster to loop over NRHSxNLHS inside applyM2L (at least for the Lagrange case)
49
 * 2-bis) The evaluation of the kernel matrix (see M2LHandler) should be done at once 
50 51
 * instead of compo-by-compo (TODO). On the other hand, the ChebyshevSym tensorial kernel 
 * requires the matrix kernel to be evaluated compo-by-compo since we currently use a scalar ACA.
52 53 54 55 56 57 58 59 60 61 62 63 64 65
 *
 * 3) We currently use multiple 1D FFT instead of multidim FFT. 
 * TODO switch to multidim if relevant in considered range of size 
 * (see testFFTW and testFFTWMultidim).
 *
 * @tparam CellClass Type of cell
 * @tparam ContainerClass Type of container to store particles
 * @tparam MatrixKernelClass Type of matrix kernel function
 * @tparam ORDER Lagrange interpolation order
 */
template < class CellClass,	class ContainerClass,	class MatrixKernelClass, int ORDER, int NVALS = 1>
class FUnifTensorialKernel
  : public FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
{
66
  enum {nRhs = MatrixKernelClass::NRHS,
67
        nLhs = MatrixKernelClass::NLHS};
68 69 70

protected://PB: for OptiDis

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
  // private types
  typedef FUnifTensorialM2LHandler<ORDER,MatrixKernelClass> M2LHandlerClass;

  // using from
  typedef FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
  AbstractBaseClass;

  /// Needed for M2L operator
  FSmartPointer<  M2LHandlerClass,FSmartPointerMemory> M2LHandler;

public:
  /**
   * The constructor initializes all constant attributes and it reads the
   * precomputed and compressed M2L operators from a binary file (an
   * runtime_error is thrown if the required file is not valid).
   */
  FUnifTensorialKernel(const int inTreeHeight,
88 89
                       const FReal inBoxWidth,
                       const FPoint& inBoxCenter)
90
    : FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,
91 92
                                                                                       inBoxWidth,
                                                                                       inBoxCenter),
93 94 95 96 97 98 99 100 101 102 103 104
      M2LHandler(new M2LHandlerClass())
  {
    // read precomputed compressed m2l operators from binary file
    //M2LHandler->ReadFromBinaryFileAndSet(); // PB: TODO?
    M2LHandler->ComputeAndSet();
  }


  void P2M(CellClass* const LeafCell,
           const ContainerClass* const SourceParticles)
  {
    const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); 
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
    for(int idxV = 0 ; idxV < NVALS ; ++idxV){
      for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
        // update multipole index
        int idxMul = idxV*nRhs + idxRhs;

        // 1) apply Sy
        AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
                                                  LeafCell->getMultipole(idxMul), SourceParticles);

        // 2) apply Discrete Fourier Transform
        M2LHandler->applyZeroPaddingAndDFT(LeafCell->getMultipole(idxMul), 
                                           LeafCell->getTransformedMultipole(idxMul));

      }
    }// NVALS
120 121 122 123 124 125 126
  }


  void M2M(CellClass* const FRestrict ParentCell,
           const CellClass*const FRestrict *const FRestrict ChildCells,
           const int /*TreeLevel*/)
  {
127 128 129 130 131 132 133 134 135 136 137 138
    for(int idxV = 0 ; idxV < NVALS ; ++idxV){
      for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
        // update multipole index
        int idxMul = idxV*nRhs + idxRhs;

        // 1) apply Sy
        FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentCell->getMultipole(idxMul));
        for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
          if (ChildCells[ChildIndex]){
            AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxMul),
                                                      ParentCell->getMultipole(idxMul));
          }
139
        }
140 141 142
        // 2) Apply Discete Fourier Transform
        M2LHandler->applyZeroPaddingAndDFT(ParentCell->getMultipole(idxMul), 
                                           ParentCell->getTransformedMultipole(idxMul));
143
      }
144
    }// NVALS
145 146 147 148 149 150 151 152
  }


  void M2L(CellClass* const FRestrict TargetCell,
           const CellClass* SourceCells[343],
           const int /*NumSourceCells*/,
           const int TreeLevel)
  {
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
    const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel)));

    for(int idxV = 0 ; idxV < NVALS ; ++idxV){
      for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){
        // update local index
        int idxLoc = idxV*nLhs + idxLhs;
        // load transformed local expansion
        FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc);

        for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){
          // update multipole index
          int idxMul = idxV*nRhs + idxRhs;
          // update kernel index such that: x_i = K_{ij}y_j 
          int idxK = idxLhs*nRhs + idxRhs;

          for (int idx=0; idx<343; ++idx){
            if (SourceCells[idx]){
              M2LHandler->applyFC(idx, CellWidth, 
                                  SourceCells[idx]->getTransformedMultipole(idxMul),
                                  TransformedLocalExpansion,idxK);

            }
175
          }
176 177 178
        }// NRHS
      }// NLHS
    }// NVALS
179 180 181 182 183 184 185
  }


  void L2L(const CellClass* const FRestrict ParentCell,
           CellClass* FRestrict *const FRestrict ChildCells,
           const int /*TreeLevel*/)
  {
186 187 188 189 190 191 192 193 194 195 196
    for(int idxV = 0 ; idxV < NVALS ; ++idxV){
      for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
        int idxLoc = idxV*nLhs + idxLhs;
        // 1) Apply Inverse Discete Fourier Transform
        M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxLoc),
                                             const_cast<CellClass*>(ParentCell)->getLocal(idxLoc));
        // 2) apply Sx
        for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
          if (ChildCells[ChildIndex]){
            AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxLoc), ChildCells[ChildIndex]->getLocal(idxLoc));
          }
197 198
        }
      }
199
    }// NVALS
200 201 202 203 204 205 206
  }

  void L2P(const CellClass* const LeafCell,
           ContainerClass* const TargetParticles)
  {
    const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));

207 208 209 210 211 212
    for(int idxV = 0 ; idxV < NVALS ; ++idxV){
      for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
        int idxLoc = idxV*nLhs + idxLhs;
        // 1)  Apply Inverse Discete Fourier Transform
        M2LHandler->unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxLoc), 
                                             const_cast<CellClass*>(LeafCell)->getLocal(idxLoc));
213

214 215 216
        // 2.a) apply Sx
        AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
                                                  LeafCell->getLocal(idxLoc), TargetParticles);
217

218 219 220
        // 2.b) apply Px (grad Sx)
        AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
                                                          LeafCell->getLocal(idxLoc), TargetParticles);
221

222 223
      }
    }// NVALS
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
  }

  void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
           ContainerClass* const FRestrict TargetParticles,
           const ContainerClass* const FRestrict /*SourceParticles*/,
           ContainerClass* const NeighborSourceParticles[27],
           const int /* size */)
  {
    DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles);
  }


  void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                 ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
                 ContainerClass* const inNeighbors[27], const int /*inSize*/){
    DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27);
  }

};


245
#endif //FUNIFTENSORIALKERNEL_HPP
246 247

// [--END--]