testChebTensorialAlgorithm.cpp 12.7 KB
Newer Older
1
// ===================================================================================
2 3 4 5
// Copyright ScalFmm 2016 INRIA, Olivier Coulaud, Bérenger Bramas,
// Matthias Messner olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the
// FMM.
6
//
7
// This software is governed by the CeCILL-C and LGPL licenses and
8
// abiding by the rules of distribution of free software.
9 10 11
// An extension to the license is given to allow static linking of scalfmm
// inside a proprietary application (no matter its license).
// See the main license file for more details.
12 13 14 15
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 17 18
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
19
// ===================================================================================
20 21
//
// #author P. Blanchard
22
// ==== CMAKE =====
23
// @FUSE_BLAS
24 25 26 27 28 29 30
// ================

#include <iostream>

#include <cstdio>
#include <cstdlib>

31
#include "Files/FFmaGenericLoader.hpp"
32 33


34
#include "Kernels/Interpolation/FInterpMatrixKernel_TensorialInteractions.hpp"
35 36
#include "Kernels/Chebyshev/FChebCell.hpp"
#include "Kernels/Chebyshev/FChebTensorialKernel.hpp"
37

38 39
#include "Components/FSimpleLeaf.hpp"
#include "Kernels/P2P/FP2PParticleContainerIndexed.hpp"
40

41
#include "Utils/FParameters.hpp"
42
#include "Utils/FParameterNames.hpp"
43
#include "Utils/FMemUtils.hpp"
44

45 46
#include "Containers/FOctree.hpp"
#include "Containers/FVector.hpp"
47

48 49
#include "Core/FFmmAlgorithm.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
50 51

/**
52
 * This program runs the FMM Algorithm with the Chebyshev kernel and compares the results with a direct computation.
53 54 55 56 57
 */

// Simply create particles and try the kernels
int main(int argc, char* argv[])
{
58 59 60 61 62
    FHelpDescribeAndExit(argc, argv,
                         "Run the chebyshev FMM kernel and compare the accuracy with a direct computation.",
                         FParameterDefinitions::InputFile, FParameterDefinitions::OctreeHeight,
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbThreads);

63
    typedef double FReal;
64 65 66 67
  const char* const filename       = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
  const unsigned int TreeHeight    = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 3);
  const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
  const unsigned int NbThreads     = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, 1);
68 69 70 71 72 73 74 75 76 77 78

#ifdef _OPENMP
  omp_set_num_threads(NbThreads);
  std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl;
#else
  std::cout << "\n>> Sequential version.\n" << std::
#endif

    // init timer
    FTic time;

79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
    // typedefs and infos
    typedef FInterpMatrixKernel_R_IJ<FReal> MatrixKernelClass;
    MatrixKernelClass::printInfo();

    // useful features of matrix kernel
    const unsigned int NPV  = MatrixKernelClass::NPV;
    const unsigned int NPOT = MatrixKernelClass::NPOT;
    const unsigned int NRHS = MatrixKernelClass::NRHS;
    const unsigned int NLHS = MatrixKernelClass::NLHS;

    const FReal CoreWidth = 0.;
    std::cout << "Core width: a=" << CoreWidth << std::endl;
    std::cout << std::endl;

    // Build matrix kernel
    const MatrixKernelClass MatrixKernel(CoreWidth);

    // init particles position and physical value
    struct TestParticle{
        FPoint<FReal> position;
        FReal forces[3][NPOT];
        FReal physicalValue[NPV];
        FReal potential[NPOT];
    };
103 104

  // open particle file
105
  FFmaGenericLoader<FReal> loader(filename);
106 107 108
  if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!");

  TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()];
109
  for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
110
    FPoint<FReal> position;
111 112 113 114
    FReal physicalValue = 0.0;
    loader.fillParticle(&position,&physicalValue);
    // get copy
    particles[idxPart].position       = position;
115 116 117 118 119
    // Set physical values
    for(unsigned idxPV = 0; idxPV<NPV;++idxPV){
    //   Either copy same physical value in each component
      particles[idxPart].physicalValue[idxPV]  = physicalValue; 
    // ... or set random value
120
//      particles[idxPart].physicalValue[idxPV]  = physicalValue*FReal(rand())/FReal(RAND_MAX);
121 122 123 124 125 126 127
    }

    for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
      particles[idxPart].potential[idxPot]      = 0.0;
      particles[idxPart].forces[0][idxPot]      = 0.0;
      particles[idxPart].forces[1][idxPot]      = 0.0;
      particles[idxPart].forces[2][idxPot]      = 0.0;
128
    }
129 130 131 132 133 134 135 136
  }

  ////////////////////////////////////////////////////////////////////

  { // begin direct computation
    std::cout << "\nDirect Computation ... " << std::endl;
    time.tic();
    {
137 138
      for(FSize idxTarget = 0 ; idxTarget < loader.getNumberOfParticles() ; ++idxTarget){
        for(FSize idxOther =  idxTarget + 1 ; idxOther < loader.getNumberOfParticles() ; ++idxOther){
139 140 141 142 143 144 145 146 147
          FP2P::MutualParticlesKIJ(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(),
                                   particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue,
                                   particles[idxTarget].forces[0], particles[idxTarget].forces[1],
                                   particles[idxTarget].forces[2], particles[idxTarget].potential,
                                   particles[idxOther].position.getX(), particles[idxOther].position.getY(),
                                   particles[idxOther].position.getZ(), particles[idxOther].physicalValue,
                                   particles[idxOther].forces[0], particles[idxOther].forces[1],
                                   particles[idxOther].forces[2], particles[idxOther].potential,
                                   &MatrixKernel);
148 149 150 151 152 153 154 155 156 157 158
        }
      }
    }
    time.tac();
    std::cout << "Done  " << "(@Direct Computation = "
              << time.elapsed() << "s)." << std::endl;

  } // end direct computation

  ////////////////////////////////////////////////////////////////////

159
  {	// begin Chebyshev kernel
160 161

    // accuracy
162
    const unsigned int ORDER = 5 ;
163 164 165 166
    // set box width extension
    // ... either deduce from element size
  const FReal LeafCellWidth = FReal(loader.getBoxWidth()) / FReal(FMath::pow(2.,TreeHeight-1));
  const FReal ElementSize = LeafCellWidth / FReal(3.);  
167
  const FReal BoxWidthExtension = 0.*ElementSize; // depends on type of element
168 169 170 171 172 173 174 175 176
    // ... or set to arbitrary value (0. means no extension)
//    const FReal BoxWidthExtension = FReal(0.); 

    std::cout << "LeafCellWidth=" << LeafCellWidth 
              << ", BoxWidthExtension=" << BoxWidthExtension <<std::endl;

    // stop execution if interactions are homog and box extension is required 
    if(MatrixKernelClass::Type==HOMOGENEOUS && BoxWidthExtension>0.)
      throw std::runtime_error("Extension of box width is not yet supported for homogeneous kernels! Work-around: artificially set Type to NON_HOMOGENEOUS.");
177

178
    typedef FP2PParticleContainerIndexed<FReal, NRHS,NLHS> ContainerClass;
179

180 181 182 183
    typedef FSimpleLeaf<FReal, ContainerClass >  LeafClass;
    typedef FChebCell<FReal,ORDER,NRHS,NLHS> CellClass;
    typedef FOctree<FReal,CellClass,ContainerClass,LeafClass> OctreeClass;
    typedef FChebTensorialKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
184 185 186 187 188 189 190 191 192 193 194 195 196
    typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
    //  typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;

    // init oct-tree
    OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox());


    { // -----------------------------------------------------
      std::cout << "Creating & Inserting " << loader.getNumberOfParticles()
                << " particles ..." << std::endl;
      std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
      time.tic();

197
      for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
198
        // put in tree
199 200
        // PB: here we have to know NPV...
        if(NPV==1)
201
          tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0]);
202
        else if(NPV==3)
203 204 205 206
          tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]);
        else 
          std::runtime_error("Insert correct number of physical values!");

207 208 209 210 211 212 213 214
      }

      time.tac();
      std::cout << "Done  " << "(@Creating and Inserting Particles = "
                << time.elapsed() << "s)." << std::endl;
    } // -----------------------------------------------------

    { // -----------------------------------------------------
215
      std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl;
216
      time.tic();
217
      KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel, BoxWidthExtension);
218 219 220 221 222 223 224 225 226
      FmmClass algorithm(&tree, &kernels);
      algorithm.execute();
      time.tac();
      std::cout << "Done  " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl;
    } // -----------------------------------------------------


    { // -----------------------------------------------------
      std::cout << "\nError computation ... " << std::endl;
227 228
      FMath::FAccurater<FReal> potentialDiff[NPOT];
      FMath::FAccurater<FReal> fx[NPOT], fy[NPOT], fz[NPOT];
229

230 231
      FReal checkPotential[20000][NPOT];
      FReal checkfx[20000][NPOT];
232 233 234 235

      { // Check that each particle has been summed with all other

        tree.forEachLeaf([&](LeafClass* leaf){
236
            for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
237

238 239 240 241
              const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot);
              const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot);
              const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot);
              const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot);
242 243
              const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
              const FVector<FSize>& indexes = leaf->getTargets()->getIndexes();
244

245 246
              for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
                const FSize indexPartOrig = indexes[idxPart];
247 248

                //PB: store potential in array[nbParticles]
249 250 251 252 253 254 255 256
                checkPotential[indexPartOrig][idxPot]=potentials[idxPart];
                checkfx[indexPartOrig][idxPot]=forcesX[idxPart];

                // update accuracy
                potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]);
                fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]);
                fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]);
                fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]);
257
              }
258
            }// NPOT
259 260 261 262
          });
      }

      // Print for information
263 264 265 266 267 268 269 270
      std::cout << "\nRelative Inf/L2 errors: " << std::endl;
      std::cout << "  Potential: " << std::endl;
      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
        std::cout << "    " << idxPot << ": "
                  << potentialDiff[idxPot].getRelativeInfNorm() << ", " 
                  << potentialDiff[idxPot].getRelativeL2Norm() 
                  << std::endl;
      }
271
      std::cout << std::endl;
272 273 274 275 276 277 278
      std::cout << "  Fx: " << std::endl; 
      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
        std::cout << "    " << idxPot << ": "
                  << fx[idxPot].getRelativeInfNorm() << ", " 
                  << fx[idxPot].getRelativeL2Norm()
                  << std::endl;
      }
279
      std::cout  << std::endl;
280 281 282 283 284 285 286
      std::cout << "  Fy: " << std::endl; 
      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
        std::cout << "    " << idxPot << ": "
                  << fy[idxPot].getRelativeInfNorm() << ", " 
                  << fy[idxPot].getRelativeL2Norm()
                  << std::endl;
      }
287
      std::cout  << std::endl;
288 289 290 291 292 293 294
      std::cout << "  Fz: " << std::endl; 
      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
        std::cout << "    " << idxPot << ": "
                  << fz[idxPot].getRelativeInfNorm() << ", " 
                  << fz[idxPot].getRelativeL2Norm()
                  << std::endl;
      }
295
      std::cout << std::endl;
296

297 298
    } // -----------------------------------------------------

299
  } // end Chebyshev kernel
300 301 302

  return 0;
}