// =================================================================================== // Copyright ScalFmm 2016 INRIA, Olivier Coulaud, Bérenger Bramas, // Matthias Messner olivier.coulaud@inria.fr, berenger.bramas@inria.fr // This software is a computer program whose purpose is to compute the // FMM. // // This software is governed by the CeCILL-C and LGPL licenses and // abiding by the rules of distribution of free software. // An extension to the license is given to allow static linking of scalfmm // inside a proprietary application (no matter its license). // See the main license file for more details. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. // "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== // ==== CMAKE ===== // @FUSE_BLAS // ================ // Keep in private GIT // @SCALFMM_PRIVATE #include #include #include #include "../../Src/Utils/FTic.hpp" #include "../../Src/Utils/FParameters.hpp" #include "../../Src/Files/FFmaGenericLoader.hpp" #include "../../Src/Containers/FOctree.hpp" #include "../../Src/Containers/FVector.hpp" #include "../../Src/Core/FFmmAlgorithm.hpp" #include "../../Src/Core/FFmmAlgorithmThread.hpp" // chebyshev kernel #include "../../Src/Kernels/Chebyshev/FChebCell.hpp" #include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" // spherical kernel #include "../../Src/Kernels/Spherical/FSphericalKernel.hpp" #include "../../Src/Kernels/Spherical/FSphericalBlasKernel.hpp" #include "../../Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp" #include "../../Src/Kernels/Spherical/FSphericalRotationKernel.hpp" #include "../../Src/Kernels/Spherical/FSphericalCell.hpp" #include "../../Src/Components/FSimpleLeaf.hpp" #include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" #include "../../Src/Utils/FParameterNames.hpp" /** * This program compares two different kernels, eg., the Chebyshev kernel with * the SphericalBlas kernel. */ // Simply create particles and try the kernels int main(int argc, char* argv[]) { FHelpDescribeAndExit(argc, argv, "Compare lots of kernels.", FParameterDefinitions::InputFile, FParameterDefinitions::OctreeHeight, FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbThreads, FParameterDefinitions::SHDevelopment); // get info from commandline const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2); const unsigned int NbThreads = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, omp_get_max_threads()); omp_set_num_threads(NbThreads); std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; // init timer FTic time; // interaction kernel evaluator typedef double FReal; typedef FInterpMatrixKernelR MatrixKernelClass; const MatrixKernelClass MatrixKernel; struct TestParticle{ FPoint position; FReal forces[3]; FReal physicalValue; FReal potential; }; // open particle file FFmaGenericLoader loader(filename); if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()]; for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ FPoint position; FReal physicalValue = 0.0; loader.fillParticle(&position,&physicalValue); // get copy particles[idxPart].position = position; particles[idxPart].physicalValue = physicalValue; particles[idxPart].potential = 0.0; particles[idxPart].forces[0] = 0.0; particles[idxPart].forces[1] = 0.0; particles[idxPart].forces[2] = 0.0; } { for(FSize idxTarget = 0 ; idxTarget < loader.getNumberOfParticles() ; ++idxTarget){ for(FSize idxOther = idxTarget + 1 ; idxOther < loader.getNumberOfParticles() ; ++idxOther){ FP2P::MutualParticles(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), particles[idxTarget].position.getZ(),particles[idxTarget].physicalValue, &particles[idxTarget].forces[0],&particles[idxTarget].forces[1], &particles[idxTarget].forces[2],&particles[idxTarget].potential, particles[idxOther].position.getX(), particles[idxOther].position.getY(), particles[idxOther].position.getZ(),particles[idxOther].physicalValue, &particles[idxOther].forces[0],&particles[idxOther].forces[1], &particles[idxOther].forces[2],&particles[idxOther].potential,&MatrixKernel); } } } //////////////////////////////////////////////////////////////////// { // begin Chebyshev kernel // accuracy const unsigned int ORDER = 7; const FReal epsilon = FReal(1e-7); // typedefs typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf LeafClass; typedef FChebCell CellClass; typedef FOctree OctreeClass; //typedef FChebKernel KernelClass; typedef FChebSymKernel KernelClass; //typedef FFmmAlgorithm FmmClass; typedef FFmmAlgorithmThread FmmClass; // init oct-tree OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); { // ----------------------------------------------------- std::cout << "Creating & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl; std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; time.tic(); for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); } time.tac(); std::cout << "Done " << "(@Creating and Inserting Particles = " << time.elapsed() << "s)." << std::endl; } // ----------------------------------------------------- { // ----------------------------------------------------- std::cout << "\nChebyshev FMM ... " << std::endl; time.tic(); KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel, epsilon); FmmClass algorithm(&tree, &kernels); algorithm.execute(); time.tac(); std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; } // ----------------------------------------------------- FMath::FAccurater potentialDiff; FMath::FAccurater fx, fy, fz; { // Check that each particle has been summed with all other tree.forEachLeaf([&](LeafClass* leaf){ const FReal*const potentials = leaf->getTargets()->getPotentials(); const FReal*const forcesX = leaf->getTargets()->getForcesX(); const FReal*const forcesY = leaf->getTargets()->getForcesY(); const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); const FVector& indexes = leaf->getTargets()->getIndexes(); for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ const FSize indexPartOrig = indexes[idxPart]; potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); } }); } // Print for information std::cout << "Potential " << potentialDiff << std::endl; std::cout << "Fx " << fx << std::endl; std::cout << "Fy " << fy << std::endl; std::cout << "Fz " << fz << std::endl; } // end Chebyshev kernel //////////////////////////////////////////////////////////////////// { // begin FFmaBlas kernel // accuracy const int DevP = FParameters::getValue(argc, argv, FParameterDefinitions::SHDevelopment.options, 11); // typedefs typedef FSphericalCell CellClass; typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf LeafClass; typedef FOctree OctreeClass; typedef FSphericalBlockBlasKernel KernelClass; typedef FFmmAlgorithmThread FmmClass; // init cell class and oct-tree CellClass::Init(DevP, true); // only for blas // OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); { // ----------------------------------------------------- std::cout << "Creating & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl; std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; time.tic(); for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); } time.tac(); std::cout << "Done " << "(@Creating and Inserting Particles = " << time.elapsed() << "s)." << std::endl; } // ----------------------------------------------------- // ----------------------------------------------------- std::cout << "\nFFmaBlas FMM ..." << std::endl; time.tic(); KernelClass kernels(DevP, TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); FmmClass algorithm(&tree, &kernels); algorithm.execute(); time.tac(); std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; // ----------------------------------------------------- FMath::FAccurater potentialDiff; FMath::FAccurater fx, fy, fz; { // Check that each particle has been summed with all other tree.forEachLeaf([&](LeafClass* leaf){ const FReal*const potentials = leaf->getTargets()->getPotentials(); const FReal*const forcesX = leaf->getTargets()->getForcesX(); const FReal*const forcesY = leaf->getTargets()->getForcesY(); const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); const FVector& indexes = leaf->getTargets()->getIndexes(); for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ const FSize indexPartOrig = indexes[idxPart]; potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); } }); } // Print for information std::cout << "Potential " << potentialDiff << std::endl; std::cout << "Fx " << fx << std::endl; std::cout << "Fy " << fy << std::endl; std::cout << "Fz " << fz << std::endl; } // end FFmaBlas kernel delete[] particles; return 0; }