From 6151cdfa19fbe3fb10e1bab1dd71ef75e261edfb Mon Sep 17 00:00:00 2001 From: berenger-bramas <berenger-bramas@2616d619-271b-44dc-8df4-d4a8f33a7222> Date: Tue, 29 Mar 2011 12:22:46 +0000 Subject: [PATCH] Upgrading iteration performances & adding simple threaded versions git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@19 2616d619-271b-44dc-8df4-d4a8f33a7222 --- Sources/Containers/FList.hpp | 42 +++ Sources/Containers/FOctree.hpp | 9 +- Sources/Containers/FSubOctree.hpp | 2 +- Sources/Core/FAbstractKernels.hpp | 12 +- Sources/Core/FBasicKernels.hpp | 24 +- Sources/Core/FFMMAlgorithm.hpp | 36 +- Sources/Core/FFMMAlgorithmThreaded.hpp | 285 ++++++++++++++++ .../Core/FFMMAlgorithmThreadedInterval.hpp | 317 ++++++++++++++++++ Sources/Fmb/FAbstractFmbKernels.hpp | 37 +- Sources/Fmb/FExtendFmbCell.hpp | 9 + Sources/Fmb/FFmbKernelsForces.hpp | 4 +- Sources/Fmb/FFmbKernelsPotential.hpp | 4 +- Sources/Fmb/FFmbKernelsPotentialForces.hpp | 46 ++- Tests/testFMMAlgorithm.cpp | 55 ++- Tests/testFMMAlgorithmThreaded.cpp | 278 +++++++++++++++ Tests/testFmbAlgorithm.cpp | 4 +- 16 files changed, 1109 insertions(+), 55 deletions(-) create mode 100644 Sources/Core/FFMMAlgorithmThreaded.hpp create mode 100644 Sources/Core/FFMMAlgorithmThreadedInterval.hpp create mode 100644 Tests/testFMMAlgorithmThreaded.cpp diff --git a/Sources/Containers/FList.hpp b/Sources/Containers/FList.hpp index b19a7b838..f47778564 100644 --- a/Sources/Containers/FList.hpp +++ b/Sources/Containers/FList.hpp @@ -213,6 +213,48 @@ public: }; + class ConstBasicIterator { + private: + const Node* iter; //< current node on the list + + public: + /** + * Constructor needs the target list + * @param the list to iterate on + */ + ConstBasicIterator(const FList& list) : iter(list.root){ + } + + /** to progress on the list */ + void progress(){ + if(this->iter) this->iter = this->iter->next; + } + + /** + * Current pointed value + * current iterator must be valide (isValide()) to use this function + */ + Object value(){ + return this->iter->target; + } + + /** + * Current pointed value + * current iterator must be valide (isValide()) to use this function + */ + const Object& value() const{ + return this->iter->target; + } + + /** + * To know if an iterator is at the end of the list + * @return true if the current iterator can progress and access to value, else false + */ + bool isValide() const{ + return iter; + } + }; + }; #endif //FLIST_HPP diff --git a/Sources/Containers/FOctree.hpp b/Sources/Containers/FOctree.hpp index 50dfdc6cf..ab42c3971 100644 --- a/Sources/Containers/FOctree.hpp +++ b/Sources/Containers/FOctree.hpp @@ -192,6 +192,10 @@ public: this->currentLocalIndex = TransposeIndex(this->current.tree->getLeftLeafIndex(), (this->current.tree->getSubOctreeHeight() - this->currentLocalLevel - 1) ); } + Iterator() : currentLocalLevel(0), currentLocalIndex(0) { + current.tree = 0; + } + /** Copy constructor * @param other source iterator to copy */ @@ -206,7 +210,10 @@ public: * @return this after copy */ Iterator& operator=(const Iterator& other){ - memcpy(this, &other, sizeof(Iterator)); + this->current = other.current ; + this->currentLocalLevel = other.currentLocalLevel ; + this->currentLocalIndex = other.currentLocalIndex ; + return *this; } /** diff --git a/Sources/Containers/FSubOctree.hpp b/Sources/Containers/FSubOctree.hpp index 419e1e19b..4fa93d6cd 100644 --- a/Sources/Containers/FSubOctree.hpp +++ b/Sources/Containers/FSubOctree.hpp @@ -34,7 +34,7 @@ * @warning Give the particuleClass & cellClass */ template< class ParticuleClass, class CellClass > -class FAbstractSubOctree : public FAssertable{ +class FAbstractSubOctree : protected FAssertable{ protected: const int subOctreeHeight; //< Height of this suboctree const int subOctreePosition; //< Level of the current suboctree in the global tree (0 if node) diff --git a/Sources/Core/FAbstractKernels.hpp b/Sources/Core/FAbstractKernels.hpp index d2b0e6cf3..3133e04f7 100644 --- a/Sources/Core/FAbstractKernels.hpp +++ b/Sources/Core/FAbstractKernels.hpp @@ -20,22 +20,22 @@ public: virtual void init(){} /** P2M */ - virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) = 0; + virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) = 0; /** M2M */ - virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) = 0; + virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) = 0; /** M2L */ - virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) = 0; + virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) = 0; /** L2L */ - virtual void L2L(CellClass* const pole, CellClass** const child, const int inLevel) = 0; + virtual void L2L(const CellClass* const pole, CellClass** const child, const int inLevel) = 0; /** L2P */ - virtual void L2P(CellClass* const pole, FList<ParticuleClass*>* const particules) = 0; + virtual void L2P(const CellClass* const pole, FList<ParticuleClass*>* const particules) = 0; /** P2P */ - virtual void P2P(FList<ParticuleClass*>* const pole, FList<ParticuleClass*>** const directNeighbors, const int size) = 0; + virtual void P2P(FList<ParticuleClass*>* const pole, const FList<ParticuleClass*>*const* const directNeighbors, const int size) = 0; }; diff --git a/Sources/Core/FBasicKernels.hpp b/Sources/Core/FBasicKernels.hpp index d1f0d4fd1..dab607318 100644 --- a/Sources/Core/FBasicKernels.hpp +++ b/Sources/Core/FBasicKernels.hpp @@ -26,33 +26,33 @@ public: virtual void init(){} /** Print the number of particules */ - virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) { - FDEBUG( FDebug::Controller << "P2M : " << particules->getSize() << "\n" ); + virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) { + //FDEBUG( FDebug::Controller << "P2M : " << particules->getSize() << "\n" ); } /** Print the morton index */ - virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) { - FDEBUG( FDebug::Controller << "M2M : " << pole->getMortonIndex() << "\n" ); + virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) { + //FDEBUG( FDebug::Controller << "M2M : " << pole->getMortonIndex() << "\n" ); } /** Print the morton index */ - virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) { - FDEBUG( FDebug::Controller << "M2L : " << pole->getMortonIndex() << " (" << size << ")\n" ); + virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) { + //FDEBUG( FDebug::Controller << "M2L : " << pole->getMortonIndex() << " (" << size << ")\n" ); } /** Print the morton index */ - virtual void L2L(CellClass* const local, CellClass** const child, const int inLevel) { - FDEBUG( FDebug::Controller << "L2L : " << local->getMortonIndex() << "\n" ); + virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) { + //FDEBUG( FDebug::Controller << "L2L : " << local->getMortonIndex() << "\n" ); } /** Print the number of particules */ - virtual void L2P(CellClass* const pole, FList<ParticuleClass*>* const particules){ - FDEBUG( FDebug::Controller << "L2P : " << particules->getSize() << "\n" ); + virtual void L2P(const CellClass* const pole, FList<ParticuleClass*>* const particules){ + //FDEBUG( FDebug::Controller << "L2P : " << particules->getSize() << "\n" ); } /** Print the number of particules */ - virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) { - FDEBUG( FDebug::Controller << "P2P : " << currentBox->getSize() << " (" << size << ")\n" ); + virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) { + //FDEBUG( FDebug::Controller << "P2P : " << currentBox->getSize() << " (" << size << ")\n" ); } }; diff --git a/Sources/Core/FFMMAlgorithm.hpp b/Sources/Core/FFMMAlgorithm.hpp index 59777ff4a..b88fe6c7d 100644 --- a/Sources/Core/FFMMAlgorithm.hpp +++ b/Sources/Core/FFMMAlgorithm.hpp @@ -19,7 +19,7 @@ * It just iterates on a tree and call the kernls with good arguments */ template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight> -class FFMMAlgorithm : public FAssertable{ +class FFMMAlgorithm : protected FAssertable{ FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree; //< The octree to work on KernelClass<ParticuleClass, CellClass>* const kernels; //< The kernels @@ -36,6 +36,7 @@ public: : tree(inTree) , kernels(inKernels) { assert(tree, "tree cannot be null", __LINE__, __FILE__); assert(kernels, "kernels cannot be null", __LINE__, __FILE__); + FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithm\n")); } /** Default destructor */ @@ -82,6 +83,9 @@ public: typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree); octreeIterator.gotoBottomLeft(); octreeIterator.moveUp(); + + typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator); + // for each levels for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){ // for each cells @@ -90,8 +94,10 @@ public: // child is an array (of 8 child) that may be null kernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel); } while(octreeIterator.moveRight()); - octreeIterator.moveUp(); - octreeIterator.gotoLeft(); + //octreeIterator.moveUp(); + //octreeIterator.gotoLeft(); + avoidGotoLeftIterator.moveUp(); + octreeIterator = avoidGotoLeftIterator; } FDEBUG_TIME(counter.tac();); @@ -106,16 +112,21 @@ public: { // first M2L typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree); octreeIterator.moveDown(); + + typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator); + CellClass* neighbors[208]; // for each levels for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ // for each cells do{ const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel); - kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); + if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); } while(octreeIterator.moveRight()); - octreeIterator.gotoLeft(); - octreeIterator.moveDown(); + //octreeIterator.gotoLeft(); + //octreeIterator.moveDown(); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; } } FDEBUG_TIME(counter.tac();); @@ -126,6 +137,9 @@ public: { // second L2L typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree); octreeIterator.moveDown(); + + typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator); + const int heightMinusOne = OctreeHeight - 1; // for each levels exepted leaf level for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){ @@ -133,8 +147,10 @@ public: do{ kernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel); } while(octreeIterator.moveRight()); - octreeIterator.gotoLeft(); - octreeIterator.moveDown(); + //octreeIterator.gotoLeft(); + //octreeIterator.moveDown(); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; } } @@ -148,6 +164,8 @@ public: FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); FDEBUG_TIME(counter.tic();); + const int heightMinusOne = OctreeHeight - 1; + typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree); octreeIterator.gotoBottomLeft(); // There is a maximum of 26 neighbors @@ -156,7 +174,7 @@ public: do{ kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentList()); // need the current particules and neighbors particules - const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),OctreeHeight-1); + const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne); kernels->P2P( octreeIterator.getCurrentList() , neighbors, counter); } while(octreeIterator.moveRight()); diff --git a/Sources/Core/FFMMAlgorithmThreaded.hpp b/Sources/Core/FFMMAlgorithmThreaded.hpp new file mode 100644 index 000000000..eac79bf56 --- /dev/null +++ b/Sources/Core/FFMMAlgorithmThreaded.hpp @@ -0,0 +1,285 @@ +#ifndef FFMMALGORITHMTHREADED_HPP +#define FFMMALGORITHMTHREADED_HPP +// /!\ Please, you must read the license at the bottom of this page + +#include "../Utils/FAssertable.hpp" +#include "../Utils/FDebug.hpp" +#include "../Utils/FTic.hpp" +#include "../Utils/FOpenMPThread.hpp" + +#include "../Containers/FOctree.hpp" + +#include <omp.h> + + +/** +* @author Berenger Bramas (berenger.bramas@inria.fr) +* @class FFMMAlgorithmThreaded +* @brief +* Please read the license +* +*/ +template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight> +class FFMMAlgorithmThreaded : protected FAssertable{ + typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator; + + static const int NbThreads = 4; + + FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree; //< The octree to work on + KernelClass<ParticuleClass, CellClass>* kernels[NbThreads]; //< The kernels + + + FDEBUG_TIME(FTic counter); //< In case of debug count the time + +public: + /** The constructor need the octree and the kernels used for computation + * @param inTree the octree + * @param inKernels the kernels + * an assert is launched if one of the arguments is null + */ + FFMMAlgorithmThreaded(FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const inTree, + KernelClass<ParticuleClass, CellClass>* const inKernels) + : tree(inTree) { + assert(tree, "tree cannot be null", __LINE__, __FILE__); + assert(kernels, "kernels cannot be null", __LINE__, __FILE__); + + for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){ + this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass>(*inKernels); + } + + FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithmThreaded\n")); + } + + /** Default destructor */ + virtual ~FFMMAlgorithmThreaded(){ + for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){ + delete this->kernels[idxThread]; + } + } + + /** To execute the fmm algorithm + * Call this function to run the complete algo + */ + void execute(){ + for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){ + this->kernels[idxThread]->init(); + } + + bottomPass(); + upwardPass(); + + downardPass(); + + directPass(); + } + + /** P2M */ + void bottomPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); + FDEBUG_TIME(counter.tic();); + + FOctreeIterator octreeIterator(tree); + // Iterate on leafs + octreeIterator.gotoBottomLeft(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + omp_set_lock(&mutex); + while(!stop){ + CellClass*const cell = octreeIterator.getCurrentCell(); + const FList<ParticuleClass*>* const particules = octreeIterator.getCurrentList(); + if(!octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + // We need the current cell that represent the leaf + // and the list of particules + kernels[threadId]->P2M( cell , particules); + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + omp_destroy_lock(&mutex); + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + } + + /** M2M */ + void upwardPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + + FOctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + // for each levels + for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){ + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + omp_set_lock(&mutex); + // for each cells + while(!stop){ + // We need the current cell and the child + // child is an array (of 8 child) that may be null + CellClass*const cell = octreeIterator.getCurrentCell(); + const CellClass*const*const child = octreeIterator.getCurrentChild(); + if(!octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + kernels[threadId]->M2M( cell , child, idxLevel); + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + octreeIterator.moveUp(); + octreeIterator.gotoLeft(); + } + omp_destroy_lock(&mutex); + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + } + + /** M2L L2L */ + void downardPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + + { // first M2L + FOctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + + // for each levels + for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + CellClass* neighbors[208]; + omp_set_lock(&mutex); + // for each cells + while(!stop){ + CellClass* const cell = octreeIterator.getCurrentCell(); + const MortonIndex cellIndex = octreeIterator.getCurrentGlobalIndex(); + if(!octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + const int counter = tree->getDistantNeighbors(neighbors, cellIndex,idxLevel); + if(counter) kernels[threadId]->M2L( cell, neighbors, counter, idxLevel); + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + } + omp_destroy_lock(&mutex); + } + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + + FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + { // second L2L + FOctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + const int heightMinusOne = OctreeHeight - 1; + + omp_lock_t mutex; + omp_init_lock(&mutex); + // for each levels exepted leaf level + for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){ + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + omp_set_lock(&mutex); + // for each cells + while(!stop){ + const CellClass * const cell = octreeIterator.getCurrentCell(); + CellClass ** const child = octreeIterator.getCurrentChild(); + if(!octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + kernels[threadId]->L2L( cell, child, idxLevel); + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + } + omp_destroy_lock(&mutex); + } + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + + } + + /** P2P */ + void directPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + + FOctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + // There is a maximum of 26 neighbors + FList<ParticuleClass*>* neighbors[26]; + const int heightMinusOne = OctreeHeight - 1; + + omp_set_lock(&mutex); + // for each leafs + while(!stop){ + const CellClass * const cell = octreeIterator.getCurrentCell(); + FList<ParticuleClass*>* const particules = octreeIterator.getCurrentList(); + const MortonIndex cellIndex = octreeIterator.getCurrentGlobalIndex(); + if(!octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + kernels[threadId]->L2P(cell, particules); + // need the current particules and neighbors particules + const int counter = tree->getLeafsNeighbors(neighbors, cellIndex,heightMinusOne); + kernels[threadId]->P2P( particules , neighbors, counter); + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + omp_destroy_lock(&mutex); + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + + } + +}; + + +#endif //FFMMALGORITHMTHREADED_HPP + +// [--LICENSE--] diff --git a/Sources/Core/FFMMAlgorithmThreadedInterval.hpp b/Sources/Core/FFMMAlgorithmThreadedInterval.hpp new file mode 100644 index 000000000..6822e2788 --- /dev/null +++ b/Sources/Core/FFMMAlgorithmThreadedInterval.hpp @@ -0,0 +1,317 @@ +#ifndef FFMMALGORITHMTHREADEDTHREADED_HPP +#define FFMMALGORITHMTHREADEDTHREADED_HPP +// /!\ Please, you must read the license at the bottom of this page + +#include "../Utils/FAssertable.hpp" +#include "../Utils/FDebug.hpp" +#include "../Utils/FTic.hpp" +#include "../Utils/FOpenMPThread.hpp" + +#include "../Containers/FOctree.hpp" + +#include <omp.h> + + +/** +* @author Berenger Bramas (berenger.bramas@inria.fr) +* @class FFMMAlgorithmThreadedInterval +* @brief +* Please read the license +* +*/ +template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight> +class FFMMAlgorithmThreadedInterval : protected FAssertable{ + typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator; + + static const int NbThreads = 1; + static const int SizeInterval = 50; + + FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree; //< The octree to work on + KernelClass<ParticuleClass, CellClass>* kernels[NbThreads]; //< The kernels + + FDEBUG_TIME(FTic counter); //< In case of debug count the time + +public: + /** The constructor need the octree and the kernels used for computation + * @param inTree the octree + * @param inKernels the kernels + * an assert is launched if one of the arguments is null + */ + FFMMAlgorithmThreadedInterval(FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const inTree, + KernelClass<ParticuleClass, CellClass>* const inKernels) + : tree(inTree) { + assert(tree, "tree cannot be null", __LINE__, __FILE__); + assert(kernels, "kernels cannot be null", __LINE__, __FILE__); + + for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){ + this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass>(); + *this->kernels[idxThread] = *inKernels; + } + + FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithmThreadedInterval\n")); + } + + /** Default destructor */ + virtual ~FFMMAlgorithmThreadedInterval(){ + for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){ + delete this->kernels[idxThread]; + } + } + + /** To execute the fmm algorithm + * Call this function to run the complete algo + */ + void execute(){ + for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){ + this->kernels[idxThread]->init(); + } + + bottomPass(); + upwardPass(); + + downardPass(); + + directPass(); + } + + /** P2M */ + void bottomPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); + FDEBUG_TIME(counter.tic();); + + FOctreeIterator octreeIterator(tree); + // Iterate on leafs + octreeIterator.gotoBottomLeft(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + + FOctreeIterator threadIter; + int idxSizeInterval = 0; + + omp_set_lock(&mutex); + while(!stop){ + threadIter = octreeIterator; + for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval); + if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + while(idxSizeInterval--){ + // We need the current cell that represent the leaf + // and the list of particules + kernels[threadId]->P2M( threadIter.getCurrentCell() , threadIter.getCurrentList()); + threadIter.moveRight(); + } + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + omp_destroy_lock(&mutex); + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + } + + /** M2M */ + void upwardPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + + FOctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + // for each levels + for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){ + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + + FOctreeIterator threadIter; + int idxSizeInterval = 0; + + omp_set_lock(&mutex); + // for each cells + while(!stop){ + threadIter = octreeIterator; + for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval); + if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + while(idxSizeInterval--){ + // We need the current cell and the child + // child is an array (of 8 child) that may be null + kernels[threadId]->M2M( threadIter.getCurrentCell() , threadIter.getCurrentChild(), idxLevel); + threadIter.moveRight(); + } + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + octreeIterator.moveUp(); + octreeIterator.gotoLeft(); + } + omp_destroy_lock(&mutex); + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + } + + /** M2L L2L */ + void downardPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + + { // first M2L + FOctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + + omp_lock_t mutex; + omp_init_lock(&mutex); + + // for each levels + for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + + FOctreeIterator threadIter; + int idxSizeInterval = 0; + CellClass* neighbors[208]; + + omp_set_lock(&mutex); + // for each cells + while(!stop){ + threadIter = octreeIterator; + for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval); + if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + while(idxSizeInterval--){ + const int counter = tree->getDistantNeighbors(neighbors, threadIter.getCurrentGlobalIndex(),idxLevel); + if(counter) kernels[threadId]->M2L( threadIter.getCurrentCell(), neighbors, counter, idxLevel); + threadIter.moveRight(); + } + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + } + omp_destroy_lock(&mutex); + } + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + + FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + { // second L2L + FOctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + const int heightMinusOne = OctreeHeight - 1; + + omp_lock_t mutex; + omp_init_lock(&mutex); + // for each levels exepted leaf level + for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){ + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + + FOctreeIterator threadIter; + int idxSizeInterval = 0; + + omp_set_lock(&mutex); + // for each cells + while(!stop){ + threadIter = octreeIterator; + for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval); + if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + while(idxSizeInterval--){ + kernels[threadId]->L2L( threadIter.getCurrentCell(), threadIter.getCurrentChild(), idxLevel); + threadIter.moveRight(); + } + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + } + omp_destroy_lock(&mutex); + } + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + + } + + /** P2P */ + void directPass(){ + FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); + FDEBUG_TIME(counter.tic();); + + FOctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + const int heightMinusOne = OctreeHeight - 1; + + omp_lock_t mutex; + omp_init_lock(&mutex); + bool stop = false; + #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads) + { + const int threadId = omp_get_thread_num(); + // There is a maximum of 26 neighbors + FList<ParticuleClass*>* neighbors[26]; + FOctreeIterator threadIter; + int idxSizeInterval = 0; + + omp_set_lock(&mutex); + // for each leafs + while(!stop){ + threadIter = octreeIterator; + for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval); + if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true; + omp_unset_lock(&mutex); + + while(idxSizeInterval--){ + kernels[threadId]->L2P(threadIter.getCurrentCell(), threadIter.getCurrentList()); + // need the current particules and neighbors particules + const int counter = tree->getLeafsNeighbors(neighbors, threadIter.getCurrentGlobalIndex(),heightMinusOne); + kernels[threadId]->P2P( threadIter.getCurrentList() , neighbors, counter); + threadIter.moveRight(); + } + + omp_set_lock(&mutex); + } + omp_unset_lock(&mutex); + } + omp_destroy_lock(&mutex); + + FDEBUG_TIME(counter.tac();); + FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; ) + + } + +}; + + +#endif //FFMMALGORITHMTHREADEDTHREADED_HPP + +// [--LICENSE--] diff --git a/Sources/Fmb/FAbstractFmbKernels.hpp b/Sources/Fmb/FAbstractFmbKernels.hpp index 67511048b..97355548d 100644 --- a/Sources/Fmb/FAbstractFmbKernels.hpp +++ b/Sources/Fmb/FAbstractFmbKernels.hpp @@ -52,9 +52,9 @@ protected: static const int FMB_Info_nexp_size = (FMB_Info_P + 1) * (FMB_Info_P + 1); // Level of the tree - const int treeHeight; + int treeHeight; // Width of the box at the root level - const double treeWidthAtRoot; + double treeWidthAtRoot; // transfer_M2M_container FComplexe*** transitionM2M; @@ -94,7 +94,7 @@ protected: F3DPosition force_sum; double potential_sum; - const bool FMB_Info_up_to_P_in_M2L; + bool FMB_Info_up_to_P_in_M2L; ////////////////////////////////////////////////////////////////// // Allocation @@ -381,8 +381,11 @@ protected: for(int idxm = 0 , idxmMod4 = 0; idxm <= FMB_Info_P ; ++idxm, ++idxmMod4){ if(idxmMod4 == 4) idxmMod4 = 0; const double angle = idxm*inSphere.phi + PiArrayInner[idxmMod4]; - (cosSin + idxm)->setReal(FMath::Sin(angle + M_PI_2)); + (cosSin + idxm)->setReal(FMath::Sin(angle + FMath::FPiDiv2)); (cosSin + idxm)->setImag(FMath::Sin(angle)); + + //printf("l=%d \t inSphere.phi=%f \t this->PiArrayOuter[idxlMod4]=%f \t angle=%f \t FMath::Sin(angle + FMath::FPiDiv2)=%f \t FMath::Sin(angle)=%f\n", + // idxm, inSphere.phi, this->PiArrayInner[idxmMod4], angle, FMath::Sin(angle + FMath::FPiDiv2) , FMath::Sin(angle)); } // Initialization of associated_Legendre_function_Array: @@ -403,6 +406,9 @@ protected: - (l+m)*(*(start_ptr_associated_Legendre_function_Array + expansion_Redirection_array_for_j[l-1] + m))) / inSphere.sinTheta); p_theta_derivated_term->setReal(magnitude * (cosSin + m)->getReal()); p_theta_derivated_term->setImag(magnitude * (cosSin + m)->getImag()); + + //printf("magnitude=%f r_l=%f p_spherical_harmonic_Inner_coefficients_array=%f real=%f imag=%f\n", + // magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag()); } // m=l: @@ -415,6 +421,9 @@ protected: p_theta_derivated_term->setReal(magnitude * (cosSin + m)->getReal()); p_theta_derivated_term->setImag(magnitude * (cosSin + m)->getImag()); + //printf("magnitude=%f r_l=%f p_spherical_harmonic_Inner_coefficients_array=%f real=%f imag=%f\n", + // magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag()); + ++p_term; ++p_theta_derivated_term; ++p_spherical_harmonic_Inner_coefficients_array; @@ -555,6 +564,12 @@ public: buildPrecompute(); } + FAbstractFmbKernels(const FAbstractFmbKernels& other) + : treeHeight(other.treeHeight), treeWidthAtRoot(other.treeWidthAtRoot), + transitionM2M(0), transitionL2L(0), potential_sum(0), FMB_Info_up_to_P_in_M2L(true) { + buildPrecompute(); + } + virtual void init(){ force_sum = F3DPosition(); potential_sum = 0; @@ -595,9 +610,9 @@ public: * Phi(x) = sum_{n=0}^{+} sum_{m=-n}^{n} M_n^m O_n^{-m} (x - *p_center) * */ - void P2M(CellClass* const inPole, FList<ParticuleClass*>* const inParticules) { + void P2M(CellClass* const inPole, const FList<ParticuleClass*>* const inParticules) { - for(typename FList<ParticuleClass*>::BasicIterator iterParticule(*inParticules); + for(typename FList<ParticuleClass*>::ConstBasicIterator iterParticule(*inParticules); iterParticule.isValide() ; iterParticule.progress()){ Spherical spherical; @@ -649,7 +664,7 @@ public: * * Warning: if j-n < |k-l| we do nothing. */ - void M2M(CellClass* const inPole, CellClass** const inChild, const int inLevel) { + void M2M(CellClass* const inPole, const CellClass*const* const inChild, const int inLevel) { // We do NOT have: for(l=n-j+k; l<=j-n+k ;++l){} <=> for(l=-n; l<=n ;++l){if (j-n >= abs(k-l)){}} // But we have: for(k=MAX(0,n-j+l); k<=j-n+l; ++k){} <=> for(k=0; k<=j; ++k){if (j-n >= abs(k-l)){}} @@ -769,7 +784,7 @@ public: *Remark: here we have always j+n >= |-k-l| * */ - void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) { + void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) { FTreeCoordinate coordCenter; coordCenter.setPositionFromMorton(pole->getMortonIndex(),inLevel); @@ -901,7 +916,7 @@ public: * *Warning: if |l-k| > n-j, we do nothing. */ - void L2L(CellClass* const pole, CellClass** const child, const int inLevel) { + void L2L(const CellClass* const pole, CellClass** const child, const int inLevel) { for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ // if no child at this position @@ -990,9 +1005,9 @@ public: } } - virtual void L2P(CellClass* const local, FList<ParticuleClass*>* const particules) = 0; + virtual void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules) = 0; - virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) = 0; + virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) = 0; }; diff --git a/Sources/Fmb/FExtendFmbCell.hpp b/Sources/Fmb/FExtendFmbCell.hpp index 2c682ca57..a93acf992 100644 --- a/Sources/Fmb/FExtendFmbCell.hpp +++ b/Sources/Fmb/FExtendFmbCell.hpp @@ -41,6 +41,15 @@ public: return *this; } + /** Get Multipole */ + const FComplexe* getMultipole() const { + return this->multipole_exp; + } + /** Get Local */ + const FComplexe* getLocal() const { + return this->local_exp; + } + /** Get Multipole */ FComplexe* getMultipole() { return this->multipole_exp; diff --git a/Sources/Fmb/FFmbKernelsForces.hpp b/Sources/Fmb/FFmbKernelsForces.hpp index db60940f5..74a75e9c3 100644 --- a/Sources/Fmb/FFmbKernelsForces.hpp +++ b/Sources/Fmb/FFmbKernelsForces.hpp @@ -33,7 +33,7 @@ class FFmbKernelsForces : public FAbstractFmbKernels<ParticuleClass,CellClass> { /** bodies_L2P * expansion_L2P_add_to_force_vector */ - void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){ + void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){ typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules); while( iterTarget.isValide() ){ //printf("Morton %lld\n",local->getMortonIndex()); @@ -182,7 +182,7 @@ class FFmbKernelsForces : public FAbstractFmbKernels<ParticuleClass,CellClass> { * ) * */ - void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) { + void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) { typename FList<ParticuleClass*>::BasicIterator iterTarget(*currentBox); while( iterTarget.isValide() ){ for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){ diff --git a/Sources/Fmb/FFmbKernelsPotential.hpp b/Sources/Fmb/FFmbKernelsPotential.hpp index 3e350f8ab..a357a0d8b 100644 --- a/Sources/Fmb/FFmbKernelsPotential.hpp +++ b/Sources/Fmb/FFmbKernelsPotential.hpp @@ -28,7 +28,7 @@ public: /** bodies_L2P * expansion_L2P_add_to_force_vector */ - void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){ + void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){ typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules); while( iterTarget.isValide() ){ //printf("Morton %lld\n",local->getMortonIndex()); @@ -88,7 +88,7 @@ public: * ) * */ - void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) { + void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) { typename FList<ParticuleClass*>::BasicIterator iterTarget(*currentBox); while( iterTarget.isValide() ){ for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){ diff --git a/Sources/Fmb/FFmbKernelsPotentialForces.hpp b/Sources/Fmb/FFmbKernelsPotentialForces.hpp index 8c288d8db..a2542994e 100644 --- a/Sources/Fmb/FFmbKernelsPotentialForces.hpp +++ b/Sources/Fmb/FFmbKernelsPotentialForces.hpp @@ -27,7 +27,7 @@ public: /** bodies_L2P * expansion_L2P_add_to_force_vector */ - void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){ + void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){ typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules); while( iterTarget.isValide() ){ //printf("Morton %lld\n",local->getMortonIndex()); @@ -37,6 +37,21 @@ public: positionToSphere( iterTarget.value()->getPosition() - local->getPosition(), &spherical ); + /*printf("\t\t bodies_it_Get_p_position(&it) x = %lf \t y = %lf \t z = %lf \n", + (iterTarget.value()->getPosition()).getX(), + (iterTarget.value()->getPosition()).getY(), + (iterTarget.value()->getPosition()).getZ()); + printf("\t\t p_leaf_center x = %lf \t y = %lf \t z = %lf \n", + (local->getPosition()).getX(), + (local->getPosition()).getY(), + (local->getPosition()).getZ());*/ + /*printf("\t\t p_position_to_leaf_center x = %lf \t y = %lf \t z = %lf \n", + (iterTarget.value()->getPosition() - local->getPosition()).getX(), + (iterTarget.value()->getPosition() - local->getPosition()).getY(), + (iterTarget.value()->getPosition() - local->getPosition()).getZ());*/ + //printf("\t\t phi = %lf \t cos = %lf \t sin = %lf \t r= %lf \n", + // spherical.phi,spherical.cosTheta,spherical.sinTheta,spherical.r); + harmonicInnerThetaDerivated( spherical, FAbstractFmbKernels<ParticuleClass,CellClass>::current_thread_Y, FAbstractFmbKernels<ParticuleClass,CellClass>::current_thread_Y_theta_derivated); // The maximum degree used here will be P. @@ -60,6 +75,15 @@ public: force_vector_in_local_base.setY( force_vector_in_local_base.getY() + exp_term_aux.getReal()); + + //printf("\t j = %d \t exp_term_aux real = %lf imag = %lf \n", + // j, + // exp_term_aux.getReal(), + // exp_term_aux.getImag()); + //printf("\t force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n", + // force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ()); + + ++p_local_exp_term; ++p_Y_term; ++p_Y_theta_derivated_term; @@ -77,10 +101,23 @@ public: force_vector_in_local_base.setZ( force_vector_in_local_base.getZ() - 2 * k * exp_term_aux.getImag()); // F_theta: + //printf("\t\t k = %d \t j = %d \t exp_term_aux real = %lf imag = %lf \n", + // k,j, + // exp_term_aux.getReal(), + // exp_term_aux.getImag()); + //printf("\t\t\t p_Y_term->getReal = %lf \t p_Y_term->getImag = %lf \n",p_Y_term->getReal(),p_Y_term->getImag()); + exp_term_aux.setReal( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getReal()) - (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getImag()) ); exp_term_aux.setImag( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getImag()) + (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getReal()) ); force_vector_in_local_base.setY(force_vector_in_local_base.getY() + 2 * exp_term_aux.getReal()); + + //printf("\t\t k = %d \t j = %d \t exp_term_aux real = %lf imag = %lf \n", + // k,j, + // exp_term_aux.getReal(), + // exp_term_aux.getImag()); + //printf("\t\t force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n", + // force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ()); } } @@ -181,11 +218,12 @@ public: * ) * */ - void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) { + void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) { typename FList<ParticuleClass*>::BasicIterator iterTarget(*currentBox); while( iterTarget.isValide() ){ + for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){ - typename FList<ParticuleClass*>::BasicIterator iterSource(*directNeighbors[idxDirectNeighbors]); + typename FList<ParticuleClass*>::ConstBasicIterator iterSource(*directNeighbors[idxDirectNeighbors]); while( iterSource.isValide() ){ DIRECT_COMPUTATION_NO_MUTUAL_SOFT(&iterTarget.value(), iterSource.value()); @@ -209,7 +247,7 @@ public: FAbstractFmbKernels<ParticuleClass,CellClass>::force_sum += iterTarget.value()->getForces(); //printf("They contains energy (res = %f, potential = %f, value = %f)\n", - //potential_sum, iterTarget.value()->getPotential(), iterTarget.value()->getValue()); + // potential_sum, iterTarget.value()->getPotential(), iterTarget.value()->getValue()); FAbstractFmbKernels<ParticuleClass,CellClass>::potential_sum += iterTarget.value()->getPotential() * iterTarget.value()->getValue(); iterTarget.progress(); diff --git a/Tests/testFMMAlgorithm.cpp b/Tests/testFMMAlgorithm.cpp index a7375e1ef..3e9db4a7c 100644 --- a/Tests/testFMMAlgorithm.cpp +++ b/Tests/testFMMAlgorithm.cpp @@ -73,12 +73,12 @@ template< class ParticuleClass, class CellClass> class MyTestKernels : public FAbstractKernels<ParticuleClass,CellClass> { public: // Before upward - virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) { + virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) { // the pole represents all particules under pole->setDataUp(particules->getSize()); } // During upward - virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) { + virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) { // A parent represents the sum of the child for(int idx = 0 ; idx < 8 ; ++idx){ if(child[idx]){ @@ -87,14 +87,14 @@ public: } } // Before Downward - virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) { + virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) { // The pole is impacted by what represent other poles for(int idx = 0 ; idx < size ; ++idx){ pole->setDataDown(pole->getDataDown() + distantNeighbors[idx]->getDataUp()); } } // During Downward - virtual void L2L(CellClass* const local, CellClass** const child, const int inLevel) { + virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) { // Each child is impacted by the father for(int idx = 0 ; idx < 8 ; ++idx){ if(child[idx]){ @@ -103,7 +103,7 @@ public: } } // After Downward - virtual void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){ + virtual void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){ // The particules is impacted by the parent cell typename FList<ParticuleClass*>::BasicIterator iter(*particules); while( iter.isValide() ){ @@ -112,7 +112,7 @@ public: } } // After Downward - virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) { + virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) { // Each particules targeted is impacted by the particules sources long inc = currentBox->getSize() - 1; for(int idx = 0 ; idx < size ; ++idx){ @@ -215,5 +215,48 @@ int main(int , char ** ){ return 0; } +/* +Creating 2000000 particules ... +Done (0.12328s). +Inserting particules ... +Done (3.06648s). +Working on particules ... + Start Bottom Pass + Finished (0.835397s) + Start Upward Pass + Finished (1.12351s) + Start Downward Pass (M2L) + Finished (32.5013s) + Start Downward Pass (L2L) + Finished (1.13309s) + Start Direct Pass + Finished (6.53526s) +Done (42.1288s). +Check Result +Done +Deleting particules ... +Done (0.0397179s). + +Creating 2000000 particules ... +Done (0.114306s). +Inserting particules ... +Done (3.0618s). +Working on particules ... + Start Bottom Pass + Finished (0.832s) + Start Upward Pass + Finished (1.11856s) + Start Downward Pass (M2L) + Finished (31.396s) + Start Downward Pass (L2L) + Finished (1.13624s) + Start Direct Pass + Finished (6.5025s) +Done (40.9855s). +Check Result +Done +Deleting particules ... +Done (0.0378419s). + */ // [--LICENSE--] diff --git a/Tests/testFMMAlgorithmThreaded.cpp b/Tests/testFMMAlgorithmThreaded.cpp new file mode 100644 index 000000000..b7b2207b8 --- /dev/null +++ b/Tests/testFMMAlgorithmThreaded.cpp @@ -0,0 +1,278 @@ +// /!\ Please, you must read the license at the bottom of this page + +#include <iostream> + +#include <stdio.h> +#include <stdlib.h> + +#include "../Sources/Utils/FTic.hpp" + +#include "../Sources/Containers/FOctree.hpp" +#include "../Sources/Containers/FList.hpp" + +#include "../Sources/Utils/FAssertable.hpp" +#include "../Sources/Utils/F3DPosition.hpp" + +#include "../Sources/Core/FBasicParticule.hpp" +#include "../Sources/Core/FBasicCell.hpp" +#include "../Sources/Core/FBasicKernels.hpp" + +#include "../Sources/Core/FFMMAlgorithm.hpp" +#include "../Sources/Core/FFMMAlgorithmThreaded.hpp" +#include "../Sources/Core/FFMMAlgorithmThreadedInterval.hpp" +#include "../Sources/Core/FAbstractKernels.hpp" + +// Compile by : g++ testFMMAlgorithmThreaded.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp -lgomp -fopenmp -O2 -o testFMMAlgorithmThreaded.exe + +/** This program show an example of use of + * the fmm basic algo + * it also check that eachh particules is little or longer + * related that each other + */ + + +/** Custom particule class */ +class MyTestParticule : public FBasicParticule{ +protected: + // To store data during downard pass + long dataDown; +public: + MyTestParticule(): dataDown(0){ + } + long getDataDown() const { + return this->dataDown; + } + void setDataDown(const long inData){ + this->dataDown = inData; + } +}; + +/** Custom cell */ +class MyTestCell : public FBasicCell { + // To store data during upward and downward pass + long dataUp, dataDown; +public: + MyTestCell(): dataUp(0) , dataDown(0){ + } + long getDataUp() const { + return this->dataUp; + } + void setDataUp(const long inData){ + this->dataUp = inData; + } + long getDataDown() const { + return this->dataDown; + } + void setDataDown(const long inData){ + this->dataDown = inData; + } +}; + +/** Custom Kernel + * This kernel simply store in each element the number of elements + * it represents + */ +template< class ParticuleClass, class CellClass> +class MyTestKernels : public FAbstractKernels<ParticuleClass,CellClass> { +public: + // Before upward + virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) { + // the pole represents all particules under + pole->setDataUp(particules->getSize()); + } + // During upward + virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) { + // A parent represents the sum of the child + for(int idx = 0 ; idx < 8 ; ++idx){ + if(child[idx]){ + pole->setDataUp(pole->getDataUp() + child[idx]->getDataUp()); + } + } + } + // Before Downward + virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) { + // The pole is impacted by what represent other poles + for(int idx = 0 ; idx < size ; ++idx){ + pole->setDataDown(pole->getDataDown() + distantNeighbors[idx]->getDataUp()); + } + } + // During Downward + virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) { + // Each child is impacted by the father + for(int idx = 0 ; idx < 8 ; ++idx){ + if(child[idx]){ + child[idx]->setDataDown(local->getDataDown() + child[idx]->getDataDown()); + } + } + } + // After Downward + virtual void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){ + // The particules is impacted by the parent cell + typename FList<ParticuleClass*>::BasicIterator iter(*particules); + while( iter.isValide() ){ + iter.value()->setDataDown(iter.value()->getDataDown() + local->getDataDown()); + iter.progress(); + } + } + // After Downward + virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) { + // Each particules targeted is impacted by the particules sources + long inc = currentBox->getSize() - 1; + for(int idx = 0 ; idx < size ; ++idx){ + inc += directNeighbors[idx]->getSize(); + } + + typename FList<ParticuleClass*>::BasicIterator iter(*currentBox); + while( iter.isValide() ){ + iter.value()->setDataDown(iter.value()->getDataDown() + inc); + iter.progress(); + } + } +}; + +// Simply create particules and try the kernels +int main(int , char ** ){ + const int NbLevels = 10;//10; + const int SizeSubLevels = 3;//3 + const long NbPart = 2000000;//2000000 + MyTestParticule* particules = new MyTestParticule[NbPart]; + FTic counter; + + srand ( 1 ); // volontary set seed to constant + + // ----------------------------------------------------- + std::cout << "Creating " << NbPart << " particules ..." << std::endl; + counter.tic(); + for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){ + particules[idxPart].setPosition(double(rand())/RAND_MAX,double(rand())/RAND_MAX,double(rand())/RAND_MAX); + } + + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + // ----------------------------------------------------- + + FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels> tree(1.0,F3DPosition(0.5,0.5,0.5)); + + // ----------------------------------------------------- + std::cout << "Inserting particules ..." << std::endl; + counter.tic(); + for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){ + tree.insert(&particules[idxPart]); + } + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + // ----------------------------------------------------- + + std::cout << "Working on particules ..." << std::endl; + counter.tic(); + + // MyTestKernels FBasicKernels + MyTestKernels<MyTestParticule, MyTestCell> kernels; + //FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmThreadedInterval + FFMMAlgorithmThreaded<MyTestKernels, MyTestParticule, MyTestCell, NbLevels, SizeSubLevels> algo(&tree,&kernels); + algo.execute(); + + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + + // ----------------------------------------------------- + + std::cout << "Check Result\n"; + { // Check that each particule has been summed with all other + FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree); + octreeIterator.gotoBottomLeft(); + do{ + if(octreeIterator.getCurrentCell()->getDataUp() != octreeIterator.getCurrentList()->getSize() ){ + std::cout << "Problem P2M : " << (octreeIterator.getCurrentCell()->getDataUp() - octreeIterator.getCurrentList()->getSize()) << "\n"; + } + } while(octreeIterator.moveRight()); + } + { // Ceck if there is number of NbPart summed at level 1 + FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree); + octreeIterator.moveDown(); + long res = 0; + do{ + res += octreeIterator.getCurrentCell()->getDataUp(); + } while(octreeIterator.moveRight()); + if(res != NbPart){ + std::cout << "Problem M2M at level 1 : " << res << "\n"; + } + } + { // Ceck if there is number of NbPart summed at level 1 + FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree); + octreeIterator.gotoBottomLeft(); + for(int idxLevel = NbLevels - 1 ; idxLevel > 1 ; --idxLevel ){ + long res = 0; + do{ + res += octreeIterator.getCurrentCell()->getDataUp(); + } while(octreeIterator.moveRight()); + if(res != NbPart){ + std::cout << "Problem M2M at level " << idxLevel << " : " << res << "\n"; + } + octreeIterator.moveUp(); + octreeIterator.gotoLeft(); + } + } + { // Check that each particule has been summed with all other + FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree); + octreeIterator.gotoBottomLeft(); + do{ + FList<MyTestParticule*>::BasicIterator iter(*octreeIterator.getCurrentList()); + while( iter.isValide() ){ + // If a particules has been impacted by less than NbPart - 1 (the current particule) + // there is a problem + if(iter.value()->getDataDown() != NbPart - 1){ + std::cout << "Problem L2P + P2P : " << iter.value()->getDataDown() << "\n"; + } + iter.progress(); + } + } while(octreeIterator.moveRight()); + } + std::cout << "Done\n"; + + // ----------------------------------------------------- + std::cout << "Deleting particules ..." << std::endl; + counter.tic(); + for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){ + particules[idxPart].~MyTestParticule(); + } + delete [] particules; + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + // ----------------------------------------------------- + + return 0; +} + +/* + 2000000 particules + +FFMMAlgorithm + Start Bottom Pass + Finished (0.840611s) + Start Upward Pass + Finished (1.12125s) + Start Downward Pass (M2L) + Finished (31.815s) + Start Downward Pass (L2L) + Finished (1.13794s) + Start Direct Pass + Finished (6.51685s) +Done (41.4319s). + +FFMMAlgorithmThreaded + Start Bottom Pass + Finished (1.05805s) + Start Upward Pass + Finished (1.15517s) + Start Downward Pass (M2L) + Finished (16.2212s) + Start Downward Pass (L2L) + Finished (1.00922s) + Start Direct Pass + Finished (3.10627s) +Done (22.5614s). + + */ + +// [--LICENSE--] diff --git a/Tests/testFmbAlgorithm.cpp b/Tests/testFmbAlgorithm.cpp index a0af0866f..f1fb79fd3 100644 --- a/Tests/testFmbAlgorithm.cpp +++ b/Tests/testFmbAlgorithm.cpp @@ -18,6 +18,8 @@ #include "../Sources/Fmb/FExtendFmbCell.hpp" #include "../Sources/Core/FFMMAlgorithm.hpp" +#include "../Sources/Core/FFMMAlgorithmThreaded.hpp" +#include "../Sources/Core/FFMMAlgorithmThreadedInterval.hpp" #include "../Sources/Fmb/FFmbKernelsPotentialForces.hpp" #include "../Sources/Fmb/FFmbKernelsForces.hpp" @@ -96,7 +98,7 @@ int main(int , char ** ){ counter.tic(); FFmbKernelsPotentialForces<FmbParticule, FmbCell> kernels(NbLevels,loader.getBoxWidth());//FFmbKernelsPotentialForces FFmbKernelsForces FFmbKernelsPotential - FFMMAlgorithm<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels); + FFMMAlgorithmThreaded<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); counter.tac(); -- GitLab