diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f52e37e51df5bd787e34c8b293a20c1d54bbc2f..7901be546b6e142d248c8030ef7ae923069cafd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,10 +9,10 @@ endif(insource) # Options OPTION( SCALFMM_USE_CBLAS "Set to ON to build ScaFMM with BLAS" ON ) OPTION( SCALFMM_USE_MPI "Set to ON to build ScaFMM with MPI" ON ) -OPTION( BUILD_TESTS "Set to ON to build fonctionnalities Tests" ON ) -OPTION( BUILD_UTESTS "Set to ON to build UTests" ON ) +OPTION( SCALFMM_BUILD_TESTS "Set to ON to build fonctionnalities Tests" ON ) +OPTION( SCALFMM_BUILD_UTESTS "Set to ON to build UTests" ON ) -# MPI option +# MPI option has to be set before project if( SCALFMM_USE_MPI ) SET(CMAKE_CXX_COMPILER mpicxx) endif() @@ -42,14 +42,14 @@ CONFIGURE_FILE( ${CMAKE_SOURCE_DIR}/Src/ScalFmmConfig.h.cmake add_subdirectory(Src) # Build - Tests -MESSAGE( STATUS "BUILD_TESTS = ${BUILD_TESTS}" ) -if( BUILD_TESTS ) +MESSAGE( STATUS "SCALFMM_BUILD_TESTS = ${SCALFMM_BUILD_TESTS}" ) +if( SCALFMM_BUILD_TESTS ) add_subdirectory(Tests) endif() # Build - UTests -MESSAGE( STATUS "BUILD_UTESTS = ${BUILD_UTESTS}" ) -if( BUILD_UTESTS ) +MESSAGE( STATUS "SCALFMM_BUILD_UTESTS = ${SCALFMM_BUILD_UTESTS}" ) +if( SCALFMM_BUILD_UTESTS ) add_subdirectory(UTests) endif() diff --git a/Src/CMakeLists.txt b/Src/CMakeLists.txt index c258069d83bf9c968dd5865466f4d9605b56e856..f930e4b78c532b6d07a917f09a167e40514f5683 100644 --- a/Src/CMakeLists.txt +++ b/Src/CMakeLists.txt @@ -20,6 +20,6 @@ add_library( # Adding the entire project dir as an include dir INCLUDE_DIRECTORIES( - ${CMAKE_BINARY_DIR}/Sources + ${CMAKE_BINARY_DIR}/Src ) diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp index 8010b0e45941ad28a35b7fc369223ed5146ebabf..ee0aacfc34ff7e4ada1d08a662c57d222cebb299 100644 --- a/Src/Core/FFmmAlgorithm.hpp +++ b/Src/Core/FFmmAlgorithm.hpp @@ -33,9 +33,6 @@ class FFmmAlgorithm : protected FAssertable{ Octree* const tree; //< The octree to work on 
KernelClass<ParticleClass, CellClass, OctreeHeight>* const kernels; //< The kernels - FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time - FDEBUG(FTic computationCounter); //< In case of debug: to count computation time - public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on @@ -62,23 +59,26 @@ public: void execute(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); - kernels->init(); - bottomPass(); upwardPass(); downardPass(); directPass(); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // P2M + ///////////////////////////////////////////////////////////////////////////// + /** P2M */ void bottomPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); FOctreeIterator octreeIterator(tree); @@ -90,21 +90,23 @@ public: FDEBUG(computationCounter.tic()); kernels->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSrc()); FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } while(octreeIterator.moveRight()); - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + 
///////////////////////////////////////////////////////////////////////////// + // Upward + ///////////////////////////////////////////////////////////////////////////// + /** M2M */ void upwardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); // Start from leal level - 1 FOctreeIterator octreeIterator(tree); @@ -122,27 +124,31 @@ public: FDEBUG(computationCounter.tic()); kernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel); FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } while(octreeIterator.moveRight()); avoidGotoLeftIterator.moveUp(); octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Downward + ///////////////////////////////////////////////////////////////////////////// + /** M2L L2L */ void downardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); { // first M2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass 
(M2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + FOctreeIterator octreeIterator(tree); octreeIterator.moveDown(); @@ -153,25 +159,24 @@ public: for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ // for each cells do{ - FDEBUG(computationCounter.tic()); const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel); + FDEBUG(computationCounter.tic()); if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } while(octreeIterator.moveRight()); avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; } + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); - - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( totalComputation = 0 ); - { // second L2L + + { // second L2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter ); + FOctreeIterator octreeIterator(tree); octreeIterator.moveDown(); @@ -185,26 +190,29 @@ public: FDEBUG(computationCounter.tic()); kernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel); FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } while(octreeIterator.moveRight()); avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; } + + FDEBUG( FDebug::Controller << "\tFinished (" << 
counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Direct + ///////////////////////////////////////////////////////////////////////////// + /** P2P */ void directPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); const int heightMinusOne = OctreeHeight - 1; @@ -216,16 +224,17 @@ public: do{ FDEBUG(computationCounter.tic()); kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets()); + FDEBUG(computationCounter.tac()); // need the current particles and neighbors particles const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne); + FDEBUG(computationCounter.tic()); kernels->P2P( octreeIterator.getCurrentListTargets(), octreeIterator.getCurrentListSrc() , neighbors, counter); FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } while(octreeIterator.moveRight()); - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); 
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp index b1b14fb07c1bc4c2da521d0e07cfc0cc4d7e0387..7826d5c7d1956918303f4fef34c920a57167aafa 100644 --- a/Src/Core/FFmmAlgorithmThread.hpp +++ b/Src/Core/FFmmAlgorithmThread.hpp @@ -40,15 +40,14 @@ class FFmmAlgorithmThread : protected FAssertable{ typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel; Octree* const tree; //< The octree to work on - Kernel* kernels[FThreadNumbers]; //< The kernels - - FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time - FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + Kernel** kernels; //< The kernels OctreeIterator* iterArray; static const int SizeShape = 3*3*3; - int shapeLeaf[SizeShape]; + int shapeLeaf[SizeShape]; + + const int MaxThreads; public: /** The constructor need the octree and the kernels used for computation @@ -57,12 +56,12 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmThread(Octree* const inTree, Kernel* const inKernels) - : tree(inTree) , iterArray(0) { + : tree(inTree) , kernels(0), iterArray(0), MaxThreads(omp_get_max_threads()) { assert(tree, "tree cannot be null", __LINE__, __FILE__); - assert(kernels, "kernels cannot be null", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels = new Kernel*[MaxThreads]; + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } @@ -71,9 +70,10 @@ public: /** Default destructor */ virtual ~FFmmAlgorithmThread(){ - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ delete this->kernels[idxThread]; } + delete [] this->kernels; } /** @@ -103,10 +103,6 @@ public: iterArray = new OctreeIterator[leafs]; 
assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ - this->kernels[idxThread]->init(); - } - bottomPass(); upwardPass(); @@ -120,11 +116,15 @@ public: FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // P2M + ///////////////////////////////////////////////////////////////////////////// + /** P2M */ void bottomPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); OctreeIterator octreeIterator(tree); int leafs = 0; @@ -135,8 +135,8 @@ public: ++leafs; } while(octreeIterator.moveRight()); - FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + FDEBUG(FTic computationCounter); + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -146,20 +146,23 @@ public: myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc()); } } - FDEBUG(computationCounter.tac()); + FDEBUG(computationCounter.tac() ); - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Upward + ///////////////////////////////////////////////////////////////////////////// + /** M2M */ void upwardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Upward 
Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); // Start from leal level - 1 OctreeIterator octreeIterator(tree); @@ -179,7 +182,7 @@ public: octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -190,23 +193,27 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Downward + ///////////////////////////////////////////////////////////////////////////// + /** M2L L2L */ void downardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); { // first M2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); OctreeIterator avoidGotoLeftIterator(octreeIterator); @@ -223,7 +230,7 @@ public: octreeIterator = avoidGotoLeftIterator; FDEBUG(computationCounter.tic()); 
- #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; CellClass* neighbors[208]; @@ -234,17 +241,17 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( totalComputation = 0 ); { // second L2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); @@ -263,7 +270,7 @@ public: octreeIterator = avoidGotoLeftIterator; FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -272,21 +279,25 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + 
///////////////////////////////////////////////////////////////////////////// + // Direct + ///////////////////////////////////////////////////////////////////////////// + /** P2P */ void directPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); OctreeIterator* shapeArray[SizeShape]; int countShape[SizeShape]; @@ -317,7 +328,7 @@ public: FDEBUG(computationCounter.tic()); for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ const int leafAtThisShape = this->shapeLeaf[idxShape]; - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; // There is a maximum of 26 neighbors @@ -338,9 +349,8 @@ public: delete [] shapeArray[idxShape]; } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index f53c638c50bceab6df36e31ae02041d7778e491a..4b88452e0e6fe52ae2bf58cfa85c64d4f5d339f3 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -44,20 +44,17 @@ typedef FSingleApplication ApplicationImplementation; * schedule(runtime) */ template<template< class ParticleClass, class CellClass, int OctreeHeight> class KernelClass, - class ParticleClass, class CellClass, - template<class ParticleClass> class LeafClass, - int OctreeHeight, int 
SubtreeHeight> -class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImplementation{ +class ParticleClass, class CellClass, +template<class ParticleClass> class LeafClass, +int OctreeHeight, int SubtreeHeight> + class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImplementation{ // To reduce the size of variable type based on foctree in this file typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator; typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel; Octree* const tree; //< The octree to work on - Kernel* kernels[FThreadNumbers]; //< The kernels - - FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time - FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + Kernel** kernels; //< The kernels OctreeIterator* iterArray; OctreeIterator* previousIterArray; @@ -69,6 +66,8 @@ class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImpl int leftOffsets[OctreeHeight]; int rightOffsets[OctreeHeight]; + const int MaxThreads; + void run(){} void swapArray(){ @@ -84,13 +83,14 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmThreadProc(Octree* const inTree, Kernel* const inKernels, const int inArgc, char ** const inArgv ) - : ApplicationImplementation(inArgc,inArgv), tree(inTree) , iterArray(0), - previousIterArray(0), previousLeft(0),previousRight(0), previousSize(0) { + : ApplicationImplementation(inArgc,inArgv), tree(inTree) , kernels(0), iterArray(0), + previousIterArray(0), previousLeft(0),previousRight(0), previousSize(0), + MaxThreads(omp_get_max_threads()) { assert(tree, "tree cannot be null", __LINE__, __FILE__); - assert(kernels, "kernels cannot be null", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + 
this->kernels = new Kernel*[MaxThreads]; + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } @@ -99,9 +99,10 @@ public: /** Default destructor */ virtual ~FFmmAlgorithmThreadProc(){ - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ delete this->kernels[idxThread]; } + delete [] this->kernels; } /** @@ -125,11 +126,6 @@ public: previousIterArray = new OctreeIterator[leafs]; assert(previousIterArray, "previousIterArray bad alloc", __LINE__, __FILE__); - // init kernels - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ - this->kernels[idxThread]->init(); - } - // init offsets for(int idxOff = 0 ; idxOff < OctreeHeight ; ++idxOff){ leftOffsets[idxOff] = 0; @@ -154,6 +150,10 @@ public: FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Utils functions + ///////////////////////////////////////////////////////////////////////////// + int getLeft(const int idProc, const int inSize, const int nbOfProc) const { const float step = (float(inSize) / nbOfProc); return int(FMath::Ceil(step * idProc)); @@ -171,11 +171,15 @@ public: return int(position/step); } + ///////////////////////////////////////////////////////////////////////////// + // P2M + ///////////////////////////////////////////////////////////////////////////// + /** P2M */ void bottomPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); OctreeIterator octreeIterator(tree); const int nbProcess = processCount(); @@ -196,11 +200,11 @@ public: this->previousRight = endIdx - 1; this->previousSize = leafs; - 
FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + FDEBUG(FTic computationCounter); +#pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; - #pragma omp for +#pragma omp for for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ // We need the current cell that represent the leaf // and the list of particles @@ -214,18 +218,24 @@ public: this->previousRight = endIdx - 1; this->previousSize = leafs; - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Upward + ///////////////////////////////////////////////////////////////////////////// + /** M2M */ void upwardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + FDEBUG(FTic sendCounter); + FDEBUG(FTic receiveCounter); // Start from leal level - 1 OctreeIterator octreeIterator(tree); @@ -238,7 +248,6 @@ public: // for each levels for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){ - //print(); int leafs = 0; // for each cells @@ -252,37 +261,32 @@ public: const int startIdx = getLeft(idPorcess,leafs,nbProcess); const int endIdx = getRight(idPorcess,leafs,nbProcess); - //std::cout << idPorcess << ">>--startIdx " << (startIdx) << " endIdx " << (endIdx) << std::endl; - //std::cout << idPorcess << ">>--previousLeft " << (previousLeft) << " previousRight " << (previousRight) << 
std::endl; - - //std::cout << "level " << idxLevel << " start " << startIdx << " end " << endIdx << std::endl; - if(startIdx < leafs){ + FDEBUG(sendCounter.tic()); int leftOffset = 0; { const MortonIndex MostLeftChild = iterArray[startIdx].getCurrentGlobalIndex() << 3; const MortonIndex leftChildIter = previousIterArray[previousLeft].getCurrentGlobalIndex(); - //std::cout << idPorcess << ">>--MostLeftChild " << (MostLeftChild) << " leftChildIter " << (leftChildIter) << std::endl; + if(leftChildIter < MostLeftChild){ int parentOffset = startIdx - 1; MortonIndex parentIndex = iterArray[parentOffset].getCurrentGlobalIndex(); MortonIndex childIndex = 0; while( (childIndex = previousIterArray[previousLeft+leftOffset].getCurrentGlobalIndex()) < MostLeftChild){ childIndex >>= 3; - //std::cout << "before loop" << std::endl; + while(childIndex != parentIndex){ if(childIndex < parentIndex) --parentOffset; else ++parentOffset; - //std::cout << "parentOffset " << parentOffset << " parentIndex " << parentIndex << " childIndex " << childIndex << std::endl; + parentIndex = iterArray[parentOffset].getCurrentGlobalIndex(); - //std::cout << "parentOffset " << parentOffset << " parentIndex " << parentIndex << " childIndex " << childIndex << std::endl; + } - //std::cout << "before send" << std::endl; + const int idxReceiver = getProc(parentOffset,leafs,nbProcess); sendData(idxReceiver,sizeof(CellClass),previousIterArray[this->previousLeft+leftOffset].getCurrentCell(),previousLeft+leftOffset); - //std::cout << idPorcess << "\t>>-- sends left to " << (idxReceiver) << " index " << (previousLeft+leftOffset) << std::endl; + ++leftOffset; - //std::cout << "before end big loop" << std::endl; } } else if(this->previousLeft > 0 && leftChildIter > MostLeftChild){ @@ -291,13 +295,12 @@ public: } } } - //std::cout << idPorcess << ">>--leftOffset " << (leftOffset) << std::endl; int rightOffset = 0; { const MortonIndex MostRightChild = (iterArray[endIdx-1].getCurrentGlobalIndex() << 3) | 7; const 
MortonIndex rightChildIter = previousIterArray[previousRight].getCurrentGlobalIndex(); - //std::cout << idPorcess << ">>--MostRightChild " << (MostRightChild) << " rightChildIter " << (rightChildIter) << std::endl; + if(this->previousRight < this->previousSize - 1 && rightChildIter < MostRightChild){ while( previousIterArray[previousRight-rightOffset+1].getCurrentGlobalIndex() <= MostRightChild){ --rightOffset; @@ -316,60 +319,55 @@ public: } const int idxReceiver = getProc(parentOffset,leafs,nbProcess); sendData(idxReceiver,sizeof(CellClass),previousIterArray[this->previousRight-rightOffset].getCurrentCell(),previousRight-rightOffset); - //std::cout << idPorcess << "\t>>-- sends right to " << (idxReceiver) << " index " << (previousRight+rightOffset) << std::endl; + ++rightOffset; } } } - //std::cout << idPorcess << ">>--rightOffset " << (rightOffset) << std::endl; + FDEBUG(sendCounter.tac()); leftOffsets[idxLevel+1] = leftOffset; rightOffsets[idxLevel+1] = rightOffset; - #pragma omp parallel num_threads(FThreadNumbers) +#pragma omp parallel { // received computed data - #pragma omp single +#pragma omp single { + FDEBUG(receiveCounter.tic()); int needToReceive = FMath::Max(0,-rightOffset) + FMath::Max(0,-leftOffset); CellClass tempCell; int source = 0, tag = 0, filled = 0; - //std::cout << idPorcess << ">>--Will receive " << needToReceive << std::endl; - while(needToReceive){ receiveData(sizeof(CellClass),&tempCell,&source,&tag,&filled); if(filled){ *previousIterArray[tag].getCurrentCell() = tempCell; } --needToReceive; - //std::cout << idPorcess << ">>receive tag " << (tag) << " tempCell.up " << tempCell.getDataUp() << " source " << source << std::endl; } - //std::cout << idPorcess << ">>--All receive--" << std::endl; + FDEBUG(receiveCounter.tac()); } - #pragma omp single nowait +#pragma omp single nowait { FDEBUG(computationCounter.tic()); } Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; - #pragma omp for +#pragma omp for for(int idxLeafs = 
startIdx ; idxLeafs < endIdx ; ++idxLeafs){ myThreadkernels->M2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentChild(), idxLevel); } - #pragma omp single nowait +#pragma omp single nowait { FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } } } else { - //std::cout << "I am out startIdx " << startIdx << " endIdx " << endIdx << std::endl; - int parentOffset = leafs - 1; MortonIndex parentIndex = iterArray[parentOffset].getCurrentGlobalIndex(); @@ -381,11 +379,9 @@ public: } const int idxReceiver = getProc(parentOffset,leafs,nbProcess); sendData(idxReceiver,sizeof(CellClass),previousIterArray[idxLeafs].getCurrentCell(),idxLeafs); - //std::cout << idPorcess << "\t>>-- sends all to " << (idxReceiver) << " index " << idxLeafs << std::endl; } leftOffsets[idxLevel+1] = (previousRight-previousLeft) + 1; - //std::cout << "left off set at " << idxLevel+1 << " = " << leftOffsets[idxLevel+1] << std::endl; } swapArray(); @@ -396,22 +392,28 @@ public: processBarrier(); } - //print(); - - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Downward + ///////////////////////////////////////////////////////////////////////////// + /** M2L L2L */ void downardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); - 
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); { // first M2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + FDEBUG(FTic sendCounter); + FDEBUG(FTic receiveCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); OctreeIterator avoidGotoLeftIterator(octreeIterator); @@ -450,17 +452,18 @@ public: alreadySent[idxProc] = new FBoolArray(leafs); } - //std::cout << "There are " << leafs << " leafs" << std::endl; - - //print(); - - #pragma omp parallel num_threads(FThreadNumbers) +#pragma omp parallel { CellClass* neighbors[208]; MortonIndex neighborsIndexes[208]; Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; - #pragma omp for +#pragma omp single nowait + { + FDEBUG(sendCounter.tic()); + } + +#pragma omp for for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ const int neighborsCounter = tree->getDistantNeighborsWithIndex(neighbors, neighborsIndexes, iterArray[idxLeafs].getCurrentGlobalIndex(),idxLevel); bool needData = false; @@ -495,17 +498,15 @@ public: // Find receiver and send him the cell const int idxReceiver = getProc(cellPositionInArray,leafs,nbProcess); - #pragma omp critical(CheckToSend) +#pragma omp critical(CheckToSend) { if(!alreadySent[idxReceiver]->get(idxLeafs)){ - //std::cout << idPorcess << ">>--idxLeafs " << (idxLeafs) << " idxReceiver " << (idxReceiver) - // << " cellPositionInArray " << (cellPositionInArray) << " indexCell " << indexCell<< std::endl; sendData(idxReceiver,sizeof(CellClass),iterArray[idxLeafs].getCurrentCell(),idxLeafs); alreadySent[idxReceiver]->set(idxLeafs,true); needData = true; } } - #pragma omp critical(CheckToReceive) +#pragma omp critical(CheckToReceive) { if(!alreadySent[idPorcess]->get(cellPositionInArray)){ ++needToReceive; @@ -516,7 +517,6 @@ public: } } 
if(needData){ - //std::cout << idPorcess << ">>this cell need data " << idxLeafs << " index " << iterArray[idxLeafs].getCurrentGlobalIndex() << " neighborsCounter " << neighborsCounter << std::endl; const int currentCell = idxLeafs - startIdx; unfinishedCells[currentCell] = new LimitCell(); unfinishedCells[currentCell]->counter = neighborsCounter; @@ -524,15 +524,19 @@ public: alreadySent[idPorcess]->set(idxLeafs,true); } else if(neighborsCounter){ - //std::cout << idPorcess << ">>compute directly " << idxLeafs << " index " << iterArray[idxLeafs].getCurrentGlobalIndex() << std::endl; myThreadkernels->M2L( iterArray[idxLeafs].getCurrentCell() , neighbors, neighborsCounter, idxLevel); } } +#pragma omp single nowait + { + FDEBUG(sendCounter.tac()); + } + // received computed data - #pragma omp single +#pragma omp single { - //std::cout << idPorcess << ">>--needToReceive " << (needToReceive) << std::endl; + FDEBUG(receiveCounter.tic()); CellClass tempCell; int source = 0, tag = 0, filled = 0; @@ -543,19 +547,27 @@ public: *iterArray[tag].getCurrentCell() = tempCell; } --needToReceive; - - //std::cout << idPorcess << ">>receive tag " << (tag) << " tempCell.up " << tempCell.getDataUp() << std::endl; } + FDEBUG(receiveCounter.tac()); } - #pragma omp for +#pragma omp single nowait + { + FDEBUG(computationCounter.tic()); + } + +#pragma omp for for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ if(alreadySent[idPorcess]->get(idxLeafs)){ - //std::cout << idPorcess << ">>finish to compute " << idxLeafs << " index " << iterArray[idxLeafs].getCurrentGlobalIndex() << std::endl; myThreadkernels->M2L( iterArray[idxLeafs].getCurrentCell() , unfinishedCells[idxLeafs-startIdx]->neighbors, unfinishedCells[idxLeafs-startIdx]->counter, idxLevel); delete unfinishedCells[idxLeafs-startIdx]; } } + +#pragma omp single nowait + { + FDEBUG(computationCounter.tac()); + } } delete [] unfinishedCells; @@ -567,18 +579,20 @@ public: processBarrier(); } + FDEBUG( FDebug::Controller << 
"\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" ); } - //print(); - - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( totalComputation = 0 ); { // second L2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + FDEBUG(FTic sendCounter); + FDEBUG(FTic receiveCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); @@ -589,61 +603,50 @@ public: const int idPorcess = processId(); // for each levels exepted leaf level - for(int idxLevel = 2 ; idxLevel <= heightMinusOne ; ++idxLevel ){ - //print(); - - // keep data - /*swapArray(); - this->previousLeft = startIdx; - this->previousRight = endIdx - 1; - this->previousSize = leafs;*/ - - int leafs = 0; - // for each cells - do{ - iterArray[leafs] = octreeIterator; - ++leafs; - } while(octreeIterator.moveRight()); - avoidGotoLeftIterator.moveDown(); - octreeIterator = avoidGotoLeftIterator; + for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ - const int startIdx = getLeft(idPorcess,leafs,nbProcess); - const int endIdx = getRight(idPorcess,leafs,nbProcess); + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; - std::cout << "At level " << idxLevel << " left " << startIdx << 
" right " << endIdx << std::endl; + const int startIdx = getLeft(idPorcess,leafs,nbProcess); + const int endIdx = getRight(idPorcess,leafs,nbProcess); - const int currentLeft = startIdx; - const int currentRight = endIdx -1; + const int currentLeft = startIdx; + const int currentRight = endIdx -1; - #pragma omp parallel num_threads(FThreadNumbers) +#pragma omp parallel { // send computed data - #pragma omp single nowait +#pragma omp single nowait { + FDEBUG(sendCounter.tic()); const int leftOffset = -leftOffsets[idxLevel]; for(int idxLeafs = 1 ; idxLeafs <= leftOffset ; ++idxLeafs){ const int idxReceiver = getProc((currentLeft-idxLeafs),leafs,nbProcess); sendData(idxReceiver,sizeof(CellClass),iterArray[currentLeft-idxLeafs].getCurrentCell(),currentLeft-idxLeafs); - //std::cout << idPorcess << "\t>>-- sends (1) to " << (idxReceiver) << " index " << (currentLeft-idxLeafs) << std::endl; } const int rightOffset = -rightOffsets[idxLevel]; for(int idxLeafs = 1 ; idxLeafs <= rightOffset ; ++idxLeafs){ const int idxReceiver = getProc((currentRight+idxLeafs),leafs,nbProcess); sendData(idxReceiver,sizeof(CellClass),iterArray[currentRight+idxLeafs].getCurrentCell(),currentRight+idxLeafs); - //std::cout << idPorcess << "\t>>-- sends (2) to " << (idxReceiver) << " index " << (currentRight+idxLeafs) << " currentRight " << currentRight << std::endl; } - //std::cout << idPorcess << ">>--Will send " << (leftOffset) << " and " << (rightOffset) << std::endl; + FDEBUG(sendCounter.tac()); } // received computed data - #pragma omp single +#pragma omp single { + FDEBUG(receiveCounter.tic()); int needToReceive = FMath::Max(0,rightOffsets[idxLevel]) + FMath::Max(0,leftOffsets[idxLevel]); CellClass tempCell; int source = 0, tag = 0, filled = 0; - //std::cout << idPorcess << ">>--needToReceive " << (needToReceive) << std::endl; - while(needToReceive){ receiveData(sizeof(CellClass),&tempCell,&source,&tag,&filled); if(filled){ @@ -651,40 +654,44 @@ public: } --needToReceive; - //std::cout << 
idPorcess << ">>receive tag " << (tag) << " tempCell.down " << tempCell.getDataDown() << " from " << source << std::endl; } - //std::cout << "all received" << std::endl; + FDEBUG(receiveCounter.tac()); } } - if(idxLevel != heightMinusOne){ - #pragma omp parallel num_threads(FThreadNumbers) + if(idxLevel != heightMinusOne){ + FDEBUG(computationCounter.tic()); +#pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; - #pragma omp for +#pragma omp for for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ myThreadkernels->L2L( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentChild(), idxLevel); } } - } - - processBarrier(); + FDEBUG(computationCounter.tac()); + processBarrier(); + } } - } - //print(); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" ); + } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Direct + ///////////////////////////////////////////////////////////////////////////// + /** P2P */ void directPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); const int LeafIndex = OctreeHeight - 1; int leafs = 0; @@ -698,7 +705,7 @@ public: } while(octreeIterator.moveRight()); } - 
FDEBUG(computationCounter.tic()); + FDEBUG(FTic computationCounter); const int nbProcess = processCount(); const int idPorcess = processId(); @@ -706,13 +713,13 @@ public: const int startIdx = getLeft(idPorcess,leafs,nbProcess); const int endIdx = getRight(idPorcess,leafs,nbProcess); - #pragma omp parallel num_threads(FThreadNumbers) +#pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; // There is a maximum of 26 neighbors FList<ParticleClass*>* neighbors[26]; - #pragma omp for +#pragma omp for for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets()); // need the current particles and neighbors particles @@ -722,12 +729,16 @@ public: } FDEBUG(computationCounter.tac()); - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } + ///////////////////////////////////////////////////////////////////////////// + // Test function + ///////////////////////////////////////////////////////////////////////////// + /** This function test the octree to be sure that the fmm algorithm * has worked completly. 
*/ @@ -740,8 +751,6 @@ public: OctreeIterator octreeIteratorValide(valideTree); octreeIteratorValide.gotoBottomLeft(); - //std::cout << "We start at level " << OctreeHeight - 1 << std::endl; - for(int level = OctreeHeight - 1 ; level > 0 ; --level){ int NbLeafs = 0; do{ @@ -752,7 +761,6 @@ public: const int startIdx = getLeft(processId(),NbLeafs,processCount()); const int endIdx = getRight(processId(),NbLeafs,processCount()); // Check that each particle has been summed with all other - //std::cout << "level " << level << " start " << startIdx << " end " << endIdx << std::endl; for(int idx = 0 ; idx < startIdx ; ++idx){ octreeIterator.moveRight(); @@ -828,23 +836,11 @@ public: } std::cout << "Done\n"; - - //print(); - //print(valideTree); - } - - void print(){ - OctreeIterator octreeIterator(tree); - for(int idxLevel = OctreeHeight - 1 ; idxLevel > 1 ; --idxLevel ){ - do{ - std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t"; - } while(octreeIterator.moveRight()); - std::cout << "\n"; - octreeIterator.gotoLeft(); - octreeIterator.moveDown(); - } } + /** To print an octree + * used to debug and understand how the values were passed + */ void print(Octree* const valideTree){ OctreeIterator octreeIterator(valideTree); for(int idxLevel = OctreeHeight - 1 ; idxLevel > 1 ; --idxLevel ){ diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp index e89f29e3b2853491d9a12c8e97ffecca9149dd52..b5a575753d215589d1dd3195b6756527d8c7825b 100644 --- a/Src/Core/FFmmAlgorithmThreadTsm.hpp +++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp @@ -40,13 +40,12 @@ class FFmmAlgorithmThreadTsm : protected FAssertable{ typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel; Octree* const tree; //< The octree to work on - Kernel* kernels[FThreadNumbers]; //< The kernels - - FDEBUG(FTic counterTime); //< In case of debug: to 
count the elapsed time - FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + Kernel** kernels; //< The kernels OctreeIterator* iterArray; + const int MaxThreads; + public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on @@ -54,12 +53,12 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmThreadTsm(Octree* const inTree, Kernel* const inKernels) - : tree(inTree) , iterArray(0) { + : tree(inTree) , kernels(0), iterArray(0), MaxThreads(omp_get_max_threads()) { assert(tree, "tree cannot be null", __LINE__, __FILE__); - assert(kernels, "kernels cannot be null", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels = new Kernel*[MaxThreads]; + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } @@ -68,9 +67,10 @@ public: /** Default destructor */ virtual ~FFmmAlgorithmThreadTsm(){ - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ delete this->kernels[idxThread]; } + delete [] this->kernels; } /** @@ -90,7 +90,7 @@ public: iterArray = new OctreeIterator[leafs]; assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ this->kernels[idxThread]->init(); } @@ -111,7 +111,7 @@ public: void bottomPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); - FDEBUG( counterTime.tic() ); + FDEBUG( FTic counterTime ); OctreeIterator octreeIterator(tree); int leafs = 0; @@ -122,8 +122,8 @@ public: ++leafs; } while(octreeIterator.moveRight()); - 
FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + FDEBUG(FTic computationCounter); + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -152,8 +152,8 @@ public: void upwardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); // Start from leal level - 1 OctreeIterator octreeIterator(tree); @@ -173,7 +173,7 @@ public: octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -199,23 +199,23 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } FDEBUG( counterTime.tac() ); FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } /** M2L L2L */ void downardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); { // first M2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); 
OctreeIterator avoidGotoLeftIterator(octreeIterator); @@ -232,7 +232,7 @@ public: octreeIterator = avoidGotoLeftIterator; FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; CellClass* neighbors[208]; @@ -259,17 +259,16 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( totalComputation = 0 ); { // second L2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); @@ -288,7 +287,7 @@ public: octreeIterator = avoidGotoLeftIterator; FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -308,13 +307,11 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " 
<< totalComputation << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } @@ -322,7 +319,7 @@ public: void directPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); int leafs = 0; { @@ -336,8 +333,8 @@ public: } const int heightMinusOne = OctreeHeight - 1; - FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + FDEBUG(FTic computationCounter); + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; // There is a maximum of 26 neighbors @@ -354,7 +351,7 @@ public: FDEBUG(computationCounter.tac()); FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } diff --git a/Src/Core/FFmmAlgorithmThreadUs.hpp b/Src/Core/FFmmAlgorithmThreadUs.hpp index c8bd5a14d4a7834993ea6e8e9d6c2331a83eccd0..5d73dd677e167117fec68a9bd3240014aae6c002 100644 --- a/Src/Core/FFmmAlgorithmThreadUs.hpp +++ b/Src/Core/FFmmAlgorithmThreadUs.hpp @@ -40,13 +40,12 @@ class FFmmAlgorithmThreadUs : protected FAssertable{ typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel; Octree* const tree; //< The octree to work on - Kernel* kernels[FThreadNumbers]; //< The kernels - - FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time - FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + Kernel** kernels; //< The kernels OctreeIterator* iterArray; + const int MaxThreads; + public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to 
work on @@ -54,12 +53,12 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmThreadUs(Octree* const inTree, Kernel* const inKernels) - : tree(inTree) , iterArray(0) { + : tree(inTree), kernels(0), iterArray(0), MaxThreads(omp_get_max_threads()) { assert(tree, "tree cannot be null", __LINE__, __FILE__); - assert(kernels, "kernels cannot be null", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels = new Kernel*[MaxThreads]; + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } @@ -68,9 +67,10 @@ public: /** Default destructor */ virtual ~FFmmAlgorithmThreadUs(){ - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ delete this->kernels[idxThread]; } + delete [] this->kernels; } /** @@ -90,7 +90,7 @@ public: iterArray = new OctreeIterator[leafs]; assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__); - for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ this->kernels[idxThread]->init(); } @@ -111,7 +111,7 @@ public: void bottomPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); OctreeIterator octreeIterator(tree); int leafs = 0; @@ -122,8 +122,8 @@ public: ++leafs; } while(octreeIterator.moveRight()); - FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + FDEBUG(FTic computationCounter); + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -135,8 +135,7 @@ public: } FDEBUG(computationCounter.tac()); - FDEBUG( counterTime.tac() ); - FDEBUG( 
FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } @@ -145,8 +144,8 @@ public: void upwardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); // Start from leal level - 1 OctreeIterator octreeIterator(tree); @@ -166,7 +165,7 @@ public: octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -177,23 +176,22 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } /** M2L L2L */ void downardPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( double totalComputation = 0 ); { // first M2L + FDEBUG( FDebug::Controller.write("\tStart 
Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); OctreeIterator avoidGotoLeftIterator(octreeIterator); @@ -210,7 +208,7 @@ public: octreeIterator = avoidGotoLeftIterator; FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; CellClass* neighbors[208]; @@ -221,17 +219,16 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); - FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); - FDEBUG( totalComputation = 0 ); { // second L2L + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG(FTic counterTime); + FDEBUG(FTic computationCounter); + OctreeIterator octreeIterator(tree); octreeIterator.moveDown(); @@ -250,7 +247,7 @@ public: octreeIterator = avoidGotoLeftIterator; FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; #pragma omp for @@ -259,13 +256,11 @@ public: } } FDEBUG(computationCounter.tac()); - FDEBUG(totalComputation += computationCounter.elapsed()); } + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " 
s\n" ); } - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); - FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } @@ -273,7 +268,7 @@ public: void directPass(){ FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); - FDEBUG( counterTime.tic() ); + FDEBUG(FTic counterTime); int leafs = 0; { @@ -287,8 +282,8 @@ public: } const int heightMinusOne = OctreeHeight - 1; - FDEBUG(computationCounter.tic()); - #pragma omp parallel num_threads(FThreadNumbers) + FDEBUG(FTic computationCounter); + #pragma omp parallel { Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; // There is a maximum of 26 neighbors @@ -304,8 +299,7 @@ public: } FDEBUG(computationCounter.tac()); - FDEBUG( counterTime.tac() ); - FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); } diff --git a/Src/ScalFmmConfig.h.cmake b/Src/ScalFmmConfig.h.cmake index b0b9f85ca0ec4efbdb1567fb8fb5d3040458c529..26775a7adc17494cec0caffc6f4fec0166f58ada 100644 --- a/Src/ScalFmmConfig.h.cmake +++ b/Src/ScalFmmConfig.h.cmake @@ -1,6 +1,17 @@ #ifndef SSCALFMMCONFIG_H #define SSCALFMMCONFIG_H -#cmakedefine FUSE_MKL_AS_BLAS +/////////////////////////////////////////////////////// +// Blas +/////////////////////////////////////////////////////// + +#cmakedefine SCALFMM_USE_CBLAS +#cmakedefine SCALFMM_USE_MKL_AS_BLAS + +/////////////////////////////////////////////////////// +// MPI +/////////////////////////////////////////////////////// + +#cmakedefine SCALFMM_USE_MPI 
#endif // CONFIG_H diff --git a/Src/Utils/FGlobal.hpp b/Src/Utils/FGlobal.hpp index c170160fa7cb6274d01b9e1beba6e13b7d27642f..aa8397c46795ba2262d31068d585e4a00dac6786 100644 --- a/Src/Utils/FGlobal.hpp +++ b/Src/Utils/FGlobal.hpp @@ -27,18 +27,6 @@ // Uncomment the next line to use trace mode //#define SCALFMM_USE_TRACE -/////////////////////////////////////////////////////// -// MPI -/////////////////////////////////////////////////////// - -#define SCALFMM_USE_MPI - -/////////////////////////////////////////////////////// -// Threads -/////////////////////////////////////////////////////// - -static const int FThreadNumbers = 1; - /////////////////////////////////////////////////////// // Types /////////////////////////////////////////////////////// diff --git a/Src/Utils/FTic.hpp b/Src/Utils/FTic.hpp index 51fe87db19fb833cfdb56e3e7175e71157edde1f..2a990aba46e6a26eb13b4b953867dd657c4c389d 100644 --- a/Src/Utils/FTic.hpp +++ b/Src/Utils/FTic.hpp @@ -29,12 +29,14 @@ */ class FTic { private: - double start; //< start time (tic) - double end; //< stop time (tac) + double start; //< start time (tic) + double end; //< stop time (tac) + double cumulate; //< the cumulate time public: /** Constructor */ - FTic() : start(0.0), end(0.0) { + FTic() : start(0.0), end(0.0), cumulate(0.0) { + tic(); } /** Tic : start <= current time */ @@ -45,6 +47,7 @@ public: /** Tac : end <= current time */ void tac(){ this->end = FTic::GetTime(); + cumulate += elapsed(); } /** Return end - start @@ -53,6 +56,19 @@ public: return this->end - this->start; } + /** Return cumulate + * @return the time elapsed between ALL tic & tac in second */ + double cumulated() const{ + return cumulate; + } + + /** Return end - start + * @return the time elapsed between tic & tac in second */ + double tacAndElapsed() { + tac(); + return elapsed(); + } + /** Global get time * @return a global time * GetTickCount on windows diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 
3fa605710327cbef8405e48aa2ab8dbf16d8b3f4..d4d6aaaf75b03327bc505fc45f5c3b216bc4e241 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -25,7 +25,7 @@ file( # Adding the project sources dir as an include dir INCLUDE_DIRECTORIES( - ${CMAKE_BINARY_DIR}/Sources + ${CMAKE_BINARY_DIR}/Src ) # Add execs - 1 cpp = 1 exec diff --git a/Tests/testTic.cpp b/Tests/testTic.cpp index 872876b0afb4791517ae780679d9dd9a26587c6e..da77a545d2c80deda3c22837e8f70aab79bebd76 100644 --- a/Tests/testTic.cpp +++ b/Tests/testTic.cpp @@ -15,16 +15,31 @@ int main(){ std::cout << ">> It is only interesting to wath the code to understand\n"; std::cout << ">> how to use FTic time counter.\n"; ////////////////////////////////////////////////////////////// - + { FTic counter; - counter.tic(); usleep(1500000); //Sleep(1500); //on windows counter.tac(); - std::cout << counter.elapsed() << " (s)\n"; - - return 0; + } + { + FTic counter; + usleep(1500000); + //Sleep(1500); //on windows + std::cout << counter.tacAndElapsed() << " (s)\n"; + } + { + FTic counter; + usleep(1500000); + //Sleep(1500); //on windows + counter.tac(); + counter.tic(); + usleep(1500000); + //Sleep(1500); //on windows + std::cout << counter.tacAndElapsed() << " (s)\n"; + std::cout << counter.cumulated() << " (s)\n"; + } + return 0; } diff --git a/UTests/CMakeLists.txt b/UTests/CMakeLists.txt index f705b02065cd3123c4145199faea3a4559328801..41deb71ffefe66b8486d09deb2e87656a3ca9df9 100644 --- a/UTests/CMakeLists.txt +++ b/UTests/CMakeLists.txt @@ -22,7 +22,7 @@ file( # Adding the project sources dir as an include dir INCLUDE_DIRECTORIES( - ${CMAKE_BINARY_DIR}/Sources + ${CMAKE_BINARY_DIR}/Src ) # Add execs - 1 cpp = 1 exec