diff --git a/CMakeLists.txt b/CMakeLists.txt index d19c1b840a5d66ade774f58240f8a03cbec8d83a..0de5e1ec4cc4120af766a39d5dc25e809e11808a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,6 @@ MESSAGE(STATUS " CXX ${CMAKE_CXX_COMPILER_ID}" ) # Options OPTION( ScalFMM_USE_BLAS "Set to ON to build ScaFMM with BLAS" OFF ) OPTION( ScalFMM_USE_FFT "Set to ON to build ScaFMM with FFTW" OFF ) -OPTION( ScalFMM_USE_TRACE "Set to ON to print trace or use itac trace" OFF ) OPTION( ScalFMM_BUILD_TESTS "Set to ON to build functionalities Tests" OFF ) OPTION( ScalFMM_BUILD_UTESTS "Set to ON to build UTests" OFF ) OPTION( ScalFMM_BUILD_DEBUG "Set to ON to build in Debug" OFF ) @@ -95,7 +94,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #-Wshadow -Wpointer-arith -Wcast-qual -Wconversion -Wall -Wnosign-conversion ") else() # NOT INTEL - SET(ScaLFMM_CXX_FLAGS "${ScaLFMM_CXX_FLAGS} -Wzero-as-null-pointer-constant") + if(NOT ${ScalFMM_USE_MPI}) + SET(ScaLFMM_CXX_FLAGS "${ScaLFMM_CXX_FLAGS} -Wzero-as-null-pointer-constant") + endif() SET(AVX_FLAGS "-mtune=native -march=native") IF (APPLE) SET(SSE_FLAGS "-msse3 -mfpmath=sse") # -mtune=native -march=native @@ -160,16 +161,7 @@ if( ScalFMM_ATTACHE_SOURCE ) # ADD_DEFINITIONS(-g) SET(ScaLFMM_CXX_FLAGS "${ScaLFMM_CXX_FLAGS} -g") endif(ScalFMM_ATTACHE_SOURCE) -############################################################################## -# Trace # -############################################################################## -MESSAGE( STATUS "ScalFMM_USE_TRACE = ${ScalFMM_USE_TRACE}" ) -if( ScalFMM_USE_TRACE ) - OPTION( ScalFMM_USE_ITAC "Set to ON to use itac trace" OFF ) - if( ScaLFMM_USE_ITAC ) - ADD_DEFINITIONS(-I$VT_ROOT/include -trace) - endif() -endif() + ############################################################################## # Blas option # ############################################################################## diff --git a/Src/AdaptiveTree/FAdaptChebSymKernel.hpp b/Src/AdaptiveTree/FAdaptChebSymKernel.hpp index eea36754dba2ddce7ff63ba84cc3b51197973274..3e465abc1d56f03f1051edd45191271fc93d9bb5 100755 --- a/Src/AdaptiveTree/FAdaptChebSymKernel.hpp +++ b/Src/AdaptiveTree/FAdaptChebSymKernel.hpp @@ -16,7 +16,7 @@ // =================================================================================== #include "Utils/FGlobal.hpp" -#include "Utils/FTrace.hpp" + #include "Utils/FPoint.hpp" #include "Adaptative/FAdaptiveCell.hpp" diff --git a/Src/AdaptiveTree/FAdaptSeqAlgorithm.hpp b/Src/AdaptiveTree/FAdaptSeqAlgorithm.hpp index 50d109ec55274197ff57a5a1f0f739109fecf2bb..12420f99c314e5b87fbafc8c25c8369387d0f40e 100755 --- a/Src/AdaptiveTree/FAdaptSeqAlgorithm.hpp +++ b/Src/AdaptiveTree/FAdaptSeqAlgorithm.hpp @@ -20,7 +20,7 @@ #include "Utils/FGlobal.hpp" #include "Utils/FAssert.hpp" #include "Utils/FLog.hpp" -#include "Utils/FTrace.hpp" + #include "Utils/FTic.hpp" #include "Containers/FOctree.hpp" @@ -70,9 +70,7 @@ public: * To execute the fmm algorithm * Call this function to run the complete algorithm */ - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - + void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ if(operationsToProceed & FFmmP2M) bottomPass(); if(operationsToProceed & FFmmM2M) upwardPass(); @@ -90,8 +88,7 @@ private: ///////////////////////////////////////////////////////////////////////////// /** P2M */ - void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); + void bottomPass(){ FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -125,8 +122,7 @@ private: ///////////////////////////////////////////////////////////////////////////// /** M2M */ - void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); + void upwardPass(){ FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -205,9 +201,7 @@ private: ///////////////////////////////////////////////////////////////////////////// /** M2L */ - void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - + void transferPass(){ FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -299,8 +293,7 @@ private: ///////////////////////////////////////////////////////////////////////////// /** L2L */ - void downardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); + void downardPass(){ FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter ); @@ -360,8 +353,7 @@ private: ///////////////////////////////////////////////////////////////////////////// /** P2P */ - void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); + void directPass(){ FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounterL2P); diff --git a/Src/AdaptiveTree/FAdaptUnifKernel.hpp b/Src/AdaptiveTree/FAdaptUnifKernel.hpp index cd3c021a4908e415d5ac7db82d2f31a340bedd2e..5bd70ae319a4aeadaf25dc6a0a16c50a8ec58e8b 100755 --- a/Src/AdaptiveTree/FAdaptUnifKernel.hpp +++ b/Src/AdaptiveTree/FAdaptUnifKernel.hpp @@ -16,7 +16,7 @@ // =================================================================================== #include "Utils/FGlobal.hpp" -#include "Utils/FTrace.hpp" + #include "Utils/FPoint.hpp" #include "Adaptative/FAdaptiveCell.hpp" diff --git a/Src/Components/FTestKernels.hpp b/Src/Components/FTestKernels.hpp index bed6b0486612faa04644daf2649a3ff42480858b..c705f0a1b73fab9b7320f4301b1087be591ef8a8 100755 --- a/Src/Components/FTestKernels.hpp +++ b/Src/Components/FTestKernels.hpp @@ -22,7 +22,7 @@ #include "FAbstractKernels.hpp" #include "../Containers/FOctree.hpp" #include "../Utils/FGlobal.hpp" -#include "../Utils/FTrace.hpp" + /** diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp index f180110d035d78a7cc86ac79024fb7deb45c0440..cc275c215a418f5ce76e0994d4edca2c73dd5ba0 100755 --- a/Src/Core/FFmmAlgorithm.hpp +++ b/Src/Core/FFmmAlgorithm.hpp @@ -20,7 +20,7 @@ #include "../Utils/FGlobal.hpp" #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Containers/FOctree.hpp" @@ -71,8 +71,6 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - if(operationsToProceed & FFmmP2M) bottomPass(); if(operationsToProceed & FFmmM2M) upwardPass(); @@ -91,7 +89,6 @@ private: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -118,7 +115,6 @@ private: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -160,8 +156,6 @@ private: /** M2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -204,7 +198,6 @@ private: /** L2L */ void downardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter ); @@ -244,7 +237,6 @@ private: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounterL2P); diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp index f56b53d9872c06d264bda5180da906013fed3937..8ffa9e4823f9ba27ff3cfd700c48fe255d1f21a6 100755 --- a/Src/Core/FFmmAlgorithmPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmPeriodic.hpp @@ -6,7 +6,7 @@ #include "../Utils/FGlobalPeriodic.hpp" #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Utils/FMemUtils.hpp" @@ -69,7 +69,6 @@ public: */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ FAssertLF(kernels, "kernels cannot be null"); - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); if(operationsToProceed & FFmmP2M) bottomPass(); @@ -93,7 +92,6 @@ public: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -120,7 +118,6 @@ public: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -161,8 +158,6 @@ public: /** M2L L2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -201,7 +196,6 @@ public: void downardPass(){ // second L2L - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter ); @@ -239,7 +233,6 @@ public: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounterL2P); @@ -513,7 +506,6 @@ public: * Finally the L2L */ void processPeriodicLevels(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Periodic Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp index e044bb8fef67b672b08f9c9494677e57f720a553..99c14d5cd05aaf6d751912be7586578ad102bf5e 100755 --- a/Src/Core/FFmmAlgorithmSectionTask.hpp +++ b/Src/Core/FFmmAlgorithmSectionTask.hpp @@ -20,7 +20,7 @@ #include "../Utils/FGlobal.hpp" #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Containers/FOctree.hpp" @@ -84,7 +84,6 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); #pragma omp parallel { @@ -119,7 +118,6 @@ private: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); @@ -148,7 +146,6 @@ private: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -189,8 +186,6 @@ private: /** M2L L2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -241,7 +236,6 @@ private: ///////////////////////////////////////////////////////////////////////////// void downardPass(){ // second L2L - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -280,7 +274,6 @@ private: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -330,7 +323,6 @@ private: } void L2PPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart L2P Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp index 8419122c1b12857c746f23335819d9f551290678..176eeea24f500f1fb6ba26bd0712acb73645df48 100755 --- a/Src/Core/FFmmAlgorithmTask.hpp +++ b/Src/Core/FFmmAlgorithmTask.hpp @@ -20,7 +20,7 @@ #include "../Utils/FGlobal.hpp" #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Containers/FOctree.hpp" @@ -84,7 +84,6 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); if(operationsToProceed & FFmmP2M) bottomPass(); @@ -104,7 +103,6 @@ private: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); @@ -138,7 +136,6 @@ private: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -185,7 +182,6 @@ private: /** M2L L2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -241,7 +237,6 @@ private: ///////////////////////////////////////////////////////////////////////////// void downardPass(){ // second L2L - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -286,7 +281,6 @@ private: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp index d8118067b45186fd2bb7102c8cede999576a9d29..683c3f74b0926474f9b4f0e0e11337a8c0d47344 100755 --- a/Src/Core/FFmmAlgorithmThread.hpp +++ b/Src/Core/FFmmAlgorithmThread.hpp @@ -19,7 +19,7 @@ #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Utils/FGlobal.hpp" @@ -92,7 +92,6 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ this->shapeLeaf[idxShape] = 0; @@ -132,7 +131,6 @@ private: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); @@ -169,7 +167,6 @@ private: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -220,7 +217,6 @@ private: /** M2L L2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -269,7 +265,6 @@ private: ///////////////////////////////////////////////////////////////////////////// void downardPass(){ // second L2L - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -318,7 +313,6 @@ private: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 1ab56ac5e37fa4254b5c6342c06fc2689dfc4131..60ec601fd55a4770ffe3ab0377de46593530b707 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -21,7 +21,7 @@ // #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Utils/FGlobal.hpp" @@ -167,12 +167,9 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace( __FUNCTION__, "Fmm" , __FILE__ , __LINE__ ) ); // Count leaf this->numberOfLeafs = 0; { - FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) ); - Interval myFullInterval; {//Building the interval with the first and last leaves (and count the number of leaves) typename OctreeClass::Iterator octreeIterator(tree); @@ -252,7 +249,7 @@ public: if(operationsToProceed & FFmmL2L) downardPass(); - if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPassOld(); + if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass(); // delete array @@ -274,7 +271,6 @@ private: * It is similar to multi threaded version. */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -288,12 +284,12 @@ private: } while(octreeIterator.moveRight()); FLOG(computationCounter.tic()); - #pragma omp parallel +#pragma omp parallel { // Each thread get its own kernel KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; // Parallel iteration on the leaves - #pragma omp for nowait +#pragma omp for nowait for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){ myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc()); } @@ -310,7 +306,6 @@ private: /** M2M */ void upwardPass(){ const int MaxSizePerCell = CellClass::GetSize(); - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -384,12 +379,12 @@ private: } FLOG(parallelCounter.tic()); - #pragma omp parallel +#pragma omp parallel { const int threadNumber = omp_get_thread_num(); KernelClass* myThreadkernels = (kernels[threadNumber]); //This single section post and receive the comms, and then do the M2M associated with it. - #pragma omp single nowait +#pragma omp single nowait { FLOG(singleCounter.tic()); // Master proc never send @@ -485,7 +480,7 @@ private: }//End Of Single section // All threads proceed the M2M - #pragma omp for nowait +#pragma omp for nowait for( int idxCell = nbCellsToSkip ; idxCell < nbCellsForThreads ; ++idxCell){ myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel); } @@ -508,8 +503,6 @@ private: void transferPass(){ const int MaxSizePerCell = CellClass::GetSize(); - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -542,12 +535,11 @@ private: FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight]; memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess * OctreeHeight); - #pragma omp parallel +#pragma omp parallel { - #pragma omp master +#pragma omp master { { - FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) ); FLOG(prepareCounter.tic()); std::unique_ptr iterArrayLocal(new typename OctreeClass::Iterator[numberOfLeafs]); @@ -696,7 +688,7 @@ private: KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; const CellClass* neighbors[343]; - #pragma omp single nowait +#pragma omp single nowait { typename OctreeClass::Iterator octreeIterator(tree); octreeIterator.moveDown(); @@ -726,7 +718,7 @@ private: { const int chunckSize = FMath::Max(1, numberOfCells/(omp_get_num_threads()*omp_get_num_threads())); for(int idxCell = 0 ; idxCell < numberOfCells ; idxCell += chunckSize){ - #pragma omp task +#pragma omp task { const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell); for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){ @@ -737,10 +729,10 @@ private: } } - #pragma omp taskwait +#pragma omp taskwait for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){ - #pragma omp task +#pragma omp task { kernels[idxThread]->finishedLevelM2L(idxLevel); } @@ -753,7 +745,6 @@ private: { - FTRACE( FTrace::FRegion regionTrace("Compute Received data", __FUNCTION__ , __FILE__ , __LINE__) ); FLOG(receiveCounter.tic()); typename OctreeClass::Iterator octreeIterator(tree); octreeIterator.moveDown(); @@ -807,14 +798,14 @@ private: // Compute this cells FLOG(computationCounter.tic()); - #pragma omp parallel +#pragma omp parallel { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; MortonIndex neighborsIndex[189]; int neighborsPosition[189]; const CellClass* neighbors[343]; - #pragma omp for schedule(static) nowait +#pragma omp for schedule(static) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes memset(neighbors, 0, 343 * sizeof(CellClass*)); @@ -877,7 +868,6 @@ private: void downardPass(){ // second L2L const int MaxSizePerCell = CellClass::GetSize(); - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -937,18 +927,18 @@ private: } } - #pragma omp parallel +#pragma omp parallel { int threadNumber = omp_get_thread_num(); KernelClass* myThreadkernels = (kernels[threadNumber]); - #pragma omp single +#pragma omp single nowait { FLOG(prepareCounter.tic()); int iterRequests = 0; // Post the receive if(hasToReceive){ FMpi::MpiAssert( MPI_Irecv( recvBuffer.data(), recvBuffer.getCapacity(), MPI_PACKED, idxProcToReceive, - FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__ ); + FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__ ); } // We have to be sure that we are not sending if we have no work in the current level @@ -967,7 +957,7 @@ private: } // Post the send message FMpi::MpiAssert( MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, idxProcSend, - FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__); + FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__); // Inc and check the counter nbMessageSent += 1; FAssertLF(nbMessageSent <= 7); @@ -995,12 +985,12 @@ private: FLOG(prepareCounter.tac()); } - #pragma omp single nowait +#pragma omp single nowait { FLOG(computationCounter.tic()); } // Threads are working on all the cell of our working interval at that level - #pragma omp for nowait +#pragma omp for nowait for(int idxCell = nbCellsToSkip ; idxCell < totalNbCellsAtLevel ; ++idxCell){ myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel); } @@ -1033,13 +1023,13 @@ private: /** P2P */ - void directPassOld(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); + void directPass(){ FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG( FTic counterTime); FLOG( FTic prepareCounter); FLOG( FTic gatherCounter); FLOG( FTic waitCounter); + FLOG(FTic computationCounter); /////////////////////////////////////////////////// // Prepare data to send receive @@ -1059,129 +1049,17 @@ private: memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess); /* This a nbProcess x nbProcess matrix of integer - * let U and V be id of processes : - * globalReceiveMap[U*nbProcess + V] == size of information needed by V and own by U - */ + * let U and V be id of processes : + * globalReceiveMap[U*nbProcess + V] == size of information needed by V and own by U + */ int*const globalReceiveMap = new int[nbProcess * nbProcess]; memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess); FBoolArray leafsNeedOther(this->numberOfLeafs); int countNeedOther = 0; - { - FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) ); - // Copy leafs - { - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - int idxLeaf = 0; - do{ - this->iterArray[idxLeaf++] = octreeIterator; - } while(octreeIterator.moveRight()); - } - - // Number of cells max - //const int limite = 1 << (this->OctreeHeight - 1); - // pointer to send - FVector*const toSend = new FVector[nbProcess]; - - // array that will be send to other processus for them to build the globalReceiveMap - int partsToSend[nbProcess]; - memset(partsToSend, 0, sizeof(int) * nbProcess); - - // To know if a leaf has been already sent to a proc - int alreadySent[nbProcess]; - - //Will store the indexes of the neighbors of current cell - MortonIndex indexesNeighbors[26]; - //Obviously unused - //int uselessIndexArray[26]; - - for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){ - memset(alreadySent, 0, sizeof(int) * nbProcess); - bool needOther = false; - //Get the neighbors of current cell in indexesNeighbors, and their number in neighCount - const int neighCount = (iterArray[idxLeaf].getCurrentGlobalCoordinate()).getNeighborsIndexes(OctreeHeight,indexesNeighbors); - //Loop over the neighbor leafs - for(int idxNeigh = 0 ; idxNeigh < neighCount ; ++idxNeigh){ - //Test if leaf belongs to someone else (false if it's mine) - if(indexesNeighbors[idxNeigh] < (intervals[idProcess].leftIndex) || (intervals[idProcess].rightIndex) < indexesNeighbors[idxNeigh]){ - needOther = true; - - // find the proc that will need current leaf - int procToReceive = idProcess; - while( procToReceive != 0 && indexesNeighbors[idxNeigh] < intervals[procToReceive].leftIndex){ - --procToReceive; //scroll process "before" current process - } - - while( procToReceive != nbProcess - 1 && (intervals[procToReceive].rightIndex) < indexesNeighbors[idxNeigh]){ - ++procToReceive;//scroll process "after" current process - } - // Test : Not Already Send && USELESS TEST ? - if( !alreadySent[procToReceive] && intervals[procToReceive].leftIndex <= indexesNeighbors[idxNeigh] && indexesNeighbors[idxNeigh] <= intervals[procToReceive].rightIndex){ - - alreadySent[procToReceive] = 1; - toSend[procToReceive].push( iterArray[idxLeaf] ); - partsToSend[procToReceive] += iterArray[idxLeaf].getCurrentListSrc()->getSavedSize(); - partsToSend[procToReceive] += int(sizeof(MortonIndex)); - } - } - } - - if(needOther){ //means that something need to be sent (or received) - leafsNeedOther.set(idxLeaf,true); - ++countNeedOther; - } - } - - // No idea why it is mandatory there, could it be a few line before, - for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ - if(partsToSend[idxProc]){ - partsToSend[idxProc] += int(sizeof(int)); - } - } - - //Share to all processus globalReceiveMap - FLOG(gatherCounter.tic()); - FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, comm.getComm()), __LINE__ ); - FLOG(gatherCounter.tac()); - - //Prepare receive - for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ - if(globalReceiveMap[idxProc * nbProcess + idProcess]){ //if idxProc has sth for me. - //allocate buffer of right size - recvBuffer[idxProc] = new FMpiBufferReader(comm.getComm(),globalReceiveMap[idxProc * nbProcess + idProcess]); - FMpi::MpiAssert( MPI_Irecv(recvBuffer[idxProc]->data(), recvBuffer[idxProc]->getCapacity(), MPI_PACKED, - idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ ); - } - } - - nbMessagesToRecv = iterRequest; - // Prepare send - for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ - if(toSend[idxProc].getSize() != 0){ - sendBuffer[idxProc] = new FMpiBufferWriter(comm.getComm(),globalReceiveMap[idProcess*nbProcess+idxProc]); - // << is equivalent to write(). - (*sendBuffer[idxProc]) << toSend[idxProc].getSize(); - for(int idxLeaf = 0 ; idxLeaf < toSend[idxProc].getSize() ; ++idxLeaf){ - (*sendBuffer[idxProc]) << toSend[idxProc][idxLeaf].getCurrentGlobalIndex(); - toSend[idxProc][idxLeaf].getCurrentListSrc()->save(*sendBuffer[idxProc]); - } - //TEST BERENGER - //if(sendBuffer[idxProc]->getSize() != partsToSend[idxProc]){ - FMpi::MpiAssert( MPI_Isend( sendBuffer[idxProc]->data(), sendBuffer[idxProc]->getSize() , MPI_PACKED , - idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ ); - - } - } - - delete[] toSend; - } - FLOG(prepareCounter.tac()); - - /////////////////////////////////////////////////// - // Prepare data for thread P2P - /////////////////////////////////////////////////// + // To store the result + OctreeClass otherP2Ptree( tree->getHeight(), tree->getSubHeight(), tree->getBoxWidth(), tree->getBoxCenter() ); // init const int LeafIndex = OctreeHeight - 1; @@ -1194,303 +1072,14 @@ private: FVector leafsNeedOtherData(countNeedOther); - // split data - { - FTRACE( FTrace::FRegion regionTrace( "Split" , __FUNCTION__ , __FILE__ , __LINE__) ); - - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - - // to store which shape for each leaf - typename OctreeClass::Iterator* const myLeafs = new typename OctreeClass::Iterator[this->numberOfLeafs]; - int*const shapeType = new int[this->numberOfLeafs]; - - for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){ - myLeafs[idxLeaf] = octreeIterator; - - const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); - const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); - shapeType[idxLeaf] = shape; - - ++shapeLeaf[shape]; - - octreeIterator.moveRight(); - } - - int startPosAtShape[SizeShape]; - startPosAtShape[0] = 0; - for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){ - startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaf[idxShape-1]; - } - - int idxInArray = 0; - for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf, ++idxInArray){ - const int shapePosition = shapeType[idxInArray]; - - leafsDataArray[startPosAtShape[shapePosition]].coord = myLeafs[idxInArray].getCurrentGlobalCoordinate(); - leafsDataArray[startPosAtShape[shapePosition]].cell = myLeafs[idxInArray].getCurrentCell(); - leafsDataArray[startPosAtShape[shapePosition]].targets = myLeafs[idxInArray].getCurrentListTargets(); - leafsDataArray[startPosAtShape[shapePosition]].sources = myLeafs[idxInArray].getCurrentListSrc(); - if( leafsNeedOther.get(idxLeaf) ) leafsNeedOtherData.push(leafsDataArray[startPosAtShape[shapePosition]]); - - ++startPosAtShape[shapePosition]; - } - - delete[] shapeType; - delete[] myLeafs; - } - - - ////////////////////////////////////////////////////////// - // Computation P2P that DO NOT need others data - ////////////////////////////////////////////////////////// - FTRACE( FTrace::FRegion regionP2PTrace("Compute P2P", __FUNCTION__ , __FILE__ , __LINE__) ); - - FLOG(FTic computationCounter); - -#pragma omp parallel - { - KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]); - // There is a maximum of 26 neighbors - ContainerClass* neighbors[27]; - int previous = 0; - - for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ - const int endAtThisShape = shapeLeaf[idxShape] + previous; - -#pragma omp for - for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){ - LeafData& currentIter = leafsDataArray[idxLeafs]; - myThreadkernels.L2P(currentIter.cell, currentIter.targets); - - // need the current particles and neighbors particles - const int counter = tree->getLeafsNeighbors(neighbors, currentIter.coord, LeafIndex); - myThreadkernels.P2P( currentIter.coord,currentIter.targets, - currentIter.sources, neighbors, counter); - - } - previous = endAtThisShape; - } - } - FLOG(computationCounter.tac()); - FTRACE( regionP2PTrace.end() ); - - ////////////////////////////////////////////////////////// - // Waitsend receive - ////////////////////////////////////////////////////////// - - FLOG(FTic computation2Counter); - - // Create an octree with leaves from others - OctreeClass otherP2Ptree( tree->getHeight(), tree->getSubHeight(), tree->getBoxWidth(), tree->getBoxCenter() ); - int complete = 0; - int*const indexMessage = new int[nbProcess * 2]; - while( complete != iterRequest){ - memset(indexMessage, 0, sizeof(int) * nbProcess * 2); - int countMessages = 0; - // Wait data - FLOG(waitCounter.tic()); - MPI_Waitsome(iterRequest, requests, &countMessages, indexMessage, status); - - FLOG(waitCounter.tac()); - complete += countMessages; - - - for(int idxRcv = 0 ; idxRcv < countMessages ; ++idxRcv){ - if( indexMessage[idxRcv] < nbMessagesToRecv ){ - const int idxProc = status[idxRcv].MPI_SOURCE; - int nbLeaves; - (*recvBuffer[idxProc]) >> nbLeaves; - for(int idxLeaf = 0 ; idxLeaf < nbLeaves ; ++idxLeaf){ - MortonIndex leafIndex; - (*recvBuffer[idxProc]) >> leafIndex; - otherP2Ptree.createLeaf(leafIndex)->getSrc()->restore((*recvBuffer[idxProc])); - } - delete recvBuffer[idxProc]; - recvBuffer[idxProc] = nullptr; - } - } - } - delete[] indexMessage; - - ////////////////////////////////////////////////////////// - // Computation P2P that need others data - ////////////////////////////////////////////////////////// - - FTRACE( FTrace::FRegion regionOtherTrace("Compute P2P Other", __FUNCTION__ , __FILE__ , __LINE__) ); - FLOG( computation2Counter.tic() ); - -#pragma omp parallel - { - KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]); - // There is a maximum of 26 neighbors - ContainerClass* neighbors[27]; - MortonIndex indexesNeighbors[27]; - int indexArray[26]; - // Box limite - const int nbLeafToProceed = leafsNeedOtherData.getSize(); - -#pragma omp for - for(int idxLeafs = 0 ; idxLeafs < nbLeafToProceed ; ++idxLeafs){ - LeafData currentIter = leafsNeedOtherData[idxLeafs]; - - // need the current particles and neighbors particles - int counter = 0; - memset( neighbors, 0, sizeof(ContainerClass*) * 27); - - // Take possible data - const int nbNeigh = currentIter.coord.getNeighborsIndexes(OctreeHeight, indexesNeighbors, indexArray); - - for(int idxNeigh = 0 ; idxNeigh < nbNeigh ; ++idxNeigh){ - if(indexesNeighbors[idxNeigh] < (intervals[idProcess].leftIndex) || (intervals[idProcess].rightIndex) < indexesNeighbors[idxNeigh]){ - ContainerClass*const hypotheticNeighbor = otherP2Ptree.getLeafSrc(indexesNeighbors[idxNeigh]); - if(hypotheticNeighbor){ - neighbors[ indexArray[idxNeigh] ] = hypotheticNeighbor; - ++counter; - } - } - } - myThreadkernels.P2PRemote( currentIter.cell->getCoordinate(), currentIter.targets, - currentIter.sources, neighbors, counter); - - } - - } - - for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ - delete sendBuffer[idxProc]; - delete recvBuffer[idxProc]; - } - delete[] globalReceiveMap; - delete[] leafsDataArray; - - FLOG(computation2Counter.tac()); - - - FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = " << counterTime.tacAndElapsed() << " s)\n" ); - FLOG( FLog::Controller << "\t\t Computation L2P + P2P : " << computationCounter.elapsed() << " s\n" ); - FLOG( FLog::Controller << "\t\t Computation P2P 2 : " << computation2Counter.elapsed() << " s\n" ); - FLOG( FLog::Controller << "\t\t Prepare P2P : " << prepareCounter.elapsed() << " s\n" ); - FLOG( FLog::Controller << "\t\t Gather P2P : " << gatherCounter.elapsed() << " s\n" ); - FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.elapsed() << " s\n" ); - - } - - /** P2P */ - void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); - FLOG( FTic counterTime); - FLOG( FTic prepareCounter); - FLOG( FTic gatherCounter); - FLOG( FTic waitCounter); - FLOG(FTic computation2Counter); - FLOG(FTic computationCounter); - - /////////////////////////////////////////////////// - // Prepar data to send receive - /////////////////////////////////////////////////// - FLOG(prepareCounter.tic()); - - // To send in asynchronous way - MPI_Request requests[2 * nbProcess]; - MPI_Status status[2 * nbProcess]; - int iterRequest = 0; - int nbMessagesToRecv = 0; - - FMpiBufferWriter**const sendBuffer = new FMpiBufferWriter*[nbProcess]; - memset(sendBuffer, 0, sizeof(FMpiBufferWriter*) * nbProcess); - - FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess]; - memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess); - - /* This a nbProcess x nbProcess matrix of integer - * let U and V be id of processes : - * globalReceiveMap[U*nbProcess + V] == size of information needed by V and own by U - */ - int*const globalReceiveMap = new int[nbProcess * nbProcess]; - memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess); - - FVector * leafsNeedOtherData; - LeafData* leafsDataArray; - OctreeClass* otherP2Ptree; - - FBoolArray leafsNeedOther(this->numberOfLeafs); - int countNeedOther = 0; - - /////////////////////////////////////////////////// - // Prepare data for thread P2P - /////////////////////////////////////////////////// - - // init - const int LeafIndex = OctreeHeight - 1; - const int SizeShape = 3*3*3; - - int shapeLeaf[SizeShape]; - memset(shapeLeaf,0,SizeShape*sizeof(int)); - - leafsDataArray = new LeafData[this->numberOfLeafs]; - - leafsNeedOtherData = new FVector(countNeedOther); - - - // This first part is sequential, we split the datas between - // colors to avoid writing concurrency later with omp threads - - // split data - { - FTRACE( FTrace::FRegion regionTrace( "Split" , __FUNCTION__ , __FILE__ , __LINE__) ); - - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - - // to store which shape for each leaf - typename OctreeClass::Iterator* const myLeafs = new typename OctreeClass::Iterator[this->numberOfLeafs]; - int*const shapeType = new int[this->numberOfLeafs]; - - for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){ - myLeafs[idxLeaf] = octreeIterator; - - const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); - const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); - shapeType[idxLeaf] = shape; - - ++shapeLeaf[shape]; - - octreeIterator.moveRight(); - } - - int startPosAtShape[SizeShape]; - startPosAtShape[0] = 0; - for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){ - startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaf[idxShape-1]; - } - - int idxInArray = 0; - for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf, ++idxInArray){ - const int shapePosition = shapeType[idxInArray]; - - leafsDataArray[startPosAtShape[shapePosition]].coord = myLeafs[idxInArray].getCurrentGlobalCoordinate(); - leafsDataArray[startPosAtShape[shapePosition]].cell = myLeafs[idxInArray].getCurrentCell(); - leafsDataArray[startPosAtShape[shapePosition]].targets = myLeafs[idxInArray].getCurrentListTargets(); - leafsDataArray[startPosAtShape[shapePosition]].sources = myLeafs[idxInArray].getCurrentListSrc(); - if( leafsNeedOther.get(idxLeaf) ) leafsNeedOtherData->push(leafsDataArray[startPosAtShape[shapePosition]]); + FVector*const toSend = new FVector[nbProcess]; + int partsToSend[nbProcess]; + memset(partsToSend, 0, sizeof(int) * nbProcess); - ++startPosAtShape[shapePosition]; - } - - delete[] shapeType; - delete[] myLeafs; - } - - //At this point, we start with the parallel section - //One thread will be in charge of communication - //Two comm : AllGather then iSend and IRecv -#pragma omp parallel + #pragma omp parallel { -#pragma omp single nowait + #pragma omp master // MUST WAIT to fill leafsNeedOther { - FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) ); // Copy leafs { typename OctreeClass::Iterator octreeIterator(tree); @@ -1501,22 +1090,10 @@ private: } while(octreeIterator.moveRight()); } - // Number of cells max - //const int limite = 1 << (this->OctreeHeight - 1); - // pointer to send - FVector*const toSend = new FVector[nbProcess]; - - // array that will be send to other processus for them to build the globalReceiveMap - int partsToSend[nbProcess]; - memset(partsToSend, 0, sizeof(int) * nbProcess); - - // To know if a leaf has been already sent to a proc int alreadySent[nbProcess]; //Will store the indexes of the neighbors of current cell MortonIndex indexesNeighbors[26]; - //Obviously unused - //int uselessIndexArray[26]; for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){ memset(alreadySent, 0, sizeof(int) * nbProcess); @@ -1538,7 +1115,7 @@ private: while( procToReceive != nbProcess - 1 && (intervals[procToReceive].rightIndex) < indexesNeighbors[idxNeigh]){ ++procToReceive;//scroll process "after" current process } - // Test : Not Already Send && USELESS TEST ? + // Test : Not Already Send && be sure someone hold this interval if( !alreadySent[procToReceive] && intervals[procToReceive].leftIndex <= indexesNeighbors[idxNeigh] && indexesNeighbors[idxNeigh] <= intervals[procToReceive].rightIndex){ alreadySent[procToReceive] = 1; @@ -1561,7 +1138,12 @@ private: partsToSend[idxProc] += int(sizeof(int)); } } + } + #pragma omp barrier + + #pragma omp master // nowait + { //Share to all processus globalReceiveMap FLOG(gatherCounter.tic()); FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, comm.getComm()), __LINE__ ); @@ -1588,8 +1170,7 @@ private: (*sendBuffer[idxProc]) << toSend[idxProc][idxLeaf].getCurrentGlobalIndex(); toSend[idxProc][idxLeaf].getCurrentListSrc()->save(*sendBuffer[idxProc]); } - //TEST BERENGER - //if(sendBuffer[idxProc]->getSize() != partsToSend[idxProc]){ + FMpi::MpiAssert( MPI_Isend( sendBuffer[idxProc]->data(), sendBuffer[idxProc]->getSize() , MPI_PACKED , idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ ); @@ -1597,14 +1178,12 @@ private: } delete[] toSend; + + ////////////////////////////////////////////////////////// // Waitsend receive ////////////////////////////////////////////////////////// - FLOG(computation2Counter.tic()); - - // Create an octree with leaves from others - otherP2Ptree = new OctreeClass( tree->getHeight(), tree->getSubHeight(), tree->getBoxWidth(), tree->getBoxCenter() ); int complete = 0; int*const indexMessage = new int[nbProcess * 2]; while( complete != iterRequest){ @@ -1626,72 +1205,131 @@ private: for(int idxLeaf = 0 ; idxLeaf < nbLeaves ; ++idxLeaf){ MortonIndex leafIndex; (*recvBuffer[idxProc]) >> leafIndex; - otherP2Ptree->createLeaf(leafIndex)->getSrc()->restore((*recvBuffer[idxProc])); + otherP2Ptree.createLeaf(leafIndex)->getSrc()->restore((*recvBuffer[idxProc])); } delete recvBuffer[idxProc]; - recvBuffer[idxProc] = 0; + recvBuffer[idxProc] = nullptr; } } } delete[] indexMessage; + } + + /////////////////////////////////////////////////// + // Prepare data for thread P2P + /////////////////////////////////////////////////// + + #pragma omp single // MUST WAIT! + { + typename OctreeClass::Iterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + + // to store which shape for each leaf + typename OctreeClass::Iterator* const myLeafs = new typename OctreeClass::Iterator[this->numberOfLeafs]; + int*const shapeType = new int[this->numberOfLeafs]; + + for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){ + myLeafs[idxLeaf] = octreeIterator; + + const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); + const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + shapeType[idxLeaf] = shape; + + ++shapeLeaf[shape]; + + octreeIterator.moveRight(); + } + + int startPosAtShape[SizeShape]; + startPosAtShape[0] = 0; + for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){ + startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaf[idxShape-1]; + } + + int idxInArray = 0; + for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf, ++idxInArray){ + const int shapePosition = shapeType[idxInArray]; + + leafsDataArray[startPosAtShape[shapePosition]].coord = myLeafs[idxInArray].getCurrentGlobalCoordinate(); + leafsDataArray[startPosAtShape[shapePosition]].cell = myLeafs[idxInArray].getCurrentCell(); + leafsDataArray[startPosAtShape[shapePosition]].targets = myLeafs[idxInArray].getCurrentListTargets(); + leafsDataArray[startPosAtShape[shapePosition]].sources = myLeafs[idxInArray].getCurrentListSrc(); + if( leafsNeedOther.get(idxLeaf) ) leafsNeedOtherData.push(leafsDataArray[startPosAtShape[shapePosition]]); + + ++startPosAtShape[shapePosition]; + } + + delete[] shapeType; + delete[] myLeafs; - }//End single section + FLOG(prepareCounter.tac()); + } - FLOG(prepareCounter.tac()); ////////////////////////////////////////////////////////// // Computation P2P that DO NOT need others data ////////////////////////////////////////////////////////// - FTRACE( FTrace::FRegion regionP2PTrace("Compute P2P", __FUNCTION__ , __FILE__ , __LINE__) ); - FLOG(computationCounter.tic()); + { + KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]); - KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]); - // There is a maximum of 26 neighbors - ContainerClass* neighbors[27]; - int previous = 0; + #pragma omp single nowait + { + FLOG(computationCounter.tic()); + int previous = 0; - for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ - const int endAtThisShape = shapeLeaf[idxShape] + previous; + for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ + const int endAtThisShape = shapeLeaf[idxShape] + previous; + const int chunckSize = FMath::Max(1, (endAtThisShape-previous)/(omp_get_num_threads()*omp_get_num_threads())); -#pragma omp for schedule(auto) - for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){ - LeafData& currentIter = leafsDataArray[idxLeafs]; - myThreadkernels.L2P(currentIter.cell, currentIter.targets); + for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; idxLeafs += chunckSize){ + const int nbLeavesInTask = FMath::Min(endAtThisShape-idxLeafs, chunckSize); + #pragma omp task + { + // There is a maximum of 26 neighbors + ContainerClass* neighbors[27]; - // need the current particles and neighbors particles - const int counter = tree->getLeafsNeighbors(neighbors, currentIter.coord, LeafIndex); - myThreadkernels.P2P( currentIter.coord,currentIter.targets, - currentIter.sources, neighbors, counter); - } + for(int idxTaskLeaf = idxLeafs ; idxTaskLeaf < (idxLeafs + nbLeavesInTask) ; ++idxTaskLeaf){ + LeafData& currentIter = leafsDataArray[idxTaskLeaf]; + myThreadkernels->L2P(currentIter.cell, currentIter.targets); - previous = endAtThisShape; - } - //} + // need the current particles and neighbors particles + const int counter = tree->getLeafsNeighbors(neighbors, currentIter.coord, LeafIndex); + myThreadkernels->P2P( currentIter.coord,currentIter.targets, + currentIter.sources, neighbors, counter); + } + } + } + previous = endAtThisShape; + #pragma omp taskwait + } + FLOG(computationCounter.tac()); + } + } - FLOG(computationCounter.tac()); - FTRACE( regionP2PTrace.end() ); + // Wait the come to finish (and the previous computation also) + #pragma omp barrier ////////////////////////////////////////////////////////// // Computation P2P that need others data ////////////////////////////////////////////////////////// - - FTRACE( FTrace::FRegion regionOtherTrace("Compute P2P Other", __FUNCTION__ , __FILE__ , __LINE__) ); + #pragma omp master + { FLOG( computation2Counter.tic() ); } { - /*KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);*/ + KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]); // There is a maximum of 26 neighbors - /*ContainerClass* neighbors[27];*/ + ContainerClass* neighbors[27]; MortonIndex indexesNeighbors[27]; int indexArray[26]; // Box limite - const int nbLeafToProceed = leafsNeedOtherData->getSize(); + const int nbLeafToProceed = leafsNeedOtherData.getSize(); -#pragma omp for schedule(auto) nowait + #pragma omp for schedule(static) for(int idxLeafs = 0 ; idxLeafs < nbLeafToProceed ; ++idxLeafs){ - LeafData currentIter = (*leafsNeedOtherData)[idxLeafs]; + LeafData currentIter = leafsNeedOtherData[idxLeafs]; // need the current particles and neighbors particles int counter = 0; @@ -1702,20 +1340,20 @@ private: for(int idxNeigh = 0 ; idxNeigh < nbNeigh ; ++idxNeigh){ if(indexesNeighbors[idxNeigh] < (intervals[idProcess].leftIndex) || (intervals[idProcess].rightIndex) < indexesNeighbors[idxNeigh]){ - ContainerClass*const hypotheticNeighbor = otherP2Ptree->getLeafSrc(indexesNeighbors[idxNeigh]); + ContainerClass*const hypotheticNeighbor = otherP2Ptree.getLeafSrc(indexesNeighbors[idxNeigh]); if(hypotheticNeighbor){ neighbors[ indexArray[idxNeigh] ] = hypotheticNeighbor; ++counter; } } } - myThreadkernels.P2PRemote( currentIter.cell->getCoordinate(), currentIter.targets, currentIter.sources, neighbors, counter); - }//End For + + } } - }//End parallel section + } for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ delete sendBuffer[idxProc]; @@ -1735,7 +1373,6 @@ private: FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.elapsed() << " s\n" ); } - }; diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp index ee0e1066f6e987af3ba8d6a600ce49edbdb0c687..0eee73e3c0096fca1d691e76303d2181148014e5 100755 --- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp @@ -19,7 +19,7 @@ #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Utils/FGlobal.hpp" #include "../Utils/FMemUtils.hpp" diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp index 317001c7c7ad039ded46545bcd02c4f277a41c7e..f1c212f4d57a90eefcd958bcdfa8cee311480a6d 100755 --- a/Src/Core/FFmmAlgorithmThreadTsm.hpp +++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp @@ -19,7 +19,7 @@ #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Utils/FGlobal.hpp" @@ -88,8 +88,6 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - // Count leaf int numberOfLeafs = 0; typename OctreeClass::Iterator octreeIterator(tree); @@ -118,7 +116,6 @@ public: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG( FTic counterTime ); @@ -159,7 +156,6 @@ public: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -220,8 +216,6 @@ public: /** M2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -283,8 +277,6 @@ public: /* L2L */ void downardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); - FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); FLOG(FTic computationCounter); @@ -339,7 +331,6 @@ public: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp index 86df966b45a4cbeef8467558bf8a1466541e4522..63abd81fb477751568ef26c830f5aa7c98f1e809 100755 --- a/Src/Core/FFmmAlgorithmTsm.hpp +++ b/Src/Core/FFmmAlgorithmTsm.hpp @@ -19,7 +19,7 @@ #include "../Utils/FAssert.hpp" #include "../Utils/FLog.hpp" -#include "../Utils/FTrace.hpp" + #include "../Utils/FTic.hpp" #include "../Containers/FOctree.hpp" @@ -73,7 +73,6 @@ public: * Call this function to run the complete algorithm */ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); if(operationsToProceed & FFmmP2M) bottomPass(); @@ -88,7 +87,6 @@ public: /** P2M */ void bottomPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); FLOG( counterTime.tic() ); FLOG( double totalComputation = 0 ); @@ -121,7 +119,6 @@ public: /** M2M */ void upwardPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG( counterTime.tic() ); FLOG( double totalComputation = 0 ); @@ -176,7 +173,6 @@ public: /** M2L */ void transferPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG( counterTime.tic() ); FLOG( double totalComputation = 0 ); @@ -281,7 +277,6 @@ public: /** P2P */ void directPass(){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); FLOG( counterTime.tic() ); FLOG( double totalComputation = 0 ); diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index 2308eed3023b988273dbb382e6b8c74ddde505a0..3067d553ddbad7a61f3b3dd7c00794ce1b551025 100755 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -21,7 +21,7 @@ #include "../Utils/FBitonicSort.hpp" #include "../Utils/FMemUtils.hpp" -#include "../Utils/FTrace.hpp" + #include "../Containers/FVector.hpp" #include "../BalanceTree/FLeafBalance.hpp" @@ -167,7 +167,6 @@ public: static void MergeSplitedLeaves(const FMpi::FComm& communicator, IndexedParticle* workingArray, FSize* workingSize, FSize ** leavesOffsetInParticles, ParticleClass** particlesArrayInLeafOrder, FSize* const leavesSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); const int myRank = communicator.processId(); const int nbProcs = communicator.processCount(); @@ -324,7 +323,6 @@ public: const FSize leavesOffsetInParticles[], const ParticleClass particlesArrayInLeafOrder[], const FSize currentNbLeaves, const FSize currentNbParts, FAbstractBalanceAlgorithm * balancer){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); const int myRank = communicator.processId(); const int nbProcs = communicator.processCount(); diff --git a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp index 68879728e31ef3208c1adf60e86aea0bb2b617ff..2d7a46816730a3ea40db5c1d679d5f56d376e9bf 100755 --- a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp +++ b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp @@ -17,7 +17,7 @@ #define FABSTRACTCHEBKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "../../Components/FAbstractKernels.hpp" diff --git a/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp b/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp index cfd9720fef95c1c155526272d51c4db577d1e6b2..f02ffacf99327e4a423a0cc2478f588c69841a2d 100755 --- a/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp @@ -19,7 +19,7 @@ #include #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "../../Components/FAbstractKernels.hpp" diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp index 6dbd318493e5b45cd7528c300719f3023d47a31b..ab37a9e25c32e95de7cc271621366572aac30e7f 100755 --- a/Src/Kernels/Chebyshev/FChebKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebKernel.hpp @@ -17,7 +17,7 @@ #define FCHEBKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "./FAbstractChebKernel.hpp" diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp index df3394853f2ce41bbf2a52ef17da9051771cfe73..52c7500d4c6cd9fc953d8cd6c5cd62df6216911a 100755 --- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp @@ -16,7 +16,7 @@ // =================================================================================== #include "Utils/FGlobal.hpp" -#include "Utils/FTrace.hpp" + #include "Utils/FSmartPointer.hpp" #include "FAbstractChebKernel.hpp" diff --git a/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp index 3072778ebb97eb8f38b35955f07f6e21225c30b0..34b29b0abce5b52579deb1a75c8f2320b51f4065 100755 --- a/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp @@ -3,7 +3,7 @@ // [--License--] #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "./FAbstractChebKernel.hpp" diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp index dcf24d479badfe3dbe434b997d8710649cfa1e66..02906a979fb34c1c714ff54b664487ad7b75f8cc 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp @@ -17,7 +17,7 @@ #define FCHEBTENSORIALKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "./FAbstractChebKernel.hpp" diff --git a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp index 08bd2a7769396a0662826db460f99b3baecab702..93e3559ee9374bb62b669b02fd9339a4e7748a7c 100755 --- a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp +++ b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp @@ -20,7 +20,7 @@ #include "../../Components/FAbstractKernels.hpp" #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FMemUtils.hpp" #include "../../Utils/FSmartPointer.hpp" #include "../../Utils/FPoint.hpp" diff --git a/Src/Kernels/Uniform/FAbstractUnifKernel.hpp b/Src/Kernels/Uniform/FAbstractUnifKernel.hpp index 889d2b7169427b71e4e01ed0241efefcc82656ce..3ce311df585ded164d425266868708e957efcbd5 100644 --- a/Src/Kernels/Uniform/FAbstractUnifKernel.hpp +++ b/Src/Kernels/Uniform/FAbstractUnifKernel.hpp @@ -17,7 +17,7 @@ #define FABSTRACTUNIFKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "../../Components/FAbstractKernels.hpp" diff --git a/Src/Kernels/Uniform/FUnifDenseKernel.hpp b/Src/Kernels/Uniform/FUnifDenseKernel.hpp index 95a9e32695592d7ccf6580028159933a1813eac6..95aa4d447d4d631402840bc27c1fabef467de206 100644 --- a/Src/Kernels/Uniform/FUnifDenseKernel.hpp +++ b/Src/Kernels/Uniform/FUnifDenseKernel.hpp @@ -17,7 +17,7 @@ #define FUNIFDENSEKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "./FAbstractUnifKernel.hpp" diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index a4893b70bbf2f8b06313791877375fe8f49e1405..31b0b4914643b41dba2aae62c65d35bf256f9b61 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -17,7 +17,7 @@ #define FUNIFKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "./FAbstractUnifKernel.hpp" diff --git a/Src/Kernels/Uniform/FUnifSymKernel.hpp b/Src/Kernels/Uniform/FUnifSymKernel.hpp index 07f65fc854d0d498af52ef5181274290c5a069c7..e42e79c8c5f74b0431f6498c1c09f1cfc8bbb42e 100755 --- a/Src/Kernels/Uniform/FUnifSymKernel.hpp +++ b/Src/Kernels/Uniform/FUnifSymKernel.hpp @@ -17,7 +17,7 @@ #define FUNIFSYMKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" // Originally in M2LHandler but transferred to the kernel for the symmetric version diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 4fa6e3ad8741a31ccfc828397650100e3d64a8a1..84aa4e5645e25a88998d409e371e0a3106951e8d 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -17,7 +17,7 @@ #define FUNIFTENSORIALKERNEL_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FTrace.hpp" + #include "../../Utils/FSmartPointer.hpp" #include "./FAbstractUnifKernel.hpp" diff --git a/Src/ScalFmmConfig.h.cmake b/Src/ScalFmmConfig.h.cmake index 58e581659dda3ed58bb477e2dd1443d814a0e33d..0eca4c9bbd51b7d093183cb1b334263ffef702b2 100755 --- a/Src/ScalFmmConfig.h.cmake +++ b/Src/ScalFmmConfig.h.cmake @@ -42,14 +42,6 @@ #cmakedefine ScalFMM_USE_MPI -/////////////////////////////////////////////////////// -// Trace & Itac -/////////////////////////////////////////////////////// - -#cmakedefine ScalFMM_USE_TRACE -#cmakedefine ScalFMM_USE_ITAC -#cmakedefine ScalFMM_USE_EZTRACE - /////////////////////////////////////////////////////// // Memory trace /////////////////////////////////////////////////////// diff --git a/Src/Utils/FBitonicSort.hpp b/Src/Utils/FBitonicSort.hpp index d6635ba620a833c4c2539776c2d2658a5705e633..ae33003d5224055fdb9f69e77c26dc3785848c6e 100755 --- a/Src/Utils/FBitonicSort.hpp +++ b/Src/Utils/FBitonicSort.hpp @@ -19,7 +19,7 @@ #include #include -#include "FTrace.hpp" + #include "FMpi.hpp" #include "FQuickSort.hpp" #include "FAssert.hpp" @@ -144,7 +144,6 @@ public: endfor */ static void Sort(SortType array[], const IndexType size, const FMpi::FComm& comm){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Bitonic" , __FILE__ , __LINE__) ); const int np = comm.processCount(); const int rank = comm.processId(); diff --git a/Src/Utils/FQuickSort.hpp b/Src/Utils/FQuickSort.hpp index 65bde87e6656fb047cc6203b15a4e2cfcaff9d2d..009dfd2390913308ef18baac4fbd767379b900e3 100755 --- a/Src/Utils/FQuickSort.hpp +++ b/Src/Utils/FQuickSort.hpp @@ -25,7 +25,7 @@ #include "FGlobal.hpp" #include "FMemUtils.hpp" -#include "FTrace.hpp" + /** This class is parallel quick sort * It hold a mpi version diff --git a/Src/Utils/FTrace.cpp b/Src/Utils/FTrace.cpp deleted file mode 100755 index a4dc61938217871bf067f694b643962afb975b0a..0000000000000000000000000000000000000000 --- a/Src/Utils/FTrace.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// =================================================================================== -// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner -// olivier.coulaud@inria.fr, berenger.bramas@inria.fr -// This software is a computer program whose purpose is to compute the FMM. -// -// This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". -// "http://www.gnu.org/licenses". -// =================================================================================== -#include "FTrace.hpp" -#ifdef ScalFMM_USE_TRACE - -#if !defined (ScalFMM_USE_ITAC) && !defined (ScalFMM_USE_EZTRACE) -int FTrace::Deep = 0; -FTic FTrace::TimeSinceBegining; -#endif - -#endif // ScalFMM_USE_TRACE - diff --git a/Src/Utils/FTrace.hpp b/Src/Utils/FTrace.hpp deleted file mode 100755 index 7429b21fd22ccd1b9b4ad41b6bd8503a88ea6103..0000000000000000000000000000000000000000 --- a/Src/Utils/FTrace.hpp +++ /dev/null @@ -1,230 +0,0 @@ -// =================================================================================== -// Copyright ScalFmm 2011 INRIA -// olivier.coulaud@inria.fr, berenger.bramas@inria.fr -// This software is a computer program whose purpose is to compute the FMM. -// -// This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". -// "http://www.gnu.org/licenses". -// =================================================================================== -#ifndef FTRACE_HPP -#define FTRACE_HPP - - -#include "FGlobal.hpp" - -/** -* @author Berenger Bramas (berenger.bramas@inria.fr) -* @class FTrace -* Please read the license -* -* This class is used to print trace data during processing. -* You have to use the FTRACE macro as shown in the example. -* -* -* FTRACE( FTrace::FRegion regionTrace("Sub part of function", __FUNCTION__ , __FILE__ , __LINE__) ); -* ... -* FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) ); -* -*/ - -#ifndef ScalFMM_USE_TRACE - - #define FTRACE( X ) - - class FTrace { - public: - class FRegion { - public: - FRegion(const char*, const char*, const char* = nullptr, const int = 0){} - void end(){} - }; - class FFunction { - public: - FFunction(const char*, const char*, const char* = nullptr, const int = 0){} - }; - }; - - -#else - - #define FTRACE( X ) X - - #ifdef ScalFMM_USE_ITAC - - #include - - class FTrace { - public: - class FRegion { - VT_Region region; - public: - FRegion(const char*const regionName, const char*const className) - : region( regionName, className) {} - FRegion(const char*const regionName, const char*const className, const char* const file, const int line) - : region( regionName, className, file, line ) {} - void end(){ - region.end(); - } - }; - - class FFunction { - VT_Function function; - public: - FFunction(const char*const functionName, const char*const className) - : function( functionName, className) {} - FFunction(const char*const functionName, const char*const className, const char* const file, const int line) - : function( functionName, className, file, line ) {} - }; - }; - - #else - #ifdef ScalFMM_USE_EZTRACE - - #include - - class FTrace { - static const unsigned IdModule = 0xCC00; // must be between 0x1000 and 0xff00 - static unsigned BuildMask(const char* phrase){ - unsigned mask = 0; - if(phrase){ - while( *phrase ){ - mask = (mask<<1) ^ (*phrase++) ^ ((mask>>15)&1); - } - } - return ((mask & 0xFF) | IdModule) & ~0x2; // 0x[IdModule][Mask] last bits has to 01 - } - public: - class FRegion { - const unsigned mask; - bool hadFinished; - public: - FRegion(const char*const regionName, const char*const className) - : mask(BuildMask(regionName)), hadFinished(false) { - EZTRACE_EVENT2(mask, regionName, className); - } - FRegion(const char*const regionName, const char*const className, const char* const file, const int line) - : mask(BuildMask(regionName)), hadFinished(false) { - EZTRACE_EVENT4(mask, regionName, className, file, line); - } - ~FRegion(){ - end(); - } - void end(){ - if( !hadFinished ){ - hadFinished = true; - EZTRACE_EVENT0(mask^0x3); - } - } - }; - - class FFunction { - const unsigned mask; - public: - FFunction(const char*const functionName, const char*const className) - : mask(BuildMask(functionName)) { - EZTRACE_EVENT2(mask, functionName, className); - } - FFunction(const char*const functionName, const char*const className, const char* const file, const int line) - : mask(BuildMask(functionName)) { - EZTRACE_EVENT4(mask, functionName, className, file, line); - } - ~FFunction(){ - EZTRACE_EVENT0(mask^0x3); - } - }; - }; - - #else - - #include - #include - - #include "FTic.hpp" - - class FTrace{ - static int Deep; - static FTic TimeSinceBegining; - - static void PrintTab(){ - std::cout << "{" << std::setw( 6 ) << TimeSinceBegining.tacAndElapsed() << "s} "; - for(int idxDeep = 0 ; idxDeep < Deep ; ++idxDeep){ - std::cout << '\t'; - } - } - - public: - class FRegion { - bool closed; - void close(){ - if(!closed){ - closed = true; - --FTrace::Deep; - } - } - public: - FRegion(const char*const regionName, const char*const className) - : closed(false) { - FTrace::PrintTab(); - std::cout << "@Region: " << regionName << " (" << className << ")\n"; - ++FTrace::Deep; - } - FRegion(const char*const regionName, const char*const className, const char* const file, const int line) - : closed(false) { - FTrace::PrintTab(); - std::cout << "@Region: " << regionName << " (" << className << ")" << " -- line " << line << " file " << file << "\n"; - ++FTrace::Deep; - } - ~FRegion(){ - close(); - } - void end(){ - close(); - } - }; - - class FFunction { - bool closed; - void close(){ - if(!closed){ - closed = true; - --FTrace::Deep; - } - } - public: - FFunction(const char*const functionName, const char*const className) - : closed(false){ - FTrace::PrintTab(); - std::cout << "@Function: " << functionName << " (" << className << ")\n"; - ++FTrace::Deep; - } - FFunction(const char*const functionName, const char*const className, const char* const file, const int line) - : closed(false) { - FTrace::PrintTab(); - std::cout << "@Function: " << functionName << " (" << className << ")" << " -- line " << line << " file " << file << "\n"; - ++FTrace::Deep; - } - ~FFunction(){ - close(); - } - }; - - friend class FRegion; - friend class FFunction; - }; - - #endif //ScalFMM_USE_EZTRACE - - #endif //ScalFMM_USE_ITAC - -#endif //ScalFMM_USE_TRACE - -#endif //FTRACE_HPP - - diff --git a/Tests/Kernels/testSphericalProcAlgorithm.cpp b/Tests/Kernels/testSphericalProcAlgorithm.cpp index 40d8497f1975ad75f4797c0a6649559c61093c02..abce40c516f4861e06b9dd920eff90ded3af642b 100755 --- a/Tests/Kernels/testSphericalProcAlgorithm.cpp +++ b/Tests/Kernels/testSphericalProcAlgorithm.cpp @@ -337,7 +337,6 @@ int main(int argc, char ** argv){ std::cout << "Done " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl; { // get sum forces&potential - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Sum Result" , __FILE__ , __LINE__) ); FReal potential = 0; FReal fx = 0.0, fy = 0.0, fz = 0.0; diff --git a/Tests/noDist/testSphericalBlasBlockProc.cpp b/Tests/noDist/testSphericalBlasBlockProc.cpp index c6f15f4081a01b068f27b2bd99184590ee316840..6949d219a7e7737c444c88519ad4ba1f64e3a51c 100644 --- a/Tests/noDist/testSphericalBlasBlockProc.cpp +++ b/Tests/noDist/testSphericalBlasBlockProc.cpp @@ -172,8 +172,6 @@ int main(int argc, char ** argv){ std::cout << "Done " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl; { // get sum forces&potential - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Sum Result" , __FILE__ , __LINE__) ); - FReal potential = 0; FReal fx = 0.0, fy = 0.0, fz = 0.0; diff --git a/Tests/noDist/testSphericalBlasProc.cpp b/Tests/noDist/testSphericalBlasProc.cpp index 3365cad926630c9142369eaf19464cb8a1ca3134..1c3f396408497c9c12f7a7ec8afd06d1fb4622ac 100644 --- a/Tests/noDist/testSphericalBlasProc.cpp +++ b/Tests/noDist/testSphericalBlasProc.cpp @@ -172,7 +172,6 @@ int main(int argc, char ** argv){ std::cout << "Done " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl; { // get sum forces&potential - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Sum Result" , __FILE__ , __LINE__) ); FReal potential = 0; FReal fx = 0.0, fy = 0.0, fz = 0.0;