diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 2115736ace96d97dff2070ba412594951c826fc9..649f1a21be80d7a424e6ad58e57f3fca1b6ad765 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -45,107 +45,116 @@ /** * @author Berenger Bramas (berenger.bramas@inria.fr) - * @class FFmmAlgorithmThreadProc - * @brief + * * Please read the license * - * This class is a threaded FMM algorithm with mpi. - * It just iterates on a tree and call the kernels with good arguments. - * It used the inspector-executor model : - * iterates on the tree and builds an array to work in parallel on this array + * This class is a threaded FMM algorithm distributed using MPI. It iterates on + * a tree and calls the kernels with good arguments. It uses the inspector - + * executor model : iterates on the tree and builds an array to work in parallel + * on this array * - * Of course this class does not deallocate pointer given in arguements. + * This class does not free pointers given in arguments. 
* * Threaded & based on the inspector-executor model - * schedule(runtime) export OMP_NUM_THREADS=2 - * export OMPI_CXX=`which g++-4.4` - * mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp - * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes - * ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp + * + * schedule(runtime) export OMP_NUM_THREADS=2 + * export OMPI_CXX=`which g++-4.4` + * mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp + * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 + * --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp */ template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers { - OctreeClass* const tree; //< The octree to work on - KernelClass** kernels; //< The kernels - - const FMpi::FComm& comm; //< MPI comm +private: + OctreeClass* const tree; ///< The octree to work on + KernelClass** kernels; ///< The kernels - typename OctreeClass::Iterator* iterArray; //Will be used to store pointers to cells/leafs to work with - typename OctreeClass::Iterator* iterArrayComm; //Will be used to store pointers to cells/leafs to send/rcv - int numberOfLeafs; //< To store the size at the previous level + const FMpi::FComm& comm; ///< MPI comm - const int MaxThreads; //< the max number of thread allowed by openmp + /// Used to store pointers to cells/leafs to work with + typename OctreeClass::Iterator* iterArray; + /// Used to store pointers to cells/leafs to send/rcv + typename OctreeClass::Iterator* iterArrayComm; - const int nbProcess; //< Number of process - const int idProcess; //< Id of current process - - const int OctreeHeight; //<Height of the tree + int numberOfLeafs; ///< To store the size at the previous level + const int MaxThreads; ///< Max number of thread allowed 
by openmp + const int nbProcess; ///< Process count + const int idProcess; ///< Current process id + const int OctreeHeight; ///< Tree height const int leafLevelSeperationCriteria; /** An interval is the morton index interval - * that a proc use (it holds data in this interval) - */ + * that a proc uses (i.e. it holds data in this interval) */ struct Interval{ MortonIndex leftIndex; MortonIndex rightIndex; }; - /** My interval */ + + /// Current process interval Interval*const intervals; - /** All process intervals */ + /// All processes intervals Interval*const workingIntervalsPerLevel; - /** Get an interval from proc id and level */ + /// Get an interval from a process id and tree level Interval& getWorkingInterval( int level, int proc){ return workingIntervalsPerLevel[OctreeHeight * proc + level]; } + /// Get an interval from a process id and tree level const Interval& getWorkingInterval( int level, int proc) const { return workingIntervalsPerLevel[OctreeHeight * proc + level]; } - /** To know if a proc has work at a given level (if it hold cells and was responsible of them) */ + /// Does \a idxProc have work at given \a idxLevel + /** i.e. does it hold cells and is responsible for them ? 
*/ bool procHasWorkAtLevel(const int idxLevel , const int idxProc) const { return getWorkingInterval(idxLevel, idxProc).leftIndex <= getWorkingInterval(idxLevel, idxProc).rightIndex; } - /** Return true if the idxProc left cell at idxLevel+1 has the same parent as us for our right cell */ + /** True if the \a idxProc left cell at \a idxLevel+1 has the same parent as us for our right cell */ bool procCoversMyRightBorderCell(const int idxLevel , const int idxProc) const { return (getWorkingInterval((idxLevel+1) , idProcess).rightIndex>>3) == (getWorkingInterval((idxLevel+1) ,idxProc).leftIndex >>3); } - /** Return true if the idxProc right cell at idxLevel+1 has the same parent as us for our left cell */ + /** True if the \a idxProc right cell at \a idxLevel+1 has the same parent as us for our left cell */ bool procCoversMyLeftBorderCell(const int idxLevel , const int idxProc) const { return (getWorkingInterval((idxLevel+1) , idxProc).rightIndex >>3) == (getWorkingInterval((idxLevel+1) , idProcess).leftIndex>>3); } public: - /** Get current proc interval at level */ + /// Get current process interval at given \a level Interval& getWorkingInterval( int level){ return getWorkingInterval(level, idProcess); } - /** Does the current proc has some work at this level */ + /// Does the current process have some work at this level ? 
bool hasWorkAtLevel( int level){ return idProcess == 0 || (getWorkingInterval(level, idProcess - 1).rightIndex) < (getWorkingInterval(level, idProcess).rightIndex); } - /** The constructor need the octree and the kernels used for computation + /**@brief Constructor * @param inTree the octree to work on * @param inKernels the kernels to call + * * An assert is launched if one of the arguments is null */ - FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) - : tree(inTree) , kernels(nullptr), comm(inComm), iterArray(nullptr),iterArrayComm(nullptr),numberOfLeafs(0), - MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()), - OctreeHeight(tree->getHeight()), - leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), - intervals(new Interval[inComm.processCount()]), - workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) - { + FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : + tree(inTree), + kernels(nullptr), + comm(inComm), + iterArray(nullptr), + iterArrayComm(nullptr), + numberOfLeafs(0), + MaxThreads(omp_get_max_threads()), + nbProcess(inComm.processCount()), + idProcess(inComm.processId()), + OctreeHeight(tree->getHeight()), + leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + intervals(new Interval[inComm.processCount()]), + workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); - this->kernels = new KernelClass*[MaxThreads]; #pragma omp parallel for schedule(static) for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ @@ -160,7 +169,8 @@ public: FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n"); FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " 
<< idProcess << ".\n"); } - /** Default destructor */ + + /// Default destructor virtual ~FFmmAlgorithmThreadProc(){ for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ delete this->kernels[idxThread]; @@ -250,16 +260,6 @@ protected: workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ ); } - // run; - // if(operationsToProceed & FFmmP2M) bottomPass(); - - // if(operationsToProceed & FFmmM2M) upwardPass(); - - // if(operationsToProceed & FFmmM2L) transferPass(); - - // if(operationsToProceed & FFmmL2L) downardPass(); - - // if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); Timers[P2MTimer].tic(); if(operationsToProceed & FFmmP2M) bottomPass(); Timers[P2MTimer].tac(); diff --git a/Src/Files/FFmaGenericLoader.hpp b/Src/Files/FFmaGenericLoader.hpp index 32da78f8849644c66549afd00ba81bfacb729c96..f08e3fc729a43623155c0a4d8eb5c9943799d36c 100644 --- a/Src/Files/FFmaGenericLoader.hpp +++ b/Src/Files/FFmaGenericLoader.hpp @@ -213,6 +213,22 @@ private: FReal * tmpVal; ///< Temporary array to read data /// Count of other data pieces to read in a particle record after the 4 first ones. unsigned int otherDataToRead; + + void open_file(const std::string filename, const bool binary) { + if(binary) { + this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary); + } + else { + this->file = new std::fstream(filename.c_str(),std::ifstream::in) ; + } + // test if open + if(! this->file->is_open()){ + std::cerr << "File "<< filename<<" not opened! 
Error: " << strerror(errno) <<std::endl; + std::exit( EXIT_FAILURE); + } + std::cout << "Opened file "<< filename << std::endl; + } + public: /** * This constructor opens a file using the given mode and reads its @@ -228,18 +244,7 @@ public: file(nullptr), binaryFile(binary), centerOfBox(0.0,0.0,0.0), boxWidth(0.0), nbParticles(0), tmpVal(nullptr), otherDataToRead(0) { - if(binary) { - this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary); - } - else { - this->file = new std::fstream(filename.c_str(),std::ifstream::in) ; - } - // test if open - if(! this->file->is_open()){ - std::cerr << "File "<< filename<<" not opened! " <<std::endl; - std::exit( EXIT_FAILURE); - } - std::cout << "Open file "<< filename << std::endl; + this->open_file(filename, binary); this->readHeader(); } @@ -256,29 +261,20 @@ public: */ FFmaGenericLoader(const std::string & filename) : file(nullptr),binaryFile(false), centerOfBox(0.0,0.0,0.0),boxWidth(0.0),nbParticles(0),tmpVal(nullptr),otherDataToRead(0) { - - std::string ext(".bfma"); // open particle file - if(filename.find(ext) != std::string::npos) { + if( filename.find(".bfma") != std::string::npos ) { binaryFile = true; - this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary); - } - else if(filename.find(".fma")!=std::string::npos ) { - this->file = new std::fstream(filename.c_str(),std::ifstream::in) ; - } - else { + } else if( filename.find(".fma")!=std::string::npos ) { + binaryFile = false; + } else { std::cout << "FFmaGenericLoader: " << "Only .fma or .bfma input file are allowed. Got " << filename << "." << std::endl; std::exit ( EXIT_FAILURE) ; } - // test if open - if(! this->file->is_open()){ - std::cerr << "File "<< filename<<" not opened! 
" <<std::endl; - std::exit( EXIT_FAILURE); - } - std::cout << "Open file "<< filename << std::endl; + + this->open_file(filename, binaryFile); this->readHeader(); } diff --git a/Src/Utils/FAlgorithmTimers.hpp b/Src/Utils/FAlgorithmTimers.hpp index d73c1e890b6e0db1ab21fb2b0bd99e7843499737..ee41b38f438d0c850e55198a5d8c0074698aae47 100644 --- a/Src/Utils/FAlgorithmTimers.hpp +++ b/Src/Utils/FAlgorithmTimers.hpp @@ -18,53 +18,60 @@ #define FALGORITHMTIMERS_HPP /** - * @brief This class provide a way for the different algorithms to - * store the time spent in each operator. + * @brief Collection of timers for FMM operators. * + * This class provides a way for the different algorithms to + * store the time spent in each operator. */ class FAlgorithmTimers{ -protected: - FTic * Timers; - public: - - enum FTimers{ - P2MTimer=0, - M2MTimer=1, - M2LTimer=2, - L2LTimer=3, - L2PTimer=4, - P2PTimer=5, - NearTimer=6, + /// The timer names + enum FTimers { + P2MTimer, + M2MTimer, + M2LTimer, + L2LTimer, + L2PTimer, + P2PTimer, + NearTimer, + nbTimers ///< Timer count }; - const int nbTimers = 7; +protected: + /// Timer array + FTic Timers[nbTimers]; - FAlgorithmTimers() : Timers(nullptr) +public: + /// Constructor: resets all timers + FAlgorithmTimers() { - Timers = new FTic[nbTimers]; - for(int i=0; i<nbTimers ; ++i){ + for(int i = 0; i < nbTimers ; ++i){ Timers[i].reset(); } } - virtual ~FAlgorithmTimers(){ - delete[] Timers; - } + /// Default copy constructor + FAlgorithmTimers(const FAlgorithmTimers&) = default; + /// Default move constructor + FAlgorithmTimers(FAlgorithmTimers&&) = default; + /// Returns the timer array const FTic * getAllTimers() const { return Timers; } + /// Returns the timer count int getNbOfTimerRecorded() const { return nbTimers; } + /// Elapsed time between last FTic::tic() and FTic::tac() for given timer. 
double getTime(FTimers OpeTimer) const{ //assert to verify size return Timers[OpeTimer].elapsed(); } + /// Cumulated time between all FTic::tic() and FTic::tac() for given timer. double getCumulatedTime(FTimers OpeTimer) const{ //assert to verify size return Timers[OpeTimer].cumulated(); diff --git a/Src/Utils/FParameters.hpp b/Src/Utils/FParameters.hpp index a62b9212efceb6933573e5c71663810c5951c7d9..3a8031fcb3426a8837d2f8ab45f5a18caba6bc14 100644 --- a/Src/Utils/FParameters.hpp +++ b/Src/Utils/FParameters.hpp @@ -159,7 +159,7 @@ namespace FParameters{ inline const VariableType getValue(const int argc, const char* const * const argv, const std::vector<const char*>& inNames, const VariableType& defaultValue = VariableType(), const bool caseSensible = false){ for(const char* name : inNames){ const int position = findParameter(argc, argv, name, caseSensible); - FAssertLF(position == NotFound || position != argc - 1); + FAssertLF(position == NotFound || position != argc - 1, "Could not find a value for argument: ",name, ". " ); if(position != NotFound && position != argc - 1){ return StrToOther(argv[position+1],defaultValue); } @@ -172,7 +172,7 @@ namespace FParameters{ inline const char* getStr(const int argc, const char* const * const argv, const std::vector<const char*>& inNames, const char* const inDefault, const bool caseSensible = false){ for(const char* name : inNames){ const int position = findParameter(argc, argv, name, caseSensible); - FAssertLF(position == NotFound || position != argc - 1); + FAssertLF(position == NotFound || position != argc - 1, "Could not find a value for argument: ",name, ". 
"); if(position != NotFound && position != argc - 1){ return argv[position+1]; } diff --git a/Tests/noDist/PerfTest.cpp b/Tests/noDist/PerfTest.cpp index a8c48f3327f52243dd6a691b7134711c026b254c..e036f686329e6e9f2ecd7beb0776a81106c83f4d 100644 --- a/Tests/noDist/PerfTest.cpp +++ b/Tests/noDist/PerfTest.cpp @@ -43,6 +43,7 @@ #include "PerfTest/AlgoLoaderThreadProc.hpp" #endif +#define HOST_NAME_MAX 64 /** * \brief Runs a generic sequence of actions to use an algorithm. @@ -66,22 +67,30 @@ void runperf(FPerfTestParams& params) AlgoLoader<TreeLoader, KernelLoader> algoLoader(params, treeLoader, kernelLoader); algoLoader.run(); - auto& algo = *(algoLoader._algo); + char hostname[HOST_NAME_MAX]; + memset(hostname,'\0',HOST_NAME_MAX); + if ( -1 == gethostname(hostname, HOST_NAME_MAX-1) ) { + perror("Could not get hostname"); + strncpy(hostname, "unknown", HOST_NAME_MAX); + } + std::cout << "@@ " + << "host:" << hostname << " " << "algo:" << params.algo << " " - << "file:" << params.filename.substr(params.filename.find_last_of('/')+1) << " " + << "file:" << params.filename.substr( + params.filename.find_last_of('/')+1 ) << " " << "particles:" << treeLoader._loader.getNumberOfParticles() << " " - << "threads:" << params.nbThreads << " " - << "height:" << params.treeHeight << " " - << "subheight:" << params.subTreeHeight << " " + << "procs:" << params.nbProcs << " " + << "threads:" << params.nbThreads << " " + << "height:" << params.treeHeight << " " + << "subheight:" << params.subTreeHeight << " " << algoLoader.getRunInfoString() - << "P2M:" << algo.getCumulatedTime(FAlgorithmTimers::P2MTimer) << " " - << "M2M:" << algo.getCumulatedTime(FAlgorithmTimers::M2MTimer) << " " - << "M2L:" << algo.getCumulatedTime(FAlgorithmTimers::M2LTimer) << " " - << "L2L:" << algo.getCumulatedTime(FAlgorithmTimers::L2LTimer) << " " - << "P2PL2P:" << algo.getCumulatedTime(FAlgorithmTimers::NearTimer) << " " + << "P2M:" << algoLoader.getCumulatedTime(FAlgorithmTimers::P2MTimer) << " " + << 
"M2M:" << algoLoader.getCumulatedTime(FAlgorithmTimers::M2MTimer) << " " + << "M2L:" << algoLoader.getCumulatedTime(FAlgorithmTimers::M2LTimer) << " " + << "L2L:" << algoLoader.getCumulatedTime(FAlgorithmTimers::L2LTimer) << " " + << "P2PL2P:" << algoLoader.getCumulatedTime(FAlgorithmTimers::NearTimer) << " " << std::endl; - } namespace ParName { @@ -96,7 +105,6 @@ namespace ParName { int main (int argc, char** argv) { - // Parameter handling ////////////// FHelpDescribeAndExit(argc, argv, "Performance test program for FMM balancing techniques. " @@ -127,11 +135,20 @@ int main (int argc, char** argv) std::string prefix("mpi-"); if( params.algo.substr(0, prefix.size()) == prefix ) { params.mpiContext = new FMpi(argc,argv); + params.nbProcs = params.mpiContext->global().processCount(); } #endif } // End of Parameter handling /////// + char hostname[HOST_NAME_MAX]; + memset(hostname,'\0',HOST_NAME_MAX); + if ( -1 == gethostname(hostname, HOST_NAME_MAX-1) ) { + perror("Could not get hostname"); + strncpy(hostname, "unknown", HOST_NAME_MAX); + } + std::cout << "Hostname: " << hostname << std::endl; + omp_set_num_threads(params.nbThreads); using FReal = double; diff --git a/Tests/noDist/PerfTest/AlgoLoaderCostZones.hpp b/Tests/noDist/PerfTest/AlgoLoaderCostZones.hpp index dae69fad379424adc24b69e66c16556643012c3b..017c553815d7001b208fbcf2c2b8177a01a5b258 100644 --- a/Tests/noDist/PerfTest/AlgoLoaderCostZones.hpp +++ b/Tests/noDist/PerfTest/AlgoLoaderCostZones.hpp @@ -101,6 +101,11 @@ public: std::string getRunInfoString() const { return _infostring.str(); } + + double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const { + return _algo->getCumulatedTime(timerName); + } + }; diff --git a/Tests/noDist/PerfTest/AlgoLoaderSectionTask.hpp b/Tests/noDist/PerfTest/AlgoLoaderSectionTask.hpp index 7cbe7a2d4a8f006579283f0fd258b5060fb4ced3..245e3c477f3a8f6f848cf08980bf7aaaed6093ef 100644 --- a/Tests/noDist/PerfTest/AlgoLoaderSectionTask.hpp +++ 
b/Tests/noDist/PerfTest/AlgoLoaderSectionTask.hpp @@ -48,6 +48,10 @@ public: new FMMClass(&(_treeLoader._tree), &(_kernelLoader._kernel))); _algo->execute(); } + + double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const { + return _algo->getCumulatedTime(timerName); + } }; diff --git a/Tests/noDist/PerfTest/AlgoLoaderTask.hpp b/Tests/noDist/PerfTest/AlgoLoaderTask.hpp index 98209b31cb83da4e4228806950f580cc6a79dc7c..2422f7e7c6efb7c7a27fa6b916424276c785a992 100644 --- a/Tests/noDist/PerfTest/AlgoLoaderTask.hpp +++ b/Tests/noDist/PerfTest/AlgoLoaderTask.hpp @@ -48,6 +48,10 @@ public: new FMMClass(&(_treeLoader._tree), &(_kernelLoader._kernel))); _algo->execute(); } + + double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const { + return _algo->getCumulatedTime(timerName); + } }; diff --git a/Tests/noDist/PerfTest/AlgoLoaderThread.hpp b/Tests/noDist/PerfTest/AlgoLoaderThread.hpp index 6efed6ff0fd7f4f417df3f22c0d6bd5e91349358..322ec6fd205e875a07d67ae26a6bde4dc1160ec3 100644 --- a/Tests/noDist/PerfTest/AlgoLoaderThread.hpp +++ b/Tests/noDist/PerfTest/AlgoLoaderThread.hpp @@ -71,6 +71,10 @@ public: return sstr.str(); } + double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const { + return _algo->getCumulatedTime(timerName); + } + }; diff --git a/Tests/noDist/PerfTest/AlgoLoaderThreadBalance.hpp b/Tests/noDist/PerfTest/AlgoLoaderThreadBalance.hpp index ebc147332cda437f23f54e8fee5bae98fd9ec556..05cb80de0bccb8d9ae9e26634e5f2a6beecc2eaf 100644 --- a/Tests/noDist/PerfTest/AlgoLoaderThreadBalance.hpp +++ b/Tests/noDist/PerfTest/AlgoLoaderThreadBalance.hpp @@ -58,6 +58,10 @@ public: return ""; } + double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const { + return _algo->getCumulatedTime(timerName); + } + }; diff --git a/Tests/noDist/PerfTest/AlgoLoaderThreadProc.hpp b/Tests/noDist/PerfTest/AlgoLoaderThreadProc.hpp index 1930836dbd05202eba90fc700d0d385209023cbd..9798e4733d0e758782cccb57098fc844837ac314 100644 --- 
a/Tests/noDist/PerfTest/AlgoLoaderThreadProc.hpp +++ b/Tests/noDist/PerfTest/AlgoLoaderThreadProc.hpp @@ -47,6 +47,10 @@ public: /// The #FMMClass algorithm instance std::unique_ptr<FMMClass> _algo; + /// Array of MPI gathered cumulated times + double timers[FAlgorithmTimers::nbTimers] {0}; + + AlgoLoaderThreadProc(FPerfTestParams& params, TreeLoader& treeLoader, KernelLoader& kernelLoader) : @@ -56,11 +60,26 @@ public: _algo(nullptr) { } + void run() { _algo = std::unique_ptr<FMMClass>( new FMMClass(_mpiContext->global(), &(_treeLoader._tree), &(_kernelLoader._kernel))); _algo->execute(); + + for( int idxTimer = 0; idxTimer < FAlgorithmTimers::nbTimers; ++idxTimer ) { + timers[idxTimer] = _algo->getCumulatedTime(FAlgorithmTimers::FTimers(idxTimer)); + } + + if( _mpiContext->global().processId() == 0) { + MPI_Reduce(MPI_IN_PLACE, timers, FAlgorithmTimers::nbTimers, MPI_DOUBLE, MPI_MAX, 0, _mpiContext->global().getComm()); + } else { + MPI_Reduce(timers, NULL, FAlgorithmTimers::nbTimers, MPI_DOUBLE, MPI_MAX, 0, _mpiContext->global().getComm()); + } + } + + double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const { + return timers[timerName]; } }; diff --git a/Tests/noDist/PerfTest/PerfTestUtils.hpp b/Tests/noDist/PerfTest/PerfTestUtils.hpp index 70a56cdacfd775612cde4e649a1a219d3752a71f..cc4020467774f17d916461405304e8550da88db6 100644 --- a/Tests/noDist/PerfTest/PerfTestUtils.hpp +++ b/Tests/noDist/PerfTest/PerfTestUtils.hpp @@ -26,6 +26,7 @@ struct FPerfTestParams { std::string filename = ""; ///< Particles file. std::string algo = "task"; ///< Algorithm to run. 
int omp_chunk_size = 0; ///< OpenMP chunk size for basic algorithm (FFmmAlgorithmThread) + int nbProcs = 1; #ifdef SCALFMM_USE_MPI FMpi* mpiContext = nullptr; #endif diff --git a/Tests/noDist/testFMpiFmaDivider.cpp b/Tests/noDist/testFMpiFmaDivider.cpp index 25a1971d09a283f1489f7a4c94fe3a7d4fa6a396..200f1a122648b9b11d1b1a8ca1563710e1939aa2 100644 --- a/Tests/noDist/testFMpiFmaDivider.cpp +++ b/Tests/noDist/testFMpiFmaDivider.cpp @@ -17,7 +17,8 @@ int main(int argc, char** argv) FMpiFmaDivider<FReal> divider(args.inFileName(), - args.outFileName() + "_" + std::to_string(args.zoneCount()) + args.outFileName() + "_" + args.dispatchPolicyString() + + "_" + std::to_string(args.zoneCount()) + "z_h" + std::to_string(args.treeHeight()), args.outFileExt(), args.zoneCount(), diff --git a/Tests/noDist/testFMpiFmaDividerArgs.hpp b/Tests/noDist/testFMpiFmaDividerArgs.hpp index 95e6d196df2d21f3eb0a7f5654239eac946dfe39..d61ecf9a7559c275d2971c0953072d5f6eaf0e81 100644 --- a/Tests/noDist/testFMpiFmaDividerArgs.hpp +++ b/Tests/noDist/testFMpiFmaDividerArgs.hpp @@ -16,7 +16,6 @@ class testFMpiFmaDividerArgs { const int _treeHeightInit = 5; - const int _subTreeHeightInit = 1; const int _zoneCountInit = omp_get_max_threads(); const int _verboseInit = 0; const char* _inFileNameInit = ""; @@ -53,14 +52,6 @@ public: return getValue(_argc, _argv, OctreeHeight.options, _treeHeightInit); } - int subTreeHeight() const { - using namespace FParameterDefinitions; - using namespace FParameters; - - return getValue(_argc, _argv, OctreeSubHeight.options, - _subTreeHeightInit); - } - int zoneCount() const { using namespace FParameterDefinitions; using namespace FParameters; @@ -112,11 +103,17 @@ public: return ext; } - int dispatchPolicy() { + std::string dispatchPolicyString() const { using namespace FParameterDefinitions; using namespace FParameters; std::string str = getStr(_argc, _argv, DispatchPolicy.options, _dispatchPolicyInit); + return str; + } + + int dispatchPolicy() const { + std::string 
str = dispatchPolicyString(); + if ( "particles" == str ) { return 0; } else { @@ -137,12 +134,12 @@ public: "Loads an FMA file into a tree and runs a pseudo FMM algorithm " "through it to compute load balancing.", OctreeHeight, - OctreeSubHeight, InputFile, OutputFileBasename, OutputFileExtension, ZoneCount, - EnabledVerbose + DispatchPolicy + ); return 0; }