Commit 90d89c0d authored by Quentin Khan

Remove PerfTest from master until merge with adaptive branch

parent 825b04b0
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
/**
* \file
* \author Quentin Khan
*
* This program is used to run different performance tests for the various
* algorithms that have been implemented for ScalFMM.
*
* See the classes in PerfTestUtils.hpp for more in-depth information. Run
* with the --help argument for usage information.
*/
#include <unistd.h>
#include <omp.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include "Utils/FParameters.hpp"
#include "Utils/FParameterNames.hpp"
#include "PerfTest/PerfTestUtils.hpp"
#include "PerfTest/TreeLoaderBasic.hpp"
#include "PerfTest/TreeLoaderFCheb.hpp"
#ifdef SCALFMM_USE_MPI
#include "PerfTest/TreeLoaderMpiSplitFCheb.hpp"
#include "PerfTest/TreeLoaderMpiGenericFCheb.hpp"
#endif
#include "PerfTest/KernelLoaderFChebSym.hpp"
#include "PerfTest/AlgoLoaderThread.hpp"
#include "PerfTest/AlgoLoaderTask.hpp"
#include "PerfTest/AlgoLoaderSectionTask.hpp"
#include "PerfTest/AlgoLoaderCostZones.hpp"
#include "PerfTest/AlgoLoaderThreadBalance.hpp"
#ifdef SCALFMM_USE_MPI
#include "PerfTest/AlgoLoaderThreadProc.hpp"
#endif
// Size of the buffers that receive the host name. Only define the fallback
// when the system headers have not already provided HOST_NAME_MAX, so that a
// platform definition is never redefined with a conflicting value.
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 64
#endif
/**
 * \brief Drives one complete FMM run and prints a one-line summary of it.
 *
 * The template arguments are instantiated one after the other:
 *
 * - a TreeLoader is built from the parameters;
 * - a KernelLoader<TreeLoader> is built from the parameters and the tree loader;
 * - an AlgoLoader<TreeLoader, KernelLoader> is built from the parameters and
 *   both previous loaders, then its run() method is called.
 *
 * Once the algorithm has run, a single line starting with "@@ " is written
 * to standard output. It holds the host name, the algorithm name, the input
 * file name stripped of its directory part, the run parameters, the extra
 * information string supplied by the algorithm loader and the cumulated
 * time of each FMM operator.
 *
 * See documentation of FTreeLoader, FKernelLoader, FAlgoLoader.
 *
 * \param params Run parameters, forwarded to every loader and echoed in the
 *               summary line.
 */
template <class TreeLoader,
          template <typename TL_1> class KernelLoader,
          template <typename TL_2, template <typename TL_3> class KL> class AlgoLoader>
void runperf(FPerfTestParams& params)
{
    // Build the three stages and run the algorithm.
    TreeLoader treeLoader(params);
    KernelLoader<TreeLoader> kernelLoader(params, treeLoader);
    AlgoLoader<TreeLoader, KernelLoader> algoLoader(params, treeLoader, kernelLoader);
    algoLoader.run();

    // Zero-filled buffer; gethostname is given one byte less than its size
    // so the stored name always stays null-terminated.
    char hostname[HOST_NAME_MAX] = {};
    if ( gethostname(hostname, HOST_NAME_MAX-1) == -1 ) {
        perror("Could not get hostname");
        strncpy(hostname, "unknown", HOST_NAME_MAX);
    }

    // Keep only what follows the last '/'. When there is no '/', npos + 1
    // wraps around to 0 and the whole name is kept.
    const auto fileName =
        params.filename.substr(params.filename.find_last_of('/') + 1);

    std::cout << "@@ "
              << "host:" << hostname << " "
              << "algo:" << params.algo << " "
              << "file:" << fileName << " ";
    std::cout << "particles:" << treeLoader._loader.getNumberOfParticles() << " "
              << "procs:" << params.nbProcs << " "
              << "threads:" << params.nbThreads << " "
              << "height:" << params.treeHeight << " "
              << "subheight:" << params.subTreeHeight << " ";
    std::cout << algoLoader.getRunInfoString();
    std::cout << "P2M:" << algoLoader.getCumulatedTime(FAlgorithmTimers::P2MTimer) << " "
              << "M2M:" << algoLoader.getCumulatedTime(FAlgorithmTimers::M2MTimer) << " "
              << "M2L:" << algoLoader.getCumulatedTime(FAlgorithmTimers::M2LTimer) << " "
              << "L2L:" << algoLoader.getCumulatedTime(FAlgorithmTimers::L2LTimer) << " "
              << "P2PL2P:" << algoLoader.getCumulatedTime(FAlgorithmTimers::NearTimer) << " "
              << std::endl;
}
/// Command line parameters that are specific to this program. They are listed
/// in the help output of main() next to the generic FParameterDefinitions.
namespace ParName {
/// `--algo`: name of the algorithm to run. The MPI algorithm names are only
/// advertised when the program is compiled with SCALFMM_USE_MPI.
const FParameterNames Algo = {{"--algo"},"Algorithm to run (basic, task, costzones, sectiontask, autobalance"
#ifdef SCALFMM_USE_MPI
", mpi-split, mpi-generic"
#endif
")."};
/// `--schedule`: OpenMP scheduling policy.
// NOTE(review): main() lists this option in the help but never reads its value - confirm whether that is intended.
const FParameterNames Schedule = {{"--schedule"},"OpenMP scheduling policy (static, dynamic)."};
/// `--chunk-size`: OpenMP chunk size, read by main() into `params.omp_chunk_size`.
const FParameterNames ChunkSize = {{"--chunk-size"},"OpenMP chunk size for basic dynamic algorithm."};
}
/**
 * \brief Program entry point: reads the command line and runs one algorithm.
 *
 * The steps are:
 *
 * - print the help and exit when it is requested (FHelpDescribeAndExit);
 * - fill an FPerfTestParams from the command line, using a default value for
 *   every option that is not given;
 * - when compiled with SCALFMM_USE_MPI and the algorithm name starts with
 *   "mpi-", create the MPI context and record the process count;
 * - print the host name and set the OpenMP thread count;
 * - call runperf with the loaders matching the requested algorithm.
 *
 * \param argc Number of command line arguments.
 * \param argv Command line arguments.
 *
 * \return EXIT_SUCCESS when a known algorithm was selected and run,
 *         EXIT_FAILURE when the algorithm name is not recognised.
 */
int main (int argc, char** argv)
{
    // Parameter handling //////////////
    FHelpDescribeAndExit(argc, argv,
                         "Performance test program for FMM balancing techniques. "
#ifdef SCALFMM_USE_MPI
                         "This program has been compiled with MPI superpowers !"
#endif
                         ,
                         FParameterDefinitions::InputFile,
                         FParameterDefinitions::OctreeHeight,
                         FParameterDefinitions::OctreeSubHeight,
                         FParameterDefinitions::NbThreads,
                         ParName::Algo,
                         ParName::Schedule,
                         ParName::ChunkSize);
    FPerfTestParams params;
    {
        using namespace FParameterDefinitions;
        using namespace FParameters;
        params.filename = getStr(argc,argv,InputFile.options,
                                 "../Data/unitCubeXYZQ100.bfma");
        params.treeHeight = getValue(argc, argv, OctreeHeight.options, 5);
        params.subTreeHeight = getValue(argc, argv, OctreeSubHeight.options, 2);
        params.nbThreads = getValue(argc, argv, NbThreads.options, 1);
        params.algo = getStr(argc,argv,ParName::Algo.options,"task");
        params.omp_chunk_size = getValue(argc, argv, ParName::ChunkSize.options, 0);
        // NOTE(review): ParName::Schedule is advertised in the help above but
        // its value is not read here - confirm whether that is intended.
#ifdef SCALFMM_USE_MPI
        // The MPI context is only created for the algorithms that need it.
        std::string prefix("mpi-");
        if( params.algo.substr(0, prefix.size()) == prefix ) {
            params.mpiContext = new FMpi(argc,argv);
            params.nbProcs = params.mpiContext->global().processCount();
        }
#endif
    }
    // End of Parameter handling ///////

    // The buffer is zero-filled and gethostname is given one byte less than
    // its size, so the stored name always stays null-terminated.
    char hostname[HOST_NAME_MAX];
    memset(hostname,'\0',HOST_NAME_MAX);
    if ( -1 == gethostname(hostname, HOST_NAME_MAX-1) ) {
        perror("Could not get hostname");
        strncpy(hostname, "unknown", HOST_NAME_MAX);
    }
    std::cout << "Hostname: " << hostname << std::endl;

    omp_set_num_threads(params.nbThreads);

    using FReal = double;
    constexpr const int ORDER = 7;

    // Stays EXIT_SUCCESS unless the algorithm name is not recognised. The
    // status is returned at the very end so that the MPI context is released
    // on every path.
    int exitStatus = EXIT_SUCCESS;

    if( "basic" == params.algo ) {
        runperf<TreeLoaderFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderThread>
            (params);
    } else if( "task" == params.algo ) {
        runperf<TreeLoaderFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderTask>
            (params);
    } else if ( "costzones" == params.algo ) {
        runperf<TreeLoaderFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderCostZones>
            (params);
    } else if ( "sectiontask" == params.algo ) {
        runperf<TreeLoaderFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderSectionTask>
            (params);
    } else if ( "autobalance" == params.algo ) {
        runperf<TreeLoaderFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderThreadBalance>
            (params);
#ifdef SCALFMM_USE_MPI
    } else if ( "mpi-split" == params.algo ) {
        runperf<TreeLoaderMpiSplitFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderThreadProc>
            (params);
    } else if ( "mpi-generic" == params.algo ) {
        runperf<TreeLoaderMpiGenericFCheb<FReal,ORDER>,
                KernelLoaderFChebSym,
                AlgoLoaderThreadProc>
            (params);
#endif
    } else {
        // Report the problem on the error stream and through the exit status
        // so that calling scripts can detect that nothing was run.
        std::cerr << "Unknown algorithm: " << params.algo << std::endl;
        exitStatus = EXIT_FAILURE;
    }

#ifdef SCALFMM_USE_MPI
    // Deleting a null pointer is a no-op, so no test is needed.
    delete params.mpiContext;
    params.mpiContext = nullptr;
#endif

    return exitStatus;
}
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
#ifndef _ALGOLOADERCOSTZONES_HPP_
#define _ALGOLOADERCOSTZONES_HPP_
#include <memory>
#include <sstream>
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp"
#include "BalanceTree/FCostCell.hpp"
#include "BalanceTree/FCostZones.hpp"
/**
 * \brief Loader that runs FFmmAlgorithmThreadBalanced on top of cost zones.
 *
 * Refer to the FAlgoLoader documentation for the general loader contract.
 *
 * \warning The KernelLoader must provide a `CostKernelClass` type definition;
 * it is the kernel used to compute the cost of the tree cells.
 */
template <class TreeLoaderT, template<typename> class KernelLoaderT>
class AlgoLoaderCostZones : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Type aliases, exposed so that other classes can reuse them.
    /// The tree loader type in use.
    using TreeLoader      = TreeLoaderT;
    /// The kernel loader type in use, bound to the tree loader.
    using KernelLoader    = KernelLoaderT<TreeLoader>;

    using FReal           = typename TreeLoader::FReal;
    using CellClass       = typename TreeLoader::CellClass;
    using ContainerClass  = typename TreeLoader::ContainerClass;
    using LeafClass       = typename TreeLoader::LeafClass;
    using OctreeClass     = typename TreeLoader::OctreeClass;
    using KernelClass     = typename KernelLoader::KernelClass;
    using CostKernelClass = typename KernelLoader::CostKernelClass;

    static_assert(std::is_base_of<FCostCellTypeTrait, CellClass>::value,
                  "The tree cells must derive from FCostCell.");

    /// Balanced FMM algorithm, run with the computation kernel.
    using FMMClass = FFmmAlgorithmThreadBalanced<
        OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;
    /// Threaded FMM algorithm, run with the cost kernel.
    using CostFmmClass = FFmmAlgorithmThread<
        OctreeClass, CellClass, ContainerClass, CostKernelClass, LeafClass>;

    /// Accumulates the "costgen:" and "zonegen:" timings written by run().
    std::stringstream _infostring;
    /// The tree loader that holds the tree.
    TreeLoader& _treeLoader;
    /// The kernel loader that holds both kernels.
    KernelLoader& _kernelLoader;
    /// The #FMMClass instance; null until run() is called.
    std::unique_ptr<FMMClass> _algo;

    /// Stores references to the loaders; the parameters are not used.
    AlgoLoaderCostZones(FPerfTestParams&,
                        TreeLoader& treeLoader,
                        KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader),
          _algo() {
    }

    /// Computes the tree cell costs, builds the cost zones and runs the FMM.
    void run() {
        auto& timer = this->time;
        // The tree itself is owned by the tree loader.
        OctreeClass* tree = &(_treeLoader._tree);

        // Step 1: cost of the tree cells, using the cost kernel.
        CostFmmClass costAlgo(tree, &(_kernelLoader._costKernel));
        timer.tic();
        costAlgo.execute();
        timer.tac();
        std::cout << "Generating tree cost: " << timer.elapsed() << "s.\n";
        _infostring << "costgen:" << timer.elapsed() << " ";

        // Step 2: cost zones, one per OpenMP thread.
        FCostZones<OctreeClass, CellClass> costzones(tree, omp_get_max_threads());
        timer.tic();
        costzones.run();
        timer.tac();
        std::cout << "Generating cost zones: " << timer.elapsed() << "s.\n";
        _infostring << "zonegen:" << timer.elapsed() << " ";

        // Step 3: FMM algorithm; its construction is part of the timed span.
        timer.tic();
        _algo.reset(new FMMClass(tree, &(_kernelLoader._kernel),
                                 costzones.getZoneBounds(),
                                 costzones.getLeafZoneBounds()));
        _algo->execute();
        timer.tac();
    }

    /// Returns the timings gathered by run(), as "key:value " pairs.
    std::string getRunInfoString() const {
        return _infostring.str();
    }

    /// Forwards to the algorithm instance; run() must have been called first.
    double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
        return _algo->getCumulatedTime(timerName);
    }
};
#endif
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
#ifndef _ALGOLOADERSECTIONTASK_HPP_
#define _ALGOLOADERSECTIONTASK_HPP_
#include <memory>
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmSectionTask.hpp"
/**
 * \brief Algorithm loader for FFmmAlgorithmSectionTask.
 *
 * See FAlgoLoader.
 */
template <class TreeLoaderT, template<typename> class KernelLoaderT>
class AlgoLoaderSectionTask : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Type aliases, exposed so that other classes can reuse them.
    using TreeLoader     = TreeLoaderT;
    using KernelLoader   = KernelLoaderT<TreeLoader>;

    using FReal          = typename TreeLoader::FReal;
    using CellClass      = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass      = typename TreeLoader::LeafClass;
    using OctreeClass    = typename TreeLoader::OctreeClass;
    using KernelClass    = typename KernelLoader::KernelClass;

    /// FMM algorithm class.
    using FMMClass = FFmmAlgorithmSectionTask<
        OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;

    /// The tree loader that holds the tree.
    TreeLoader& _treeLoader;
    /// The kernel loader that holds the kernel.
    KernelLoader& _kernelLoader;
    /// The #FMMClass instance; null until run() is called.
    std::unique_ptr<FMMClass> _algo;

    /// Stores references to the loaders; the parameters are not used.
    AlgoLoaderSectionTask(FPerfTestParams&,
                          TreeLoader& treeLoader,
                          KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader),
          _algo() {
    }

    /// Creates the algorithm on the loaded tree and kernel, then executes it.
    void run() {
        auto* tree   = &(_treeLoader._tree);
        auto* kernel = &(_kernelLoader._kernel);
        _algo.reset(new FMMClass(tree, kernel));
        _algo->execute();
    }

    /// Forwards to the algorithm instance; run() must have been called first.
    double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
        return _algo->getCumulatedTime(timerName);
    }
};
#endif
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
#ifndef _ALGOLOADERTASK_HPP_
#define _ALGOLOADERTASK_HPP_
#include <memory>
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmTask.hpp"
/**
 * \brief Algorithm loader for FFmmAlgorithmTask.
 *
 * See FAlgoLoader.
 */
template <class TreeLoaderT, template<typename> class KernelLoaderT>
class AlgoLoaderTask : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Type aliases, exposed so that other classes can reuse them.
    using TreeLoader     = TreeLoaderT;
    using KernelLoader   = KernelLoaderT<TreeLoader>;

    using FReal          = typename TreeLoader::FReal;
    using CellClass      = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass      = typename TreeLoader::LeafClass;
    using OctreeClass    = typename TreeLoader::OctreeClass;
    using KernelClass    = typename KernelLoader::KernelClass;

    /// FMM algorithm class.
    using FMMClass = FFmmAlgorithmTask<
        OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;

    /// The tree loader that holds the tree.
    TreeLoader& _treeLoader;
    /// The kernel loader that holds the kernel.
    KernelLoader& _kernelLoader;
    /// The #FMMClass instance; null until run() is called.
    std::unique_ptr<FMMClass> _algo;

    /// Stores references to the loaders; the parameters are not used.
    AlgoLoaderTask(FPerfTestParams&,
                   TreeLoader& treeLoader,
                   KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader),
          _algo() {
    }

    /// Creates the algorithm on the loaded tree and kernel, then executes it.
    void run() {
        auto* tree   = &(_treeLoader._tree);
        auto* kernel = &(_kernelLoader._kernel);
        _algo.reset(new FMMClass(tree, kernel));
        _algo->execute();
    }

    /// Forwards to the algorithm instance; run() must have been called first.
    double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
        return _algo->getCumulatedTime(timerName);
    }
};
#endif
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
#ifndef _ALGOLOADERTHREAD_HPP_
#define _ALGOLOADERTHREAD_HPP_
#include <memory>
#include <sstream>
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
/**
 * \brief Loader that runs FFmmAlgorithmThread with a configurable chunk size.
 *
 * See FAlgoLoader.
 */
template <class TreeLoaderT, template<typename> class KernelLoaderT>
class AlgoLoaderThread : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Type aliases, exposed so that other classes can reuse them.
    using TreeLoader     = TreeLoaderT;
    using KernelLoader   = KernelLoaderT<TreeLoader>;

    using FReal          = typename TreeLoader::FReal;
    using CellClass      = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass      = typename TreeLoader::LeafClass;
    using OctreeClass    = typename TreeLoader::OctreeClass;
    using KernelClass    = typename KernelLoader::KernelClass;

    /// FMM algorithm class.
    using FMMClass = FFmmAlgorithmThread<
        OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;

    /// The tree loader (FTreeLoader) that was used.
    TreeLoader& _treeLoader;
    /// The kernel loader (FKernelLoader) that was used.
    KernelLoader& _kernelLoader;
    /// Chunk size for OpenMP, copied from the run parameters.
    unsigned int _omp_chunk_size;
    /// The #FMMClass algorithm instance; null until run() is called.
    std::unique_ptr<FMMClass> _algo;

    /// Stores references to the loaders and copies the chunk size.
    AlgoLoaderThread(FPerfTestParams& params,
                     TreeLoader& treeLoader,
                     KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader),
          _omp_chunk_size(params.omp_chunk_size),
          _algo() {
    }

    /// Creates the algorithm, applies the chunk size, then executes it.
    void run() {
        auto* tree   = &(_treeLoader._tree);
        auto* kernel = &(_kernelLoader._kernel);
        _algo.reset(new FMMClass(tree, kernel));
        _algo->setChunkSize(_omp_chunk_size);
        _algo->execute();
    }

    /// Returns "chunksize:<value> ", for use in the run summary.
    virtual std::string getRunInfoString() const {
        std::ostringstream info;
        info << "chunksize:" << _omp_chunk_size << " ";
        return info.str();
    }

    /// Forwards to the algorithm instance; run() must have been called first.
    double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
        return _algo->getCumulatedTime(timerName);
    }
};
#endif
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
#ifndef _ALGOLOADERTHREADBALANCE_HPP_
#define _ALGOLOADERTHREADBALANCE_HPP_
#include <memory>
#include <sstream>
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmThreadBalance.hpp"
/**
 * \brief An algorithm loader for FFmmAlgorithmThreadBalance
 *
 * See FAlgoLoader documentation.
 */
template <class _TreeLoader, template<typename> class _KernelLoader>
class AlgoLoaderThreadBalance : public FAlgoLoader<_TreeLoader, _KernelLoader> {
public:
    // Type definitions, allows them to be reused by other classes
    using TreeLoader = _TreeLoader;
    using KernelLoader = _KernelLoader<TreeLoader>;
    using FReal = typename TreeLoader::FReal;
    using CellClass = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass = typename TreeLoader::LeafClass;
    using OctreeClass = typename TreeLoader::OctreeClass;
    using KernelClass = typename KernelLoader::KernelClass;
    /// FMM algorithm class
    using FMMClass = FFmmAlgorithmThreadBalance<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;
    /// The tree loader that holds the tree
    TreeLoader& _treeLoader;
    /// The kernel loader that holds the kernel
    KernelLoader& _kernelLoader;
    /// The #FMMClass algorithm instance; null until run() is called
    std::unique_ptr<FMMClass> _algo;

    /// Stores references to the loaders. The parameters are not used, so the
    /// argument is left unnamed, as in the other loaders that ignore it.
    AlgoLoaderThreadBalance(FPerfTestParams& /*params*/,
                            TreeLoader& treeLoader,
                            KernelLoader& kernelLoader) :
        _treeLoader(treeLoader),
        _kernelLoader(kernelLoader),
        _algo(nullptr) {
    }

    /// Creates the algorithm on the loaded tree and kernel, then executes it.
    void run() {
        _algo = std::unique_ptr<FMMClass>(
            new FMMClass(&(_treeLoader._tree), &(_kernelLoader._kernel)));
        _algo->execute();
    }

    /// This loader has no extra run information: returns an empty string.
    virtual std::string getRunInfoString() const {
        return "";
    }

    /// Forwards to the algorithm instance; run() must have been called first.
    double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
        return _algo->getCumulatedTime(timerName);
    }
};
#endif
// ==== CMAKE ====
// Keep in private GIT
// @SCALFMM_PRIVATE
#ifndef _ALGOLOADERTHREADPROC_HPP_
#define _ALGOLOADERTHREADPROC_HPP_
#include <memory>
#include <sstream>
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmThreadProc.hpp"
#include "Utils/FMpi.hpp"
/**
* \brief Algorithm loader for FFmmAlgorithmThreadProc
*
* See FAlgoLoader.
*/
template <class _TreeLoader, template<typename> class _KernelLoader>
class AlgoLoaderThreadProc : public FAlgoLoader<_TreeLoader, _KernelLoader> {
public:
// Type definitions, allows them to be reused by other classes
using TreeLoader = _TreeLoader;
using KernelLoader = _KernelLoader<TreeLoader>;
using FReal = typename TreeLoader::FReal;
using CellClass = typename TreeLoader::CellClass;
using ContainerClass = typename TreeLoader::ContainerClass;
using LeafClass = typename TreeLoader::LeafClass;
using OctreeClass = typename TreeLoader::OctreeClass;
using KernelClass = typename KernelLoader::KernelClass;
/// FMM algorithm class
using FMMClass = FFmmAlgorithmThreadProc<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;
FMpi* _mpiContext;
/// The tree loader (FTreeLoader) that was used
TreeLoader& _treeLoader;
/// The kernel loader (FKernelLoader) that was used
KernelLoader& _kernelLoader;
/// The #FMMClass algorithm instance
std::unique_ptr<FMMClass> _algo;
/// Array of MPI gathered cumulated times
double timers[FAlgorithmTimers::nbTimers] {0};
AlgoLoaderThreadProc(FPerfTestParams& params,
TreeLoader& treeLoader,
KernelLoader& kernelLoader) :
_mpiContext(params.mpiContext),
_treeLoader(treeLoader),
_kernelLoader(kernelLoader),
_algo(nullptr) {
}
void run() {
_algo = std::unique_ptr<FMMClass>(
new FMMClass(_mpiContext->global(), &(_treeLoader._tree), &(_kernelLoader._kernel)));
_algo->execute();
for( int idxTimer = 0; idxTimer < FAlgorithmTimers::nbTimers; ++idxTimer ) {
timers[idxTimer] = _algo->getCumulatedTime(FAlgorithmTimers::FTimers(idxTimer));
}
if( _mpiContext->global().processId() == 0) {
MPI_Reduce(MPI_IN_PLACE, timers, FAlgorithmTimers::nbTimers, MPI_DOUBLE, MPI_MAX, 0, _mpiContext->global().getComm());
} else {
MPI_Reduce(timers, NULL, FAlgorithmTimers::nbTimers, MPI_DOUBLE, MPI_MAX, 0, _mpiContext->global().getComm());
}
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {