Commit bd29df69 authored by Quentin Khan's avatar Quentin Khan

Merge branch 'development' into 'master'

parents fefb8080 9315ae7c
......@@ -45,107 +45,116 @@
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmThreadProc
* @brief
*
* Please read the license
*
* This class is a threaded FMM algorithm with mpi.
* It just iterates on a tree and call the kernels with good arguments.
* It used the inspector-executor model :
* iterates on the tree and builds an array to work in parallel on this array
* This class is a threaded FMM algorithm distributed using MPI. It iterates on
* a tree and calls the kernels with the appropriate arguments. It uses the
* inspector-executor model: it iterates on the tree and builds an array, then
* works in parallel on this array.
*
* Of course this class does not deallocate pointer given in arguements.
* This class does not free the pointers given as arguments.
*
* Threaded & based on the inspector-executor model
* schedule(runtime) export OMP_NUM_THREADS=2
* export OMPI_CXX=`which g++-4.4`
* mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp
* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes
* ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
*
* schedule(runtime) export OMP_NUM_THREADS=2
* export OMPI_CXX=`which g++-4.4`
* mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp
* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20
* --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
const FMpi::FComm& comm; //< MPI comm
private:
OctreeClass* const tree; ///< The octree to work on
KernelClass** kernels; ///< The kernels
typename OctreeClass::Iterator* iterArray; //Will be used to store pointers to cells/leafs to work with
typename OctreeClass::Iterator* iterArrayComm; //Will be used to store pointers to cells/leafs to send/rcv
int numberOfLeafs; //< To store the size at the previous level
const FMpi::FComm& comm; ///< MPI comm
const int MaxThreads; //< the max number of thread allowed by openmp
/// Used to store pointers to cells/leafs to work with
typename OctreeClass::Iterator* iterArray;
/// Used to store pointers to cells/leafs to send/rcv
typename OctreeClass::Iterator* iterArrayComm;
const int nbProcess; //< Number of process
const int idProcess; //< Id of current process
const int OctreeHeight; //<Height of the tree
int numberOfLeafs; ///< To store the size at the previous level
const int MaxThreads; ///< Max number of thread allowed by openmp
const int nbProcess; ///< Process count
const int idProcess; ///< Current process id
const int OctreeHeight; ///< Tree height
const int leafLevelSeperationCriteria;
/** An interval is the morton index interval
* that a proc use (it holds data in this interval)
*/
* that a proc uses (i.e. it holds data in this interval) */
struct Interval{
// Lower Morton index bound of the interval.
MortonIndex leftIndex;
// Upper Morton index bound of the interval. procHasWorkAtLevel() treats an
// interval as holding work only when leftIndex <= rightIndex.
MortonIndex rightIndex;
// NOTE: arrays of Interval are exchanged between processes as raw bytes
// (sizeof(Interval) with MPI_BYTE), so this layout must stay plain data.
};
/** My interval */
/// Current process interval
Interval*const intervals;
/** All process intervals */
/// All processes intervals
Interval*const workingIntervalsPerLevel;
/** Get an interval from proc id and level */
/// Get an interval from a process id and tree level
Interval& getWorkingInterval( int level, int proc){
    // The table is flat: each process owns OctreeHeight consecutive slots,
    // one per tree level.
    const int flatIndex = OctreeHeight * proc + level;
    return workingIntervalsPerLevel[flatIndex];
}
/// Get an interval from a process id and tree level
const Interval& getWorkingInterval( int level, int proc) const {
    // Read-only counterpart of the mutable accessor: same flat layout with
    // OctreeHeight slots per process, indexed by level.
    const Interval* const procIntervals = workingIntervalsPerLevel + OctreeHeight * proc;
    return procIntervals[level];
}
/** To know if a proc has work at a given level (if it hold cells and was responsible of them) */
/// Does \a idxProc have work at the given \a idxLevel ?
/** i.e. does it hold cells and is responsible of them ? */
bool procHasWorkAtLevel(const int idxLevel , const int idxProc) const {
return getWorkingInterval(idxLevel, idxProc).leftIndex <= getWorkingInterval(idxLevel, idxProc).rightIndex;
}
/** Return true if the idxProc left cell at idxLevel+1 has the same parent as us for our right cell */
/** True if the \a idxProc left cell at \a idxLevel+1 has the same parent as us for our right cell */
bool procCoversMyRightBorderCell(const int idxLevel , const int idxProc) const {
    const int childLevel = idxLevel + 1;
    // Shifting a Morton index right by 3 bits gives the value compared as the
    // parent of the cell.
    const auto myRightParent    = getWorkingInterval(childLevel, idProcess).rightIndex >> 3;
    const auto otherLeftParent  = getWorkingInterval(childLevel, idxProc).leftIndex >> 3;
    return myRightParent == otherLeftParent;
}
/** Return true if the idxProc right cell at idxLevel+1 has the same parent as us for our left cell */
/** True if the idxProc right cell at idxLevel+1 has the same parent as us for our left cell */
bool procCoversMyLeftBorderCell(const int idxLevel , const int idxProc) const {
    const int childLevel = idxLevel + 1;
    // Compare the parents (index >> 3) of the other process' right-most cell
    // and of our own left-most cell at the child level.
    const auto otherRightParent = getWorkingInterval(childLevel, idxProc).rightIndex >> 3;
    const auto myLeftParent     = getWorkingInterval(childLevel, idProcess).leftIndex >> 3;
    return otherRightParent == myLeftParent;
}
public:
/** Get current proc interval at level */
/// Get current process interval at given \a level
Interval& getWorkingInterval( int level){
    // Convenience overload: forwards to the two-argument accessor with the
    // id of the current process.
    return this->getWorkingInterval(level, this->idProcess);
}
/** Does the current proc has some work at this level */
/// Does the current process have some work at this level ?
bool hasWorkAtLevel( int level){
    // Process 0 has no predecessor to compare against and always answers true.
    if(idProcess == 0){
        return true;
    }
    // Otherwise our right bound must lie strictly beyond the right bound of
    // the previous process at this level.
    const auto previousRight = getWorkingInterval(level, idProcess - 1).rightIndex;
    const auto ownRight      = getWorkingInterval(level, idProcess).rightIndex;
    return previousRight < ownRight;
}
/** The constructor need the octree and the kernels used for computation
/**@brief Constructor
* @param inTree the octree to work on
* @param inKernels the kernels to call
*
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), comm(inComm), iterArray(nullptr),iterArrayComm(nullptr),numberOfLeafs(0),
MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
OctreeHeight(tree->getHeight()),
leafLevelSeperationCriteria(inLeafLevelSeperationCriteria),
intervals(new Interval[inComm.processCount()]),
workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()])
{
FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) :
tree(inTree),
kernels(nullptr),
comm(inComm),
iterArray(nullptr),
iterArrayComm(nullptr),
numberOfLeafs(0),
MaxThreads(omp_get_max_threads()),
nbProcess(inComm.processCount()),
idProcess(inComm.processId()),
OctreeHeight(tree->getHeight()),
leafLevelSeperationCriteria(inLeafLevelSeperationCriteria),
intervals(new Interval[inComm.processCount()]),
workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) {
FAssertLF(tree, "tree cannot be null");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
......@@ -160,7 +169,8 @@ public:
FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n");
FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
}
/** Default destructor */
/// Default destructor
virtual ~FFmmAlgorithmThreadProc(){
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
delete this->kernels[idxThread];
......@@ -250,16 +260,6 @@ protected:
workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ );
}
// run;
// if(operationsToProceed & FFmmP2M) bottomPass();
// if(operationsToProceed & FFmmM2M) upwardPass();
// if(operationsToProceed & FFmmM2L) transferPass();
// if(operationsToProceed & FFmmL2L) downardPass();
// if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P));
Timers[P2MTimer].tic();
if(operationsToProceed & FFmmP2M) bottomPass();
Timers[P2MTimer].tac();
......
......@@ -213,6 +213,22 @@ private:
FReal * tmpVal; ///< Temporary array to read data
/// Count of other data pieces to read in a particle record after the 4 first ones.
unsigned int otherDataToRead;
/**
 * @brief Opens the particle file for reading and stores the stream in #file.
 *
 * The program is terminated with EXIT_FAILURE if the file cannot be opened.
 *
 * @param filename path of the file to open
 * @param binary   true to open the file in binary mode, false for text mode
 */
void open_file(const std::string& filename, const bool binary) {
    // Text and binary modes differ only by the binary flag.
    std::ios_base::openmode mode = std::ios_base::in;
    if(binary) {
        mode |= std::ios_base::binary;
    }
    this->file = new std::fstream(filename.c_str(), mode);
    // Save errno right away: the stream insertions below may overwrite it
    // before strerror gets a chance to read it.
    const int openError = errno;
    // test if open
    if(! this->file->is_open()){
        std::cerr << "File "<< filename<<" not opened! Error: " << strerror(openError) <<std::endl;
        std::exit( EXIT_FAILURE);
    }
    std::cout << "Opened file "<< filename << std::endl;
}
public:
/**
* This constructor opens a file using the given mode and reads its
......@@ -228,18 +244,7 @@ public:
file(nullptr), binaryFile(binary), centerOfBox(0.0,0.0,0.0), boxWidth(0.0),
nbParticles(0), tmpVal(nullptr), otherDataToRead(0)
{
if(binary) {
this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary);
}
else {
this->file = new std::fstream(filename.c_str(),std::ifstream::in) ;
}
// test if open
if(! this->file->is_open()){
std::cerr << "File "<< filename<<" not opened! " <<std::endl;
std::exit( EXIT_FAILURE);
}
std::cout << "Open file "<< filename << std::endl;
this->open_file(filename, binary);
this->readHeader();
}
......@@ -256,29 +261,20 @@ public:
*/
FFmaGenericLoader(const std::string & filename) : file(nullptr),binaryFile(false),
centerOfBox(0.0,0.0,0.0),boxWidth(0.0),nbParticles(0),tmpVal(nullptr),otherDataToRead(0) {
std::string ext(".bfma");
// open particle file
if(filename.find(ext) != std::string::npos) {
if( filename.find(".bfma") != std::string::npos ) {
binaryFile = true;
this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary);
}
else if(filename.find(".fma")!=std::string::npos ) {
this->file = new std::fstream(filename.c_str(),std::ifstream::in) ;
}
else {
} else if( filename.find(".fma")!=std::string::npos ) {
binaryFile = false;
} else {
std::cout << "FFmaGenericLoader: "
<< "Only .fma or .bfma input file are allowed. Got "
<< filename << "."
<< std::endl;
std::exit ( EXIT_FAILURE) ;
}
// test if open
if(! this->file->is_open()){
std::cerr << "File "<< filename<<" not opened! " <<std::endl;
std::exit( EXIT_FAILURE);
}
std::cout << "Open file "<< filename << std::endl;
this->open_file(filename, binaryFile);
this->readHeader();
}
......
......@@ -18,53 +18,60 @@
#define FALGORITHMTIMERS_HPP
/**
* @brief This class provide a way for the different algorithms to
* store the time spent in each operator.
* @brief Collection of timers for FMM operators.
*
* This class provides a way for the different algorithms to
* store the time spent in each operator.
*/
class FAlgorithmTimers{
protected:
FTic * Timers;
public:
enum FTimers{
P2MTimer=0,
M2MTimer=1,
M2LTimer=2,
L2LTimer=3,
L2PTimer=4,
P2PTimer=5,
NearTimer=6,
/// The timer names
enum FTimers {
P2MTimer,
M2MTimer,
M2LTimer,
L2LTimer,
L2PTimer,
P2PTimer,
NearTimer,
nbTimers ///< Timer count
};
const int nbTimers = 7;
protected:
/// Timer array
FTic Timers[nbTimers];
FAlgorithmTimers() : Timers(nullptr)
public:
/// Constructor: resets all timers
FAlgorithmTimers()
{
Timers = new FTic[nbTimers];
for(int i=0; i<nbTimers ; ++i){
for(int i = 0; i < nbTimers ; ++i){
Timers[i].reset();
}
}
virtual ~FAlgorithmTimers(){
delete[] Timers;
}
/// Default copy constructor
FAlgorithmTimers(const FAlgorithmTimers&) = default;
/// Default move constructor
FAlgorithmTimers(FAlgorithmTimers&&) = default;
/// Returns the timer array
const FTic * getAllTimers() const {
    // Read-only pointer to the first timer; entries are indexed by FTimers.
    return &Timers[0];
}
/// Returns the timer count
int getNbOfTimerRecorded() const {
    // Number of entries reachable through getAllTimers().
    const int timerCount = nbTimers;
    return timerCount;
}
/// Elapsed time between last FTic::tic() and FTic::tac() for given timer.
double getTime(FTimers OpeTimer) const{
    // NOTE(review): OpeTimer is used as an index without a range check
    // (the original comment asks for an assert here).
    auto& selectedTimer = Timers[OpeTimer];
    return selectedTimer.elapsed();
}
/// Cumulated time between all FTic::tic() and FTic::tac() for given timer.
double getCumulatedTime(FTimers OpeTimer) const{
//assert to verify size
return Timers[OpeTimer].cumulated();
......
......@@ -159,7 +159,7 @@ namespace FParameters{
inline const VariableType getValue(const int argc, const char* const * const argv, const std::vector<const char*>& inNames, const VariableType& defaultValue = VariableType(), const bool caseSensible = false){
for(const char* name : inNames){
const int position = findParameter(argc, argv, name, caseSensible);
FAssertLF(position == NotFound || position != argc - 1);
FAssertLF(position == NotFound || position != argc - 1, "Could no find a value for argument: ",name, ". " );
if(position != NotFound && position != argc - 1){
return StrToOther(argv[position+1],defaultValue);
}
......@@ -172,7 +172,7 @@ namespace FParameters{
inline const char* getStr(const int argc, const char* const * const argv, const std::vector<const char*>& inNames, const char* const inDefault, const bool caseSensible = false){
for(const char* name : inNames){
const int position = findParameter(argc, argv, name, caseSensible);
FAssertLF(position == NotFound || position != argc - 1);
FAssertLF(position == NotFound || position != argc - 1, "Could no find a value for argument: ",name, ". ");
if(position != NotFound && position != argc - 1){
return argv[position+1];
}
......
......@@ -43,6 +43,7 @@
#include "PerfTest/AlgoLoaderThreadProc.hpp"
#endif
#define HOST_NAME_MAX 64
/**
* \brief Runs a generic sequence of actions to use an algorithm.
......@@ -66,22 +67,30 @@ void runperf(FPerfTestParams& params)
AlgoLoader<TreeLoader, KernelLoader> algoLoader(params, treeLoader, kernelLoader);
algoLoader.run();
auto& algo = *(algoLoader._algo);
char hostname[HOST_NAME_MAX];
memset(hostname,'\0',HOST_NAME_MAX);
if ( -1 == gethostname(hostname, HOST_NAME_MAX-1) ) {
perror("Could not get hostname");
strncpy(hostname, "unknown", HOST_NAME_MAX);
}
std::cout << "@@ "
<< "host:" << hostname << " "
<< "algo:" << params.algo << " "
<< "file:" << params.filename.substr(params.filename.find_last_of('/')+1) << " "
<< "file:" << params.filename.substr(
params.filename.find_last_of('/')+1 ) << " "
<< "particles:" << treeLoader._loader.getNumberOfParticles() << " "
<< "threads:" << params.nbThreads << " "
<< "height:" << params.treeHeight << " "
<< "subheight:" << params.subTreeHeight << " "
<< "procs:" << params.nbProcs << " "
<< "threads:" << params.nbThreads << " "
<< "height:" << params.treeHeight << " "
<< "subheight:" << params.subTreeHeight << " "
<< algoLoader.getRunInfoString()
<< "P2M:" << algo.getCumulatedTime(FAlgorithmTimers::P2MTimer) << " "
<< "M2M:" << algo.getCumulatedTime(FAlgorithmTimers::M2MTimer) << " "
<< "M2L:" << algo.getCumulatedTime(FAlgorithmTimers::M2LTimer) << " "
<< "L2L:" << algo.getCumulatedTime(FAlgorithmTimers::L2LTimer) << " "
<< "P2PL2P:" << algo.getCumulatedTime(FAlgorithmTimers::NearTimer) << " "
<< "P2M:" << algoLoader.getCumulatedTime(FAlgorithmTimers::P2MTimer) << " "
<< "M2M:" << algoLoader.getCumulatedTime(FAlgorithmTimers::M2MTimer) << " "
<< "M2L:" << algoLoader.getCumulatedTime(FAlgorithmTimers::M2LTimer) << " "
<< "L2L:" << algoLoader.getCumulatedTime(FAlgorithmTimers::L2LTimer) << " "
<< "P2PL2P:" << algoLoader.getCumulatedTime(FAlgorithmTimers::NearTimer) << " "
<< std::endl;
}
namespace ParName {
......@@ -96,7 +105,6 @@ namespace ParName {
int main (int argc, char** argv)
{
// Parameter handling //////////////
FHelpDescribeAndExit(argc, argv,
"Performance test program for FMM balancing techniques. "
......@@ -127,11 +135,20 @@ int main (int argc, char** argv)
std::string prefix("mpi-");
if( params.algo.substr(0, prefix.size()) == prefix ) {
params.mpiContext = new FMpi(argc,argv);
params.nbProcs = params.mpiContext->global().processCount();
}
#endif
}
// End of Parameter handling ///////
char hostname[HOST_NAME_MAX];
memset(hostname,'\0',HOST_NAME_MAX);
if ( -1 == gethostname(hostname, HOST_NAME_MAX-1) ) {
perror("Could not get hostname");
strncpy(hostname, "unknown", HOST_NAME_MAX);
}
std::cout << "Hostname: " << hostname << std::endl;
omp_set_num_threads(params.nbThreads);
using FReal = double;
......
......@@ -101,6 +101,11 @@ public:
std::string getRunInfoString() const {
    // Copy of the text accumulated in the _infostring stream.
    std::string runInfo(_infostring.str());
    return runInfo;
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
    // Delegates to the algorithm instance held in _algo.
    const double cumulated = _algo->getCumulatedTime(timerName);
    return cumulated;
}
};
......
......@@ -48,6 +48,10 @@ public:
new FMMClass(&(_treeLoader._tree), &(_kernelLoader._kernel)));
_algo->execute();
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
    // Cumulated time of the requested timer, as recorded by the algorithm.
    return (*_algo).getCumulatedTime(timerName);
}
};
......
......@@ -48,6 +48,10 @@ public:
new FMMClass(&(_treeLoader._tree), &(_kernelLoader._kernel)));
_algo->execute();
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
    // Forward the query to the underlying algorithm object.
    const double total = (*_algo).getCumulatedTime(timerName);
    return total;
}
};
......
......@@ -71,6 +71,10 @@ public:
return sstr.str();
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
    // The loader keeps no timing of its own; the algorithm owns the timers.
    const double elapsedTotal = _algo->getCumulatedTime(timerName);
    return elapsedTotal;
}
};
......
......@@ -58,6 +58,10 @@ public:
return "";
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
    // Thin pass-through to the algorithm's timer accessor.
    const auto& algorithm = *_algo;
    return algorithm.getCumulatedTime(timerName);
}
};
......
......@@ -47,6 +47,10 @@ public:
/// The #FMMClass algorithm instance
std::unique_ptr<FMMClass> _algo;
/// Array of MPI gathered cumulated times
double timers[FAlgorithmTimers::nbTimers] {0};
AlgoLoaderThreadProc(FPerfTestParams& params,
TreeLoader& treeLoader,
KernelLoader& kernelLoader) :
......@@ -56,11 +60,26 @@ public:
_algo(nullptr) {
}
void run() {
_algo = std::unique_ptr<FMMClass>(
new FMMClass(_mpiContext->global(), &(_treeLoader._tree), &(_kernelLoader._kernel)));
_algo->execute();
for( int idxTimer = 0; idxTimer < FAlgorithmTimers::nbTimers; ++idxTimer ) {
timers[idxTimer] = _algo->getCumulatedTime(FAlgorithmTimers::FTimers(idxTimer));
}
if( _mpiContext->global().processId() == 0) {
MPI_Reduce(MPI_IN_PLACE, timers, FAlgorithmTimers::nbTimers, MPI_DOUBLE, MPI_MAX, 0, _mpiContext->global().getComm());
} else {
MPI_Reduce(timers, NULL, FAlgorithmTimers::nbTimers, MPI_DOUBLE, MPI_MAX, 0, _mpiContext->global().getComm());
}
}
double getCumulatedTime(FAlgorithmTimers::FTimers timerName) const {
    // Served from the local timers array filled by run(), not from the
    // algorithm object.
    const int slot = static_cast<int>(timerName);
    return timers[slot];
}
};
......
......@@ -26,6 +26,7 @@ struct FPerfTestParams {
std::string filename = ""; ///< Particles file.
std::string algo = "task"; ///< Algorithm to run.
int omp_chunk_size = 0; ///< OpenMP chunk size for basic algorithm (FFmmAlgorithmThread)
int nbProcs = 1;
#ifdef SCALFMM_USE_MPI
FMpi* mpiContext = nullptr;
#endif
......
......@@ -17,7 +17,8 @@ int main(int argc, char** argv)
FMpiFmaDivider<FReal>
divider(args.inFileName(),
args.outFileName() + "_" + std::to_string(args.zoneCount())
args.outFileName() + "_" + args.dispatchPolicyString()
+ "_" + std::to_string(args.zoneCount())
+ "z_h" + std::to_string(args.treeHeight()),
args.outFileExt(),
args.zoneCount(),
......
......@@ -16,7 +16,6 @@
class testFMpiFmaDividerArgs {
const int _treeHeightInit = 5;
const int _subTreeHeightInit = 1;
const int _zoneCountInit = omp_get_max_threads();
const int _verboseInit = 0;
const char* _inFileNameInit = "";
......@@ -53,14 +52,6 @@ public:
return getValue(_argc, _argv, OctreeHeight.options, _treeHeightInit);
}
int subTreeHeight() const {
    using namespace FParameterDefinitions;
    using namespace FParameters;

    // Look up the sub-tree height option on the command line;
    // _subTreeHeightInit is handed over as the default value.
    const int requestedHeight = getValue(_argc, _argv, OctreeSubHeight.options, _subTreeHeightInit);
    return requestedHeight;
}
int zoneCount() const {
using namespace FParameterDefinitions;
using namespace FParameters;
......@@ -112,11 +103,17 @@ public:
return ext;
}
int dispatchPolicy() {
std::string dispatchPolicyString() const {
    using namespace FParameterDefinitions;
    using namespace FParameters;

    // getStr returns a C string; wrap it in a std::string for the caller.
    // _dispatchPolicyInit is handed over as the default value.
    return std::string(getStr(_argc, _argv, DispatchPolicy.options, _dispatchPolicyInit));
}
int dispatchPolicy() const {
std::string str = dispatchPolicyString();
if ( "particles" == str ) {
return 0;
} else {
......@@ -137,12 +134,12 @@ public:
"Loads an FMA file into a tree and runs a pseudo FMM algorithm "
"through it to compute load balancing.",
OctreeHeight,
OctreeSubHeight,
InputFile,
OutputFileBasename,
OutputFileExtension,
ZoneCount,
EnabledVerbose
DispatchPolicy
);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment