Commit efa505d5 authored by PIACIBELLO Cyrille
parents 5108b944 64cdf22a
This diff is collapsed.
......@@ -43,9 +43,9 @@ if(NOT DEFINED PROCESSOR_COUNT)
if(APPLE)
find_program(cmd_sys_pro "system_profiler")
if(cmd_sys_pro)
execute_process(COMMAND ${cmd_sys_pro} OUTPUT_VARIABLE info)
string(REGEX REPLACE "^.*Total Number Of Cores: ([0-9]+).*$" "\\1"
NUMBER_OF_CPU "${info}")
execute_process(COMMAND ${cmd_sys_pro} SPHardwareDataType OUTPUT_VARIABLE info)
string(REGEX REPLACE "^.*Total Number of Cores: ([0-9]+).*$" "\\1"
NUMBER_OF_CPU "${info}")
endif()
endif()
......
......@@ -264,10 +264,21 @@ if(PTSCOTCH_LIBRARIES)
if(CMAKE_THREAD_LIBS_INIT)
list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
endif()
if(UNIX OR WIN32)
set(Z_LIBRARY "Z_LIBRARY-NOTFOUND")
find_library(Z_LIBRARY NAMES z)
if(Z_LIBRARY)
list(APPEND REQUIRED_LIBS "-lz")
endif()
set(M_LIBRARY "M_LIBRARY-NOTFOUND")
find_library(M_LIBRARY NAMES m)
if(M_LIBRARY)
list(APPEND REQUIRED_LIBS "-lm")
endif()
list(APPEND REQUIRED_LIBS "-lz -lrt")
set(RT_LIBRARY "RT_LIBRARY-NOTFOUND")
find_library(RT_LIBRARY NAMES rt)
if(RT_LIBRARY)
list(APPEND REQUIRED_LIBS "-lrt")
endif()
# set required libraries for link
set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
......
......@@ -233,10 +233,21 @@ if(SCOTCH_LIBRARIES)
if(CMAKE_THREAD_LIBS_INIT)
list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
endif()
if(UNIX OR WIN32)
set(Z_LIBRARY "Z_LIBRARY-NOTFOUND")
find_library(Z_LIBRARY NAMES z)
if(Z_LIBRARY)
list(APPEND REQUIRED_LIBS "-lz")
endif()
set(M_LIBRARY "M_LIBRARY-NOTFOUND")
find_library(M_LIBRARY NAMES m)
if(M_LIBRARY)
list(APPEND REQUIRED_LIBS "-lm")
endif()
list(APPEND REQUIRED_LIBS "-lz -lrt")
set(RT_LIBRARY "RT_LIBRARY-NOTFOUND")
find_library(RT_LIBRARY NAMES rt)
if(RT_LIBRARY)
list(APPEND REQUIRED_LIBS "-lrt")
endif()
# set required libraries for link
set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
......
......@@ -62,6 +62,7 @@ public:
FAssertLF(tree, "tree cannot be null");
FAssertLF(kernels, "kernels cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
......
......@@ -67,6 +67,7 @@ public:
FAssertLF(tree, "tree cannot be null");
FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
FAssertLF(leafLevelSeperationCriteria < 3, "Separation criteria should be < 3");
FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight());
......
......@@ -27,6 +27,7 @@
#include "../Containers/FVector.hpp"
#include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
......@@ -45,7 +46,7 @@
*
* Upon destruction, this class does not deallocate pointers given to its constructor.
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; ///< The octree to work on
......@@ -74,13 +75,14 @@ public:
FAssertLF(tree, "tree cannot be null");
FAssertLF(inKernels, "kernels cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmSectionTask)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
......@@ -327,7 +329,7 @@ protected:
// There is a maximum of 26 neighbors
ContainerClass* neighbors[27];
const int SizeShape = 3*3*3;
const int SizeShape = P2PExclusionClass::SizeShape;
FVector<typename OctreeClass::Iterator> shapes[SizeShape];
typename OctreeClass::Iterator octreeIterator(tree);
......@@ -337,7 +339,7 @@ protected:
// Coloring all the cells
do{
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
shapes[shapePosition].push(octreeIterator);
......
......@@ -27,6 +27,7 @@
#include "../Containers/FVector.hpp"
#include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
......@@ -39,7 +40,7 @@
*
* Of course this class does not deallocate pointers given in arguments.
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; //< The octree to work on
......@@ -49,7 +50,7 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
const int OctreeHeight;
const int leafLevelSeperationCriteria;
const int leafLevelSeparationCriteria;
public:
/** The constructor needs the octree and the kernels used for computation
* @param inTree the octree to work on
......@@ -58,20 +59,21 @@ public:
*/
FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria)
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria)
{
FAssertLF(tree, "tree cannot be null");
FAssertLF(inKernels, "kernels cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp critical (InitFFmmAlgorithmTask)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
}
}
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmTask)
{
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
......@@ -239,7 +241,7 @@ protected:
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
// for each cell we apply the M2L with all cells in the implicit interaction list
do{
#pragma omp task firstprivate(octreeIterator) private(neighbors) shared(idxLevel)
......@@ -286,7 +288,7 @@ protected:
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
// for each cells
do{
//#pragma omp task default(none) firstprivate(octreeIterator,separationCriteria) private( neighbors) shared(idxLevel)
......@@ -388,7 +390,7 @@ protected:
// There is a maximum of 26 neighbors
ContainerClass* neighbors[27];
const int SizeShape = 3*3*3;
const int SizeShape = P2PExclusionClass::SizeShape;
FVector<typename OctreeClass::Iterator> shapes[SizeShape];
typename OctreeClass::Iterator octreeIterator(tree);
......@@ -397,7 +399,7 @@ protected:
// for each leafs
do{
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
shapes[shapePosition].push(octreeIterator);
......
......@@ -27,6 +27,7 @@
#include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <omp.h>
......@@ -45,7 +46,7 @@
*
* This class does not deallocate pointers given to its constructor.
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
OctreeClass* const tree; ///< The octree to work on.
KernelClass** kernels; ///< The kernels.
......@@ -53,7 +54,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
typename OctreeClass::Iterator* iterArray;
int leafsNumber;
static const int SizeShape = 3*3*3;
static const int SizeShape = P2PExclusionClass::SizeShape;
int shapeLeaf[SizeShape];
const int MaxThreads; ///< The maximum number of threads.
......@@ -62,7 +63,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
int userChunkSize;
const int leafLevelSeperationCriteria;
const int leafLevelSeparationCriteria;
public:
/** Class constructor
......@@ -79,15 +80,17 @@ public:
const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
userChunkSize(inUserChunkSize), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
userChunkSize(inUserChunkSize), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
FAssertLF(0 < userChunkSize, "Chunk size should be > 0");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp critical (InitFFmmAlgorithmThread)
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmThread)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
......@@ -138,7 +141,7 @@ protected:
do{
++leafsNumber;
const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];
++this->shapeLeaf[P2PExclusionClass::GetShapeIdx(coord)];
} while(octreeIterator.moveRight());
iterArray = new typename OctreeClass::Iterator[leafsNumber];
......@@ -296,7 +299,7 @@ protected:
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
int numberOfCells = 0;
// for each cells
do{
......@@ -439,7 +442,7 @@ protected:
//iterArray[leafs] = octreeIterator;
//++leafs;
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
omp_set_lock(&lockShape[shapePosition]);
const int positionToWork = startPosAtShape[shapePosition]++;
......
......@@ -11,6 +11,7 @@
#include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <omp.h>
#include <vector>
......@@ -29,18 +30,18 @@
*
* This class does not deallocate pointers given to its constructor.
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThreadBalance : public FAbstractAlgorithm, public FAlgorithmTimers{
OctreeClass* const tree; ///< The octree to work on.
KernelClass** kernels; ///< The kernels.
static const int SizeShape = 3*3*3;
static const int SizeShape = P2PExclusionClass::SizeShape;
const int MaxThreads; ///< The maximum number of threads.
const int OctreeHeight; ///< The height of the given tree.
const int leafLevelSeperationCriteria;
const int leafLevelSeparationCriteria;
public:
/** Class constructor
......@@ -57,15 +58,16 @@ public:
const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp critical (InitFFmmAlgorithmThreadBalance)
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmThreadBalance)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
......@@ -205,7 +207,7 @@ protected:
do{
++leafsNumber;
const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
++shapeLeaves[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];
++shapeLeaves[P2PExclusionClass::GetShapeIdx(coord)];
} while(octreeIterator.moveRight());
}
......@@ -346,6 +348,7 @@ protected:
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber);
// Prepare the P2P
const int LeafIndex = OctreeHeight - 1;
leafsDataArray.reset(new LeafData[leafsNumber]);
......@@ -365,7 +368,7 @@ protected:
// for each leafs
for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
const int positionToWork = startPosAtShape[shapePosition]++;
......@@ -542,7 +545,7 @@ protected:
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
FLOG(FTic counterTimeLevel);
FLOG(computationCounter.tic());
#pragma omp parallel
......
......@@ -40,6 +40,7 @@
#include <sys/time.h>
#include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <memory>
......@@ -63,7 +64,7 @@
* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20
* --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers {
private:
OctreeClass* const tree; ///< The octree to work on
......@@ -82,7 +83,7 @@ private:
const int idProcess; ///< Current process id
const int OctreeHeight; ///< Tree height
const int leafLevelSeperationCriteria;
const int leafLevelSeparationCriteria;
/** An interval is the morton index interval
* that a proc uses (i.e. it holds data in this interval) */
......@@ -150,17 +151,18 @@ public:
nbProcess(inComm.processCount()),
idProcess(inComm.processId()),
OctreeHeight(tree->getHeight()),
leafLevelSeperationCriteria(inLeafLevelSeperationCriteria),
leafLevelSeparationCriteria(inLeafLevelSeperationCriteria),
intervals(new Interval[inComm.processCount()]),
workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) {
FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmThreadProc)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
......@@ -188,7 +190,10 @@ protected:
*/
void executeCore(const unsigned operationsToProceed) override {
// Count leaf
this->numberOfLeafs = 0;
#ifdef SCALFMM_TRACE_ALGO
eztrace_start();
#endif
this->numberOfLeafs = 0;
{
Interval myFullInterval;
{//Building the interval with the first and last leaves (and count the number of leaves)
......@@ -260,31 +265,61 @@ protected:
workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ );
}
#ifdef SCALFMM_TRACE_ALGO
Timers[P2MTimer].tic();
eztrace_enter_event("P2M", EZTRACE_YELLOW);
#endif
if(operationsToProceed & FFmmP2M) bottomPass();
Timers[P2MTimer].tac();
#ifdef SSCALFMM_TRACE_ALGO
eztrace_leave_event();
eztrace_enter_event("M2M", EZTRACE_PINK);
#endif
Timers[M2MTimer].tic();
if(operationsToProceed & FFmmM2M) upwardPass();
Timers[M2MTimer].tac();
if(operationsToProceed & FFmmM2M) upwardPass();
Timers[M2MTimer].tac();
Timers[M2LTimer].tic();
#ifdef SCALFMM_TRACE_ALGO
eztrace_leave_event();
eztrace_enter_event("M2L", EZTRACE_GREEN);
#endif
Timers[M2LTimer].tic();
if(operationsToProceed & FFmmM2L) transferPass();
Timers[M2LTimer].tac();
Timers[L2LTimer].tic();
#ifdef SCALFMM_TRACE_ALGO
eztrace_leave_event();
eztrace_enter_event("L2L", EZTRACE_PINK);
#endif
Timers[L2LTimer].tic();
if(operationsToProceed & FFmmL2L) downardPass();
Timers[L2LTimer].tac();
Timers[NearTimer].tic();
#ifdef SCALFMM_TRACE_ALGO
eztrace_leave_event();
eztrace_enter_event("L2P+P2P", EZTRACE_BLUE);
#endif
Timers[NearTimer].tic();
if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P));
Timers[NearTimer].tac();
#ifdef SCALFMM_TRACE_ALGO
eztrace_leave_event();
eztrace_stop();
#endif
// delete array
delete [] iterArray;
delete [] iterArrayComm;
iterArray = nullptr;
delete [] iterArrayComm;
iterArray = nullptr;
iterArrayComm = nullptr;
#ifdef SCALFMM_TRACE_ALGO
eztrace_stop();
#endif
}
/////////////////////////////////////////////////////////////////////////////
......@@ -626,7 +661,7 @@ protected:
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
if(!procHasWorkAtLevel(idxLevel, idProcess)){
avoidGotoLeftIterator.moveDown();
......@@ -784,7 +819,7 @@ protected:
// Now we can compute all the data
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
if(!procHasWorkAtLevel(idxLevel, idProcess)){
avoidGotoLeftIterator.moveDown();
......@@ -851,7 +886,7 @@ protected:
// compute the second time
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
if(!procHasWorkAtLevel(idxLevel, idProcess)){
avoidGotoLeftIterator.moveDown();
......@@ -1199,7 +1234,7 @@ protected:
// init
const int LeafIndex = OctreeHeight - 1;
const int SizeShape = 3*3*3;
const int SizeShape = P2PExclusionClass::SizeShape;
int shapeLeaf[SizeShape];
memset(shapeLeaf,0,SizeShape*sizeof(int));
......@@ -1360,7 +1395,7 @@ protected:
myLeafs[idxLeaf] = octreeIterator;
const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int shape = P2PExclusionClass::GetShapeIdx(coord);
shapeType[idxLeaf] = shape;
++shapeLeaf[shape];
......
......@@ -38,6 +38,7 @@
#include <omp.h>
#include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <memory>
......@@ -61,7 +62,7 @@
* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes
* ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
*/
template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm {
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
......@@ -83,7 +84,7 @@ class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm {
const int OctreeHeight;
const int leafLevelSeperationCriteria;
const int leafLevelSeparationCriteria;
public:
struct Interval{
......@@ -117,11 +118,11 @@ public:
void setKernel(KernelClass*const inKernels){
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmThreadProcPeriodic)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
}
......@@ -146,12 +147,13 @@ public:
numberOfLeafs(0),
MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
OctreeHeight(tree->getHeight()),
leafLevelSeperationCriteria(inLeafLevelSeperationCriteria),
leafLevelSeparationCriteria(inLeafLevelSeperationCriteria),
intervals(new Interval[inComm.processCount()]),
workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) {
FAssertLF(tree, "tree cannot be null");
FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight());
......@@ -787,7 +789,7 @@ protected:
// Find the M2L neighbors of a cell
const int counter = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(),
idxLevel,
neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeperationCriteria);
neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeparationCriteria);
memset(alreadySent, false, sizeof(bool) * nbProcess);
bool needOther = false;
......@@ -913,7 +915,7 @@ protected:
for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){
const int fackLevel = idxLevel + offsetRealTree;
const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeparationCriteria);
if(!procHasWorkAtLevel(idxLevel, idProcess)){
avoidGotoLeftIterator.moveDown();
......@@ -981,7 +983,7 @@ protected:
for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){
const int fackLevel = idxLevel + offsetRealTree;
const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeparationCriteria);
if(!procHasWorkAtLevel(idxLevel, idProcess)){
avoidGotoLeftIterator.moveDown();
......@@ -1347,7 +1349,7 @@ protected:
// init
const int LeafIndex = OctreeHeight - 1;
const int SizeShape = 3*3*3;
const int SizeShape = P2PExclusionClass::SizeShape;
int shapeLeaf[SizeShape];
memset(shapeLeaf,0,SizeShape*sizeof(int));
......@@ -1510,7 +1512,7 @@ protected:
myLeafs[idxLeaf] = octreeIterator;
const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int shape = P2PExclusionClass::GetShapeIdx(coord);
shapeType[idxLeaf] = shape;
++shapeLeaf[shape];
......
......@@ -55,7 +55,7 @@ class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm, public FAlgorithmTimer
const int OctreeHeight;
const int leafLevelSeperationCriteria;
const int leafLevelSeparationCriteria;
public:
/** The constructor needs the octree and the kernels used for computation
......@@ -65,16 +65,17 @@ public:
*/
FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), iterArray(nullptr),
MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
#pragma omp critical (InitFFmmAlgorithmThreadTsm)
#pragma omp parallel num_threads(MaxThreads)
{
#pragma omp critical (InitFFmmAlgorithmTsm)
{
this->kernels[idxThread] = new KernelClass(*inKernels);
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
......@@ -250,7 +251,7 @@ protected:
// for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
int numberOfCells = 0;
// for each cells
......
......@@ -46,7 +46,7 @@ class FFmmAlgorithmTsm : public FAbstractAlgorithm{