Commit efa505d5 authored by PIACIBELLO Cyrille
Browse files
parents 5108b944 64cdf22a
This diff is collapsed.
...@@ -43,9 +43,9 @@ if(NOT DEFINED PROCESSOR_COUNT) ...@@ -43,9 +43,9 @@ if(NOT DEFINED PROCESSOR_COUNT)
if(APPLE) if(APPLE)
find_program(cmd_sys_pro "system_profiler") find_program(cmd_sys_pro "system_profiler")
if(cmd_sys_pro) if(cmd_sys_pro)
execute_process(COMMAND ${cmd_sys_pro} OUTPUT_VARIABLE info) execute_process(COMMAND ${cmd_sys_pro} SPHardwareDataType OUTPUT_VARIABLE info)
string(REGEX REPLACE "^.*Total Number Of Cores: ([0-9]+).*$" "\\1" string(REGEX REPLACE "^.*Total Number of Cores: ([0-9]+).*$" "\\1"
NUMBER_OF_CPU "${info}") NUMBER_OF_CPU "${info}")
endif() endif()
endif() endif()
......
...@@ -264,10 +264,21 @@ if(PTSCOTCH_LIBRARIES) ...@@ -264,10 +264,21 @@ if(PTSCOTCH_LIBRARIES)
if(CMAKE_THREAD_LIBS_INIT) if(CMAKE_THREAD_LIBS_INIT)
list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
endif() endif()
if(UNIX OR WIN32) set(Z_LIBRARY "Z_LIBRARY-NOTFOUND")
find_library(Z_LIBRARY NAMES z)
if(Z_LIBRARY)
list(APPEND REQUIRED_LIBS "-lz")
endif()
set(M_LIBRARY "M_LIBRARY-NOTFOUND")
find_library(M_LIBRARY NAMES m)
if(M_LIBRARY)
list(APPEND REQUIRED_LIBS "-lm") list(APPEND REQUIRED_LIBS "-lm")
endif() endif()
list(APPEND REQUIRED_LIBS "-lz -lrt") set(RT_LIBRARY "RT_LIBRARY-NOTFOUND")
find_library(RT_LIBRARY NAMES rt)
if(RT_LIBRARY)
list(APPEND REQUIRED_LIBS "-lrt")
endif()
# set required libraries for link # set required libraries for link
set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
......
...@@ -233,10 +233,21 @@ if(SCOTCH_LIBRARIES) ...@@ -233,10 +233,21 @@ if(SCOTCH_LIBRARIES)
if(CMAKE_THREAD_LIBS_INIT) if(CMAKE_THREAD_LIBS_INIT)
list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
endif() endif()
if(UNIX OR WIN32) set(Z_LIBRARY "Z_LIBRARY-NOTFOUND")
find_library(Z_LIBRARY NAMES z)
if(Z_LIBRARY)
list(APPEND REQUIRED_LIBS "-lz")
endif()
set(M_LIBRARY "M_LIBRARY-NOTFOUND")
find_library(M_LIBRARY NAMES m)
if(M_LIBRARY)
list(APPEND REQUIRED_LIBS "-lm") list(APPEND REQUIRED_LIBS "-lm")
endif() endif()
list(APPEND REQUIRED_LIBS "-lz -lrt") set(RT_LIBRARY "RT_LIBRARY-NOTFOUND")
find_library(RT_LIBRARY NAMES rt)
if(RT_LIBRARY)
list(APPEND REQUIRED_LIBS "-lrt")
endif()
# set required libraries for link # set required libraries for link
set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
......
...@@ -62,6 +62,7 @@ public: ...@@ -62,6 +62,7 @@ public:
FAssertLF(tree, "tree cannot be null"); FAssertLF(tree, "tree cannot be null");
FAssertLF(kernels, "kernels cannot be null"); FAssertLF(kernels, "kernels cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
......
...@@ -67,6 +67,7 @@ public: ...@@ -67,6 +67,7 @@ public:
FAssertLF(tree, "tree cannot be null"); FAssertLF(tree, "tree cannot be null");
FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1"); FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
FAssertLF(leafLevelSeperationCriteria < 3, "Separation criteria should be < 3");
FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight()); FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight());
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "../Containers/FVector.hpp" #include "../Containers/FVector.hpp"
#include "FCoreCommon.hpp" #include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
/** /**
* @author Berenger Bramas (berenger.bramas@inria.fr) * @author Berenger Bramas (berenger.bramas@inria.fr)
...@@ -45,7 +46,7 @@ ...@@ -45,7 +46,7 @@
* *
* Upon destruction, this class does not deallocate pointers given to its constructor. * Upon destruction, this class does not deallocate pointers given to its constructor.
*/ */
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers { class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; ///< The octree to work on OctreeClass* const tree; ///< The octree to work on
...@@ -74,13 +75,14 @@ public: ...@@ -74,13 +75,14 @@ public:
FAssertLF(tree, "tree cannot be null"); FAssertLF(tree, "tree cannot be null");
FAssertLF(inKernels, "kernels cannot be null"); FAssertLF(inKernels, "kernels cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads]; this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static) #pragma omp parallel num_threads(MaxThreads)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ {
#pragma omp critical (InitFFmmAlgorithmSectionTask) #pragma omp critical (InitFFmmAlgorithmSectionTask)
{ {
this->kernels[idxThread] = new KernelClass(*inKernels); this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
} }
} }
...@@ -327,7 +329,7 @@ protected: ...@@ -327,7 +329,7 @@ protected:
// There is a maximum of 26 neighbors // There is a maximum of 26 neighbors
ContainerClass* neighbors[27]; ContainerClass* neighbors[27];
const int SizeShape = 3*3*3; const int SizeShape = P2PExclusionClass::SizeShape;
FVector<typename OctreeClass::Iterator> shapes[SizeShape]; FVector<typename OctreeClass::Iterator> shapes[SizeShape];
typename OctreeClass::Iterator octreeIterator(tree); typename OctreeClass::Iterator octreeIterator(tree);
...@@ -337,7 +339,7 @@ protected: ...@@ -337,7 +339,7 @@ protected:
// Coloring all the cells // Coloring all the cells
do{ do{
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
shapes[shapePosition].push(octreeIterator); shapes[shapePosition].push(octreeIterator);
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "../Containers/FVector.hpp" #include "../Containers/FVector.hpp"
#include "FCoreCommon.hpp" #include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
/** /**
* @author Berenger Bramas (berenger.bramas@inria.fr) * @author Berenger Bramas (berenger.bramas@inria.fr)
...@@ -39,7 +40,7 @@ ...@@ -39,7 +40,7 @@
* *
* Of course this class does not deallocate pointer given in arguments. * Of course this class does not deallocate pointer given in arguments.
*/ */
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers { class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; //< The octree to work on OctreeClass* const tree; //< The octree to work on
...@@ -49,7 +50,7 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers { ...@@ -49,7 +50,7 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
const int OctreeHeight; const int OctreeHeight;
const int leafLevelSeperationCriteria; const int leafLevelSeparationCriteria;
public: public:
/** The constructor needs the octree and the kernels used for computation /** The constructor needs the octree and the kernels used for computation
* @param inTree the octree to work on * @param inTree the octree to work on
...@@ -58,20 +59,21 @@ public: ...@@ -58,20 +59,21 @@ public:
*/ */
FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), : tree(inTree) , kernels(nullptr),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria)
{ {
FAssertLF(tree, "tree cannot be null"); FAssertLF(tree, "tree cannot be null");
FAssertLF(inKernels, "kernels cannot be null"); FAssertLF(inKernels, "kernels cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads]; this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static) #pragma omp parallel num_threads(MaxThreads)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ {
#pragma omp critical (InitFFmmAlgorithmTask) #pragma omp critical (InitFFmmAlgorithmTask)
{ {
this->kernels[idxThread] = new KernelClass(*inKernels); this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
} }
} }
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
...@@ -239,7 +241,7 @@ protected: ...@@ -239,7 +241,7 @@ protected:
// for each levels // for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel); FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
// for each cell we apply the M2L with all cells in the implicit interaction list // for each cell we apply the M2L with all cells in the implicit interaction list
do{ do{
#pragma omp task firstprivate(octreeIterator) private(neighbors) shared(idxLevel) #pragma omp task firstprivate(octreeIterator) private(neighbors) shared(idxLevel)
...@@ -286,7 +288,7 @@ protected: ...@@ -286,7 +288,7 @@ protected:
// for each levels // for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel); FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
// for each cells // for each cells
do{ do{
//#pragma omp task default(none) firstprivate(octreeIterator,separationCriteria) private( neighbors) shared(idxLevel) //#pragma omp task default(none) firstprivate(octreeIterator,separationCriteria) private( neighbors) shared(idxLevel)
...@@ -388,7 +390,7 @@ protected: ...@@ -388,7 +390,7 @@ protected:
// There is a maximum of 26 neighbors // There is a maximum of 26 neighbors
ContainerClass* neighbors[27]; ContainerClass* neighbors[27];
const int SizeShape = 3*3*3; const int SizeShape = P2PExclusionClass::SizeShape;
FVector<typename OctreeClass::Iterator> shapes[SizeShape]; FVector<typename OctreeClass::Iterator> shapes[SizeShape];
typename OctreeClass::Iterator octreeIterator(tree); typename OctreeClass::Iterator octreeIterator(tree);
...@@ -397,7 +399,7 @@ protected: ...@@ -397,7 +399,7 @@ protected:
// for each leafs // for each leafs
do{ do{
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
shapes[shapePosition].push(octreeIterator); shapes[shapePosition].push(octreeIterator);
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "../Containers/FOctree.hpp" #include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp" #include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <omp.h> #include <omp.h>
...@@ -45,7 +46,7 @@ ...@@ -45,7 +46,7 @@
* *
* This class does not deallocate pointers given to its constructor. * This class does not deallocate pointers given to its constructor.
*/ */
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
OctreeClass* const tree; ///< The octree to work on. OctreeClass* const tree; ///< The octree to work on.
KernelClass** kernels; ///< The kernels. KernelClass** kernels; ///< The kernels.
...@@ -53,7 +54,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ ...@@ -53,7 +54,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
typename OctreeClass::Iterator* iterArray; typename OctreeClass::Iterator* iterArray;
int leafsNumber; int leafsNumber;
static const int SizeShape = 3*3*3; static const int SizeShape = P2PExclusionClass::SizeShape;
int shapeLeaf[SizeShape]; int shapeLeaf[SizeShape];
const int MaxThreads; ///< The maximum number of threads. const int MaxThreads; ///< The maximum number of threads.
...@@ -62,7 +63,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ ...@@ -62,7 +63,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
int userChunkSize; int userChunkSize;
const int leafLevelSeperationCriteria; const int leafLevelSeparationCriteria;
public: public:
/** Class constructor /** Class constructor
...@@ -79,15 +80,17 @@ public: ...@@ -79,15 +80,17 @@ public:
const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1) const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0), : tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
userChunkSize(inUserChunkSize), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { userChunkSize(inUserChunkSize), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
FAssertLF(tree, "tree cannot be null"); FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
FAssertLF(0 < userChunkSize, "Chunk size should be > 0");
this->kernels = new KernelClass*[MaxThreads]; this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static) #pragma omp parallel num_threads(MaxThreads)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ {
#pragma omp critical (InitFFmmAlgorithmThread) #pragma omp critical (InitFFmmAlgorithmThread)
{ {
this->kernels[idxThread] = new KernelClass(*inKernels); this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
} }
} }
...@@ -138,7 +141,7 @@ protected: ...@@ -138,7 +141,7 @@ protected:
do{ do{
++leafsNumber; ++leafsNumber;
const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)]; ++this->shapeLeaf[P2PExclusionClass::GetShapeIdx(coord)];
} while(octreeIterator.moveRight()); } while(octreeIterator.moveRight());
iterArray = new typename OctreeClass::Iterator[leafsNumber]; iterArray = new typename OctreeClass::Iterator[leafsNumber];
...@@ -296,7 +299,7 @@ protected: ...@@ -296,7 +299,7 @@ protected:
// for each levels // for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
FLOG(FTic counterTimeLevel); FLOG(FTic counterTimeLevel);
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
int numberOfCells = 0; int numberOfCells = 0;
// for each cells // for each cells
do{ do{
...@@ -439,7 +442,7 @@ protected: ...@@ -439,7 +442,7 @@ protected:
//iterArray[leafs] = octreeIterator; //iterArray[leafs] = octreeIterator;
//++leafs; //++leafs;
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
omp_set_lock(&lockShape[shapePosition]); omp_set_lock(&lockShape[shapePosition]);
const int positionToWork = startPosAtShape[shapePosition]++; const int positionToWork = startPosAtShape[shapePosition]++;
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "../Containers/FOctree.hpp" #include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp" #include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <omp.h> #include <omp.h>
#include <vector> #include <vector>
...@@ -29,18 +30,18 @@ ...@@ -29,18 +30,18 @@
* *
* This class does not deallocate pointers given to its constructor. * This class does not deallocate pointers given to its constructor.
*/ */
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThreadBalance : public FAbstractAlgorithm, public FAlgorithmTimers{ class FFmmAlgorithmThreadBalance : public FAbstractAlgorithm, public FAlgorithmTimers{
OctreeClass* const tree; ///< The octree to work on. OctreeClass* const tree; ///< The octree to work on.
KernelClass** kernels; ///< The kernels. KernelClass** kernels; ///< The kernels.
static const int SizeShape = 3*3*3; static const int SizeShape = P2PExclusionClass::SizeShape;
const int MaxThreads; ///< The maximum number of threads. const int MaxThreads; ///< The maximum number of threads.
const int OctreeHeight; ///< The height of the given tree. const int OctreeHeight; ///< The height of the given tree.
const int leafLevelSeperationCriteria; const int leafLevelSeparationCriteria;
public: public:
/** Class constructor /** Class constructor
...@@ -57,15 +58,16 @@ public: ...@@ -57,15 +58,16 @@ public:
const int inLeafLevelSeperationCriteria = 1) const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), : tree(inTree) , kernels(nullptr),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
FAssertLF(tree, "tree cannot be null"); FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
this->kernels = new KernelClass*[MaxThreads]; this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel for schedule(static) #pragma omp parallel num_threads(MaxThreads)
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ {
#pragma omp critical (InitFFmmAlgorithmThreadBalance) #pragma omp critical (InitFFmmAlgorithmThreadBalance)
{ {
this->kernels[idxThread] = new KernelClass(*inKernels); this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
} }
} }
...@@ -205,7 +207,7 @@ protected: ...@@ -205,7 +207,7 @@ protected:
do{ do{
++leafsNumber; ++leafsNumber;
const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
++shapeLeaves[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)]; ++shapeLeaves[P2PExclusionClass::GetShapeIdx(coord)];
} while(octreeIterator.moveRight()); } while(octreeIterator.moveRight());
} }
...@@ -346,6 +348,7 @@ protected: ...@@ -346,6 +348,7 @@ protected:
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber]; workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
} }
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()]; WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber);
// Prepare the P2P // Prepare the P2P
const int LeafIndex = OctreeHeight - 1; const int LeafIndex = OctreeHeight - 1;
leafsDataArray.reset(new LeafData[leafsNumber]); leafsDataArray.reset(new LeafData[leafsNumber]);
...@@ -365,7 +368,7 @@ protected: ...@@ -365,7 +368,7 @@ protected:
// for each leafs // for each leafs
for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){ for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
const int positionToWork = startPosAtShape[shapePosition]++; const int positionToWork = startPosAtShape[shapePosition]++;
...@@ -542,7 +545,7 @@ protected: ...@@ -542,7 +545,7 @@ protected:
// for each levels // for each levels
for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
FLOG(FTic counterTimeLevel); FLOG(FTic counterTimeLevel);
FLOG(computationCounter.tic()); FLOG(computationCounter.tic());
#pragma omp parallel #pragma omp parallel
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <sys/time.h> #include <sys/time.h>
#include "FCoreCommon.hpp" #include "FCoreCommon.hpp"
#include "FP2PExclusion.hpp"
#include <memory> #include <memory>
...@@ -63,7 +64,7 @@ ...@@ -63,7 +64,7 @@
* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20
* --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp * --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
*/ */
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers { class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers {
private: private:
OctreeClass* const tree; ///< The octree to work on