Commit 817763c3 authored by BRAMAS Berenger's avatar BRAMAS Berenger

add a way to swtich between static schedule (almost) or with a chunk size...

add a way to swtich between static schedule (almost) or with a chunk size divide by nb threads square
parent d5b97abe
......@@ -60,6 +60,18 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
const int OctreeHeight; ///< The height of the given tree.
const bool staticSchedule;
template <class NumType>
NumType getChunkSize(const NumType inSize) const {
if(staticSchedule){
return FMath::Max(NumType(1) , NumType(double(inSize)/double(omp_get_max_threads())) );
}
else{
return FMath::Max(NumType(1) , inSize/NumType(omp_get_max_threads()*omp_get_max_threads()));
}
}
public:
/** Class constructor
*
......@@ -69,9 +81,11 @@ public:
*
* \except An exception is thrown if one of the arguments is NULL.
*/
FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels)
FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels,
const bool inStaticSchedule = true)
: tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) {
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
staticSchedule(inStaticSchedule) {
FAssertLF(tree, "tree cannot be null");
......@@ -87,6 +101,7 @@ public:
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
FLOG(FLog::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n");
FLOG(FLog::Controller << "\t static schedule " << (staticSchedule?"TRUE":"FALSE") << ")\n");
}
/** Default destructor */
......@@ -162,7 +177,7 @@ protected:
++leafs;
} while(octreeIterator.moveRight());
const int chunkSize = FMath::Max(1 , leafs/(omp_get_max_threads()*omp_get_max_threads()));
const int chunkSize = getChunkSize(leafs);
FLOG(FTic computationCounter);
#pragma omp parallel
......@@ -215,7 +230,7 @@ protected:
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads()));
const int chunkSize = getChunkSize(numberOfCells);
FLOG(computationCounter.tic());
#pragma omp parallel
......@@ -271,7 +286,7 @@ protected:
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads()));
const int chunkSize = getChunkSize(numberOfCells);
FLOG(computationCounter.tic());
#pragma omp parallel
......@@ -329,7 +344,7 @@ protected:
octreeIterator = avoidGotoLeftIterator;
FLOG(computationCounter.tic());
const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads()));
const int chunkSize = getChunkSize(numberOfCells);
#pragma omp parallel
{
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
......@@ -429,7 +444,7 @@ protected:
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
const int endAtThisShape = this->shapeLeaf[idxShape] + previous;
const int chunkSize = FMath::Max(1 , endAtThisShape/(omp_get_num_threads()*omp_get_num_threads()));
const int chunkSize = getChunkSize(endAtThisShape);
#pragma omp for schedule(dynamic, chunkSize)
for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
LeafData& currentIter = leafsDataArray[idxLeafs];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment