Commit abc94b1b authored by BRAMAS Berenger

add a num_threads clause to all omp parallel kernels

parent a0740fd0
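The change is the same in every threaded algorithm touched by this commit: each `#pragma omp parallel` region gains a `num_threads(MaxThreads)` clause, so the team size is capped by the algorithm's `MaxThreads` member instead of being left entirely to the runtime default. A minimal sketch of the pattern, with hypothetical names (`ExampleAlgorithm`, `executeCore` body) standing in for the real ScalFMM classes:

```cpp
#include <omp.h>
#include <cstdio>

// Illustrative only: the thread count is fixed once (here from
// omp_get_max_threads()) and every parallel region is opened with
// num_threads(MaxThreads), so the team never grows past that value.
class ExampleAlgorithm {
    const int MaxThreads;  // decided once, e.g. at construction

public:
    ExampleAlgorithm() : MaxThreads(omp_get_max_threads()) {}

    void executeCore() {
        // Before the commit: "#pragma omp parallel" (runtime-chosen team size).
        // After: the clause requests exactly MaxThreads threads; the runtime
        // may give fewer, but never more.
        #pragma omp parallel num_threads(MaxThreads)
        {
            #pragma omp master
            {
                std::printf("team size = %d (requested %d)\n",
                            omp_get_num_threads(), MaxThreads);
            }
        }
    }
};

int main() {
    ExampleAlgorithm algo;
    algo.executeCore();
    return 0;
}
```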
@@ -179,7 +179,7 @@ protected:
  */
 void executeCore(const unsigned operationsToProceed) override {
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp master
 {
...
@@ -106,7 +106,7 @@ protected:
  */
 void executeCore(const unsigned operationsToProceed) override {
 Timers[P2MTimer].tic();
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp sections
 {
...
@@ -130,7 +130,7 @@ protected:
 FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
 FLOG(FTic counterTime);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single nowait
 {
@@ -163,7 +163,7 @@ protected:
 FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
 FLOG(FTic counterTime);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single nowait
 {
@@ -229,7 +229,7 @@ protected:
 FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
 FLOG(FTic counterTime);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single nowait
 {
@@ -276,7 +276,7 @@ protected:
 FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
 FLOG(FTic counterTime);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single nowait
 {
@@ -335,7 +335,7 @@ protected:
 FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
 FLOG(FTic counterTime);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single nowait
 {
@@ -386,7 +386,7 @@ protected:
 const int heightMinusOne = OctreeHeight - 1;
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single nowait
...
@@ -192,7 +192,7 @@ protected:
 const int chunkSize = this->getChunkSize(leafs);
 FLOG(FTic computationCounter);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait schedule(dynamic, chunkSize)
@@ -245,7 +245,7 @@ protected:
 const int chunkSize = this->getChunkSize(numberOfCells);
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait schedule(dynamic, chunkSize)
@@ -312,7 +312,7 @@ protected:
 const int chunkSize = this->getChunkSize(numberOfCells);
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const CellClass* neighbors[342];
@@ -369,7 +369,7 @@ protected:
 FLOG(computationCounter.tic());
 const int chunkSize = this->getChunkSize(numberOfCells);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait schedule(dynamic, chunkSize)
@@ -421,7 +421,7 @@ protected:
 startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + this->shapeLeaf[idxShape-1];
 }
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 const float step = float(this->leafsNumber) / float(omp_get_num_threads());
...
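Several of the hunks above share the same shape: inside the now size-capped parallel region, each thread fetches its own kernel object via `omp_get_thread_num()` and the loop iterations are handed out with a dynamic schedule. A reduced sketch of that shape under assumed names (`Kernel`, `bottomPass` and the chunk-size heuristic are illustrative, not the real ScalFMM API):

```cpp
#include <omp.h>
#include <algorithm>
#include <vector>

struct Kernel {
    void P2P(int leafIndex) { /* per-leaf work would go here */ (void)leafIndex; }
};

void bottomPass(std::vector<Kernel*>& kernels, int numberOfLeafs, int MaxThreads) {
    // Heuristic chunk size in the spirit of the diff: several chunks per
    // thread so the dynamic schedule can balance uneven leaves.
    const int chunkSize = std::max(1, numberOfLeafs / (MaxThreads * MaxThreads));

    #pragma omp parallel num_threads(MaxThreads)
    {
        // One kernel instance per thread, indexed by the thread id; this
        // indexing assumes kernels holds at least MaxThreads entries, which
        // the num_threads clause now guarantees the team cannot exceed.
        Kernel* const myThreadkernels = kernels[omp_get_thread_num()];

        #pragma omp for nowait schedule(dynamic, chunkSize)
        for (int idxLeaf = 0; idxLeaf < numberOfLeafs; ++idxLeaf) {
            myThreadkernels->P2P(idxLeaf);
        }
    }
}
```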
@@ -215,7 +215,7 @@ protected:
 std::unique_ptr<WorkloadTemp*[]> workloadBufferThread(new WorkloadTemp*[MaxThreads]);
 memset(workloadBufferThread.get(), 0, MaxThreads*sizeof(WorkloadTemp*));
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp single
 {
@@ -452,7 +452,7 @@ protected:
 FLOG(FTic counterTime);
 FLOG(FTic computationCounter);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const int nbCellsToCompute = workloadP2M[omp_get_thread_num()].nbElements;
@@ -491,7 +491,7 @@ protected:
 FLOG(FTic counterTimeLevel);
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const int nbCellsToCompute = workloadM2M[idxLevel][omp_get_thread_num()].nbElements;
@@ -547,7 +547,7 @@ protected:
 const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 FLOG(FTic counterTimeLevel);
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const int nbCellsToCompute = workloadM2L[idxLevel][omp_get_thread_num()].nbElements;
@@ -595,7 +595,7 @@ protected:
 FLOG(FTic counterTimeLevel);
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const int nbCellsToCompute = workloadL2L[idxLevel][omp_get_thread_num()].nbElements;
@@ -626,7 +626,7 @@ protected:
 /////////////////////////////////////////////////////////////////////////////
 void L2P(){
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const int nbCellsToCompute = workloadL2P[omp_get_thread_num()].nbElements;
@@ -656,7 +656,7 @@ protected:
 FLOG(FTic computationCounter);
 FLOG(FTic computationCounterP2P);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 FLOG(if(!omp_get_thread_num()) computationCounter.tic());
...
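The hunks above also suggest why the cap matters here: per-thread buffers such as `workloadBufferThread` are allocated with exactly `MaxThreads` slots and later indexed by `omp_get_thread_num()`, so the parallel region must never run with more threads than that. A reduced sketch of the invariant, with `WorkloadTemp` and `balanceExample` as stand-ins for the real types:

```cpp
#include <omp.h>
#include <cstring>
#include <memory>

struct WorkloadTemp { int nbElements = 0; };

void balanceExample(const int MaxThreads) {
    // One slot per possible thread, exactly MaxThreads of them.
    std::unique_ptr<WorkloadTemp*[]> workloadBufferThread(new WorkloadTemp*[MaxThreads]);
    std::memset(workloadBufferThread.get(), 0, MaxThreads * sizeof(WorkloadTemp*));

    // Capping the team with num_threads(MaxThreads) guarantees that
    // omp_get_thread_num() stays below MaxThreads, so the per-thread
    // indexing below cannot run past the end of the buffer.
    #pragma omp parallel num_threads(MaxThreads)
    {
        const int tid = omp_get_thread_num();
        workloadBufferThread[tid] = new WorkloadTemp();
        workloadBufferThread[tid]->nbElements = tid;  // placeholder work
    }

    // Unused slots stay null if fewer threads were granted; delete handles that.
    for (int idx = 0; idx < MaxThreads; ++idx) {
        delete workloadBufferThread[idx];
    }
}
```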
@@ -349,7 +349,7 @@ protected:
 } while(octreeIterator.moveRight());
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 // Each thread get its own kernel
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
@@ -452,7 +452,7 @@ protected:
 }
 FLOG(parallelCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]);
 //This single section post and receive the comms, and then do the M2M associated with it.
@@ -641,7 +641,7 @@ protected:
 FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight];
 memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess * OctreeHeight);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp master
 {
@@ -947,7 +947,7 @@ protected:
 // Compute this cells
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 MortonIndex neighborsIndex[/*189+26+1*/216];
@@ -1080,7 +1080,7 @@ protected:
 }
 }
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]);
 #pragma omp single nowait
@@ -1251,7 +1251,7 @@ protected:
 FSize partsToSend[nbProcess];
 memset(partsToSend, 0, sizeof(FSize) * nbProcess);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp master // MUST WAIT to fill leafsNeedOther
 if(p2pEnabled){
...
@@ -357,7 +357,7 @@ protected:
 } while(octreeIterator.moveRight());
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 // Each thread get its own kernel
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
@@ -456,7 +456,7 @@ protected:
 }
 FLOG(parallelCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]);
 //This single section post and receive the comms, and then do the M2M associated with it.
@@ -738,7 +738,7 @@ protected:
 FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight];
 memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess * OctreeHeight);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp master
 {
@@ -1038,7 +1038,7 @@ protected:
 // Compute this cells
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 MortonIndex neighborsIndex[/*189+26+1*/216];
@@ -1187,7 +1187,7 @@ protected:
 }
 }
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]);
 #pragma omp single nowait
@@ -1359,7 +1359,7 @@ protected:
 FSize partsToSend[nbProcess];
 memset(partsToSend, 0, sizeof(FSize) * nbProcess);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 #pragma omp master // MUST WAIT to fill leafsNeedOther
 if(p2pEnabled){
...
@@ -141,7 +141,7 @@ protected:
 const int chunkSize = FMath::Max(1 , numberOfLeafs/(omp_get_max_threads()*omp_get_max_threads()));
 FLOG(FTic computationCounter);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait schedule(dynamic, chunkSize)
@@ -198,7 +198,7 @@ protected:
 const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads()));
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait schedule(dynamic, chunkSize)
@@ -265,7 +265,7 @@ protected:
 const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads()));
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 const CellClass* neighbors[342];
@@ -334,7 +334,7 @@ protected:
 const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads()));
 FLOG(computationCounter.tic());
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait schedule(dynamic, chunkSize)
@@ -383,7 +383,7 @@ protected:
 const int heightMinusOne = OctreeHeight - 1;
 FLOG(FTic computationCounter);
-#pragma omp parallel
+#pragma omp parallel num_threads(MaxThreads)
 {
 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 // There is a maximum of 26 neighbors
...