Commit 341fdb57 authored by COULAUD Olivier
Browse files

Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/scalfmm/scalfmm

# By piacibel (3) and bramas (2)
# Via piacibel
* 'master' of git+ssh://scm.gforge.inria.fr//gitroot/scalfmm/scalfmm:
  finals update from MPI/Thread to MPI/Thread Periodic
  Logs updated
  Update neighbors computation in C kernel api
  Move thread kernel inside the task to ensure correct value
  Changes of Mpi version moved to PeriodiqueMpi version (except P2P)
parents 8e713246 4eee45b0
...@@ -94,17 +94,17 @@ void Scalfmm_execute_kernel(Scalfmm_Handle handle, struct Scalfmm_Kernel_Descrip ...@@ -94,17 +94,17 @@ void Scalfmm_execute_kernel(Scalfmm_Handle handle, struct Scalfmm_Kernel_Descrip
//< This function fill the childFullPosition[3] with [0;1] to know the position of a child relatively to //< This function fill the childFullPosition[3] with [0;1] to know the position of a child relatively to
//< its position from its parent //< its position from its parent
inline void Scalfmm_utils_parentChildPosition(int childPosition, int* childFullPosition){ inline void Scalfmm_utils_parentChildPosition(int childPosition, int* childFullPosition){
childFullPosition[0] = childPosition%2; childFullPosition[2] = (childPosition%2 ? 1 : -1);
childFullPosition[1] = (childPosition/2)%2; childFullPosition[1] = ((childPosition/2)%2 ? 1 : -1);
childFullPosition[2] = (childPosition/4)%2; childFullPosition[0] = ((childPosition/4)%2 ? 1 : -1);
} }
//< This function fill the childFullPosition[3] with [-3;3] to know the position of a interaction //< This function fill the childFullPosition[3] with [-3;3] to know the position of a interaction
//< cell relatively to its position from the target //< cell relatively to its position from the target
inline void Scalfmm_utils_interactionPosition(int interactionPosition, int* srcPosition){ inline void Scalfmm_utils_interactionPosition(int interactionPosition, int* srcPosition){
srcPosition[0] = interactionPosition%7 - 3; srcPosition[2] = interactionPosition%7 - 3;
srcPosition[1] = (interactionPosition/7)%7 - 3; srcPosition[1] = (interactionPosition/7)%7 - 3;
srcPosition[2] = (interactionPosition/49)%7 - 3; srcPosition[0] = (interactionPosition/49)%7 - 3;
} }
#endif // CKERNELAPI_H #endif // CKERNELAPI_H
...@@ -492,8 +492,8 @@ private: ...@@ -492,8 +492,8 @@ private:
FLOG(computationCounter.tac()); FLOG(computationCounter.tac());
FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = " << counterTime.elapsed() << " s)\n" ); FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = " << counterTime.elapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FLOG( FLog::Controller << "\t\t Wait : " << singleCounter.cumulated() << " s\n" ); FLOG( FLog::Controller << "\t\t Single : " << singleCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Wait : " << parallelCounter.cumulated() << " s\n" ); FLOG( FLog::Controller << "\t\t Parallel : " << parallelCounter.cumulated() << " s\n" );
} }
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
...@@ -679,15 +679,12 @@ private: ...@@ -679,15 +679,12 @@ private:
delete[] status; delete[] status;
FLOG(sendCounter.tac()); FLOG(sendCounter.tac());
} }//End of Master region
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// Do M2L // Do M2L
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
const CellClass* neighbors[343];
#pragma omp single nowait #pragma omp single nowait
{ {
typename OctreeClass::Iterator octreeIterator(tree); typename OctreeClass::Iterator octreeIterator(tree);
...@@ -720,6 +717,9 @@ private: ...@@ -720,6 +717,9 @@ private:
for(int idxCell = 0 ; idxCell < numberOfCells ; idxCell += chunckSize){ for(int idxCell = 0 ; idxCell < numberOfCells ; idxCell += chunckSize){
#pragma omp task #pragma omp task
{ {
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
const CellClass* neighbors[343];
const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell); const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell);
for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){ for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){
const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), idxLevel); const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), idxLevel);
...@@ -727,7 +727,7 @@ private: ...@@ -727,7 +727,7 @@ private:
} }
} }
} }
} }//End of task spawning
#pragma omp taskwait #pragma omp taskwait
...@@ -1030,6 +1030,7 @@ private: ...@@ -1030,6 +1030,7 @@ private:
FLOG( FTic gatherCounter); FLOG( FTic gatherCounter);
FLOG( FTic waitCounter); FLOG( FTic waitCounter);
FLOG(FTic computationCounter); FLOG(FTic computationCounter);
FLOG(FTic computation2Counter);
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
// Prepare data to send receive // Prepare data to send receive
...@@ -1076,9 +1077,9 @@ private: ...@@ -1076,9 +1077,9 @@ private:
int partsToSend[nbProcess]; int partsToSend[nbProcess];
memset(partsToSend, 0, sizeof(int) * nbProcess); memset(partsToSend, 0, sizeof(int) * nbProcess);
#pragma omp parallel #pragma omp parallel
{ {
#pragma omp master // MUST WAIT to fill leafsNeedOther #pragma omp master // MUST WAIT to fill leafsNeedOther
{ {
// Copy leafs // Copy leafs
{ {
...@@ -1140,9 +1141,9 @@ private: ...@@ -1140,9 +1141,9 @@ private:
} }
} }
#pragma omp barrier #pragma omp barrier
#pragma omp master // nowait #pragma omp master // nowait
{ {
//Share to all processus globalReceiveMap //Share to all processus globalReceiveMap
FLOG(gatherCounter.tic()); FLOG(gatherCounter.tic());
...@@ -1219,7 +1220,7 @@ private: ...@@ -1219,7 +1220,7 @@ private:
// Prepare data for thread P2P // Prepare data for thread P2P
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
#pragma omp single // MUST WAIT! #pragma omp single // MUST WAIT!
{ {
typename OctreeClass::Iterator octreeIterator(tree); typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft(); octreeIterator.gotoBottomLeft();
...@@ -1271,9 +1272,7 @@ private: ...@@ -1271,9 +1272,7 @@ private:
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
{ {
KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]); #pragma omp single nowait
#pragma omp single nowait
{ {
FLOG(computationCounter.tic()); FLOG(computationCounter.tic());
int previous = 0; int previous = 0;
...@@ -1284,8 +1283,9 @@ private: ...@@ -1284,8 +1283,9 @@ private:
for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; idxLeafs += chunckSize){ for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; idxLeafs += chunckSize){
const int nbLeavesInTask = FMath::Min(endAtThisShape-idxLeafs, chunckSize); const int nbLeavesInTask = FMath::Min(endAtThisShape-idxLeafs, chunckSize);
#pragma omp task #pragma omp task
{ {
KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]);
// There is a maximum of 26 neighbors // There is a maximum of 26 neighbors
ContainerClass* neighbors[27]; ContainerClass* neighbors[27];
...@@ -1302,20 +1302,20 @@ private: ...@@ -1302,20 +1302,20 @@ private:
} }
previous = endAtThisShape; previous = endAtThisShape;
#pragma omp taskwait #pragma omp taskwait
} }
FLOG(computationCounter.tac()); FLOG(computationCounter.tac());
} }
} }
// Wait the come to finish (and the previous computation also) // Wait the come to finish (and the previous computation also)
#pragma omp barrier #pragma omp barrier
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// Computation P2P that need others data // Computation P2P that need others data
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
#pragma omp master #pragma omp master
{ FLOG( computation2Counter.tic() ); } { FLOG( computation2Counter.tic() ); }
{ {
...@@ -1327,7 +1327,7 @@ private: ...@@ -1327,7 +1327,7 @@ private:
// Box limite // Box limite
const int nbLeafToProceed = leafsNeedOtherData.getSize(); const int nbLeafToProceed = leafsNeedOtherData.getSize();
#pragma omp for schedule(static) #pragma omp for schedule(static)
for(int idxLeafs = 0 ; idxLeafs < nbLeafToProceed ; ++idxLeafs){ for(int idxLeafs = 0 ; idxLeafs < nbLeafToProceed ; ++idxLeafs){
LeafData currentIter = leafsNeedOtherData[idxLeafs]; LeafData currentIter = leafsNeedOtherData[idxLeafs];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment