Commit 80a0057b authored by BRAMAS Berenger

Change MPI from waitsome to waitall in P2P

parent 67885622
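
The change swaps the completion primitive used for the point-to-point (P2P) requests: instead of draining requests in batches with MPI_Waitsome, the code now blocks once for all of them with MPI_Waitall. The P2P pass itself (directPass) lies outside the excerpt below, so the following standalone sketch only illustrates the two patterns; the helper name, vector name, and request handling are assumptions, not the project's code.

    #include <mpi.h>
    #include <vector>

    // Hypothetical helper: 'requests' holds already-posted MPI_Isend/MPI_Irecv handles.
    void completeRequests(std::vector<MPI_Request>& requests){
        std::vector<MPI_Status> status(requests.size());
        // Old pattern (sketch): loop on MPI_Waitsome and handle each completed batch.
        // std::vector<int> indices(requests.size());
        // int done = 0;
        // do {
        //     MPI_Waitsome(int(requests.size()), requests.data(), &done, indices.data(), status.data());
        //     // process indices[0 .. done-1] here
        // } while(done != MPI_UNDEFINED);
        // New pattern: a single blocking call that returns once every request has completed.
        MPI_Waitall(int(requests.size()), requests.data(), status.data());
    }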
@@ -86,8 +86,8 @@ class FFmmAlgorithmThreadProc : public FAbstractAlgorithm {
* that a proc use (it holds data in this interval)
*/
struct Interval{
MortonIndex leftIndex;
MortonIndex rightIndex;
};
/** My interval */
Interval*const intervals;
@@ -96,37 +96,37 @@ class FFmmAlgorithmThreadProc : public FAbstractAlgorithm {
/** Get an interval from proc id and level */
Interval& getWorkingInterval( int level, int proc){
return workingIntervalsPerLevel[OctreeHeight * proc + level];
}
const Interval& getWorkingInterval( int level, int proc) const {
return workingIntervalsPerLevel[OctreeHeight * proc + level];
}
/** To know if a proc has work at a given level (if it holds cells and is responsible for them) */
bool procHasWorkAtLevel(const int idxLevel , const int idxProc) const {
return getWorkingInterval(idxLevel, idxProc).leftIndex <= getWorkingInterval(idxLevel, idxProc).rightIndex;
}
/** Return true if the idxProc left cell at idxLevel+1 has the same parent as us for our right cell */
bool procCoversMyRightBorderCell(const int idxLevel , const int idxProc) const {
return (getWorkingInterval((idxLevel+1) , idProcess).rightIndex>>3) == (getWorkingInterval((idxLevel+1) ,idxProc).leftIndex >>3);
}
/** Return true if the idxProc right cell at idxLevel+1 has the same parent as us for our left cell */
bool procCoversMyLeftBorderCell(const int idxLevel , const int idxProc) const {
return (getWorkingInterval((idxLevel+1) , idxProc).rightIndex >>3) == (getWorkingInterval((idxLevel+1) , idProcess).leftIndex>>3);
}
public:
/** Get current proc interval at level */
Interval& getWorkingInterval( int level){
return getWorkingInterval(level, idProcess);
}
/** Does the current proc have some work at this level */
bool hasWorkAtLevel( int level){
return idProcess == 0 || (getWorkingInterval(level, idProcess - 1).rightIndex) < (getWorkingInterval(level, idProcess).rightIndex);
}
/** The constructor needs the octree and the kernels used for computation
@@ -135,31 +135,31 @@ public:
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels)
: tree(inTree) , kernels(nullptr), comm(inComm), iterArray(nullptr),iterArrayComm(nullptr),numberOfLeafs(0),
MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]),
workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()])
{
FAssertLF(tree, "tree cannot be null");
this->kernels = new KernelClass*[MaxThreads];
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass(*inKernels);
}
FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n");
FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
}
/** Default destructor */
virtual ~FFmmAlgorithmThreadProc(){
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
delete this->kernels[idxThread];
}
delete [] this->kernels;
delete [] intervals;
delete [] workingIntervalsPerLevel;
}
/**
@@ -167,96 +167,96 @@ public:
* Call this function to run the complete algorithm
*/
void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
// Count leaf
this->numberOfLeafs = 0;
{
Interval myFullInterval;
{//Building the interval with the first and last leaves (and count the number of leaves)
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
myFullInterval.leftIndex = octreeIterator.getCurrentGlobalIndex();
do{
++this->numberOfLeafs;
} while(octreeIterator.moveRight());
myFullInterval.rightIndex = octreeIterator.getCurrentGlobalIndex();
}
// Allocate arrays to store the iterators to the cells at a level
iterArray = new typename OctreeClass::Iterator[numberOfLeafs];
iterArrayComm = new typename OctreeClass::Iterator[numberOfLeafs];
FAssertLF(iterArray, "iterArray bad alloc");
FAssertLF(iterArrayComm, "iterArrayComm bad alloc");
// We gather the leftIndex/rightIndex from each proc
FMpi::MpiAssert( MPI_Allgather( &myFullInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()), __LINE__ );
// Build my intervals for all levels
std::unique_ptr<Interval[]> myIntervals(new Interval[OctreeHeight]);
// At leaf level we know it is the full interval
myIntervals[OctreeHeight - 1] = myFullInterval;
// We can estimate the interval for each level by using the parent/child relation
for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){
myIntervals[idxLevel].leftIndex = myIntervals[idxLevel+1].leftIndex >> 3;
myIntervals[idxLevel].rightIndex = myIntervals[idxLevel+1].rightIndex >> 3;
}
// Process 0 uses the estimates as real intervals, but other processes
// should remove cells that belong to others
if(idProcess != 0){
//We test for each level if the process on the left (idProcess-1) owns cells I thought I owned
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
// At h-1 the working limit is the parent of the right cell of the proc on the left
MortonIndex workingLimitAtLevel = intervals[idProcess-1].rightIndex >> 3;
// We check if we have no more work to do
int nullIntervalFromLevel = 0;
for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 && nullIntervalFromLevel == 0 ; --idxLevel){
while(octreeIterator.getCurrentGlobalIndex() <= workingLimitAtLevel){
if( !octreeIterator.moveRight() ){
// We cannot move right, we no longer own any cell
nullIntervalFromLevel = idxLevel;
break;
}
}
// If we are responsible for some cells at this level keep the first index
if(nullIntervalFromLevel == 0){
myIntervals[idxLevel].leftIndex = octreeIterator.getCurrentGlobalIndex();
octreeIterator.moveUp();
workingLimitAtLevel >>= 3;
}
}
// In case we are not responsible for any cells we put the leftIndex = rightIndex+1
for(int idxLevel = nullIntervalFromLevel ; idxLevel >= 1 ; --idxLevel){
myIntervals[idxLevel].leftIndex = myIntervals[idxLevel].rightIndex + 1;
}
}
// We gather the per-level leftIndex/rightIndex from each proc
FMpi::MpiAssert( MPI_Allgather( myIntervals.get(), int(sizeof(Interval)) * OctreeHeight, MPI_BYTE,
workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ );
}
// run;
if(operationsToProceed & FFmmP2M) bottomPass();
if(operationsToProceed & FFmmM2M) upwardPass();
if(operationsToProceed & FFmmM2L) transferPass();
if(operationsToProceed & FFmmL2L) downardPass();
if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass();
// delete array
delete [] iterArray;
delete [] iterArrayComm;
iterArray = nullptr;
iterArrayComm = nullptr;
}
private:
......@@ -271,32 +271,32 @@ private:
* It is similar to the multi-threaded version.
*/
void bottomPass(){
FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
FLOG(FTic counterTime);
FLOG(FTic computationCounter);
typename OctreeClass::Iterator octreeIterator(tree);
// Copy the ptr to leaves in array
octreeIterator.gotoBottomLeft();
int leafs = 0;
do{
iterArray[leafs++] = octreeIterator;
} while(octreeIterator.moveRight());
FLOG(computationCounter.tic());
#pragma omp parallel
{
// Each thread gets its own kernel
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
// Parallel iteration on the leaves
#pragma omp for nowait
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
}
}
FLOG(computationCounter.tac());
FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
}
/////////////////////////////////////////////////////////////////////////////
@@ -305,195 +305,195 @@ private:
/** M2M */
void upwardPass(){
const int MaxSizePerCell = CellClass::GetSize();
FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
FLOG(FTic counterTime);
FLOG(FTic computationCounter);
FLOG(FTic singleCounter);
FLOG(FTic parallelCounter);
// Start from leaf level (height-1)
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
// The proc to send the shared cells to
// Starting at the proc on the left, this variable will go down to 0
int currentProcIdToSendTo = (idProcess - 1);
// There is a maximum of 1 send and 8-1 receptions
MPI_Request requests[8];
MPI_Status status[8];
// Maximum data per message is:
FMpiBufferWriter sendBuffer(comm.getComm(), 7*MaxSizePerCell + 1);
const int recvBufferOffset = (7 * MaxSizePerCell + 1);
FMpiBufferReader recvBuffer(comm.getComm(), 7*recvBufferOffset);
CellClass recvBufferCells[8];
// The first proc that sends me a cell
// This variable will go to nbProcess
int firstProcThatSend = idProcess + 1;
FLOG(computationCounter.tic());
// We work from height-1 to 1
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
// Are my cells covered by my neighbors' working interval, so that I have no more work?
const bool noMoreWorkForMe = (idProcess != 0 && !procHasWorkAtLevel(idxLevel+1, idProcess));
if(noMoreWorkForMe){
FAssertLF(procHasWorkAtLevel(idxLevel, idProcess) == false);
break;
}
// Copy and count ALL the cells (even the ones outside the working interval)
int totalNbCellsAtLevel = 0;
do{
iterArray[totalNbCellsAtLevel++] = octreeIterator;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;
int iterMpiRequests = 0; // The iterator for send/recv requests
int nbCellsToSkip = 0; // The number of cells to skip
// Skip all the cells that are out of my working interval
while(nbCellsToSkip < totalNbCellsAtLevel && iterArray[nbCellsToSkip].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
++nbCellsToSkip;
}
// We need to know if we will recv something in order to know if threads skip the last cell
int nbCellsForThreads = totalNbCellsAtLevel; // totalNbCellsAtLevel or totalNbCellsAtLevel-1
bool hasToReceive = false;
if(idProcess != nbProcess-1 && procHasWorkAtLevel(idxLevel , idProcess)){
// Find the first proc that may send to me
while(firstProcThatSend < nbProcess && !procHasWorkAtLevel(idxLevel+1, firstProcThatSend) ){
firstProcThatSend += 1;
}
// Do we have to receive?
if(firstProcThatSend < nbProcess && procHasWorkAtLevel(idxLevel+1, firstProcThatSend) && procCoversMyRightBorderCell(idxLevel, firstProcThatSend) ){
hasToReceive = true;
// Threads do not compute the last cell, we will do it once data are received
nbCellsForThreads -= 1;
}
}
FLOG(parallelCounter.tic());
#pragma omp parallel
{
const int threadNumber = omp_get_thread_num();
KernelClass* myThreadkernels = (kernels[threadNumber]);
//This single section posts and receives the comms, then does the M2M associated with them.
#pragma omp single nowait
{
FLOG(singleCounter.tic());
// Master proc never sends
if(idProcess != 0){
// Skip processes that have no work at that level
while( currentProcIdToSendTo && !procHasWorkAtLevel(idxLevel, currentProcIdToSendTo) ){
--currentProcIdToSendTo;
}
// Does the next proc that has work share the parent of my left cell?
if(procHasWorkAtLevel(idxLevel, currentProcIdToSendTo) && procCoversMyLeftBorderCell(idxLevel, currentProcIdToSendTo)){
FAssertLF(nbCellsToSkip != 0);
char packageFlags = 0;
sendBuffer.write(packageFlags);
// Only the right-most cell out of my working interval should be taken into
// consideration (at pos nbCellsToSkip-1); the others (x < nbCellsToSkip-1) have already been sent
const CellClass* const* const child = iterArray[nbCellsToSkip-1].getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
// Check if child exists and it was part of my working interval
if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).leftIndex <= child[idxChild]->getMortonIndex() ){
// Add the cell to the buffer
child[idxChild]->serializeUp(sendBuffer);
packageFlags = char(packageFlags | (0x1 << idxChild));
}
}
// Add the flag as first value
sendBuffer.writeAt(0,packageFlags);
// Post the message
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, currentProcIdToSendTo,
FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]);
}
}
//Post the receives; data needed in several parts of the section
int nbProcThatSendToMe = 0;
if(hasToReceive){
//Test, starting from firstProcThatSend, which procs share the parent of my right border cell and will send to me
int idProcSource = firstProcThatSend;
// Find the last proc that should send to me
while( idProcSource < nbProcess
&& ( !procHasWorkAtLevel(idxLevel+1, idProcSource) || procCoversMyRightBorderCell(idxLevel, idProcSource) )){
if(procHasWorkAtLevel(idxLevel+1, idProcSource) && procCoversMyRightBorderCell(idxLevel, idProcSource)){
MPI_Irecv(&recvBuffer.data()[nbProcThatSendToMe * recvBufferOffset], recvBufferOffset, MPI_PACKED,
idProcSource, FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]);
nbProcThatSendToMe += 1;
FAssertLF(nbProcThatSendToMe <= 7);
}
++idProcSource;
}
}
//Wait For the comms, and do the work
// Are we sending or waiting anything?
if(iterMpiRequests){
FAssertLF(iterMpiRequests <= 8);
MPI_Waitall( iterMpiRequests, requests, status);
}
// We received something so we need to process the last M2M
if( hasToReceive ){
FAssertLF(iterMpiRequests != 0);
CellClass* currentChild[8];
memcpy(currentChild, iterArray[totalNbCellsAtLevel - 1].getCurrentChild(), 8 * sizeof(CellClass*));
// Retrieve data and merge my children and the children from others