Commit 3e053f86 authored by BRAMAS Berenger

Clean the M2M (should be tested)

parent 0301bfc3
@@ -302,7 +302,7 @@ private:
// This variable is the proc responsible
// for the shared cells
int sendToProc = idProcess;
int currentProcIdToSendTo = idProcess;
// There are at most 8-1 sends and 8-1 receptions
MPI_Request requests[14];
@@ -357,11 +357,11 @@ private:
}
sendBuffer.writeAt(0,state);
while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel , sendToProc - 1).rightIndex){
--sendToProc;
while( currentProcIdToSendTo && iterArray[cellsToSend].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel , currentProcIdToSendTo - 1).rightIndex){
--currentProcIdToSendTo;
}
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, sendToProc,
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, currentProcIdToSendTo,
FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
}
// We may need to receive something
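A note on the destination scan in this hunk: the proc responsible for a shared cell is found by walking left over the per-proc working intervals until the cell's Morton index is no longer covered by the previous proc's interval. Below is a minimal standalone sketch of that scan; `Interval` and `findProcToSendTo` are hypothetical stand-ins for `getWorkingInterval(idxLevel, proc)` and the loop above.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the per-proc working intervals.
struct Interval { int64_t leftIndex; int64_t rightIndex; };

// Starting from my own rank, move left while the shared cell's Morton
// index is still covered by the previous proc's interval; the left-most
// such proc is the one responsible for the shared cell.
int findProcToSendTo(const std::vector<Interval>& intervals,
                     int myRank, int64_t cellIndex) {
    int currentProcIdToSendTo = myRank;
    while (currentProcIdToSendTo
           && cellIndex <= intervals[currentProcIdToSendTo - 1].rightIndex) {
        --currentProcIdToSendTo;
    }
    return currentProcIdToSendTo;
}

int main() {
    const std::vector<Interval> intervals = {{0, 9}, {9, 20}, {20, 31}};
    // Cell 9 is shared between procs 0 and 1, so proc 1 sends it to proc 0.
    printf("send to proc %d\n", findProcToSendTo(intervals, 1, 9));
    return 0;
}
```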
@@ -478,7 +478,6 @@ private:
/** M2M */
void upwardPass(){
const int MaxSizePerCell = CellClass::GetSize();
FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
@@ -493,9 +492,8 @@ private:
octreeIterator.moveUp();
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
// This variable is the proc responsible
// for the shared cells
int sendToProc = idProcess;
// The proc to send the shared cells to (will move to the left)
int currentProcIdToSendTo = idProcess;
// There are at most 8-1 sends and 8-1 receptions
MPI_Request requests[14];
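The `requests[14]` array above sizes the worst case of the non-blocking pattern used by this pass: a parent has 8 children, so at most 8-1 of them belong to other procs, giving up to 7 sends plus 7 receives before a single wait. A minimal sketch of that Isend/Irecv/Waitall pattern, assuming plain byte buffers instead of the `FMpiBufferWriter`/`FMpiBufferReader` packing used by the real code:

```cpp
#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Same shape as upwardPass: post every send/recv into one request
    // array, count them, and wait on all of them at once.
    MPI_Request requests[14];
    int iterMpiRequests = 0;
    char sendByte = char(rank);
    char recvByte = -1;

    if (rank > 0) { // send one byte to the left neighbor
        MPI_Isend(&sendByte, 1, MPI_CHAR, rank - 1, 0,
                  MPI_COMM_WORLD, &requests[iterMpiRequests++]);
    }
    if (rank < size - 1) { // receive one byte from the right neighbor
        MPI_Irecv(&recvByte, 1, MPI_CHAR, rank + 1, 0,
                  MPI_COMM_WORLD, &requests[iterMpiRequests++]);
    }
    if (iterMpiRequests) {
        MPI_Waitall(iterMpiRequests, requests, MPI_STATUSES_IGNORE);
    }
    printf("rank %d received %d\n", rank, int(recvByte));
    MPI_Finalize();
    return 0;
}
```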
@@ -507,167 +505,155 @@ private:
FMpiBufferReader recvBuffer(comm.getComm(), nbProcess*recvBufferOffset);
CellClass recvBufferCells[8];
// The first proc that sends me a cell
int firstProcThatSend = idProcess + 1;
FLOG(computationCounter.tic());
//Loop for work
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
if(idProcess != 0
&& getWorkingInterval((idxLevel+1), idProcess).rightIndex <= getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex){
// Are my cells covered by my neighbor's working interval?
const bool noMoreWorkForMe = (idProcess != 0 && getWorkingInterval((idxLevel+1), idProcess).rightIndex <= getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex);
if(noMoreWorkForMe){
FAssertLF(getWorkingInterval(idxLevel, idProcess).rightIndex < getWorkingInterval(idxLevel, idProcess).leftIndex);
break;
}
// copy cells to work with
int numberOfCells = 0;
// for each cell
// Copy and count ALL the cells (even the ones outside the working interval)
int totalNbCellsAtLevel = 0;
do{
iterArray[numberOfCells++] = octreeIterator;
iterArray[totalNbCellsAtLevel++] = octreeIterator;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;
int cellsToSend = -1;
int iterRequests = 0;
int nbCellsToSend = 0; // The number of cells to send
int iterMpiRequests = 0; // The iterator for send/recv requests
int nbCellsForThreads = totalNbCellsAtLevel; // totalNbCellsAtLevel or totalNbCellsAtLevel-1
int endIndex = numberOfCells;
//Test if I'm not the last proc and if I need data from others to compute my last M2M
if((idProcess != nbProcess-1) &&
((getWorkingInterval(idxLevel+1,idProcess+1)).leftIndex >>3) <= ((getWorkingInterval(idxLevel+1,idProcess)).rightIndex)>>3){
endIndex--;
nbCellsForThreads -= 1;
}
while(iterArray[cellsToSend+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
++cellsToSend;
// Skip all the cells that are out of my working interval
while(iterArray[nbCellsToSend].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
++nbCellsToSend;
}
if((iterArray[0].getCurrentGlobalIndex() == getWorkingInterval(idxLevel, idProcess-1).rightIndex) && numberOfCells==1){
cellsToSend++;
}
// TODO REMOVE
// if((iterArray[0].getCurrentGlobalIndex() == getWorkingInterval(idxLevel, idProcess-1).rightIndex) && numberOfCells==1){
// nbCellsToSend++;
//}
// END REMOVE
FLOG(parallelCounter.tic());
#pragma omp parallel
#pragma omp parallel
{
const int threadNumber = omp_get_thread_num();
KernelClass* myThreadkernels = (kernels[threadNumber]);
//This single section posts and receives the communications, and then does the M2M associated with them.
#pragma omp single nowait
#pragma omp single nowait
{
FLOG(singleCounter.tic());
//Data needed in several parts of the section
bool hasToReceive;
int endProcThatSend;
const bool firstCellAreOutOfMyInterval = idProcess != 0 && (getWorkingInterval((idxLevel+1), idProcess).leftIndex >>3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex >>3);
//Post Send
if(idProcess != 0
&& (getWorkingInterval((idxLevel+1), idProcess).leftIndex >>3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex >>3)){
// if(cellsToSend == -1){
// fprintf(stderr,"You found One : %d, nbofCells %d Intervals : %d{%lld,%lld} %d{%lld,%lld} Left : %d{%lld,%lld} %d{%lld,%lld} %lld\n",
// idProcess,numberOfCells,
// idxLevel, getWorkingInterval(idxLevel).leftIndex,
// getWorkingInterval(idxLevel).rightIndex,
// idxLevel+1, getWorkingInterval(idxLevel+1).leftIndex,
// getWorkingInterval(idxLevel+1).rightIndex,
// idxLevel, getWorkingInterval(idxLevel,idProcess-1).leftIndex,
// getWorkingInterval(idxLevel,idProcess-1).rightIndex,
// idxLevel+1,getWorkingInterval(idxLevel+1,idProcess-1).leftIndex,
// getWorkingInterval(idxLevel+1,idProcess-1).rightIndex,iterArray[0].getCurrentGlobalIndex());
// //cellsToSend +=1;
// }
char state = 0;
sendBuffer.write(state);
const CellClass* const* const child = iterArray[cellsToSend].getCurrentChild();
if(firstCellAreOutOfMyInterval){
FAssertLF(nbCellsToSend != 0);
char packageFlags = 0;
sendBuffer.write(packageFlags);
// Only the right-most cell out of my working interval should be taken into
// consideration (at pos nbCellsToSend-1); the others (x < nbCellsToSend-1) have already been sent
const CellClass* const* const child = iterArray[nbCellsToSend-1].getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
// Check if the child exists and was part of my working interval
if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).leftIndex <= child[idxChild]->getMortonIndex() ){
// Add the cell to the buffer
child[idxChild]->serializeUp(sendBuffer);
state = char(state | (0x1 << idxChild));
packageFlags = char(packageFlags | (0x1 << idxChild));
}
}
sendBuffer.writeAt(0,state);
// Add the flag as the first value
sendBuffer.writeAt(0,packageFlags);
while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel , sendToProc - 1).rightIndex){
--sendToProc;
// Find the proc to send to
while( currentProcIdToSendTo && iterArray[nbCellsToSend-1].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel , currentProcIdToSendTo - 1).rightIndex){
--currentProcIdToSendTo;
}
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, sendToProc,
FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
// Post the message
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, currentProcIdToSendTo,
FMpi::TagFmmM2M, comm.getComm(), &requests[iterMpiRequests++]);
}
//Post receive
{
// We may need to receive something
hasToReceive = false;
endProcThatSend = firstProcThatSend;
if(idProcess != nbProcess - 1){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in an M2M
while(firstProcThatSend < nbProcess
&& (getWorkingInterval((idxLevel+1), firstProcThatSend).rightIndex) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex)){
// Second condition: while firstProcThatSend's rightIndex Morton index is <= my own interval's rightIndex
++firstProcThatSend;
}
if(firstProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1), firstProcThatSend).leftIndex >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).rightIndex>>3) ){
//Test: if the parent of firstProcThatSend's minimal interval value is lower than or equal to the parent of my maximal one
endProcThatSend = firstProcThatSend;
while( endProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1) ,endProcThatSend).leftIndex >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).rightIndex>>3)){
++endProcThatSend;
}
//Post receive; data needed in several parts of the section
bool hasToReceive = false;
int endProcThatSend = firstProcThatSend;
if(idProcess != nbProcess - 1){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in an M2M
// Find the first proc that sends to me
while(firstProcThatSend < nbProcess
&& (getWorkingInterval((idxLevel+1), firstProcThatSend).rightIndex) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex)){
// Second condition: while firstProcThatSend's rightIndex Morton index is <= my own interval's rightIndex
++firstProcThatSend;
}
if(firstProcThatSend != endProcThatSend){
hasToReceive = true;
// If there is one and it has to send me data
if(firstProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1), firstProcThatSend).leftIndex >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).rightIndex>>3) ){
//Test: if the parent of firstProcThatSend's minimal interval value is lower than or equal to the parent of my maximal one
endProcThatSend = firstProcThatSend;
// Find the last proc that should send to me
while( endProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1) ,endProcThatSend).leftIndex >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).rightIndex>>3)){
++endProcThatSend;
}
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED,
idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
}
// If there is at least one proc that sends me something
if(firstProcThatSend != endProcThatSend){
FAssertLF(endProcThatSend - firstProcThatSend <= 7);
hasToReceive = true;
// Post a recv for each of them
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED,
idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterMpiRequests++]);
}
}
}
}
//Wait For the comms, and do the work
{
// Were any sends or receives posted?
if(iterRequests){
MPI_Waitall( iterRequests, requests, status);
// we were receiving data
if( hasToReceive ){
CellClass* currentChild[8];
memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
// retrieve data and merge my children with the children from others
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
recvBuffer.seek(idxProc * recvBufferOffset);
int state = int(recvBuffer.getValue<char>());
int position = 0;
while( state && position < 8){
while(!(state & 0x1)){
state >>= 1;
++position;
}
FAssertLF(!currentChild[position], "Already has a cell here");
recvBufferCells[position].deserializeUp(recvBuffer);
currentChild[position] = (CellClass*) &recvBufferCells[position];
//Wait For the comms, and do the work
// Were any sends or receives posted?
if(iterMpiRequests){
MPI_Waitall( iterMpiRequests, requests, status);
// we were receiving data
if( hasToReceive ){
CellClass* currentChild[8];
memcpy(currentChild, iterArray[totalNbCellsAtLevel - 1].getCurrentChild(), 8 * sizeof(CellClass*));
// Retrieve data and merge my children with the children from others
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
recvBuffer.seek(idxProc * recvBufferOffset);
int state = int(recvBuffer.getValue<char>());
int position = 0;
while( state && position < 8){
while(!(state & 0x1)){
state >>= 1;
++position;
}
}
// Finally compute
(*kernels[threadNumber]).M2M( iterArray[numberOfCells - 1].getCurrentCell() , currentChild, idxLevel);
firstProcThatSend = endProcThatSend - 1;
FAssertLF(!currentChild[position], "Already has a cell here");
recvBufferCells[position].deserializeUp(recvBuffer);
currentChild[position] = (CellClass*) &recvBufferCells[position];
state >>= 1;
++position;
}
}
// Finally compute
(*kernels[threadNumber]).M2M( iterArray[totalNbCellsAtLevel - 1].getCurrentCell() , currentChild, idxLevel);
firstProcThatSend = endProcThatSend - 1;
}
}
sendBuffer.reset();
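Several tests in this hunk compare Morton indices shifted right by 3 bits. In an octree each level appends one octal digit (3 bits) to the Morton index, so `index >> 3` is the parent's index, and two cells share a parent exactly when the shifted values are equal. A small self-contained illustration, assuming the standard octree Morton encoding:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // Dropping the last 3 bits of a Morton index gives the parent index.
    const int64_t childIndex  = 0b101011110;   // some cell at level L
    const int64_t parentIndex = childIndex >> 3; // its parent at level L-1
    assert(parentIndex == 0b101011);

    // Two cells share a parent iff their indices agree once the last
    // octal digit is dropped; this is the (leftIndex >> 3) <= (rightIndex >> 3)
    // test used to detect parent cells shared between neighboring procs.
    const int64_t myLastChild        = 0b101011110;
    const int64_t nextProcFirstChild = 0b101011111;
    assert((myLastChild >> 3) == (nextProcFirstChild >> 3));
    return 0;
}
```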
@@ -675,18 +661,11 @@ private:
FLOG(singleCounter.tac());
}//End Of Single section
#pragma omp for nowait
for( int idxCell = cellsToSend+1 ; idxCell < endIndex ; ++idxCell){
// All threads perform the M2M
#pragma omp for nowait
for( int idxCell = nbCellsToSend ; idxCell < nbCellsForThreads ; ++idxCell){
myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
// for(int k=0 ; k< 8 ; ++k){
// if(iterArray[idxCell].getCurrentChild()[k]){
// FILE * fd = fopen("ResM2MNearNew","a+");
// fprintf(fd,"%lld\t% lld\t %d\n",iterArray[idxCell].getCurrentCell()->getMortonIndex(),iterArray[idxCell].getCurrentChild()[k]->getMortonIndex(),idxLevel);
// fclose(fd);
// }
// }
}
}//End of parallel section
FLOG(parallelCounter.tac());
}
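Finally, the char written at position 0 of the send buffer (`state`, renamed `packageFlags` in this commit) encodes which of the 8 children follow in the message, and the receive loop in the single section walks those bits to merge remote children into `currentChild`. A minimal sketch of that pack/unpack protocol, assuming a plain bool array in place of the serialized cells:

```cpp
#include <cstdio>

int main() {
    // Which children exist on the sender side (children 0, 3 and 5 here).
    const bool childExists[8] = {true, false, false, true,
                                 false, true, false, false};

    // Pack: set bit idxChild for every child that would be serialized.
    char packageFlags = 0;
    for (int idxChild = 0; idxChild < 8; ++idxChild) {
        if (childExists[idxChild]) {
            packageFlags = char(packageFlags | (0x1 << idxChild));
        }
    }

    // Unpack: skip zero bits, handle each set bit, exactly like the
    // receive loop that deserializes into recvBufferCells[position].
    int state = int(packageFlags);
    int position = 0;
    while (state && position < 8) {
        while (!(state & 0x1)) {
            state >>= 1;
            ++position;
        }
        printf("child %d was sent\n", position);
        state >>= 1;
        ++position;
    }
    return 0;
}
```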