Commit 38091310 authored by PIACIBELLO Cyrille
Browse files

Fix for the error occurring in M2M, still bugged

parent eb759b06
......@@ -123,6 +123,7 @@ public:
{
FAssertLF(tree, "tree cannot be null");
this->kernels = new KernelClass*[MaxThreads];
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass(*inKernels);
......@@ -155,7 +156,7 @@ public:
FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
Interval myLastInterval;
{
{//Building the leaf interval
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
myLastInterval.min = octreeIterator.getCurrentGlobalIndex();
......@@ -174,11 +175,13 @@ public:
Interval*const myIntervals = new Interval[OctreeHeight];
myIntervals[OctreeHeight - 1] = myLastInterval;
//Building the intervals for all the cells for at each level using bitshifting
for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){
myIntervals[idxLevel].min = myIntervals[idxLevel+1].min >> 3;
myIntervals[idxLevel].max = myIntervals[idxLevel+1].max >> 3;
}
if(idProcess != 0){
//We test for each level if process on left (idProcess-1) own cell I thought I owned
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
......@@ -194,10 +197,15 @@ public:
currentLimit >>= 3;
}
}
printf("Proc::%d From leaf %lld to leaf %lld\n",idProcess,myLastInterval.min,myLastInterval.max);
// We get the min/max indexes from each procs
FMpi::MpiAssert( MPI_Allgather( myIntervals, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE,
workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ );
//Print for each proc the working interval at each level
// for(int idL = 0 ; idL < OctreeHeight ; ++idL){
// printf("Proc::%d From cell %lld to cell %lld\n",
// idProcess,getWorkingInterval(idL).min,getWorkingInterval(idL).max);
// }
delete[] myIntervals;
}
......@@ -251,7 +259,6 @@ private:
}
}
FLOG(computationCounter.tac());
FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
}
......@@ -511,13 +518,19 @@ private:
int endIndex = numberOfCells;
//Test if i'm not the last, and I need st to compute my last M2M
if((idProcess != nbProcess-1) && ((getWorkingInterval(idxLevel+1,idProcess+1)).min >>3) <= ((getWorkingInterval(idxLevel+1,idProcess)).max)>>3){
if((idProcess != nbProcess-1) &&
((getWorkingInterval(idxLevel+1,idProcess+1)).min >>3) <= ((getWorkingInterval(idxLevel+1,idProcess)).max)>>3){
endIndex--;
}
while(iterArray[cellsToSend+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).min){
++cellsToSend;
}
if((iterArray[0].getCurrentGlobalIndex() == getWorkingInterval(idxLevel, idProcess-1).max) && numberOfCells==1){
cellsToSend++;
}
FLOG(parallelCounter.tic());
#pragma omp parallel
{
......@@ -535,6 +548,19 @@ private:
if(idProcess != 0
&& (getWorkingInterval((idxLevel+1), idProcess).min >>3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).max >>3)){
// if(cellsToSend == -1){
// fprintf(stderr,"You found One : %d, nbofCells %d Intervals : %d{%lld,%lld} %d{%lld,%lld} Left : %d{%lld,%lld} %d{%lld,%lld} %lld\n",
// idProcess,numberOfCells,
// idxLevel, getWorkingInterval(idxLevel).min,
// getWorkingInterval(idxLevel).max,
// idxLevel+1, getWorkingInterval(idxLevel+1).min,
// getWorkingInterval(idxLevel+1).max,
// idxLevel, getWorkingInterval(idxLevel,idProcess-1).min,
// getWorkingInterval(idxLevel,idProcess-1).max,
// idxLevel+1,getWorkingInterval(idxLevel+1,idProcess-1).min,
// getWorkingInterval(idxLevel+1,idProcess-1).max,iterArray[0].getCurrentGlobalIndex());
// //cellsToSend +=1;
// }
char state = 0;
sendBuffer.write(state);
......@@ -593,11 +619,14 @@ private:
}
//Wait For the comms, and do the work
{
// Are we sending or waiting anything?
if(iterRequests){
MPI_Waitall( iterRequests, requests, status);
// we were receiving data
if( hasToReceive ){
CellClass* currentChild[8];
memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
......@@ -633,6 +662,7 @@ private:
recvBuffer.seek(0);
FLOG(singleCounter.tac());
}//End Of Single section
#pragma omp for nowait
for( int idxCell = cellsToSend+1 ; idxCell < endIndex ; ++idxCell){
myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
......@@ -649,7 +679,6 @@ private:
FLOG(parallelCounter.tac());
}
FLOG(counterTime.tac());
FLOG(computationCounter.tac());
FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = " << counterTime.elapsed() << " s)\n" );
......@@ -978,6 +1007,10 @@ private:
}
}
}
// FILE * fd = fopen("res0","a+");
// fprintf(fd,"Q %d\t %d\t %d\t %d\n",
// idProcess,idxLevel,idxCell,counter);
// fclose(fd);
// need to compute
if(counter){
myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
......@@ -1006,12 +1039,14 @@ private:
delete[] requests;
delete[] status;
FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Gather : " << gatherCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
}
......@@ -1052,15 +1087,15 @@ private:
FMpiBufferWriter** sendBuffer;
FMpiBufferReader** recvBuffer;
// #pragma omp parallel /*default(none)*/ shared(m2lSelf,globalReceiveMap,requests,sendBuffer,recvBuffer,iterRequest,status,singleCounter,toSend,gatherCounter,sendCounter)
#pragma omp parallel /*default(none)*/ shared(globalReceiveMap,requests,sendBuffer,recvBuffer,iterRequest,status,toSend)
// #pragma omp parallel /*default(none)*/ shared(m2lSelf,globalReceiveMap,requests,sendBuffer,recvBuffer,iterRequest,status,singleCounter,toSend,gatherCounter,sendCounter)
#pragma omp parallel /*default(none)*/ shared(globalReceiveMap,requests,sendBuffer,recvBuffer,iterRequest,status,toSend) FLOG(shared(m2lSelf,singleCounter,gatherCounter,sendCounter))
{
int threadIdx = omp_get_thread_num();
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
#pragma omp single nowait
#pragma omp single //nowait
{
FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
FLOG(singleCounter.tic());
......@@ -1261,15 +1296,18 @@ private:
FLOG(computationCounter.tic());
{
KernelClass * const myThreadkernels = kernels[threadIdx];
const CellClass* neighbors[343];
//Change kind
#pragma omp for schedule(static,1) //nowait
for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel);
if(counter){
myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
}
}
myThreadkernels->finishedLevelM2L(idxLevel);
}
......@@ -1278,16 +1316,16 @@ private:
}
FLOG(m2lSelf.tac());
}//End of parallel section
//Synch between threads at implict barrier
//Synch between threads at implict barrier
FTRACE( FTrace::FRegion regionTrace("Compute Received data", __FUNCTION__ , __FILE__ , __LINE__) );
FTRACE( FTrace::FRegion regionTrace("Compute Received data", __FUNCTION__ , __FILE__ , __LINE__) );
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.moveDown();
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
// compute the second time
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.moveDown();
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
// compute the second time
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
if(idProcess != 0
&& getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
......@@ -1344,10 +1382,15 @@ for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
int neighborsPosition[189];
const CellClass* neighbors[343];
#pragma omp for schedule(dynamic) nowait
printf("Proc %d, idThread %d, idxLevel %d, number : %d \n",
idProcess,omp_get_thread_num(),idxLevel,numberOfCells);
#pragma omp for schedule(dynamic) //nowait
for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
// compute indexes
memset(neighbors, 0, 343 * sizeof(CellClass*));
memset(neighborsPosition, 0, 189 * sizeof(int));
memset(neighborsIndex, 0, 189 * sizeof(MortonIndex));
const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition);
int counter = 0;
......@@ -1365,6 +1408,10 @@ for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
}
}
}
// FILE * fd = fopen("res1","a+");
// fprintf(fd,"Q %d\t %d\t %d\t %d\n",
// idProcess,idxLevel,idxCell,counter);
// fclose(fd);
// need to compute
if(counter){
myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
......@@ -1375,30 +1422,30 @@ for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
}//End of parallel section
FLOG(computationCounter.tac());
FLOG(m2lFar.tac());
}
}
for(int idxComm = 0 ; idxComm < nbProcess * OctreeHeight; ++idxComm){
for(int idxComm = 0 ; idxComm < nbProcess * OctreeHeight; ++idxComm){
delete sendBuffer[idxComm];
delete recvBuffer[idxComm];
}
for(int idxComm = 0 ; idxComm < OctreeHeight; ++idxComm){
}
for(int idxComm = 0 ; idxComm < OctreeHeight; ++idxComm){
delete leafsNeedOther[idxComm];
}
delete[] sendBuffer;
delete[] recvBuffer;
delete[] indexToSend;
delete[] leafsNeedOther;
delete[] globalReceiveMap;
delete[] requests;
delete[] status;
FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Single : " << singleCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Self : " << m2lSelf.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Far : " << m2lFar.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Gather : " << gatherCounter.elapsed() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Send : " << sendCounter.elapsed() << " s\n" );
}
delete[] sendBuffer;
delete[] recvBuffer;
delete[] indexToSend;
delete[] leafsNeedOther;
delete[] globalReceiveMap;
delete[] requests;
delete[] status;
FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Single : " << singleCounter.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Self : " << m2lSelf.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Far : " << m2lFar.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Gather : " << gatherCounter.elapsed() << " s\n" );
FLOG( FLog::Controller << "\t\t M2L Send : " << sendCounter.elapsed() << " s\n" );
}
//////////////////////////////////////////////////////////////////
......@@ -1583,6 +1630,10 @@ FLOG( FLog::Controller << "\t\t M2L Send : " << sendCounter.elapsed() << " s\n"
while(iterArray[firstCellWork+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel , idProcess).min){
++firstCellWork;
}
if(iterArray[firstCellWork+1].getCurrentGlobalIndex() == getWorkingInterval(idxLevel , idProcess).min && numberOfCells==1){
printf("You Mouchard\n");
firstCellWork++;
}
#pragma omp parallel
{
......@@ -1640,6 +1691,18 @@ FLOG( FLog::Controller << "\t\t M2L Send : " << sendCounter.elapsed() << " s\n"
FLOG(waitCounter.tac());
if(needToRecv){
// if(idProcess == 53){
// fprintf(stderr,"You found One : %d, nbofCells %d Intervals : %d{%lld,%lld} %d{%lld,%lld} Left : %d{%lld,%lld} %d{%lld,%lld} %lld\n",
// idProcess,numberOfCells,
// idxLevel, getWorkingInterval(idxLevel).min,
// getWorkingInterval(idxLevel).max,
// idxLevel+1, getWorkingInterval(idxLevel+1).min,
// getWorkingInterval(idxLevel+1).max,
// idxLevel, getWorkingInterval(idxLevel,idProcess-1).min,
// getWorkingInterval(idxLevel,idProcess-1).max,
// idxLevel+1,getWorkingInterval(idxLevel+1,idProcess-1).min,
// getWorkingInterval(idxLevel+1,idProcess-1).max,iterArray[0].getCurrentGlobalIndex());
// }
// Need to compute
FLOG(computationCounter.tic());
iterArray[firstCellWork].getCurrentCell()->deserializeDown(recvBuffer);
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment