Commit f4e19fa8 authored by berenger-bramas

Fix a bug in the "in-place" data copy.

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@183 2616d619-271b-44dc-8df4-d4a8f33a7222
parent c2c1c114
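The bug referred to above is in the packing of the sorted particles into the intervals buffer (FMpiTreeBuilder, last hunks below): the old code reused outputArray's own memory as the destination of the copy, whereas the fixed code copies into a freshly allocated char buffer and then frees outputArray. A minimal sketch of the corrected packing loop, assuming simplified stand-ins for MortonIndex, ParticleClass and IndexedParticle; the helper name and struct layouts are illustrative, not ScalFMM's exact types:

#include <cstring>

// Hypothetical, simplified stand-ins for the real ScalFMM types.
typedef long long MortonIndex;
struct ParticleClass { double x, y, z; };
struct IndexedParticle { MortonIndex index; ParticleClass particle; };

// Pack particles (already sorted by Morton index) into a byte buffer laid out
// as [int count][count * ParticleClass] per leaf. Copying into a separate
// buffer, instead of writing over outputArray while it is still being read,
// is the essence of the fix.
char* packLeaves(const IndexedParticle* outputArray, const int outputSize, int* nbLeaves){
    char* const intervals = new char[outputSize * (sizeof(ParticleClass) + sizeof(int))];
    char* writeIndex = intervals;
    int* writeCounter = 0;
    MortonIndex previousIndex = -1;
    *nbLeaves = 0;
    for(int idxPart = 0 ; idxPart < outputSize ; ++idxPart){
        if(outputArray[idxPart].index != previousIndex){
            previousIndex = outputArray[idxPart].index;
            ++(*nbLeaves);
            writeCounter = reinterpret_cast<int*>(writeIndex); // start a new leaf header
            writeIndex += sizeof(int);
            (*writeCounter) = 0;
        }
        std::memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass));
        writeIndex += sizeof(ParticleClass);
        ++(*writeCounter);
    }
    return intervals; // caller may now delete[] outputArray
}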
......@@ -32,9 +32,7 @@ class FLightOctree {
delete next[idxNext];
}
}
/** Insert a cell */
void insert(const MortonIndex& index, const void* const cell, const int level){
/** As long as the level is not 0 go downward */
if(level){
const int host = (index >> (3 * (level-1))) & 0x07;
if(!next[host]){
......@@ -46,7 +44,6 @@ class FLightOctree {
data = cell;
}
}
/** Retrieve data */
const void* getCell(const MortonIndex& index, const int level) const {
if(level){
const int host = (index >> (3 * (level-1))) & 0x07;
......@@ -61,19 +58,16 @@ class FLightOctree {
}
};
/** The tree root */
Node root;
public:
FLightOctree(){
}
/** Insert a cell into the light octree */
void insertCell(const MortonIndex& index, const void* const cell, const int level){
root.insert(index, cell, level);
}
/** Get a cell or null if it does not exist */
const void* getCell(const MortonIndex& index, const int level) const{
return root.getCell(index, level);
}
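Both insert() and getCell() walk the light octree by peeling 3 bits of the Morton index off per level: (index >> (3 * (level - 1))) & 0x07 selects which of the 8 children to follow. A small illustrative helper, assuming a 64-bit MortonIndex (the function name is hypothetical):

typedef long long MortonIndex;

// Child slot (0..7) taken at a given level when descending from the root;
// the recursion continues with level - 1 until level reaches 0.
inline int childAtLevel(const MortonIndex index, const int level){
    return static_cast<int>((index >> (3 * (level - 1))) & 0x07);
}
// Example: index 43 (0b101011) inserted at level 2 descends through
// child 5 (bits 3..5), then child 3 (bits 0..2).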
......@@ -103,6 +97,7 @@ public:
template<class OctreeClass, class ParticleClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmThreadProc : protected FAssertable {
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
......@@ -160,7 +155,7 @@ public:
}
FDEBUG(FDebug::Controller << "FFmmAlgorithmThreadProc\n");
FDEBUG(FDebug::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ".\n");
FDEBUG(FDebug::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
}
/** Default destructor */
......@@ -197,6 +192,7 @@ public:
// We get the min/max indexes from each proc
mpiassert( MPI_Allgather( &intervals[idProcess], sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, MPI_COMM_WORLD), __LINE__ );
for(int idxLevel = 0 ; idxLevel < OctreeHeight ; ++idxLevel){
const int offset = idxLevel * nbProcess;
// Then we can compute min/max per level and per proc
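Every process fills its own slot of intervals and the Allgather then distributes all slots, exchanged as raw bytes since no MPI datatype is registered for the struct. A hedged sketch of the same exchange, assuming a simple Interval of two Morton indexes; note that MPI_IN_PLACE is the standard-conforming way to reuse the receive buffer as the send buffer:

#include <mpi.h>

typedef long long MortonIndex;
// Hypothetical layout; the real Interval is defined elsewhere in the class.
struct Interval { MortonIndex min; MortonIndex max; };

void gatherIntervals(Interval* intervals /* one slot per process */){
    // Every rank has already written intervals[idProcess]; gather all slots as bytes.
    // MPI_IN_PLACE avoids overlapping send/receive buffers.
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE,
                  intervals, sizeof(Interval), MPI_BYTE, MPI_COMM_WORLD);
}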
......@@ -321,6 +317,7 @@ public:
// There are at most 8-1 sends and 8-1 receptions
MPI_Request requests[14];
MPI_Status status[14];
// Maximum data per message is:
const int recvBufferOffset = 8 * CellClass::SerializedSizeUp + 1;
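The per-process receive slot is sized for the worst case, the serialized up-data of all 8 children plus one extra byte (presumably a marker), hence 8 * CellClass::SerializedSizeUp + 1. A sketch of the implied buffer layout, with purely hypothetical sizes:

// Hypothetical numbers; in the real code SerializedSizeUp comes from CellClass
// and nbProcess from the communicator.
const int SerializedSizeUp = 128;
const int nbProcess        = 4;
const int recvBufferOffsetSketch = 8 * SerializedSizeUp + 1;   // 8 children + 1 extra byte

// One contiguous buffer with a fixed-size slot per process; process p's data
// presumably lands at &recvBuffer[p * recvBufferOffset], as in the MPI_Irecv call below.
char* const recvBufferSketch = new char[nbProcess * recvBufferOffsetSketch];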
......@@ -389,7 +386,7 @@ public:
MPI_Irecv(&recvBuffer[idxProc * recvBufferOffset], recvBufferOffset, MPI_BYTE, idxProc, 0, MPI_COMM_WORLD, &requests[iterRequests++]);
}
}
}
}
FDEBUG(prepareCounter.tac());
// Compute
......@@ -408,7 +405,7 @@ public:
// Are we sending or receiving anything?
if(iterRequests){
FDEBUG(waitCounter.tic());
MPI_Waitall( iterRequests, requests, 0);
MPI_Waitall( iterRequests, requests, status);
FDEBUG(waitCounter.tac());
// we were receiving data
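The recurring change from MPI_Waitall(iterRequests, requests, 0) to passing the status array matters because the third argument must be a valid MPI_Status array or MPI_STATUSES_IGNORE; a raw 0 is not a portable substitute. A hedged sketch of both valid forms:

#include <mpi.h>

// Illustrative only: wait on the outstanding requests; the caller provides a
// status array at least iterRequests entries long.
void waitOnAll(MPI_Request* requests, const int iterRequests, MPI_Status* status){
    if(iterRequests){
        MPI_Waitall(iterRequests, requests, status);
        // Equivalent when the statuses are never inspected afterwards:
        // MPI_Waitall(iterRequests, requests, MPI_STATUSES_IGNORE);
    }
}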
......@@ -517,7 +514,7 @@ public:
leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
// Which cells potentially need other data and at the same time
// Which cell potentially needs other data and at the same time
// are potentially needed by others
MortonIndex neighborsIndexes[208];
for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
......@@ -591,6 +588,7 @@ public:
// Then they can send and receive (because they know what they will receive)
// To send in an asynchronous way
MPI_Request requests[2 * nbProcess * OctreeHeight];
MPI_Status status[2 * nbProcess * OctreeHeight];
int iterRequest = 0;
struct CellToSend{
......@@ -675,7 +673,7 @@ public:
//////////////////////////////////////////////////////////////////
// Wait to receive everything (and send everything)
MPI_Waitall(iterRequest, requests, 0);
MPI_Waitall(iterRequest, requests, status);
{
FDEBUG(receiveCounter.tic());
......@@ -781,6 +779,7 @@ public:
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
MPI_Request requests[nbProcess];
MPI_Status status[nbProcess];
const int heightMinusOne = OctreeHeight - 1;
......@@ -849,7 +848,7 @@ public:
if(iterRequests){
// process
FDEBUG(waitCounter.tic());
MPI_Waitall( iterRequests, requests, 0);
MPI_Waitall( iterRequests, requests, status);
FDEBUG(waitCounter.tac());
if(needToRecv){
......@@ -891,6 +890,7 @@ public:
// To send in an asynchronous way
MPI_Request requests[2 * nbProcess];
MPI_Status status[2 * nbProcess];
int iterRequest = 0;
ParticleClass* sendBuffer[nbProcess];
......@@ -1121,7 +1121,7 @@ public:
// Wait for data
FDEBUG(waitCounter.tic());
MPI_Waitall(iterRequest, requests, 0);
MPI_Waitall(iterRequest, requests, status);
FDEBUG(waitCounter.tac());
// Create an octree with leaves from others
......
......@@ -94,8 +94,8 @@ class FMpiTreeBuilder{
}
};
IndexedParticle* intervals; //< Current intervals
int nbLeavesInIntervals; //< Nb intervals
char* intervals;
int nbLeavesInIntervals;
private:
// Forbid copy
......@@ -124,6 +124,7 @@ public:
{
// create particles
IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
memset(realParticlesIndexed, 0, sizeof(IndexedParticle)* loader.getNumberOfParticles());
F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
FTreeCoordinate host;
const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / (1 << (NbLevels - 1) );
......@@ -144,6 +145,7 @@ public:
MortonIndex otherFirstIndex = -1;
{
FMpi::Request req[2];
MPI_Status status[2];
int reqiter = 0;
if( 0 < rank && outputSize){
MPI_Isend( &outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
......@@ -152,22 +154,26 @@ public:
MPI_Irecv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
}
MPI_Waitall(reqiter,req,0);
MPI_Waitall(reqiter,req,status);
if( 0 < rank && !outputSize){
MPI_Send( &otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, 0, MPI_COMM_WORLD);
}
}
MPI_Request req[2];
MPI_Status status[2];
int reqiter = 0;
// at this point everyone knows the first index of their right neighbor
const bool needToRecvBeforeSend = (rank != 0 && ((outputSize && otherFirstIndex == outputArray[0].index ) || !outputSize));
if( needToRecvBeforeSend || (rank == nbProcs - 1) ){
int sendByOther = 0;
MPI_Status status;
MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_BYTE, &sendByOther);
MPI_Status probStatus;
MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &probStatus);
MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther);
if(sendByOther){
sendByOther /= sizeof(IndexedParticle);
......@@ -179,26 +185,28 @@ public:
memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
delete[] reallocOutputArray;
MPI_Irecv(outputArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
MPI_Recv(outputArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &probStatus);
}
else{
MPI_Irecv(0, 0, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
}
}
if(rank != nbProcs - 1){
long idxPart = outputSize - 1 ;
while(idxPart >= 0 && outputArray[idxPart].index == otherFirstIndex){
--idxPart;
}
long toSend = outputSize - 1 - idxPart;
const long toSend = outputSize - 1 - idxPart;
MPI_Isend( &outputArray[idxPart + 1], toSend * sizeof(IndexedParticle), MPI_BYTE, rank + 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
if( rank != 0 && !needToRecvBeforeSend ){
if( rank != 0 && !needToRecvBeforeSend && (rank != nbProcs - 1)){
int sendByOther = 0;
MPI_Status status;
MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_BYTE, &sendByOther);
MPI_Status probStatus;
MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &probStatus);
MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther);
if(sendByOther){
sendByOther /= sizeof(IndexedParticle);
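Two changes recur in this function: the probe status gets its own name, probStatus, so it no longer shadows the status array declared above, and the nonblocking receive into outputArray becomes a blocking MPI_Recv, which has completed by the time the code moves on. A hedged sketch of the probe / size / blocking-receive pattern (the function name and ownership handling are illustrative):

#include <mpi.h>

// Learn the size of the pending message from the left neighbour, allocate
// exactly that much, then receive it blockingly. Error handling omitted.
int receiveBytesFromLeft(const int rank, char** buffer){
    MPI_Status probStatus;
    int sendByOther = 0;
    MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &probStatus);
    MPI_Get_count(&probStatus, MPI_BYTE, &sendByOther);
    *buffer = new char[sendByOther];
    MPI_Recv(*buffer, sendByOther, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &probStatus);
    return sendByOther;
}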
......@@ -206,7 +214,7 @@ public:
MPI_Irecv(tempBuffer, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
MPI_Waitall(reqiter,req,0);
MPI_Waitall(reqiter,req, status);
reqiter = 0;
const IndexedParticle* const reallocOutputArray = outputArray;
......@@ -224,35 +232,38 @@ public:
}
}
}
MPI_Waitall(reqiter,req,0);
MPI_Waitall(reqiter,req,status);
}
nbLeavesInIntervals = 0;
if(outputSize){
intervals = new char[outputSize * (sizeof(ParticleClass) + sizeof(int))];
int leavesCounter = 0;
MortonIndex previousIndex = -1;
char* writeIndex = intervals;
int* writeCounter = 0;
MortonIndex previousIndex = -1;
char* writeIndex = reinterpret_cast<char*>(outputArray);
int* writeCounter = 0;
for( int idxPart = 0; idxPart < outputSize ; ++idxPart){
printf("X inserted %f (idxPart %d)\n", outputArray[idxPart].particle.getPosition().getX(), idxPart );
for( int idxPart = 0; idxPart < outputSize ; ++idxPart){
if( outputArray[idxPart].index != previousIndex ){
previousIndex = outputArray[idxPart].index;
++leavesCounter;
if( outputArray[idxPart].index != previousIndex ){
previousIndex = outputArray[idxPart].index;
++nbLeavesInIntervals;
writeCounter = reinterpret_cast<int*>( writeIndex );
writeIndex += sizeof(int);
writeCounter = reinterpret_cast<int*>( writeIndex );
writeIndex += sizeof(int);
(*writeCounter) = 0;
}
(*writeCounter) = 0;
}
memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass));
memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass));
printf("X inserted %f (idxPart %d)\n", ((ParticleClass*)writeIndex)->getPosition().getX(), idxPart );
writeIndex += sizeof(ParticleClass);
++(*writeCounter);
writeIndex += sizeof(ParticleClass);
++(*writeCounter);
}
}
intervals = outputArray;
nbLeavesInIntervals = leavesCounter;
delete [] outputArray;
return true;
}
......@@ -333,6 +344,7 @@ public:
const int iCanSendToRight = nbLeafs;
MPI_Request requests[2];
MPI_Status status[2];
int iterRequest = 0;
int hasBeenSentToLeft = 0;
......@@ -351,7 +363,7 @@ public:
///////////////////////////////
if(nbLeafs){
particlesToSend = reinterpret_cast<char*>(intervals);
particlesToSend = intervals;
int currentLeafPosition = 0;
......@@ -414,11 +426,11 @@ public:
// Now prepare to receive data
while(countReceive--){
MPI_Status status;
MPI_Probe(sourceToWhileRecv, 0, MPI_COMM_WORLD, &status);
MPI_Status recvStatus;
MPI_Probe(sourceToWhileRecv, 0, MPI_COMM_WORLD, &recvStatus);
// receive from left
if(status.MPI_SOURCE == rank - 1){
MPI_Get_count( &status, MPI_BYTE, &sizeOfLeftBuffer);
if(recvStatus.MPI_SOURCE == rank - 1){
MPI_Get_count( &recvStatus, MPI_BYTE, &sizeOfLeftBuffer);
toRecvFromLeft = new char[sizeOfLeftBuffer];
sizeOfLeftData = sizeOfLeftBuffer;
MPI_Irecv(toRecvFromLeft, sizeOfLeftBuffer, MPI_BYTE, rank - 1 , 0 , MPI_COMM_WORLD, &requests[iterRequest++]);
......@@ -427,7 +439,7 @@ public:
}
// receive from right
else{
MPI_Get_count( &status, MPI_BYTE, &sizeOfRightBuffer);
MPI_Get_count( &recvStatus, MPI_BYTE, &sizeOfRightBuffer);
toRecvFromRight = new char[sizeOfRightBuffer];
sizeOfRightData = sizeOfRightBuffer;
MPI_Irecv(toRecvFromRight, sizeOfRightBuffer, MPI_BYTE, rank + 1 , 0 , MPI_COMM_WORLD, &requests[iterRequest++]);
......@@ -439,7 +451,7 @@ public:
///////////////////////////////
// Wait for the sends and receives
///////////////////////////////
MPI_Waitall(iterRequest, requests, 0);
MPI_Waitall(iterRequest, requests, status);
// We can delete the buffer used to send our particles only
printf("Wait passed...\n");
......@@ -469,9 +481,9 @@ public:
printf("Elapsed %lf\n", counter.tacAndElapsed());
MPI_Send(toRecvFromRight, arrayIdxRight, MPI_BYTE , rank - 1, 0, MPI_COMM_WORLD);
if(hasBeenSentToLeft < iNeedToSendLeftCount){
MPI_Status status;
MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_BYTE, &sizeOfRightData);
MPI_Status probStatus;
MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &probStatus);
MPI_Get_count( &probStatus, MPI_BYTE, &sizeOfRightData);
if(sizeOfRightBuffer < sizeOfRightData){
sizeOfRightBuffer = sizeOfRightData;
delete[] toRecvFromRight;
......@@ -497,9 +509,9 @@ public:
printf("Elapsed %lf\n", counter.tacAndElapsed());
MPI_Send(toRecvFromLeft, arrayIdxLeft, MPI_BYTE , rank + 1, 0, MPI_COMM_WORLD);
if(hasBeenSentToRight < iNeedToSendRightCount){
MPI_Status status;
MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_BYTE, &sizeOfLeftData);
MPI_Status probStatus;
MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &probStatus);
MPI_Get_count( &probStatus, MPI_BYTE, &sizeOfLeftData);
if(sizeOfLeftBuffer < sizeOfLeftData){
sizeOfLeftBuffer = sizeOfLeftData;
delete[] toRecvFromLeft;
......@@ -534,9 +546,9 @@ public:
printf("Elapsed %lf\n", counter.tacAndElapsed());
if(hasToBeReceivedFromLeft){
MPI_Status status;
MPI_Probe( rank - 1, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_BYTE, &sizeOfLeftData);
MPI_Status probStatus;
MPI_Probe( rank - 1, 0, MPI_COMM_WORLD, &probStatus);
MPI_Get_count( &probStatus, MPI_BYTE, &sizeOfLeftData);
if(sizeOfLeftBuffer < sizeOfLeftData){
sizeOfLeftBuffer = sizeOfLeftData;
delete[] toRecvFromLeft;
......@@ -568,10 +580,10 @@ public:
printf("Elapsed %lf\n", counter.tacAndElapsed());
if(hasToBeReceivedFromRight){
MPI_Status status;
MPI_Status probStatus;
printf("Probe\n");
MPI_Probe( rank + 1, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_BYTE, &sizeOfRightData);
MPI_Probe( rank + 1, 0, MPI_COMM_WORLD, &probStatus);
MPI_Get_count( &probStatus, MPI_BYTE, &sizeOfRightData);
printf("Receive %d bytes from right\n", sizeOfRightData);
if(sizeOfRightBuffer < sizeOfRightData){
sizeOfRightBuffer = sizeOfRightData;
......