FMpiTreeBuilder.hpp 26 KB
Newer Older
1
2
3
4
5
6
#ifndef FMPITREEBUILDER_H
#define FMPITREEBUILDER_H


#include "../Utils/FQuickSort.hpp"

berenger-bramas's avatar
berenger-bramas committed
7
8
9
10
11
12

/** This class manage the loading of particles
  * for the mpi version.
  * It use a BinLoader and then sort the data
  * with a parallel quick sort.
  */
13
template<class ParticleClass>
14
class FMpiTreeBuilder{
berenger-bramas's avatar
berenger-bramas committed
15
16
17
    /** This method has been tacken from the octree
      * it computes a tree coordinate (x or y or z) from real position
      */
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
    static long getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) {
            const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel;
            const long index = FMath::dfloor(indexFReal);
            if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * index ) ){
                    return index - 1;
            }
            return index;
    }

    /** get current rank */
    static int MpiGetRank(MPI_Comm comm = MPI_COMM_WORLD){
        int rank(0);
        MPI_Comm_rank(comm, &rank);
        return rank;
    }

    /** get current nb procs */
    static int MpiGetNbProcs(MPI_Comm comm = MPI_COMM_WORLD){
        int nb(0);
        MPI_Comm_size(comm, &nb);
        return nb;
    }

berenger-bramas's avatar
berenger-bramas committed
41
    /** receive data from a tag function */
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
    static void receiveDataFromTag(const int inSize, const int inTag, void* const inData, int* const inSource = 0, int* const inFilledSize = 0){
        MPI_Status status;
        MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, inTag, MPI_COMM_WORLD, &status);
        if(inSource) *inSource = status.MPI_SOURCE;
        if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize);
    }

    template< class T >
    static T GetLeft(const T inSize) {
        const float step = (float(inSize) / MpiGetNbProcs());
        return T(FMath::Ceil(step * MpiGetRank()));
    }

    template< class T >
    static T GetRight(const T inSize) {
        const float step = (float(inSize) / MpiGetNbProcs());
        const T res = T(FMath::Ceil(step * (MpiGetRank()+1)));
        if(res > inSize) return inSize;
        else return res;
    }

    template< class T >
    static T GetOtherRight(const T inSize, const int other) {
        const float step = (float(inSize) / MpiGetNbProcs());
        const T res = T(FMath::Ceil(step * (other+1)));
        if(res > inSize) return inSize;
        else return res;
    }

berenger-bramas's avatar
berenger-bramas committed
71
72
73
    /** This struct is used to represent a particles group to
      * sort them easily
      */
74
75
76
77
78
79
80
81
82
83
    struct ParticlesGroup {
        int number;
        int positionInArray;
        MortonIndex index;
        ParticlesGroup(const int inNumber = 0 , const int inPositionInArray = 0, const MortonIndex inIndex = 0)
            : number(inNumber), positionInArray(inPositionInArray), index(inIndex) {
        }
    };


berenger-bramas's avatar
berenger-bramas committed
84
85
86
87
    /** A particle may not have a MortonIndex Method
      * but they are sorted on their morton index
      * so this struct store a particle and its index
      */
88
89
90
91
92
93
94
95
96
    struct IndexedParticle{
        MortonIndex index;
        ParticleClass particle;

        operator MortonIndex(){
            return this->index;
        }
    };

97
98
    char* intervals;
    int nbLeavesInIntervals;
99
100

private:
berenger-bramas's avatar
berenger-bramas committed
101
    // Forbid copy
102
103
104
    FMpiTreeBuilder(const FMpiTreeBuilder&){}
    FMpiTreeBuilder& operator=(const FMpiTreeBuilder&){return *this;}

105
public:
berenger-bramas's avatar
berenger-bramas committed
106
    /** Constructor */
107
108
109
110
    FMpiTreeBuilder()
        :  intervals(0), nbLeavesInIntervals(0) {
    }

berenger-bramas's avatar
berenger-bramas committed
111
    /** Destructor */
112
113
114
115
    virtual ~FMpiTreeBuilder(){
        delete [] intervals;
    }

berenger-bramas's avatar
berenger-bramas committed
116
    /** Split and sort the file */
117
118
    template <class LoaderClass>
    bool splitAndSortFile(LoaderClass& loader, const int NbLevels){
119
120
121
122
123
124
125
126
        const int rank = MpiGetRank();
        const int nbProcs = MpiGetNbProcs();

        IndexedParticle* outputArray = 0;
        long outputSize = 0;
        {
            // create particles
            IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
127
            memset(realParticlesIndexed, 0, sizeof(IndexedParticle)* loader.getNumberOfParticles());
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
            F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
            FTreeCoordinate host;
            const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / (1 << (NbLevels - 1) );
            for(long idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
                loader.fillParticle(realParticlesIndexed[idxPart].particle);
                host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
                host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
                host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
                realParticlesIndexed[idxPart].index = host.getMortonIndex(NbLevels - 1);
            }

            // sort particles
            FQuickSort::QsMpi<IndexedParticle,MortonIndex>(realParticlesIndexed, loader.getNumberOfParticles(),outputArray,outputSize);
            delete [] (realParticlesIndexed);
        }
        // be sure there is no splited leaves
        {
            MortonIndex otherFirstIndex = -1;
            {
                FMpi::Request req[2];
148
                MPI_Status status[2];
149
150
151
152
153
154
155
156
                int reqiter = 0;
                if( 0 < rank && outputSize){
                    MPI_Isend( &outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
                }
                if( rank != nbProcs - 1){
                    MPI_Irecv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
                }

157
                MPI_Waitall(reqiter,req,status);
158
159
160
161
162

                if( 0 < rank && !outputSize){
                    MPI_Send( &otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, 0, MPI_COMM_WORLD);
                }
            }
163
164


165
            MPI_Request req[2];
166
            MPI_Status status[2];
167
            int reqiter = 0;
168

169
            // at this point every one know the first index of his right neighbors
170
            const bool needToRecvBeforeSend = (rank != 0 && ((outputSize && otherFirstIndex == outputArray[0].index ) || !outputSize));
171
            if( needToRecvBeforeSend || (rank == nbProcs - 1) ){
172
173
                int sendByOther = 0;

174
175
176
                MPI_Status probStatus;
                MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &probStatus);
                MPI_Get_count( &probStatus,  MPI_BYTE, &sendByOther);
177

178
                if(sendByOther){
179
                    sendByOther /= sizeof(IndexedParticle);
180
181
182
183
184
185
186
187
                    const IndexedParticle* const reallocOutputArray = outputArray;
                    const long reallocOutputSize = outputSize;

                    outputSize += sendByOther;
                    outputArray = new IndexedParticle[outputSize];
                    memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
                    delete[] reallocOutputArray;

188
                    MPI_Recv(outputArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &probStatus);
189
190
191
                }
                else{
                    MPI_Irecv(0, 0, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
192
193
                }
            }
194

195
            if(rank != nbProcs - 1){
196

197
198
199
200
                long idxPart = outputSize - 1 ;
                while(idxPart >= 0 && outputArray[idxPart].index == otherFirstIndex){
                    --idxPart;
                }
201
                const long toSend = outputSize - 1 - idxPart;
202
203
                MPI_Isend( &outputArray[idxPart + 1], toSend * sizeof(IndexedParticle), MPI_BYTE, rank + 1, 0, MPI_COMM_WORLD, &req[reqiter++]);

204
                if( rank != 0 && !needToRecvBeforeSend && (rank != nbProcs - 1)){
205
206
                    int sendByOther = 0;

207
208
209
                    MPI_Status probStatus;
                    MPI_Probe(rank - 1, 0, MPI_COMM_WORLD, &probStatus);
                    MPI_Get_count( &probStatus,  MPI_BYTE, &sendByOther);
210

211
                    if(sendByOther){
212
213
214
215
216
                        sendByOther /= sizeof(IndexedParticle);
                        char* const tempBuffer = new char[sizeof(IndexedParticle) * sendByOther];

                        MPI_Irecv(tempBuffer, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);

217
                        MPI_Waitall(reqiter,req, status);
218
219
                        reqiter = 0;

220
221
222
223
224
225
226
                        const IndexedParticle* const reallocOutputArray = outputArray;
                        const long reallocOutputSize = outputSize;

                        outputSize += sendByOther;
                        outputArray = new IndexedParticle[outputSize];
                        memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
                        delete[] reallocOutputArray;
227
228
                        memcpy(outputArray, tempBuffer, sendByOther * sizeof(IndexedParticle));
                        delete[] tempBuffer;
229
                    }
230
231
                    else{
                        MPI_Irecv( 0, 0, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
232
233
234
                    }
                }
            }
235
            MPI_Waitall(reqiter,req,status);
236
237
        }

238
239
240
        nbLeavesInIntervals = 0;
        if(outputSize){
            intervals = new char[outputSize * (sizeof(ParticleClass) + sizeof(int))];
241

242
243
244
            MortonIndex previousIndex = -1;
            char* writeIndex = intervals;
            int* writeCounter = 0;
245

246
247
            for( int idxPart = 0; idxPart < outputSize ; ++idxPart){
                printf("X inserted %f (idxPart %d)\n", outputArray[idxPart].particle.getPosition().getX(), idxPart );
248

249
250
251
                if( outputArray[idxPart].index != previousIndex ){
                    previousIndex = outputArray[idxPart].index;
                    ++nbLeavesInIntervals;
252

253
254
                    writeCounter = reinterpret_cast<int*>( writeIndex );
                    writeIndex += sizeof(int);
255

256
257
                    (*writeCounter) = 0;
                }
258

259
260
                memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass));
                printf("X inserted %f (idxPart %d)\n", ((ParticleClass*)writeIndex)->getPosition().getX(), idxPart );
261

262
263
264
                writeIndex += sizeof(ParticleClass);
                ++(*writeCounter);
            }
265
        }
266
        delete [] outputArray;
267
268
269
270

        return true;
    }

berenger-bramas's avatar
berenger-bramas committed
271
    /** Put the interval into a tree */
272
    template <class OctreeClass>
273
    bool intervalsToTree(OctreeClass& realTree){
274
275
276
        const int rank = MpiGetRank();
        const int nbProcs = MpiGetNbProcs();

277
278
        FTic counter;
        counter.tic();
279
280
281
282
        //////////////////////////////////////////////////////////////////////////////////
        // We inform the master proc about the data we have
        //////////////////////////////////////////////////////////////////////////////////

283
        int nbLeafs = nbLeavesInIntervals;
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

        // receive from left and right
        if((rank == 0)){
            MPI_Send(&nbLeafs, sizeof(int), MPI_BYTE , 1, 0, MPI_COMM_WORLD);
        }
        else if(rank == nbProcs - 1){
            MPI_Send(&nbLeafs, sizeof(int), MPI_BYTE , rank - 1, 0, MPI_COMM_WORLD);
        }
        // receive
        int leftLeafs = 0;
        int rightLeafs = 0;
        if(!(rank == 0) && rank != nbProcs - 1){
            for(int idxToReceive = 0 ; idxToReceive < 2 ; ++idxToReceive){
                int source(0);
                int temp = 0;
                receiveDataFromTag(sizeof(int), 0, &temp, &source);
                if(source < rank){ // come from left
                    leftLeafs = temp;
                    temp += nbLeafs;
                    MPI_Send(&temp, sizeof(int), MPI_BYTE , rank + 1, 0, MPI_COMM_WORLD);
                }
                else { // come from right
                    rightLeafs = temp;
                    temp += nbLeafs;
                    MPI_Send(&temp, sizeof(int), MPI_BYTE , rank - 1, 0, MPI_COMM_WORLD);
                }
            }
        }
        else {
            if((rank == 0)){ // come from right
                receiveDataFromTag(sizeof(int), 0, &rightLeafs);
            }
            else { // come from left
                receiveDataFromTag(sizeof(int), 0, &leftLeafs);
            }
        }

321

322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
        //////////////////////////////////////////////////////////////////////////////////
        // We balance the data
        //////////////////////////////////////////////////////////////////////////////////

        const int totalNbLeafs = (leftLeafs + nbLeafs + rightLeafs);
        const int myLeftLeaf = GetLeft(totalNbLeafs);
        const int myRightLeaf = GetRight(totalNbLeafs);

        const bool iNeedToSendToLeft = leftLeafs < myLeftLeaf;
        const bool iNeedToSendToRight = myRightLeaf < leftLeafs + nbLeafs;

        const bool iWillReceiveFromRight = leftLeafs + nbLeafs < myRightLeaf;
        const bool iWillReceiveFromLeft = leftLeafs > myLeftLeaf;

        const bool iDoNotHaveEnoughtToSendRight = myRightLeaf < leftLeafs;
        const bool iDoNotHaveEnoughtToSendLeft = leftLeafs + nbLeafs < myLeftLeaf;


340
341
        const int iNeedToSendLeftCount = myLeftLeaf - leftLeafs;
        const int iCanSendToLeft = nbLeafs;
342

343
344
        const int iNeedToSendRightCount = leftLeafs + nbLeafs - myRightLeaf;
        const int iCanSendToRight = nbLeafs;
345

346
        MPI_Request requests[2];
347
        MPI_Status status[2];
348
        int iterRequest = 0;
349

350
351
        int hasBeenSentToLeft = 0;
        int hasBeenSentToRight = 0;
352

353
        char* particlesToSend = 0;
354

355
356
357
358
359
360

        printf("on my left %d on my right %d\n",leftLeafs, rightLeafs);
        printf("iNeedToSendLeftCount %d iCanSendToLeft %d\n",iNeedToSendLeftCount, iCanSendToLeft);
        printf("iNeedToSendRightCount %d iCanSendToRight %d \n",iNeedToSendRightCount, iCanSendToRight);
        printf("Elapsed %lf\n", counter.tacAndElapsed());

361
362
363
364
        ///////////////////////////////
        // Manage data we already have
        ///////////////////////////////

365
        if(nbLeafs){
366
            particlesToSend = intervals;
367

368
            int currentLeafPosition = 0;
369
370
371
372

            //Send to Left (the first leaves
            if(iNeedToSendToLeft){
                for(int idxLeaf = 0 ; idxLeaf < iNeedToSendLeftCount && idxLeaf < iCanSendToLeft ; ++idxLeaf){
373
                    currentLeafPosition += ((*(int*)&particlesToSend[currentLeafPosition]) * sizeof(ParticleClass)) + sizeof(int);
374
                }
375
                hasBeenSentToLeft = FMath::Min(iNeedToSendLeftCount, iCanSendToLeft);
376
377
                MPI_Isend(particlesToSend, currentLeafPosition, MPI_BYTE , rank - 1, 0, MPI_COMM_WORLD, &requests[iterRequest++]);
                printf("I send to left %d bytes %d leaves\n", currentLeafPosition, hasBeenSentToLeft);
378
            }
379
            printf("Elapsed %lf\n", counter.tacAndElapsed());
380

381
382
383
            // Insert the particles I host and that belong to me
            const int beginForMe = (iNeedToSendToLeft ? FMath::Min(iNeedToSendLeftCount,iCanSendToLeft) : 0);
            const int endForMe = nbLeafs - (iNeedToSendToRight ? FMath::Min(iNeedToSendRightCount,iCanSendToRight) : 0);
384
            printf("I insert my particles from %d to %d \n", beginForMe, endForMe);
385
            for(int idxLeaf = beginForMe ; idxLeaf < endForMe ; ++idxLeaf){
386

387
388
389
390
391
                const int nbPartInLeaf = (*(int*)&particlesToSend[currentLeafPosition]);
                ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&particlesToSend[currentLeafPosition] + sizeof(int));

                for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){
                    realTree.insert(particles[idxPart]);
392
                }
393

394
                currentLeafPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int);
395
            }
396
397
            printf("Done\n");
            printf("Elapsed %lf\n", counter.tacAndElapsed());
398

399
400
            //Send to Right (the right-est leaves
            if(iNeedToSendToRight){
401
                const int beginWriteIndex = currentLeafPosition;
402

403
                for(int idxLeaf = 0 ; idxLeaf < iNeedToSendRightCount && idxLeaf < iCanSendToRight ; ++idxLeaf){
404
                    currentLeafPosition += (*(int*)&particlesToSend[currentLeafPosition]* sizeof(ParticleClass)) + sizeof(int);
405
                }
406
407

                hasBeenSentToRight = FMath::Min(iNeedToSendRightCount, iCanSendToRight);
408
409
                MPI_Isend( &particlesToSend[beginWriteIndex], currentLeafPosition - beginWriteIndex, MPI_BYTE , rank + 1, 0, MPI_COMM_WORLD, &requests[iterRequest++]);
                printf("I send to right %d bytes %d leaves\n", currentLeafPosition - beginWriteIndex, hasBeenSentToRight);
410
            }
411
            printf("Elapsed %lf\n", counter.tacAndElapsed());
412
413
        }

414
415
416
        char* toRecvFromLeft = 0;
        char* toRecvFromRight = 0;
        int countReceive = int(iWillReceiveFromLeft) + int(iWillReceiveFromRight);
417
418
419
420
421
422
423
424
425
        int sizeOfLeftBuffer = 0;
        int sizeOfRightBuffer = 0;
        int sizeOfRightData = 0;
        int sizeOfLeftData = 0;

        int sourceToWhileRecv = MPI_ANY_SOURCE;

        printf("countReceive %d\n", countReceive);
        printf("Elapsed %lf\n", counter.tacAndElapsed());
426

427
428
        // Now prepare to receive data
        while(countReceive--){
429
430
            MPI_Status recvStatus;
            MPI_Probe(sourceToWhileRecv, 0, MPI_COMM_WORLD, &recvStatus);
431
            // receive from left
432
433
            if(recvStatus.MPI_SOURCE == rank - 1){
                MPI_Get_count( &recvStatus,  MPI_BYTE, &sizeOfLeftBuffer);
434
435
436
437
438
                toRecvFromLeft = new char[sizeOfLeftBuffer];
                sizeOfLeftData = sizeOfLeftBuffer;
                MPI_Irecv(toRecvFromLeft, sizeOfLeftBuffer, MPI_BYTE, rank - 1 , 0 , MPI_COMM_WORLD, &requests[iterRequest++]);
                sourceToWhileRecv = rank + 1;
                printf("I will receive %d bytes from left\n", sizeOfLeftBuffer);
439
440
441
            }
            // receive from right
            else{
442
                MPI_Get_count( &recvStatus,  MPI_BYTE, &sizeOfRightBuffer);
443
444
445
446
447
                toRecvFromRight = new char[sizeOfRightBuffer];
                sizeOfRightData = sizeOfRightBuffer;
                MPI_Irecv(toRecvFromRight, sizeOfRightBuffer, MPI_BYTE, rank + 1 , 0 , MPI_COMM_WORLD, &requests[iterRequest++]);
                sourceToWhileRecv = rank - 1;
                printf("I will receive %d bytes from right\n", sizeOfRightBuffer);
448
449
            }
        }
450
451
452
453

        ///////////////////////////////
        // Wait send receive
        ///////////////////////////////
454
        MPI_Waitall(iterRequest, requests, status);
455
        // We can delete the buffer use to send our particles only
456
457
458

        printf("Wait passed...\n");
        printf("Elapsed %lf\n", counter.tacAndElapsed());
459
460
461
462
463
464
465
466
467
468
469
470

        ///////////////////////////////
        // Process received data
        // and transfer if needed
        ///////////////////////////////
        // We have to receive from right and transfere to left
        int hasToBeReceivedFromLeft = leftLeafs - myLeftLeaf;
        int hasToBeReceivedFromRight = myRightLeaf - (leftLeafs + nbLeafs);
        int arrayIdxRight = 0;
        int arrayIdxLeft = 0;

        if(iDoNotHaveEnoughtToSendLeft){
471
            printf("iDoNotHaveEnoughtToSendLeft\n");
472
473
            do{
                arrayIdxRight = 0;
474
                while(arrayIdxRight < sizeOfRightData && hasBeenSentToLeft < iNeedToSendLeftCount){
475
476
477
                    const int particlesInThisLeaf = *(int*)&toRecvFromRight[arrayIdxRight];
                    arrayIdxRight += sizeof(int) + sizeof(ParticleClass) * particlesInThisLeaf;
                    --hasToBeReceivedFromRight;
478
                    ++hasBeenSentToLeft;
479
                }
480
481
                printf("Send %d to left, total leaves sent %d / %d\n", arrayIdxRight, hasBeenSentToLeft, iNeedToSendLeftCount);
                printf("Elapsed %lf\n", counter.tacAndElapsed());
482
483
                MPI_Send(toRecvFromRight, arrayIdxRight, MPI_BYTE , rank - 1, 0, MPI_COMM_WORLD);
                if(hasBeenSentToLeft < iNeedToSendLeftCount){
484
485
486
                    MPI_Status probStatus;
                    MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &probStatus);
                    MPI_Get_count( &probStatus,  MPI_BYTE, &sizeOfRightData);
487
488
                    if(sizeOfRightBuffer < sizeOfRightData){
                        sizeOfRightBuffer = sizeOfRightData;
489
                        delete[] toRecvFromRight;
490
                        toRecvFromRight = new char[sizeOfRightData];
491
                    }
492
493
                    printf("Receive %d bytes from right\n", sizeOfRightData);
                    MPI_Recv(toRecvFromRight, sizeOfRightData, MPI_BYTE, rank + 1 , 0 , MPI_COMM_WORLD, MPI_STATUS_IGNORE);
494
                }
495
496
497
498
            } while(hasBeenSentToLeft < iNeedToSendLeftCount);
        }
        // We have to receive from left and transfere to right
        else if(iDoNotHaveEnoughtToSendRight){
499
            printf("iDoNotHaveEnoughtToSendRight\n");
500
501
            do{
                arrayIdxLeft = 0;
502
                while(arrayIdxLeft < sizeOfLeftData && hasBeenSentToRight < iNeedToSendRightCount){
503
504
505
                    const int particlesInThisLeaf = *(int*)&toRecvFromLeft[arrayIdxLeft];
                    arrayIdxLeft += sizeof(int) + sizeof(ParticleClass) * particlesInThisLeaf;
                    --hasToBeReceivedFromLeft;
506
                    ++hasBeenSentToRight;
507
                }
508
509
                printf("Send %d to right, total leaves sent %d / %d\n", arrayIdxLeft, hasBeenSentToRight, iNeedToSendRightCount);
                printf("Elapsed %lf\n", counter.tacAndElapsed());
510
511
                MPI_Send(toRecvFromLeft, arrayIdxLeft, MPI_BYTE , rank + 1, 0, MPI_COMM_WORLD);
                if(hasBeenSentToRight < iNeedToSendRightCount){
512
513
514
                    MPI_Status probStatus;
                    MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &probStatus);
                    MPI_Get_count( &probStatus,  MPI_BYTE, &sizeOfLeftData);
515
516
                    if(sizeOfLeftBuffer < sizeOfLeftData){
                        sizeOfLeftBuffer = sizeOfLeftData;
517
                        delete[] toRecvFromLeft;
518
                        toRecvFromLeft = new char[sizeOfLeftData];
519
                    }
520
521
                    printf("Receive %d bytes from left", sizeOfLeftData);
                    MPI_Recv(toRecvFromLeft, sizeOfLeftData, MPI_BYTE, rank - 1 , 0 , MPI_COMM_WORLD, MPI_STATUS_IGNORE);
522
523
524
525
                }
            } while(hasBeenSentToRight < iNeedToSendRightCount);
        }

526
527
        printf("Finished to send\n");

528
        if(iWillReceiveFromLeft ){ // I need to wait
529
            printf("iWillReceiveFromLeft (hasToBeReceivedFromLeft %d leaves)\n", hasToBeReceivedFromLeft);
530
            do{
531
                while(arrayIdxLeft < sizeOfLeftData){
532
533
534
535
536
537
538
539
                    const int particlesInThisLeaf = *(int*)&toRecvFromLeft[arrayIdxLeft];
                    arrayIdxLeft += sizeof(int);
                    ParticleClass*const particles = reinterpret_cast<ParticleClass*>(&toRecvFromLeft[arrayIdxLeft]);
                    arrayIdxLeft += sizeof(ParticleClass) * particlesInThisLeaf;

                    for( int idxPart = 0 ; idxPart < particlesInThisLeaf ; ++idxPart){
                        realTree.insert( particles[ idxPart ] );
                    }
540

541
                    --hasToBeReceivedFromLeft;
542
                }
543
544
                arrayIdxLeft = 0;

545
546
547
                printf("Remains hasToBeReceivedFromLeft %d leaves to receive\n",hasToBeReceivedFromLeft);
                printf("Elapsed %lf\n", counter.tacAndElapsed());

548
                if(hasToBeReceivedFromLeft){
549
550
551
                    MPI_Status probStatus;
                    MPI_Probe( rank - 1, 0, MPI_COMM_WORLD, &probStatus);
                    MPI_Get_count( &probStatus,  MPI_BYTE, &sizeOfLeftData);
552
553
                    if(sizeOfLeftBuffer < sizeOfLeftData){
                        sizeOfLeftBuffer = sizeOfLeftData;
554
                        delete[] toRecvFromLeft;
555
                        toRecvFromLeft = new char[sizeOfLeftData];
556
                    }
557
558
                    printf("Received %d bytes from left\n", sizeOfLeftData);
                    MPI_Recv(toRecvFromLeft, sizeOfLeftData, MPI_BYTE, rank - 1 , 0 , MPI_COMM_WORLD, MPI_STATUS_IGNORE);
559
560
                }
            } while(hasToBeReceivedFromLeft);
561
        }
562
        if(iWillReceiveFromRight){
563
            printf("iWillReceiveFromRight (hasToBeReceivedFromRight %d leaves)\n", hasToBeReceivedFromRight);
564
            do{
565
                while(arrayIdxRight < sizeOfRightData){
566
567
568
569
570
571
572
573
                    const int particlesInThisLeaf = *(int*)&toRecvFromRight[arrayIdxRight];
                    arrayIdxRight += sizeof(int);
                    ParticleClass*const particles = reinterpret_cast<ParticleClass*>(&toRecvFromRight[arrayIdxRight]);
                    arrayIdxRight += sizeof(ParticleClass) * particlesInThisLeaf;

                    for( int idxPart = 0 ; idxPart < particlesInThisLeaf ; ++idxPart){
                        realTree.insert( particles[ idxPart ] );
                    }
574

575
                    --hasToBeReceivedFromRight;
576
                }
577
578
                arrayIdxRight = 0;

579
580
581
                printf("Remains hasToBeReceivedFromRight %d leaves to receive\n",hasToBeReceivedFromRight);
                printf("Elapsed %lf\n", counter.tacAndElapsed());

582
                if(hasToBeReceivedFromRight){
583
                    MPI_Status probStatus;
584
                    printf("Probe\n");
585
586
                    MPI_Probe( rank + 1, 0, MPI_COMM_WORLD, &probStatus);
                    MPI_Get_count( &probStatus,  MPI_BYTE, &sizeOfRightData);
587
588
589
                    printf("Receive %d bytes from right\n", sizeOfRightData);
                    if(sizeOfRightBuffer < sizeOfRightData){
                        sizeOfRightBuffer = sizeOfRightData;
590
                        delete[] toRecvFromRight;
591
                        toRecvFromRight = new char[sizeOfRightData];
592
                    }
593
                    MPI_Recv(toRecvFromRight, sizeOfRightData, MPI_BYTE, rank + 1 , 0 , MPI_COMM_WORLD, MPI_STATUS_IGNORE);
594
595
                }
            } while(hasToBeReceivedFromRight);
596
597
        }

598
599
600
        // Delete reception buffers
        delete[] toRecvFromLeft;
        delete[] toRecvFromRight;
601
602
603
604
605
606

        return true;
    }
};

#endif // FMPITREEBUILDER_H