Attention une mise à jour du service Gitlab va être effectuée le mardi 18 janvier (et non lundi 17 comme annoncé précédemment) entre 18h00 et 18h30. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée mais qui ne devrait pas excéder quelques minutes.

FMpiTreeBuilder.hpp 19.5 KB
Newer Older
1
2
3
#ifndef FMPITREEBUILDER_H
#define FMPITREEBUILDER_H

4
#include "../Utils/FMpi.hpp"
5
#include "../Utils/FQuickSort.hpp"
6
#include "../Utils/FMemUtils.hpp"
7
#include "../Utils/FTrace.hpp"
berenger-bramas's avatar
berenger-bramas committed
8
9
10
11
12
13

/** This class manage the loading of particles
  * for the mpi version.
  * It use a BinLoader and then sort the data
  * with a parallel quick sort.
  */
14
template<class ParticleClass>
15
class FMpiTreeBuilder{
berenger-bramas's avatar
berenger-bramas committed
16
17
18
    /** This method has been tacken from the octree
      * it computes a tree coordinate (x or y or z) from real position
      */
19
20
    static long getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) {
            const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel;
21
22
            const long index = long(FMath::dfloor(indexFReal));
            if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
                    return index - 1;
            }
            return index;
    }

    /** get current rank */
    static int MpiGetRank(MPI_Comm comm = MPI_COMM_WORLD){
        int rank(0);
        MPI_Comm_rank(comm, &rank);
        return rank;
    }

    /** get current nb procs */
    static int MpiGetNbProcs(MPI_Comm comm = MPI_COMM_WORLD){
        int nb(0);
        MPI_Comm_size(comm, &nb);
        return nb;
    }

berenger-bramas's avatar
berenger-bramas committed
42
    /** receive data from a tag function */
43
44
45
46
47
48
49
50
51
    static void receiveDataFromTag(const int inSize, const int inTag, void* const inData, int* const inSource = 0, int* const inFilledSize = 0){
        MPI_Status status;
        MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, inTag, MPI_COMM_WORLD, &status);
        if(inSource) *inSource = status.MPI_SOURCE;
        if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize);
    }

    template< class T >
    static T GetLeft(const T inSize) {
52
53
        const double step = (double(inSize) / double(MpiGetNbProcs()));
        return T(FMath::Ceil(step * double(MpiGetRank())));
54
55
56
57
    }

    template< class T >
    static T GetRight(const T inSize) {
58
59
        const double step = (double(inSize) / double(MpiGetNbProcs()));
        const T res = T(FMath::Ceil(step * double(MpiGetRank()+1)));
60
61
62
63
64
65
        if(res > inSize) return inSize;
        else return res;
    }

    template< class T >
    static T GetOtherRight(const T inSize, const int other) {
66
        const double step = (double(inSize) / MpiGetNbProcs());
67
68
69
70
71
        const T res = T(FMath::Ceil(step * (other+1)));
        if(res > inSize) return inSize;
        else return res;
    }

72
73
74
75
76
77
78
79
80
81
82
    template< class T >
    static T GetOtherLeft(const T inSize, const int other) {
        const double step = (double(inSize) / double(MpiGetNbProcs()));
        return T(FMath::Ceil(step * double(other)));
    }

    template <class T1, class T2>
    static T1 Min( const T1 v1, const T2 v2){
        return (v1 < v2 ? v1 : v2);
    }

berenger-bramas's avatar
berenger-bramas committed
83
84
85
    /** This struct is used to represent a particles group to
      * sort them easily
      */
86
87
88
89
90
91
92
93
94
95
    struct ParticlesGroup {
        int number;
        int positionInArray;
        MortonIndex index;
        ParticlesGroup(const int inNumber = 0 , const int inPositionInArray = 0, const MortonIndex inIndex = 0)
            : number(inNumber), positionInArray(inPositionInArray), index(inIndex) {
        }
    };


berenger-bramas's avatar
berenger-bramas committed
96
97
98
99
    /** A particle may not have a MortonIndex Method
      * but they are sorted on their morton index
      * so this struct store a particle and its index
      */
100
101
102
103
104
105
106
107
108
    struct IndexedParticle{
        MortonIndex index;
        ParticleClass particle;

        operator MortonIndex(){
            return this->index;
        }
    };

109
    char* intervals;
110
    FSize nbLeavesInIntervals;
111
112

private:
berenger-bramas's avatar
berenger-bramas committed
113
    // Forbid copy
114
115
116
    FMpiTreeBuilder(const FMpiTreeBuilder&){}
    FMpiTreeBuilder& operator=(const FMpiTreeBuilder&){return *this;}

117
public:
berenger-bramas's avatar
berenger-bramas committed
118
    /** Constructor */
119
120
121
122
    FMpiTreeBuilder()
        :  intervals(0), nbLeavesInIntervals(0) {
    }

berenger-bramas's avatar
berenger-bramas committed
123
    /** Destructor */
124
125
126
127
    virtual ~FMpiTreeBuilder(){
        delete [] intervals;
    }

berenger-bramas's avatar
berenger-bramas committed
128
    /** Split and sort the file */
129
130
    template <class LoaderClass>
    bool splitAndSortFile(LoaderClass& loader, const int NbLevels){
131
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader" , __FILE__ , __LINE__) );
132
133
134
        const int rank = MpiGetRank();
        const int nbProcs = MpiGetNbProcs();

berenger-bramas's avatar
berenger-bramas committed
135
136
137
138
        // First we create the particles that belong to us (our proc)
        // we compute their morton index to be able to sort them
        //

139
        IndexedParticle* outputArray = 0;
140
        FSize outputSize = 0;
141
        {
142
            FTRACE( FTrace::FRegion regionTrace("Insert Particles", __FUNCTION__ , __FILE__ , __LINE__) );
143
144
            // create particles
            IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
145
            FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles());
146
147
            F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
            FTreeCoordinate host;
berenger-bramas's avatar
berenger-bramas committed
148

149
            const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (NbLevels - 1) );
150
151
152
153
154
            for(long idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
                loader.fillParticle(realParticlesIndexed[idxPart].particle);
                host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
                host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
                host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
berenger-bramas's avatar
berenger-bramas committed
155

156
157
158
159
160
161
162
163
                realParticlesIndexed[idxPart].index = host.getMortonIndex(NbLevels - 1);
            }

            // sort particles
            FQuickSort::QsMpi<IndexedParticle,MortonIndex>(realParticlesIndexed, loader.getNumberOfParticles(),outputArray,outputSize);
            delete [] (realParticlesIndexed);
        }
        // be sure there is no splited leaves
berenger-bramas's avatar
berenger-bramas committed
164
        // to do that we exchange the first index with the left proc
165
        {
166
167
            FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );

168
169
170
171
            MortonIndex otherFirstIndex = -1;
            {
                FMpi::Request req[2];
                int reqiter = 0;
berenger-bramas's avatar
berenger-bramas committed
172
                // can I send my first index? == I am not rank 0 & I have data
173
                if( 0 < rank && outputSize){
174
                    MPI_Isend( &outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, &req[reqiter++]);
175
176
                }
                if( rank != nbProcs - 1){
177
                    MPI_Irecv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, &req[reqiter++]);
178
179
                }

180
                MPI_Waitall(reqiter,req,MPI_STATUSES_IGNORE);
181

berenger-bramas's avatar
berenger-bramas committed
182
183
                // I could not send because I do not have data, so I transmit the data coming
                // from my right neigbors
184
                if( 0 < rank && !outputSize){
185
                    MPI_Send( &otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD);
186
187
                }
            }
188

189
190
            MPI_Request req[2];
            int reqiter = 0;
191

192
            // at this point every one know the first index of his right neighbors
193
            const bool needToRecvBeforeSend = (rank != 0 && ((outputSize && otherFirstIndex == outputArray[0].index ) || !outputSize));
194
            if( needToRecvBeforeSend || (rank == nbProcs - 1) ){
berenger-bramas's avatar
berenger-bramas committed
195
196
197
198
199
                // Here we want to send data we do not have
                // so we first receive other data and put then into the array
                // this case happens only if I have one leaf with index MX
                // and if proc[rank - 1].last.index == MX && proc[rank + 1].first.index == MX

200
201
                int sendByOther = 0;

202
                MPI_Status probStatus;
203
                MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &probStatus);
204
                MPI_Get_count( &probStatus,  MPI_BYTE, &sendByOther);
205

206
                if(sendByOther){
207
                    sendByOther /= sizeof(IndexedParticle);
208
                    const IndexedParticle* const reallocOutputArray = outputArray;
209
                    const FSize reallocOutputSize = outputSize;
210
211
212

                    outputSize += sendByOther;
                    outputArray = new IndexedParticle[outputSize];
213
                    FMemUtils::memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
214
215
                    delete[] reallocOutputArray;

216
                    MPI_Recv(outputArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &probStatus);
217
218
                }
                else{
219
                    MPI_Irecv(0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &req[reqiter++]);
220
221
                }
            }
222

223
            if(rank != nbProcs - 1){
224

225
                FSize idxPart = outputSize - 1 ;
226
227
228
                while(idxPart >= 0 && outputArray[idxPart].index == otherFirstIndex){
                    --idxPart;
                }
229
                const int toSend = int(outputSize - 1 - idxPart);
230
                MPI_Isend( &outputArray[idxPart + 1], toSend * sizeof(IndexedParticle), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &req[reqiter++]);
231

232
                if( rank != 0 && !needToRecvBeforeSend && (rank != nbProcs - 1)){
233
234
                    int sendByOther = 0;

235
                    MPI_Status probStatus;
236
                    MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &probStatus);
237
                    MPI_Get_count( &probStatus,  MPI_BYTE, &sendByOther);
238

239
                    if(sendByOther){
240
241
242
                        sendByOther /= sizeof(IndexedParticle);
                        char* const tempBuffer = new char[sizeof(IndexedParticle) * sendByOther];

243
                        MPI_Irecv(tempBuffer, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &req[reqiter++]);
244

245
                        MPI_Waitall(reqiter,req, MPI_STATUSES_IGNORE);
246
247
                        reqiter = 0;

248
                        const IndexedParticle* const reallocOutputArray = outputArray;
249
                        const FSize reallocOutputSize = outputSize;
250
251
252

                        outputSize += sendByOther;
                        outputArray = new IndexedParticle[outputSize];
253
                        FMemUtils::memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
254
                        delete[] reallocOutputArray;
255
256
                        memcpy(outputArray, tempBuffer, sendByOther * sizeof(IndexedParticle));
                        delete[] tempBuffer;
257
                    }
258
                    else{
259
                        MPI_Irecv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &req[reqiter++]);
260
261
262
                    }
                }
            }
263

264
            MPI_Waitall(reqiter,req,MPI_STATUSES_IGNORE);
265
266
        }

berenger-bramas's avatar
berenger-bramas committed
267
268
        // We now copy the data from a sorted type into real particles array + counter

269
270
271
        nbLeavesInIntervals = 0;
        if(outputSize){
            intervals = new char[outputSize * (sizeof(ParticleClass) + sizeof(int))];
272

273
274
275
            MortonIndex previousIndex = -1;
            char* writeIndex = intervals;
            int* writeCounter = 0;
276

277
            for( FSize idxPart = 0; idxPart < outputSize ; ++idxPart){
278
279
280
                if( outputArray[idxPart].index != previousIndex ){
                    previousIndex = outputArray[idxPart].index;
                    ++nbLeavesInIntervals;
281

282
283
                    writeCounter = reinterpret_cast<int*>( writeIndex );
                    writeIndex += sizeof(int);
284

285
286
                    (*writeCounter) = 0;
                }
287

288
                memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass));
289

290
291
292
                writeIndex += sizeof(ParticleClass);
                ++(*writeCounter);
            }
293
        }
294
        delete [] outputArray;
295
296
297
298

        return true;
    }

berenger-bramas's avatar
berenger-bramas committed
299
    /** Put the interval into a tree */
300
    template <class OctreeClass>
301
    bool intervalsToTree(OctreeClass& realTree){
302
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader" , __FILE__ , __LINE__) );
303
304
        const int rank = MpiGetRank();
        const int nbProcs = MpiGetNbProcs();
305
        const FSize currentNbLeafs = nbLeavesInIntervals;
306

307
308
309
        int leavesPerProcs[nbProcs];
        memset(leavesPerProcs, 0, sizeof(int) * nbProcs);
        MPI_Allgather(&nbLeavesInIntervals, 1, MPI_INT, leavesPerProcs, 1, MPI_INT, MPI_COMM_WORLD);
310

311
312
313
314
315
316
        FSize currentLeafsOnMyLeft  = 0;
        FSize currentLeafsOnMyRight = 0;
        for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc ){
            if(idxProc < rank) currentLeafsOnMyLeft  += leavesPerProcs[idxProc];
            if(rank < idxProc) currentLeafsOnMyRight += leavesPerProcs[idxProc];
        }
317

318
319
320
        const FSize totalNbLeafs     = (currentLeafsOnMyLeft + currentNbLeafs + currentLeafsOnMyRight);
        const FSize nbLeafsOnMyLeft  = GetLeft(totalNbLeafs);
        const FSize nbLeafsOnMyRight = GetRight(totalNbLeafs);
321

322

323
324
325
326
327
328
        int leavesToSend[nbProcs];
        memset(leavesPerProcs, 0, sizeof(int) * nbProcs);
        int bytesToSend[nbProcs];
        memset(bytesToSend, 0, sizeof(int) * nbProcs);
        int bytesOffset[nbProcs];
        memset(bytesToSend, 0, sizeof(int) * nbProcs);
329

330
        FSize currentIntervalPosition = 0;
331

332
333
        const bool iNeedToSendToLeft  = currentLeafsOnMyLeft < nbLeafsOnMyLeft;
        const bool iNeedToSendToRight = nbLeafsOnMyRight < currentLeafsOnMyLeft + currentNbLeafs;
334

335
336
337
338
339
340
341
        int leftProcToStartSend = rank;
        if(iNeedToSendToLeft){
            int idxProc = rank - 1;
            while( idxProc >= 0 ){
                const FSize thisProcRight = GetOtherRight(totalNbLeafs, idxProc);
                if( currentLeafsOnMyLeft < thisProcRight ){
                    break;
342
                }
343
                --idxProc;
344
345
            }

346
347
348
349
350
351
352
            leftProcToStartSend = idxProc;
            int hasToGive = currentNbLeafs;
            leavesToSend[idxProc] = Min(GetOtherRight(totalNbLeafs, idxProc) - GetOtherLeft(totalNbLeafs, idxProc) , hasToGive);
            {
                bytesOffset[idxProc] = 0;
                for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
353
                }
354
                bytesToSend[idxProc] = currentIntervalPosition - bytesOffset[idxProc];
355
            }
356
357
            hasToGive -= leavesToSend[idxProc];
            ++idxProc;
358

359
360
            while(idxProc < rank && hasToGive){
                leavesToSend[idxProc] = Min( GetOtherRight(totalNbLeafs, idxProc) - GetOtherLeft(totalNbLeafs, idxProc), hasToGive);
361

362
363
364
                bytesOffset[idxProc] = currentIntervalPosition;
                for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
365
                }
366
                bytesToSend[idxProc] = currentIntervalPosition - bytesOffset[idxProc];
367

368
369
                hasToGive -= leavesToSend[idxProc];
                ++idxProc;
370
371
            }
        }
372

373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
        {
            const FSize iNeedToSendLeftCount = nbLeafsOnMyLeft - currentLeafsOnMyLeft;
            const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - nbLeafsOnMyRight;
            FSize endForMe = currentNbLeafs;
            if(iNeedToSendToRight) endForMe -= iNeedToSendRightCount;

            for(FSize idxLeaf = iNeedToSendLeftCount ; idxLeaf < endForMe ; ++idxLeaf){
                const int nbPartInLeaf = (*(int*)&intervals[currentIntervalPosition]);
                ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&intervals[currentIntervalPosition] + sizeof(int));

                for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){
                    realTree.insert(particles[idxPart]);
                }
                currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int);
            }
        }
389

390
391
392
        int rightProcToStartSend = rank;
        if(iNeedToSendToRight){
            int idxProc = rank + 1;
393
394
395
            while( idxProc + 1 < nbProcs ){
                const FSize thisProcLeft = GetOtherLeft(totalNbLeafs, idxProc + 1);
                if( totalNbLeafs - currentLeafsOnMyRight < thisProcLeft ){
396
                    break;
397
                }
398
                ++idxProc;
399
            }
400

401
402
            rightProcToStartSend = idxProc;
            int hasToGive = currentNbLeafs;
403
404
            leavesToSend[idxProc] = Min((totalNbLeafs - currentLeafsOnMyRight) - GetOtherLeft(totalNbLeafs, idxProc) , hasToGive);

405
406
407
408
            {
                bytesOffset[idxProc] = currentIntervalPosition;
                for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
409
                }
410
411
412
413
                bytesToSend[idxProc] = currentIntervalPosition - bytesOffset[idxProc];
            }
            hasToGive -= leavesToSend[idxProc];
            --idxProc;
414

415
416
            while(rank < idxProc && hasToGive){
                leavesToSend[idxProc] = Min( GetOtherRight(totalNbLeafs, idxProc) - GetOtherLeft(totalNbLeafs, idxProc), hasToGive);
417

418
419
420
                bytesOffset[idxProc] = currentIntervalPosition;
                for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
421
                }
422
                bytesToSend[idxProc] = currentIntervalPosition - bytesOffset[idxProc];
423

424
425
426
                hasToGive -= leavesToSend[idxProc];
                --idxProc;
            }
427
        }
428

429
430
431
        int bytesToSendRecv[nbProcs * nbProcs];
        memset(bytesToSendRecv, 0, sizeof(int) * nbProcs * nbProcs);
        MPI_Allgather(bytesToSend, nbProcs, MPI_INT, bytesToSendRecv, nbProcs, MPI_INT, MPI_COMM_WORLD);
432

433
434
435
436
        int bytesToRecv[nbProcs];
        memset(bytesToRecv, 0, sizeof(int) * nbProcs);
        int bytesOffsetToRecv[nbProcs];
        memset(bytesOffsetToRecv, 0, sizeof(int) * nbProcs);
437

438
439
440
441
442
443
444
        FSize sumBytesToRecv = 0;
        for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc){
            if( bytesToSendRecv[idxProc * nbProcs + rank] ){
                bytesOffsetToRecv[idxProc] = sumBytesToRecv;
                sumBytesToRecv += bytesToSendRecv[idxProc * nbProcs + rank];
                bytesToRecv[idxProc] = bytesToSendRecv[idxProc * nbProcs + rank];
            }
445
446
        }

447
        char* const recvbuf = new char[sumBytesToRecv];
448

449
450
451
        MPI_Alltoallv(intervals, bytesToSend, bytesOffset, MPI_BYTE,
                      recvbuf, bytesToRecv, bytesOffsetToRecv, MPI_BYTE,
                      MPI_COMM_WORLD);
452

453
454
        FSize recvBufferPosition = 0;
        while( recvBufferPosition < sumBytesToRecv){
455
            const int nbPartInLeaf = (*reinterpret_cast<int*>(&recvbuf[recvBufferPosition]));
456
            ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&recvbuf[recvBufferPosition] + sizeof(int));
457

458
459
            for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){
                realTree.insert(particles[idxPart]);
460
            }
461
            recvBufferPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int);
462
463
        }

464
        delete[] recvbuf;
465
466
467

        return true;
    }
468
469
470
};

#endif // FMPITREEBUILDER_H